impala/tests/metadata/test_load.py
Nong Li fd35cee887 Reorganize/reduce end-to-end test time.
This patch does a few things:
1) Move the metadata tests into their own folder under tests/. I think it's useful to
loosely categorize them so it's easier to run the subset of tests most relevant to
the changes you are making.

2) Reduce the test vectors for query_tests. Coverage stays identical in the daily
exhaustive runs, while the normal runs should be much faster. In particular, this
de-emphasizes the scanner tests, since that code is more stable now (a hedged sketch
of this pattern follows the list below).

3) Miscellaneous test cleanup: consolidate Python test files, etc.
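
As a rough illustration of (2), this is the kind of constraint the reduced core runs
rely on. It is a hedged sketch only: TestScannersCore is a hypothetical class name,
and exploration_strategy() / TestMatrix.add_constraint() are the framework hooks this
sketch assumes, not code taken from this patch.

from tests.common.impala_test_suite import *

# Hypothetical example class; not part of this patch.
class TestScannersCore(ImpalaTestSuite):
  @classmethod
  def get_workload(cls):
    return 'functional-query'

  @classmethod
  def add_test_dimensions(cls):
    super(TestScannersCore, cls).add_test_dimensions()
    # Only the daily exhaustive run keeps the full file-format matrix; normal runs
    # are constrained to one representative format to cut end-to-end test time.
    if cls.exploration_strategy() != 'exhaustive':
      cls.TestMatrix.add_constraint(
          lambda v: v.get_value('table_format').file_format == 'text')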

Change-Id: I03c2f34877aed192c2a50665bd5e15fa85e12f1e
Reviewed-on: http://gerrit.sjc.cloudera.com:8080/3831
Tested-by: jenkins
Reviewed-by: Nong Li <nong@cloudera.com>
2014-08-17 12:43:57 -07:00

60 lines
2.3 KiB
Python

#!/usr/bin/env python
# Copyright (c) 2012 Cloudera, Inc. All rights reserved.
# Functional tests for LOAD DATA statements.
import pytest
from subprocess import call
from tests.common.test_vector import *
from tests.common.impala_test_suite import *

class TestLoadData(ImpalaTestSuite):
  @classmethod
  def get_workload(cls):
    return 'functional-query'

  @classmethod
  def add_test_dimensions(cls):
    super(TestLoadData, cls).add_test_dimensions()
    cls.TestMatrix.add_dimension(create_single_exec_option_dimension())
    cls.TestMatrix.add_dimension(
        create_uncompressed_text_dimension(cls.get_workload()))

  def setup_method(self, method):
    # Cleanup any existing files in the test tables and staging directories.
    call(["hadoop", "fs", "-rm", "-r", "-f", "/test-warehouse/test_load*"], shell=False)
    call(["hadoop", "fs", "-rm", "-r", "-f", "/tmp/load_data/"], shell=False)

    # Create staging directories.
    for i in range(1, 6):
      call(["hadoop", "fs", "-mkdir", "-p", "/tmp/load_data/%d" % i], shell=False)

    # Copy some data files from existing tables to validate load.
    for i in range(1, 4):
      call(["hadoop", "fs", "-cp",
            "/test-warehouse/alltypes/year=2010/month=1/100101.txt",
            "/tmp/load_data/%d" % i], shell=False)

    # Each partition in alltypesaggmultifiles should have 4 data files.
    for i in range(4, 6):
      call(["hadoop", "fs", "-cp",
            "/test-warehouse/alltypesaggmultifiles/year=2010/month=1/day=1/*",
            "/tmp/load_data/%d/" % i], shell=False)

    # Make some hidden files.
    call(["hadoop", "fs", "-cp",
          "/test-warehouse/alltypes/year=2010/month=1/100101.txt",
          "/tmp/load_data/3/.100101.txt"], shell=False)
    call(["hadoop", "fs", "-cp",
          "/test-warehouse/alltypes/year=2010/month=1/100101.txt",
          "/tmp/load_data/3/_100101.txt"], shell=False)

  @classmethod
  def __assert_hdfs_path_exists(cls, path):
    assert 0 == call(["hadoop", "fs", "-test", "-e", path], shell=False),\
        "Path does not exist."

  def test_load(self, vector):
    self.run_test_case('QueryTest/load', vector)
    # The hidden files should not have been moved as part of the load operation.
    self.__assert_hdfs_path_exists("/tmp/load_data/3/.100101.txt")
    self.__assert_hdfs_path_exists("/tmp/load_data/3/_100101.txt")
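
The LOAD DATA statements themselves live in the QueryTest/load workload file that
run_test_case references, which is not shown on this page. As a rough, hedged sketch
of what that workload drives against the staging directories built in setup_method,
an equivalent statement issued directly through the suite's client might look like
the following; the method name and target table are hypothetical and not part of
test_load.py:

  # Hypothetical sketch; not part of the checked-in file. Conceptually it would sit
  # inside TestLoadData alongside test_load above.
  def test_load_one_staging_dir(self, vector):
    # Assumed scratch table, cloned from an existing functional-dataset table so the
    # column layout matches the files staged under /tmp/load_data/1.
    self.client.execute("create table if not exists functional.test_load_direct "
                        "like functional.alltypesnopart")
    # Move the staged files into the table; in Impala, LOAD DATA moves (not copies)
    # the files within HDFS.
    self.client.execute("load data inpath '/tmp/load_data/1' "
                        "into table functional.test_load_direct")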