diff --git a/testdata/workloads/functional-query/queries/QueryTest/max-nesting-depth.test b/testdata/workloads/functional-query/queries/QueryTest/max-nesting-depth.test index 1bbd34d1a..a7ca3a2e9 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/max-nesting-depth.test +++ b/testdata/workloads/functional-query/queries/QueryTest/max-nesting-depth.test @@ -1,10 +1,10 @@ ==== ---- QUERY # Test maximally nested struct. -create external table max_nesting_depth.struct_tbl -like parquet '$FILESYSTEM_PREFIX/test-warehouse/max_nesting_depth/struct/file.parq' +create external table $DATABASE.struct_tbl +like parquet '$FILESYSTEM_PREFIX/test-warehouse/$DATABASE.db/max_nesting_depth/struct/file.parq' stored as parquet -location '$FILESYSTEM_PREFIX/test-warehouse/max_nesting_depth/struct/' +location '$FILESYSTEM_PREFIX/test-warehouse/$DATABASE.db/max_nesting_depth/struct/' ==== ---- QUERY select f. @@ -13,7 +13,7 @@ select f. f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0. f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0. f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0 -from max_nesting_depth.struct_tbl +from $DATABASE.struct_tbl ---- RESULTS 42 ---- TYPES @@ -21,14 +21,14 @@ int ==== ---- QUERY # Test maximally nested array. -create external table max_nesting_depth.int_array_tbl -like parquet '$FILESYSTEM_PREFIX/test-warehouse/max_nesting_depth/int_array/file.parq' +create external table $DATABASE.int_array_tbl +like parquet '$FILESYSTEM_PREFIX/test-warehouse/$DATABASE.db/max_nesting_depth/int_array/file.parq' stored as parquet -location '$FILESYSTEM_PREFIX/test-warehouse/max_nesting_depth/int_array/' +location '$FILESYSTEM_PREFIX/test-warehouse/$DATABASE.db/max_nesting_depth/int_array/' ==== ---- QUERY # Test absolute table ref executed with a single scan. -select * from max_nesting_depth.int_array_tbl.f. +select * from $DATABASE.int_array_tbl.f. item.item.item.item.item.item.item.item.item.item. item.item.item.item.item.item.item.item.item.item. item.item.item.item.item.item.item.item.item.item. @@ -46,7 +46,7 @@ int ==== ---- QUERY # Test relative ref executed with deeply nested subplans. -select * from max_nesting_depth.int_array_tbl.f t0, +select * from $DATABASE.int_array_tbl.f t0, t0.item t1, t1.item t2, t2.item t3, t3.item t4, t4.item t5, t5.item t6, t6.item t7, t7.item t8, t8.item t9, t9.item t10, t10.item t11, t11.item t12, t12.item t13, t13.item t14, t14.item t15, @@ -74,14 +74,14 @@ int ==== ---- QUERY # Test maximally nested array of struct. -create external table max_nesting_depth.struct_array_tbl -like parquet '$FILESYSTEM_PREFIX/test-warehouse/max_nesting_depth/struct_array/file.parq' +create external table $DATABASE.struct_array_tbl +like parquet '$FILESYSTEM_PREFIX/test-warehouse/$DATABASE.db/max_nesting_depth/struct_array/file.parq' stored as parquet -location '$FILESYSTEM_PREFIX/test-warehouse/max_nesting_depth/struct_array/' +location '$FILESYSTEM_PREFIX/test-warehouse/$DATABASE.db/max_nesting_depth/struct_array/' ==== ---- QUERY # Test absolute table ref executed with a single scan. -select * from max_nesting_depth.struct_array_tbl.f. +select * from $DATABASE.struct_array_tbl.f. f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0. f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0. f0.f0.f0.f0.f0.f0.f0.f0.f0 @@ -92,7 +92,7 @@ int ==== ---- QUERY # Test relative ref executed with deeply nested subplans. -select * from max_nesting_depth.struct_array_tbl.f t0, +select * from $DATABASE.struct_array_tbl.f t0, t0.f0 t1, t1.f0 t2, t2.f0 t3, t3.f0 t4, t4.f0 t5, t5.f0 t6, t6.f0 t7, t7.f0 t8, t8.f0 t9, t9.f0 t10, t10.f0 t11, t11.f0 t12, t12.f0 t13, t13.f0 t14, t14.f0 t15, @@ -110,14 +110,14 @@ int ==== ---- QUERY # Test maximally nested map. -create external table max_nesting_depth.int_map_tbl -like parquet '$FILESYSTEM_PREFIX/test-warehouse/max_nesting_depth/int_map/file.parq' +create external table $DATABASE.int_map_tbl +like parquet '$FILESYSTEM_PREFIX/test-warehouse/$DATABASE.db/max_nesting_depth/int_map/file.parq' stored as parquet -location '$FILESYSTEM_PREFIX/test-warehouse/max_nesting_depth/int_map/' +location '$FILESYSTEM_PREFIX/test-warehouse/$DATABASE.db/max_nesting_depth/int_map/' ==== ---- QUERY # Test absolute table ref executed with a single scan. -select t.value from max_nesting_depth.int_map_tbl.f. +select t.value from $DATABASE.int_map_tbl.f. value.value.value.value.value.value.value.value.value.value. value.value.value.value.value.value.value.value.value.value. value.value.value.value.value.value.value.value.value.value. @@ -135,7 +135,7 @@ int ==== ---- QUERY # Test relative ref executed with deeply nested subplans. -select t98.value from max_nesting_depth.int_map_tbl.f t0, +select t98.value from $DATABASE.int_map_tbl.f t0, t0.value t1, t1.value t2, t2.value t3, t3.value t4, t4.value t5, t5.value t6, t6.value t7, t7.value t8, t8.value t9, t9.value t10, t10.value t11, t11.value t12, t12.value t13, t13.value t14, t14.value t15, @@ -163,14 +163,14 @@ int ==== ---- QUERY # Test maximally nested map of struct. -create external table max_nesting_depth.struct_map_tbl -like parquet '$FILESYSTEM_PREFIX/test-warehouse/max_nesting_depth/struct_map/file.parq' +create external table $DATABASE.struct_map_tbl +like parquet '$FILESYSTEM_PREFIX/test-warehouse/$DATABASE.db/max_nesting_depth/struct_map/file.parq' stored as parquet -location '$FILESYSTEM_PREFIX/test-warehouse/max_nesting_depth/struct_map/' +location '$FILESYSTEM_PREFIX/test-warehouse/$DATABASE.db/max_nesting_depth/struct_map/' ==== ---- QUERY # Test absolute table ref executed with a single scan. -select t.value from max_nesting_depth.struct_map_tbl.f. +select t.value from $DATABASE.struct_map_tbl.f. f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0. f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0. f0.f0.f0.f0.f0.f0.f0.f0.f0 t @@ -181,7 +181,7 @@ int ==== ---- QUERY # Test relative ref executed with deeply nested subplans. -select t49.value from max_nesting_depth.struct_map_tbl.f t0, +select t49.value from $DATABASE.struct_map_tbl.f t0, t0.f0 t1, t1.f0 t2, t2.f0 t3, t3.f0 t4, t4.f0 t5, t5.f0 t6, t6.f0 t7, t7.f0 t8, t8.f0 t9, t9.f0 t10, t10.f0 t11, t11.f0 t12, t12.f0 t13, t13.f0 t14, t14.f0 t15, diff --git a/tests/query_test/test_nested_types.py b/tests/query_test/test_nested_types.py index cf3763625..55be260ad 100644 --- a/tests/query_test/test_nested_types.py +++ b/tests/query_test/test_nested_types.py @@ -18,9 +18,7 @@ # under the License. import os -import pytest -import random -from subprocess import call, check_call +from subprocess import check_call from tests.beeswax.impala_beeswax import ImpalaBeeswaxException from tests.common.impala_test_suite import ImpalaTestSuite @@ -79,10 +77,8 @@ class TestNestedTypes(ImpalaTestSuite): @SkipIfOldAggsJoins.nested_types class TestParquetArrayEncodings(ImpalaTestSuite): - # Create a unique database name so we can run multiple instances of this test class in - # parallel - DATABASE = "test_parquet_list_encodings_" + str(random.randint(0, 10**10)) - TESTFILE_DIR = os.environ['IMPALA_HOME'] + "/testdata/parquet_nested_types_encodings" + TESTFILE_DIR = os.path.join(os.environ['IMPALA_HOME'], + "testdata/parquet_nested_types_encodings") @classmethod def get_workload(self): @@ -94,17 +90,6 @@ class TestParquetArrayEncodings(ImpalaTestSuite): cls.TestMatrix.add_constraint(lambda v: v.get_value('table_format').file_format == 'parquet') - @classmethod - def setup_class(cls): - super(TestParquetArrayEncodings, cls).setup_class() - cls.cleanup_db(cls.DATABASE) - cls.client.execute("create database if not exists " + cls.DATABASE) - - @classmethod - def teardown_class(cls): - cls.cleanup_db(cls.DATABASE) - super(TestParquetArrayEncodings, cls).teardown_class() - # $ parquet-tools schema SingleFieldGroupInList.parquet # message SingleFieldGroupInList { # optional group single_element_groups (LIST) { @@ -120,11 +105,11 @@ class TestParquetArrayEncodings(ImpalaTestSuite): # ..count = 1234 # .single_element_group: # ..count = 2345 - def test_single_field_group_in_list(self, vector): - tablename = self._create_test_table("SingleFieldGroupInList", - "SingleFieldGroupInList.parquet", "col1 array>") - - full_name = "%s.%s" % (self.DATABASE, tablename) + def test_single_field_group_in_list(self, vector, unique_database): + tablename = "SingleFieldGroupInList" + full_name = "%s.%s" % (unique_database, tablename) + self._create_test_table(unique_database, tablename, "SingleFieldGroupInList.parquet", + "col1 array>") result = self.client.execute("select item.count from %s.col1" % full_name) assert len(result.data) == 2 @@ -151,11 +136,11 @@ class TestParquetArrayEncodings(ImpalaTestSuite): # .array = 34 # .array = 35 # .array = 36 - def test_avro_primitive_in_list(self, vector): - tablename = self._create_test_table("AvroPrimitiveInList", - "AvroPrimitiveInList.parquet", "col1 array") - - full_name = "%s.%s" % (self.DATABASE, tablename) + def test_avro_primitive_in_list(self, vector, unique_database): + tablename = "AvroPrimitiveInList" + full_name = "%s.%s" % (unique_database, tablename) + self._create_test_table(unique_database, tablename, "AvroPrimitiveInList.parquet", + "col1 array") result = self.client.execute("select item from %s.col1" % full_name) assert len(result.data) == 3 @@ -185,13 +170,13 @@ class TestParquetArrayEncodings(ImpalaTestSuite): # ..count = 1234 # .array: # ..count = 2345 - def test_avro_single_field_group_in_list(self, vector): -# Note that the field name does not match the field name in the file schema. - tablename = self._create_test_table("AvroSingleFieldGroupInList", + def test_avro_single_field_group_in_list(self, vector, unique_database): + tablename = "AvroSingleFieldGroupInList" + full_name = "%s.%s" % (unique_database, tablename) + # Note that the field name does not match the field name in the file schema. + self._create_test_table(unique_database, tablename, "AvroSingleFieldGroupInList.parquet", "col1 array>") - full_name = "%s.%s" % (self.DATABASE, tablename) - result = self.client.execute("select item.f1 from %s.col1" % full_name) assert len(result.data) == 2 assert result.data == ['1234', '2345'] @@ -205,7 +190,7 @@ class TestParquetArrayEncodings(ImpalaTestSuite): assert len(result.data) == 1 assert result.data == ['2'] - # $ parquet-tools schema bad-avro.parquet + # $ parquet-tools schema bad-avro.parquet # message org.apache.spark.sql.execution.datasources.parquet.test.avro.AvroArrayOfArray { # required group int_arrays_column (LIST) { # repeated group array (LIST) { @@ -213,8 +198,8 @@ class TestParquetArrayEncodings(ImpalaTestSuite): # } # } # } - # - # $ parquet-tools cat bad-avro.parquet + # + # $ parquet-tools cat bad-avro.parquet # int_arrays_column: # .array: # ..array = 0 @@ -228,7 +213,7 @@ class TestParquetArrayEncodings(ImpalaTestSuite): # ..array = 6 # ..array = 7 # ..array = 8 - # + # # int_arrays_column: # .array: # ..array = 0 @@ -242,13 +227,13 @@ class TestParquetArrayEncodings(ImpalaTestSuite): # ..array = 6 # ..array = 7 # ..array = 8 - # + # # [Same int_arrays_column repeated 8x more] - def test_avro_array_of_arrays(self, vector): - tablename = self._create_test_table("AvroArrayOfArrays", "bad-avro.parquet", - "col1 array>") - - full_name = "%s.%s" % (self.DATABASE, tablename) + def test_avro_array_of_arrays(self, vector, unique_database): + tablename = "AvroArrayOfArrays" + full_name = "%s.%s" % (unique_database, tablename) + self._create_test_table(unique_database, tablename, "bad-avro.parquet", + "col1 array>") result = self.client.execute("select item from %s.col1.item" % full_name) assert len(result.data) == 9 * 10 @@ -281,12 +266,12 @@ class TestParquetArrayEncodings(ImpalaTestSuite): # .list_of_ints_tuple = 34 # .list_of_ints_tuple = 35 # .list_of_ints_tuple = 36 - def test_thrift_primitive_in_list(self, vector): - tablename = self._create_test_table("ThriftPrimitiveInList", + def test_thrift_primitive_in_list(self, vector, unique_database): + tablename = "ThriftPrimitiveInList" + full_name = "%s.%s" % (unique_database, tablename) + self._create_test_table(unique_database, tablename, "ThriftPrimitiveInList.parquet", "col1 array") - full_name = "%s.%s" % (self.DATABASE, tablename) - result = self.client.execute("select item from %s.col1" % full_name) assert len(result.data) == 3 assert result.data == ['34', '35', '36'] @@ -315,12 +300,12 @@ class TestParquetArrayEncodings(ImpalaTestSuite): # ..count = 1234 # .single_element_groups_tuple: # ..count = 2345 - def test_thrift_single_field_group_in_list(self, vector): - tablename = self._create_test_table("ThriftSingleFieldGroupInList", + def test_thrift_single_field_group_in_list(self, vector, unique_database): + tablename = "ThriftSingleFieldGroupInList" + full_name = "%s.%s" % (unique_database, tablename) + self._create_test_table(unique_database, tablename, "ThriftSingleFieldGroupInList.parquet", "col1 array>") - full_name = "%s.%s" % (self.DATABASE, tablename) - result = self.client.execute("select item.f1 from %s.col1" % full_name) assert len(result.data) == 2 assert result.data == ['1234', '2345'] @@ -334,7 +319,7 @@ class TestParquetArrayEncodings(ImpalaTestSuite): assert len(result.data) == 1 assert result.data == ['2'] - # $ parquet-tools schema bad-thrift.parquet + # $ parquet-tools schema bad-thrift.parquet # message ParquetSchema { # required group intListsColumn (LIST) { # repeated group intListsColumn_tuple (LIST) { @@ -342,8 +327,8 @@ class TestParquetArrayEncodings(ImpalaTestSuite): # } # } # } - # - # $ parquet-tools cat bad-thrift.parquet + # + # $ parquet-tools cat bad-thrift.parquet # intListsColumn: # .intListsColumn_tuple: # ..intListsColumn_tuple_tuple = 0 @@ -357,11 +342,11 @@ class TestParquetArrayEncodings(ImpalaTestSuite): # ..intListsColumn_tuple_tuple = 6 # ..intListsColumn_tuple_tuple = 7 # ..intListsColumn_tuple_tuple = 8 - def test_thrift_array_of_arrays(self, vector): - tablename = self._create_test_table("ThriftArrayOfArrays", "bad-thrift.parquet", - "col1 array>") - - full_name = "%s.%s" % (self.DATABASE, tablename) + def test_thrift_array_of_arrays(self, vector, unique_database): + tablename = "ThriftArrayOfArrays" + full_name = "%s.%s" % (unique_database, tablename) + self._create_test_table(unique_database, tablename, "bad-thrift.parquet", + "col1 array>") result = self.client.execute("select item from %s.col1.item" % full_name) assert len(result.data) == 9 @@ -391,12 +376,12 @@ class TestParquetArrayEncodings(ImpalaTestSuite): # list_of_ints = 34 # list_of_ints = 35 # list_of_ints = 36 - def test_unannotated_list_of_primitives(self, vector): - tablename = self._create_test_table("UnannotatedListOfPrimitives", + def test_unannotated_list_of_primitives(self, vector, unique_database): + tablename = "UnannotatedListOfPrimitives" + full_name = "%s.%s" % (unique_database, tablename) + self._create_test_table(unique_database, tablename, "UnannotatedListOfPrimitives.parquet", "col1 array") - full_name = "%s.%s" % (self.DATABASE, tablename) - result = self.client.execute("select item from %s.col1" % full_name) assert len(result.data) == 3 assert result.data == ['34', '35', '36'] @@ -425,12 +410,12 @@ class TestParquetArrayEncodings(ImpalaTestSuite): # list_of_points: # .x = 2.0 # .y = 2.0 - def test_unannotated_list_of_groups(self, vector): - tablename = self._create_test_table("UnannotatedListOfGroups", + def test_unannotated_list_of_groups(self, vector, unique_database): + tablename = "UnannotatedListOfGroups" + full_name = "%s.%s" % (unique_database, tablename) + self._create_test_table(unique_database, tablename, "UnannotatedListOfGroups.parquet", "col1 array>") - full_name = "%s.%s" % (self.DATABASE, tablename) - result = self.client.execute("select f1, f2 from %s.col1" % full_name) assert len(result.data) == 2 assert result.data == ['1\t1', '2\t2'] @@ -444,23 +429,17 @@ class TestParquetArrayEncodings(ImpalaTestSuite): assert len(result.data) == 1 assert result.data == ['2'] - def _create_test_table(self, tablename, filename, columns): - """Returns a unique tablename based on the input 'tablename'. This allows multiple - instances of the same test to be run in parallel (e.g. during an exhaustive run).""" - tablename = "%s_%s" % (tablename, random.randint(0, 10**5)) - location = get_fs_path("/test-warehouse/%s_%s" % (self.DATABASE, tablename)) + def _create_test_table(self, dbname, tablename, filename, columns): + """Creates a table in the given database with the given name and columns. Copies + the file with the given name from TESTFILE_DIR into the table.""" + location = get_fs_path("/test-warehouse/%s.db/%s" % (dbname, tablename)) self.client.execute("create table %s.%s (%s) stored as parquet location '%s'" % - (self.DATABASE, tablename, columns, location)) + (dbname, tablename, columns, location)) local_path = self.TESTFILE_DIR + "/" + filename check_call(["hadoop", "fs", "-put", local_path, location], shell=False) - self.client.execute("invalidate metadata %s.%s" % (self.DATABASE, tablename)) - return tablename @SkipIfOldAggsJoins.nested_types class TestMaxNestingDepth(ImpalaTestSuite): - TEST_DB = 'max_nesting_depth' - TEST_HDFS_ROOT_DIR = "%s/max_nesting_depth/" % WAREHOUSE - # Should be kept in sync with the FE's Type.MAX_NESTING_DEPTH MAX_NESTING_DEPTH = 100 @@ -474,52 +453,29 @@ class TestMaxNestingDepth(ImpalaTestSuite): cls.TestMatrix.add_constraint(lambda v: v.get_value('table_format').file_format == 'parquet') - def setup_method(self, method): - self.cleanup_db(TestMaxNestingDepth.TEST_DB) - self.client.execute("create database %s location '%s/%s.db'" % - (TestMaxNestingDepth.TEST_DB, WAREHOUSE, - TestMaxNestingDepth.TEST_DB)) - if method.__name__ == "test_max_nesting_depth": - # Clean up HDFS directory and copy the test files. - self.__remove_test_hdfs_dir() - self.__copy_test_hdfs_dir() - - def teardown_method(self, method): - self.cleanup_db(TestMaxNestingDepth.TEST_DB) - if method.__name__ == "test_max_nesting_depth": - self.__remove_test_hdfs_dir() - - def __remove_test_hdfs_dir(self): - call(["hdfs", "dfs", "-rm", "-r", - TestMaxNestingDepth.TEST_HDFS_ROOT_DIR], shell=False) - - def __copy_test_hdfs_dir(self): - check_call(["hdfs", "dfs", "-copyFromLocal", - "%s/testdata/max_nesting_depth" % os.environ['IMPALA_HOME'], - WAREHOUSE], shell=False) - - @pytest.mark.execute_serially - def test_max_nesting_depth(self, vector): + def test_max_nesting_depth(self, vector, unique_database): """Tests that Impala can scan Parquet files having complex types of the maximum nesting depth.""" - self.run_test_case('QueryTest/max-nesting-depth', vector) + check_call(["hdfs", "dfs", "-copyFromLocal", + "%s/testdata/max_nesting_depth" % os.environ['IMPALA_HOME'], + "%s/%s.db/" % (WAREHOUSE, unique_database)], shell=False) + self.run_test_case('QueryTest/max-nesting-depth', vector, unique_database) - @pytest.mark.execute_serially @SkipIfIsilon.hive @SkipIfS3.hive @SkipIfLocal.hive - def test_load_hive_table(self, vector): + def test_load_hive_table(self, vector, unique_database): """Tests that Impala rejects Hive-created tables with complex types that exceed the maximum nesting depth.""" # Type with a nesting depth of MAX_NESTING_DEPTH + 1 type_sql = ("array<" * self.MAX_NESTING_DEPTH) + "int" +\ (">" * self.MAX_NESTING_DEPTH) create_table_sql = "CREATE TABLE %s.above_max_depth (f %s) STORED AS PARQUET" %\ - (self.TEST_DB, type_sql) - check_call(["hive", "-e", create_table_sql], shell=False) - self.client.execute("invalidate metadata %s.above_max_depth" % self.TEST_DB) + (unique_database, type_sql) + self.run_stmt_in_hive(create_table_sql) + self.client.execute("invalidate metadata %s.above_max_depth" % unique_database) try: - self.client.execute("explain select 1 from %s.above_max_depth" % self.TEST_DB) + self.client.execute("explain select 1 from %s.above_max_depth" % unique_database) assert False, "Expected table loading to fail." except ImpalaBeeswaxException, e: assert "Type exceeds the maximum nesting depth" in str(e)