diff --git a/testdata/workloads/functional-query/queries/QueryTest/test-unmatched-schema.test b/testdata/workloads/functional-query/queries/QueryTest/test-unmatched-schema.test new file mode 100644 index 000000000..89f50e21b --- /dev/null +++ b/testdata/workloads/functional-query/queries/QueryTest/test-unmatched-schema.test @@ -0,0 +1,153 @@ +# Test case to verify the scanners work properly when the table metadata (specifically the +# number of columns in the table) does not match the number of columns in the data file. +==== +---- QUERY +select * from jointbl_test +---- RESULTS +1001,'Name1',94611,5000 +1002,'Name2',94611,5000 +1003,'Name3',94611,5000 +1004,'Name4',94611,5000 +1005,'Name5',94611,5000 +1106,'Name6',94612,5000 +1006,'Name16',94612,5000 +1006,'Name6',94616,5000 +1106,'Name16',94612,5000 +1106,'Name6',94616,5000 +1006,'Name16',94616,5000 +1106,'Name16',94616,5000 +1106,'Name6',94612,15000 +1006,'Name16',94612,15000 +1006,'Name6',94616,15000 +1106,'Name16',94612,15000 +1106,'Name6',94616,15000 +1006,'Name16',94616,15000 +1106,'Name16',94616,15000 +---- TYPES +BIGINT, STRING, INT, INT +==== +---- QUERY +alter table jointbl_test add columns(new_col string) +---- RESULTS + +==== +---- QUERY +select * from jointbl_test +---- RESULTS +1001,'Name1',94611,5000,'' +1002,'Name2',94611,5000,'' +1003,'Name3',94611,5000,'' +1004,'Name4',94611,5000,'' +1005,'Name5',94611,5000,'' +1106,'Name6',94612,5000,'' +1006,'Name16',94612,5000,'' +1006,'Name6',94616,5000,'' +1106,'Name16',94612,5000,'' +1106,'Name6',94616,5000,'' +1006,'Name16',94616,5000,'' +1106,'Name16',94616,5000,'' +1106,'Name6',94612,15000,'' +1006,'Name16',94612,15000,'' +1006,'Name6',94616,15000,'' +1106,'Name16',94612,15000,'' +1106,'Name6',94616,15000,'' +1006,'Name16',94616,15000,'' +1106,'Name16',94616,15000,'' +---- TYPES +BIGINT, STRING, INT, INT, STRING +==== +---- QUERY +alter table jointbl_test add columns(new_int_col int) +---- RESULTS + +==== +---- QUERY +select * from jointbl_test +---- 
RESULTS +1001,'Name1',94611,5000,'',NULL +1002,'Name2',94611,5000,'',NULL +1003,'Name3',94611,5000,'',NULL +1004,'Name4',94611,5000,'',NULL +1005,'Name5',94611,5000,'',NULL +1106,'Name6',94612,5000,'',NULL +1006,'Name16',94612,5000,'',NULL +1006,'Name6',94616,5000,'',NULL +1106,'Name16',94612,5000,'',NULL +1106,'Name6',94616,5000,'',NULL +1006,'Name16',94616,5000,'',NULL +1106,'Name16',94616,5000,'',NULL +1106,'Name6',94612,15000,'',NULL +1006,'Name16',94612,15000,'',NULL +1006,'Name6',94616,15000,'',NULL +1106,'Name16',94612,15000,'',NULL +1106,'Name6',94616,15000,'',NULL +1006,'Name16',94616,15000,'',NULL +1106,'Name16',94616,15000,'',NULL +---- TYPES +BIGINT, STRING, INT, INT, STRING, INT +==== +---- QUERY +alter table jointbl_test drop column new_int_col +---- RESULTS + +==== +---- QUERY +select * from jointbl_test +---- RESULTS +1001,'Name1',94611,5000,'' +1002,'Name2',94611,5000,'' +1003,'Name3',94611,5000,'' +1004,'Name4',94611,5000,'' +1005,'Name5',94611,5000,'' +1106,'Name6',94612,5000,'' +1006,'Name16',94612,5000,'' +1006,'Name6',94616,5000,'' +1106,'Name16',94612,5000,'' +1106,'Name6',94616,5000,'' +1006,'Name16',94616,5000,'' +1106,'Name16',94616,5000,'' +1106,'Name6',94612,15000,'' +1006,'Name16',94612,15000,'' +1006,'Name6',94616,15000,'' +1106,'Name16',94612,15000,'' +1106,'Name6',94616,15000,'' +1006,'Name16',94616,15000,'' +1106,'Name16',94616,15000,'' +---- TYPES +BIGINT, STRING, INT, INT, STRING +==== +---- QUERY +alter table jointbl_test drop column new_col +---- RESULTS + +==== +---- QUERY +alter table jointbl_test drop column alltypes_id +---- RESULTS + +==== +---- QUERY +select * from jointbl_test +---- RESULTS +1001,'Name1',94611 +1002,'Name2',94611 +1003,'Name3',94611 +1004,'Name4',94611 +1005,'Name5',94611 +1106,'Name6',94612 +1006,'Name16',94612 +1006,'Name6',94616 +1106,'Name16',94612 +1106,'Name6',94616 +1006,'Name16',94616 +1106,'Name16',94616 +1106,'Name6',94612 +1006,'Name16',94612 +1006,'Name6',94616 +1106,'Name16',94612 
def create_single_exec_option_dimension():
    """Build an exec_option dimension that expands to a single test vector.

    Every option list handed to create_exec_option_dimension() holds exactly one
    value, so no combinations are enumerated.
    """
    single_valued_options = {
        'cluster_sizes': ALL_NODES_ONLY,
        'disable_codegen_options': [False],
        'batch_sizes': [0],
    }
    return create_exec_option_dimension(**single_valued_options)
class TestUnmatchedSchema(ImpalaTestSuite):
    """Verify the scanners work when table metadata and data files disagree.

    Specifically, the number of columns in the table metadata does not match the
    number of columns in the data file. The test creates an external table
    'jointbl_test' that shares the location of 'jointbl' and then runs the
    'QueryTest/test-unmatched-schema' test case against it.
    """

    @classmethod
    def get_workload(cls):
        """Return the name of the workload this suite belongs to."""
        return 'functional-query'

    @classmethod
    def add_test_dimensions(cls):
        """Register the table-format and exec-option dimensions for this suite."""
        super(TestUnmatchedSchema, cls).add_test_dimensions()
        # TODO: Does it add anything to enumerate all the supported compression codecs
        # for each table format?
        cls.TestMatrix.add_dimension(cls.create_table_info_dimension('exhaustive'))
        cls.TestMatrix.add_dimension(create_single_exec_option_dimension())
        # Avro has a more advanced schema evolution process which is covered in more
        # depth in the test_avro_schema_evolution test suite.
        cls.TestMatrix.add_constraint(
            lambda v: v.get_value('table_format').file_format != 'avro')

    def __get_table_location(self, table_name, vector):
        """Return the location of 'table_name' as reported by DESCRIBE FORMATTED.

        Scans the result rows for the first one containing 'Location:' and returns
        its second tab-separated field.

        Raises:
            AssertionError: if no row of the output contains 'Location:'.
        """
        result = self.execute_query_using_client(
            self.client, "describe formatted %s" % table_name, vector)
        for row in result.data:
            if 'Location:' in row:
                return row.split('\t')[1]
        # This should never happen. Raise explicitly rather than using a bare
        # 'assert 0', which is stripped when Python runs with -O and would let the
        # method silently return None.
        raise AssertionError('Unable to get location for table: ' + table_name)

    def __create_test_table(self, vector):
        """Create the test table, replacing any previous copy.

        Cannot be done in a setup method because we need access to the current test
        vector.
        """
        self.__drop_test_table(vector)
        self.execute_query_using_client(
            self.client, "create external table jointbl_test like jointbl", vector)

        # Update the location of the new table to point the same location as the old
        # table.
        location = self.__get_table_location('jointbl', vector)
        self.execute_query_using_client(
            self.client, "alter table jointbl_test set location '%s'" % location,
            vector)

    def __drop_test_table(self, vector):
        """Drop the test table if it exists."""
        self.execute_query_using_client(
            self.client, "drop table if exists jointbl_test", vector)

    def test_unmatched_schema(self, vector):
        """Run the unmatched-schema test case against a freshly created test table."""
        table_format = vector.get_value('table_format')
        if table_format.file_format in ['parquet', 'rc']:
            pytest.xfail('IMPALA-499 + IMPALA-497')

        # Always drop the test table, even when table setup or the test case fails,
        # so a failure does not leave the altered table behind.
        try:
            self.__create_test_table(vector)
            self.run_test_case('QueryTest/test-unmatched-schema', vector)
        finally:
            self.__drop_test_table(vector)