mirror of
https://github.com/apache/impala.git
synced 2025-12-19 18:12:08 -05:00
IMPALA-13958: Revisit hs2_parquet_constraint and hs2_text_constraint
hs2_parquet_constraint and hs2_text_constraint are meant to extend the test vector dimension to also test non-default test protocols (other than beeswax), but limit those protocols to run only against the 'parquet/none' or 'text/none' format respectively. This patch changes these constraints to default_protocol_or_parquet_constraint and default_protocol_or_text_constraint respectively, such that full file format coverage happens for the default_test_protocol configuration and coverage is limited for the other protocols. Drop hs2_parquet_constraint entirely from test_utf8_strings.py because that test is already constrained to the single 'parquet/none' file format. Num modified rows validation in date-fileformat-support.test and date-partitioning.test is changed to check the NumModifiedRows counter from the profile. Fix TestQueriesJsonTables to always run with the beeswax protocol because its assertions rely on beeswax-specific return values. Run impala-isort and fix a few flake8 issues in the modified test files. Testing: Ran and passed the affected test files using exhaustive exploration and env var DEFAULT_TEST_PROTOCOL=hs2. Confirmed that full file format coverage happens for the hs2 protocol. Note that DEFAULT_TEST_PROTOCOL=beeswax is still the default. Change-Id: I8be0a628842e29a8fcc036180654cd159f6a23c8 Reviewed-on: http://gerrit.cloudera.org:8080/22775 Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
This commit is contained in:
committed by
Impala Public Jenkins
parent
e9a706723d
commit
182aa5066e
@@ -45,18 +45,18 @@ NULL
|
||||
# Inserting text partitions to $DATABASE.date_tbl is OK.
|
||||
insert into $DATABASE.date_tbl partition (date_part)
|
||||
select date_col, date_part from functional.date_tbl;
|
||||
---- RESULTS
|
||||
date_part=0001-01-01: 7
|
||||
date_part=1399-06-27: 3
|
||||
date_part=2017-11-27: 10
|
||||
date_part=9999-12-31: 2
|
||||
---- RUNTIME_PROFILE
|
||||
NumModifiedRows: 7
|
||||
NumModifiedRows: 3
|
||||
NumModifiedRows: 10
|
||||
NumModifiedRows: 2
|
||||
====
|
||||
---- QUERY
|
||||
# Inserting into parquet partition is supported.
|
||||
insert into $DATABASE.date_tbl partition(date_part='1899-12-31')
|
||||
select date_col from functional_parquet.date_tbl where date_part = '1399-06-27';
|
||||
---- RESULTS
|
||||
date_part=1899-12-31: 3
|
||||
---- RUNTIME_PROFILE
|
||||
NumModifiedRows: 3
|
||||
====
|
||||
---- QUERY
|
||||
# Adding ORC partition works even though Impala cannot write ORC format.
|
||||
|
||||
@@ -25,29 +25,29 @@ AnalysisException: Partition spec already exists: (p=DATE '1300-01-01').
|
||||
---- QUERY
|
||||
# Date partition formatted differently in insert
|
||||
insert into $DATABASE.dtbl partition (p='1300-1-01') values ('1300-1-1');
|
||||
---- RESULTS
|
||||
p=1300-01-01: 1
|
||||
---- RUNTIME_PROFILE
|
||||
NumModifiedRows: 1
|
||||
====
|
||||
---- QUERY
|
||||
insert into $DATABASE.dtbl partition (p='1300-01-1') values ('1300-1-02');
|
||||
---- RESULTS
|
||||
p=1300-01-01: 1
|
||||
---- RUNTIME_PROFILE
|
||||
NumModifiedRows: 1
|
||||
====
|
||||
---- QUERY
|
||||
insert into $DATABASE.dtbl partition (p=DATE '1300-1-1') values ('1300-1-03');
|
||||
---- RESULTS
|
||||
p=1300-01-01: 1
|
||||
---- RUNTIME_PROFILE
|
||||
NumModifiedRows: 1
|
||||
====
|
||||
---- QUERY
|
||||
# Insert into a new partition
|
||||
insert into $DATABASE.dtbl partition (p=DATE '1400-01-1') values ('1400-1-1');
|
||||
---- RESULTS
|
||||
p=1400-01-01: 1
|
||||
---- RUNTIME_PROFILE
|
||||
NumModifiedRows: 1
|
||||
====
|
||||
---- QUERY
|
||||
insert into $DATABASE.dtbl partition (p='1400-1-01') values ('1400-1-2');
|
||||
---- RESULTS
|
||||
p=1400-01-01: 1
|
||||
---- RUNTIME_PROFILE
|
||||
NumModifiedRows: 1
|
||||
====
|
||||
---- QUERY
|
||||
select p, c from $DATABASE.dtbl;
|
||||
@@ -86,9 +86,9 @@ UDF ERROR: String to Date parse failed. Invalid string val: '1400-01-'
|
||||
# Test that STRING is implicitly cast to DATE.
|
||||
insert into $DATABASE.dtbl partition(p) select * from $DATABASE.stbl
|
||||
where p in ('1400-1-1', '1400-1-01', '1500-01-1');
|
||||
---- RESULTS
|
||||
p=1400-01-01: 2
|
||||
p=1500-01-01: 1
|
||||
---- RUNTIME_PROFILE
|
||||
NumModifiedRows: 2
|
||||
NumModifiedRows: 1
|
||||
====
|
||||
---- QUERY
|
||||
select p, c from $DATABASE.dtbl;
|
||||
|
||||
@@ -18,18 +18,25 @@
|
||||
# Common test dimensions and associated utility functions.
|
||||
|
||||
from __future__ import absolute_import, division, print_function
|
||||
from builtins import range
|
||||
import copy
|
||||
import os
|
||||
import pytest
|
||||
from itertools import product
|
||||
import os
|
||||
|
||||
from builtins import range
|
||||
import pytest
|
||||
|
||||
from tests.common.test_vector import (
|
||||
EXEC_OPTION, PROTOCOL, TABLE_FORMAT,
|
||||
BEESWAX, HS2, HS2_HTTP,
|
||||
ImpalaTestDimension, ImpalaTestVector, assert_exec_option_key)
|
||||
from tests.util.filesystem_utils import (
|
||||
IS_HDFS)
|
||||
assert_exec_option_key,
|
||||
BEESWAX,
|
||||
EXEC_OPTION,
|
||||
HS2,
|
||||
HS2_HTTP,
|
||||
ImpalaTestDimension,
|
||||
ImpalaTestVector,
|
||||
PROTOCOL,
|
||||
TABLE_FORMAT,
|
||||
)
|
||||
from tests.util.filesystem_utils import IS_HDFS
|
||||
|
||||
WORKLOAD_DIR = os.environ['IMPALA_WORKLOAD_DIR']
|
||||
|
||||
@@ -190,18 +197,18 @@ def create_client_protocol_no_strict_dimension():
|
||||
return ImpalaTestDimension('strict_hs2_protocol', False)
|
||||
|
||||
|
||||
def hs2_parquet_constraint(v):
|
||||
"""Constraint function, used to only run HS2 against Parquet format, because file format
|
||||
and the client protocol are orthogonal."""
|
||||
return (v.get_protocol() == BEESWAX
|
||||
def default_protocol_or_parquet_constraint(v):
|
||||
"""Constraint function, used to limit non-default test protocol against uncompressed
|
||||
parquet format, because file format and the client protocol are orthogonal."""
|
||||
return (v.get_protocol() == pytest.config.option.default_test_protocol
|
||||
or (v.get_table_format().file_format == 'parquet'
|
||||
and v.get_table_format().compression_codec == 'none'))
|
||||
|
||||
|
||||
def hs2_text_constraint(v):
|
||||
"""Constraint function, used to only run HS2 against uncompressed text, because file
|
||||
format and the client protocol are orthogonal."""
|
||||
return (v.get_protocol() == BEESWAX
|
||||
def default_protocol_or_text_constraint(v):
|
||||
"""Constraint function, used to limit non-default test protocol against uncompressed
|
||||
text format, because file format and the client protocol are orthogonal."""
|
||||
return (v.get_protocol() == pytest.config.option.default_test_protocol
|
||||
or (v.get_table_format().file_format == 'text'
|
||||
and v.get_table_format().compression_codec == 'none'))
|
||||
|
||||
|
||||
@@ -19,11 +19,16 @@ from __future__ import absolute_import, division, print_function
|
||||
from copy import deepcopy
|
||||
|
||||
from tests.common.impala_test_suite import ImpalaTestSuite
|
||||
from tests.common.test_dimensions import (create_exec_option_dimension,
|
||||
create_client_protocol_dimension, hs2_parquet_constraint, hs2_text_constraint)
|
||||
from tests.common.test_dimensions import (
|
||||
create_client_protocol_dimension,
|
||||
create_exec_option_dimension,
|
||||
default_protocol_or_parquet_constraint,
|
||||
default_protocol_or_text_constraint,
|
||||
)
|
||||
|
||||
|
||||
class TestStringQueries(ImpalaTestSuite):
|
||||
|
||||
@classmethod
|
||||
def add_test_dimensions(cls):
|
||||
super(TestStringQueries, cls).add_test_dimensions()
|
||||
@@ -35,7 +40,7 @@ class TestStringQueries(ImpalaTestSuite):
|
||||
# Run these queries through both beeswax and HS2 to get coverage of CHAR/VARCHAR
|
||||
# returned via both protocols.
|
||||
cls.ImpalaTestMatrix.add_dimension(create_client_protocol_dimension())
|
||||
cls.ImpalaTestMatrix.add_constraint(hs2_text_constraint)
|
||||
cls.ImpalaTestMatrix.add_constraint(default_protocol_or_text_constraint)
|
||||
|
||||
def test_chars(self, vector):
|
||||
self.run_test_case('QueryTest/chars', vector)
|
||||
@@ -57,6 +62,7 @@ class TestStringQueries(ImpalaTestSuite):
|
||||
|
||||
|
||||
class TestCharFormats(ImpalaTestSuite):
|
||||
|
||||
@classmethod
|
||||
def add_test_dimensions(cls):
|
||||
super(TestCharFormats, cls).add_test_dimensions()
|
||||
@@ -72,7 +78,7 @@ class TestCharFormats(ImpalaTestSuite):
|
||||
# Run these queries through both beeswax and HS2 to get coverage of CHAR/VARCHAR
|
||||
# returned via both protocols.
|
||||
cls.ImpalaTestMatrix.add_dimension(create_client_protocol_dimension())
|
||||
cls.ImpalaTestMatrix.add_constraint(hs2_parquet_constraint)
|
||||
cls.ImpalaTestMatrix.add_constraint(default_protocol_or_parquet_constraint)
|
||||
|
||||
def test_char_format(self, vector):
|
||||
self.run_test_case('QueryTest/chars-formats', vector)
|
||||
|
||||
@@ -18,11 +18,16 @@
|
||||
# Targeted tests for date type.
|
||||
|
||||
from __future__ import absolute_import, division, print_function
|
||||
|
||||
from tests.common.file_utils import create_table_and_copy_files
|
||||
from tests.common.impala_test_suite import ImpalaTestSuite
|
||||
from tests.common.skip import SkipIfFS
|
||||
from tests.common.test_dimensions import (create_exec_option_dimension_from_dict,
|
||||
create_client_protocol_dimension, hs2_parquet_constraint)
|
||||
from tests.common.test_dimensions import (
|
||||
create_client_protocol_dimension,
|
||||
create_exec_option_dimension_from_dict,
|
||||
create_uncompressed_text_dimension,
|
||||
default_protocol_or_parquet_constraint,
|
||||
)
|
||||
from tests.shell.util import create_impala_shell_executable_dimension
|
||||
|
||||
|
||||
@@ -46,12 +51,17 @@ class TestDateQueriesBase(ImpalaTestSuite):
|
||||
# Run these queries through both beeswax and HS2 to get coverage of date returned
|
||||
# via both protocols.
|
||||
cls.ImpalaTestMatrix.add_dimension(create_client_protocol_dimension())
|
||||
cls.ImpalaTestMatrix.add_constraint(hs2_parquet_constraint)
|
||||
cls.ImpalaTestMatrix.add_dimension(create_impala_shell_executable_dimension())
|
||||
|
||||
|
||||
class TestDateQueriesAllFormat(TestDateQueriesBase):
|
||||
|
||||
@classmethod
|
||||
def add_test_dimensions(cls):
|
||||
super(TestDateQueriesAllFormat, cls).add_test_dimensions()
|
||||
# Limit to 'parquet/none' for non-default test protocol.
|
||||
cls.ImpalaTestMatrix.add_constraint(default_protocol_or_parquet_constraint)
|
||||
|
||||
def test_queries(self, vector):
|
||||
if vector.get_value('table_format').file_format == 'avro':
|
||||
# Avro date test queries are in a separate test file.
|
||||
@@ -69,9 +79,9 @@ class TestDateQueriesTextFormat(TestDateQueriesBase):
|
||||
@classmethod
|
||||
def add_test_dimensions(cls):
|
||||
super(TestDateQueriesTextFormat, cls).add_test_dimensions()
|
||||
# Only run this test class with 'text' table_format.
|
||||
cls.ImpalaTestMatrix.add_constraint(lambda v:
|
||||
v.get_value('table_format').file_format == 'text')
|
||||
# Only run this test class with 'text/none' table_format.
|
||||
cls.ImpalaTestMatrix.add_dimension(
|
||||
create_uncompressed_text_dimension(cls.get_workload()))
|
||||
|
||||
def test_partitioning(self, vector, unique_database):
|
||||
""" Test partitioning by DATE. """
|
||||
|
||||
@@ -18,12 +18,16 @@
|
||||
# Targeted tests for decimal type.
|
||||
|
||||
from __future__ import absolute_import, division, print_function
|
||||
|
||||
import pytest
|
||||
|
||||
from tests.common.impala_connection import IMPALA_CONNECTION_EXCEPTION
|
||||
from tests.common.impala_test_suite import ImpalaTestSuite
|
||||
from tests.common.test_dimensions import (create_exec_option_dimension_from_dict,
|
||||
create_client_protocol_dimension, hs2_parquet_constraint)
|
||||
from tests.common.test_dimensions import (
|
||||
create_client_protocol_dimension,
|
||||
create_exec_option_dimension_from_dict,
|
||||
default_protocol_or_parquet_constraint,
|
||||
)
|
||||
from tests.util.filesystem_utils import IS_S3
|
||||
|
||||
|
||||
@@ -33,22 +37,22 @@ class TestDecimalQueries(ImpalaTestSuite):
|
||||
super(TestDecimalQueries, cls).add_test_dimensions()
|
||||
cls.ImpalaTestMatrix.add_dimension(
|
||||
create_exec_option_dimension_from_dict({
|
||||
'decimal_v2' : ['false', 'true'],
|
||||
'batch_size' : [0, 1],
|
||||
'disable_codegen' : ['false', 'true'],
|
||||
'disable_codegen_rows_threshold' : [0]}))
|
||||
'decimal_v2': ['false', 'true'],
|
||||
'batch_size': [0, 1],
|
||||
'disable_codegen': ['false', 'true'],
|
||||
'disable_codegen_rows_threshold': [0]}))
|
||||
# Hive < 0.11 does not support decimal so we can't run these tests against the other
|
||||
# file formats.
|
||||
# TODO: Enable them on Hive >= 0.11.
|
||||
cls.ImpalaTestMatrix.add_constraint(lambda v:\
|
||||
(v.get_value('table_format').file_format == 'text' and
|
||||
v.get_value('table_format').compression_codec == 'none') or
|
||||
v.get_value('table_format').file_format in ['parquet', 'orc', 'kudu', 'json'])
|
||||
cls.ImpalaTestMatrix.add_constraint(lambda v:
|
||||
v.get_value('table_format').file_format in ['parquet', 'orc', 'kudu', 'json']
|
||||
or (v.get_value('table_format').file_format == 'text'
|
||||
and v.get_value('table_format').compression_codec == 'none'))
|
||||
|
||||
# Run these queries through both beeswax and HS2 to get coverage of decimals returned
|
||||
# via both protocols.
|
||||
cls.ImpalaTestMatrix.add_dimension(create_client_protocol_dimension())
|
||||
cls.ImpalaTestMatrix.add_constraint(hs2_parquet_constraint)
|
||||
cls.ImpalaTestMatrix.add_constraint(default_protocol_or_parquet_constraint)
|
||||
|
||||
def test_queries(self, vector):
|
||||
self.run_test_case('QueryTest/decimal', vector)
|
||||
@@ -75,8 +79,8 @@ class TestAvroDecimalQueries(ImpalaTestSuite):
|
||||
def add_test_dimensions(cls):
|
||||
super(TestAvroDecimalQueries, cls).add_test_dimensions()
|
||||
cls.ImpalaTestMatrix.add_constraint(lambda v:
|
||||
(v.get_value('table_format').file_format == 'avro' and
|
||||
v.get_value('table_format').compression_codec == 'snap'))
|
||||
v.get_value('table_format').file_format == 'avro'
|
||||
and v.get_value('table_format').compression_codec == 'snap')
|
||||
|
||||
def test_avro_queries(self, vector):
|
||||
self.run_test_case('QueryTest/decimal_avro', vector)
|
||||
@@ -91,7 +95,7 @@ class TestDecimalOverflowExprs(ImpalaTestSuite):
|
||||
def add_test_dimensions(cls):
|
||||
super(TestDecimalOverflowExprs, cls).add_test_dimensions()
|
||||
cls.ImpalaTestMatrix.add_constraint(lambda v:
|
||||
(v.get_value('table_format').file_format in ['kudu', 'parquet', 'text']))
|
||||
v.get_value('table_format').file_format in ['kudu', 'parquet', 'text'])
|
||||
|
||||
def test_insert_select_exprs(self, vector, unique_database):
|
||||
TBL_NAME_1 = '`{0}`.`overflowed_decimal_tbl_1`'.format(unique_database)
|
||||
|
||||
@@ -18,20 +18,28 @@
|
||||
# General Impala query tests
|
||||
|
||||
from __future__ import absolute_import, division, print_function
|
||||
import pytest
|
||||
import re
|
||||
from copy import deepcopy
|
||||
import re
|
||||
from subprocess import check_call
|
||||
|
||||
import pytest
|
||||
|
||||
from tests.common.impala_test_suite import ImpalaTestSuite
|
||||
from tests.common.skip import (
|
||||
SkipIfEC, SkipIfCatalogV2, SkipIfNotHdfsMinicluster, SkipIfFS)
|
||||
from tests.common.skip import SkipIfFS, SkipIfNotHdfsMinicluster
|
||||
from tests.common.test_dimensions import (
|
||||
create_uncompressed_text_dimension, create_uncompressed_json_dimension,
|
||||
create_exec_option_dimension_from_dict, create_client_protocol_dimension,
|
||||
hs2_parquet_constraint, extend_exec_option_dimension, FILE_FORMAT_TO_STORED_AS_MAP,
|
||||
add_exec_option_dimension, create_exec_option_dimension)
|
||||
add_exec_option_dimension,
|
||||
create_client_protocol_dimension,
|
||||
create_exec_option_dimension,
|
||||
create_exec_option_dimension_from_dict,
|
||||
create_uncompressed_json_dimension,
|
||||
create_uncompressed_text_dimension,
|
||||
default_protocol_or_parquet_constraint,
|
||||
extend_exec_option_dimension,
|
||||
FILE_FORMAT_TO_STORED_AS_MAP,
|
||||
)
|
||||
from tests.common.test_vector import BEESWAX
|
||||
from tests.util.filesystem_utils import get_fs_path
|
||||
from subprocess import check_call
|
||||
|
||||
|
||||
class TestQueries(ImpalaTestSuite):
|
||||
|
||||
@@ -54,7 +62,7 @@ class TestQueries(ImpalaTestSuite):
|
||||
# Don't run all combinations of table format and protocol - the dimensions should
|
||||
# be orthogonal.
|
||||
cls.ImpalaTestMatrix.add_dimension(create_client_protocol_dimension())
|
||||
cls.ImpalaTestMatrix.add_constraint(hs2_parquet_constraint)
|
||||
cls.ImpalaTestMatrix.add_constraint(default_protocol_or_parquet_constraint)
|
||||
|
||||
# Adding a test dimension here to test the small query opt in exhaustive.
|
||||
if cls.exploration_strategy() == 'exhaustive':
|
||||
@@ -212,6 +220,7 @@ class TestQueries(ImpalaTestSuite):
|
||||
pytest.xfail("null data does not appear to work in hbase")
|
||||
self.run_test_case('QueryTest/null_data', vector)
|
||||
|
||||
|
||||
# Tests in this class are only run against text/none either because that's the only
|
||||
# format that is supported, or the tests don't exercise the file format.
|
||||
class TestQueriesTextTables(ImpalaTestSuite):
|
||||
@@ -254,6 +263,13 @@ class TestQueriesTextTables(ImpalaTestSuite):
|
||||
# Tests in this class are only run against json/none either because that's the only
|
||||
# format that is supported, or the tests don't exercise the file format.
|
||||
class TestQueriesJsonTables(ImpalaTestSuite):
|
||||
|
||||
@classmethod
|
||||
def default_test_protocol(cls):
|
||||
# Some assertions in this test rely on beeswax-specific return values such as
|
||||
# Infinity, NaN, false, and true. HS2 returns inf, nan, False, and True instead.
|
||||
return BEESWAX
|
||||
|
||||
@classmethod
|
||||
def add_test_dimensions(cls):
|
||||
super(TestQueriesJsonTables, cls).add_test_dimensions()
|
||||
@@ -277,6 +293,7 @@ class TestQueriesJsonTables(ImpalaTestSuite):
|
||||
vector.get_value('exec_option')['abort_on_error'] = 0
|
||||
self.run_test_case('QueryTest/overflow_json', vector)
|
||||
|
||||
|
||||
# Tests in this class are only run against Parquet because the tests don't exercise the
|
||||
# file format.
|
||||
class TestQueriesParquetTables(ImpalaTestSuite):
|
||||
@@ -304,6 +321,7 @@ class TestQueriesParquetTables(ImpalaTestSuite):
|
||||
vector.get_value('exec_option')['num_nodes'] = 1
|
||||
self.run_test_case('QueryTest/single-node-large-sorts', vector)
|
||||
|
||||
|
||||
# Tests for queries in HDFS-specific tables, e.g. AllTypesAggMultiFilesNoPart.
|
||||
class TestHdfsQueries(ImpalaTestSuite):
|
||||
@classmethod
|
||||
@@ -387,6 +405,7 @@ class TestPartitionKeyScansWithMultipleBlocks(ImpalaTestSuite):
|
||||
"SELECT max(year) FROM %s.alltypes_multiblocks" % (unique_database))
|
||||
assert int(result.get_data()) == 2010
|
||||
|
||||
|
||||
class TestTopNReclaimQuery(ImpalaTestSuite):
|
||||
"""Test class to validate that TopN periodically reclaims tuple pool memory
|
||||
and runs with a lower memory footprint."""
|
||||
|
||||
@@ -16,24 +16,27 @@
|
||||
# under the License.
|
||||
|
||||
from __future__ import absolute_import, division, print_function
|
||||
|
||||
from tests.common.impala_test_suite import ImpalaTestSuite
|
||||
from tests.common.test_dimensions import (create_exec_option_dimension,
|
||||
create_client_protocol_dimension, hs2_parquet_constraint)
|
||||
from tests.common.test_dimensions import (
|
||||
create_client_protocol_dimension,
|
||||
create_exec_option_dimension,
|
||||
)
|
||||
|
||||
|
||||
class TestUtf8StringFunctions(ImpalaTestSuite):
|
||||
|
||||
@classmethod
|
||||
def add_test_dimensions(cls):
|
||||
super(TestUtf8StringFunctions, cls).add_test_dimensions()
|
||||
cls.ImpalaTestMatrix.add_dimension(
|
||||
create_exec_option_dimension(disable_codegen_options=[False, True]))
|
||||
cls.ImpalaTestMatrix.add_constraint(lambda v:
|
||||
v.get_value('table_format').file_format in ['parquet'] and
|
||||
v.get_value('table_format').compression_codec in ['none'])
|
||||
v.get_value('table_format').file_format in ['parquet']
|
||||
and v.get_value('table_format').compression_codec in ['none'])
|
||||
# Run these queries through both beeswax and HS2 to get coverage of CHAR/VARCHAR
|
||||
# returned via both protocols.
|
||||
cls.ImpalaTestMatrix.add_dimension(create_client_protocol_dimension())
|
||||
cls.ImpalaTestMatrix.add_constraint(hs2_parquet_constraint)
|
||||
|
||||
def test_string_functions(self, vector):
|
||||
self.run_test_case('QueryTest/utf8-string-functions', vector)
|
||||
|
||||
Reference in New Issue
Block a user