mirror of
https://github.com/apache/impala.git
synced 2026-01-04 09:00:56 -05:00
This test takes ~2GB of JVM heap and is a likely cause of some OOM during EE tests. Change-Id: I4868eab51bf16e1ca6e0b4e98d7929195cd73803 Reviewed-on: http://gerrit.cloudera.org:8080/18411 Reviewed-by: Quanlong Huang <huangquanlong@gmail.com> Tested-by: Csaba Ringhofer <csringhofer@cloudera.com>
249 lines
11 KiB
Python
249 lines
11 KiB
Python
# Licensed to the Apache Software Foundation (ASF) under one
|
|
# or more contributor license agreements. See the NOTICE file
|
|
# distributed with this work for additional information
|
|
# regarding copyright ownership. The ASF licenses this file
|
|
# to you under the Apache License, Version 2.0 (the
|
|
# "License"); you may not use this file except in compliance
|
|
# with the License. You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing,
|
|
# software distributed under the License is distributed on an
|
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
# KIND, either express or implied. See the License for the
|
|
# specific language governing permissions and limitations
|
|
# under the License.
|
|
|
|
import pytest
|
|
import re
|
|
from random import randint
|
|
|
|
from tests.common.impala_test_suite import ImpalaTestSuite
|
|
from tests.common.test_dimensions import create_exec_option_dimension
|
|
from tests.common.test_dimensions import create_uncompressed_text_dimension
|
|
from tests.common.test_vector import ImpalaTestDimension
|
|
from tests.util.test_file_parser import QueryTestSectionReader
|
|
|
|
class TestExprs(ImpalaTestSuite):
  """Expression tests driven by the QueryTest/exprs and QueryTest/special-strings test
  files. Every test runs once with enable_expr_rewrites=0 and once with
  enable_expr_rewrites=1."""

  @classmethod
  def get_workload(cls):
    """Return the name of the workload these tests run against."""
    return 'functional-query'

  @classmethod
  def add_test_dimensions(cls):
    """Add the 'enable_expr_rewrites' dimension and, for the 'core' exploration
    strategy, restrict the matrix to uncompressed parquet."""
    super(TestExprs, cls).add_test_dimensions()
    # Test with and without expr rewrites to cover regular expr evaluations
    # as well as constant folding, in particular, timestamp literals.
    cls.ImpalaTestMatrix.add_dimension(
        ImpalaTestDimension('enable_expr_rewrites', 0, 1))
    if cls.exploration_strategy() == 'core':
      # Test with file format that supports codegen
      cls.ImpalaTestMatrix.add_constraint(lambda v:
          v.get_value('table_format').file_format == 'parquet' and
          v.get_value('table_format').compression_codec == 'none')

  def test_exprs(self, vector):
    """Run QueryTest/exprs, then verify that current_database() returns the database
    name that corresponds to the vector's table format. Avro is skipped; HBase and
    Kudu are marked as expected failures."""
    # Copy the dimension value into the query options used to run the test file.
    vector.get_value('exec_option')['enable_expr_rewrites'] = \
        vector.get_value('enable_expr_rewrites')
    # TODO: Enable some of these tests for Avro if possible
    # Don't attempt to evaluate timestamp expressions with Avro tables (which don't
    # support a timestamp type)
    table_format = vector.get_value('table_format')
    if table_format.file_format == 'avro':
      # Give the skip a reason so that it is explained in the test report.
      pytest.skip("Avro tables don't support a timestamp type")
    if table_format.file_format == 'hbase':
      pytest.xfail("A lot of queries check for NULLs, which hbase does not recognize")
    if table_format.file_format == 'kudu':
      # Can't load LikeTbl without KUDU-1570.
      pytest.xfail("Need support for Kudu tables with nullable PKs (KUDU-1570)")
    self.run_test_case('QueryTest/exprs', vector)

    # This will change the current database to matching table format and then execute
    # select current_database(). An error will be thrown if multiple values are returned.
    current_db = self.execute_scalar('select current_database()', vector=vector)
    assert current_db == QueryTestSectionReader.get_db_name(table_format)

  def test_special_strings(self, vector):
    """Test handling of expressions with "special" strings."""
    vector.get_value('exec_option')['enable_expr_rewrites'] = \
        vector.get_value('enable_expr_rewrites')
    self.run_test_case('QueryTest/special-strings', vector)
|
|
|
|
# Tests very deep expression trees and expressions with many children. Impala defines
|
|
# a 'safe' upper bound on the expr depth and the number of expr children in the
|
|
# FE Expr.java and any changes to those limits should be reflected in this test.
|
|
# The expr limits primarily guard against stack overflows or similar problems
|
|
# causing crashes. Therefore, this test succeeds if no Impalads crash.
|
|
class TestExprLimits(ImpalaTestSuite):
  """Runs queries whose expressions sit at the expr children/depth limits, and queries
  that are expected to be rejected for exceeding the statement length or statement
  expression limits."""

  # Keep these in sync with Expr.java
  EXPR_CHILDREN_LIMIT = 10000
  EXPR_DEPTH_LIMIT = 1000

  @classmethod
  def get_workload(cls):
    """Return the name of the workload these tests run against."""
    return 'functional-query'

  @classmethod
  def add_test_dimensions(cls):
    """Run on uncompressed text only, with codegen both enabled and disabled."""
    super(TestExprLimits, cls).add_test_dimensions()
    if cls.exploration_strategy() != 'exhaustive':
      # Ensure the test runs with codegen enabled and disabled, even when the
      # exploration strategy is not exhaustive.
      cls.ImpalaTestMatrix.clear_dimension('exec_option')
      cls.ImpalaTestMatrix.add_dimension(create_exec_option_dimension(
          cluster_sizes=[0], disable_codegen_options=[False, True], batch_sizes=[0]))

    # There is no reason to run these tests using all dimensions.
    cls.ImpalaTestMatrix.add_dimension(
        create_uncompressed_text_dimension(cls.get_workload()))

  def test_expr_child_limit(self, vector):
    """Execute an IN predicate and a CASE expr sized from EXPR_CHILDREN_LIMIT and
    verify that both queries succeed."""
    # IN predicate with EXPR_CHILDREN_LIMIT - 1 comma-separated literals.
    in_values = ",".join(str(i) for i in range(self.EXPR_CHILDREN_LIMIT - 1))
    in_query = "select 1 IN(" + in_values + ")"
    self.__exec_query(in_query)

    # CASE expr with EXPR_CHILDREN_LIMIT / 2 WHEN/THEN pairs. Floor division keeps the
    # repeat count an integer on both Python 2 and Python 3.
    when_clauses = " when true then 1" * (self.EXPR_CHILDREN_LIMIT // 2)
    case_query = "select case " + when_clauses + " end"
    self.__exec_query(case_query)

  def test_expr_depth_limit(self, vector):
    """Execute deeply chained/nested exprs sized from EXPR_DEPTH_LIMIT and verify that
    every query succeeds."""
    # Compound predicates
    and_query = "select " + self.__gen_deep_infix_expr("true", " and false")
    self.__exec_query(and_query)
    or_query = "select " + self.__gen_deep_infix_expr("true", " or false")
    self.__exec_query(or_query)

    # Arithmetic expr
    arith_query = "select " + self.__gen_deep_infix_expr("1", " + 1")
    self.__exec_query(arith_query)

    # Nested function calls
    func_query = "select " + self.__gen_deep_func_expr("lower(", "'abc'", ")")
    self.__exec_query(func_query)

    # Casts.
    cast_query = "select " + self.__gen_deep_func_expr("cast(", "1", " as int)")
    self.__exec_query(cast_query)

  def test_under_statement_expression_limit(self):
    """Generate a huge case statement that barely fits within the statement expression
    limit and verify that it runs."""
    # This takes 20+ minutes, so only run it on exhaustive.
    # TODO: Determine whether this needs to run serially. It uses >5 GB of memory.
    if self.exploration_strategy() != 'exhaustive':
      pytest.skip("Only test limit of codegen on exhaustive")
    case = self.__gen_huge_case("int_col", 32, 2, " ")
    query = "select {0} as huge_case from functional_parquet.alltypes".format(case)
    self.__exec_query(query)

  def test_max_statement_size(self):
    """Generate a huge case statement that exceeds the default 16MB limit and verify
    that it gets rejected."""

    # Raw strings: the pattern is handed to re.search() and contains '\(' escapes.
    expected_err_tmpl = (r"Statement length of {0} bytes exceeds the maximum "
        r"statement length \({1} bytes\)")
    size_16mb = 16 * 1024 * 1024

    # Case 1: a valid SQL that would parse correctly
    case = self.__gen_huge_case("int_col", 75, 2, " ")
    query = "select {0} as huge_case from functional.alltypes".format(case)
    err = self.execute_query_expect_failure(self.client, query)
    assert re.search(expected_err_tmpl.format(len(query), size_16mb), str(err))

    # Case 2: a string of 'a' characters that does not parse. This will still fail
    # with the same message, because the check is before parsing.
    invalid_sql = 'a' * (size_16mb + 1)
    err = self.execute_query_expect_failure(self.client, invalid_sql)
    assert re.search(expected_err_tmpl.format(len(invalid_sql), size_16mb), str(err))

  # This test can take ~2GB memory while it takes only ~10 seconds. It caused OOM
  # in the past, so it is safer to run it serially.
  @pytest.mark.execute_serially
  def test_statement_expression_limit(self):
    """Generate a huge case statement that barely fits within the 16MB limit but exceeds
    the statement expression limit. Verify that it fails."""
    case = self.__gen_huge_case("int_col", 66, 2, " ")
    query = "select {0} as huge_case from functional.alltypes".format(case)
    assert len(query) < 16 * 1024 * 1024
    # Raw strings: in a regex, the two-character sequence '\n' still matches a newline.
    expected_err_re = (r"Exceeded the statement expression limit \({0}\)\n"
        r"Statement has .* expressions.").format(250000)
    err = self.execute_query_expect_failure(self.client, query)
    assert re.search(expected_err_re, str(err))

  def __gen_huge_case(self, col_name, fanout, depth, indent):
    """Return the text of a CASE expr with 'fanout' WHEN branches. While 'depth' is
    greater than 0, each THEN value is a parenthesized CASE expr generated with
    depth - 1; at depth 0 it is the branch index. 'indent' is prepended to every
    WHEN line and to the closing END. The WHEN conditions use random constants."""
    toks = ["case\n"]
    for i in range(fanout):
      add = randint(1, 1000000)
      divisor = randint(1, 10000000)
      mod = randint(0, divisor)
      # Generate a mathematical expr that can't be easily optimised out.
      when_expr = "{0} + {1} % {2} = {3}".format(col_name, add, divisor, mod)
      if depth == 0:
        then_expr = "{0}".format(i)
      else:
        then_expr = "({0})".format(
            self.__gen_huge_case(col_name, fanout, depth - 1, indent + " "))
      toks.append(indent)
      toks.append("when {0} then {1}\n".format(when_expr, then_expr))
    toks.append(indent)
    toks.append("end")
    return ''.join(toks)

  def __gen_deep_infix_expr(self, prefix, repeat_suffix):
    """Return 'prefix' followed by EXPR_DEPTH_LIMIT - 1 copies of 'repeat_suffix'."""
    return prefix + repeat_suffix * (self.EXPR_DEPTH_LIMIT - 1)

  def __gen_deep_func_expr(self, open_func, base_arg, close_func):
    """Return 'base_arg' wrapped in EXPR_DEPTH_LIMIT - 1 nested 'open_func' ...
    'close_func' pairs."""
    nesting = self.EXPR_DEPTH_LIMIT - 1
    return open_func * nesting + base_arg + close_func * nesting

  def __exec_query(self, sql_str):
    """Execute 'sql_str' and fail the test if execution raises or the result does not
    report success."""
    try:
      impala_ret = self.execute_query(sql_str)
    except Exception as e:  # consider any exception a failure
      assert False, "Failed to execute query %s: %s" % (sql_str, e)
    # Checked outside the try block so that a failed check is reported only once.
    assert impala_ret.success, "Failed to execute query %s" % (sql_str)
|
|
|
|
class TestUtcTimestampFunctions(ImpalaTestSuite):
  """Covers the UTC timestamp functions, meaning the functions whose results do not
  depend on the --use_local_tz_for_unix_timestamp_conversions flag. Any test added
  here should also be run by the custom cluster test test_local_tz_conversion.py, to
  make sure the behavior is the same when that flag is set to true."""

  @classmethod
  def get_workload(cls):
    """Return the name of the workload these tests run against."""
    return 'functional-query'

  @classmethod
  def add_test_dimensions(cls):
    """Add the 'enable_expr_rewrites' dimension and, for the 'core' exploration
    strategy, restrict the matrix to uncompressed text."""
    super(TestUtcTimestampFunctions, cls).add_test_dimensions()
    # Run with expr rewrites off and on: this exercises regular expr evaluation as
    # well as constant folding (timestamp literals in particular).
    rewrite_settings = (0, 1)
    cls.ImpalaTestMatrix.add_dimension(
        ImpalaTestDimension('enable_expr_rewrites', *rewrite_settings))
    if cls.exploration_strategy() != 'core':
      return

    def is_uncompressed_text(test_vector):
      # Test with file format that supports codegen
      fmt = test_vector.get_value('table_format')
      return fmt.file_format == 'text' and fmt.compression_codec == 'none'

    cls.ImpalaTestMatrix.add_constraint(is_uncompressed_text)

  def test_utc_functions(self, vector):
    """Run QueryTest/utc-timestamp-functions with the vector's enable_expr_rewrites
    value copied into its exec options."""
    rewrites_enabled = vector.get_value('enable_expr_rewrites')
    vector.get_value('exec_option')['enable_expr_rewrites'] = rewrites_enabled
    self.run_test_case('QueryTest/utc-timestamp-functions', vector)
|