diff --git a/tests/query_test/test_decimal_fuzz.py b/tests/query_test/test_decimal_fuzz.py new file mode 100644 index 000000000..a129e3353 --- /dev/null +++ b/tests/query_test/test_decimal_fuzz.py @@ -0,0 +1,248 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Generates random decimal numbers and verifies that mathematical +# operations return correct results under decimal_v2. + +import decimal +import math +import pytest +import random + +from tests.beeswax.impala_beeswax import ImpalaBeeswaxException +from tests.common.impala_test_suite import ImpalaTestSuite +from tests.common.test_dimensions import create_single_exec_option_dimension +from tests.common.test_vector import ImpalaTestDimension, ImpalaTestMatrix + +class TestDecimalFuzz(ImpalaTestSuite): + + @classmethod + def get_workload(cls): + return 'functional-query' + + @classmethod + def add_test_dimensions(cls): + cls.ImpalaTestMatrix = ImpalaTestMatrix() + cls.ImpalaTestMatrix.add_dimension(create_single_exec_option_dimension()) + if cls.exploration_strategy() == 'exhaustive': + cls.iterations = 50000 + else: + cls.iterations = 10000 + + def weighted_choice(self, options): + total_weight = sum(options.itervalues()) + numeric_choice = random.uniform(0, total_weight) + last_choice = None + for choice, weight in options.iteritems(): + if numeric_choice <= weight: + return choice + numeric_choice -= weight + if weight > 0: + last_choice = choice + return last_choice + + def get_decimal(self): + '''Returns a 3-tuple with string values of (value, precision, scale). The function + does not always return completely random values, we try to bias it to select + more interesting values.''' + + def random_precision(): + return random.randint(1, 38) + + def extreme_precision(): + return 38 + + precision_weights = {} + precision_weights[random_precision] = 0.8 + precision_weights[extreme_precision] = 0.2 + precision = self.weighted_choice(precision_weights)() + + def random_scale(precision): + return random.randint(0, precision) + + def extreme_scale(precision): + return random.choice([0, precision]) + + scale_weights = {} + scale_weights[random_scale] = 0.9 + scale_weights[extreme_scale] = 0.1 + scale = self.weighted_choice(scale_weights)(precision) + + def random_value(precision): + '''Generates a completely random value.''' + + def num_digits_random(precision): + return random.randint(1, precision) + + def num_digits_all(precision): + return precision + + # Determine how many digits the value is going to have. + num_digits_weights = {} + num_digits_weights[num_digits_random] = 0.8 + num_digits_weights[num_digits_all] = 0.2 + num_digits = self.weighted_choice(num_digits_weights)(precision) + + no_zero = '123456789' + with_zero = '0123456789' + result = random.choice(no_zero) + for _ in range(num_digits - 1): + result += random.choice(with_zero) + + return result + + def special_case_binary_value(precision): + '''Generates a value that looks like 11111... or 10000... in binary number + system.''' + + def exponent_random(precision): + return random.randint(0, int(precision * math.log(10, 2))) + + def exponent_max(precision): + return int(precision * math.log(10, 2)) + + exponent_weights = {} + exponent_weights[exponent_random] = 0.8 + exponent_weights[exponent_max] = 0.2 + exponent = self.weighted_choice(exponent_weights)(precision) + + value = 2 ** exponent + if random.random() < 0.5: + value -= 1 + return '{0}'.format(value) + + def special_case_decimal_value(precision): + '''Generates a value that looks like 99999... or 10000... in decimal number + system.''' + + def num_digits_random(precision): + return random.randint(1, precision) + + def num_digits_max(precision): + return precision + + num_digits_weights = {} + num_digits_weights[num_digits_random] = 8 + num_digits_weights[num_digits_max] = 0.2 + num_digits = self.weighted_choice(num_digits_weights)(precision) + + value = 10 ** num_digits + + if num_digits == precision or random.random() < 0.5: + value -= 1 + + return '{0}'.format(value) + + value_weights = {} + value_weights[random_value] = 0.6 + value_weights[special_case_binary_value] = 0.2 + value_weights[special_case_decimal_value] = 0.2 + + value = self.weighted_choice(value_weights)(precision) + + # Randomly determine the placement of the decimal mark. + # The smallest index where the decimal mark can be placed in the number string. + min_dot_location = max(len(value) - scale, 0) + # The largest index where the decimal mark can be placed in the number string. + max_dot_location = min(precision - scale, len(value)) + dot_location = random.randint(min_dot_location, max_dot_location) + + if dot_location == 0: + value = '0.' + value + elif dot_location == len(value): + pass + else: + value = value[:dot_location] + '.' + value[dot_location:] + + if random.random() < 0.5: + # Negate the number. + value = '-' + value + return (value, precision, scale) + + def result_equals(self, expected, actual): + '''Verify that the expected result is equal to the actual result. We verify equality + by rounding the expected result to different numbers of places and verifying that the + actual result is matched in at least one of the cases.''' + if actual == expected: + return True + + if actual is None: + # Overflow + if abs(expected) > decimal.Decimal("9" * 32): + # If the expected result is larger than 10^32 - 1, it's not unreasonable for + # there to be an overflow in Impala because the minimum scale is 6 and + # 38 (max precision) - 6 = 32. + return True + return False + + for num_digits_after_dot in xrange(39): + # Reduce the number of digits after the dot in the expected_result to different + # amounts. If it matches the actual result in at least one of the cases, we + # consider the actual result to be acceptable. + truncated_expected = expected.quantize( + decimal.Decimal("1e-{0}".format(num_digits_after_dot)), + rounding=decimal.ROUND_HALF_UP) + if actual == truncated_expected: + return True + return False + + def execute_one(self): + '''Executes a single query and compares the result to a result that we computed in + Python.''' + op = random.choice(['+', '-', '*', '/', '%']) + value1, precision1, scale1 = self.get_decimal() + value2, precision2, scale2 = self.get_decimal() + + query = ('select cast({value1} as decimal({precision1},{scale1})) {op} ' + 'cast({value2} as decimal({precision2},{scale2}))').format(op=op, + value1=value1, precision1=precision1, scale1=scale1, + value2=value2, precision2=precision2, scale2=scale2) + + try: + result = self.execute_scalar(query, query_options={'decimal_v2': 'true'}) + except ImpalaBeeswaxException as e: + result = None + if result is not None: + result = decimal.Decimal(result) + + with decimal.localcontext() as ctx: + # Set the decimal context to a large precision initially, so that the + # mathematical operations are performed at a high precision. + ctx.prec = 80 + + try: + if op == '+': + expected_result = decimal.Decimal(value1) + decimal.Decimal(value2) + elif op == '-': + expected_result = decimal.Decimal(value1) - decimal.Decimal(value2) + elif op == '*': + expected_result = decimal.Decimal(value1) * decimal.Decimal(value2) + elif op == '/': + expected_result = decimal.Decimal(value1) / decimal.Decimal(value2) + elif op == '%': + expected_result = decimal.Decimal(value1) % decimal.Decimal(value2) + else: + assert False + except decimal.InvalidOperation as e: + expected_result = None + except decimal.DivisionByZero as e: + expected_result = None + assert self.result_equals(expected_result, result) + + def test_fuzz(self, vector): + for _ in xrange(self.iterations): + self.execute_one()