Files
impala/tests/custom_cluster/test_stats_extrapolation.py
Joe McDonnell 82bd087fb1 IMPALA-11973: Add absolute_import, division to all eligible Python files
This takes steps to make Python 2 behave like Python 3 as
a way to flush out issues with running on Python 3. Specifically,
it handles two main differences:
 1. Python 3 requires absolute imports within packages. This
    can be emulated via "from __future__ import absolute_import"
 2. Python 3 changed division to "true" division that doesn't
    round to an integer. This can be emulated via
    "from __future__ import division"

This changes all Python files to add imports for absolute_import
and division. For completeness, this also includes print_function in the
import.

I scrutinized each old-division location and converted those that need an
integer result (e.g. indices, counts of records, etc.) to use the integer
division '//' operator. Some code was also using relative imports and needed
to be adjusted to work with absolute_import.
This fixes all Pylint warnings about no-absolute-import and old-division,
and these warnings are now banned.

Testing:
 - Ran core tests

Change-Id: Idb0fcbd11f3e8791f5951c4944be44fb580e576b
Reviewed-on: http://gerrit.cloudera.org:8080/19588
Reviewed-by: Joe McDonnell <joemcdonnell@cloudera.com>
Tested-by: Joe McDonnell <joemcdonnell@cloudera.com>
2023-03-09 17:17:57 +00:00

69 lines
3.2 KiB
Python

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import absolute_import, division, print_function
import pytest
from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
from tests.common.test_dimensions import (
create_exec_option_dimension,
create_single_exec_option_dimension,
create_uncompressed_text_dimension)
class TestStatsExtrapolation(CustomClusterTestSuite):
  """Minimal end-to-end test for the --enable_stats_extrapolation impalad flag.

  This test primarily checks that the flag is propagated to the FE. More testing is
  done in FE unit tests and metadata/test_stats_extrapolation.py."""

  @classmethod
  def get_workload(cls):
    """Return the name of the workload this suite runs against."""
    return 'functional-query'

  @classmethod
  def add_test_dimensions(cls):
    """Add the single-exec-option and uncompressed-text dimensions to the matrix."""
    super(TestStatsExtrapolation, cls).add_test_dimensions()
    cls.ImpalaTestMatrix.add_dimension(create_single_exec_option_dimension())
    cls.ImpalaTestMatrix.add_dimension(
        create_uncompressed_text_dimension(cls.get_workload()))

  def _assert_column_non_negative(self, stmt, column_name):
    """Execute 'stmt' and assert that every row holds an int >= 0 in 'column_name'.

    'column_name' must be given in upper case; it is compared against the upper-cased
    field names of the result schema. Each result row is split on tabs to locate the
    value, which must parse as an integer."""
    result = self.client.execute(stmt)
    col_names = [fs.name.upper() for fs in result.schema.fieldSchemas]
    assert column_name in col_names, \
        "Column '{0}' not found in result of '{1}'; columns: {2}".format(
            column_name, stmt, col_names)
    col_idx = col_names.index(column_name)
    for row in result.data:
      value = int(row.split("\t")[col_idx])
      assert value >= 0, \
          "Unexpected negative '{0}' value in row: {1}".format(column_name, row)

  @pytest.mark.execute_serially
  @CustomClusterTestSuite.with_args(impalad_args="--enable_stats_extrapolation=true")
  def test_stats_extrapolation(self, vector, unique_database):
    """Check row count extrapolation in EXPLAIN output and COMPUTE STATS TABLESAMPLE."""
    # Test row count extrapolation
    self.client.execute("set explain_level=2")
    explain_result = self.client.execute("explain select * from functional.alltypes")
    explain_text = " ".join(explain_result.data)
    assert "extrapolated-rows=7.30K" in explain_text, explain_text

    # Test COMPUTE STATS TABLESAMPLE
    part_test_tbl = unique_database + ".alltypes"
    self.clone_table("functional.alltypes", part_test_tbl, True, vector)
    # Since our test tables are small, set the minimum sample size to 0 to make sure
    # we exercise the sampling code paths.
    self.client.execute("set COMPUTE_STATS_MIN_SAMPLE_SIZE=0")
    self.client.execute(
        "compute stats {0} tablesample system (13)".format(part_test_tbl))

    # Check that table stats were set.
    self._assert_column_non_negative(
        "show table stats {0}".format(part_test_tbl), "EXTRAP #ROWS")

    # Check that column stats were set.
    self._assert_column_non_negative(
        "show column stats {0}".format(part_test_tbl), "#DISTINCT VALUES")