mirror of
https://github.com/apache/impala.git
synced 2026-02-03 09:00:39 -05:00
This takes steps to make Python 2 behave like Python 3 as
a way to flush out issues with running on Python 3. Specifically,
it handles two main differences:
1. Python 3 requires absolute imports within packages. This
can be emulated via "from __future__ import absolute_import"
2. Python 3 changed division to "true" division that doesn't
round to an integer. This can be emulated via
"from __future__ import division"
This changes all Python files to add imports for absolute_import
and division. For completeness, this also includes print_function in the
import.
I scrutinized each old-division location and converted those that need an
integer result (e.g. indices, counts of records, etc.) to use the integer
division '//' operator. Some code was also using relative imports and
needed to be adjusted to handle absolute_import.
This fixes all Pylint warnings about no-absolute-import and old-division,
and these warnings are now banned.
Testing:
- Ran core tests
Change-Id: Idb0fcbd11f3e8791f5951c4944be44fb580e576b
Reviewed-on: http://gerrit.cloudera.org:8080/19588
Reviewed-by: Joe McDonnell <joemcdonnell@cloudera.com>
Tested-by: Joe McDonnell <joemcdonnell@cloudera.com>
95 lines
4.4 KiB
Python
95 lines
4.4 KiB
Python
# Licensed to the Apache Software Foundation (ASF) under one
|
|
# or more contributor license agreements. See the NOTICE file
|
|
# distributed with this work for additional information
|
|
# regarding copyright ownership. The ASF licenses this file
|
|
# to you under the Apache License, Version 2.0 (the
|
|
# "License"); you may not use this file except in compliance
|
|
# with the License. You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing,
|
|
# software distributed under the License is distributed on an
|
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
# KIND, either express or implied. See the License for the
|
|
# specific language governing permissions and limitations
|
|
# under the License.
|
|
|
|
# Tests the TABLESAMPLE clause.
|
|
|
|
from __future__ import absolute_import, division, print_function
|
|
import pytest
|
|
import subprocess
|
|
|
|
from tests.common.impala_test_suite import ImpalaTestSuite
|
|
from tests.common.test_vector import ImpalaTestDimension
|
|
|
|
class TestTableSample(ImpalaTestSuite):
  """Checks the row counts returned by queries that use the TABLESAMPLE clause."""

  @classmethod
  def get_workload(cls):
    """Returns the name of the workload this suite runs against."""
    return 'functional-query'

  @classmethod
  def add_test_dimensions(cls):
    """Adds the 'repeatable' and 'filtered' dimensions and restricts the file formats."""
    super(TestTableSample, cls).add_test_dimensions()
    cls.ImpalaTestMatrix.add_dimension(ImpalaTestDimension('repeatable', True, False))
    cls.ImpalaTestMatrix.add_dimension(ImpalaTestDimension('filtered', True, False))
    # Tablesample is only supported on HDFS tables.
    cls.ImpalaTestMatrix.add_constraint(
        lambda v: v.get_value('table_format').file_format not in ('kudu', 'hbase'))
    if cls.exploration_strategy() != 'exhaustive':
      # Cut down on core testing time by limiting the file formats.
      cls.ImpalaTestMatrix.add_constraint(
          lambda v: v.get_value('table_format').file_format in ('parquet', 'text'))

  @staticmethod
  def _impalad_backtrace(ordinal):
    """Returns gdb's backtrace of all threads of one impalad process, as text.

    'ordinal' is the 1-based position of the process in the output of 'pgrep impalad'.
    If the command exits with a non-zero status, a short description of the failure is
    returned instead of raising, so that collecting diagnostics cannot replace the
    failure that is being reported.
    """
    cmd = ("gdb -ex \"set pagination 0\" -ex \"thread apply all bt\" "
           "--batch -p $(pgrep impalad | sed -n %dp)" % ordinal)
    try:
      # universal_newlines=True makes check_output() return text instead of bytes on
      # Python 3; bytes would be formatted as an unreadable b'...' literal.
      return subprocess.check_output(cmd, shell=True, universal_newlines=True)
    except subprocess.CalledProcessError as e:
      return "<'%s' exited with status %d: %s>" % (cmd, e.returncode, e.output)

  def test_tablesample(self, vector):
    """Runs count(*) over 'alltypes' at increasing sampling percentages.

    Samples below 100 percent must return fewer rows than the unsampled query and the
    100 percent sample must return exactly as many. With the 'repeatable' dimension
    enabled, each count must also be greater than the previous one.
    """
    # Do not use a .test to avoid making this test flaky.
    # 1. Queries without the repeatable clause are non-deterministic.
    # 2. The results of queries without a repeatable clause could change due to
    # changes in data loading that affect the number or size of files.
    repeatable = vector.get_value('repeatable')
    filtered = vector.get_value('filtered')

    where_clause = "where month between 1 and 6" if filtered else ""
    # The same for every sampling percentage, so it is built once up front.
    rep_sql = " repeatable(1)" if repeatable else ""

    ImpalaTestSuite.change_database(self.client, vector.get_value('table_format'))
    result = self.client.execute("select count(*) from alltypes %s" % where_clause)
    baseline_count = int(result.data[0])
    prev_count = None
    for perc in [5, 20, 50, 100]:
      sql_stmt = "select count(*) from alltypes tablesample system(%s)%s %s" \
          % (perc, rep_sql, where_clause)
      handle = self.client.execute_async(sql_stmt)
      # IMPALA-6352: flaky test, possibly due to a hung thread. Wait for 500 sec before
      # failing and logging the backtraces of all impalads.
      is_finished = self.client.wait_for_finished_timeout(handle, 500)
      # The message expression (and therefore gdb) is only evaluated on failure.
      assert is_finished, 'Query Timed out. Dumping backtrace of all threads in ' \
          'impalads:\nthreads in the impalad1: %s \nthreads in the ' \
          'impalad2: %s \nthreads in the impalad3: %s' % \
          tuple(self._impalad_backtrace(ordinal) for ordinal in (1, 2, 3))
      result = self.client.fetch(sql_stmt, handle)
      self.client.close_query(handle)
      count = int(result.data[0])
      if perc < 100:
        assert count < baseline_count
      else:
        assert count == baseline_count
      # Skipped on the first iteration (no previous count yet) and whenever the previous
      # count was 0, because the check relies on the truthiness of prev_count.
      if prev_count and repeatable:
        # May not necessarily be true for non-repeatable samples
        assert count > prev_count
      prev_count = count
|