mirror of
https://github.com/apache/impala.git
synced 2025-12-19 18:12:08 -05:00
Python 3 changes list operators such as range, map, and filter to be lazy. Some code that expects the list operators to happen immediately will fail. e.g. Python 2: range(0,5) == [0,1,2,3,4] True Python 3: range(0,5) == [0,1,2,3,4] False The fix is to wrap locations with list(). i.e. Python 3: list(range(0,5)) == [0,1,2,3,4] True Since the base operators are now lazy, Python 3 also removes the old lazy versions (e.g. xrange, ifilter, izip, etc). This uses future's builtins package to convert the code to the Python 3 behavior (i.e. xrange -> future's builtins.range). Most of the changes were done via these futurize fixes: - libfuturize.fixes.fix_xrange_with_import - lib2to3.fixes.fix_map - lib2to3.fixes.fix_filter This eliminates the pylint warnings: - xrange-builtin - range-builtin-not-iterating - map-builtin-not-iterating - zip-builtin-not-iterating - filter-builtin-not-iterating - reduce-builtin - deprecated-itertools-function Testing: - Ran core job Change-Id: Ic7c082711f8eff451a1b5c085e97461c327edb5f Reviewed-on: http://gerrit.cloudera.org:8080/19589 Reviewed-by: Joe McDonnell <joemcdonnell@cloudera.com> Tested-by: Joe McDonnell <joemcdonnell@cloudera.com>
161 lines
7.1 KiB
Python
161 lines
7.1 KiB
Python
# Licensed to the Apache Software Foundation (ASF) under one
|
|
# or more contributor license agreements. See the NOTICE file
|
|
# distributed with this work for additional information
|
|
# regarding copyright ownership. The ASF licenses this file
|
|
# to you under the Apache License, Version 2.0 (the
|
|
# "License"); you may not use this file except in compliance
|
|
# with the License. You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing,
|
|
# software distributed under the License is distributed on an
|
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
# KIND, either express or implied. See the License for the
|
|
# specific language governing permissions and limitations
|
|
# under the License.
|
|
|
|
from __future__ import absolute_import, division, print_function
|
|
from builtins import range
|
|
from copy import deepcopy
|
|
|
|
from tests.comparison.common import Table
|
|
from tests.comparison.funcs import CastFunc
|
|
from tests.comparison.query import (
|
|
InsertClause,
|
|
InsertStatement,
|
|
Query,
|
|
StatementExecutionMode,
|
|
ValuesClause,
|
|
ValuesRow)
|
|
from tests.comparison.query_generator import QueryGenerator
|
|
|
|
|
|
class InsertStatementGenerator(object):
|
|
def __init__(self, profile):
|
|
# QueryProfile-like object
|
|
self.profile = profile
|
|
# used to generate SELECT queries for INSERT/UPSERT ... SELECT statements;
|
|
# to ensure state is completely reset, this is created anew with each call to
|
|
# generate_statement()
|
|
self.select_stmt_generator = None
|
|
|
|
def generate_statement(self, tables, dml_table):
|
|
"""
|
|
Return a randomly generated INSERT or UPSERT statement. Note that UPSERTs are very
|
|
similar to INSERTs, which is why this generator handles both.
|
|
|
|
tables should be a list of Table objects. A typical source of such a list comes from
|
|
db_connection.DbCursor.describe_common_tables(). This list describes the possible
|
|
"sources" of the INSERT/UPSERT's WITH and FROM/WHERE clauses.
|
|
|
|
dml_table is a required Table object. The INSERT/UPSERT will be into this table.
|
|
"""
|
|
if not (isinstance(tables, list) and len(tables) > 0 and
|
|
all((isinstance(t, Table) for t in tables))):
|
|
raise Exception('tables must be a not-empty list of Table objects')
|
|
|
|
if not isinstance(dml_table, Table):
|
|
raise Exception('dml_table must be a Table')
|
|
|
|
self.select_stmt_generator = QueryGenerator(self.profile)
|
|
|
|
insert_statement = InsertStatement(execution=StatementExecutionMode.DML_TEST)
|
|
|
|
# Choose whether this is a
|
|
# INSERT/UPSERT INTO table SELECT/VALUES
|
|
# or
|
|
# INSERT/UPSERT INTO table (col1, col2, ...) SELECT/VALUES
|
|
# If the method returns None, it's the former.
|
|
insert_column_list = self.profile.choose_insert_column_list(dml_table)
|
|
|
|
if dml_table.primary_keys:
|
|
# Having primary keys implies the table is a Kudu table, which makes it subject to
|
|
# both INSERTs (with automatic ignoring of primary key duplicates) and UPSERTs.
|
|
conflict_action = self.profile.choose_insert_vs_upsert()
|
|
else:
|
|
conflict_action = InsertClause.CONFLICT_ACTION_DEFAULT
|
|
insert_statement.insert_clause = InsertClause(
|
|
dml_table, column_list=insert_column_list, conflict_action=conflict_action)
|
|
# We still need to internally track the columns we're inserting. Keep in mind None
|
|
# means "all" without an explicit column list. Since we've already created the
|
|
# InsertClause object though, we can fill this in for ourselves.
|
|
if insert_column_list is None:
|
|
insert_column_list = dml_table.cols
|
|
insert_item_data_types = [col.type for col in insert_column_list]
|
|
|
|
# Decide whether this is INSERT/UPSERT VALUES or INSERT/UPSERT SELECT
|
|
insert_source_clause = self.profile.choose_insert_source_clause()
|
|
|
|
if issubclass(insert_source_clause, Query):
|
|
# Use QueryGenerator()'s public interface to generate the SELECT.
|
|
select_query = self.select_stmt_generator.generate_statement(
|
|
tables, select_item_data_types=insert_item_data_types)
|
|
# To avoid many loss-of-precision errors, explicitly cast the SelectItems. The
|
|
# generator's type system is not near sophisticated enough to know how random
|
|
# expressions will be implicitly casted in the databases. This requires less work
|
|
# to implement. IMPALA-4693 considers alternative approaches.
|
|
self._cast_select_items(select_query, insert_column_list)
|
|
insert_statement.with_clause = deepcopy(select_query.with_clause)
|
|
select_query.with_clause = None
|
|
insert_statement.select_query = select_query
|
|
elif issubclass(insert_source_clause, ValuesClause):
|
|
insert_statement.values_clause = self._generate_values_clause(insert_column_list)
|
|
else:
|
|
raise Exception('unsupported INSERT/UPSERT source clause: {0}'.format(
|
|
insert_source_clause))
|
|
return insert_statement
|
|
|
|
def _generate_values_clause(self, columns):
|
|
"""
|
|
Return a VALUES clause containing a variable number of rows.
|
|
|
|
The values corresponding to primary keys will be non-null constants. Any other
|
|
columns could be null, constants, or function trees that may or may not evaluate to
|
|
null.
|
|
"""
|
|
values_rows = []
|
|
for _ in range(self.profile.choose_insert_values_row_count()):
|
|
values_row = []
|
|
for col in columns:
|
|
if col.is_primary_key:
|
|
val = self.profile.choose_constant(return_type=col.exact_type, allow_null=False)
|
|
elif 'constant' == self.profile.choose_values_item_expr():
|
|
val = self.profile.choose_constant(return_type=col.exact_type, allow_null=True)
|
|
else:
|
|
func_tree = self.select_stmt_generator.create_func_tree(
|
|
col.type, allow_subquery=False)
|
|
val = self.select_stmt_generator.populate_func_with_vals(func_tree)
|
|
# Only the generic type, not the exact type, of the value will be known. To
|
|
# avoid a lot of failed queries due to precision errors, we cast the val to
|
|
# the exact type of the column. This will still not prevent "out of range"
|
|
# conditions, as we don't try to evaluate the random expressions.
|
|
val = CastFunc(val, col.exact_type)
|
|
values_row.append(val)
|
|
values_rows.append(ValuesRow(values_row))
|
|
return ValuesClause(values_rows)
|
|
|
|
def _cast_select_items(self, select_query, column_list):
|
|
"""
|
|
For a given Query select_query and a column_list (list of Columns), cast each select
|
|
item in select_query to the exact type of the column.
|
|
|
|
A Query may have a UNION, recursively do this down the line.
|
|
"""
|
|
for col_idx, select_item in enumerate(select_query.select_clause.items):
|
|
cast_val_expr = CastFunc(select_item.val_expr, column_list[col_idx].exact_type)
|
|
select_item.val_expr = cast_val_expr
|
|
if select_query.union_clause:
|
|
self._cast_select_items(select_query.union_clause.query, column_list)
|
|
|
|
|
|
def get_generator(statement_type):
|
|
"""
|
|
Given a statement type, return the proper statement generator.
|
|
"""
|
|
STATEMENT_GENERATOR_MAP = {
|
|
InsertStatement: InsertStatementGenerator,
|
|
Query: QueryGenerator,
|
|
}
|
|
return STATEMENT_GENERATOR_MAP[statement_type]
|