mirror of
https://github.com/apache/impala.git
synced 2026-01-04 18:00:57 -05:00
- Generate INSERT statements that are either INSERT ... VALUES or INSERT ... SELECT - On both types of INSERTs, we either insert into all columns, or into some column list. If the column list exists, all primary keys will be present, and 0 or more additional columns will also be in the list. The ordering of the column list is random. - For INSERT ... SELECT, occasionally generate a WITH clause - For INSERT ... VALUES, generate non-null constants for the primary keys, but for the non-primary keys, randomly generate a value expression. The type system in the random statement/query generator isn't sophisticated enough to know the implicit type of a SELECT item or a value expression. It knows it will be some INT-based type, but not if it's going to be a SMALLINT or a BIGINT. To get around this, the easiest thing seems to be to explicitly cast the SELECT items or value expressions to the columns' so-called exact_type attribute. Much of the testing here involved running discrepancy_searcher.py --explain-only on both tpch_kudu and a random HDFS table, using both the default profile and DML-only profile. This was done to quickly find bugs in the statement generation, as they tend to bubble up as analysis errors. I expect to make other changes as follow-on patches as more random statements find small test issues. For actual use against Kudu data, you need to migrate data from Kudu into PostgreSQL 5 (instructions in tests/comparison/POSTGRES.txt) and run something like: tests/comparison/discrepancy_searcher.py \ --use-postgresql \ --postgresql-port 5433 \ --profile dmlonly \ --timeout 300 \ --db-name tpch_kudu \ --query-count 10 Change-Id: I842b41f0eed07ab30ec76d8fc3cdd5affb525af6 Reviewed-on: http://gerrit.cloudera.org:8080/5486 Reviewed-by: Jim Apple <jbapple-impala@apache.org> Tested-by: Impala Public Jenkins
662 lines
25 KiB
Python
662 lines
25 KiB
Python
# Licensed to the Apache Software Foundation (ASF) under one
|
|
# or more contributor license agreements. See the NOTICE file
|
|
# distributed with this work for additional information
|
|
# regarding copyright ownership. The ASF licenses this file
|
|
# to you under the Apache License, Version 2.0 (the
|
|
# "License"); you may not use this file except in compliance
|
|
# with the License. You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing,
|
|
# software distributed under the License is distributed on an
|
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
# KIND, either express or implied. See the License for the
|
|
# specific language governing permissions and limitations
|
|
# under the License.
|
|
|
|
from copy import deepcopy
|
|
from itertools import ifilter
|
|
|
|
from tests.comparison.common import ValExpr
|
|
from tests.comparison.db_types import (
|
|
Boolean,
|
|
Char,
|
|
DataType,
|
|
Decimal,
|
|
Float,
|
|
Int,
|
|
Number,
|
|
Timestamp,
|
|
TYPES)
|
|
|
|
# Module-level registries of generated function classes. They start empty and are
# appended to as function classes are created further down in this module.
AGG_FUNCS = []  # All aggregate functions will be added
ANALYTIC_FUNCS = []  # All analytic functions will be added
FUNCS = []  # All non-aggregate/analytic functions will be added
|
|
|
|
class Arg(object):
    '''Represents an argument in a function signature.

    data_type may be either a DataType or a list of DataTypes. A list is used to
    represent a subquery.

    If can_be_null is False, a NULL value should never be passed into the function
    during execution. This is used to maintain consistency across databases. For example
    if Impala and Postgresql both implement function foo but the results differ when
    the args to foo are NULL, then this flag can be used to prevent NULL values.

    If can_be_null_literal is False, the literal value NULL should never be an argument
    to the function. This is provided to workaround problems involving function signature
    resolution during execution. An alternative would be to CAST(NULL AS INT).

    determines_signature is used to signify that this arg is used to determine the
    signature during execution. This implies that the function has multiple signatures
    with the same number of arguments and at least one of the "determines_signature"
    arguments must be non-NULL in order to determine which signature to use during
    execution. An example is "SELECT GREATEST(NULL, NULL)" would result in an error
    during execution in Postgresql because the resulting data type could not be
    determined. An alternative would be to ensure that each modeled function contains
    the full set of possible signatures, then see if "foo(NULL)" would be ambiguous
    and if so use "foo(CAST(NULL AS INT))" instead.
    '''

    def __init__(self,
                 data_type,
                 require_constant=False,
                 min_value=None,
                 can_be_null=True,
                 can_be_null_literal=True,
                 determines_signature=False):
        self.type = data_type
        self.require_constant = require_constant
        self.min_value = min_value
        self.can_be_null = can_be_null
        self.can_be_null_literal = can_be_null_literal
        self.determines_signature = determines_signature

    @property
    def is_subquery(self):
        '''True when the arg type is a list of types, which models a subquery.'''
        return isinstance(self.type, list)

    def validate(self, expr, skip_nulls=False):
        '''Raises an Exception if "expr" is not acceptable for this argument.

        The checks run in this order: type compatibility, constant requirement, minimum
        value, and finally the NULL literal restriction. When "skip_nulls" is True a
        constant NULL expr is exempt from the NULL literal restriction only; the earlier
        checks still apply to it.
        '''
        if not issubclass(expr.type, self.type):
            raise Exception('Expr type is %s but expected %s' % (expr.type, self.type))
        if self.require_constant and not expr.is_constant:
            raise Exception('A constant is required')
        if self.min_value is not None and expr.val < self.min_value:
            raise Exception('Minumum value not met')
        if skip_nulls and expr.is_constant and expr.val is None:
            return
        if expr.is_constant and expr.val is None and not self.can_be_null_literal:
            raise Exception('A NULL literal is not allowed')

    def __repr__(self):
        if self.is_subquery:
            type_repr = 'subquery[' + ', '.join(
                [type_.__name__ for type_ in self.type]) + ']'
        else:
            type_repr = self.type.__name__
        parts = ['type: ' + type_repr]
        if self.require_constant:
            parts.append('constant: True')
        # Compare against None rather than relying on truthiness so that a minimum of 0
        # is still reported.
        if self.min_value is not None:
            parts.append('min: %s' % self.min_value)
        return 'Arg<' + ', '.join(parts) + '>'
|
|
|
|
|
|
class Signature(object):
    '''Ties a function class to one return type and an ordered list of arguments.'''

    def __init__(self, func, return_type, *args):
        # The varargs tuple is copied into a list so the args can be replaced in place.
        self.args = [arg for arg in args]
        self.return_type = return_type
        self.func = func

    def __repr__(self):
        rendered_args = ", ".join(map(repr, self.args))
        return "Signature<func: {func}, returns: {rt}, args: {arg_list}>".format(
            func=repr(self.func), rt=repr(self.return_type), arg_list=rendered_args)

    @property
    def input_types(self):
        '''Returns every entry of "args" except the first one.'''
        # NOTE(review): the return type is stored separately in "return_type", so
        # skipping args[0] looks unintended -- confirm with callers before relying on it.
        return self.args[1:]
|
|
|
|
|
|
class Func(ValExpr):
    '''Base class for functions'''

    _NAME = None  # Helper for the classmethod name()
    _SIGNATURES = list()  # Helper for the classmethod signatures()

    @classmethod
    def name(cls):
        '''Returns the name of the function. Multiple functions may have the same name.
        For example, COUNT will have a separate Func class for the analytic and aggregate
        versions but both will have the same value of name().
        '''
        return cls.__name__ if cls._NAME is None else cls._NAME

    @classmethod
    def signatures(cls):
        '''Returns the available signatures for the function. Varargs are not supported, a
        subset of possible signatures must be chosen.
        '''
        return cls._SIGNATURES

    @classmethod
    def create_from_args(cls, *val_exprs):
        '''Constructor for instantiating from values. The return types of the exprs will be
        inspected and used to find the function signature. If no signature can be found
        an error will be raised.
        '''
        for signature in cls.signatures():
            if len(signature.args) != len(val_exprs):
                continue
            for idx, arg in enumerate(val_exprs):
                if not issubclass(arg.type, signature.args[idx].type):
                    break
            else:
                # Every expr matched its arg type: keep this signature.
                break
        else:
            raise Exception('No signature matches the given arguments: %s' % (val_exprs, ))
        return cls(signature, *val_exprs)

    def __init__(self, signature, *val_exprs):
        '''"signature" should be one of the available signatures at the class level and
        signifies which function call this instance is intended to represent.

        When no "val_exprs" are given, placeholder args are built from the signature: a
        list of subtype(None) for a subquery arg, otherwise arg.type(arg.min_value).
        '''
        if signature not in self.signatures():
            raise Exception('Unknown signature: %s' % (signature, ))
        self.signature = signature
        if val_exprs:
            self.args = list(val_exprs)
        else:
            self.args = list()
            for arg in signature.args:
                if arg.is_subquery:
                    self.args.append([subtype(None) for subtype in arg.type])
                else:
                    self.args.append(arg.type(arg.min_value))

    @property
    def exact_type(self):
        return self.signature.return_type

    def validate(self, skip_nulls=False):
        '''Raises an Exception if the args do not satisfy the signature.'''
        if not len(self.args) == len(self.signature.args):
            raise Exception('Signature length mismatch')
        for idx, signature_arg in enumerate(self.signature.args):
            signature_arg.validate(self.args[idx], skip_nulls=skip_nulls)

    def contains_subquery(self):
        '''Returns True if this function, or any function nested within its args, has a
        signature that accepts a subquery.
        '''
        for signature_arg in self.signature.args:
            if signature_arg.is_subquery:
                return True

        def nested_has_subquery(expr):
            if not expr.is_func:
                return False
            # contains_subquery is a plain method on this class, so the bare attribute is
            # a bound method and always truthy; it has to be called to get the answer.
            # A property-style override is tolerated as well.
            result = expr.contains_subquery
            return result() if callable(result) else result

        return any(self.iter_exprs(nested_has_subquery))

    def iter_exprs(self, filter=None):
        '''Returns an iterator over all val_exprs including those nested within this
        function's args.
        '''
        for arg in self.args:
            # Subquery args are not ValExprs and are skipped.
            if not isinstance(arg, ValExpr):
                continue
            if not filter or filter(arg):
                yield arg
            for expr in arg.iter_exprs(filter=filter):
                yield expr

    def __hash__(self):
        return hash(type(self)) + hash(self.signature) + hash(tuple(self.args))

    def __eq__(self, other):
        if self is other:
            return True
        if not type(other) == type(self):
            return False
        return self.signature == other.signature and self.args == other.args
|
|
|
|
|
|
class AggFunc(Func):
    '''Base class for aggregate functions. Adds a "distinct" flag, initially False.'''

    def __init__(self, *args):
        Func.__init__(self, *args)
        self.distinct = False

    def validate(self, skip_nulls=False):
        '''Runs the base validation, then rejects nested aggregates and analytics.'''
        super(AggFunc, self).validate(skip_nulls=skip_nulls)
        if any(nested.contains_agg for nested in self.args):
            raise Exception('Aggregate functions may not contain other aggregates')
        if self.contains_analytic:
            raise Exception('Aggregate functions may not contain analytics')
|
|
|
|
|
|
class AnalyticFunc(Func):
    '''Base class for analytic functions. Instances carry optional partition-by,
    order-by and window clauses, all initially None.
    '''

    # Class-level flags; subclasses may override them.
    HAS_IMPLICIT_WINDOW = False
    SUPPORTS_WINDOWING = True
    REQUIRES_ORDER_BY = False

    def __init__(self, *args):
        Func.__init__(self, *args)
        self.window_clause = None
        self.order_by_clause = None
        self.partition_by_clause = None

    def validate(self, skip_nulls=False):
        '''Runs the base validation, then rejects args that contain analytics.'''
        super(AnalyticFunc, self).validate(skip_nulls=skip_nulls)
        if any(nested.contains_analytic for nested in self.args):
            raise Exception('Analytic functions may not contain other analytics')
|
|
|
|
|
|
class PartitionByClause(object):
    '''Holds the value expressions of a PARTITION BY clause.'''

    def __init__(self, val_exprs):
        # Stored as given; no copy is made.
        self.val_exprs = val_exprs
|
|
|
|
|
|
class WindowClause(object):
    '''Holds the parts of a window clause: the RANGE/ROWS selector, the start boundary
    and an optional end boundary.
    '''

    def __init__(self, range_or_rows, start_boundary, end_boundary=None):
        self.end_boundary = end_boundary
        self.start_boundary = start_boundary
        self.range_or_rows = range_or_rows
|
|
|
|
|
|
class WindowBoundary(object):
    '''One boundary of a window clause: a boundary type (one of the class constants)
    plus an optional value expression.
    '''

    # Boundary type constants.
    UNBOUNDED_PRECEDING = 'UNBOUNDED PRECEDING'
    PRECEDING = 'PRECEDING'
    CURRENT_ROW = 'CURRENT ROW'
    FOLLOWING = 'FOLLOWING'
    UNBOUNDED_FOLLOWING = 'UNBOUNDED FOLLOWING'

    def __init__(self, boundary_type, val_expr=None):
        self.val_expr = val_expr
        self.boundary_type = boundary_type
|
|
|
|
# It's a lot of work to support this but it should be less error prone than explicitly
# listing each signature.
def create_func(name, returns=None, accepts=[], signatures=[], base_type=Func):
    '''Convenience function for creating a function class. The class is put into the
    global namespace just as though the class had been declared using the "class"
    keyword.

    The name of the class is "name". "base_type" can be used to specify the base class.

    The signature(s) of the class can be defined in one of three ways. "returns" and
    "accepts" can be used together but not in combination with "signatures".

    1) "signatures" should be a list of lists. Each entry corresponds to a single
       signature. Each item in the signature can be either an Arg or a DataType or
       a list of the preceding two types. The first entry in the list is the return
       type, the remainder are the input types. DataType is considered a placeholder
       for all other base types (Char, Number, Boolean, Timestamp). If a signature
       contains DataType, the entire signature will be replaced with multiple
       signatures, one for each base type. Number is also considered a placeholder
       but the replacements will be the cross-product of (Int, Float, and Decimal) *
       the number of Number's used, except that the return type is the maximum of
       the input types. A function that accepts a subquery is represented by a list of
       Arg or DataType.

       Ex signatures:
         [Int, Double]: Could be a signature for FLOOR
         [Int, DataType]: Could be a signature for COUNT
             === [Int, Char] + [Int, Number] + [Int, Boolean] + ...
         [Number, Number, Number]: Could be a signature for Multiply
             === ... + [Float, Int, Float] + ... (but not [Int, Float, Float])
         [Boolean, DataType, [DataType]]: Could be a signature for In with a subquery

    2) "returns" and "accepts" is equivalent to
         signatures=[[returns, accepts[0], accepts[1], ..., accepts[n]]]

    3) "accepts" is equivalent to
         signatures=[[accepts[0], accepts[0], accepts[1], ..., accepts[n]]]

    Returns the newly created class. Raises an Exception if the specification styles
    are mixed, if a DataType return type has no DataType input arg, or if Number is
    used inside a subquery arg.
    '''
    if (returns or accepts) and signatures:
        raise Exception('Cannot mix signature specification arguments')

    type_name = base_type.__name__.replace('Func', '') + name
    func = type(type_name, (base_type, ), {'_NAME': name, '_SIGNATURES': []})
    globals()[type_name] = func

    if signatures:
        # Work on a private copy so the caller's lists are never modified. The default
        # list arguments are likewise only read, never mutated, below.
        signatures = deepcopy(signatures)

    if base_type == Func:
        FUNCS.append(func)
    if returns:
        signatures = [Signature(func, returns)]
    elif accepts:
        signatures = [Signature(func, accepts[0])]
    if accepts:
        signatures[0].args.extend(accepts)

    # Replace convenience inputs with proper types
    for idx, signature in enumerate(signatures):
        if not isinstance(signature, Signature):
            signature = Signature(func, signature[0], *signature[1:])
            signatures[idx] = signature
        if isinstance(signature.return_type, Arg):
            signature.return_type = signature.return_type.type
        for arg_idx, arg in enumerate(signature.args):
            if not isinstance(arg, Arg):
                signature.args[arg_idx] = Arg(arg)

    # Replace "DataType" args with actual types. Signatures without a "DataType"
    # wildcard are emitted once and dropped from further passes; wildcard signatures are
    # kept as templates and expanded once per replacement type. The pending list is
    # rebuilt on every pass instead of deleting entries while iterating, since deleting
    # during iteration skips the entry that follows each deleted one.
    non_wildcard_signatures = list()
    pending_signatures = signatures
    for replacement_type in TYPES:
        wildcard_templates = list()
        for signature in pending_signatures:
            replacement_signature = None
            for arg_idx, arg in enumerate(signature.args):
                if arg.is_subquery:
                    for sub_idx, subtype in enumerate(arg.type):
                        if subtype == DataType:
                            if replacement_signature is None:
                                replacement_signature = deepcopy(signature)
                            sub_types = replacement_signature.args[arg_idx].type
                            sub_types[sub_idx] = replacement_type
                elif arg.type == DataType:
                    replacement_arg = deepcopy(arg)
                    replacement_arg.type = replacement_type
                    if replacement_signature is None:
                        replacement_signature = deepcopy(signature)
                    replacement_signature.args[arg_idx] = replacement_arg
            if signature.return_type == DataType:
                if replacement_signature is None:
                    raise Exception(
                        'Wildcard return type requires at least one wildcard input arg')
                replacement_signature.return_type = replacement_type
            if replacement_signature is None:
                # This signature did not contain any "DataType" args, it is final and
                # must not be processed again.
                non_wildcard_signatures.append(signature)
            else:
                non_wildcard_signatures.append(replacement_signature)
                wildcard_templates.append(signature)
        pending_signatures = wildcard_templates

    # Replace "Number" args... Number wildcards work differently than DataType wildcards.
    # foo(DataType, DataType) expands to foo(Boolean, Boolean), foo(Char, Char), etc
    # but foo(Number, Number) expands to foo(Decimal, Decimal), foo(Decimal, Int), etc
    # In other words, a cross product needs to be done for Number wildcards. If the return
    # type is also "Number", then it will be replaced with the largest type of the input
    # replacements. Ex, foo(Decimal, Int) would return Decimal.

    # Find wildcard signatures. Both result lists are built fresh so that no signature
    # is skipped while partitioning.
    signatures = list()
    wildcard_signatures = list()
    for signature in non_wildcard_signatures:
        is_wildcard = False
        for arg in signature.args:
            if arg.is_subquery:
                for subtype in arg.type:
                    if subtype == Number:
                        is_wildcard = True
                        break
            elif arg.type == Number:
                is_wildcard = True
            if is_wildcard:
                break
        if is_wildcard:
            if signature.return_type == Number:
                # The tuple marks a return type that is still to be resolved; the second
                # item tracks the largest input type seen so far.
                signature.return_type = (Number, Int)
            wildcard_signatures.append(signature)
        else:
            signatures.append(signature)

    # Helper function to reduce code duplication
    def update_return_type_and_append(
            replacement_type,
            replacement_signature,
            wildcard_signatures):
        if isinstance(replacement_signature.return_type, tuple):
            replacement_signature.return_type = \
                (Number, max(replacement_type, replacement_signature.return_type[1]))
        wildcard_signatures.append(replacement_signature)

    # Fully replace each wildcard one at a time so that a cross product is created
    while wildcard_signatures:
        signature = wildcard_signatures.pop()
        is_wildcard = False
        for arg_idx, arg in enumerate(signature.args):
            replacement_signature = None
            if arg.is_subquery:
                if any(type_ == Number for type_ in arg.type):
                    raise Exception('Number not accepted in subquery signatures')
            elif arg.type == Number:
                for replacement_type in [Decimal, Int, Float]:
                    replacement_signature = deepcopy(signature)
                    replacement_signature.args[arg_idx].type = replacement_type
                    is_wildcard = True
                    update_return_type_and_append(
                        replacement_type, replacement_signature, wildcard_signatures)
            if is_wildcard:
                # Only the first Number arg is expanded per pass; the copies are queued
                # again so their remaining Number args get expanded later.
                break
        if not is_wildcard:
            if isinstance(signature.return_type, tuple):
                signature.return_type = signature.return_type[1]
            signatures.append(signature)

    func._SIGNATURES = signatures
    return func
|
|
|
|
|
|
def create_agg(name, returns=None, accepts=[], signatures=[]):
    '''Creates an aggregate function class through create_func() with AggFunc as the
    base class, records it in AGG_FUNCS and returns it.
    '''
    agg_class = create_func(
        name,
        returns=returns,
        accepts=accepts,
        signatures=signatures,
        base_type=AggFunc)
    AGG_FUNCS.append(agg_class)
    return agg_class
|
|
|
|
|
|
def create_analytic(
        name,
        returns=None,
        accepts=[],
        signatures=[],
        require_order=False,
        supports_window=True):
    '''Creates an analytic function class through create_func() with AnalyticFunc as
    the base class, sets its REQUIRES_ORDER_BY and SUPPORTS_WINDOWING flags from
    "require_order" and "supports_window", records it in ANALYTIC_FUNCS and returns it.
    '''
    analytic_class = create_func(
        name,
        returns=returns,
        accepts=accepts,
        signatures=signatures,
        base_type=AnalyticFunc)
    analytic_class.SUPPORTS_WINDOWING = supports_window
    analytic_class.REQUIRES_ORDER_BY = require_order
    ANALYTIC_FUNCS.append(analytic_class)
    return analytic_class
|
|
|
|
|
|
class CastFunc(Func):
    """
    Cast of a ValExpr to an exact column type; used internally by the
    InsertStatementGenerator.

    Arguments:
    val_expr: the ValExpr being cast
    type_: the type "val_expr" is cast to
    """
    def __init__(self, val_expr, type_):
        # Func.__init__ is not called here, so no "signature" attribute is set on the
        # instance; only "args" is populated.
        cast_operands = [val_expr, type_]
        self.args = cast_operands
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Function registrations. Creation order is significant because each call appends to
# the module-level registries. Signature literals are always written inside the loop
# body so that every created function receives fresh list/Arg objects.
# ---------------------------------------------------------------------------

for _name in ['IsNull', 'IsNotNull']:
    create_func(_name, returns=Boolean, accepts=[DataType])
for _name in ['And', 'Or']:
    create_func(_name, returns=Boolean, accepts=[Boolean, Boolean])
for _name in ['Exists', 'NotExists']:
    create_func(_name, returns=Boolean, accepts=[[DataType]])
for func_name in ['In', 'NotIn']:
    # Avoid equality comparison on FLOATs
    create_func(func_name, signatures=[
        [Boolean, Boolean, [Boolean]],
        [Boolean, Boolean, Boolean, Boolean],
        [Boolean, Char, [Char]],
        [Boolean, Char, Char, Char],
        [Boolean, Decimal, [Decimal]],
        [Boolean, Decimal, [Int]],
        [Boolean, Decimal, Decimal, Decimal],
        [Boolean, Decimal, Decimal, Int],
        [Boolean, Decimal, Int, Decimal],
        [Boolean, Int, [Decimal]],
        [Boolean, Int, [Int]],
        [Boolean, Int, Int, Int],
        [Boolean, Int, Decimal, Int],
        [Boolean, Int, Int, Decimal],
        [Boolean, Timestamp, [Timestamp]],
        [Boolean, Timestamp, Timestamp, Timestamp]])
for comparator in ['GreaterThan', 'LessThan']:
    create_func(comparator, signatures=[
        [Boolean, Char, Char],
        [Boolean, Number, Number],
        [Boolean, Timestamp, Timestamp]])
for comparator in ['GreaterThanOrEquals', 'LessThanOrEquals']:
    # Avoid equality comparison on FLOATs
    create_func(comparator, signatures=[
        [Boolean, Char, Char],
        [Boolean, Decimal, Decimal],
        [Boolean, Decimal, Int],
        [Boolean, Int, Decimal],
        [Boolean, Int, Int],
        [Boolean, Timestamp, Timestamp]])
for comparator in ['Equals', 'NotEquals', 'IsNotDistinctFrom', 'IsNotDistinctFromOp',
                   'IsDistinctFrom']:
    # Avoid equality comparison on FLOATs
    func = create_func(comparator, signatures=[
        [Boolean, Boolean, Boolean],
        [Boolean, Char, Char],
        [Boolean, Decimal, Decimal],
        [Boolean, Decimal, Int],
        [Boolean, Int, Decimal],
        [Boolean, Int, Int],
        [Boolean, Timestamp, Timestamp]])
create_func(
    'If',
    returns=DataType,
    accepts=[Boolean, Arg(DataType, determines_signature=True), DataType])

# Don't allow + or - when using floats/doubles. This is done to avoid something like
# (10000.00919 - 10000) * 10000 which would lead to random values.
for operator in ['Plus', 'Minus']:
    create_func(operator, signatures=[
        [_ret,
         Arg(_lhs, determines_signature=True),
         Arg(_rhs, determines_signature=True)]
        for _ret, _lhs, _rhs in [
            (Decimal, Decimal, Decimal),
            (Decimal, Decimal, Int),
            (Decimal, Int, Decimal),
            (Int, Int, Int)]])
create_func('Multiply', signatures=[
    [Number,
     Arg(Number, determines_signature=True),
     Arg(Number, determines_signature=True)]])
# Don't allow INT / INT, Postgresql results in an INT, but a FLOAT in most other databases
create_func('Divide', signatures=[
    [_ret,
     Arg(_lhs, determines_signature=True),
     Arg(_rhs, determines_signature=True)]
    for _ret, _lhs, _rhs in [
        (Decimal, Decimal, Decimal),
        (Decimal, Decimal, Int),
        (Decimal, Int, Decimal),
        (Float, Decimal, Float),
        (Float, Float, Decimal),
        (Float, Float, Float),
        (Float, Float, Int),
        (Float, Int, Float)]])

create_func('Abs', signatures=[[Number, Arg(Number, determines_signature=True)]])
# Don't allow FLOAT/DOUBLE to become an INT (ie, an approximation to be treated as a
# precise value).
for _name in ['Floor', 'Ceil']:
    create_func(_name, signatures=[[Decimal, Decimal], [Float, Float]])

# NULL handling in CONCAT differs between Impala and Postgresql
create_func(
    'Concat',
    accepts=[Arg(Char, can_be_null=False), Arg(Char, can_be_null=False)])
create_func('Trim', accepts=[Char])
create_func('Length', returns=Int, accepts=[Char])

for interval in ['Year', 'Month', 'Day', 'Hour', 'Minute', 'Second']:
    create_func(
        'Extract' + interval,
        returns=Int,
        accepts=[Arg(Timestamp, can_be_null_literal=False)])
    create_func(
        'DateAdd' + interval,
        returns=Timestamp,
        # Determines signature in Postgresql
        accepts=[Arg(Timestamp, determines_signature=True), Int])

for _name in ['Greatest', 'Least']:
    create_func(_name, signatures=[
        [Number,
         Arg(Number, can_be_null=False, determines_signature=True),
         Arg(Number, can_be_null=False, determines_signature=True)],
        [Timestamp,
         Arg(Timestamp, can_be_null=False, determines_signature=True),
         Arg(Timestamp, can_be_null=False, determines_signature=True)]])
create_func('Coalesce', signatures=[
    [DataType,
     Arg(DataType, determines_signature=True),
     Arg(DataType, determines_signature=True)],
    [DataType,
     Arg(DataType, determines_signature=True),
     Arg(DataType, determines_signature=True),
     Arg(DataType, determines_signature=True)]])

# This is added so that query generation can assume that any return type can be
# produced by an aggregate or analytic with only one level of nesting.
# Ex: CAST(SUM(...) AS STRING)
create_func('CastAsChar', signatures=[[Char, Int]])

create_agg('Count', returns=Int, accepts=[Number])
for _name in ['Max', 'Min']:
    create_agg(_name, signatures=[
        [Number, Arg(Number, determines_signature=True)],
        [Timestamp, Arg(Timestamp, determines_signature=True)]])
create_agg('Sum', signatures=[
    # FLOATs not allowed. See comment about Plus/Minus for info.
    [Int, Arg(Int, determines_signature=True)],
    [Decimal, Arg(Decimal, determines_signature=True)]])
create_agg('Avg', signatures=[
    [Float, Arg(Int, determines_signature=True)],
    [Decimal, Arg(Decimal, determines_signature=True)]])

for _name in ['Rank', 'DenseRank', 'RowNumber']:
    create_analytic(_name, require_order=True, supports_window=False, returns=Int)
for _name in ['Lead', 'Lag']:
    create_analytic(_name, require_order=True, supports_window=False, signatures=[
        [DataType, Arg(DataType, determines_signature=True)],
        [DataType,
         Arg(DataType, determines_signature=True),
         Arg(Int, require_constant=True, min_value=1)]])
for _name in ['FirstValue', 'LastValue']:
    create_analytic(_name, require_order=True, signatures=[
        [DataType, Arg(DataType, determines_signature=True)]])
create_analytic('Max', signatures=[
    [Number, Arg(Number, determines_signature=True)],
    [Timestamp, Arg(Timestamp, determines_signature=True)]])
create_analytic('Min', signatures=[[Number, Number], [Timestamp, Timestamp]])
create_analytic('Sum', signatures=[[Int, Int], [Decimal, Decimal]])  # FLOATs not allowed
create_analytic('Count', returns=Int, accepts=[Number])
create_analytic('Avg', returns=Float, accepts=[Number])
|