mirror of
https://github.com/apache/impala.git
synced 2025-12-19 18:12:08 -05:00
Python 3 changes list operators such as range, map, and filter to be lazy. Some code that expects the list operators to happen immediately will fail. e.g. Python 2: range(0,5) == [0,1,2,3,4] True Python 3: range(0,5) == [0,1,2,3,4] False The fix is to wrap locations with list(). i.e. Python 3: list(range(0,5)) == [0,1,2,3,4] True Since the base operators are now lazy, Python 3 also removes the old lazy versions (e.g. xrange, ifilter, izip, etc). This uses future's builtins package to convert the code to the Python 3 behavior (i.e. xrange -> future's builtins.range). Most of the changes were done via these futurize fixes: - libfuturize.fixes.fix_xrange_with_import - lib2to3.fixes.fix_map - lib2to3.fixes.fix_filter This eliminates the pylint warnings: - xrange-builtin - range-builtin-not-iterating - map-builtin-not-iterating - zip-builtin-not-iterating - filter-builtin-not-iterating - reduce-builtin - deprecated-itertools-function Testing: - Ran core job Change-Id: Ic7c082711f8eff451a1b5c085e97461c327edb5f Reviewed-on: http://gerrit.cloudera.org:8080/19589 Reviewed-by: Joe McDonnell <joemcdonnell@cloudera.com> Tested-by: Joe McDonnell <joemcdonnell@cloudera.com>
667 lines
25 KiB
Python
667 lines
25 KiB
Python
# Licensed to the Apache Software Foundation (ASF) under one
|
|
# or more contributor license agreements. See the NOTICE file
|
|
# distributed with this work for additional information
|
|
# regarding copyright ownership. The ASF licenses this file
|
|
# to you under the Apache License, Version 2.0 (the
|
|
# "License"); you may not use this file except in compliance
|
|
# with the License. You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing,
|
|
# software distributed under the License is distributed on an
|
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
# KIND, either express or implied. See the License for the
|
|
# specific language governing permissions and limitations
|
|
# under the License.
|
|
|
|
from __future__ import absolute_import, division, print_function
|
|
from builtins import filter
|
|
from copy import deepcopy
|
|
|
|
from tests.comparison.common import ValExpr
|
|
from tests.comparison.db_types import (
|
|
Boolean,
|
|
Char,
|
|
DataType,
|
|
Decimal,
|
|
Float,
|
|
Int,
|
|
Number,
|
|
Timestamp,
|
|
TYPES)
|
|
|
|
# Module-level registries. They start out empty and are appended to by the create_*()
# helpers defined in this module.
AGG_FUNCS = []  # All aggregate functions will be added
ANALYTIC_FUNCS = []  # All analytic functions will be added
FUNCS = []  # All non-aggregate/analytic functions will be added
|
|
|
|
class Arg(object):
  '''Represents an argument in a function signature.

     data_type may be either a DataType or a list of DataTypes. A list is used to
     represent a subquery.

     If can_be_null is False, a NULL value should never be passed into the function
     during execution. This is used to maintain consistency across databases. For example
     if Impala and Postgresql both implement function foo but the results differ when
     the args to foo are NULL, then this flag can be used to prevent NULL values.

     If can_be_null_literal is False, the literal value NULL should never be an argument
     to the function. This is provided to workaround problems involving function signature
     resolution during execution. An alternative would be to CAST(NULL AS INT).

     determines_signature is used to signify that this arg is used to determine the
     signature during execution. This implies that the function has multiple signatures
     with the same number of arguments and at least one of the "determines_signature"
     arguments must be non-NULL in order to determine which signature to use during
     execution. An example is "SELECT GREATEST(NULL, NULL)" would result in an error
     during execution in Postgresql because the resulting data type could not be
     determined. An alternative would be to ensure that each modeled function contains
     the full set of possible signatures, then see if "foo(NULL)" would be ambiguous
     and if so use "foo(CAST(NULL AS INT))" instead.

     require_constant and min_value are enforced by validate(). can_be_null and
     determines_signature are only stored on the instance; validate() does not consult
     them.
  '''

  def __init__(self,
      data_type,
      require_constant=False,
      min_value=None,
      can_be_null=True,
      can_be_null_literal=True,
      determines_signature=False):
    self.type = data_type
    self.require_constant = require_constant
    self.min_value = min_value
    self.can_be_null = can_be_null
    self.can_be_null_literal = can_be_null_literal
    self.determines_signature = determines_signature

  @property
  def is_subquery(self):
    '''True if the type of this arg was given as a list (the subquery notation).'''
    return isinstance(self.type, list)

  def validate(self, expr, skip_nulls=False):
    '''Raises an Exception if "expr" is not acceptable for this argument.

       The checks are done in this order:
         1) expr.type must be a subclass of the type of this arg.
         2) expr must be a constant if require_constant is set.
         3) expr.val must not be below min_value if min_value is set. A NULL (None)
            value never satisfies a minimum.
         4) expr must not be a NULL literal if can_be_null_literal is False. This last
            check is skipped if "skip_nulls" is True.

       Nothing is returned if all checks pass.
    '''
    # NOTE(review): for a subquery arg self.type is a list, which issubclass() does not
    # accept as its second argument -- confirm subquery args are never validated here.
    if not issubclass(expr.type, self.type):
      raise Exception('Expr type is %s but expected %s' % (expr.type, self.type))
    if self.require_constant and not expr.is_constant:
      raise Exception('A constant is required')
    if self.min_value is not None:
      # None cannot be ordered against a number in Python 3 (TypeError). Treat NULL as
      # failing the minimum, which is also what the Python 2 ordering rules produced.
      if expr.val is None or expr.val < self.min_value:
        raise Exception('Minimum value not met')
    if skip_nulls:
      return
    if expr.is_constant and expr.val is None and not self.can_be_null_literal:
      raise Exception('A NULL literal is not allowed')

  def __repr__(self):
    if self.is_subquery:
      type_desc = 'subquery[' + ', '.join([type_.__name__ for type_ in self.type]) + ']'
    else:
      type_desc = self.type.__name__
    _repr = 'Arg<type: ' + type_desc
    if self.require_constant:
      _repr += ', constant: True'
    # Compare against None so that a minimum of 0 is still shown.
    if self.min_value is not None:
      _repr += ', min: %s' % self.min_value
    _repr += '>'
    return _repr
|
|
|
|
|
|
class Signature(object):
  '''One concrete way of calling a function: the owning function class, the return type
     and the list of arguments.
  '''

  def __init__(self, func, return_type, *args):
    self.func, self.return_type = func, return_type
    # Stored as a list so that entries can be replaced or appended later.
    self.args = list(args)

  def __repr__(self):
    rendered_args = ", ".join(repr(arg) for arg in self.args)
    template = "Signature<func: {func}, returns: {rt}, args: {arg_list}>"
    return template.format(
        func=repr(self.func), rt=repr(self.return_type), arg_list=rendered_args)

  @property
  def input_types(self):
    '''Returns every entry of "args" except the first one.'''
    # NOTE(review): "args" does not include the return type, so this skips the first
    # input argument -- confirm that is what callers expect.
    return self.args[1:]
|
|
|
|
|
|
class Func(ValExpr):
  '''Base class for functions'''

  _NAME = None  # Helper for the classmethod name()
  _SIGNATURES = []  # Helper for the classmethod signatures()

  @classmethod
  def name(cls):
    '''Returns the name of the function. Multiple functions may have the same name.
       For example, COUNT will have a separate Func class for the analytic and aggregate
       versions but both will have the same value of name().
    '''
    if cls._NAME is not None:
      return cls._NAME
    return cls.__name__

  @classmethod
  def signatures(cls):
    '''Returns the available signatures for the function. Varargs are not supported, a
       subset of possible signatures must be chosen.
    '''
    return cls._SIGNATURES

  @classmethod
  def create_from_args(cls, *val_exprs):
    '''Constructor for instantiating from values. The return types of the exprs will be
       inspected and used to find the function signature. If no signature can be found
       an error will be raised.
    '''
    # The first signature with the same number of args whose arg types all accept the
    # types of the given exprs is used.
    for candidate in cls.signatures():
      if len(candidate.args) != len(val_exprs):
        continue
      if all(issubclass(val_expr.type, candidate.args[idx].type)
             for idx, val_expr in enumerate(val_exprs)):
        return cls(candidate, *val_exprs)
    raise Exception('No signature matches the given arguments: %s' % (val_exprs, ))

  def __init__(self, signature, *val_exprs):
    '''"signature" should be one of the available signatures at the class level and
       signifies which function call this instance is intended to represent.

       If no "val_exprs" are given, one placeholder is created per signature arg: a
       list of subtype(None) values for a subquery arg, otherwise the arg type
       instantiated with the min_value of the arg.
    '''
    if signature not in self.signatures():
      raise Exception('Unknown signature: %s' % (signature, ))
    self.signature = signature
    if val_exprs:
      self.args = list(val_exprs)
      return
    placeholders = []
    for sig_arg in signature.args:
      if sig_arg.is_subquery:
        placeholder = [subtype(None) for subtype in sig_arg.type]
      else:
        placeholder = sig_arg.type(sig_arg.min_value)
      placeholders.append(placeholder)
    self.args = placeholders

  @property
  def exact_type(self):
    '''The return type of the signature this instance was created with.'''
    return self.signature.return_type

  def validate(self, skip_nulls=False):
    '''Raises an Exception if the number of args differs from the signature or if any
       arg is rejected by the corresponding signature arg.
    '''
    expected_args = self.signature.args
    if len(self.args) != len(expected_args):
      raise Exception('Signature length mismatch')
    for position, expected_arg in enumerate(expected_args):
      expected_arg.validate(self.args[position], skip_nulls=skip_nulls)

  def contains_subquery(self):
    '''Returns True if the signature has a subquery arg, otherwise whether any nested
       expr passes the function/subquery filter used below.
    '''
    if any(sig_arg.is_subquery for sig_arg in self.signature.args):
      return True
    # NOTE(review): the filter reads "expr.contains_subquery" without calling it. As
    # defined in this class that attribute is a bound method, which is always truthy --
    # confirm whether ValExpr exposes it as a property.
    return any(self.iter_exprs(lambda expr: expr.is_func and expr.contains_subquery))

  def iter_exprs(self, filter=None):
    '''Returns an iterator over all val_exprs including those nested within this
       function's args.
    '''
    for candidate in self.args:
      # Args that are not ValExprs (a subquery arg is a plain list) are skipped.
      if isinstance(candidate, ValExpr):
        if not filter or filter(candidate):
          yield candidate
        for nested in candidate.iter_exprs(filter=filter):
          yield nested

  def __hash__(self):
    type_hash = hash(type(self))
    signature_hash = hash(self.signature)
    return type_hash + signature_hash + hash(tuple(self.args))

  def __eq__(self, other):
    if self is other:
      return True
    if not type(other) == type(self):
      return False
    return self.signature == other.signature and self.args == other.args
|
|
|
|
|
|
class AggFunc(Func):
  '''Base class for aggregate functions. Instances carry a "distinct" flag which is
     initially False.
  '''

  def __init__(self, *args):
    super(AggFunc, self).__init__(*args)
    self.distinct = False

  def validate(self, skip_nulls=False):
    '''Runs the base validation, then rejects nested aggregates and analytics.'''
    super(AggFunc, self).validate(skip_nulls=skip_nulls)
    if any(arg.contains_agg for arg in self.args):
      raise Exception('Aggregate functions may not contain other aggregates')
    if self.contains_analytic:
      raise Exception('Aggregate functions may not contain analytics')
|
|
|
|
|
|
class AnalyticFunc(Func):
  '''Base class for analytic functions. Instances start out without a PARTITION BY,
     ORDER BY or window clause.
  '''

  # Class-level defaults. REQUIRES_ORDER_BY and SUPPORTS_WINDOWING are overwritten per
  # function class by create_analytic().
  HAS_IMPLICIT_WINDOW = False
  SUPPORTS_WINDOWING = True
  REQUIRES_ORDER_BY = False

  def __init__(self, *args):
    super(AnalyticFunc, self).__init__(*args)
    self.partition_by_clause = self.order_by_clause = self.window_clause = None

  def validate(self, skip_nulls=False):
    '''Runs the base validation, then rejects analytics nested in the args.'''
    super(AnalyticFunc, self).validate(skip_nulls=skip_nulls)
    if any(arg.contains_analytic for arg in self.args):
      raise Exception('Analytic functions may not contain other analytics')
|
|
|
|
|
|
class PartitionByClause(object):
  '''Holder for the value expressions of a PARTITION BY clause.'''

  def __init__(self, val_exprs):
    # The given object is stored as-is, no copy is made.
    self.val_exprs = val_exprs
|
|
|
|
|
|
class WindowClause(object):
  '''Holder for the parts of a window clause: the range/rows selector, the start
     boundary and an optional end boundary.
  '''

  def __init__(self, range_or_rows, start_boundary, end_boundary=None):
    self.range_or_rows = range_or_rows
    self.start_boundary, self.end_boundary = start_boundary, end_boundary
|
|
|
|
|
|
class WindowBoundary(object):
  '''One boundary of a window clause: a boundary type (one of the constants below) and
     an optional value expression.
  '''

  # Boundary type constants.
  UNBOUNDED_PRECEDING = 'UNBOUNDED PRECEDING'
  PRECEDING = 'PRECEDING'
  CURRENT_ROW = 'CURRENT ROW'
  FOLLOWING = 'FOLLOWING'
  UNBOUNDED_FOLLOWING = 'UNBOUNDED FOLLOWING'

  def __init__(self, boundary_type, val_expr=None):
    self.boundary_type, self.val_expr = boundary_type, val_expr
|
|
|
|
# It's a lot of work to support this but it should be less error prone than explicitly
# listing each signature.
def create_func(name, returns=None, accepts=[], signatures=[], base_type=Func):
  '''Convenience function for creating a function class. The class is put into the
     global namespace just as though the class had been declared using the "class"
     keyword.

     The name of the class is "name". "base_type" can be used to specify the base class.
     The class is registered in this module under the name of "base_type" with "Func"
     removed, followed by "name". If "base_type" is Func the class is also appended to
     FUNCS. The created class is returned.

     The signature(s) of the class can be defined in one of three ways. "returns" and
     "accepts" can be used together but not in combination with "signatures", doing so
     raises an Exception.

     1) "signatures" should be a list of lists. Each entry corresponds to a single
        signature. Each item in the signature can be either an Arg or a DataType or
        a list of the preceding two types. The first entry in the list is the return
        type, the remainder are the input types. DataType is considered a placeholder
        for all other base types (Char, Number, Boolean, Timestamp). If a signature
        contains DataType, the entire signature will be replaced with multiple
        signatures, one for each base type. Number is also considered a placeholder
        but the replacements will be the cross-product of (Int, Float, and Decimal) *
        the number of Number's used, except that the return type is the maximum of
        the input types. A function that accepts a subquery is represented by a list of
        Arg or DataType.

        Ex signatures:
          [Int, Double]: Could be a signature for FLOOR
          [Int, DataType]: Could be a signature for COUNT
              === [Int, Char] + [Int, Number] + [Int, Boolean] + ...
          [Number, Number, Number]: Could be a signature for Multiply
              === ... + [Float, Int, Float] + ... (but not [Int, Float, Float])
          [Boolean, DataType, [DataType]]: Could be a signature for In with a subquery

     2) "returns" and "accepts" is equivalent to
        signatures=[[returns, accepts[0], accepts[1], ..., accepts[n]]]

     3) "accepts" is equivalent to
        signatures=[[accepts[0], accepts[0], accepts[1], ..., accepts[n]]]

     An Exception is raised if a signature has a DataType return type without any
     DataType input arg, or if Number is used inside of a subquery arg.
  '''
  # The list defaults above are never modified: "signatures" is deep copied or rebound
  # before anything is written to it and "accepts" is only read.
  if (returns or accepts) and signatures:
    raise Exception('Cannot mix signature specification arguments')

  type_name = base_type.__name__.replace('Func', '') + name
  func = type(type_name, (base_type, ), {'_NAME': name, '_SIGNATURES': []})
  globals()[type_name] = func

  if signatures:
    # Entries are replaced in place below, copy so the lists of the caller are not
    # modified.
    signatures = deepcopy(signatures)

  if base_type == Func:
    FUNCS.append(func)
  if returns:
    signatures = [Signature(func, returns)]
  elif accepts:
    signatures = [Signature(func, accepts[0])]
  if accepts:
    signatures[0].args.extend(accepts)

  # Replace convenience inputs with proper types
  for idx, signature in enumerate(signatures):
    if not isinstance(signature, Signature):
      signature = Signature(func, signature[0], *signature[1:])
      signatures[idx] = signature
    if isinstance(signature.return_type, Arg):
      signature.return_type = signature.return_type.type
    for arg_idx, arg in enumerate(signature.args):
      if not isinstance(arg, Arg):
        signature.args[arg_idx] = Arg(arg)

  # Replace "DataType" args with actual types. Signatures that still need to be expanded
  # for the next replacement type are collected in a new list on every pass. Removing
  # entries from the list that is being iterated over would skip the entry following
  # each removed one.
  non_wildcard_signatures = list()
  pending_signatures = signatures
  for replacement_type in TYPES:
    still_wildcard_signatures = list()
    for signature in pending_signatures:
      replacement_signature = None
      for arg_idx, arg in enumerate(signature.args):
        if arg.is_subquery:
          for sub_idx, subtype in enumerate(arg.type):
            if subtype == DataType:
              if not replacement_signature:
                replacement_signature = deepcopy(signature)
              replacement_signature.args[arg_idx].type[sub_idx] = replacement_type
        elif arg.type == DataType:
          replacement_arg = deepcopy(arg)
          replacement_arg.type = replacement_type
          if not replacement_signature:
            replacement_signature = deepcopy(signature)
          replacement_signature.args[arg_idx] = replacement_arg
      if signature.return_type == DataType:
        if not replacement_signature:
          raise Exception('Wildcard return type requires at least one wildcard input arg')
        replacement_signature.return_type = replacement_type
      if replacement_signature:
        non_wildcard_signatures.append(replacement_signature)
        # The original must be expanded again for the remaining replacement types.
        still_wildcard_signatures.append(signature)
      else:
        # This signature did not contain any "DataType" args, it is kept as-is and is
        # not processed again.
        non_wildcard_signatures.append(signature)
    pending_signatures = still_wildcard_signatures

  # Replace "Number" args... Number wildcards work differently than DataType wildcards.
  # foo(DataType, DataType) expands to foo(Boolean, Boolean), foo(Char, Char), etc
  # but foo(Number, Number) expands to foo(Decimal, Decimal), foo(Decimal, Int), etc
  # In other words, a cross product needs to be done for Number wildcards. If the return
  # type is also "Number", then it will be replaced with the largest type of the input
  # replacements. Ex, foo(Decimal, Int) would return Decimal.

  # Find wildcard signatures. As above, the signatures are split into two new lists
  # instead of deleting from the list being iterated over.
  signatures = list()
  wildcard_signatures = list()
  for signature in non_wildcard_signatures:
    is_wildcard = False
    for arg in signature.args:
      if arg.is_subquery:
        is_wildcard = any(subtype == Number for subtype in arg.type)
      elif arg.type == Number:
        is_wildcard = True
      if is_wildcard:
        break
    if is_wildcard:
      if signature.return_type == Number:
        # The second item tracks the largest input type seen so far, see below.
        signature.return_type = (Number, Int)
      wildcard_signatures.append(signature)
    else:
      signatures.append(signature)

  # Helper function to reduce code duplication
  def update_return_type_and_append(
      replacement_type,
      replacement_signature,
      wildcard_signatures):
    if isinstance(replacement_signature.return_type, tuple):
      replacement_signature.return_type = \
          (Number, max(replacement_type, replacement_signature.return_type[1]))
    wildcard_signatures.append(replacement_signature)

  # Fully replace each wildcard one at a time so that a cross product is created
  while wildcard_signatures:
    signature = wildcard_signatures.pop()
    is_wildcard = False
    for arg_idx, arg in enumerate(signature.args):
      replacement_signature = None
      if arg.is_subquery:
        if any(type_ == Number for type_ in arg.type):
          raise Exception('Number not accepted in subquery signatures')
      elif arg.type == Number:
        for replacement_type in [Decimal, Int, Float]:
          replacement_signature = deepcopy(signature)
          replacement_signature.args[arg_idx].type = replacement_type
          is_wildcard = True
          update_return_type_and_append(
              replacement_type, replacement_signature, wildcard_signatures)
      if is_wildcard:
        # Only one Number arg is replaced per round, the replacements were queued above
        # and any remaining Number args are handled when they are popped.
        break
    if not is_wildcard:
      # No Number args are left, settle the return type and keep the signature.
      if isinstance(signature.return_type, tuple):
        signature.return_type = signature.return_type[1]
      signatures.append(signature)

  func._SIGNATURES = signatures
  return func
|
|
|
|
|
|
def create_agg(name, returns=None, accepts=[], signatures=[]):
  '''Creates an aggregate function class through create_func() using AggFunc as the
     base class, appends the class to AGG_FUNCS and returns it.
  '''
  agg_class = create_func(
      name, returns=returns, accepts=accepts, signatures=signatures, base_type=AggFunc)
  AGG_FUNCS.append(agg_class)
  return agg_class
|
|
|
|
|
|
def create_analytic(
    name,
    returns=None,
    accepts=[],
    signatures=[],
    require_order=False,
    supports_window=True):
  '''Creates an analytic function class through create_func() using AnalyticFunc as the
     base class, appends the class to ANALYTIC_FUNCS and returns it.

     "require_order" and "supports_window" are stored on the new class as
     REQUIRES_ORDER_BY and SUPPORTS_WINDOWING.
  '''
  analytic_class = create_func(
      name,
      returns=returns,
      accepts=accepts,
      signatures=signatures,
      base_type=AnalyticFunc)
  analytic_class.REQUIRES_ORDER_BY = require_order
  analytic_class.SUPPORTS_WINDOWING = supports_window
  ANALYTIC_FUNCS.append(analytic_class)
  return analytic_class
|
|
|
|
|
|
class CastFunc(Func):
  """
  This function is used internally by the InsertStatementGenerator to cast ValExprs into
  the proper exact types of columns.

  Arguments:
    val_expr: ValExpr to cast
    type_: Type to cast ValExpr
  """
  def __init__(self, val_expr, type_):
    # Func.__init__ is not invoked here, so only "args" is set on the instance (no
    # "signature").
    cast_operands = [val_expr, type_]
    self.args = cast_operands
|
|
|
|
|
|
# NULL tests, boolean connectives and subquery existence tests.
create_func('IsNull', returns=Boolean, accepts=[DataType])
create_func('IsNotNull', returns=Boolean, accepts=[DataType])
create_func('And', returns=Boolean, accepts=[Boolean, Boolean])
create_func('Or', returns=Boolean, accepts=[Boolean, Boolean])
create_func('Exists', returns=Boolean, accepts=[[DataType]])
create_func('NotExists', returns=Boolean, accepts=[[DataType]])

# Every type gets signatures with a subquery as the second input and signatures with
# three plain inputs.
for func_name in ('In', 'NotIn'):
  # Avoid equality comparison on FLOATs
  create_func(func_name, signatures=[
      [Boolean, Boolean, [Boolean]],
      [Boolean, Boolean, Boolean, Boolean],
      [Boolean, Char, [Char]],
      [Boolean, Char, Char, Char],
      [Boolean, Decimal, [Decimal]],
      [Boolean, Decimal, [Int]],
      [Boolean, Decimal, Decimal, Decimal],
      [Boolean, Decimal, Decimal, Int],
      [Boolean, Decimal, Int, Decimal],
      [Boolean, Int, [Decimal]],
      [Boolean, Int, [Int]],
      [Boolean, Int, Int, Int],
      [Boolean, Int, Decimal, Int],
      [Boolean, Int, Int, Decimal],
      [Boolean, Timestamp, [Timestamp]],
      [Boolean, Timestamp, Timestamp, Timestamp]])

# Strict comparisons accept any combination of number types.
for comparator in ('GreaterThan', 'LessThan'):
  create_func(comparator, signatures=[
      [Boolean, Char, Char],
      [Boolean, Number, Number],
      [Boolean, Timestamp, Timestamp]])

for comparator in ('GreaterThanOrEquals', 'LessThanOrEquals'):
  # Avoid equality comparison on FLOATs
  create_func(comparator, signatures=[
      [Boolean, Char, Char],
      [Boolean, Decimal, Decimal],
      [Boolean, Decimal, Int],
      [Boolean, Int, Decimal],
      [Boolean, Int, Int],
      [Boolean, Timestamp, Timestamp]])

for comparator in ('Equals', 'NotEquals', 'IsNotDistinctFrom', 'IsNotDistinctFromOp',
                   'IsDistinctFrom'):
  # Avoid equality comparison on FLOATs
  func = create_func(comparator, signatures=[
      [Boolean, Boolean, Boolean],
      [Boolean, Char, Char],
      [Boolean, Decimal, Decimal],
      [Boolean, Decimal, Int],
      [Boolean, Int, Decimal],
      [Boolean, Int, Int],
      [Boolean, Timestamp, Timestamp]])

create_func(
    'If',
    returns=DataType,
    accepts=[Boolean, Arg(DataType, determines_signature=True), DataType])
|
|
|
|
# Don't allow + or - when using floats/doubles. This is done to avoid something like
# (10000.00919 - 10000) * 10000 which would lead to random values.
for operator in ('Plus', 'Minus'):
  create_func(operator, signatures=[
      [Decimal,
       Arg(Decimal, determines_signature=True), Arg(Decimal, determines_signature=True)],
      [Decimal,
       Arg(Decimal, determines_signature=True), Arg(Int, determines_signature=True)],
      [Decimal,
       Arg(Int, determines_signature=True), Arg(Decimal, determines_signature=True)],
      [Int,
       Arg(Int, determines_signature=True), Arg(Int, determines_signature=True)]])

# The Number wildcards are expanded into concrete number types by create_func().
create_func('Multiply', signatures=[
    [Number,
     Arg(Number, determines_signature=True), Arg(Number, determines_signature=True)]])

# Don't allow INT / INT, Postgresql results in an INT, but a FLOAT in most other databases
create_func('Divide', signatures=[
    [Decimal,
     Arg(Decimal, determines_signature=True), Arg(Decimal, determines_signature=True)],
    [Decimal,
     Arg(Decimal, determines_signature=True), Arg(Int, determines_signature=True)],
    [Decimal,
     Arg(Int, determines_signature=True), Arg(Decimal, determines_signature=True)],
    [Float,
     Arg(Decimal, determines_signature=True), Arg(Float, determines_signature=True)],
    [Float,
     Arg(Float, determines_signature=True), Arg(Decimal, determines_signature=True)],
    [Float,
     Arg(Float, determines_signature=True), Arg(Float, determines_signature=True)],
    [Float,
     Arg(Float, determines_signature=True), Arg(Int, determines_signature=True)],
    [Float,
     Arg(Int, determines_signature=True), Arg(Float, determines_signature=True)]])

create_func('Abs', signatures=[
    [Number, Arg(Number, determines_signature=True)]])
# Don't allow FLOAT/DOUBLE to become an INT (ie, an approximation to be treated as a
# precise value).
create_func('Floor', signatures=[
    [Decimal, Decimal],
    [Float, Float]])
create_func('Ceil', signatures=[
    [Decimal, Decimal],
    [Float, Float]])
|
|
|
|
# NULL handling in CONCAT differs between Impala and Postgresql
create_func(
    'Concat',
    accepts=[Arg(Char, can_be_null=False), Arg(Char, can_be_null=False)])
# With only "accepts" given, the first accepted type is also used as the return type.
create_func('Trim', accepts=[Char])
create_func('Length', returns=Int, accepts=[Char])

# In order to use the levenshtein() function in Postgres this needs to be run:
#   CREATE EXTENSION IF NOT EXISTS fuzzystrmatch;
create_func('Levenshtein', returns=Int, accepts=[Arg(Char), Arg(Char)])

# One Extract<interval> and one DateAdd<interval> function per interval.
for interval in ('Year', 'Month', 'Day', 'Hour', 'Minute', 'Second'):
  create_func(
      'Extract' + interval,
      returns=Int,
      accepts=[Arg(Timestamp, can_be_null_literal=False)])
  create_func(
      'DateAdd' + interval,
      returns=Timestamp,
      # Determines signature in Postgresql
      accepts=[Arg(Timestamp, determines_signature=True), Int])
|
|
|
|
# Greatest and Least share the same signatures: two numbers or two timestamps, none of
# which may be NULL.
create_func('Greatest', signatures=[
    [Number,
     Arg(Number, can_be_null=False, determines_signature=True),
     Arg(Number, can_be_null=False, determines_signature=True)],
    [Timestamp,
     Arg(Timestamp, can_be_null=False, determines_signature=True),
     Arg(Timestamp, can_be_null=False, determines_signature=True)]])
create_func('Least', signatures=[
    [Number,
     Arg(Number, can_be_null=False, determines_signature=True),
     Arg(Number, can_be_null=False, determines_signature=True)],
    [Timestamp,
     Arg(Timestamp, can_be_null=False, determines_signature=True),
     Arg(Timestamp, can_be_null=False, determines_signature=True)]])
# Coalesce is modeled with two and with three inputs.
create_func('Coalesce', signatures=[
    [DataType,
     Arg(DataType, determines_signature=True), Arg(DataType, determines_signature=True)],
    [DataType,
     Arg(DataType, determines_signature=True),
     Arg(DataType, determines_signature=True),
     Arg(DataType, determines_signature=True)]])

# This is added so that query generation can assume that any return type can be
# produced by an aggregate or analytic with only one level of nesting.
# Ex: CAST(SUM(...) AS STRING)
create_func('CastAsChar', signatures=[
    [Char, Int]])
|
|
|
|
# Aggregate functions. Each call below also appends the new class to AGG_FUNCS.
create_agg('Count', returns=Int, accepts=[Number])
create_agg('Max', signatures=[
    [Number, Arg(Number, determines_signature=True)],
    [Timestamp, Arg(Timestamp, determines_signature=True)],
])
create_agg('Min', signatures=[
    [Number, Arg(Number, determines_signature=True)],
    [Timestamp, Arg(Timestamp, determines_signature=True)],
])
create_agg('Sum', signatures=[
    # FLOATs not allowed. See comment about Plus/Minus for info.
    [Int, Arg(Int, determines_signature=True)],
    [Decimal, Arg(Decimal, determines_signature=True)],
])
create_agg('Avg', signatures=[
    [Float, Arg(Int, determines_signature=True)],
    [Decimal, Arg(Decimal, determines_signature=True)],
])
|
|
|
|
# Analytic functions. Each call below also appends the new class to ANALYTIC_FUNCS.
# The ranking functions take no inputs, require an ORDER BY and do not support windowing.
create_analytic('Rank', returns=Int, require_order=True, supports_window=False)
create_analytic('DenseRank', returns=Int, require_order=True, supports_window=False)
create_analytic('RowNumber', returns=Int, require_order=True, supports_window=False)
# Lead and Lag optionally accept a constant Int of at least 1 as a second input.
create_analytic(
    'Lead',
    require_order=True,
    supports_window=False,
    signatures=[
        [DataType, Arg(DataType, determines_signature=True)],
        [DataType,
         Arg(DataType, determines_signature=True),
         Arg(Int, require_constant=True, min_value=1)]])
create_analytic(
    'Lag',
    require_order=True,
    supports_window=False,
    signatures=[
        [DataType, Arg(DataType, determines_signature=True)],
        [DataType,
         Arg(DataType, determines_signature=True),
         Arg(Int, require_constant=True, min_value=1)]])
create_analytic(
    'FirstValue',
    require_order=True,
    signatures=[[DataType, Arg(DataType, determines_signature=True)]])
create_analytic(
    'LastValue',
    require_order=True,
    signatures=[[DataType, Arg(DataType, determines_signature=True)]])
create_analytic('Max', signatures=[
    [Number, Arg(Number, determines_signature=True)],
    [Timestamp, Arg(Timestamp, determines_signature=True)]])
create_analytic('Min', signatures=[
    [Number, Number],
    [Timestamp, Timestamp]])
create_analytic('Sum', signatures=[
    [Int, Int],
    [Decimal, Decimal]])  # FLOATs not allowed
create_analytic('Count', returns=Int, accepts=[Number])
create_analytic('Avg', returns=Float, accepts=[Number])
|