mirror of
https://github.com/apache/impala.git
synced 2026-01-17 12:00:29 -05:00
Summary of changes:
1) Simplified type system. The old system was overly complicated for
the task of query generation. The modeling of types used to mirror
the types used in Impala. For simplicity, the new system only uses a
subset of types: Boolean, Char, Decimal, Float, Int, and Timestamp.
2) Functions now have fully typed signatures. Previously you had to
know which functions accepted which inputs, now arbitrary
permutations of functions can be generated. The chance of being
able to add a new function without needing to change the query
generation logic is much higher now.
3) Query generation profiles. The randomness of the previous version
was hardcoded in various places throughout the query generator.
Now there is a profile to determine which SQL features should be
used. There is still a lot of room for improvement in terms of
intuitiveness and documentation for configuring the profiles.
4) Greater diversity of queries. Besides the function permutations,
various restrictions to simplify query generation have been
removed. Also constants are used in queries.
5) Eliminate spinning and infinite loops. Also the old version would
sometimes "hope" that a generated SQL element would be compatible
with the context and if not, it would try again which would lead
to noticeable spinning and/or infinite loops.
6) Catch up with Impala 2.0 features: subqueries, analytics, and
Char/VarChar.
Change-Id: Ia25f4e85d6a06f7958a906aa42d9f90d63675bc0
Reviewed-on: http://gerrit.sjc.cloudera.com:8080/5640
Reviewed-by: Casey Ching <casey@cloudera.com>
Tested-by: jenkins
638 lines
24 KiB
Python
638 lines
24 KiB
Python
# Copyright (c) 2014 Cloudera, Inc. All rights reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
from copy import deepcopy
|
|
from itertools import ifilter
|
|
|
|
from tests.comparison.common import ValExpr
|
|
from tests.comparison.types import (
|
|
Boolean,
|
|
Char,
|
|
DataType,
|
|
Decimal,
|
|
Float,
|
|
Int,
|
|
Number,
|
|
Timestamp,
|
|
TYPES)
|
|
|
|
# Module-level registries of generated function classes. They start out empty and are
# appended to by create_agg(), create_analytic() and create_func() respectively.
AGG_FUNCS = list()   # All aggregate functions will be added
ANALYTIC_FUNCS = list()   # All analytic functions will be added
FUNCS = list()   # All non-aggregate/analytic functions will be added
|
|
|
|
class Arg(object):
  '''Represents an argument in a function signature.

  data_type may be either a DataType or a list of DataTypes. A list is used to
  represent a subquery.

  If require_constant is True, validate() rejects any expr that is not a constant.

  If min_value is not None, validate() rejects any expr whose value is less than
  min_value.

  If can_be_null is False, a NULL value should never be passed into the function
  during execution. This is used to maintain consistency across databases. For example
  if Impala and Postgresql both implement function foo but the results differ when
  the args to foo are NULL, then this flag can be used to prevent NULL values. This
  flag is only stored here; validate() does not check it.

  If can_be_null_literal is False, the literal value NULL should never be an argument
  to the function. This is provided to workaround problems involving function signature
  resolution during execution. An alternative would be to CAST(NULL AS INT).

  determines_signature is used to signify that this arg is used to determine the
  signature during execution. This implies that the function has multiple signatures
  with the same number of arguments and at least one of the "determines_signature"
  arguments must be non-NULL in order to determine which signature to use during
  execution. An example is "SELECT GREATEST(NULL, NULL)" would result in an error
  during execution in Postgresql because the resulting data type could not be
  determined. An alternative would be to ensure that each modeled function contains
  the full set of possible signatures, then see if "foo(NULL)" would be ambiguous
  and if so use "foo(CAST(NULL AS INT))" instead.
  '''

  def __init__(self,
      data_type,
      require_constant=False,
      min_value=None,
      can_be_null=True,
      can_be_null_literal=True,
      determines_signature=False):
    self.type = data_type
    self.require_constant = require_constant
    self.min_value = min_value
    self.can_be_null = can_be_null
    self.can_be_null_literal = can_be_null_literal
    self.determines_signature = determines_signature

  @property
  def is_subquery(self):
    '''True if this arg stands for a subquery, which is modeled as a list of types.'''
    return isinstance(self.type, list)

  def validate(self, expr, skip_nulls=False):
    '''Raises an Exception if "expr" is not acceptable for this argument.

    The checks run in this order: type, constant-ness, minimum value and finally the
    NULL literal restriction. "skip_nulls" only bypasses the last check; a constant
    whose value is None still goes through the first three.
    '''
    # NOTE(review): for a subquery arg self.type is a list, which issubclass() does not
    # accept as its second argument -- confirm subquery args are never validated here.
    if not issubclass(expr.type, self.type):
      raise Exception('Expr type is %s but expected %s' % (expr.type, self.type))
    if self.require_constant and not expr.is_constant:
      raise Exception('A constant is required')
    if self.min_value is not None and expr.val < self.min_value:
      raise Exception('Minumum value not met')
    if skip_nulls and expr.is_constant and expr.val is None:
      return
    if expr.is_constant and expr.val is None and not self.can_be_null_literal:
      raise Exception('A NULL literal is not allowed')

  def __repr__(self):
    _repr = 'Arg<type: '
    if self.is_subquery:
      _repr += 'subquery[' + ', '.join([type_.__name__ for type_ in self.type]) + ']'
    else:
      _repr += self.type.__name__
    if self.require_constant:
      _repr += ', constant: True'
    # Compare against None (as validate() does) so that a minimum of 0 is still shown.
    if self.min_value is not None:
      _repr += ', min: %s' % self.min_value
    _repr += '>'
    return _repr
|
|
|
|
|
|
class Signature(object):
  '''One typed call signature: the owning function, its return type and its args.'''

  def __init__(self, func, return_type, *args):
    self.func, self.return_type = func, return_type
    # Keep a private, mutable copy of the positional args.
    self.args = [arg for arg in args]

  @property
  def input_types(self):
    '''Returns every entry of "args" except the first one.'''
    # NOTE(review): "args" already excludes the return type, so skipping the first
    # entry here looks unintended -- confirm against callers before relying on it.
    remaining_args = self.args[1:]
    return remaining_args
|
|
|
|
|
|
class Func(ValExpr):
  '''Base class for functions'''

  _NAME = None   # Helper for the classmethod name()
  _SIGNATURES = list()   # Helper for the classmethod signatures()

  @classmethod
  def name(cls):
    '''Returns the name of the function. Multiple functions may have the same name.
       For example, COUNT will have a separate Func class for the analytic and aggregate
       versions but both will have the same value of name().
    '''
    return cls.__name__ if cls._NAME is None else cls._NAME

  @classmethod
  def signatures(cls):
    '''Returns the available signatures for the function. Varargs are not supported, a
       subset of possible signatures must be chosen.
    '''
    return cls._SIGNATURES

  @classmethod
  def create_from_args(cls, *val_exprs):
    '''Constructor for instantiating from values. The return types of the exprs will be
       inspected and used to find the function signature. If no signature can be found
       an error will be raised.
    '''
    # The first signature with the right arity whose arg types accept every given expr
    # wins.
    for signature in cls.signatures():
      if len(signature.args) != len(val_exprs):
        continue
      if all(issubclass(val_expr.type, signature_arg.type)
          for val_expr, signature_arg in zip(val_exprs, signature.args)):
        return cls(signature, *val_exprs)
    raise Exception('No signature matches the given arguments: %s' % (val_exprs, ))

  def __init__(self, signature, *val_exprs):
    '''"signature" should be one of the available signatures at the class level and
       signifies which function call this instance is intended to represent.

       If no "val_exprs" are given, placeholder args are created from the signature.
    '''
    if signature not in self.signatures():
      raise Exception('Unknown signature: %s' % (signature, ))
    self.signature = signature
    if val_exprs:
      self.args = list(val_exprs)
    else:
      self.args = list()
      for arg in signature.args:
        if arg.is_subquery:
          # A subquery placeholder is a list with one instance per subquery column type.
          self.args.append([subtype(None) for subtype in arg.type])
        else:
          # Instantiate the arg's type with its minimum value (None when unset).
          self.args.append(arg.type(arg.min_value))

  @property
  def exact_type(self):
    '''The return type of the signature this instance represents.'''
    return self.signature.return_type

  def validate(self, skip_nulls=False):
    '''Raises an Exception if the number of args differs from the signature or if any
       arg is rejected by the corresponding signature arg.
    '''
    if not len(self.args) == len(self.signature.args):
      raise Exception('Signature length mismatch')
    for idx, signature_arg in enumerate(self.signature.args):
      signature_arg.validate(self.args[idx], skip_nulls=skip_nulls)

  def contains_subquery(self):
    '''Returns True if this function, or any function nested in its args, has a
       subquery arg in its signature.
    '''
    for signature_arg in self.signature.args:
      if signature_arg.is_subquery:
        return True
    # contains_subquery is a method, so it has to be called. Referencing it without
    # calling yields a bound method, which is always truthy and would report a
    # subquery for every nested function.
    # NOTE(review): sibling predicates (contains_agg, contains_analytic) are read as
    # attributes elsewhere in this file -- confirm how callers use this one.
    return any(self.iter_exprs(lambda expr: expr.is_func and expr.contains_subquery()))

  def iter_exprs(self, filter=None):
    '''Returns an iterator over all val_exprs including those nested within this
       function's args.

       If "filter" is given, only exprs for which it returns a true value are yielded.
       Nested exprs are still visited when their parent is filtered out.
    '''
    for arg in self.args:
      # Args that are not ValExprs (such as subqueries) are skipped.
      if not isinstance(arg, ValExpr):
        continue
      if not filter or filter(arg):
        yield arg
      for expr in arg.iter_exprs(filter=filter):
        yield expr

  def __hash__(self):
    return hash(type(self)) + hash(self.signature) + hash(tuple(self.args))

  def __eq__(self, other):
    if self is other:
      return True
    # Only instances of exactly the same class can be equal.
    if not type(other) == type(self):
      return False
    return self.signature == other.signature and self.args == other.args
|
|
|
|
|
|
class AggFunc(Func):
  '''Base class for aggregate functions.'''

  def __init__(self, *args):
    super(AggFunc, self).__init__(*args)
    self.distinct = False

  def validate(self, skip_nulls=False):
    '''Runs the base validation, then rejects aggregates among the args and analytics
       anywhere in this function.
    '''
    super(AggFunc, self).validate(skip_nulls=skip_nulls)
    if any(arg.contains_agg for arg in self.args):
      raise Exception('Aggregate functions may not contain other aggregates')
    if self.contains_analytic:
      raise Exception('Aggregate functions may not contain analytics')
|
|
|
|
|
|
class AnalyticFunc(Func):
  '''Base class for analytic functions.'''

  # Class-level defaults. create_analytic() overrides REQUIRES_ORDER_BY and
  # SUPPORTS_WINDOWING on each class it creates.
  HAS_IMPLICIT_WINDOW = False
  SUPPORTS_WINDOWING = True
  REQUIRES_ORDER_BY = False

  def __init__(self, *args):
    super(AnalyticFunc, self).__init__(*args)
    # The clauses start out unset.
    self.partition_by_clause = None
    self.order_by_clause = None
    self.window_clause = None

  def validate(self, skip_nulls=False):
    '''Runs the base validation, then rejects analytics among the args.'''
    super(AnalyticFunc, self).validate(skip_nulls=skip_nulls)
    if any(arg.contains_analytic for arg in self.args):
      raise Exception('Analytic functions may not contain other analytics')
|
|
|
|
|
|
class PartitionByClause(object):
  '''Holds the val_exprs of a PARTITION BY clause.'''

  def __init__(self, val_exprs):
    # The given object is stored as is, not copied.
    setattr(self, 'val_exprs', val_exprs)
|
|
|
|
|
|
class WindowClause(object):
  '''Holds the parts of a window clause: the range/rows choice, the start boundary and
     an optional end boundary.
  '''

  def __init__(self, range_or_rows, start_boundary, end_boundary=None):
    self.range_or_rows, self.start_boundary = range_or_rows, start_boundary
    self.end_boundary = end_boundary
|
|
|
|
|
|
class WindowBoundary(object):
  '''One boundary of a window clause: a boundary type and an optional val_expr.'''

  # Boundary type constants.
  UNBOUNDED_PRECEDING = 'UNBOUNDED PRECEDING'
  PRECEDING = 'PRECEDING'
  CURRENT_ROW = 'CURRENT ROW'
  FOLLOWING = 'FOLLOWING'
  UNBOUNDED_FOLLOWING = 'UNBOUNDED FOLLOWING'

  def __init__(self, boundary_type, val_expr=None):
    self.boundary_type, self.val_expr = boundary_type, val_expr
|
|
|
|
def _replace_data_type_wildcards(signature, replacement_type):
  '''Returns a copy of "signature" with each DataType wildcard replaced by
     "replacement_type", or None if no input arg of "signature" uses DataType.
  '''
  replacement_signature = None
  for arg_idx, arg in enumerate(signature.args):
    if arg.is_subquery:
      for sub_idx, subtype in enumerate(arg.type):
        if subtype == DataType:
          if replacement_signature is None:
            replacement_signature = deepcopy(signature)
          replacement_signature.args[arg_idx].type[sub_idx] = replacement_type
    elif arg.type == DataType:
      if replacement_signature is None:
        replacement_signature = deepcopy(signature)
      replacement_arg = deepcopy(arg)
      replacement_arg.type = replacement_type
      replacement_signature.args[arg_idx] = replacement_arg
  if signature.return_type == DataType:
    if replacement_signature is None:
      raise Exception('Wildcard return type requires at least one wildcard input arg')
    replacement_signature.return_type = replacement_type
  return replacement_signature


def _has_number_wildcard(signature):
  '''Returns True if any input arg of "signature" (or any subquery column type of an
     input arg) is the Number wildcard.
  '''
  for arg in signature.args:
    if arg.is_subquery:
      if any(subtype == Number for subtype in arg.type):
        return True
    elif arg.type == Number:
      return True
  return False


# It's a lot of work to support this but it should be less error prone than explicitly
# listing each signature.
def create_func(name, returns=None, accepts=[], signatures=[], base_type=Func):
  '''Convenience function for creating a function class. The class is put into the
     global namespace just as though the class had been declared using the "class"
     keyword.

     The name of the class is "name". "base_type" can be used to specify the base class.

     The signature(s) of the class can be defined in one of three ways. "returns" and
     "accepts" can be used together but not in combination with "signatures".

     1) "signatures" should be a list of lists. Each entry corresponds to a single
        signature. Each item in the signature can be either an Arg or a DataType or
        a list of the preceding two types. The first entry in the list is the return
        type, the remainder are the input types. DataType is considered a placeholder
        for all other base types (Char, Number, Boolean, Timestamp). If a signature
        contains DataType, the entire signature will be replaced with multiple
        signatures, one for each base type. Number is also considered a placeholder
        but the replacements will be the cross-product of (Int, Float, and Decimal) *
        the number of Number's used, except that the return type is the maximum of
        the input types. A function that accepts a subquery is represented by a list of
        Arg or DataType.

        Ex signatures:
          [Int, Double]: Could be a signature for FLOOR
          [Int, DataType]: Could be a signature for COUNT
              === [Int, Char] + [Int, Number] + [Int, Boolean] + ...
          [Number, Number, Number]: Could be a signature for Multiply
              === ... + [Float, Int, Float] + ... (but not [Int, Float, Float])
          [Boolean, DataType, [DataType]]: Could be a signature for In with a subquery

     2) "returns" and "accepts" is equivalent to
          signatures=[[returns, accepts[0], accepts[1], ..., accepts[n]]]

     3) "accepts" is equivalent to
          signatures=[[accepts[0], accepts[0], accepts[1], ..., accepts[n]]]

     Returns the new class. If "base_type" is Func the class is also appended to FUNCS.
     Raises an Exception if "signatures" is combined with "returns"/"accepts", if a
     DataType return type has no DataType input arg, or if Number is used inside a
     subquery signature.
  '''
  # The list defaults above are never mutated: "signatures" is deep-copied or rebound
  # before any modification and "accepts" is only read.
  if (returns or accepts) and signatures:
    raise Exception('Cannot mix signature specification arguments')

  type_name = base_type.__name__.replace('Func', '') + name
  func = type(type_name, (base_type, ), {'_NAME': name, '_SIGNATURES': []})
  globals()[type_name] = func

  if signatures:
    # Work on a copy so the caller's lists and Args are left untouched.
    signatures = deepcopy(signatures)

  if base_type == Func:
    FUNCS.append(func)
  if returns:
    signatures = [Signature(func, returns)]
  elif accepts:
    signatures = [Signature(func, accepts[0])]
  if accepts:
    signatures[0].args.extend(accepts)

  # Replace convenience inputs with proper types
  for idx, signature in enumerate(signatures):
    if not isinstance(signature, Signature):
      signature = Signature(func, signature[0], *signature[1:])
      signatures[idx] = signature
    if isinstance(signature.return_type, Arg):
      signature.return_type = signature.return_type.type
    for arg_idx, arg in enumerate(signature.args):
      if not isinstance(arg, Arg):
        signature.args[arg_idx] = Arg(arg)

  # Replace "DataType" args with actual types. A signature without any "DataType" arg is
  # emitted once, unchanged, and dropped from further passes. The list being iterated is
  # never modified in place; a new "pending" list is built for the next pass instead.
  non_wildcard_signatures = list()
  pending_signatures = signatures
  for replacement_type in TYPES:
    still_wildcard_signatures = list()
    for signature in pending_signatures:
      replacement_signature = _replace_data_type_wildcards(signature, replacement_type)
      if replacement_signature is None:
        non_wildcard_signatures.append(signature)
      else:
        non_wildcard_signatures.append(replacement_signature)
        still_wildcard_signatures.append(signature)
    pending_signatures = still_wildcard_signatures

  # Replace "Number" args... Number wildcards work differently than DataType wildcards.
  # foo(DataType, DataType) expands to foo(Boolean, Boolean), foo(Char, Char), etc
  # but foo(Number, Number) expands to foo(Decimal, Decimal), foo(Decimal, Int), etc
  # In other words, a cross product needs to be done for Number wildcards. If the return
  # type is also "Number", then it will be replaced with the largest type of the input
  # replacements. Ex, foo(Decimal, Int) would return Decimal.

  # Split the signatures into those with and without Number wildcards
  signatures = list()
  wildcard_signatures = list()
  for signature in non_wildcard_signatures:
    if _has_number_wildcard(signature):
      if signature.return_type == Number:
        # A tuple marks a return type that is still to be determined. Its second item
        # tracks the largest input replacement seen so far.
        signature.return_type = (Number, Int)
      wildcard_signatures.append(signature)
    else:
      signatures.append(signature)

  # Fully replace each wildcard one at a time so that a cross product is created
  while wildcard_signatures:
    signature = wildcard_signatures.pop()
    is_wildcard = False
    for arg_idx, arg in enumerate(signature.args):
      if arg.is_subquery:
        if any(type_ == Number for type_ in arg.type):
          raise Exception('Number not accepted in subquery signatures')
      elif arg.type == Number:
        for replacement_type in [Decimal, Int, Float]:
          replacement_signature = deepcopy(signature)
          replacement_signature.args[arg_idx].type = replacement_type
          if isinstance(replacement_signature.return_type, tuple):
            # max() relies on the type classes being orderable -- presumably defined in
            # tests.comparison.types; verify there.
            replacement_signature.return_type = (
                Number, max(replacement_type, replacement_signature.return_type[1]))
          # Push back so that any remaining Number args get replaced in a later pass.
          wildcard_signatures.append(replacement_signature)
        is_wildcard = True
        break
    if not is_wildcard:
      # Every Number arg has been replaced, finalize the return type.
      if isinstance(signature.return_type, tuple):
        signature.return_type = signature.return_type[1]
      signatures.append(signature)

  func._SIGNATURES = signatures
  return func
|
|
|
|
|
|
def create_agg(name, returns=None, accepts=[], signatures=[]):
  '''Creates an aggregate function class through create_func() with AggFunc as the
     base class, appends it to AGG_FUNCS and returns it.
  '''
  agg_func = create_func(
      name, returns=returns, accepts=accepts, signatures=signatures, base_type=AggFunc)
  AGG_FUNCS.append(agg_func)
  return agg_func
|
|
|
|
|
|
def create_analytic(
    name,
    returns=None,
    accepts=[],
    signatures=[],
    require_order=False,
    supports_window=True):
  '''Creates an analytic function class through create_func() with AnalyticFunc as the
     base class, appends it to ANALYTIC_FUNCS and returns it.

     "require_order" and "supports_window" are stored on the new class as
     REQUIRES_ORDER_BY and SUPPORTS_WINDOWING.
  '''
  analytic_func = create_func(
      name,
      returns=returns,
      accepts=accepts,
      signatures=signatures,
      base_type=AnalyticFunc)
  analytic_func.REQUIRES_ORDER_BY = require_order
  analytic_func.SUPPORTS_WINDOWING = supports_window
  ANALYTIC_FUNCS.append(analytic_func)
  return analytic_func
|
|
|
|
|
|
# Boolean-returning functions: NULL tests, logical operators, subquery existence,
# membership and comparisons. In each "signatures" entry the first item is the return
# type and the remaining items are the input types.
create_func('IsNull', returns=Boolean, accepts=[DataType])
create_func('IsNotNull', returns=Boolean, accepts=[DataType])
create_func('And', returns=Boolean, accepts=[Boolean, Boolean])
create_func('Or', returns=Boolean, accepts=[Boolean, Boolean])
# A nested list denotes a subquery argument.
create_func('Exists', returns=Boolean, accepts=[[DataType]])
create_func('NotExists', returns=Boolean, accepts=[[DataType]])
for func_name in ['In', 'NotIn']:
  # Avoid equality comparison on FLOATs
  # Each left-hand type gets subquery forms ([type]) and three-argument forms;
  # presumably the latter model a two-item value list -- verify against the SQL writer.
  create_func(func_name, signatures=[
      [Boolean, Boolean, [Boolean]],
      [Boolean, Boolean, Boolean, Boolean],
      [Boolean, Char, [Char]],
      [Boolean, Char, Char, Char],
      [Boolean, Decimal, [Decimal]],
      [Boolean, Decimal, [Int]],
      [Boolean, Decimal, Decimal, Decimal],
      [Boolean, Decimal, Decimal, Int],
      [Boolean, Decimal, Int, Decimal],
      [Boolean, Int, [Decimal]],
      [Boolean, Int, [Int]],
      [Boolean, Int, Int, Int],
      [Boolean, Int, Decimal, Int],
      [Boolean, Int, Int, Decimal],
      [Boolean, Timestamp, [Timestamp]],
      [Boolean, Timestamp, Timestamp, Timestamp]])
for comparator in ['GreaterThan', 'LessThan']:
  # Number is a wildcard that create_func() expands to Decimal, Int and Float.
  create_func(comparator, signatures=[
      [Boolean, Number, Number],
      [Boolean, Timestamp, Timestamp]])
for comparator in ['GreaterThanOrEquals', 'LessThanOrEquals']:
  # Avoid equality comparison on FLOATs
  create_func(comparator, signatures=[
      [Boolean, Decimal, Decimal],
      [Boolean, Decimal, Int],
      [Boolean, Int, Decimal],
      [Boolean, Int, Int],
      [Boolean, Timestamp, Timestamp]])
for comparator in ['Equals', 'NotEquals']:
  # Avoid equality comparison on FLOATs
  create_func(comparator, signatures=[
      [Boolean, Boolean, Boolean],
      [Boolean, Char, Char],
      [Boolean, Decimal, Decimal],
      [Boolean, Decimal, Int],
      [Boolean, Int, Decimal],
      [Boolean, Int, Int],
      [Boolean, Timestamp, Timestamp]])
# DataType is a wildcard; create_func() generates one signature per type in TYPES.
create_func('If', returns=DataType,
    accepts=[Boolean, Arg(DataType, determines_signature=True), DataType])
|
|
|
|
# Arithmetic functions.

# Don't allow + or - when using floats/doubles. This is done to avoid something like
# (10000.00919 - 10000) * 10000 which would lead to random values.
for operator in ['Plus', 'Minus']:
  create_func(operator, signatures=[
      [Decimal,
          Arg(Decimal, determines_signature=True),
          Arg(Decimal, determines_signature=True)],
      [Decimal,
          Arg(Decimal, determines_signature=True),
          Arg(Int, determines_signature=True)],
      [Decimal,
          Arg(Int, determines_signature=True),
          Arg(Decimal, determines_signature=True)],
      [Int,
          Arg(Int, determines_signature=True),
          Arg(Int, determines_signature=True)]])
# The Number wildcards are expanded by create_func() into the cross product of Decimal,
# Int and Float, with the return type being the largest of the input types.
create_func('Multiply', signatures=[
    [Number,
        Arg(Number, determines_signature=True),
        Arg(Number, determines_signature=True)]])
# Don't allow INT / INT, Postgresql results in an INT, but a FLOAT in most other databases
create_func('Divide', signatures=[
    [Decimal,
        Arg(Decimal, determines_signature=True),
        Arg(Decimal, determines_signature=True)],
    [Decimal,
        Arg(Decimal, determines_signature=True),
        Arg(Int, determines_signature=True)],
    [Decimal,
        Arg(Int, determines_signature=True),
        Arg(Decimal, determines_signature=True)],
    [Float,
        Arg(Decimal, determines_signature=True),
        Arg(Float, determines_signature=True)],
    [Float,
        Arg(Float, determines_signature=True),
        Arg(Decimal, determines_signature=True)],
    [Float,
        Arg(Float, determines_signature=True),
        Arg(Float, determines_signature=True)],
    [Float,
        Arg(Float, determines_signature=True),
        Arg(Int, determines_signature=True)],
    [Float,
        Arg(Int, determines_signature=True),
        Arg(Float, determines_signature=True)]])

create_func('Abs', signatures=[[Number, Arg(Number, determines_signature=True)]])
# Don't allow FLOAT/DOUBLE to become an INT (ie, an approximation to be treated as a
# precise value).
create_func('Floor', signatures=[[Decimal, Decimal], [Float, Float]])
create_func('Ceil', signatures=[[Decimal, Decimal], [Float, Float]])
|
|
|
|
# String functions. With only "accepts" given, the first accepted type is also used as
# the return type.

# NULL handling in CONCAT differs between Impala and Postgresql
create_func('Concat',
    accepts=[Arg(Char, can_be_null=False), Arg(Char, can_be_null=False)])
create_func('Trim', accepts=[Char])
create_func('Length', returns=Int, accepts=[Char])

# Timestamp functions: one Extract<interval> and one DateAdd<interval> per interval.
for interval in ['Year', 'Month', 'Day', 'Hour', 'Minute', 'Second']:
  create_func('Extract' + interval,
      returns=Int, accepts=[Arg(Timestamp, can_be_null_literal=False)])
  create_func(
      'DateAdd' + interval,
      returns=Timestamp,
      # Determines signature in Postgresql
      accepts=[Arg(Timestamp, determines_signature=True), Int])
|
|
|
|
# Functions that pick one of their arguments. Greatest and Least do not accept NULL
# values (can_be_null=False).
create_func('Greatest', signatures=[
    [Number,
        Arg(Number, can_be_null=False, determines_signature=True),
        Arg(Number, can_be_null=False, determines_signature=True)],
    [Timestamp,
        Arg(Timestamp, can_be_null=False, determines_signature=True),
        Arg(Timestamp, can_be_null=False, determines_signature=True)]])
create_func('Least', signatures=[
    [Number,
        Arg(Number, can_be_null=False, determines_signature=True),
        Arg(Number, can_be_null=False, determines_signature=True)],
    [Timestamp,
        Arg(Timestamp, can_be_null=False, determines_signature=True),
        Arg(Timestamp, can_be_null=False, determines_signature=True)]])
# Varargs are not supported, so Coalesce is modeled with two and three args only.
create_func('Coalesce', signatures=[
    [DataType,
        Arg(DataType, determines_signature=True),
        Arg(DataType, determines_signature=True)],
    [DataType,
        Arg(DataType, determines_signature=True),
        Arg(DataType, determines_signature=True),
        Arg(DataType, determines_signature=True)]])

# This is added so that query generation can assume that any return type can be
# produced by an aggregate or analytic with only one level of nesting.
# Ex: CAST(SUM(...) AS STRING)
create_func('CastAsChar', signatures=[[Char, Int]])
|
|
|
|
# Aggregate functions. Each call also appends the new class to AGG_FUNCS.
create_agg('Count', returns=Int, accepts=[Number])
create_agg('Max', signatures=[
    [Number, Arg(Number, determines_signature=True)],
    [Timestamp, Arg(Timestamp, determines_signature=True)]])
create_agg('Min', signatures=[
    [Number, Arg(Number, determines_signature=True)],
    [Timestamp, Arg(Timestamp, determines_signature=True)]])
create_agg('Sum', signatures=[
    # FLOATs not allowed. See comment about Plus/Minus for info.
    [Int, Arg(Int, determines_signature=True)],
    [Decimal, Arg(Decimal, determines_signature=True)]])
create_agg('Avg', signatures=[
    [Float, Arg(Int, determines_signature=True)],
    [Decimal, Arg(Decimal, determines_signature=True)]])
|
|
|
|
# Analytic functions. Each call also appends the new class to ANALYTIC_FUNCS.
# "require_order" and "supports_window" end up as the REQUIRES_ORDER_BY and
# SUPPORTS_WINDOWING class attributes.
create_analytic('Rank', require_order=True, supports_window=False, returns=Int)
create_analytic('DenseRank', require_order=True, supports_window=False, returns=Int)
create_analytic('RowNumber', require_order=True, supports_window=False, returns=Int)
# The optional second arg of Lead and Lag must be a constant Int of at least 1.
create_analytic('Lead', require_order=True, supports_window=False, signatures=[
    [DataType, Arg(DataType, determines_signature=True)],
    [DataType,
        Arg(DataType, determines_signature=True),
        Arg(Int, require_constant=True, min_value=1)]])
create_analytic('Lag', require_order=True, supports_window=False, signatures=[
    [DataType, Arg(DataType, determines_signature=True)],
    [DataType,
        Arg(DataType, determines_signature=True),
        Arg(Int, require_constant=True, min_value=1)]])
create_analytic('FirstValue', require_order=True, signatures=[
    [DataType, Arg(DataType, determines_signature=True)]])
create_analytic('LastValue', require_order=True, signatures=[
    [DataType, Arg(DataType, determines_signature=True)]])
create_analytic('Max', signatures=[
    [Number, Arg(Number, determines_signature=True)],
    [Timestamp, Arg(Timestamp, determines_signature=True)]])
# NOTE(review): unlike Max above and the aggregate versions, Min and Sum below do not
# mark their args with determines_signature -- confirm whether that is intended.
create_analytic('Min', signatures=[[Number, Number], [Timestamp, Timestamp]])
create_analytic('Sum', signatures=[[Int, Int], [Decimal, Decimal]])   # FLOATs not allowed
create_analytic('Count', returns=Int, accepts=[Number])
create_analytic('Avg', returns=Float, accepts=[Number])
|