mirror of
https://github.com/apache/impala.git
synced 2026-01-06 06:01:03 -05:00
Make many builtin aggregate functions use fixed-length intermediate types:
* avg()
* ndv()
* stddev(), variance(), etc
* distinctpc(), distinctpcsa()

sample(), appx_median(), histogram() and group_concat() actually allocate var-len data so aren't changed.

This has some major benefits:
* Spill-to-disk works properly with these aggregations.
* Aggregations are more efficient because there is one less pointer indirection.
* Aggregations use less memory, because we don't need an extra 12-byte StringValue for the indirection.

Adds a special-purpose internal type FIXED_UDA_INTERMEDIATE. The type is represented in the same way as CHAR - a fixed-size array of bytes, stored inline in tuples. However, it is not user-visible and does not support CHAR semantics, i.e. users can't declare tables, functions, etc with the type. The pointer and length are passed into aggregate functions wrapped in a StringVal.

Updates some internal codegen functions to work better with the new type. E.g. store values directly into the result tuple instead of via an intermediate stack allocation.

Testing: This change only affects builtin aggregate functions, for which we have test coverage already. If we were to allow wider use of this type, it would need further testing. Added an analyzer test to ensure we can't use the type for UDAs. Added a regression test for spilling avg(). Added a regression test for UDA with CHAR intermediate hitting DCHECK.

Perf: Ran TPC-H locally. TPC-H Q17, which has a high-cardinality AVG(), improved dramatically.
+----------+-----------------------+---------+------------+------------+----------------+ | Workload | File Format | Avg (s) | Delta(Avg) | GeoMean(s) | Delta(GeoMean) | +----------+-----------------------+---------+------------+------------+----------------+ | TPCH(60) | parquet / none / none | 18.44 | -17.54% | 11.92 | -5.34% | +----------+-----------------------+---------+------------+------------+----------------+ +----------+----------+-----------------------+--------+-------------+------------+-----------+----------------+-------------+-------+ | Workload | Query | File Format | Avg(s) | Base Avg(s) | Delta(Avg) | StdDev(%) | Base StdDev(%) | Num Clients | Iters | +----------+----------+-----------------------+--------+-------------+------------+-----------+----------------+-------------+-------+ | TPCH(60) | TPCH-Q12 | parquet / none / none | 18.40 | 17.64 | +4.32% | 0.77% | 1.09% | 1 | 5 | | TPCH(60) | TPCH-Q22 | parquet / none / none | 7.07 | 6.90 | +2.36% | 0.28% | 0.30% | 1 | 5 | | TPCH(60) | TPCH-Q3 | parquet / none / none | 12.37 | 12.11 | +2.10% | 0.18% | 0.15% | 1 | 5 | | TPCH(60) | TPCH-Q7 | parquet / none / none | 42.48 | 42.09 | +0.93% | 2.45% | 0.80% | 1 | 5 | | TPCH(60) | TPCH-Q6 | parquet / none / none | 3.18 | 3.15 | +0.89% | 0.67% | 0.76% | 1 | 5 | | TPCH(60) | TPCH-Q19 | parquet / none / none | 7.24 | 7.20 | +0.50% | 0.95% | 0.67% | 1 | 5 | | TPCH(60) | TPCH-Q10 | parquet / none / none | 13.37 | 13.30 | +0.50% | 0.48% | 1.39% | 1 | 5 | | TPCH(60) | TPCH-Q5 | parquet / none / none | 7.47 | 7.44 | +0.36% | 0.58% | 0.54% | 1 | 5 | | TPCH(60) | TPCH-Q11 | parquet / none / none | 2.03 | 2.02 | +0.06% | 0.26% | 1.95% | 1 | 5 | | TPCH(60) | TPCH-Q4 | parquet / none / none | 5.48 | 5.50 | -0.27% | 0.62% | 1.12% | 1 | 5 | | TPCH(60) | TPCH-Q13 | parquet / none / none | 22.11 | 22.18 | -0.31% | 0.18% | 0.55% | 1 | 5 | | TPCH(60) | TPCH-Q15 | parquet / none / none | 8.45 | 8.48 | -0.32% | 0.40% | 0.47% | 1 | 5 | | TPCH(60) | TPCH-Q9 | parquet / none / 
none | 33.39 | 33.66 | -0.81% | 0.75% | 0.59% | 1 | 5 | | TPCH(60) | TPCH-Q21 | parquet / none / none | 71.34 | 72.07 | -1.01% | 1.84% | 1.79% | 1 | 5 | | TPCH(60) | TPCH-Q14 | parquet / none / none | 5.93 | 6.00 | -1.07% | 0.15% | 0.69% | 1 | 5 | | TPCH(60) | TPCH-Q20 | parquet / none / none | 5.72 | 5.79 | -1.09% | 0.59% | 0.51% | 1 | 5 | | TPCH(60) | TPCH-Q18 | parquet / none / none | 45.42 | 45.93 | -1.10% | 1.42% | 0.50% | 1 | 5 | | TPCH(60) | TPCH-Q2 | parquet / none / none | 4.81 | 4.89 | -1.52% | 1.68% | 1.01% | 1 | 5 | | TPCH(60) | TPCH-Q16 | parquet / none / none | 5.41 | 5.52 | -1.98% | 0.66% | 0.73% | 1 | 5 | | TPCH(60) | TPCH-Q1 | parquet / none / none | 27.58 | 29.13 | -5.34% | 0.24% | 1.51% | 1 | 5 | | TPCH(60) | TPCH-Q8 | parquet / none / none | 12.61 | 14.30 | -11.78% | 6.20% | * 15.28% * | 1 | 5 | | TPCH(60) | TPCH-Q17 | parquet / none / none | 43.74 | 126.58 | I -65.44% | 1.34% | 9.60% | 1 | 5 | +----------+----------+-----------------------+--------+-------------+------------+-----------+----------------+-------------+-------+ Change-Id: Ife90cf27989f98ffb5ef5c39f1e09ce92e8cb87c Reviewed-on: http://gerrit.cloudera.org:8080/7526 Tested-by: Impala Public Jenkins Reviewed-by: Tim Armstrong <tarmstrong@cloudera.com>
298 lines
9.9 KiB
Plaintext
298 lines
9.9 KiB
Plaintext
====
|
|
---- QUERY
|
|
# Add functions and test function overloading and scoping.
|
|
create function $DATABASE.fn() RETURNS int
|
|
LOCATION '$FILESYSTEM_PREFIX/test-warehouse/libTestUdfs.so' SYMBOL='Fn'
|
|
====
|
|
---- QUERY
|
|
create function $DATABASE.fn(int) RETURNS double
|
|
LOCATION '$FILESYSTEM_PREFIX/test-warehouse/libTestUdfs.so' SYMBOL='Fn'
|
|
====
|
|
---- QUERY
|
|
create function $DATABASE.fn(int, string) RETURNS int
|
|
LOCATION '$FILESYSTEM_PREFIX/test-warehouse/libTestUdfs.so' SYMBOL='Fn'
|
|
====
|
|
---- QUERY
|
|
create function $DATABASE.fn(string, int) RETURNS int
|
|
LOCATION '$FILESYSTEM_PREFIX/test-warehouse/libTestUdfs.so' SYMBOL='Fn'
|
|
====
|
|
---- QUERY
|
|
create function $DATABASE.fn2(int) RETURNS int
|
|
LOCATION '$FILESYSTEM_PREFIX/test-warehouse/libTestUdfs.so' SYMBOL='Fn2'
|
|
====
|
|
---- QUERY
|
|
create function $DATABASE.fn2(int, string) RETURNS int
|
|
LOCATION '$FILESYSTEM_PREFIX/test-warehouse/libTestUdfs.so' SYMBOL='Fn2'
|
|
====
|
|
---- QUERY
|
|
create function $DATABASE.fn_var_arg(int...) RETURNS int
|
|
LOCATION '$FILESYSTEM_PREFIX/test-warehouse/libTestUdfs.so' SYMBOL='VarSum'
|
|
====
|
|
---- QUERY
|
|
create aggregate function $DATABASE.agg_fn(int) RETURNS bigint
|
|
LOCATION '$FILESYSTEM_PREFIX/test-warehouse/libudasample.so' UPDATE_FN='CountUpdate'
|
|
====
|
|
---- QUERY
|
|
create aggregate function $DATABASE.agg_fn(int, string) RETURNS int
|
|
LOCATION '$FILESYSTEM_PREFIX/test-warehouse/libTestUdas.so' UPDATE_FN='TwoArgUpdate'
|
|
====
|
|
---- QUERY
|
|
show functions in $DATABASE
|
|
---- LABELS
|
|
return type, signature, binary type, is persistent
|
|
---- RESULTS
|
|
'INT','fn_var_arg(INT...)','NATIVE','true'
|
|
'INT','fn2(INT)','NATIVE','true'
|
|
'INT','fn2(INT, STRING)','NATIVE','true'
|
|
'INT','fn()','NATIVE','true'
|
|
'DOUBLE','fn(INT)','NATIVE','true'
|
|
'INT','fn(INT, STRING)','NATIVE','true'
|
|
'INT','fn(STRING, INT)','NATIVE','true'
|
|
---- TYPES
|
|
STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
show aggregate functions in $DATABASE
|
|
---- RESULTS
|
|
'BIGINT','agg_fn(INT)','NATIVE','true'
|
|
'INT','agg_fn(INT, STRING)','NATIVE','true'
|
|
---- TYPES
|
|
STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Check that none of the functions show up as analytic functions.
|
|
show analytic functions in $DATABASE
|
|
---- RESULTS
|
|
---- TYPES
|
|
STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
show create function $DATABASE.fn_var_arg
|
|
---- RESULTS: MULTI_LINE
|
|
['CREATE FUNCTION $DATABASE.fn_var_arg(INT...)
|
|
RETURNS INT
|
|
LOCATION '$NAMENODE/test-warehouse/libTestUdfs.so'
|
|
SYMBOL='_Z6VarSumPN10impala_udf15FunctionContextEiPKNS_6IntValE'
|
|
']
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
show create aggregate function $DATABASE.agg_fn
|
|
---- RESULTS: MULTI_LINE
|
|
['CREATE AGGREGATE FUNCTION $DATABASE.agg_fn(INT)
|
|
RETURNS BIGINT
|
|
LOCATION '$NAMENODE/test-warehouse/libudasample.so'
|
|
UPDATE_FN='_Z11CountUpdatePN10impala_udf15FunctionContextERKNS_6IntValEPNS_9BigIntValE'
|
|
INIT_FN='_Z9CountInitPN10impala_udf15FunctionContextEPNS_9BigIntValE'
|
|
MERGE_FN='_Z10CountMergePN10impala_udf15FunctionContextERKNS_9BigIntValEPS2_'
|
|
FINALIZE_FN='_Z13CountFinalizePN10impala_udf15FunctionContextERKNS_9BigIntValE'
|
|
CREATE AGGREGATE FUNCTION $DATABASE.agg_fn(INT, STRING)
|
|
RETURNS INT
|
|
LOCATION '$NAMENODE/test-warehouse/libTestUdas.so'
|
|
UPDATE_FN='_Z12TwoArgUpdatePN10impala_udf15FunctionContextERKNS_6IntValERKNS_9StringValEPS2_'
|
|
INIT_FN='_Z10TwoArgInitPN10impala_udf15FunctionContextEPNS_6IntValE'
|
|
MERGE_FN='_Z11TwoArgMergePN10impala_udf15FunctionContextERKNS_6IntValEPS2_'
|
|
']
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
show create function _impala_builtins.sin
|
|
---- RESULTS: MULTI_LINE
|
|
['CREATE FUNCTION _impala_builtins.sin(DOUBLE)
|
|
RETURNS DOUBLE
|
|
LOCATION 'null'
|
|
SYMBOL='_ZN6impala13MathFunctions3SinEPN10impala_udf15FunctionContextERKNS1_9DoubleValE'
|
|
']
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
show create aggregate function _impala_builtins.avg
|
|
---- RESULTS: MULTI_LINE
|
|
['CREATE AGGREGATE FUNCTION _impala_builtins.avg(BIGINT)
|
|
RETURNS DOUBLE
|
|
INTERMEDIATE FIXED_UDA_INTERMEDIATE(16)
|
|
LOCATION 'null'
|
|
UPDATE_FN='_ZN6impala18AggregateFunctions9AvgUpdateIN10impala_udf9BigIntValEEEvPNS2_15FunctionContextERKT_PNS2_9StringValE'
|
|
INIT_FN='_ZN6impala18AggregateFunctions7AvgInitEPN10impala_udf15FunctionContextEPNS1_9StringValE'
|
|
MERGE_FN='_ZN6impala18AggregateFunctions8AvgMergeEPN10impala_udf15FunctionContextERKNS1_9StringValEPS4_'
|
|
FINALIZE_FN='_ZN6impala18AggregateFunctions11AvgFinalizeEPN10impala_udf15FunctionContextERKNS1_9StringValE'
|
|
CREATE AGGREGATE FUNCTION _impala_builtins.avg(DECIMAL(*,*))
|
|
RETURNS DECIMAL(*,*)
|
|
INTERMEDIATE FIXED_UDA_INTERMEDIATE(32)
|
|
LOCATION 'null'
|
|
UPDATE_FN='_ZN6impala18AggregateFunctions16DecimalAvgUpdateEPN10impala_udf15FunctionContextERKNS1_10DecimalValEPNS1_9StringValE'
|
|
INIT_FN='_ZN6impala18AggregateFunctions14DecimalAvgInitEPN10impala_udf15FunctionContextEPNS1_9StringValE'
|
|
MERGE_FN='_ZN6impala18AggregateFunctions15DecimalAvgMergeEPN10impala_udf15FunctionContextERKNS1_9StringValEPS4_'
|
|
FINALIZE_FN='_ZN6impala18AggregateFunctions18DecimalAvgFinalizeEPN10impala_udf15FunctionContextERKNS1_9StringValE'
|
|
CREATE AGGREGATE FUNCTION _impala_builtins.avg(DOUBLE)
|
|
RETURNS DOUBLE
|
|
INTERMEDIATE FIXED_UDA_INTERMEDIATE(16)
|
|
LOCATION 'null'
|
|
UPDATE_FN='_ZN6impala18AggregateFunctions9AvgUpdateIN10impala_udf9DoubleValEEEvPNS2_15FunctionContextERKT_PNS2_9StringValE'
|
|
INIT_FN='_ZN6impala18AggregateFunctions7AvgInitEPN10impala_udf15FunctionContextEPNS1_9StringValE'
|
|
MERGE_FN='_ZN6impala18AggregateFunctions8AvgMergeEPN10impala_udf15FunctionContextERKNS1_9StringValEPS4_'
|
|
FINALIZE_FN='_ZN6impala18AggregateFunctions11AvgFinalizeEPN10impala_udf15FunctionContextERKNS1_9StringValE'
|
|
CREATE AGGREGATE FUNCTION _impala_builtins.avg(TIMESTAMP)
|
|
RETURNS TIMESTAMP
|
|
INTERMEDIATE FIXED_UDA_INTERMEDIATE(16)
|
|
LOCATION 'null'
|
|
UPDATE_FN='_ZN6impala18AggregateFunctions18TimestampAvgUpdateEPN10impala_udf15FunctionContextERKNS1_12TimestampValEPNS1_9StringValE'
|
|
INIT_FN='_ZN6impala18AggregateFunctions7AvgInitEPN10impala_udf15FunctionContextEPNS1_9StringValE'
|
|
MERGE_FN='_ZN6impala18AggregateFunctions8AvgMergeEPN10impala_udf15FunctionContextERKNS1_9StringValEPS4_'
|
|
FINALIZE_FN='_ZN6impala18AggregateFunctions20TimestampAvgFinalizeEPN10impala_udf15FunctionContextERKNS1_9StringValE'
|
|
']
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
drop function $DATABASE.fn2(int, string)
|
|
====
|
|
---- QUERY
|
|
show functions
|
|
---- LABELS
|
|
return type, signature, binary type, is persistent
|
|
---- RESULTS
|
|
'INT','fn_var_arg(INT...)','NATIVE','true'
|
|
'INT','fn2(INT)','NATIVE','true'
|
|
'INT','fn()','NATIVE','true'
|
|
'DOUBLE','fn(INT)','NATIVE','true'
|
|
'INT','fn(INT, STRING)','NATIVE','true'
|
|
'INT','fn(STRING, INT)','NATIVE','true'
|
|
---- TYPES
|
|
STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
drop function if exists $DATABASE.fn2(int, string)
|
|
====
|
|
---- QUERY
|
|
show functions in $DATABASE
|
|
---- LABELS
|
|
return type, signature, binary type, is persistent
|
|
---- RESULTS
|
|
'INT','fn_var_arg(INT...)','NATIVE','true'
|
|
'INT','fn2(INT)','NATIVE','true'
|
|
'INT','fn()','NATIVE','true'
|
|
'DOUBLE','fn(INT)','NATIVE','true'
|
|
'INT','fn(INT, STRING)','NATIVE','true'
|
|
'INT','fn(STRING, INT)','NATIVE','true'
|
|
---- TYPES
|
|
STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
show functions in $DATABASE;
|
|
---- LABELS
|
|
return type, signature, binary type, is persistent
|
|
---- RESULTS
|
|
'INT','fn_var_arg(INT...)','NATIVE','true'
|
|
'INT','fn2(INT)','NATIVE','true'
|
|
'INT','fn()','NATIVE','true'
|
|
'DOUBLE','fn(INT)','NATIVE','true'
|
|
'INT','fn(INT, STRING)','NATIVE','true'
|
|
'INT','fn(STRING, INT)','NATIVE','true'
|
|
---- TYPES
|
|
STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
drop function fn()
|
|
====
|
|
---- QUERY
|
|
show functions;
|
|
---- LABELS
|
|
return type, signature, binary type, is persistent
|
|
---- RESULTS
|
|
'INT','fn_var_arg(INT...)','NATIVE','true'
|
|
'INT','fn2(INT)','NATIVE','true'
|
|
'DOUBLE','fn(INT)','NATIVE','true'
|
|
'INT','fn(INT, STRING)','NATIVE','true'
|
|
'INT','fn(STRING, INT)','NATIVE','true'
|
|
---- TYPES
|
|
STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
drop function fn_var_arg(INT...)
|
|
====
|
|
---- QUERY
|
|
show functions;
|
|
---- LABELS
|
|
return type, signature, binary type, is persistent
|
|
---- RESULTS
|
|
'INT','fn2(INT)','NATIVE','true'
|
|
'DOUBLE','fn(INT)','NATIVE','true'
|
|
'INT','fn(INT, STRING)','NATIVE','true'
|
|
'INT','fn(STRING, INT)','NATIVE','true'
|
|
---- TYPES
|
|
STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
drop function agg_fn(int)
|
|
====
|
|
---- QUERY
|
|
show aggregate functions
|
|
---- RESULTS
|
|
'INT','agg_fn(INT, STRING)','NATIVE','true'
|
|
---- TYPES
|
|
STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Negative test for showing builtin scalar functions. The result
|
|
# should not contain aggregate or analytic functions. Note that
|
|
# the result must be non-empty for the test to succeed.
|
|
show functions in _impala_builtins;
|
|
---- LABELS
|
|
return type, signature, binary type, is persistent
|
|
---- RESULTS: VERIFY_IS_NOT_IN
|
|
'DOUBLE','avg(BIGINT)','BUILTIN','true'
|
|
'BIGINT','rank()','BUILTIN','true'
|
|
---- TYPES
|
|
STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Positive test for showing builtin scalar functions.
|
|
show functions in _impala_builtins;
|
|
---- LABELS
|
|
return type, signature, binary type, is persistent
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
'STRING','upper(STRING)','BUILTIN','true'
|
|
---- TYPES
|
|
STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Negative test for showing builtin aggregate functions. The result should
|
|
# not contain scalar or analytic functions.
|
|
show aggregate functions in _impala_builtins;
|
|
---- RESULTS: VERIFY_IS_NOT_IN
|
|
'BIGINT','rank()','BUILTIN','true'
|
|
'STRING','upper(STRING)','BUILTIN','true'
|
|
---- TYPES
|
|
STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Positive test for showing builtin aggregate functions.
|
|
show aggregate functions in _impala_builtins;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
'STRING','group_concat(STRING)','BUILTIN','true'
|
|
---- TYPES
|
|
STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Negative test for showing builtin analytic functions. The result should
|
|
# not contain non-analytic aggregate functions or scalar functions.
|
|
show analytic functions in _impala_builtins;
|
|
---- RESULTS: VERIFY_IS_NOT_IN
|
|
'STRING','group_concat(STRING)','BUILTIN','true'
|
|
'STRING','upper(STRING)','BUILTIN','true'
|
|
---- TYPES
|
|
STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Positive test for showing builtin analytic functions.
|
|
show analytic functions in _impala_builtins;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
'BIGINT','rank()','BUILTIN','true'
|
|
---- TYPES
|
|
STRING, STRING, STRING, STRING
|
|
====
|