mirror of
https://github.com/apache/impala.git
synced 2026-01-02 21:00:35 -05:00
Since commit d2d3f4c (on asf-master), TAggregateExpr contains
the logical input types of the Aggregate Expr. The reason they
are included is that merging aggregate expressions will have
input types of the intermediate values which aren't necessarily
the same as the input types. For instance, NDV() uses a binary
blob as its intermediate value and it's passed to its merge
aggregate expressions as a StringVal but the input type of NDV()
in the query could be DecimalVal. In this case, we consider
DecimalVal as the logical input type while StringVal is the
intermediate type. The logical input types are accessed by the
BE via GetConstFnAttr() during interpretation and constant
propagation during codegen.
To handle distinct aggregate expressions (e.g. select count(distinct)),
the FE uses 2-phase aggregation by introducing an extra phase of
split/merge aggregation in which the distinct aggregate expressions'
inputs are converted and added to the group-by expressions in the first
phase while the non-distinct aggregate expressions go through the normal
split/merge treatment.
The bug is that the existing code incorrectly propagates the intermediate
types of the non-grouping aggregate expressions as the logical input types
to the merging aggregate expressions in the second phase of aggregation.
The input aggregate expressions for the non-distinct aggregate expressions
in the second phase aggregation are already merging aggregate expressions
(from phase one) in which case we should not treat its input types as
logical input types.
This change fixes the problem above by checking if the input aggregate
expression passed to FunctionCallExpr.createMergeAggCall() is already
a merging aggregate expression. If so, it will use the logical input
types recorded in its 'mergeAggInputFn_' as references for its logical
input types instead of the aggregate expression input types themselves.
Change-Id: I158303b20d1afdff23c67f3338b9c4af2ad80691
Reviewed-on: http://gerrit.cloudera.org:8080/6724
Reviewed-by: Alex Behm <alex.behm@cloudera.com>
Tested-by: Impala Public Jenkins
142 lines
3.2 KiB
Plaintext
142 lines
3.2 KiB
Plaintext
====
|
|
---- QUERY
|
|
select hll(int_col) from functional.alltypestiny;
|
|
---- RESULTS
|
|
'2'
|
|
---- TYPES
|
|
string
|
|
====
|
|
---- QUERY
|
|
select test_count(int_col) from functional.alltypestiny;
|
|
---- RESULTS
|
|
8
|
|
---- TYPES
|
|
bigint
|
|
====
|
|
---- QUERY
|
|
select test_count(int_col) from functional.alltypesagg;
|
|
---- RESULTS
|
|
10980
|
|
---- TYPES
|
|
bigint
|
|
====
|
|
---- QUERY
|
|
select sum_small_decimal(c3) from functional.decimal_tiny;
|
|
---- RESULTS
|
|
45.00
|
|
---- TYPES
|
|
decimal
|
|
====
|
|
---- QUERY
|
|
select trunc_sum(double_col),sum(double_col) from functional_parquet.alltypes where id < 5555;
|
|
---- RESULTS
|
|
252348,252348.5
|
|
---- TYPES
|
|
bigint,double
|
|
====
|
|
---- QUERY
|
|
select arg_is_const(int_col, 1) from functional_parquet.alltypes;
|
|
---- RESULTS
|
|
true
|
|
---- TYPES
|
|
boolean
|
|
====
|
|
---- QUERY
|
|
# Test with even number of input rows.
|
|
select toggle_null(id), count(*)
|
|
from functional_parquet.alltypesagg
|
|
---- RESULTS
|
|
NULL,11000
|
|
---- TYPES
|
|
int,bigint
|
|
====
|
|
---- QUERY
|
|
# Test with odd number of input rows.
|
|
select toggle_null(id), count(*)
|
|
from functional_parquet.alltypesagg
|
|
where id <= 9998
|
|
---- RESULTS
|
|
1,10999
|
|
---- TYPES
|
|
int,bigint
|
|
====
|
|
---- QUERY
|
|
# Test that input NULLs are passed to aggregate functions ok.
|
|
select count_nulls(tinyint_col), count(*)
|
|
from functional.alltypesagg
|
|
---- RESULTS
|
|
2000,11000
|
|
---- TYPES
|
|
bigint,bigint
|
|
====
|
|
---- QUERY
|
|
# Test that all types are exposed via the FunctionContext correctly.
|
|
# This relies on asserts in the UDA function
|
|
select agg_intermediate(int_col), count(*)
|
|
from functional.alltypesagg
|
|
---- RESULTS
|
|
NULL,11000
|
|
---- TYPES
|
|
bigint,bigint
|
|
====
|
|
---- QUERY
|
|
# Test that all types are exposed via the FunctionContext correctly.
|
|
# This relies on asserts in the UDA function
|
|
select agg_decimal_intermediate(cast(d1 as decimal(2,1)), 2), count(*)
|
|
from functional.decimal_tbl
|
|
---- RESULTS
|
|
NULL,5
|
|
---- TYPES
|
|
decimal,bigint
|
|
====
|
|
---- QUERY
|
|
# Test that all types are exposed via the FunctionContext correctly.
|
|
# This includes distinct aggregate expression to test IMPALA-5251.
|
|
# It also relies on asserts in the UDA function.
|
|
select
|
|
agg_string_intermediate(cast(c1 as decimal(20,10)), 1000, "foobar"),
|
|
agg_decimal_intermediate(cast(c3 as decimal(2,1)), 2),
|
|
agg_intermediate(int_col),
|
|
avg(c2),
|
|
min(c3-c1),
|
|
max(c1+c3),
|
|
count(distinct int_col),
|
|
sum(distinct int_col)
|
|
from
|
|
functional.alltypesagg,
|
|
functional.decimal_tiny
|
|
---- RESULTS
|
|
100,NULL,NULL,160.49989,-10.0989,11.8989,999,499500
|
|
---- TYPES
|
|
decimal,decimal,bigint,decimal,decimal,decimal,bigint,bigint
|
|
====
|
|
---- QUERY
|
|
# Test that all types are exposed via the FunctionContext correctly.
|
|
# This includes distinct aggregate expression to test IMPALA-5251.
|
|
# It also relies on asserts in the UDA function.
|
|
select
|
|
agg_string_intermediate(cast(c1 as decimal(20,10)), 1000, "foobar"),
|
|
agg_decimal_intermediate(cast(c3 as decimal(2,1)), 2),
|
|
agg_intermediate(int_col),
|
|
ndv(c2),
|
|
sum(distinct c1)/count(distinct c1)
|
|
from
|
|
functional.alltypesagg,
|
|
functional.decimal_tiny
|
|
group by
|
|
year,month,day
|
|
---- RESULTS
|
|
100,NULL,NULL,99,5.4994
|
|
100,NULL,NULL,99,5.4994
|
|
100,NULL,NULL,99,5.4994
|
|
100,NULL,NULL,99,5.4994
|
|
100,NULL,NULL,99,5.4994
|
|
100,NULL,NULL,99,5.4994
|
|
100,NULL,NULL,99,5.4994
|
|
100,NULL,NULL,99,5.4994
|
|
100,NULL,NULL,99,5.4994
|
|
100,NULL,NULL,99,5.4994
|
|
100,NULL,NULL,99,5.4994
|
|
---- TYPES
|
|
decimal,decimal,bigint,bigint,decimal
|
|
==== |