# Aggregation planner test cases. Each case consists of optional '#' comment lines, a query,
# a '---- PLAN' section, an optional '---- DISTRIBUTEDPLAN' section, and a '====' terminator.
#
# basic aggregation (no grouping); note that avg(tinyint_col) is not listed as a separate
# agg output in the expected plans
select count(*), count(tinyint_col), min(tinyint_col), max(tinyint_col), sum(tinyint_col),
avg(tinyint_col)
from functional.alltypesagg
---- PLAN
01:AGGREGATE [FINALIZE]
|  output: count(*), count(tinyint_col), min(tinyint_col), max(tinyint_col), sum(tinyint_col)
|
00:SCAN HDFS [functional.alltypesagg]
   partitions=11/11 size=814.73KB
---- DISTRIBUTEDPLAN
03:AGGREGATE [MERGE FINALIZE]
|  output: sum(count(*)), sum(count(tinyint_col)), min(min(tinyint_col)), max(max(tinyint_col)), sum(sum(tinyint_col))
|
02:EXCHANGE [UNPARTITIONED]
|
01:AGGREGATE
|  output: count(*), count(tinyint_col), min(tinyint_col), max(tinyint_col), sum(tinyint_col)
|
00:SCAN HDFS [functional.alltypesagg]
   partitions=11/11 size=814.73KB
====
# with grouping; 'group by 2, 1' refers to select-list positions, and the expected plans
# group by bigint_col, tinyint_col
select tinyint_col, bigint_col, count(*), min(tinyint_col), max(tinyint_col), sum(tinyint_col),
avg(tinyint_col)
from functional.alltypesagg
group by 2, 1
---- PLAN
01:AGGREGATE [FINALIZE]
|  output: count(*), min(tinyint_col), max(tinyint_col), sum(tinyint_col), count(tinyint_col)
|  group by: bigint_col, tinyint_col
|
00:SCAN HDFS [functional.alltypesagg]
   partitions=11/11 size=814.73KB
---- DISTRIBUTEDPLAN
04:EXCHANGE [UNPARTITIONED]
|
03:AGGREGATE [MERGE FINALIZE]
|  output: sum(count(*)), min(min(tinyint_col)), max(max(tinyint_col)), sum(sum(tinyint_col)), sum(count(tinyint_col))
|  group by: bigint_col, tinyint_col
|
02:EXCHANGE [HASH(bigint_col,tinyint_col)]
|
01:AGGREGATE
|  output: count(*), min(tinyint_col), max(tinyint_col), sum(tinyint_col), count(tinyint_col)
|  group by: bigint_col, tinyint_col
|
00:SCAN HDFS [functional.alltypesagg]
   partitions=11/11 size=814.73KB
====
# avg substitution: avg(x) appears in the expected plans as sum(x) / count(x),
# both in the agg output and in the order-by expression
select avg(id)
from functional.testtbl
having count(id) > 0
order by avg(zip) limit 10
---- PLAN
02:TOP-N [LIMIT=10]
|  order by: sum(zip) / count(zip) ASC
|
01:AGGREGATE [FINALIZE]
|  output: sum(id), count(id), sum(zip), count(zip)
|  having: count(id) > 0
|
00:SCAN HDFS [functional.testtbl]
   partitions=1/1 size=0B
---- DISTRIBUTEDPLAN
02:TOP-N [LIMIT=10]
|  order by: sum(zip) / count(zip) ASC
|
04:AGGREGATE [MERGE FINALIZE]
|  output: sum(sum(id)), sum(count(id)), sum(sum(zip)), sum(count(zip))
|  having: count(id) > 0
|
03:EXCHANGE [UNPARTITIONED]
|
01:AGGREGATE
|  output: sum(id), count(id), sum(zip), count(zip)
|
00:SCAN HDFS [functional.testtbl]
   partitions=1/1 size=0B
====
# Test correct removal of redundant group-by expressions (IMPALA-817)
# 'int_col + int_col' is listed twice in the group by clause but only once in the
# expected plans.
select int_col + int_col, int_col * int_col
from functional.alltypesagg
group by int_col + int_col, int_col * int_col, int_col + int_col
having (int_col * int_col) < 0 limit 10
---- PLAN
01:AGGREGATE [FINALIZE]
|  group by: int_col + int_col, int_col * int_col
|  having: int_col * int_col < 0
|  limit: 10
|
00:SCAN HDFS [functional.alltypesagg]
   partitions=11/11 size=814.73KB
---- DISTRIBUTEDPLAN
04:EXCHANGE [UNPARTITIONED]
|  limit: 10
|
03:AGGREGATE [MERGE FINALIZE]
|  group by: int_col + int_col, int_col * int_col
|  having: int_col * int_col < 0
|  limit: 10
|
02:EXCHANGE [HASH(int_col + int_col,int_col * int_col)]
|
01:AGGREGATE
|  group by: int_col + int_col, int_col * int_col
|
00:SCAN HDFS [functional.alltypesagg]
   partitions=11/11 size=814.73KB
====
# Tests that a having predicate triggers slot materialization (IMPALA-846).
# count(t2.int_col) and count(t1.bigint_col) occur only in the having clause of the query,
# yet both are listed in the expected agg output.
select count(*) from
functional.alltypes t1 inner join functional.alltypestiny t2 on t1.smallint_col = t2.smallint_col
group by t1.tinyint_col, t2.smallint_col
having count(t2.int_col) = count(t1.bigint_col)
---- PLAN
03:AGGREGATE [FINALIZE]
|  output: count(*), count(t2.int_col), count(t1.bigint_col)
|  group by: t1.tinyint_col, t2.smallint_col
|  having: count(t2.int_col) = count(t1.bigint_col)
|
02:HASH JOIN [INNER JOIN]
|  hash predicates: t1.smallint_col = t2.smallint_col
|
|--01:SCAN HDFS [functional.alltypestiny t2]
|     partitions=4/4 size=460B compact
|
00:SCAN HDFS [functional.alltypes t1]
   partitions=24/24 size=478.45KB
====
# Tests proper slot materialization of agg-tuple slots for avg (IMP-1271).
# 't.x > 10' is picked up as an unassigned conjunct, and not as a binding
# predicate because avg gets rewritten into an expr against two slots
# (and getBoundPredicates() cannot handle multi-slot predicates).
# In the expected plan the predicate shows up as a having clause on the aggregation.
select 1 from
  (select int_col, avg(bigint_col) x from functional.alltypes group by int_col) t
where t.x > 10
---- PLAN
01:AGGREGATE [FINALIZE]
|  output: sum(bigint_col), count(bigint_col)
|  group by: int_col
|  having: sum(bigint_col) / count(bigint_col) > 10
|
00:SCAN HDFS [functional.alltypes]
   partitions=24/24 size=478.45KB
====
# test distributed aggregation over unions (IMPALA-831)
# non-distinct agg without grouping over a union
select count(*) from
  (select * from functional.alltypes
   union all
   select * from functional.alltypessmall) t
limit 10
---- PLAN
03:AGGREGATE [FINALIZE]
|  output: count(*)
|  limit: 10
|
00:UNION
|
|--02:SCAN HDFS [functional.alltypessmall]
|     partitions=4/4 size=6.32KB
|
01:SCAN HDFS [functional.alltypes]
   partitions=24/24 size=478.45KB
---- DISTRIBUTEDPLAN
05:AGGREGATE [MERGE FINALIZE]
|  output: sum(count(*))
|  limit: 10
|
04:EXCHANGE [UNPARTITIONED]
|
03:AGGREGATE
|  output: count(*)
|
00:UNION
|
|--02:SCAN HDFS [functional.alltypessmall]
|     partitions=4/4 size=6.32KB
|
01:SCAN HDFS [functional.alltypes]
   partitions=24/24 size=478.45KB
====
# non-distinct agg with grouping over a union
select count(*) from
  (select * from functional.alltypes
   union all
   select * from functional.alltypessmall) t
group by t.bigint_col
limit 10
---- PLAN
03:AGGREGATE [FINALIZE]
|  output: count(*)
|  group by: bigint_col
|  limit: 10
|
00:UNION
|
|--02:SCAN HDFS [functional.alltypessmall]
|     partitions=4/4 size=6.32KB
|
01:SCAN HDFS [functional.alltypes]
   partitions=24/24 size=478.45KB
---- DISTRIBUTEDPLAN
06:EXCHANGE [UNPARTITIONED]
|  limit: 10
|
05:AGGREGATE [MERGE FINALIZE]
|  output: sum(count(*))
|  group by: t.bigint_col
|  limit: 10
|
04:EXCHANGE [HASH(t.bigint_col)]
|
03:AGGREGATE
|  output: count(*)
|  group by: bigint_col
|
00:UNION
|
|--02:SCAN HDFS [functional.alltypessmall]
|     partitions=4/4 size=6.32KB
|
01:SCAN HDFS [functional.alltypes]
   partitions=24/24 size=478.45KB
====
# distinct agg without grouping over a union
select count(distinct int_col) from
  (select * from functional.alltypes
   union all
   select * from functional.alltypessmall) t
limit 10
---- PLAN
04:AGGREGATE [MERGE FINALIZE]
|  output: count(int_col)
|  limit: 10
|
03:AGGREGATE
|  group by: int_col
|
00:UNION
|
|--02:SCAN HDFS [functional.alltypessmall]
|     partitions=4/4 size=6.32KB
|
01:SCAN HDFS [functional.alltypes]
   partitions=24/24 size=478.45KB
---- DISTRIBUTEDPLAN
08:AGGREGATE [MERGE FINALIZE]
|  output: sum(count(int_col))
|
07:EXCHANGE [UNPARTITIONED]
|  limit: 10
|
04:AGGREGATE [MERGE]
|  output: count(int_col)
|  limit: 10
|
06:AGGREGATE [MERGE]
|  group by: int_col
|
05:EXCHANGE [HASH(int_col)]
|
03:AGGREGATE
|  group by: int_col
|
00:UNION
|
|--02:SCAN HDFS [functional.alltypessmall]
|     partitions=4/4 size=6.32KB
|
01:SCAN HDFS [functional.alltypes]
   partitions=24/24 size=478.45KB
====
# distinct agg with grouping over a union
select count(distinct int_col) from
  (select * from functional.alltypes
   union all
   select * from functional.alltypessmall) t
group by t.bigint_col
limit 10
---- PLAN
04:AGGREGATE [MERGE FINALIZE]
|  output: count(int_col)
|  group by: t.bigint_col
|  limit: 10
|
03:AGGREGATE
|  group by: bigint_col, int_col
|
00:UNION
|
|--02:SCAN HDFS [functional.alltypessmall]
|     partitions=4/4 size=6.32KB
|
01:SCAN HDFS [functional.alltypes]
   partitions=24/24 size=478.45KB
---- DISTRIBUTEDPLAN
07:EXCHANGE [UNPARTITIONED]
|  limit: 10
|
04:AGGREGATE [MERGE FINALIZE]
|  output: count(int_col)
|  group by: t.bigint_col
|  limit: 10
|
06:AGGREGATE [MERGE]
|  group by: t.bigint_col, int_col
|
05:EXCHANGE [HASH(t.bigint_col)]
|
03:AGGREGATE
|  group by: bigint_col, int_col
|
00:UNION
|
|--02:SCAN HDFS [functional.alltypessmall]
|     partitions=4/4 size=6.32KB
|
01:SCAN HDFS [functional.alltypes]
   partitions=24/24 size=478.45KB
====
# mixed distinct and non-distinct agg without grouping over a union
select count(smallint_col), count(distinct int_col) from
  (select * from functional.alltypes
   union all
   select * from functional.alltypessmall) t
limit 10
---- PLAN
04:AGGREGATE [MERGE FINALIZE]
|  output: count(int_col), sum(count(smallint_col))
|  limit: 10
|
03:AGGREGATE
|  output: count(smallint_col)
|  group by: int_col
|
00:UNION
|
|--02:SCAN HDFS [functional.alltypessmall]
|     partitions=4/4 size=6.32KB
|
01:SCAN HDFS [functional.alltypes]
   partitions=24/24 size=478.45KB
---- DISTRIBUTEDPLAN
08:AGGREGATE [MERGE FINALIZE]
|  output: sum(count(int_col)), sum(sum(count(smallint_col)))
|
07:EXCHANGE [UNPARTITIONED]
|  limit: 10
|
04:AGGREGATE [MERGE]
|  output: count(int_col), sum(count(smallint_col))
|  limit: 10
|
06:AGGREGATE [MERGE]
|  output: sum(count(smallint_col))
|  group by: int_col
|
05:EXCHANGE [HASH(int_col)]
|
03:AGGREGATE
|  output: count(smallint_col)
|  group by: int_col
|
00:UNION
|
|--02:SCAN HDFS [functional.alltypessmall]
|     partitions=4/4 size=6.32KB
|
01:SCAN HDFS [functional.alltypes]
   partitions=24/24 size=478.45KB
====
# mixed distinct and non-distinct agg with grouping over a union
select count(smallint_col), count(distinct int_col) from
  (select * from functional.alltypes
   union all
   select * from functional.alltypessmall) t
group by t.bigint_col
limit 10
---- PLAN
04:AGGREGATE [MERGE FINALIZE]
|  output: count(int_col), sum(count(smallint_col))
|  group by: t.bigint_col
|  limit: 10
|
03:AGGREGATE
|  output: count(smallint_col)
|  group by: bigint_col, int_col
|
00:UNION
|
|--02:SCAN HDFS [functional.alltypessmall]
|     partitions=4/4 size=6.32KB
|
01:SCAN HDFS [functional.alltypes]
   partitions=24/24 size=478.45KB
---- DISTRIBUTEDPLAN
07:EXCHANGE [UNPARTITIONED]
|  limit: 10
|
04:AGGREGATE [MERGE FINALIZE]
|  output: count(int_col), sum(count(smallint_col))
|  group by: t.bigint_col
|  limit: 10
|
06:AGGREGATE [MERGE]
|  output: sum(count(smallint_col))
|  group by: t.bigint_col, int_col
|
05:EXCHANGE [HASH(t.bigint_col)]
|
03:AGGREGATE
|  output: count(smallint_col)
|  group by: bigint_col, int_col
|
00:UNION
|
|--02:SCAN HDFS [functional.alltypessmall]
|     partitions=4/4 size=6.32KB
|
01:SCAN HDFS [functional.alltypes]
   partitions=24/24 size=478.45KB
====
# mixed distinct and non-distinct agg with grouping over a union distinct
# (the expected plans contain an additional aggregation that groups by every column
# of the union output)
select count(smallint_col), count(distinct int_col) from
  (select * from functional.alltypes
   union distinct
   select * from functional.alltypessmall) t
group by t.bigint_col
limit 10
---- PLAN
05:AGGREGATE [MERGE FINALIZE]
|  output: count(int_col), sum(count(smallint_col))
|  group by: t.bigint_col
|  limit: 10
|
04:AGGREGATE
|  output: count(smallint_col)
|  group by: bigint_col, int_col
|
03:AGGREGATE [FINALIZE]
|  group by: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month
|
00:UNION
|
|--02:SCAN HDFS [functional.alltypessmall]
|     partitions=4/4 size=6.32KB
|
01:SCAN HDFS [functional.alltypes]
   partitions=24/24 size=478.45KB
---- DISTRIBUTEDPLAN
10:EXCHANGE [UNPARTITIONED]
|  limit: 10
|
05:AGGREGATE [MERGE FINALIZE]
|  output: count(int_col), sum(count(smallint_col))
|  group by: t.bigint_col
|  limit: 10
|
09:AGGREGATE [MERGE]
|  output: sum(count(smallint_col))
|  group by: t.bigint_col, int_col
|
08:EXCHANGE [HASH(t.bigint_col)]
|
04:AGGREGATE
|  output: count(smallint_col)
|  group by: bigint_col, int_col
|
07:AGGREGATE [MERGE FINALIZE]
|  group by: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month
|
06:EXCHANGE [HASH(id,bool_col,tinyint_col,smallint_col,int_col,bigint_col,float_col,double_col,date_string_col,string_col,timestamp_col,year,month)]
|
03:AGGREGATE
|  group by: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month
|
00:UNION
|
|--02:SCAN HDFS [functional.alltypessmall]
|     partitions=4/4 size=6.32KB
|
01:SCAN HDFS [functional.alltypes]
   partitions=24/24 size=478.45KB
====
# test that aggregations are not placed below an unpartitioned exchange with a limit
# (limit inside an inline view over a single table)
select count(*) from (select * from functional.alltypes limit 10) t
---- PLAN
01:AGGREGATE [FINALIZE]
|  output: count(*)
|
00:SCAN HDFS [functional.alltypes]
   partitions=24/24 size=478.45KB
   limit: 10
---- DISTRIBUTEDPLAN
01:AGGREGATE [FINALIZE]
|  output: count(*)
|
02:EXCHANGE [UNPARTITIONED]
|  limit: 10
|
00:SCAN HDFS [functional.alltypes]
   partitions=24/24 size=478.45KB
   limit: 10
====
# test that aggregations are not placed below an unpartitioned exchange with a limit
# (limit applied to a union inside an inline view)
select count(*) from
  (select * from functional.alltypes
   union all
   (select * from functional.alltypessmall)
   limit 10) t
---- PLAN
03:AGGREGATE [FINALIZE]
|  output: count(*)
|
00:UNION
|  limit: 10
|
|--02:SCAN HDFS [functional.alltypessmall]
|     partitions=4/4 size=6.32KB
|
01:SCAN HDFS [functional.alltypes]
   partitions=24/24 size=478.45KB
---- DISTRIBUTEDPLAN
03:AGGREGATE [FINALIZE]
|  output: count(*)
|
04:EXCHANGE [UNPARTITIONED]
|  limit: 10
|
00:UNION
|  limit: 10
|
|--02:SCAN HDFS [functional.alltypessmall]
|     partitions=4/4 size=6.32KB
|
01:SCAN HDFS [functional.alltypes]
   partitions=24/24 size=478.45KB
====