impala/testdata/workloads/functional-planner/queries/PlannerTest/aggregation.test

# basic aggregation
# Ungrouped aggregates over a single table. avg(tinyint_col) has no entry of its own
# in the expected aggregate output: only count(*), count(tinyint_col), min, max and sum
# are listed. In the distributed plan the scan-side aggregation (01) feeds an
# UNPARTITIONED exchange, and the merge step (03) combines the partial counts with sum().
select count(*), count(tinyint_col), min(tinyint_col), max(tinyint_col), sum(tinyint_col),
avg(tinyint_col)
from functional.alltypesagg
---- PLAN
01:AGGREGATE [FINALIZE]
|  output: count(*), count(tinyint_col), min(tinyint_col), max(tinyint_col), sum(tinyint_col)
|
00:SCAN HDFS [functional.alltypesagg]
   partitions=10/10 size=743.67KB
---- DISTRIBUTEDPLAN
03:AGGREGATE [MERGE FINALIZE]
|  output: sum(count(*)), sum(count(tinyint_col)), min(min(tinyint_col)), max(max(tinyint_col)), sum(sum(tinyint_col))
|
02:EXCHANGE [PARTITION=UNPARTITIONED]
|
01:AGGREGATE
|  output: count(*), count(tinyint_col), min(tinyint_col), max(tinyint_col), sum(tinyint_col)
|
00:SCAN HDFS [functional.alltypesagg]
   partitions=10/10 size=743.67KB
====
# with grouping
# 'group by 2, 1' uses select-list ordinals: position 2 is bigint_col and position 1 is
# tinyint_col, so the expected grouping order is bigint_col, tinyint_col.
# avg(tinyint_col) shows up only as an extra count(tinyint_col) next to the existing sum.
# This case also checks SCANRANGELOCATIONS: ten splits, one per day=N file.
# In the distributed plan the exchange below the merge aggregation is hash-partitioned
# on both grouping columns.
select tinyint_col, bigint_col, count(*), min(tinyint_col), max(tinyint_col), sum(tinyint_col),
avg(tinyint_col)
from functional.alltypesagg
group by 2, 1
---- PLAN
01:AGGREGATE [FINALIZE]
|  output: count(*), min(tinyint_col), max(tinyint_col), sum(tinyint_col), count(tinyint_col)
|  group by: bigint_col, tinyint_col
|
00:SCAN HDFS [functional.alltypesagg]
   partitions=10/10 size=743.67KB
---- SCANRANGELOCATIONS
NODE 0:
  HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=1/100101.txt 0:75153
  HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=10/100110.txt 0:76263
  HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=2/100102.txt 0:76263
  HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=3/100103.txt 0:76263
  HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=4/100104.txt 0:76263
  HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=5/100105.txt 0:76263
  HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=6/100106.txt 0:76263
  HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=7/100107.txt 0:76263
  HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=8/100108.txt 0:76263
  HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=9/100109.txt 0:76263
---- DISTRIBUTEDPLAN
04:EXCHANGE [PARTITION=UNPARTITIONED]
|
03:AGGREGATE [MERGE FINALIZE]
|  output: sum(count(*)), min(min(tinyint_col)), max(max(tinyint_col)), sum(sum(tinyint_col)), sum(count(tinyint_col))
|  group by: bigint_col, tinyint_col
|
02:EXCHANGE [PARTITION=HASH(bigint_col,tinyint_col)]
|
01:AGGREGATE
|  output: count(*), min(tinyint_col), max(tinyint_col), sum(tinyint_col), count(tinyint_col)
|  group by: bigint_col, tinyint_col
|
00:SCAN HDFS [functional.alltypesagg]
   partitions=10/10 size=743.67KB
====
# avg substitution
# avg(id) and avg(zip) appear in the expected plans as sum/count pairs; the TOP-N sort
# key is printed as 'sum(zip) / count(zip)'. The query has a having clause but no
# group by. In the distributed plan the having predicate is listed only on the merge
# aggregation (04), not on the scan-side aggregation (01).
select avg(id)
from functional.testtbl
having count(id) > 0
order by avg(zip) limit 10
---- PLAN
02:TOP-N [LIMIT=10]
|  order by: sum(zip) / count(zip) ASC
|
01:AGGREGATE [FINALIZE]
|  output: sum(id), count(id), sum(zip), count(zip)
|  having: count(id) > 0
|
00:SCAN HDFS [functional.testtbl]
   partitions=1/1 size=0B
---- DISTRIBUTEDPLAN
02:TOP-N [LIMIT=10]
|  order by: sum(zip) / count(zip) ASC
|
04:AGGREGATE [MERGE FINALIZE]
|  output: sum(sum(id)), sum(count(id)), sum(sum(zip)), sum(count(zip))
|  having: count(id) > 0
|
03:EXCHANGE [PARTITION=UNPARTITIONED]
|
01:AGGREGATE
|  output: sum(id), count(id), sum(zip), count(zip)
|
00:SCAN HDFS [functional.testtbl]
   partitions=1/1 size=0B
====
# Test correct removal of redundant group-by expressions (IMPALA-817)
# The group by clause lists 'int_col + int_col' twice; the expected plans group by it
# only once. The having predicate refers to the other grouping expression
# (int_col * int_col). In the distributed plan the having predicate and the limit sit
# on the merge aggregation (03); the limit is repeated on the final exchange (04).
select int_col + int_col, int_col * int_col
from functional.alltypesagg
group by int_col + int_col, int_col * int_col, int_col + int_col
having (int_col * int_col) < 0 limit 10
---- PLAN
01:AGGREGATE [FINALIZE]
|  group by: int_col + int_col, int_col * int_col
|  having: int_col * int_col < 0
|  limit: 10
|
00:SCAN HDFS [functional.alltypesagg]
   partitions=10/10 size=743.67KB
---- DISTRIBUTEDPLAN
04:EXCHANGE [PARTITION=UNPARTITIONED]
|  limit: 10
|
03:AGGREGATE [MERGE FINALIZE]
|  group by: int_col + int_col, int_col * int_col
|  having: int_col * int_col < 0
|  limit: 10
|
02:EXCHANGE [PARTITION=HASH(int_col + int_col,int_col * int_col)]
|
01:AGGREGATE
|  group by: int_col + int_col, int_col * int_col
|
00:SCAN HDFS [functional.alltypesagg]
   partitions=10/10 size=743.67KB
====
# Tests that a having predicate triggers slot materialization (IMPALA-846).
# count(t2.int_col) and count(t1.bigint_col) are referenced only in the having clause,
# not in the select list, yet both must be listed in the aggregate output next to
# count(*). Only the single-node PLAN section is checked for this case.
select count(*) from
functional.alltypes t1 inner join functional.alltypestiny t2
  on t1.smallint_col = t2.smallint_col
group by t1.tinyint_col, t2.smallint_col
having count(t2.int_col) = count(t1.bigint_col)
---- PLAN
03:AGGREGATE [FINALIZE]
|  output: count(*), count(t2.int_col), count(t1.bigint_col)
|  group by: t1.tinyint_col, t2.smallint_col
|  having: count(t2.int_col) = count(t1.bigint_col)
|
02:HASH JOIN [INNER JOIN]
|  hash predicates: t1.smallint_col = t2.smallint_col
|
|--01:SCAN HDFS [functional.alltypestiny t2]
|     partitions=4/4 size=460B compact
|
00:SCAN HDFS [functional.alltypes t1]
   partitions=24/24 size=478.45KB
====
# Tests proper slot materialization of agg-tuple slots for avg (IMP-1271).
# 't.x > 10' is picked up as an unassigned conjunct, and not as a binding
# predicate, because avg gets rewritten into an expr against two slots
# (and getBoundPredicates() cannot handle multi-slot predicates).
# Expected plan: the outer where-clause predicate is listed as a having predicate on
# the inline view's aggregation node, written as sum(bigint_col) / count(bigint_col),
# with the literal printed as 10.0. Only the single-node PLAN section is checked.
select 1 from
  (select int_col, avg(bigint_col) x from functional.alltypes
   group by int_col) t
where t.x > 10
---- PLAN
01:AGGREGATE [FINALIZE]
|  output: sum(bigint_col), count(bigint_col)
|  group by: int_col
|  having: sum(bigint_col) / count(bigint_col) > 10.0
|
00:SCAN HDFS [functional.alltypes]
   partitions=24/24 size=478.45KB
====
# test distributed aggregation over unions (IMPALA-831)
# non-distinct agg without grouping over a union
# Expected distributed plan: each union operand gets its own MERGE node and its own
# scan-side aggregation (03 and 07) below the UNPARTITIONED exchange; the limit is
# listed only on the final merge aggregation (09).
select count(*) from
  (select * from functional.alltypes
   union all
   select * from functional.alltypessmall) t
limit 10
---- PLAN
03:AGGREGATE [FINALIZE]
|  output: count(*)
|  limit: 10
|
00:MERGE
|
|--02:SCAN HDFS [functional.alltypessmall]
|     partitions=4/4 size=6.32KB
|
01:SCAN HDFS [functional.alltypes]
   partitions=24/24 size=478.45KB
---- DISTRIBUTEDPLAN
09:AGGREGATE [MERGE FINALIZE]
|  output: sum(count(*))
|  limit: 10
|
08:EXCHANGE [PARTITION=UNPARTITIONED]
|
|--07:AGGREGATE
|  |  output: count(*)
|  |
|  06:MERGE
|  |
|  02:SCAN HDFS [functional.alltypessmall]
|     partitions=4/4 size=6.32KB
|
03:AGGREGATE
|  output: count(*)
|
05:MERGE
|
01:SCAN HDFS [functional.alltypes]
   partitions=24/24 size=478.45KB
====
# non-distinct agg with grouping over a union
# Expected distributed plan: each union operand is pre-aggregated by bigint_col
# (03 and 07), the exchange (08) is hash-partitioned on t.bigint_col, and the limit is
# listed on both the merge aggregation (09) and the final UNPARTITIONED exchange (10).
# Note the grouping column is printed as 'bigint_col' below the exchange and in the
# single-node plan, and as 't.bigint_col' at and above the exchange.
select count(*) from
  (select * from functional.alltypes
   union all
   select * from functional.alltypessmall) t
group by t.bigint_col
limit 10
---- PLAN
03:AGGREGATE [FINALIZE]
|  output: count(*)
|  group by: bigint_col
|  limit: 10
|
00:MERGE
|
|--02:SCAN HDFS [functional.alltypessmall]
|     partitions=4/4 size=6.32KB
|
01:SCAN HDFS [functional.alltypes]
   partitions=24/24 size=478.45KB
---- DISTRIBUTEDPLAN
10:EXCHANGE [PARTITION=UNPARTITIONED]
|  limit: 10
|
09:AGGREGATE [MERGE FINALIZE]
|  output: sum(count(*))
|  group by: t.bigint_col
|  limit: 10
|
08:EXCHANGE [PARTITION=HASH(t.bigint_col)]
|
|--07:AGGREGATE
|  |  output: count(*)
|  |  group by: bigint_col
|  |
|  06:MERGE
|  |
|  02:SCAN HDFS [functional.alltypessmall]
|     partitions=4/4 size=6.32KB
|
03:AGGREGATE
|  output: count(*)
|  group by: bigint_col
|
05:MERGE
|
01:SCAN HDFS [functional.alltypes]
   partitions=24/24 size=478.45KB
====
# distinct agg without grouping over a union
# Single-node plan: count(distinct int_col) is planned as two aggregations, a
# group by int_col (03) followed by count(int_col) (04).
# Distributed plan: the per-operand group-by aggregations (03, 08) feed an exchange
# hash-partitioned on int_col (09); node 10 merges the groups, node 04 counts them,
# and node 12 sums the partial counts above the UNPARTITIONED exchange (11).
# NOTE(review): nodes 04 and 11 list 'limit: 10' but the final node 12 does not;
# confirm this is the intended expected output.
select count(distinct int_col)
from
  (select * from functional.alltypes
   union all
   select * from functional.alltypessmall) t
limit 10
---- PLAN
04:AGGREGATE [MERGE FINALIZE]
|  output: count(int_col)
|  limit: 10
|
03:AGGREGATE
|  group by: int_col
|
00:MERGE
|
|--02:SCAN HDFS [functional.alltypessmall]
|     partitions=4/4 size=6.32KB
|
01:SCAN HDFS [functional.alltypes]
   partitions=24/24 size=478.45KB
---- DISTRIBUTEDPLAN
12:AGGREGATE [MERGE FINALIZE]
|  output: sum(count(int_col))
|
11:EXCHANGE [PARTITION=UNPARTITIONED]
|  limit: 10
|
04:AGGREGATE [MERGE]
|  output: count(int_col)
|  limit: 10
|
10:AGGREGATE [MERGE]
|  group by: int_col
|
09:EXCHANGE [PARTITION=HASH(int_col)]
|
|--08:AGGREGATE
|  |  group by: int_col
|  |
|  07:MERGE
|  |
|  02:SCAN HDFS [functional.alltypessmall]
|     partitions=4/4 size=6.32KB
|
03:AGGREGATE
|  group by: int_col
|
06:MERGE
|
01:SCAN HDFS [functional.alltypes]
   partitions=24/24 size=478.45KB
====
# distinct agg with grouping over a union
# Single-node plan: the first aggregation (03) groups by bigint_col, int_col and the
# second (04) counts int_col per t.bigint_col.
# Distributed plan: the exchange (09) is hash-partitioned on t.bigint_col only, while
# the aggregations around it group by (bigint_col, int_col). Node 04 is the
# [MERGE FINALIZE] step directly above node 10, with no further aggregation after the
# final UNPARTITIONED exchange (11).
select count(distinct int_col)
from
  (select * from functional.alltypes
   union all
   select * from functional.alltypessmall) t
group by t.bigint_col
limit 10
---- PLAN
04:AGGREGATE [MERGE FINALIZE]
|  output: count(int_col)
|  group by: t.bigint_col
|  limit: 10
|
03:AGGREGATE
|  group by: bigint_col, int_col
|
00:MERGE
|
|--02:SCAN HDFS [functional.alltypessmall]
|     partitions=4/4 size=6.32KB
|
01:SCAN HDFS [functional.alltypes]
   partitions=24/24 size=478.45KB
---- DISTRIBUTEDPLAN
11:EXCHANGE [PARTITION=UNPARTITIONED]
|  limit: 10
|
04:AGGREGATE [MERGE FINALIZE]
|  output: count(int_col)
|  group by: t.bigint_col
|  limit: 10
|
10:AGGREGATE [MERGE]
|  group by: t.bigint_col, int_col
|
09:EXCHANGE [PARTITION=HASH(t.bigint_col)]
|
|--08:AGGREGATE
|  |  group by: bigint_col, int_col
|  |
|  07:MERGE
|  |
|  02:SCAN HDFS [functional.alltypessmall]
|     partitions=4/4 size=6.32KB
|
03:AGGREGATE
|  group by: bigint_col, int_col
|
06:MERGE
|
01:SCAN HDFS [functional.alltypes]
   partitions=24/24 size=478.45KB
====
# mixed distinct and non-distinct agg without grouping over a union
# The non-distinct count(smallint_col) is computed in the first aggregation, grouped
# by the distinct column int_col, and combined later as sum(count(smallint_col))
# alongside count(int_col).
# Distributed plan: the exchange (09) is hash-partitioned on int_col; the final node
# (12) applies one more sum() to each partial result above the UNPARTITIONED exchange.
select count(smallint_col), count(distinct int_col)
from
  (select * from functional.alltypes
   union all
   select * from functional.alltypessmall) t
limit 10
---- PLAN
04:AGGREGATE [MERGE FINALIZE]
|  output: count(int_col), sum(count(smallint_col))
|  limit: 10
|
03:AGGREGATE
|  output: count(smallint_col)
|  group by: int_col
|
00:MERGE
|
|--02:SCAN HDFS [functional.alltypessmall]
|     partitions=4/4 size=6.32KB
|
01:SCAN HDFS [functional.alltypes]
   partitions=24/24 size=478.45KB
---- DISTRIBUTEDPLAN
12:AGGREGATE [MERGE FINALIZE]
|  output: sum(count(int_col)), sum(sum(count(smallint_col)))
|
11:EXCHANGE [PARTITION=UNPARTITIONED]
|  limit: 10
|
04:AGGREGATE [MERGE]
|  output: count(int_col), sum(count(smallint_col))
|  limit: 10
|
10:AGGREGATE [MERGE]
|  output: sum(count(smallint_col))
|  group by: int_col
|
09:EXCHANGE [PARTITION=HASH(int_col)]
|
|--08:AGGREGATE
|  |  output: count(smallint_col)
|  |  group by: int_col
|  |
|  07:MERGE
|  |
|  02:SCAN HDFS [functional.alltypessmall]
|     partitions=4/4 size=6.32KB
|
03:AGGREGATE
|  output: count(smallint_col)
|  group by: int_col
|
06:MERGE
|
01:SCAN HDFS [functional.alltypes]
   partitions=24/24 size=478.45KB
====
# mixed distinct and non-distinct agg with grouping over a union
# The first aggregation groups by (bigint_col, int_col) and carries
# count(smallint_col); the second groups by t.bigint_col and outputs count(int_col)
# and sum(count(smallint_col)).
# Distributed plan: the exchange (09) is hash-partitioned on t.bigint_col only, and
# node 04 is the [MERGE FINALIZE] step below the final UNPARTITIONED exchange (11).
select count(smallint_col), count(distinct int_col)
from
  (select * from functional.alltypes
   union all
   select * from functional.alltypessmall) t
group by t.bigint_col
limit 10
---- PLAN
04:AGGREGATE [MERGE FINALIZE]
|  output: count(int_col), sum(count(smallint_col))
|  group by: t.bigint_col
|  limit: 10
|
03:AGGREGATE
|  output: count(smallint_col)
|  group by: bigint_col, int_col
|
00:MERGE
|
|--02:SCAN HDFS [functional.alltypessmall]
|     partitions=4/4 size=6.32KB
|
01:SCAN HDFS [functional.alltypes]
   partitions=24/24 size=478.45KB
---- DISTRIBUTEDPLAN
11:EXCHANGE [PARTITION=UNPARTITIONED]
|  limit: 10
|
04:AGGREGATE [MERGE FINALIZE]
|  output: count(int_col), sum(count(smallint_col))
|  group by: t.bigint_col
|  limit: 10
|
10:AGGREGATE [MERGE]
|  output: sum(count(smallint_col))
|  group by: t.bigint_col, int_col
|
09:EXCHANGE [PARTITION=HASH(t.bigint_col)]
|
|--08:AGGREGATE
|  |  output: count(smallint_col)
|  |  group by: bigint_col, int_col
|  |
|  07:MERGE
|  |
|  02:SCAN HDFS [functional.alltypessmall]
|     partitions=4/4 size=6.32KB
|
03:AGGREGATE
|  output: count(smallint_col)
|  group by: bigint_col, int_col
|
06:MERGE
|
01:SCAN HDFS [functional.alltypes]
   partitions=24/24 size=478.45KB
====
# mixed distinct and non-distinct agg with grouping over a union distinct
# Same query as the 'union all' case, but 'union distinct' adds an extra aggregation
# (03) that groups by every listed column of the union result.
# Distributed plan: that extra aggregation runs per operand (03, 09), is
# hash-exchanged on all of its grouping columns (10) and merged (11). The aggregations
# for the outer query (04, 13, 05) follow, with a second exchange (12) hash-partitioned
# on t.bigint_col.
select count(smallint_col), count(distinct int_col)
from
  (select * from functional.alltypes
   union distinct
   select * from functional.alltypessmall) t
group by t.bigint_col
limit 10
---- PLAN
05:AGGREGATE [MERGE FINALIZE]
|  output: count(int_col), sum(count(smallint_col))
|  group by: t.bigint_col
|  limit: 10
|
04:AGGREGATE
|  output: count(smallint_col)
|  group by: bigint_col, int_col
|
03:AGGREGATE [FINALIZE]
|  group by: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month
|
00:MERGE
|
|--02:SCAN HDFS [functional.alltypessmall]
|     partitions=4/4 size=6.32KB
|
01:SCAN HDFS [functional.alltypes]
   partitions=24/24 size=478.45KB
---- DISTRIBUTEDPLAN
14:EXCHANGE [PARTITION=UNPARTITIONED]
|  limit: 10
|
05:AGGREGATE [MERGE FINALIZE]
|  output: count(int_col), sum(count(smallint_col))
|  group by: t.bigint_col
|  limit: 10
|
13:AGGREGATE [MERGE]
|  output: sum(count(smallint_col))
|  group by: t.bigint_col, int_col
|
12:EXCHANGE [PARTITION=HASH(t.bigint_col)]
|
04:AGGREGATE
|  output: count(smallint_col)
|  group by: bigint_col, int_col
|
11:AGGREGATE [MERGE FINALIZE]
|  group by: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month
|
10:EXCHANGE [PARTITION=HASH(id,bool_col,tinyint_col,smallint_col,int_col,bigint_col,float_col,double_col,date_string_col,string_col,timestamp_col,year,month)]
|
|--09:AGGREGATE
|  |  group by: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month
|  |
|  08:MERGE
|  |
|  02:SCAN HDFS [functional.alltypessmall]
|     partitions=4/4 size=6.32KB
|
03:AGGREGATE
|  group by: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month
|
07:MERGE
|
01:SCAN HDFS [functional.alltypes]
   partitions=24/24 size=478.45KB
====
# test that aggregations are not placed below an unpartitioned exchange with a limit
# Case 1: the limit comes from an inline view over a single table.
# Expected distributed plan: the scan and the UNPARTITIONED exchange both list
# 'limit: 10', and count(*) is a single [FINALIZE] aggregation above the exchange,
# with no scan-side aggregation and no MERGE step.
select count(*) from (select * from functional.alltypes limit 10) t
---- PLAN
01:AGGREGATE [FINALIZE]
|  output: count(*)
|
00:SCAN HDFS [functional.alltypes]
   partitions=24/24 size=478.45KB
   limit: 10
---- DISTRIBUTEDPLAN
01:AGGREGATE [FINALIZE]
|  output: count(*)
|
02:EXCHANGE [PARTITION=UNPARTITIONED]
|  limit: 10
|
00:SCAN HDFS [functional.alltypes]
   partitions=24/24 size=478.45KB
   limit: 10
====
# test that aggregations are not placed below an unpartitioned exchange with a limit
# Case 2: the limit is attached to a union inside the inline view.
# Expected distributed plan: each operand's MERGE node (05, 06) and the UNPARTITIONED
# exchange (04) list 'limit: 10', and count(*) is a single [FINALIZE] aggregation
# above the exchange, with no per-operand aggregation below it.
select count(*) from
  (select * from functional.alltypes
   union all
   (select * from functional.alltypessmall) limit 10) t
---- PLAN
03:AGGREGATE [FINALIZE]
|  output: count(*)
|
00:MERGE
|  limit: 10
|
|--02:SCAN HDFS [functional.alltypessmall]
|     partitions=4/4 size=6.32KB
|
01:SCAN HDFS [functional.alltypes]
   partitions=24/24 size=478.45KB
---- DISTRIBUTEDPLAN
03:AGGREGATE [FINALIZE]
|  output: count(*)
|
04:EXCHANGE [PARTITION=UNPARTITIONED]
|  limit: 10
|
|--06:MERGE
|  |  limit: 10
|  |
|  02:SCAN HDFS [functional.alltypessmall]
|     partitions=4/4 size=6.32KB
|
05:MERGE
|  limit: 10
|
01:SCAN HDFS [functional.alltypes]
   partitions=24/24 size=478.45KB
====