Files
impala/testdata/workloads/functional-planner/queries/PlannerTest/aggregation.test
Alex Behm 15e05082c0 IMPALA-831: Distributed aggregation and top-n over unions.
Change-Id: I056e8271421008378db93e8b2393861cc9dd4b90
Reviewed-on: http://gerrit.ent.cloudera.com:8080/1840
Reviewed-by: Alex Behm <alex.behm@cloudera.com>
Tested-by: jenkins
Reviewed-on: http://gerrit.ent.cloudera.com:8080/1886
2014-03-13 15:42:31 -07:00

601 lines
15 KiB
Plaintext

# Basic aggregation without grouping: count/min/max/sum/avg over a single table.
# The expected plans list no separate avg() output expr; the aggregate outputs are
# count(*), count(tinyint_col), min, max and sum(tinyint_col).
# In the distributed plan the per-node aggregates are merged above an unpartitioned
# exchange.
select count(*), count(tinyint_col), min(tinyint_col), max(tinyint_col), sum(tinyint_col),
avg(tinyint_col)
from functional.alltypesagg
---- PLAN
01:AGGREGATE [FINALIZE]
| output: count(*), count(tinyint_col), min(tinyint_col), max(tinyint_col), sum(tinyint_col)
|
00:SCAN HDFS [functional.alltypesagg]
partitions=10/10 size=743.67KB
---- DISTRIBUTEDPLAN
03:AGGREGATE [MERGE FINALIZE]
| output: sum(count(*)), sum(count(tinyint_col)), min(min(tinyint_col)), max(max(tinyint_col)), sum(sum(tinyint_col))
|
02:EXCHANGE [PARTITION=UNPARTITIONED]
|
01:AGGREGATE
| output: count(*), count(tinyint_col), min(tinyint_col), max(tinyint_col), sum(tinyint_col)
|
00:SCAN HDFS [functional.alltypesagg]
partitions=10/10 size=743.67KB
====
# Aggregation with grouping.
# The group by clause uses select-list ordinals: '2, 1' refers to bigint_col and
# tinyint_col, which is the grouping order shown in the expected plans.
# This test case also lists the expected scan range locations.
select tinyint_col, bigint_col, count(*), min(tinyint_col), max(tinyint_col), sum(tinyint_col),
avg(tinyint_col)
from functional.alltypesagg
group by 2, 1
---- PLAN
01:AGGREGATE [FINALIZE]
| output: count(*), min(tinyint_col), max(tinyint_col), sum(tinyint_col), count(tinyint_col)
| group by: bigint_col, tinyint_col
|
00:SCAN HDFS [functional.alltypesagg]
partitions=10/10 size=743.67KB
---- SCANRANGELOCATIONS
NODE 0:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=1/100101.txt 0:75153
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=10/100110.txt 0:76263
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=2/100102.txt 0:76263
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=3/100103.txt 0:76263
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=4/100104.txt 0:76263
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=5/100105.txt 0:76263
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=6/100106.txt 0:76263
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=7/100107.txt 0:76263
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=8/100108.txt 0:76263
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=9/100109.txt 0:76263
---- DISTRIBUTEDPLAN
04:EXCHANGE [PARTITION=UNPARTITIONED]
|
03:AGGREGATE [MERGE FINALIZE]
| output: sum(count(*)), min(min(tinyint_col)), max(max(tinyint_col)), sum(sum(tinyint_col)), sum(count(tinyint_col))
| group by: bigint_col, tinyint_col
|
02:EXCHANGE [PARTITION=HASH(bigint_col,tinyint_col)]
|
01:AGGREGATE
| output: count(*), min(tinyint_col), max(tinyint_col), sum(tinyint_col), count(tinyint_col)
| group by: bigint_col, tinyint_col
|
00:SCAN HDFS [functional.alltypesagg]
partitions=10/10 size=743.67KB
====
# avg substitution: avg() in the select list and in the order by clause shows up in
# the expected plans as sum() and count() aggregates, and the top-n orders by
# sum(zip) / count(zip).
# The query has a having clause but no group by clause.
select avg(id)
from functional.testtbl
having count(id) > 0
order by avg(zip) limit 10
---- PLAN
02:TOP-N [LIMIT=10]
| order by: sum(zip) / count(zip) ASC
|
01:AGGREGATE [FINALIZE]
| output: sum(id), count(id), sum(zip), count(zip)
| having: count(id) > 0
|
00:SCAN HDFS [functional.testtbl]
partitions=1/1 size=0B
---- DISTRIBUTEDPLAN
02:TOP-N [LIMIT=10]
| order by: sum(zip) / count(zip) ASC
|
04:AGGREGATE [MERGE FINALIZE]
| output: sum(sum(id)), sum(count(id)), sum(sum(zip)), sum(count(zip))
| having: count(id) > 0
|
03:EXCHANGE [PARTITION=UNPARTITIONED]
|
01:AGGREGATE
| output: sum(id), count(id), sum(zip), count(zip)
|
00:SCAN HDFS [functional.testtbl]
partitions=1/1 size=0B
====
# Test correct removal of redundant group-by expressions (IMPALA-817).
# 'int_col + int_col' is listed twice in the group by clause; the expected plans
# group by each distinct expr only once.
select int_col + int_col, int_col * int_col
from functional.alltypesagg
group by int_col + int_col, int_col * int_col, int_col + int_col
having (int_col * int_col) < 0 limit 10
---- PLAN
01:AGGREGATE [FINALIZE]
| group by: int_col + int_col, int_col * int_col
| having: int_col * int_col < 0
| limit: 10
|
00:SCAN HDFS [functional.alltypesagg]
partitions=10/10 size=743.67KB
---- DISTRIBUTEDPLAN
04:EXCHANGE [PARTITION=UNPARTITIONED]
| limit: 10
|
03:AGGREGATE [MERGE FINALIZE]
| group by: int_col + int_col, int_col * int_col
| having: int_col * int_col < 0
| limit: 10
|
02:EXCHANGE [PARTITION=HASH(int_col + int_col,int_col * int_col)]
|
01:AGGREGATE
| group by: int_col + int_col, int_col * int_col
|
00:SCAN HDFS [functional.alltypesagg]
partitions=10/10 size=743.67KB
====
# Tests that a having predicate triggers slot materialization (IMPALA-846).
# count(t2.int_col) and count(t1.bigint_col) are referenced only in the having
# clause, yet both appear in the output of the aggregation in the expected plan.
# Only the single-node plan is checked for this query.
select count(*) from
functional.alltypes t1 inner join functional.alltypestiny t2
on t1.smallint_col = t2.smallint_col
group by t1.tinyint_col, t2.smallint_col
having count(t2.int_col) = count(t1.bigint_col)
---- PLAN
03:AGGREGATE [FINALIZE]
| output: count(*), count(t2.int_col), count(t1.bigint_col)
| group by: t1.tinyint_col, t2.smallint_col
| having: count(t2.int_col) = count(t1.bigint_col)
|
02:HASH JOIN [INNER JOIN]
| hash predicates: t1.smallint_col = t2.smallint_col
|
|--01:SCAN HDFS [functional.alltypestiny t2]
| partitions=4/4 size=460B compact
|
00:SCAN HDFS [functional.alltypes t1]
partitions=24/24 size=478.45KB
====
# Tests proper slot materialization of agg-tuple slots for avg (IMP-1271).
# 't.x > 10' is picked up as an unassigned conjunct, and not as a binding
# predicate because avg gets rewritten into an expr against two slots
# (and getBoundPredicates() cannot handle multi-slot predicates).
# In the expected plan the predicate appears as a having conjunct on
# sum(bigint_col) / count(bigint_col) of the aggregation in the inline view.
# Only the single-node plan is checked for this query.
select 1 from
(select int_col, avg(bigint_col) x from functional.alltypes
group by int_col) t
where t.x > 10
---- PLAN
01:AGGREGATE [FINALIZE]
| output: sum(bigint_col), count(bigint_col)
| group by: int_col
| having: sum(bigint_col) / count(bigint_col) > 10.0
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
====
# Test distributed aggregation over unions (IMPALA-831).
# Non-distinct agg without grouping over a union.
# In the expected distributed plan each union operand has its own pre-aggregation
# below the unpartitioned exchange; the merge aggregation above the exchange
# carries the limit.
select count(*) from
(select * from functional.alltypes
union all
select * from functional.alltypessmall) t
limit 10
---- PLAN
03:AGGREGATE [FINALIZE]
| output: count(*)
| limit: 10
|
00:MERGE
|
|--02:SCAN HDFS [functional.alltypessmall]
| partitions=4/4 size=6.32KB
|
01:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
---- DISTRIBUTEDPLAN
09:AGGREGATE [MERGE FINALIZE]
| output: sum(count(*))
| limit: 10
|
08:EXCHANGE [PARTITION=UNPARTITIONED]
|
|--07:AGGREGATE
| | output: count(*)
| |
| 06:MERGE
| |
| 02:SCAN HDFS [functional.alltypessmall]
| partitions=4/4 size=6.32KB
|
03:AGGREGATE
| output: count(*)
|
05:MERGE
|
01:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
====
# Non-distinct agg with grouping over a union.
# In the expected distributed plan each union operand is pre-aggregated, the
# exchange hash-partitions on t.bigint_col, and the limit is applied both at the
# merge aggregation and at the final unpartitioned exchange.
select count(*) from
(select * from functional.alltypes
union all
select * from functional.alltypessmall) t
group by t.bigint_col
limit 10
---- PLAN
03:AGGREGATE [FINALIZE]
| output: count(*)
| group by: bigint_col
| limit: 10
|
00:MERGE
|
|--02:SCAN HDFS [functional.alltypessmall]
| partitions=4/4 size=6.32KB
|
01:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
---- DISTRIBUTEDPLAN
10:EXCHANGE [PARTITION=UNPARTITIONED]
| limit: 10
|
09:AGGREGATE [MERGE FINALIZE]
| output: sum(count(*))
| group by: t.bigint_col
| limit: 10
|
08:EXCHANGE [PARTITION=HASH(t.bigint_col)]
|
|--07:AGGREGATE
| | output: count(*)
| | group by: bigint_col
| |
| 06:MERGE
| |
| 02:SCAN HDFS [functional.alltypessmall]
| partitions=4/4 size=6.32KB
|
03:AGGREGATE
| output: count(*)
| group by: bigint_col
|
05:MERGE
|
01:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
====
# Distinct agg without grouping over a union.
# In the expected plans count(distinct int_col) is computed in two phases: a
# grouping aggregation on int_col, followed by count(int_col) over its output.
# The distributed plan hash-partitions on int_col and adds a final merge of the
# per-node counts above an unpartitioned exchange.
select count(distinct int_col)
from
(select * from functional.alltypes
union all
select * from functional.alltypessmall) t
limit 10
---- PLAN
04:AGGREGATE [MERGE FINALIZE]
| output: count(int_col)
| limit: 10
|
03:AGGREGATE
| group by: int_col
|
00:MERGE
|
|--02:SCAN HDFS [functional.alltypessmall]
| partitions=4/4 size=6.32KB
|
01:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
---- DISTRIBUTEDPLAN
12:AGGREGATE [MERGE FINALIZE]
| output: sum(count(int_col))
|
11:EXCHANGE [PARTITION=UNPARTITIONED]
| limit: 10
|
04:AGGREGATE [MERGE]
| output: count(int_col)
| limit: 10
|
10:AGGREGATE [MERGE]
| group by: int_col
|
09:EXCHANGE [PARTITION=HASH(int_col)]
|
|--08:AGGREGATE
| | group by: int_col
| |
| 07:MERGE
| |
| 02:SCAN HDFS [functional.alltypessmall]
| partitions=4/4 size=6.32KB
|
03:AGGREGATE
| group by: int_col
|
06:MERGE
|
01:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
====
# Distinct agg with grouping over a union.
# In the expected plans the first aggregation phase groups by bigint_col, int_col;
# the second phase computes count(int_col) grouped by t.bigint_col.
# The distributed plan hash-partitions on t.bigint_col only.
select count(distinct int_col)
from
(select * from functional.alltypes
union all
select * from functional.alltypessmall) t
group by t.bigint_col
limit 10
---- PLAN
04:AGGREGATE [MERGE FINALIZE]
| output: count(int_col)
| group by: t.bigint_col
| limit: 10
|
03:AGGREGATE
| group by: bigint_col, int_col
|
00:MERGE
|
|--02:SCAN HDFS [functional.alltypessmall]
| partitions=4/4 size=6.32KB
|
01:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
---- DISTRIBUTEDPLAN
11:EXCHANGE [PARTITION=UNPARTITIONED]
| limit: 10
|
04:AGGREGATE [MERGE FINALIZE]
| output: count(int_col)
| group by: t.bigint_col
| limit: 10
|
10:AGGREGATE [MERGE]
| group by: t.bigint_col, int_col
|
09:EXCHANGE [PARTITION=HASH(t.bigint_col)]
|
|--08:AGGREGATE
| | group by: bigint_col, int_col
| |
| 07:MERGE
| |
| 02:SCAN HDFS [functional.alltypessmall]
| partitions=4/4 size=6.32KB
|
03:AGGREGATE
| group by: bigint_col, int_col
|
06:MERGE
|
01:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
====
# Mixed distinct and non-distinct agg without grouping over a union.
# In the expected plans the first phase groups by int_col and computes
# count(smallint_col); the second phase computes count(int_col) and
# sum(count(smallint_col)).
select count(smallint_col), count(distinct int_col)
from
(select * from functional.alltypes
union all
select * from functional.alltypessmall) t
limit 10
---- PLAN
04:AGGREGATE [MERGE FINALIZE]
| output: count(int_col), sum(count(smallint_col))
| limit: 10
|
03:AGGREGATE
| output: count(smallint_col)
| group by: int_col
|
00:MERGE
|
|--02:SCAN HDFS [functional.alltypessmall]
| partitions=4/4 size=6.32KB
|
01:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
---- DISTRIBUTEDPLAN
12:AGGREGATE [MERGE FINALIZE]
| output: sum(count(int_col)), sum(sum(count(smallint_col)))
|
11:EXCHANGE [PARTITION=UNPARTITIONED]
| limit: 10
|
04:AGGREGATE [MERGE]
| output: count(int_col), sum(count(smallint_col))
| limit: 10
|
10:AGGREGATE [MERGE]
| output: sum(count(smallint_col))
| group by: int_col
|
09:EXCHANGE [PARTITION=HASH(int_col)]
|
|--08:AGGREGATE
| | output: count(smallint_col)
| | group by: int_col
| |
| 07:MERGE
| |
| 02:SCAN HDFS [functional.alltypessmall]
| partitions=4/4 size=6.32KB
|
03:AGGREGATE
| output: count(smallint_col)
| group by: int_col
|
06:MERGE
|
01:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
====
# Mixed distinct and non-distinct agg with grouping over a union.
# In the expected plans the first phase groups by bigint_col, int_col and computes
# count(smallint_col); the second phase groups by t.bigint_col and computes
# count(int_col) and sum(count(smallint_col)).
# The distributed plan hash-partitions on t.bigint_col only.
select count(smallint_col), count(distinct int_col)
from
(select * from functional.alltypes
union all
select * from functional.alltypessmall) t
group by t.bigint_col
limit 10
---- PLAN
04:AGGREGATE [MERGE FINALIZE]
| output: count(int_col), sum(count(smallint_col))
| group by: t.bigint_col
| limit: 10
|
03:AGGREGATE
| output: count(smallint_col)
| group by: bigint_col, int_col
|
00:MERGE
|
|--02:SCAN HDFS [functional.alltypessmall]
| partitions=4/4 size=6.32KB
|
01:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
---- DISTRIBUTEDPLAN
11:EXCHANGE [PARTITION=UNPARTITIONED]
| limit: 10
|
04:AGGREGATE [MERGE FINALIZE]
| output: count(int_col), sum(count(smallint_col))
| group by: t.bigint_col
| limit: 10
|
10:AGGREGATE [MERGE]
| output: sum(count(smallint_col))
| group by: t.bigint_col, int_col
|
09:EXCHANGE [PARTITION=HASH(t.bigint_col)]
|
|--08:AGGREGATE
| | output: count(smallint_col)
| | group by: bigint_col, int_col
| |
| 07:MERGE
| |
| 02:SCAN HDFS [functional.alltypessmall]
| partitions=4/4 size=6.32KB
|
03:AGGREGATE
| output: count(smallint_col)
| group by: bigint_col, int_col
|
06:MERGE
|
01:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
====
# Mixed distinct and non-distinct agg with grouping over a union distinct.
# Compared to the union all variant of this query, the expected single-node plan
# contains an additional group-by-only aggregation (node 03) directly above the
# MERGE node. In the distributed plan that aggregation is split into a
# pre-aggregation per union operand and a merge aggregation above a
# hash-partitioned exchange.
select count(smallint_col), count(distinct int_col)
from
(select * from functional.alltypes
union distinct
select * from functional.alltypessmall) t
group by t.bigint_col
limit 10
---- PLAN
05:AGGREGATE [MERGE FINALIZE]
| output: count(int_col), sum(count(smallint_col))
| group by: t.bigint_col
| limit: 10
|
04:AGGREGATE
| output: count(smallint_col)
| group by: bigint_col, int_col
|
03:AGGREGATE [FINALIZE]
| group by: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month
|
00:MERGE
|
|--02:SCAN HDFS [functional.alltypessmall]
| partitions=4/4 size=6.32KB
|
01:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
---- DISTRIBUTEDPLAN
14:EXCHANGE [PARTITION=UNPARTITIONED]
| limit: 10
|
05:AGGREGATE [MERGE FINALIZE]
| output: count(int_col), sum(count(smallint_col))
| group by: t.bigint_col
| limit: 10
|
13:AGGREGATE [MERGE]
| output: sum(count(smallint_col))
| group by: t.bigint_col, int_col
|
12:EXCHANGE [PARTITION=HASH(t.bigint_col)]
|
04:AGGREGATE
| output: count(smallint_col)
| group by: bigint_col, int_col
|
11:AGGREGATE [MERGE FINALIZE]
| group by: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month
|
10:EXCHANGE [PARTITION=HASH(id,bool_col,tinyint_col,smallint_col,int_col,bigint_col,float_col,double_col,date_string_col,string_col,timestamp_col,year,month)]
|
|--09:AGGREGATE
| | group by: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month
| |
| 08:MERGE
| |
| 02:SCAN HDFS [functional.alltypessmall]
| partitions=4/4 size=6.32KB
|
03:AGGREGATE
| group by: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month
|
07:MERGE
|
01:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
====
# Test that aggregations are not placed below an unpartitioned exchange with a limit.
# Here the limit is on a plain scan inside the inline view. In the expected
# distributed plan count(*) is computed in a single [FINALIZE] aggregation above
# the exchange, with no pre-aggregation below it.
select count(*) from (select * from functional.alltypes limit 10) t
---- PLAN
01:AGGREGATE [FINALIZE]
| output: count(*)
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
limit: 10
---- DISTRIBUTEDPLAN
01:AGGREGATE [FINALIZE]
| output: count(*)
|
02:EXCHANGE [PARTITION=UNPARTITIONED]
| limit: 10
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
limit: 10
====
# Test that aggregations are not placed below an unpartitioned exchange with a limit,
# for the case where the limit applies to a union all inside the inline view
# (the expected plans show 'limit: 10' on the MERGE nodes).
# In the expected distributed plan count(*) is computed in a single [FINALIZE]
# aggregation above the exchange, with no pre-aggregation in the union operands.
select count(*) from
(select * from functional.alltypes
union all
(select * from functional.alltypessmall) limit 10) t
---- PLAN
03:AGGREGATE [FINALIZE]
| output: count(*)
|
00:MERGE
| limit: 10
|
|--02:SCAN HDFS [functional.alltypessmall]
| partitions=4/4 size=6.32KB
|
01:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
---- DISTRIBUTEDPLAN
03:AGGREGATE [FINALIZE]
| output: count(*)
|
04:EXCHANGE [PARTITION=UNPARTITIONED]
| limit: 10
|
|--06:MERGE
| | limit: 10
| |
| 02:SCAN HDFS [functional.alltypessmall]
| partitions=4/4 size=6.32KB
|
05:MERGE
| limit: 10
|
01:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
====