# basic aggregation
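# note: avg(tinyint_col) gets no output slot of its own; it is computed from the
# sum(tinyint_col) and count(tinyint_col) slots already in the agg output below.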
select count(*), count(tinyint_col), min(tinyint_col), max(tinyint_col), sum(tinyint_col),
avg(tinyint_col)
from functional.alltypesagg
---- PLAN
01:AGGREGATE [FINALIZE]
| output: count(*), count(tinyint_col), min(tinyint_col), max(tinyint_col), sum(tinyint_col)
|
00:SCAN HDFS [functional.alltypesagg]
partitions=11/11 size=814.73KB
---- DISTRIBUTEDPLAN
03:AGGREGATE [MERGE FINALIZE]
| output: sum(count(*)), sum(count(tinyint_col)), min(min(tinyint_col)), max(max(tinyint_col)), sum(sum(tinyint_col))
|
02:EXCHANGE [UNPARTITIONED]
|
01:AGGREGATE
| output: count(*), count(tinyint_col), min(tinyint_col), max(tinyint_col), sum(tinyint_col)
|
00:SCAN HDFS [functional.alltypesagg]
partitions=11/11 size=814.73KB
====
# with grouping
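# the ordinals in 'group by 2, 1' resolve to bigint_col and tinyint_col from the
# select list, which is the grouping order shown in the plan.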
select tinyint_col, bigint_col, count(*), min(tinyint_col), max(tinyint_col), sum(tinyint_col),
avg(tinyint_col)
from functional.alltypesagg
group by 2, 1
---- PLAN
01:AGGREGATE [FINALIZE]
| output: count(*), min(tinyint_col), max(tinyint_col), sum(tinyint_col), count(tinyint_col)
| group by: bigint_col, tinyint_col
|
00:SCAN HDFS [functional.alltypesagg]
partitions=11/11 size=814.73KB
---- DISTRIBUTEDPLAN
04:EXCHANGE [UNPARTITIONED]
|
03:AGGREGATE [MERGE FINALIZE]
| output: sum(count(*)), min(min(tinyint_col)), max(max(tinyint_col)), sum(sum(tinyint_col)), sum(count(tinyint_col))
| group by: bigint_col, tinyint_col
|
02:EXCHANGE [HASH(bigint_col,tinyint_col)]
|
01:AGGREGATE
| output: count(*), min(tinyint_col), max(tinyint_col), sum(tinyint_col), count(tinyint_col)
| group by: bigint_col, tinyint_col
|
00:SCAN HDFS [functional.alltypesagg]
partitions=11/11 size=814.73KB
====
# avg substitution
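# avg(id) and avg(zip) are rewritten into sum()/count() pairs, so the agg node
# materializes sum(id), count(id), sum(zip), count(zip) and the sort key becomes
# sum(zip) / count(zip).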
select avg(id)
from functional.testtbl
having count(id) > 0
order by avg(zip) limit 10
---- PLAN
02:TOP-N [LIMIT=10]
| order by: sum(zip) / count(zip) ASC
|
01:AGGREGATE [FINALIZE]
| output: sum(id), count(id), sum(zip), count(zip)
| having: count(id) > 0
|
00:SCAN HDFS [functional.testtbl]
partitions=1/1 size=0B
---- DISTRIBUTEDPLAN
02:TOP-N [LIMIT=10]
| order by: sum(zip) / count(zip) ASC
|
04:AGGREGATE [MERGE FINALIZE]
| output: sum(sum(id)), sum(count(id)), sum(sum(zip)), sum(count(zip))
| having: count(id) > 0
|
03:EXCHANGE [UNPARTITIONED]
|
01:AGGREGATE
| output: sum(id), count(id), sum(zip), count(zip)
|
00:SCAN HDFS [functional.testtbl]
partitions=1/1 size=0B
====
# Test correct removal of redundant group-by expressions (IMPALA-817)
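# the duplicate 'int_col + int_col' in the group-by list should collapse into a
# single grouping expr in the plan.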
select int_col + int_col, int_col * int_col
from functional.alltypesagg
group by int_col + int_col, int_col * int_col, int_col + int_col
having (int_col * int_col) < 0 limit 10
---- PLAN
01:AGGREGATE [FINALIZE]
| group by: int_col + int_col, int_col * int_col
| having: int_col * int_col < 0
| limit: 10
|
00:SCAN HDFS [functional.alltypesagg]
partitions=11/11 size=814.73KB
---- DISTRIBUTEDPLAN
04:EXCHANGE [UNPARTITIONED]
| limit: 10
|
03:AGGREGATE [MERGE FINALIZE]
| group by: int_col + int_col, int_col * int_col
| having: int_col * int_col < 0
| limit: 10
|
02:EXCHANGE [HASH(int_col + int_col,int_col * int_col)]
|
01:AGGREGATE
| group by: int_col + int_col, int_col * int_col
|
00:SCAN HDFS [functional.alltypesagg]
partitions=11/11 size=814.73KB
====
# Tests that a having predicate triggers slot materialization (IMPALA-846).
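# count(t2.int_col) and count(t1.bigint_col) appear only in the having clause but
# must still be materialized in the agg output.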
select count(*) from
functional.alltypes t1 inner join functional.alltypestiny t2
on t1.smallint_col = t2.smallint_col
group by t1.tinyint_col, t2.smallint_col
having count(t2.int_col) = count(t1.bigint_col)
---- PLAN
03:AGGREGATE [FINALIZE]
| output: count(*), count(t2.int_col), count(t1.bigint_col)
| group by: t1.tinyint_col, t2.smallint_col
| having: count(t2.int_col) = count(t1.bigint_col)
|
02:HASH JOIN [INNER JOIN]
| hash predicates: t1.smallint_col = t2.smallint_col
|
|--01:SCAN HDFS [functional.alltypestiny t2]
| partitions=4/4 size=460B compact
|
00:SCAN HDFS [functional.alltypes t1]
partitions=24/24 size=478.45KB
====
# Tests proper slot materialization of agg-tuple slots for avg (IMP-1271).
# 't.x > 10' is picked up as an unassigned conjunct, and not as a binding
# predicate because avg gets rewritten into an expr against two slots
# (and getBoundPredicates() cannot handle multi-slot predicates).
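# The predicate therefore surfaces as a having clause on the agg node,
# sum(bigint_col) / count(bigint_col) > 10, rather than being bound earlier.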
select 1 from
(select int_col, avg(bigint_col) x from functional.alltypes
group by int_col) t
where t.x > 10
---- PLAN
01:AGGREGATE [FINALIZE]
| output: sum(bigint_col), count(bigint_col)
| group by: int_col
| having: sum(bigint_col) / count(bigint_col) > 10
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
====
# test distributed aggregation over unions (IMPALA-831)
# non-distinct agg without grouping over a union
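# the count(*) is evaluated on top of the UNION node; the distributed plan splits
# it into a per-node pre-aggregation and a merge aggregation above an
# unpartitioned exchange.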
select count(*) from
(select * from functional.alltypes
union all
select * from functional.alltypessmall) t
limit 10
---- PLAN
03:AGGREGATE [FINALIZE]
| output: count(*)
| limit: 10
|
00:UNION
|
|--02:SCAN HDFS [functional.alltypessmall]
| partitions=4/4 size=6.32KB
|
01:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
---- DISTRIBUTEDPLAN
05:AGGREGATE [MERGE FINALIZE]
| output: sum(count(*))
| limit: 10
|
04:EXCHANGE [UNPARTITIONED]
|
03:AGGREGATE
| output: count(*)
|
00:UNION
|
|--02:SCAN HDFS [functional.alltypessmall]
| partitions=4/4 size=6.32KB
|
01:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
====
# non-distinct agg with grouping over a union
select count(*) from
(select * from functional.alltypes
union all
select * from functional.alltypessmall) t
group by t.bigint_col
limit 10
---- PLAN
03:AGGREGATE [FINALIZE]
| output: count(*)
| group by: bigint_col
| limit: 10
|
00:UNION
|
|--02:SCAN HDFS [functional.alltypessmall]
| partitions=4/4 size=6.32KB
|
01:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
---- DISTRIBUTEDPLAN
06:EXCHANGE [UNPARTITIONED]
| limit: 10
|
05:AGGREGATE [MERGE FINALIZE]
| output: sum(count(*))
| group by: t.bigint_col
| limit: 10
|
04:EXCHANGE [HASH(t.bigint_col)]
|
03:AGGREGATE
| output: count(*)
| group by: bigint_col
|
00:UNION
|
|--02:SCAN HDFS [functional.alltypessmall]
| partitions=4/4 size=6.32KB
|
01:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
====
# distinct agg without grouping over a union
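# distinct aggregation is planned in two phases: 03 groups by int_col to remove
# duplicates and 04 counts the surviving groups; the distributed plan repartitions
# on int_col between the local and merge de-duplication steps.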
select count(distinct int_col)
from
(select * from functional.alltypes
union all
select * from functional.alltypessmall) t
limit 10
---- PLAN
04:AGGREGATE [MERGE FINALIZE]
| output: count(int_col)
| limit: 10
|
03:AGGREGATE
| group by: int_col
|
00:UNION
|
|--02:SCAN HDFS [functional.alltypessmall]
| partitions=4/4 size=6.32KB
|
01:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
---- DISTRIBUTEDPLAN
08:AGGREGATE [MERGE FINALIZE]
| output: sum(count(int_col))
|
07:EXCHANGE [UNPARTITIONED]
| limit: 10
|
04:AGGREGATE [MERGE]
| output: count(int_col)
| limit: 10
|
06:AGGREGATE [MERGE]
| group by: int_col
|
05:EXCHANGE [HASH(int_col)]
|
03:AGGREGATE
| group by: int_col
|
00:UNION
|
|--02:SCAN HDFS [functional.alltypessmall]
| partitions=4/4 size=6.32KB
|
01:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
====
# distinct agg with grouping over a union
select count(distinct int_col)
from
(select * from functional.alltypes
union all
select * from functional.alltypessmall) t
group by t.bigint_col
limit 10
---- PLAN
04:AGGREGATE [MERGE FINALIZE]
| output: count(int_col)
| group by: t.bigint_col
| limit: 10
|
03:AGGREGATE
| group by: bigint_col, int_col
|
00:UNION
|
|--02:SCAN HDFS [functional.alltypessmall]
| partitions=4/4 size=6.32KB
|
01:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
---- DISTRIBUTEDPLAN
07:EXCHANGE [UNPARTITIONED]
| limit: 10
|
04:AGGREGATE [MERGE FINALIZE]
| output: count(int_col)
| group by: t.bigint_col
| limit: 10
|
06:AGGREGATE [MERGE]
| group by: t.bigint_col, int_col
|
05:EXCHANGE [HASH(t.bigint_col)]
|
03:AGGREGATE
| group by: bigint_col, int_col
|
00:UNION
|
|--02:SCAN HDFS [functional.alltypessmall]
| partitions=4/4 size=6.32KB
|
01:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
====
# mixed distinct and non-distinct agg without grouping over a union
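# the non-distinct count(smallint_col) is computed per distinct int_col group in
# phase one and re-aggregated as sum(count(smallint_col)) in phase two.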
select count(smallint_col), count(distinct int_col)
from
(select * from functional.alltypes
union all
select * from functional.alltypessmall) t
limit 10
---- PLAN
04:AGGREGATE [MERGE FINALIZE]
| output: count(int_col), sum(count(smallint_col))
| limit: 10
|
03:AGGREGATE
| output: count(smallint_col)
| group by: int_col
|
00:UNION
|
|--02:SCAN HDFS [functional.alltypessmall]
| partitions=4/4 size=6.32KB
|
01:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
---- DISTRIBUTEDPLAN
08:AGGREGATE [MERGE FINALIZE]
| output: sum(count(int_col)), sum(sum(count(smallint_col)))
|
07:EXCHANGE [UNPARTITIONED]
| limit: 10
|
04:AGGREGATE [MERGE]
| output: count(int_col), sum(count(smallint_col))
| limit: 10
|
06:AGGREGATE [MERGE]
| output: sum(count(smallint_col))
| group by: int_col
|
05:EXCHANGE [HASH(int_col)]
|
03:AGGREGATE
| output: count(smallint_col)
| group by: int_col
|
00:UNION
|
|--02:SCAN HDFS [functional.alltypessmall]
| partitions=4/4 size=6.32KB
|
01:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
====
# mixed distinct and non-distinct agg with grouping over a union
select count(smallint_col), count(distinct int_col)
from
(select * from functional.alltypes
union all
select * from functional.alltypessmall) t
group by t.bigint_col
limit 10
---- PLAN
04:AGGREGATE [MERGE FINALIZE]
| output: count(int_col), sum(count(smallint_col))
| group by: t.bigint_col
| limit: 10
|
03:AGGREGATE
| output: count(smallint_col)
| group by: bigint_col, int_col
|
00:UNION
|
|--02:SCAN HDFS [functional.alltypessmall]
| partitions=4/4 size=6.32KB
|
01:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
---- DISTRIBUTEDPLAN
07:EXCHANGE [UNPARTITIONED]
| limit: 10
|
04:AGGREGATE [MERGE FINALIZE]
| output: count(int_col), sum(count(smallint_col))
| group by: t.bigint_col
| limit: 10
|
06:AGGREGATE [MERGE]
| output: sum(count(smallint_col))
| group by: t.bigint_col, int_col
|
05:EXCHANGE [HASH(t.bigint_col)]
|
03:AGGREGATE
| output: count(smallint_col)
| group by: bigint_col, int_col
|
00:UNION
|
|--02:SCAN HDFS [functional.alltypessmall]
| partitions=4/4 size=6.32KB
|
01:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
====
# mixed distinct and non-distinct agg with grouping over a union distinct
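# the union distinct adds an extra aggregation (03) grouping by all select-list
# columns to remove duplicate rows before the two distinct-agg phases run on top.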
select count(smallint_col), count(distinct int_col)
from
(select * from functional.alltypes
union distinct
select * from functional.alltypessmall) t
group by t.bigint_col
limit 10
---- PLAN
05:AGGREGATE [MERGE FINALIZE]
| output: count(int_col), sum(count(smallint_col))
| group by: t.bigint_col
| limit: 10
|
04:AGGREGATE
| output: count(smallint_col)
| group by: bigint_col, int_col
|
03:AGGREGATE [FINALIZE]
| group by: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month
|
00:UNION
|
|--02:SCAN HDFS [functional.alltypessmall]
| partitions=4/4 size=6.32KB
|
01:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
---- DISTRIBUTEDPLAN
10:EXCHANGE [UNPARTITIONED]
| limit: 10
|
05:AGGREGATE [MERGE FINALIZE]
| output: count(int_col), sum(count(smallint_col))
| group by: t.bigint_col
| limit: 10
|
09:AGGREGATE [MERGE]
| output: sum(count(smallint_col))
| group by: t.bigint_col, int_col
|
08:EXCHANGE [HASH(t.bigint_col)]
|
04:AGGREGATE
| output: count(smallint_col)
| group by: bigint_col, int_col
|
07:AGGREGATE [MERGE FINALIZE]
| group by: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month
|
06:EXCHANGE [HASH(id,bool_col,tinyint_col,smallint_col,int_col,bigint_col,float_col,double_col,date_string_col,string_col,timestamp_col,year,month)]
|
03:AGGREGATE
| group by: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month
|
00:UNION
|
|--02:SCAN HDFS [functional.alltypessmall]
| partitions=4/4 size=6.32KB
|
01:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
====
# test that aggregations are not placed below an unpartitioned exchange with a limit
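# the limit in the inline view must be applied before counting, so in the
# distributed plan the count(*) stays above the limited unpartitioned exchange.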
select count(*) from (select * from functional.alltypes limit 10) t
---- PLAN
01:AGGREGATE [FINALIZE]
| output: count(*)
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
limit: 10
---- DISTRIBUTEDPLAN
01:AGGREGATE [FINALIZE]
| output: count(*)
|
02:EXCHANGE [UNPARTITIONED]
| limit: 10
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
limit: 10
====
# test that aggregations are not placed below an unpartitioned exchange with a limit (union variant)
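# here the limit applies to the UNION node inside the inline view, and count(*)
# again stays above the unpartitioned exchange that enforces it.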
select count(*) from
(select * from functional.alltypes
union all
(select * from functional.alltypessmall) limit 10) t
---- PLAN
03:AGGREGATE [FINALIZE]
| output: count(*)
|
00:UNION
| limit: 10
|
|--02:SCAN HDFS [functional.alltypessmall]
| partitions=4/4 size=6.32KB
|
01:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
---- DISTRIBUTEDPLAN
03:AGGREGATE [FINALIZE]
| output: count(*)
|
04:EXCHANGE [UNPARTITIONED]
| limit: 10
|
00:UNION
| limit: 10
|
|--02:SCAN HDFS [functional.alltypessmall]
| partitions=4/4 size=6.32KB
|
01:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
====