mirror of
https://github.com/apache/impala.git
synced 2026-01-02 03:00:32 -05:00
The following changes are included in this commit:
1. Modified the alltypesagg table to include an additional partition key that has nulls.
2. Added a number of tests in hdfs.test that exercise the partition pruning logic (see IMPALA-887).
3. Modified all the tests that are affected by the change in alltypesagg.

Change-Id: I1a769375aaa71273341522eb94490ba5e4c6f00d
Reviewed-on: http://gerrit.ent.cloudera.com:8080/2874
Reviewed-by: Dimitris Tsirogiannis <dtsirogiannis@cloudera.com>
Tested-by: jenkins
Reviewed-on: http://gerrit.ent.cloudera.com:8080/3236
548 lines
13 KiB
Plaintext
548 lines
13 KiB
Plaintext
# basic aggregation without grouping
|
|
select count(*), count(tinyint_col), min(tinyint_col), max(tinyint_col), sum(tinyint_col),
|
|
avg(tinyint_col)
|
|
from functional.alltypesagg
|
|
---- PLAN
|
|
01:AGGREGATE [FINALIZE]
|
|
| output: count(*), count(tinyint_col), min(tinyint_col), max(tinyint_col), sum(tinyint_col)
|
|
|
|
|
00:SCAN HDFS [functional.alltypesagg]
|
|
partitions=11/11 size=814.73KB
|
|
---- DISTRIBUTEDPLAN
|
|
03:AGGREGATE [MERGE FINALIZE]
|
|
| output: sum(count(*)), sum(count(tinyint_col)), min(min(tinyint_col)), max(max(tinyint_col)), sum(sum(tinyint_col))
|
|
|
|
|
02:EXCHANGE [UNPARTITIONED]
|
|
|
|
|
01:AGGREGATE
|
|
| output: count(*), count(tinyint_col), min(tinyint_col), max(tinyint_col), sum(tinyint_col)
|
|
|
|
|
00:SCAN HDFS [functional.alltypesagg]
|
|
partitions=11/11 size=814.73KB
|
|
====
|
|
# aggregation with grouping; the group-by columns are given as select-list ordinals
|
|
select tinyint_col, bigint_col, count(*), min(tinyint_col), max(tinyint_col), sum(tinyint_col),
|
|
avg(tinyint_col)
|
|
from functional.alltypesagg
|
|
group by 2, 1
|
|
---- PLAN
|
|
01:AGGREGATE [FINALIZE]
|
|
| output: count(*), min(tinyint_col), max(tinyint_col), sum(tinyint_col), count(tinyint_col)
|
|
| group by: bigint_col, tinyint_col
|
|
|
|
|
00:SCAN HDFS [functional.alltypesagg]
|
|
partitions=11/11 size=814.73KB
|
|
---- DISTRIBUTEDPLAN
|
|
04:EXCHANGE [UNPARTITIONED]
|
|
|
|
|
03:AGGREGATE [MERGE FINALIZE]
|
|
| output: sum(count(*)), min(min(tinyint_col)), max(max(tinyint_col)), sum(sum(tinyint_col)), sum(count(tinyint_col))
|
|
| group by: bigint_col, tinyint_col
|
|
|
|
|
02:EXCHANGE [HASH(bigint_col,tinyint_col)]
|
|
|
|
|
01:AGGREGATE
|
|
| output: count(*), min(tinyint_col), max(tinyint_col), sum(tinyint_col), count(tinyint_col)
|
|
| group by: bigint_col, tinyint_col
|
|
|
|
|
00:SCAN HDFS [functional.alltypesagg]
|
|
partitions=11/11 size=814.73KB
|
|
====
|
|
# avg substitution: avg(x) is rewritten to sum(x) / count(x), including in the order by clause
|
|
select avg(id)
|
|
from functional.testtbl
|
|
having count(id) > 0
|
|
order by avg(zip) limit 10
|
|
---- PLAN
|
|
02:TOP-N [LIMIT=10]
|
|
| order by: sum(zip) / count(zip) ASC
|
|
|
|
|
01:AGGREGATE [FINALIZE]
|
|
| output: sum(id), count(id), sum(zip), count(zip)
|
|
| having: count(id) > 0
|
|
|
|
|
00:SCAN HDFS [functional.testtbl]
|
|
partitions=1/1 size=0B
|
|
---- DISTRIBUTEDPLAN
|
|
02:TOP-N [LIMIT=10]
|
|
| order by: sum(zip) / count(zip) ASC
|
|
|
|
|
04:AGGREGATE [MERGE FINALIZE]
|
|
| output: sum(sum(id)), sum(count(id)), sum(sum(zip)), sum(count(zip))
|
|
| having: count(id) > 0
|
|
|
|
|
03:EXCHANGE [UNPARTITIONED]
|
|
|
|
|
01:AGGREGATE
|
|
| output: sum(id), count(id), sum(zip), count(zip)
|
|
|
|
|
00:SCAN HDFS [functional.testtbl]
|
|
partitions=1/1 size=0B
|
|
====
|
|
# Test correct removal of redundant group-by expressions (IMPALA-817)
|
|
select int_col + int_col, int_col * int_col
|
|
from functional.alltypesagg
|
|
group by int_col + int_col, int_col * int_col, int_col + int_col
|
|
having (int_col * int_col) < 0 limit 10
|
|
---- PLAN
|
|
01:AGGREGATE [FINALIZE]
|
|
| group by: int_col + int_col, int_col * int_col
|
|
| having: int_col * int_col < 0
|
|
| limit: 10
|
|
|
|
|
00:SCAN HDFS [functional.alltypesagg]
|
|
partitions=11/11 size=814.73KB
|
|
---- DISTRIBUTEDPLAN
|
|
04:EXCHANGE [UNPARTITIONED]
|
|
| limit: 10
|
|
|
|
|
03:AGGREGATE [MERGE FINALIZE]
|
|
| group by: int_col + int_col, int_col * int_col
|
|
| having: int_col * int_col < 0
|
|
| limit: 10
|
|
|
|
|
02:EXCHANGE [HASH(int_col + int_col,int_col * int_col)]
|
|
|
|
|
01:AGGREGATE
|
|
| group by: int_col + int_col, int_col * int_col
|
|
|
|
|
00:SCAN HDFS [functional.alltypesagg]
|
|
partitions=11/11 size=814.73KB
|
|
====
|
|
# Tests that a having predicate triggers slot materialization (IMPALA-846).
|
|
select count(*) from
|
|
functional.alltypes t1 inner join functional.alltypestiny t2
|
|
on t1.smallint_col = t2.smallint_col
|
|
group by t1.tinyint_col, t2.smallint_col
|
|
having count(t2.int_col) = count(t1.bigint_col)
|
|
---- PLAN
|
|
03:AGGREGATE [FINALIZE]
|
|
| output: count(*), count(t2.int_col), count(t1.bigint_col)
|
|
| group by: t1.tinyint_col, t2.smallint_col
|
|
| having: count(t2.int_col) = count(t1.bigint_col)
|
|
|
|
|
02:HASH JOIN [INNER JOIN]
|
|
| hash predicates: t1.smallint_col = t2.smallint_col
|
|
|
|
|
|--01:SCAN HDFS [functional.alltypestiny t2]
|
|
| partitions=4/4 size=460B compact
|
|
|
|
|
00:SCAN HDFS [functional.alltypes t1]
|
|
partitions=24/24 size=478.45KB
|
|
====
|
|
# Tests proper slot materialization of agg-tuple slots for avg (IMP-1271).
|
|
# 't.x > 10' is picked up as an unassigned conjunct, and not as a binding
|
|
# predicate because avg gets rewritten into an expr against two slots
|
|
# (and getBoundPredicates() cannot handle multi-slot predicates).
|
|
select 1 from
|
|
(select int_col, avg(bigint_col) x from functional.alltypes
|
|
group by int_col) t
|
|
where t.x > 10
|
|
---- PLAN
|
|
01:AGGREGATE [FINALIZE]
|
|
| output: sum(bigint_col), count(bigint_col)
|
|
| group by: int_col
|
|
| having: sum(bigint_col) / count(bigint_col) > 10
|
|
|
|
|
00:SCAN HDFS [functional.alltypes]
|
|
partitions=24/24 size=478.45KB
|
|
====
|
|
# test distributed aggregation over unions (IMPALA-831)
|
|
# non-distinct agg without grouping over a union
|
|
select count(*) from
|
|
(select * from functional.alltypes
|
|
union all
|
|
select * from functional.alltypessmall) t
|
|
limit 10
|
|
---- PLAN
|
|
03:AGGREGATE [FINALIZE]
|
|
| output: count(*)
|
|
| limit: 10
|
|
|
|
|
00:UNION
|
|
|
|
|
|--02:SCAN HDFS [functional.alltypessmall]
|
|
| partitions=4/4 size=6.32KB
|
|
|
|
|
01:SCAN HDFS [functional.alltypes]
|
|
partitions=24/24 size=478.45KB
|
|
---- DISTRIBUTEDPLAN
|
|
05:AGGREGATE [MERGE FINALIZE]
|
|
| output: sum(count(*))
|
|
| limit: 10
|
|
|
|
|
04:EXCHANGE [UNPARTITIONED]
|
|
|
|
|
03:AGGREGATE
|
|
| output: count(*)
|
|
|
|
|
00:UNION
|
|
|
|
|
|--02:SCAN HDFS [functional.alltypessmall]
|
|
| partitions=4/4 size=6.32KB
|
|
|
|
|
01:SCAN HDFS [functional.alltypes]
|
|
partitions=24/24 size=478.45KB
|
|
====
|
|
# non-distinct agg with grouping over a union
|
|
select count(*) from
|
|
(select * from functional.alltypes
|
|
union all
|
|
select * from functional.alltypessmall) t
|
|
group by t.bigint_col
|
|
limit 10
|
|
---- PLAN
|
|
03:AGGREGATE [FINALIZE]
|
|
| output: count(*)
|
|
| group by: bigint_col
|
|
| limit: 10
|
|
|
|
|
00:UNION
|
|
|
|
|
|--02:SCAN HDFS [functional.alltypessmall]
|
|
| partitions=4/4 size=6.32KB
|
|
|
|
|
01:SCAN HDFS [functional.alltypes]
|
|
partitions=24/24 size=478.45KB
|
|
---- DISTRIBUTEDPLAN
|
|
06:EXCHANGE [UNPARTITIONED]
|
|
| limit: 10
|
|
|
|
|
05:AGGREGATE [MERGE FINALIZE]
|
|
| output: sum(count(*))
|
|
| group by: t.bigint_col
|
|
| limit: 10
|
|
|
|
|
04:EXCHANGE [HASH(t.bigint_col)]
|
|
|
|
|
03:AGGREGATE
|
|
| output: count(*)
|
|
| group by: bigint_col
|
|
|
|
|
00:UNION
|
|
|
|
|
|--02:SCAN HDFS [functional.alltypessmall]
|
|
| partitions=4/4 size=6.32KB
|
|
|
|
|
01:SCAN HDFS [functional.alltypes]
|
|
partitions=24/24 size=478.45KB
|
|
====
|
|
# distinct agg without grouping over a union
|
|
select count(distinct int_col)
|
|
from
|
|
(select * from functional.alltypes
|
|
union all
|
|
select * from functional.alltypessmall) t
|
|
limit 10
|
|
---- PLAN
|
|
04:AGGREGATE [MERGE FINALIZE]
|
|
| output: count(int_col)
|
|
| limit: 10
|
|
|
|
|
03:AGGREGATE
|
|
| group by: int_col
|
|
|
|
|
00:UNION
|
|
|
|
|
|--02:SCAN HDFS [functional.alltypessmall]
|
|
| partitions=4/4 size=6.32KB
|
|
|
|
|
01:SCAN HDFS [functional.alltypes]
|
|
partitions=24/24 size=478.45KB
|
|
---- DISTRIBUTEDPLAN
|
|
08:AGGREGATE [MERGE FINALIZE]
|
|
| output: sum(count(int_col))
|
|
|
|
|
07:EXCHANGE [UNPARTITIONED]
|
|
| limit: 10
|
|
|
|
|
04:AGGREGATE [MERGE]
|
|
| output: count(int_col)
|
|
| limit: 10
|
|
|
|
|
06:AGGREGATE [MERGE]
|
|
| group by: int_col
|
|
|
|
|
05:EXCHANGE [HASH(int_col)]
|
|
|
|
|
03:AGGREGATE
|
|
| group by: int_col
|
|
|
|
|
00:UNION
|
|
|
|
|
|--02:SCAN HDFS [functional.alltypessmall]
|
|
| partitions=4/4 size=6.32KB
|
|
|
|
|
01:SCAN HDFS [functional.alltypes]
|
|
partitions=24/24 size=478.45KB
|
|
====
|
|
# distinct agg with grouping over a union
|
|
select count(distinct int_col)
|
|
from
|
|
(select * from functional.alltypes
|
|
union all
|
|
select * from functional.alltypessmall) t
|
|
group by t.bigint_col
|
|
limit 10
|
|
---- PLAN
|
|
04:AGGREGATE [MERGE FINALIZE]
|
|
| output: count(int_col)
|
|
| group by: t.bigint_col
|
|
| limit: 10
|
|
|
|
|
03:AGGREGATE
|
|
| group by: bigint_col, int_col
|
|
|
|
|
00:UNION
|
|
|
|
|
|--02:SCAN HDFS [functional.alltypessmall]
|
|
| partitions=4/4 size=6.32KB
|
|
|
|
|
01:SCAN HDFS [functional.alltypes]
|
|
partitions=24/24 size=478.45KB
|
|
---- DISTRIBUTEDPLAN
|
|
07:EXCHANGE [UNPARTITIONED]
|
|
| limit: 10
|
|
|
|
|
04:AGGREGATE [MERGE FINALIZE]
|
|
| output: count(int_col)
|
|
| group by: t.bigint_col
|
|
| limit: 10
|
|
|
|
|
06:AGGREGATE [MERGE]
|
|
| group by: t.bigint_col, int_col
|
|
|
|
|
05:EXCHANGE [HASH(t.bigint_col)]
|
|
|
|
|
03:AGGREGATE
|
|
| group by: bigint_col, int_col
|
|
|
|
|
00:UNION
|
|
|
|
|
|--02:SCAN HDFS [functional.alltypessmall]
|
|
| partitions=4/4 size=6.32KB
|
|
|
|
|
01:SCAN HDFS [functional.alltypes]
|
|
partitions=24/24 size=478.45KB
|
|
====
|
|
# mixed distinct and non-distinct agg without grouping over a union
|
|
select count(smallint_col), count(distinct int_col)
|
|
from
|
|
(select * from functional.alltypes
|
|
union all
|
|
select * from functional.alltypessmall) t
|
|
limit 10
|
|
---- PLAN
|
|
04:AGGREGATE [MERGE FINALIZE]
|
|
| output: count(int_col), sum(count(smallint_col))
|
|
| limit: 10
|
|
|
|
|
03:AGGREGATE
|
|
| output: count(smallint_col)
|
|
| group by: int_col
|
|
|
|
|
00:UNION
|
|
|
|
|
|--02:SCAN HDFS [functional.alltypessmall]
|
|
| partitions=4/4 size=6.32KB
|
|
|
|
|
01:SCAN HDFS [functional.alltypes]
|
|
partitions=24/24 size=478.45KB
|
|
---- DISTRIBUTEDPLAN
|
|
08:AGGREGATE [MERGE FINALIZE]
|
|
| output: sum(count(int_col)), sum(sum(count(smallint_col)))
|
|
|
|
|
07:EXCHANGE [UNPARTITIONED]
|
|
| limit: 10
|
|
|
|
|
04:AGGREGATE [MERGE]
|
|
| output: count(int_col), sum(count(smallint_col))
|
|
| limit: 10
|
|
|
|
|
06:AGGREGATE [MERGE]
|
|
| output: sum(count(smallint_col))
|
|
| group by: int_col
|
|
|
|
|
05:EXCHANGE [HASH(int_col)]
|
|
|
|
|
03:AGGREGATE
|
|
| output: count(smallint_col)
|
|
| group by: int_col
|
|
|
|
|
00:UNION
|
|
|
|
|
|--02:SCAN HDFS [functional.alltypessmall]
|
|
| partitions=4/4 size=6.32KB
|
|
|
|
|
01:SCAN HDFS [functional.alltypes]
|
|
partitions=24/24 size=478.45KB
|
|
====
|
|
# mixed distinct and non-distinct agg with grouping over a union
|
|
select count(smallint_col), count(distinct int_col)
|
|
from
|
|
(select * from functional.alltypes
|
|
union all
|
|
select * from functional.alltypessmall) t
|
|
group by t.bigint_col
|
|
limit 10
|
|
---- PLAN
|
|
04:AGGREGATE [MERGE FINALIZE]
|
|
| output: count(int_col), sum(count(smallint_col))
|
|
| group by: t.bigint_col
|
|
| limit: 10
|
|
|
|
|
03:AGGREGATE
|
|
| output: count(smallint_col)
|
|
| group by: bigint_col, int_col
|
|
|
|
|
00:UNION
|
|
|
|
|
|--02:SCAN HDFS [functional.alltypessmall]
|
|
| partitions=4/4 size=6.32KB
|
|
|
|
|
01:SCAN HDFS [functional.alltypes]
|
|
partitions=24/24 size=478.45KB
|
|
---- DISTRIBUTEDPLAN
|
|
07:EXCHANGE [UNPARTITIONED]
|
|
| limit: 10
|
|
|
|
|
04:AGGREGATE [MERGE FINALIZE]
|
|
| output: count(int_col), sum(count(smallint_col))
|
|
| group by: t.bigint_col
|
|
| limit: 10
|
|
|
|
|
06:AGGREGATE [MERGE]
|
|
| output: sum(count(smallint_col))
|
|
| group by: t.bigint_col, int_col
|
|
|
|
|
05:EXCHANGE [HASH(t.bigint_col)]
|
|
|
|
|
03:AGGREGATE
|
|
| output: count(smallint_col)
|
|
| group by: bigint_col, int_col
|
|
|
|
|
00:UNION
|
|
|
|
|
|--02:SCAN HDFS [functional.alltypessmall]
|
|
| partitions=4/4 size=6.32KB
|
|
|
|
|
01:SCAN HDFS [functional.alltypes]
|
|
partitions=24/24 size=478.45KB
|
|
====
|
|
# mixed distinct and non-distinct agg with grouping over a union distinct
|
|
select count(smallint_col), count(distinct int_col)
|
|
from
|
|
(select * from functional.alltypes
|
|
union distinct
|
|
select * from functional.alltypessmall) t
|
|
group by t.bigint_col
|
|
limit 10
|
|
---- PLAN
|
|
05:AGGREGATE [MERGE FINALIZE]
|
|
| output: count(int_col), sum(count(smallint_col))
|
|
| group by: t.bigint_col
|
|
| limit: 10
|
|
|
|
|
04:AGGREGATE
|
|
| output: count(smallint_col)
|
|
| group by: bigint_col, int_col
|
|
|
|
|
03:AGGREGATE [FINALIZE]
|
|
| group by: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month
|
|
|
|
|
00:UNION
|
|
|
|
|
|--02:SCAN HDFS [functional.alltypessmall]
|
|
| partitions=4/4 size=6.32KB
|
|
|
|
|
01:SCAN HDFS [functional.alltypes]
|
|
partitions=24/24 size=478.45KB
|
|
---- DISTRIBUTEDPLAN
|
|
10:EXCHANGE [UNPARTITIONED]
|
|
| limit: 10
|
|
|
|
|
05:AGGREGATE [MERGE FINALIZE]
|
|
| output: count(int_col), sum(count(smallint_col))
|
|
| group by: t.bigint_col
|
|
| limit: 10
|
|
|
|
|
09:AGGREGATE [MERGE]
|
|
| output: sum(count(smallint_col))
|
|
| group by: t.bigint_col, int_col
|
|
|
|
|
08:EXCHANGE [HASH(t.bigint_col)]
|
|
|
|
|
04:AGGREGATE
|
|
| output: count(smallint_col)
|
|
| group by: bigint_col, int_col
|
|
|
|
|
07:AGGREGATE [MERGE FINALIZE]
|
|
| group by: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month
|
|
|
|
|
06:EXCHANGE [HASH(id,bool_col,tinyint_col,smallint_col,int_col,bigint_col,float_col,double_col,date_string_col,string_col,timestamp_col,year,month)]
|
|
|
|
|
03:AGGREGATE
|
|
| group by: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month
|
|
|
|
|
00:UNION
|
|
|
|
|
|--02:SCAN HDFS [functional.alltypessmall]
|
|
| partitions=4/4 size=6.32KB
|
|
|
|
|
01:SCAN HDFS [functional.alltypes]
|
|
partitions=24/24 size=478.45KB
|
|
====
|
|
# test that aggregations are not placed below an unpartitioned exchange with a limit
# (limit inside an inline view over a single table)
|
|
select count(*) from (select * from functional.alltypes limit 10) t
|
|
---- PLAN
|
|
01:AGGREGATE [FINALIZE]
|
|
| output: count(*)
|
|
|
|
|
00:SCAN HDFS [functional.alltypes]
|
|
partitions=24/24 size=478.45KB
|
|
limit: 10
|
|
---- DISTRIBUTEDPLAN
|
|
01:AGGREGATE [FINALIZE]
|
|
| output: count(*)
|
|
|
|
|
02:EXCHANGE [UNPARTITIONED]
|
|
| limit: 10
|
|
|
|
|
00:SCAN HDFS [functional.alltypes]
|
|
partitions=24/24 size=478.45KB
|
|
limit: 10
|
|
====
|
|
# test that aggregations are not placed below an unpartitioned exchange with a limit
# (limit applied to a union inside an inline view)
|
|
select count(*) from
|
|
(select * from functional.alltypes
|
|
union all
|
|
(select * from functional.alltypessmall) limit 10) t
|
|
---- PLAN
|
|
03:AGGREGATE [FINALIZE]
|
|
| output: count(*)
|
|
|
|
|
00:UNION
|
|
| limit: 10
|
|
|
|
|
|--02:SCAN HDFS [functional.alltypessmall]
|
|
| partitions=4/4 size=6.32KB
|
|
|
|
|
01:SCAN HDFS [functional.alltypes]
|
|
partitions=24/24 size=478.45KB
|
|
---- DISTRIBUTEDPLAN
|
|
03:AGGREGATE [FINALIZE]
|
|
| output: count(*)
|
|
|
|
|
04:EXCHANGE [UNPARTITIONED]
|
|
| limit: 10
|
|
|
|
|
00:UNION
|
|
| limit: 10
|
|
|
|
|
|--02:SCAN HDFS [functional.alltypessmall]
|
|
| partitions=4/4 size=6.32KB
|
|
|
|
|
01:SCAN HDFS [functional.alltypes]
|
|
partitions=24/24 size=478.45KB
|
|
====
|