Files
impala/testdata/workloads/functional-planner/queries/PlannerTest/order.test
Dimitris Tsirogiannis 5a6f53db16 Add partition pruning tests
The following changes are included in this commit:
1. Modified the alltypesagg table to include an additional partition key
that has nulls.
2. Added a number of tests in hdfs.test that exercise the partition
pruning logic (see IMPALA-887).
3. Modified all the tests that are affected by the change in alltypesagg.

Change-Id: I1a769375aaa71273341522eb94490ba5e4c6f00d
Reviewed-on: http://gerrit.ent.cloudera.com:8080/2874
Reviewed-by: Dimitris Tsirogiannis <dtsirogiannis@cloudera.com>
Tested-by: jenkins
Reviewed-on: http://gerrit.ent.cloudera.com:8080/3236
2014-06-24 02:14:27 -07:00

1113 lines
30 KiB
Plaintext

select name, zip
from functional.testtbl
order by name offset 5
---- PLAN
01:SORT
| order by: name ASC
|
00:SCAN HDFS [functional.testtbl]
partitions=1/1 size=0B
---- DISTRIBUTEDPLAN
02:MERGING-EXCHANGE [UNPARTITIONED]
| offset: 5
| order by: name ASC
|
01:SORT
| order by: name ASC
|
00:SCAN HDFS [functional.testtbl]
partitions=1/1 size=0B
====
select name, zip
from functional.testtbl
order by name
---- PLAN
01:SORT
| order by: name ASC
|
00:SCAN HDFS [functional.testtbl]
partitions=1/1 size=0B
---- DISTRIBUTEDPLAN
02:MERGING-EXCHANGE [UNPARTITIONED]
| order by: name ASC
|
01:SORT
| order by: name ASC
|
00:SCAN HDFS [functional.testtbl]
partitions=1/1 size=0B
====
select zip, count(*)
from functional.testtbl
where name like 'm%'
group by 1
order by 2 desc
---- PLAN
02:SORT
| order by: count(*) DESC
|
01:AGGREGATE [FINALIZE]
| output: count(*)
| group by: zip
|
00:SCAN HDFS [functional.testtbl]
partitions=1/1 size=0B
predicates: name LIKE 'm%'
---- DISTRIBUTEDPLAN
05:MERGING-EXCHANGE [UNPARTITIONED]
| order by: count(*) DESC
|
02:SORT
| order by: count(*) DESC
|
04:AGGREGATE [MERGE FINALIZE]
| output: sum(count(*))
| group by: zip
|
03:EXCHANGE [HASH(zip)]
|
01:AGGREGATE
| output: count(*)
| group by: zip
|
00:SCAN HDFS [functional.testtbl]
partitions=1/1 size=0B
predicates: name LIKE 'm%'
====
select int_col, sum(float_col)
from functional_hbase.alltypessmall
where id < 5
group by 1
order by 2
---- PLAN
02:SORT
| order by: sum(float_col) ASC
|
01:AGGREGATE [FINALIZE]
| output: sum(float_col)
| group by: int_col
|
00:SCAN HBASE [functional_hbase.alltypessmall]
predicates: id < 5
---- DISTRIBUTEDPLAN
05:MERGING-EXCHANGE [UNPARTITIONED]
| order by: sum(float_col) ASC
|
02:SORT
| order by: sum(float_col) ASC
|
04:AGGREGATE [MERGE FINALIZE]
| output: sum(sum(float_col))
| group by: int_col
|
03:EXCHANGE [HASH(int_col)]
|
01:AGGREGATE
| output: sum(float_col)
| group by: int_col
|
00:SCAN HBASE [functional_hbase.alltypessmall]
predicates: id < 5
====
select int_col, sum(float_col), min(float_col)
from functional_hbase.alltypessmall
group by 1
order by 2,3 desc
---- PLAN
02:SORT
| order by: sum(float_col) ASC, min(float_col) DESC
|
01:AGGREGATE [FINALIZE]
| output: sum(float_col), min(float_col)
| group by: int_col
|
00:SCAN HBASE [functional_hbase.alltypessmall]
---- DISTRIBUTEDPLAN
05:MERGING-EXCHANGE [UNPARTITIONED]
| order by: sum(float_col) ASC, min(float_col) DESC
|
02:SORT
| order by: sum(float_col) ASC, min(float_col) DESC
|
04:AGGREGATE [MERGE FINALIZE]
| output: sum(sum(float_col)), min(min(float_col))
| group by: int_col
|
03:EXCHANGE [HASH(int_col)]
|
01:AGGREGATE
| output: sum(float_col), min(float_col)
| group by: int_col
|
00:SCAN HBASE [functional_hbase.alltypessmall]
====
# Test that the sort is on int_col and not on the id column
select int_col as id from functional.alltypessmall order by id
---- PLAN
01:SORT
| order by: int_col ASC
|
00:SCAN HDFS [functional.alltypessmall]
partitions=4/4 size=6.32KB
---- DISTRIBUTEDPLAN
02:MERGING-EXCHANGE [UNPARTITIONED]
| order by: int_col ASC
|
01:SORT
| order by: int_col ASC
|
00:SCAN HDFS [functional.alltypessmall]
partitions=4/4 size=6.32KB
====
# Test that the sort is on id and not on int_col
select int_col as id from functional.alltypessmall order by functional.alltypessmall.id
---- PLAN
01:SORT
| order by: id ASC
|
00:SCAN HDFS [functional.alltypessmall]
partitions=4/4 size=6.32KB
---- DISTRIBUTEDPLAN
02:MERGING-EXCHANGE [UNPARTITIONED]
| order by: id ASC
|
01:SORT
| order by: id ASC
|
00:SCAN HDFS [functional.alltypessmall]
partitions=4/4 size=6.32KB
====
# test distributed sort over a union
select int_col, bigint_col from
(select * from functional.alltypes
union all
select * from functional.alltypessmall) t
order by int_col desc offset 5
---- PLAN
03:SORT
| order by: int_col DESC
|
00:UNION
|
|--02:SCAN HDFS [functional.alltypessmall]
| partitions=4/4 size=6.32KB
|
01:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
---- DISTRIBUTEDPLAN
04:MERGING-EXCHANGE [UNPARTITIONED]
| offset: 5
| order by: int_col DESC
|
03:SORT
| order by: int_col DESC
|
00:UNION
|
|--02:SCAN HDFS [functional.alltypessmall]
| partitions=4/4 size=6.32KB
|
01:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
====
# test distributed sort over a union distinct
select int_col, bigint_col from
(select * from functional.alltypes
union distinct
select * from functional.alltypessmall) t
order by int_col desc offset 5
---- PLAN
04:SORT
| order by: int_col DESC
|
03:AGGREGATE [FINALIZE]
| group by: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month
|
00:UNION
|
|--02:SCAN HDFS [functional.alltypessmall]
| partitions=4/4 size=6.32KB
|
01:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
---- DISTRIBUTEDPLAN
07:MERGING-EXCHANGE [UNPARTITIONED]
| offset: 5
| order by: int_col DESC
|
04:SORT
| order by: int_col DESC
|
06:AGGREGATE [MERGE FINALIZE]
| group by: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month
|
05:EXCHANGE [HASH(id,bool_col,tinyint_col,smallint_col,int_col,bigint_col,float_col,double_col,date_string_col,string_col,timestamp_col,year,month)]
|
03:AGGREGATE
| group by: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month
|
00:UNION
|
|--02:SCAN HDFS [functional.alltypessmall]
| partitions=4/4 size=6.32KB
|
01:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
====
# simple join
select j.*, d.* from functional.JoinTbl j full outer join functional.DimTbl d
on (j.test_id = d.id)
order by j.test_id, j.test_name, j.test_zip, j.alltypes_id, d.name
---- PLAN
03:SORT
| order by: test_id ASC, test_name ASC, test_zip ASC, alltypes_id ASC, name ASC
|
02:HASH JOIN [FULL OUTER JOIN]
| hash predicates: j.test_id = d.id
|
|--01:SCAN HDFS [functional.dimtbl d]
| partitions=1/1 size=171B compact
|
00:SCAN HDFS [functional.jointbl j]
partitions=1/1 size=433B
---- DISTRIBUTEDPLAN
06:MERGING-EXCHANGE [UNPARTITIONED]
| order by: test_id ASC, test_name ASC, test_zip ASC, alltypes_id ASC, name ASC
|
03:SORT
| order by: test_id ASC, test_name ASC, test_zip ASC, alltypes_id ASC, name ASC
|
02:HASH JOIN [FULL OUTER JOIN, PARTITIONED]
| hash predicates: j.test_id = d.id
|
|--05:EXCHANGE [HASH(d.id)]
| |
| 01:SCAN HDFS [functional.dimtbl d]
| partitions=1/1 size=171B
|
04:EXCHANGE [HASH(j.test_id)]
|
00:SCAN HDFS [functional.jointbl j]
partitions=1/1 size=433B
====
# more joins
select a.smallint_col, b.id, a.tinyint_col, c.id, a.int_col, b.float_col, c.string_col
from functional.alltypesagg a
join functional.alltypessmall b on (a.smallint_col = b.id)
join functional.alltypessmall c on (a.tinyint_col = c.id)
where a.month=1
and a.day=1
and a.int_col > 899
and b.float_col > 4.5
and c.string_col < '7'
and a.int_col + b.float_col + cast(c.string_col as float) < 1000
order by c.string_col desc, a.smallint_col
---- PLAN
05:SORT
| order by: string_col DESC, smallint_col ASC
|
04:HASH JOIN [INNER JOIN]
| hash predicates: a.tinyint_col = c.id
| other predicates: a.int_col + b.float_col + CAST(c.string_col AS FLOAT) < 1000
|
|--02:SCAN HDFS [functional.alltypessmall c]
| partitions=4/4 size=6.32KB compact
| predicates: c.string_col < '7'
|
03:HASH JOIN [INNER JOIN]
| hash predicates: a.smallint_col = b.id
|
|--01:SCAN HDFS [functional.alltypessmall b]
| partitions=4/4 size=6.32KB compact
| predicates: b.float_col > 4.5
|
00:SCAN HDFS [functional.alltypesagg a]
partitions=1/11 size=73.39KB
predicates: a.int_col > 899
---- DISTRIBUTEDPLAN
08:MERGING-EXCHANGE [UNPARTITIONED]
| order by: string_col DESC, smallint_col ASC
|
05:SORT
| order by: string_col DESC, smallint_col ASC
|
04:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: a.tinyint_col = c.id
| other predicates: a.int_col + b.float_col + CAST(c.string_col AS FLOAT) < 1000
|
|--07:EXCHANGE [BROADCAST]
| |
| 02:SCAN HDFS [functional.alltypessmall c]
| partitions=4/4 size=6.32KB
| predicates: c.string_col < '7'
|
03:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: a.smallint_col = b.id
|
|--06:EXCHANGE [BROADCAST]
| |
| 01:SCAN HDFS [functional.alltypessmall b]
| partitions=4/4 size=6.32KB
| predicates: b.float_col > 4.5
|
00:SCAN HDFS [functional.alltypesagg a]
partitions=1/11 size=73.39KB
predicates: a.int_col > 899
====
# agg in ordering
select int_col, count(*), avg(tinyint_col)
from functional.alltypesagg
group by 1
order by avg(tinyint_col)
---- PLAN
02:SORT
| order by: sum(tinyint_col) / count(tinyint_col) ASC
|
01:AGGREGATE [FINALIZE]
| output: count(*), sum(tinyint_col), count(tinyint_col)
| group by: int_col
|
00:SCAN HDFS [functional.alltypesagg]
partitions=11/11 size=814.73KB
---- DISTRIBUTEDPLAN
05:MERGING-EXCHANGE [UNPARTITIONED]
| order by: sum(tinyint_col) / count(tinyint_col) ASC
|
02:SORT
| order by: sum(tinyint_col) / count(tinyint_col) ASC
|
04:AGGREGATE [MERGE FINALIZE]
| output: sum(count(*)), sum(sum(tinyint_col)), sum(count(tinyint_col))
| group by: int_col
|
03:EXCHANGE [HASH(int_col)]
|
01:AGGREGATE
| output: count(*), sum(tinyint_col), count(tinyint_col)
| group by: int_col
|
00:SCAN HDFS [functional.alltypesagg]
partitions=11/11 size=814.73KB
====
# outer join
select t1.id, t1.int_col, t2.id, t2.int_col
from functional.alltypesagg t1
left outer join functional.alltypessmall t2
on (t1.int_col = t2.int_col)
order by t1.id,t2.id
---- PLAN
03:SORT
| order by: id ASC, id ASC
|
02:HASH JOIN [LEFT OUTER JOIN]
| hash predicates: t1.int_col = t2.int_col
|
|--01:SCAN HDFS [functional.alltypessmall t2]
| partitions=4/4 size=6.32KB compact
|
00:SCAN HDFS [functional.alltypesagg t1]
partitions=11/11 size=814.73KB
---- DISTRIBUTEDPLAN
05:MERGING-EXCHANGE [UNPARTITIONED]
| order by: id ASC, id ASC
|
03:SORT
| order by: id ASC, id ASC
|
02:HASH JOIN [LEFT OUTER JOIN, BROADCAST]
| hash predicates: t1.int_col = t2.int_col
|
|--04:EXCHANGE [BROADCAST]
| |
| 01:SCAN HDFS [functional.alltypessmall t2]
| partitions=4/4 size=6.32KB
|
00:SCAN HDFS [functional.alltypesagg t1]
partitions=11/11 size=814.73KB
====
# cross join
select t1.id, t2.id from functional.alltypestiny t1 cross join functional.alltypestiny t2
where (t1.id < 3 and t2.id < 3)
order by t1.id, t2.id
---- PLAN
03:SORT
| order by: id ASC, id ASC
|
02:CROSS JOIN [BROADCAST]
|
|--01:SCAN HDFS [functional.alltypestiny t2]
| partitions=4/4 size=460B compact
| predicates: t2.id < 3
|
00:SCAN HDFS [functional.alltypestiny t1]
partitions=4/4 size=460B
predicates: t1.id < 3
---- DISTRIBUTEDPLAN
05:MERGING-EXCHANGE [UNPARTITIONED]
| order by: id ASC, id ASC
|
03:SORT
| order by: id ASC, id ASC
|
02:CROSS JOIN [BROADCAST]
|
|--04:EXCHANGE [BROADCAST]
| |
| 01:SCAN HDFS [functional.alltypestiny t2]
| partitions=4/4 size=460B
| predicates: t2.id < 3
|
00:SCAN HDFS [functional.alltypestiny t1]
partitions=4/4 size=460B
predicates: t1.id < 3
====
# union queries with mutiple operands/union types.
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month from functional.alltypestiny where year=2009 and month=1
union all
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month from functional.alltypestiny where year=2009 and month=1
union all
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month from functional.alltypestiny where year=2009 and month=2
union distinct
(select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month from functional.alltypestiny where year=2009 and month=2)
order by 1
---- PLAN
06:SORT
| order by: id ASC
|
05:AGGREGATE [FINALIZE]
| group by: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month
|
00:UNION
|
|--04:SCAN HDFS [functional.alltypestiny]
| partitions=1/4 size=115B
|
|--03:SCAN HDFS [functional.alltypestiny]
| partitions=1/4 size=115B
|
|--02:SCAN HDFS [functional.alltypestiny]
| partitions=1/4 size=115B
|
01:SCAN HDFS [functional.alltypestiny]
partitions=1/4 size=115B
---- DISTRIBUTEDPLAN
09:MERGING-EXCHANGE [UNPARTITIONED]
| order by: id ASC
|
06:SORT
| order by: id ASC
|
08:AGGREGATE [MERGE FINALIZE]
| group by: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month
|
07:EXCHANGE [HASH(id,bool_col,tinyint_col,smallint_col,int_col,bigint_col,float_col,double_col,date_string_col,string_col,timestamp_col,year,month)]
|
05:AGGREGATE
| group by: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month
|
00:UNION
|
|--04:SCAN HDFS [functional.alltypestiny]
| partitions=1/4 size=115B
|
|--03:SCAN HDFS [functional.alltypestiny]
| partitions=1/4 size=115B
|
|--02:SCAN HDFS [functional.alltypestiny]
| partitions=1/4 size=115B
|
01:SCAN HDFS [functional.alltypestiny]
partitions=1/4 size=115B
====
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month from functional.alltypestiny where year=2009 and month=1
union distinct
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month from functional.alltypestiny where year=2009 and month=1
union all
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month from functional.alltypestiny where year=2009 and month=2
union all
(select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month from functional.alltypestiny where year=2009 and month=2)
order by 1,2
---- PLAN
07:SORT
| order by: id ASC, bool_col ASC
|
04:UNION
|
|--03:AGGREGATE [FINALIZE]
| | group by: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month
| |
| 00:UNION
| |
| |--02:SCAN HDFS [functional.alltypestiny]
| | partitions=1/4 size=115B
| |
| 01:SCAN HDFS [functional.alltypestiny]
| partitions=1/4 size=115B
|
|--06:SCAN HDFS [functional.alltypestiny]
| partitions=1/4 size=115B
|
05:SCAN HDFS [functional.alltypestiny]
partitions=1/4 size=115B
---- DISTRIBUTEDPLAN
10:MERGING-EXCHANGE [UNPARTITIONED]
| order by: id ASC, bool_col ASC
|
07:SORT
| order by: id ASC, bool_col ASC
|
04:UNION
|
|--06:SCAN HDFS [functional.alltypestiny]
| partitions=1/4 size=115B
|
|--05:SCAN HDFS [functional.alltypestiny]
| partitions=1/4 size=115B
|
09:AGGREGATE [MERGE FINALIZE]
| group by: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month
|
08:EXCHANGE [HASH(id,bool_col,tinyint_col,smallint_col,int_col,bigint_col,float_col,double_col,date_string_col,string_col,timestamp_col,year,month)]
|
03:AGGREGATE
| group by: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month
|
00:UNION
|
|--02:SCAN HDFS [functional.alltypestiny]
| partitions=1/4 size=115B
|
01:SCAN HDFS [functional.alltypestiny]
partitions=1/4 size=115B
====
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month from functional.alltypestiny where year=2009 and month=1
union distinct
(select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month from functional.alltypestiny where year=2009 and month=1
union all
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month from functional.alltypestiny where year=2009 and month=2)
union distinct
(select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month from functional.alltypestiny where year=2009 and month=2
union all
(select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month from functional.alltypestiny where year=2009 and month=3)
order by 1 limit 3)
union all
(select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month from functional.alltypestiny where year=2009 and month=3
union all
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month from functional.alltypestiny where year=2009 and month=4)
union all
(select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month from functional.alltypestiny where year=2009 and month=4
union all
(select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month from functional.alltypestiny where year=2009 and month=5)
order by 1 limit 3)
order by 12, 13, 1
---- PLAN
16:SORT
| order by: year ASC, month ASC, id ASC
|
09:UNION
|
|--08:AGGREGATE [FINALIZE]
| | group by: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month
| |
| 00:UNION
| |
| |--07:TOP-N [LIMIT=3]
| | | order by: id ASC
| | |
| | 04:UNION
| | |
| | |--06:SCAN HDFS [functional.alltypestiny]
| | | partitions=1/4 size=115B
| | |
| | 05:SCAN HDFS [functional.alltypestiny]
| | partitions=1/4 size=115B
| |
| |--03:SCAN HDFS [functional.alltypestiny]
| | partitions=1/4 size=115B
| |
| |--02:SCAN HDFS [functional.alltypestiny]
| | partitions=1/4 size=115B
| |
| 01:SCAN HDFS [functional.alltypestiny]
| partitions=1/4 size=115B
|
|--15:TOP-N [LIMIT=3]
| | order by: id ASC
| |
| 12:UNION
| |
| |--14:SCAN HDFS [functional.alltypestiny]
| | partitions=0/4 size=0B
| |
| 13:SCAN HDFS [functional.alltypestiny]
| partitions=1/4 size=115B
|
|--11:SCAN HDFS [functional.alltypestiny]
| partitions=1/4 size=115B
|
10:SCAN HDFS [functional.alltypestiny]
partitions=1/4 size=115B
---- DISTRIBUTEDPLAN
23:MERGING-EXCHANGE [UNPARTITIONED]
| order by: year ASC, month ASC, id ASC
|
16:SORT
| order by: year ASC, month ASC, id ASC
|
09:UNION
|
|--11:SCAN HDFS [functional.alltypestiny]
| partitions=1/4 size=115B
|
|--10:SCAN HDFS [functional.alltypestiny]
| partitions=1/4 size=115B
|
|--22:EXCHANGE [RANDOM]
| |
| 17:MERGING-EXCHANGE [UNPARTITIONED]
| | order by: id ASC
| | limit: 3
| |
| 15:TOP-N [LIMIT=3]
| | order by: id ASC
| |
| 12:UNION
| |
| |--14:SCAN HDFS [functional.alltypestiny]
| | partitions=0/4 size=0B
| |
| 13:SCAN HDFS [functional.alltypestiny]
| partitions=1/4 size=115B
|
21:AGGREGATE [MERGE FINALIZE]
| group by: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month
|
20:EXCHANGE [HASH(id,bool_col,tinyint_col,smallint_col,int_col,bigint_col,float_col,double_col,date_string_col,string_col,timestamp_col,year,month)]
|
08:AGGREGATE
| group by: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month
|
00:UNION
|
|--03:SCAN HDFS [functional.alltypestiny]
| partitions=1/4 size=115B
|
|--02:SCAN HDFS [functional.alltypestiny]
| partitions=1/4 size=115B
|
|--01:SCAN HDFS [functional.alltypestiny]
| partitions=1/4 size=115B
|
19:EXCHANGE [RANDOM]
|
18:MERGING-EXCHANGE [UNPARTITIONED]
| order by: id ASC
| limit: 3
|
07:TOP-N [LIMIT=3]
| order by: id ASC
|
04:UNION
|
|--06:SCAN HDFS [functional.alltypestiny]
| partitions=1/4 size=115B
|
05:SCAN HDFS [functional.alltypestiny]
partitions=1/4 size=115B
====
# Sort over top-n
select * from (select * from functional.alltypes order by bigint_col limit 10) t
order by int_col
---- PLAN
02:SORT
| order by: int_col ASC
|
01:TOP-N [LIMIT=10]
| order by: bigint_col ASC
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
---- DISTRIBUTEDPLAN
02:SORT
| order by: int_col ASC
|
03:MERGING-EXCHANGE [UNPARTITIONED]
| order by: bigint_col ASC
| limit: 10
|
01:TOP-N [LIMIT=10]
| order by: bigint_col ASC
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
====
# Sort over top-n over union
select * from
(select * from functional.alltypes
union all
(select * from functional.alltypessmall) order by bigint_col limit 10) t
order by int_col
---- PLAN
04:SORT
| order by: int_col ASC
|
03:TOP-N [LIMIT=10]
| order by: bigint_col ASC
|
00:UNION
|
|--02:SCAN HDFS [functional.alltypessmall]
| partitions=4/4 size=6.32KB
|
01:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
---- DISTRIBUTEDPLAN
04:SORT
| order by: int_col ASC
|
05:MERGING-EXCHANGE [UNPARTITIONED]
| order by: bigint_col ASC
| limit: 10
|
03:TOP-N [LIMIT=10]
| order by: bigint_col ASC
|
00:UNION
|
|--02:SCAN HDFS [functional.alltypessmall]
| partitions=4/4 size=6.32KB
|
01:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
====
# ignore order by with no limit in subqueries and with clause
with B as (select * from functional.alltypes order by int_col)
select * from
(select * from functional.alltypes order by bigint_col) A
join B on (A.string_col = B.string_col)
---- PLAN
02:HASH JOIN [INNER JOIN]
| hash predicates: functional.alltypes.string_col = functional.alltypes.string_col
|
|--01:SCAN HDFS [functional.alltypes]
| partitions=24/24 size=478.45KB compact
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
---- DISTRIBUTEDPLAN
05:EXCHANGE [UNPARTITIONED]
|
02:HASH JOIN [INNER JOIN, PARTITIONED]
| hash predicates: functional.alltypes.string_col = functional.alltypes.string_col
|
|--04:EXCHANGE [HASH(functional.alltypes.string_col)]
| |
| 01:SCAN HDFS [functional.alltypes]
| partitions=24/24 size=478.45KB
|
03:EXCHANGE [HASH(functional.alltypes.string_col)]
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
====
# ignore order by with no limit in insert
insert into functional.alltypes partition(year, month)
select * from functional.alltypes order by int_col
---- PLAN
WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(functional.alltypes.year,functional.alltypes.month)]
| partitions=24
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(functional.alltypes.year,functional.alltypes.month)]
| partitions=24
|
01:EXCHANGE [HASH(functional.alltypes.year,functional.alltypes.month)]
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
====
# ignore order by with no limit in CTAS
create table functional.alltypescopy as
select * from functional.alltypes order by bigint_col
---- PLAN
WRITE TO HDFS [functional.alltypescopy, OVERWRITE=false]
| partitions=1
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypescopy, OVERWRITE=false]
| partitions=1
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
====
# ignore order by with no limit in union operand
select * from functional.alltypes
union all
select * from functional.alltypessmall order by bigint_col
---- PLAN
00:UNION
|
|--02:SCAN HDFS [functional.alltypessmall]
| partitions=4/4 size=6.32KB
|
01:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
---- DISTRIBUTEDPLAN
03:EXCHANGE [UNPARTITIONED]
|
00:UNION
|
|--02:SCAN HDFS [functional.alltypessmall]
| partitions=4/4 size=6.32KB
|
01:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
====
# Do not ignore sort over union output
select * from functional.alltypes
union all
(select * from functional.alltypessmall) order by bigint_col
---- PLAN
03:SORT
| order by: bigint_col ASC
|
00:UNION
|
|--02:SCAN HDFS [functional.alltypessmall]
| partitions=4/4 size=6.32KB
|
01:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
---- DISTRIBUTEDPLAN
04:MERGING-EXCHANGE [UNPARTITIONED]
| order by: bigint_col ASC
|
03:SORT
| order by: bigint_col ASC
|
00:UNION
|
|--02:SCAN HDFS [functional.alltypessmall]
| partitions=4/4 size=6.32KB
|
01:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
====
# Multiple union operands with valid order-by clauses
select int_col from functional.alltypes order by int_col
union select int_col from functional.alltypes order by int_col limit 10
union (select int_col from functional.alltypes order by int_col limit 10 offset 5)
order by int_col offset 5
---- PLAN
07:SORT
| order by: int_col ASC
|
06:AGGREGATE [FINALIZE]
| group by: int_col
|
00:UNION
|
|--05:TOP-N [LIMIT=10 OFFSET=5]
| | order by: int_col ASC
| |
| 04:SCAN HDFS [functional.alltypes]
| partitions=24/24 size=478.45KB
|
|--03:TOP-N [LIMIT=10]
| | order by: int_col ASC
| |
| 02:SCAN HDFS [functional.alltypes]
| partitions=24/24 size=478.45KB
|
01:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
---- DISTRIBUTEDPLAN
14:MERGING-EXCHANGE [UNPARTITIONED]
| offset: 5
| order by: int_col ASC
|
07:SORT
| order by: int_col ASC
|
13:AGGREGATE [MERGE FINALIZE]
| group by: int_col
|
12:EXCHANGE [HASH(int_col)]
|
06:AGGREGATE
| group by: int_col
|
00:UNION
|
|--01:SCAN HDFS [functional.alltypes]
| partitions=24/24 size=478.45KB
|
|--11:EXCHANGE [RANDOM]
| |
| 09:MERGING-EXCHANGE [UNPARTITIONED]
| | offset: 5
| | order by: int_col ASC
| | limit: 10
| |
| 05:TOP-N [LIMIT=15]
| | order by: int_col ASC
| |
| 04:SCAN HDFS [functional.alltypes]
| partitions=24/24 size=478.45KB
|
10:EXCHANGE [RANDOM]
|
08:MERGING-EXCHANGE [UNPARTITIONED]
| order by: int_col ASC
| limit: 10
|
03:TOP-N [LIMIT=10]
| order by: int_col ASC
|
02:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
====
# Test slot materialization
select * from
(select 1 x, bigint_col y from
(select * from
(select int_col, bigint_col, tinyint_col, smallint_col, id, float_col
from functional.alltypes
order by tinyint_col limit 100
union all
(select int_col, bigint_col, tinyint_col, smallint_col, id, float_col
from functional.alltypes
group by float_col, bigint_col, int_col, tinyint_col, smallint_col, id
order by int_col limit 100)
order by bigint_col limit 100) t1
where smallint_col < 5 order by int_col limit 100) t2
where id < 10 group by bigint_col
having sum(float_col) > 10) t3
order by x
---- PLAN
11:SORT
| order by: x ASC
|
10:AGGREGATE [FINALIZE]
| output: sum(float_col)
| group by: bigint_col
| having: sum(float_col) > 10
|
09:SELECT
| predicates: id < 10
|
08:TOP-N [LIMIT=100]
| order by: int_col ASC
|
07:SELECT
| predicates: smallint_col < 5
|
06:TOP-N [LIMIT=100]
| order by: bigint_col ASC
|
00:UNION
|
|--05:TOP-N [LIMIT=100]
| | order by: int_col ASC
| |
| 04:AGGREGATE [FINALIZE]
| | group by: float_col, bigint_col, int_col, tinyint_col, smallint_col, id
| |
| 03:SCAN HDFS [functional.alltypes]
| partitions=24/24 size=478.45KB
|
02:TOP-N [LIMIT=100]
| order by: tinyint_col ASC
|
01:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
---- DISTRIBUTEDPLAN
11:SORT
| order by: x ASC
|
10:AGGREGATE [FINALIZE]
| output: sum(float_col)
| group by: bigint_col
| having: sum(float_col) > 10
|
09:SELECT
| predicates: id < 10
|
08:TOP-N [LIMIT=100]
| order by: int_col ASC
|
07:SELECT
| predicates: smallint_col < 5
|
06:TOP-N [LIMIT=100]
| order by: bigint_col ASC
|
00:UNION
|
|--15:MERGING-EXCHANGE [UNPARTITIONED]
| | order by: int_col ASC
| | limit: 100
| |
| 05:TOP-N [LIMIT=100]
| | order by: int_col ASC
| |
| 14:AGGREGATE [MERGE FINALIZE]
| | group by: float_col, bigint_col, int_col, tinyint_col, smallint_col, id
| |
| 13:EXCHANGE [HASH(float_col,bigint_col,int_col,tinyint_col,smallint_col,id)]
| |
| 04:AGGREGATE
| | group by: float_col, bigint_col, int_col, tinyint_col, smallint_col, id
| |
| 03:SCAN HDFS [functional.alltypes]
| partitions=24/24 size=478.45KB
|
12:MERGING-EXCHANGE [UNPARTITIONED]
| order by: tinyint_col ASC
| limit: 100
|
02:TOP-N [LIMIT=100]
| order by: tinyint_col ASC
|
01:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
====
# Test slot materialization IMPALA-1006
select int_col from
(select int_col, bigint_col
from functional.alltypesagg
union all
(select int_col, bigint_col from functional.alltypesagg)
order by bigint_col limit 10) A
---- PLAN
03:TOP-N [LIMIT=10]
| order by: bigint_col ASC
|
00:UNION
|
|--02:SCAN HDFS [functional.alltypesagg]
| partitions=11/11 size=814.73KB
|
01:SCAN HDFS [functional.alltypesagg]
partitions=11/11 size=814.73KB
---- DISTRIBUTEDPLAN
04:MERGING-EXCHANGE [UNPARTITIONED]
| order by: bigint_col ASC
| limit: 10
|
03:TOP-N [LIMIT=10]
| order by: bigint_col ASC
|
00:UNION
|
|--02:SCAN HDFS [functional.alltypesagg]
| partitions=11/11 size=814.73KB
|
01:SCAN HDFS [functional.alltypesagg]
partitions=11/11 size=814.73KB
====