Files
impala/testdata/workloads/functional-planner/queries/PlannerTest/views.test
Dimitris Tsirogiannis 5a6f53db16 Add partition pruning tests
The following changes are included in this commit:
1. Modified the alltypesagg table to include an additional partition key
that has nulls.
2. Added a number of tests in hdfs.test that exercise the partition
pruning logic (see IMPALA-887).
3. Modified all the tests that are affected by the change in alltypesagg.

Change-Id: I1a769375aaa71273341522eb94490ba5e4c6f00d
Reviewed-on: http://gerrit.ent.cloudera.com:8080/2874
Reviewed-by: Dimitris Tsirogiannis <dtsirogiannis@cloudera.com>
Tested-by: jenkins
Reviewed-on: http://gerrit.ent.cloudera.com:8080/3236
2014-06-24 02:14:27 -07:00

404 lines
11 KiB
Plaintext

# Basic test with a view.
select int_col, string_col from functional.alltypes_view
---- PLAN
00:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
---- DISTRIBUTEDPLAN
01:EXCHANGE [UNPARTITIONED]
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
====
# Basic test with a complex view.
select * from functional.complex_view
---- PLAN
04:TOP-N [LIMIT=100]
| order by: b.string_col ASC
|
03:AGGREGATE [FINALIZE]
| output: count(a.bigint_col)
| group by: b.string_col
| having: count(a.bigint_col) > 1
|
02:HASH JOIN [INNER JOIN]
| hash predicates: a.id = b.id
|
|--01:SCAN HDFS [functional.alltypestiny b]
| partitions=4/4 size=460B compact
|
00:SCAN HDFS [functional.alltypesagg a]
partitions=11/11 size=814.73KB
predicates: a.bigint_col < 50
---- DISTRIBUTEDPLAN
08:MERGING-EXCHANGE [UNPARTITIONED]
| order by: b.string_col ASC
| limit: 100
|
04:TOP-N [LIMIT=100]
| order by: b.string_col ASC
|
07:AGGREGATE [MERGE FINALIZE]
| output: sum(count(a.bigint_col))
| group by: b.string_col
| having: count(a.bigint_col) > 1
|
06:EXCHANGE [HASH(b.string_col)]
|
03:AGGREGATE
| output: count(a.bigint_col)
| group by: b.string_col
|
02:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: a.id = b.id
|
|--05:EXCHANGE [BROADCAST]
| |
| 01:SCAN HDFS [functional.alltypestiny b]
| partitions=4/4 size=460B
|
00:SCAN HDFS [functional.alltypesagg a]
partitions=11/11 size=814.73KB
predicates: a.bigint_col < 50
====
# Basic test with a view on a view
select int_col, string_col from functional.view_view
---- PLAN
00:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
---- DISTRIBUTEDPLAN
01:EXCHANGE [UNPARTITIONED]
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
====
# View used in a union.
select * from functional.alltypes_view union all
select * from functional.alltypes_view where id < 10
---- PLAN
00:UNION
|
|--02:SCAN HDFS [functional.alltypes]
| partitions=24/24 size=478.45KB
| predicates: functional.alltypes.id < 10
|
01:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
---- DISTRIBUTEDPLAN
03:EXCHANGE [UNPARTITIONED]
|
00:UNION
|
|--02:SCAN HDFS [functional.alltypes]
| partitions=24/24 size=478.45KB
| predicates: functional.alltypes.id < 10
|
01:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
====
# View used in an inline view.
select t.id from (select id from functional.alltypes_view) t
where t.id < 10
---- PLAN
00:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
predicates: functional.alltypes.id < 10
---- DISTRIBUTEDPLAN
01:EXCHANGE [UNPARTITIONED]
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
predicates: functional.alltypes.id < 10
====
# Multiple views used in a join.
select * from functional.alltypes_view t1, functional.alltypes_view_sub t2,
functional.complex_view t3 where t1.id = t2.x and t2.x = t3.abc
---- PLAN
08:HASH JOIN [INNER JOIN]
| hash predicates: functional.alltypes.id = int_col
|
|--01:SCAN HDFS [functional.alltypes]
| partitions=24/24 size=478.45KB compact
| predicates: functional.alltypes.int_col > 1
|
07:HASH JOIN [INNER JOIN]
| hash predicates: functional.alltypes.id = count(a.bigint_col)
|
|--06:TOP-N [LIMIT=100]
| | order by: b.string_col ASC
| |
| 05:AGGREGATE [FINALIZE]
| | output: count(a.bigint_col)
| | group by: b.string_col
| | having: count(a.bigint_col) > 1
| |
| 04:HASH JOIN [INNER JOIN]
| | hash predicates: a.id = b.id
| |
| |--03:SCAN HDFS [functional.alltypestiny b]
| | partitions=4/4 size=460B compact
| |
| 02:SCAN HDFS [functional.alltypesagg a]
| partitions=11/11 size=814.73KB
| predicates: a.bigint_col < 50
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
predicates: functional.alltypes.id > 1
---- DISTRIBUTEDPLAN
15:EXCHANGE [UNPARTITIONED]
|
08:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: functional.alltypes.id = int_col
|
|--14:EXCHANGE [BROADCAST]
| |
| 01:SCAN HDFS [functional.alltypes]
| partitions=24/24 size=478.45KB
| predicates: functional.alltypes.int_col > 1
|
07:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: functional.alltypes.id = count(a.bigint_col)
|
|--13:EXCHANGE [BROADCAST]
| |
| 12:MERGING-EXCHANGE [UNPARTITIONED]
| | order by: b.string_col ASC
| | limit: 100
| |
| 06:TOP-N [LIMIT=100]
| | order by: b.string_col ASC
| |
| 11:AGGREGATE [MERGE FINALIZE]
| | output: sum(count(a.bigint_col))
| | group by: b.string_col
| | having: count(a.bigint_col) > 1
| |
| 10:EXCHANGE [HASH(b.string_col)]
| |
| 05:AGGREGATE
| | output: count(a.bigint_col)
| | group by: b.string_col
| |
| 04:HASH JOIN [INNER JOIN, BROADCAST]
| | hash predicates: a.id = b.id
| |
| |--09:EXCHANGE [BROADCAST]
| | |
| | 03:SCAN HDFS [functional.alltypestiny b]
| | partitions=4/4 size=460B
| |
| 02:SCAN HDFS [functional.alltypesagg a]
| partitions=11/11 size=814.73KB
| predicates: a.bigint_col < 50
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
predicates: functional.alltypes.id > 1
====
# Self-join of view to make sure the on clause is properly set
# in the cloned view instances.
select * from functional.alltypes_view t1
inner join functional.alltypes_view t2 on (t1.id = t2.id)
inner join functional.alltypes_view t3 on (t2.id = t3.id)
---- PLAN
04:HASH JOIN [INNER JOIN]
| hash predicates: functional.alltypes.id = functional.alltypes.id
|
|--02:SCAN HDFS [functional.alltypes]
| partitions=24/24 size=478.45KB compact
|
03:HASH JOIN [INNER JOIN]
| hash predicates: functional.alltypes.id = functional.alltypes.id
|
|--01:SCAN HDFS [functional.alltypes]
| partitions=24/24 size=478.45KB compact
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
---- DISTRIBUTEDPLAN
08:EXCHANGE [UNPARTITIONED]
|
04:HASH JOIN [INNER JOIN, PARTITIONED]
| hash predicates: functional.alltypes.id = functional.alltypes.id
|
|--07:EXCHANGE [HASH(functional.alltypes.id)]
| |
| 02:SCAN HDFS [functional.alltypes]
| partitions=24/24 size=478.45KB
|
03:HASH JOIN [INNER JOIN, PARTITIONED]
| hash predicates: functional.alltypes.id = functional.alltypes.id
|
|--06:EXCHANGE [HASH(functional.alltypes.id)]
| |
| 01:SCAN HDFS [functional.alltypes]
| partitions=24/24 size=478.45KB
|
05:EXCHANGE [HASH(functional.alltypes.id)]
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
====
# Self-join of view to make sure the using clause is properly set
# in the cloned view instances.
select * from functional.alltypes_view t1
inner join functional.alltypes_view t2 using(id)
inner join functional.alltypes_view t3 using(id)
---- PLAN
04:HASH JOIN [INNER JOIN]
| hash predicates: functional.alltypes.id = functional.alltypes.id
|
|--02:SCAN HDFS [functional.alltypes]
| partitions=24/24 size=478.45KB compact
|
03:HASH JOIN [INNER JOIN]
| hash predicates: functional.alltypes.id = functional.alltypes.id
|
|--01:SCAN HDFS [functional.alltypes]
| partitions=24/24 size=478.45KB compact
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
---- DISTRIBUTEDPLAN
08:EXCHANGE [UNPARTITIONED]
|
04:HASH JOIN [INNER JOIN, PARTITIONED]
| hash predicates: functional.alltypes.id = functional.alltypes.id
|
|--07:EXCHANGE [HASH(functional.alltypes.id)]
| |
| 02:SCAN HDFS [functional.alltypes]
| partitions=24/24 size=478.45KB
|
03:HASH JOIN [INNER JOIN, PARTITIONED]
| hash predicates: functional.alltypes.id = functional.alltypes.id
|
|--06:EXCHANGE [HASH(functional.alltypes.id)]
| |
| 01:SCAN HDFS [functional.alltypes]
| partitions=24/24 size=478.45KB
|
05:EXCHANGE [HASH(functional.alltypes.id)]
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
====
# Self-join of view to make sure the join op is properly set
# in the cloned view instances.
select * from functional.alltypes_view t1
left outer join functional.alltypes_view t2 using(id)
full outer join functional.alltypes_view t3 using(id)
---- PLAN
04:HASH JOIN [FULL OUTER JOIN]
| hash predicates: functional.alltypes.id = functional.alltypes.id
|
|--02:SCAN HDFS [functional.alltypes]
| partitions=24/24 size=478.45KB compact
|
03:HASH JOIN [LEFT OUTER JOIN]
| hash predicates: functional.alltypes.id = functional.alltypes.id
|
|--01:SCAN HDFS [functional.alltypes]
| partitions=24/24 size=478.45KB compact
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
---- DISTRIBUTEDPLAN
08:EXCHANGE [UNPARTITIONED]
|
04:HASH JOIN [FULL OUTER JOIN, PARTITIONED]
| hash predicates: functional.alltypes.id = functional.alltypes.id
|
|--07:EXCHANGE [HASH(functional.alltypes.id)]
| |
| 02:SCAN HDFS [functional.alltypes]
| partitions=24/24 size=478.45KB
|
03:HASH JOIN [LEFT OUTER JOIN, PARTITIONED]
| hash predicates: functional.alltypes.id = functional.alltypes.id
|
|--06:EXCHANGE [HASH(functional.alltypes.id)]
| |
| 01:SCAN HDFS [functional.alltypes]
| partitions=24/24 size=478.45KB
|
05:EXCHANGE [HASH(functional.alltypes.id)]
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
====
# Self-join of view to make sure join hints are properly set
# in the cloned view instances.
# Note that in the plans above without hints both joins are partitioned (shuffle);
# here the hints force the first join to broadcast and the second to shuffle.
select * from functional.alltypes_view t1
inner join [broadcast] functional.alltypes_view t2 using(id)
inner join [shuffle] functional.alltypes_view t3 using(id)
---- PLAN
04:HASH JOIN [INNER JOIN]
| hash predicates: functional.alltypes.id = functional.alltypes.id
|
|--02:SCAN HDFS [functional.alltypes]
| partitions=24/24 size=478.45KB compact
|
03:HASH JOIN [INNER JOIN]
| hash predicates: functional.alltypes.id = functional.alltypes.id
|
|--01:SCAN HDFS [functional.alltypes]
| partitions=24/24 size=478.45KB compact
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
---- DISTRIBUTEDPLAN
08:EXCHANGE [UNPARTITIONED]
|
04:HASH JOIN [INNER JOIN, PARTITIONED]
| hash predicates: functional.alltypes.id = functional.alltypes.id
|
|--07:EXCHANGE [HASH(functional.alltypes.id)]
| |
| 02:SCAN HDFS [functional.alltypes]
| partitions=24/24 size=478.45KB
|
06:EXCHANGE [HASH(functional.alltypes.id)]
|
03:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: functional.alltypes.id = functional.alltypes.id
|
|--05:EXCHANGE [BROADCAST]
| |
| 01:SCAN HDFS [functional.alltypes]
| partitions=24/24 size=478.45KB
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
====
# Tests that parentheses are preserved when creating a view,
# enabling proper partition pruning for this particular view.
select * from functional.alltypes_parens
---- PLAN
00:SCAN HDFS [functional.alltypes]
partitions=1/24 size=19.95KB
predicates: (int_col < 100 OR bool_col = FALSE)
---- DISTRIBUTEDPLAN
01:EXCHANGE [UNPARTITIONED]
|
00:SCAN HDFS [functional.alltypes]
partitions=1/24 size=19.95KB
predicates: (int_col < 100 OR bool_col = FALSE)
====
# Tests that slotrefs are correctly marked as assigned inside an inline view where
# possible (see IMPALA-923)
select bool_col FROM ( SELECT bool_col FROM functional.alltypes t ) t WHERE t.bool_col
---- PLAN
00:SCAN HDFS [functional.alltypes t]
partitions=24/24 size=478.45KB
predicates: bool_col
---- DISTRIBUTEDPLAN
01:EXCHANGE [UNPARTITIONED]
|
00:SCAN HDFS [functional.alltypes t]
partitions=24/24 size=478.45KB
predicates: bool_col
====