Files
impala/testdata/workloads/functional-planner/queries/PlannerTest/distinct.test
Alex Behm 21c9eb68b1 Restore casts stripped from grouping exprs by substitution.
Change-Id: I2a317025f9a8549beed7cf79b463239e11a6a2d0
Reviewed-on: http://gerrit.ent.cloudera.com:8080/3352
Reviewed-by: Nong Li <nong@cloudera.com>
Tested-by: jenkins
Reviewed-on: http://gerrit.ent.cloudera.com:8080/3432
Reviewed-by: Alex Behm <alex.behm@cloudera.com>
2014-07-08 10:45:43 -07:00

434 lines
11 KiB
Plaintext

# distinct *
select distinct *
from functional.testtbl
---- PLAN
01:AGGREGATE [FINALIZE]
| group by: functional.testtbl.id, functional.testtbl.name, functional.testtbl.zip
|
00:SCAN HDFS [functional.testtbl]
partitions=1/1 size=0B
---- DISTRIBUTEDPLAN
04:EXCHANGE [UNPARTITIONED]
|
03:AGGREGATE [MERGE FINALIZE]
| group by: functional.testtbl.id, functional.testtbl.name, functional.testtbl.zip
|
02:EXCHANGE [HASH(functional.testtbl.id,functional.testtbl.name,functional.testtbl.zip)]
|
01:AGGREGATE
| group by: functional.testtbl.id, functional.testtbl.name, functional.testtbl.zip
|
00:SCAN HDFS [functional.testtbl]
partitions=1/1 size=0B
====
# distinct w/ explicit select list
select distinct id, zip
from functional.testtbl
---- PLAN
01:AGGREGATE [FINALIZE]
| group by: id, zip
|
00:SCAN HDFS [functional.testtbl]
partitions=1/1 size=0B
---- DISTRIBUTEDPLAN
04:EXCHANGE [UNPARTITIONED]
|
03:AGGREGATE [MERGE FINALIZE]
| group by: id, zip
|
02:EXCHANGE [HASH(id,zip)]
|
01:AGGREGATE
| group by: id, zip
|
00:SCAN HDFS [functional.testtbl]
partitions=1/1 size=0B
====
# count(distinct) w/ multiple exprs, w/o grouping
select count(distinct id, zip)
from functional.testtbl
---- PLAN
02:AGGREGATE [MERGE FINALIZE]
| output: count(if(id IS NULL, NULL, zip))
|
01:AGGREGATE
| group by: id, zip
|
00:SCAN HDFS [functional.testtbl]
partitions=1/1 size=0B
---- DISTRIBUTEDPLAN
06:AGGREGATE [MERGE FINALIZE]
| output: sum(count(if(id IS NULL, NULL, zip)))
|
05:EXCHANGE [UNPARTITIONED]
|
02:AGGREGATE [MERGE]
| output: count(if(id IS NULL, NULL, zip))
|
04:AGGREGATE [MERGE]
| group by: id, zip
|
03:EXCHANGE [HASH(id,zip)]
|
01:AGGREGATE
| group by: id, zip
|
00:SCAN HDFS [functional.testtbl]
partitions=1/1 size=0B
====
# count(distinct) w/ multiple exprs and grouping
select tinyint_col, count(distinct int_col, bigint_col)
from functional.alltypesagg
group by 1
---- PLAN
02:AGGREGATE [MERGE FINALIZE]
| output: count(if(int_col IS NULL, NULL, bigint_col))
| group by: tinyint_col
|
01:AGGREGATE
| group by: tinyint_col, int_col, bigint_col
|
00:SCAN HDFS [functional.alltypesagg]
partitions=11/11 size=814.73KB
---- DISTRIBUTEDPLAN
05:EXCHANGE [UNPARTITIONED]
|
02:AGGREGATE [MERGE FINALIZE]
| output: count(if(int_col IS NULL, NULL, bigint_col))
| group by: tinyint_col
|
04:AGGREGATE [MERGE]
| group by: tinyint_col, int_col, bigint_col
|
03:EXCHANGE [HASH(tinyint_col)]
|
01:AGGREGATE
| group by: tinyint_col, int_col, bigint_col
|
00:SCAN HDFS [functional.alltypesagg]
partitions=11/11 size=814.73KB
====
# count(distinct) and sum(distinct) w/ grouping
select tinyint_col, count(distinct int_col), sum(distinct int_col)
from functional.alltypesagg
group by 1
---- PLAN
02:AGGREGATE [MERGE FINALIZE]
| output: count(int_col), sum(int_col)
| group by: tinyint_col
|
01:AGGREGATE
| group by: tinyint_col, int_col
|
00:SCAN HDFS [functional.alltypesagg]
partitions=11/11 size=814.73KB
---- DISTRIBUTEDPLAN
05:EXCHANGE [UNPARTITIONED]
|
02:AGGREGATE [MERGE FINALIZE]
| output: count(int_col), sum(int_col)
| group by: tinyint_col
|
04:AGGREGATE [MERGE]
| group by: tinyint_col, int_col
|
03:EXCHANGE [HASH(tinyint_col)]
|
01:AGGREGATE
| group by: tinyint_col, int_col
|
00:SCAN HDFS [functional.alltypesagg]
partitions=11/11 size=814.73KB
====
# sum(distinct) w/o grouping
select sum(distinct int_col)
from functional.alltypesagg
---- PLAN
02:AGGREGATE [MERGE FINALIZE]
| output: sum(int_col)
|
01:AGGREGATE
| group by: int_col
|
00:SCAN HDFS [functional.alltypesagg]
partitions=11/11 size=814.73KB
---- DISTRIBUTEDPLAN
06:AGGREGATE [MERGE FINALIZE]
| output: sum(sum(int_col))
|
05:EXCHANGE [UNPARTITIONED]
|
02:AGGREGATE [MERGE]
| output: sum(int_col)
|
04:AGGREGATE [MERGE]
| group by: int_col
|
03:EXCHANGE [HASH(int_col)]
|
01:AGGREGATE
| group by: int_col
|
00:SCAN HDFS [functional.alltypesagg]
partitions=11/11 size=814.73KB
====
# count(distinct) w/ grouping; distinct in min() and max()
# is ignored
select tinyint_col, count(distinct int_col),
min(distinct smallint_col), max(distinct string_col)
from functional.alltypesagg group by 1
---- PLAN
02:AGGREGATE [MERGE FINALIZE]
| output: count(int_col), min(min(smallint_col)), max(max(string_col))
| group by: tinyint_col
|
01:AGGREGATE
| output: min(smallint_col), max(string_col)
| group by: tinyint_col, int_col
|
00:SCAN HDFS [functional.alltypesagg]
partitions=11/11 size=814.73KB
---- DISTRIBUTEDPLAN
05:EXCHANGE [UNPARTITIONED]
|
02:AGGREGATE [MERGE FINALIZE]
| output: count(int_col), min(min(smallint_col)), max(max(string_col))
| group by: tinyint_col
|
04:AGGREGATE [MERGE]
| output: min(min(smallint_col)), max(max(string_col))
| group by: tinyint_col, int_col
|
03:EXCHANGE [HASH(tinyint_col)]
|
01:AGGREGATE
| output: min(smallint_col), max(string_col)
| group by: tinyint_col, int_col
|
00:SCAN HDFS [functional.alltypesagg]
partitions=11/11 size=814.73KB
====
# aggregate fns with and without distinct
select tinyint_col, count(distinct int_col), count(*), sum(distinct int_col),
sum(int_col), min(smallint_col), max(bigint_col)
from functional.alltypesagg group by 1
---- PLAN
02:AGGREGATE [MERGE FINALIZE]
| output: count(int_col), sum(int_col), sum(count(*)), sum(sum(int_col)), min(min(smallint_col)), max(max(bigint_col))
| group by: tinyint_col
|
01:AGGREGATE
| output: count(*), sum(int_col), min(smallint_col), max(bigint_col)
| group by: tinyint_col, int_col
|
00:SCAN HDFS [functional.alltypesagg]
partitions=11/11 size=814.73KB
---- DISTRIBUTEDPLAN
05:EXCHANGE [UNPARTITIONED]
|
02:AGGREGATE [MERGE FINALIZE]
| output: count(int_col), sum(int_col), sum(count(*)), sum(sum(int_col)), min(min(smallint_col)), max(max(bigint_col))
| group by: tinyint_col
|
04:AGGREGATE [MERGE]
| output: sum(count(*)), sum(sum(int_col)), min(min(smallint_col)), max(max(bigint_col))
| group by: tinyint_col, int_col
|
03:EXCHANGE [HASH(tinyint_col)]
|
01:AGGREGATE
| output: count(*), sum(int_col), min(smallint_col), max(bigint_col)
| group by: tinyint_col, int_col
|
00:SCAN HDFS [functional.alltypesagg]
partitions=11/11 size=814.73KB
====
# test join on inline views containing distinct aggregates to make sure
# the aggregation info reports the correct tuple ids (from the 2nd phase
# distinct aggregation) for the inline-view expression substitution
select t1.c, t2.c from
(select count(distinct int_col) as c from functional.alltypestiny) t1 inner join
(select count(distinct bigint_col) as c from functional.alltypestiny) t2 on (t1.c = t2.c)
---- PLAN
06:HASH JOIN [INNER JOIN]
| hash predicates: count(int_col) = count(bigint_col)
|
|--05:AGGREGATE [MERGE FINALIZE]
| | output: count(bigint_col)
| |
| 04:AGGREGATE
| | group by: bigint_col
| |
| 03:SCAN HDFS [functional.alltypestiny]
| partitions=4/4 size=460B
|
02:AGGREGATE [MERGE FINALIZE]
| output: count(int_col)
|
01:AGGREGATE
| group by: int_col
|
00:SCAN HDFS [functional.alltypestiny]
partitions=4/4 size=460B
---- DISTRIBUTEDPLAN
06:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: count(int_col) = count(bigint_col)
|
|--15:EXCHANGE [UNPARTITIONED]
| |
| 14:AGGREGATE [MERGE FINALIZE]
| | output: sum(count(bigint_col))
| |
| 13:EXCHANGE [UNPARTITIONED]
| |
| 05:AGGREGATE [MERGE]
| | output: count(bigint_col)
| |
| 12:AGGREGATE [MERGE]
| | group by: bigint_col
| |
| 11:EXCHANGE [HASH(bigint_col)]
| |
| 04:AGGREGATE
| | group by: bigint_col
| |
| 03:SCAN HDFS [functional.alltypestiny]
| partitions=4/4 size=460B
|
10:AGGREGATE [MERGE FINALIZE]
| output: sum(count(int_col))
|
09:EXCHANGE [UNPARTITIONED]
|
02:AGGREGATE [MERGE]
| output: count(int_col)
|
08:AGGREGATE [MERGE]
| group by: int_col
|
07:EXCHANGE [HASH(int_col)]
|
01:AGGREGATE
| group by: int_col
|
00:SCAN HDFS [functional.alltypestiny]
partitions=4/4 size=460B
====
# Test placement of having predicate into 2nd phase merge agg for
# distinct + non-distinct aggregates without group by (IMPALA-845).
# TODO: Fix the incorrect labels for non-distinct agg expr after the
# 1st phase merge. We'd need to create more smaps during analysis
# because there are more than two levels of merging for the
# non-distinct agg expr.
select count(distinct tinyint_col) from functional.alltypes
having count(bigint_col) > 0
---- PLAN
02:AGGREGATE [MERGE FINALIZE]
| output: count(tinyint_col), sum(count(bigint_col))
| having: zeroifnull(sum(count(bigint_col))) > 0
|
01:AGGREGATE
| output: count(bigint_col)
| group by: tinyint_col
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
---- DISTRIBUTEDPLAN
06:AGGREGATE [MERGE FINALIZE]
| output: sum(count(tinyint_col)), sum(sum(count(bigint_col)))
| having: zeroifnull(sum(count(bigint_col))) > 0
|
05:EXCHANGE [UNPARTITIONED]
|
02:AGGREGATE [MERGE]
| output: count(tinyint_col), sum(count(bigint_col))
|
04:AGGREGATE [MERGE]
| output: sum(count(bigint_col))
| group by: tinyint_col
|
03:EXCHANGE [HASH(tinyint_col)]
|
01:AGGREGATE
| output: count(bigint_col)
| group by: tinyint_col
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
====
# test slot materialization on a distinct agg inside an inline view
# triggered by a predicate in an outer query block (IMPALA-861)
select 1 from
(select count(distinct 1) x from functional.alltypes) t
where t.x is not null
---- PLAN
02:AGGREGATE [MERGE FINALIZE]
| output: count(1)
| having: count(1) IS NOT NULL
|
01:AGGREGATE
| group by: 1
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
---- DISTRIBUTEDPLAN
06:AGGREGATE [MERGE FINALIZE]
| output: sum(count(1))
| having: count(1) IS NOT NULL
|
05:EXCHANGE [UNPARTITIONED]
|
02:AGGREGATE [MERGE]
| output: count(1)
|
04:AGGREGATE [MERGE]
| group by: 1
|
03:EXCHANGE [HASH(1)]
|
01:AGGREGATE
| group by: 1
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
====
# test slot materialization on a distinct agg and a non-distinct agg inside an
# inline view, triggered by predicates in an outer query block (IMPALA-861)
select 1 from
(select count(distinct 1) x, count(1) y from functional.alltypes) t
where t.x + t.y > 10 and t.x > 0 and t.y > 1
---- PLAN
02:AGGREGATE [MERGE FINALIZE]
| output: count(1), sum(count(1))
| having: count(1) > 0, count(1) + zeroifnull(sum(count(1))) > 10, zeroifnull(sum(count(1))) > 1
|
01:AGGREGATE
| output: count(1)
| group by: 1
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
---- DISTRIBUTEDPLAN
06:AGGREGATE [MERGE FINALIZE]
| output: sum(count(1)), sum(sum(count(1)))
| having: count(1) > 0, count(1) + zeroifnull(sum(count(1))) > 10, zeroifnull(sum(count(1))) > 1
|
05:EXCHANGE [UNPARTITIONED]
|
02:AGGREGATE [MERGE]
| output: count(1), sum(count(1))
|
04:AGGREGATE [MERGE]
| output: sum(count(1))
| group by: 1
|
03:EXCHANGE [HASH(1)]
|
01:AGGREGATE
| output: count(1)
| group by: 1
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 size=478.45KB
====