mirror of
https://github.com/apache/impala.git
synced 2026-01-06 15:01:43 -05:00
Change-Id: I2a317025f9a8549beed7cf79b463239e11a6a2d0 Reviewed-on: http://gerrit.ent.cloudera.com:8080/3352 Reviewed-by: Nong Li <nong@cloudera.com> Tested-by: jenkins Reviewed-on: http://gerrit.ent.cloudera.com:8080/3432 Reviewed-by: Alex Behm <alex.behm@cloudera.com>
434 lines
11 KiB
Plaintext
434 lines
11 KiB
Plaintext
# distinct *
|
|
select distinct *
|
|
from functional.testtbl
|
|
---- PLAN
|
|
01:AGGREGATE [FINALIZE]
|
|
| group by: functional.testtbl.id, functional.testtbl.name, functional.testtbl.zip
|
|
|
|
|
00:SCAN HDFS [functional.testtbl]
|
|
partitions=1/1 size=0B
|
|
---- DISTRIBUTEDPLAN
|
|
04:EXCHANGE [UNPARTITIONED]
|
|
|
|
|
03:AGGREGATE [MERGE FINALIZE]
|
|
| group by: functional.testtbl.id, functional.testtbl.name, functional.testtbl.zip
|
|
|
|
|
02:EXCHANGE [HASH(functional.testtbl.id,functional.testtbl.name,functional.testtbl.zip)]
|
|
|
|
|
01:AGGREGATE
|
|
| group by: functional.testtbl.id, functional.testtbl.name, functional.testtbl.zip
|
|
|
|
|
00:SCAN HDFS [functional.testtbl]
|
|
partitions=1/1 size=0B
|
|
====
|
|
# distinct w/ explicit select list
|
|
select distinct id, zip
|
|
from functional.testtbl
|
|
---- PLAN
|
|
01:AGGREGATE [FINALIZE]
|
|
| group by: id, zip
|
|
|
|
|
00:SCAN HDFS [functional.testtbl]
|
|
partitions=1/1 size=0B
|
|
---- DISTRIBUTEDPLAN
|
|
04:EXCHANGE [UNPARTITIONED]
|
|
|
|
|
03:AGGREGATE [MERGE FINALIZE]
|
|
| group by: id, zip
|
|
|
|
|
02:EXCHANGE [HASH(id,zip)]
|
|
|
|
|
01:AGGREGATE
|
|
| group by: id, zip
|
|
|
|
|
00:SCAN HDFS [functional.testtbl]
|
|
partitions=1/1 size=0B
|
|
====
|
|
# count(distinct)
|
|
select count(distinct id, zip)
|
|
from functional.testtbl
|
|
---- PLAN
|
|
02:AGGREGATE [MERGE FINALIZE]
|
|
| output: count(if(id IS NULL, NULL, zip))
|
|
|
|
|
01:AGGREGATE
|
|
| group by: id, zip
|
|
|
|
|
00:SCAN HDFS [functional.testtbl]
|
|
partitions=1/1 size=0B
|
|
---- DISTRIBUTEDPLAN
|
|
06:AGGREGATE [MERGE FINALIZE]
|
|
| output: sum(count(if(id IS NULL, NULL, zip)))
|
|
|
|
|
05:EXCHANGE [UNPARTITIONED]
|
|
|
|
|
02:AGGREGATE [MERGE]
|
|
| output: count(if(id IS NULL, NULL, zip))
|
|
|
|
|
04:AGGREGATE [MERGE]
|
|
| group by: id, zip
|
|
|
|
|
03:EXCHANGE [HASH(id,zip)]
|
|
|
|
|
01:AGGREGATE
|
|
| group by: id, zip
|
|
|
|
|
00:SCAN HDFS [functional.testtbl]
|
|
partitions=1/1 size=0B
|
|
====
|
|
# count(distinct) w/ grouping
|
|
select tinyint_col, count(distinct int_col, bigint_col)
|
|
from functional.alltypesagg
|
|
group by 1
|
|
---- PLAN
|
|
02:AGGREGATE [MERGE FINALIZE]
|
|
| output: count(if(int_col IS NULL, NULL, bigint_col))
|
|
| group by: tinyint_col
|
|
|
|
|
01:AGGREGATE
|
|
| group by: tinyint_col, int_col, bigint_col
|
|
|
|
|
00:SCAN HDFS [functional.alltypesagg]
|
|
partitions=11/11 size=814.73KB
|
|
---- DISTRIBUTEDPLAN
|
|
05:EXCHANGE [UNPARTITIONED]
|
|
|
|
|
02:AGGREGATE [MERGE FINALIZE]
|
|
| output: count(if(int_col IS NULL, NULL, bigint_col))
|
|
| group by: tinyint_col
|
|
|
|
|
04:AGGREGATE [MERGE]
|
|
| group by: tinyint_col, int_col, bigint_col
|
|
|
|
|
03:EXCHANGE [HASH(tinyint_col)]
|
|
|
|
|
01:AGGREGATE
|
|
| group by: tinyint_col, int_col, bigint_col
|
|
|
|
|
00:SCAN HDFS [functional.alltypesagg]
|
|
partitions=11/11 size=814.73KB
|
|
====
|
|
# count(distinct) and sum(distinct) w/ grouping
|
|
select tinyint_col, count(distinct int_col), sum(distinct int_col)
|
|
from functional.alltypesagg
|
|
group by 1
|
|
---- PLAN
|
|
02:AGGREGATE [MERGE FINALIZE]
|
|
| output: count(int_col), sum(int_col)
|
|
| group by: tinyint_col
|
|
|
|
|
01:AGGREGATE
|
|
| group by: tinyint_col, int_col
|
|
|
|
|
00:SCAN HDFS [functional.alltypesagg]
|
|
partitions=11/11 size=814.73KB
|
|
---- DISTRIBUTEDPLAN
|
|
05:EXCHANGE [UNPARTITIONED]
|
|
|
|
|
02:AGGREGATE [MERGE FINALIZE]
|
|
| output: count(int_col), sum(int_col)
|
|
| group by: tinyint_col
|
|
|
|
|
04:AGGREGATE [MERGE]
|
|
| group by: tinyint_col, int_col
|
|
|
|
|
03:EXCHANGE [HASH(tinyint_col)]
|
|
|
|
|
01:AGGREGATE
|
|
| group by: tinyint_col, int_col
|
|
|
|
|
00:SCAN HDFS [functional.alltypesagg]
|
|
partitions=11/11 size=814.73KB
|
|
====
|
|
# sum(distinct) w/o grouping
|
|
select sum(distinct int_col)
|
|
from functional.alltypesagg
|
|
---- PLAN
|
|
02:AGGREGATE [MERGE FINALIZE]
|
|
| output: sum(int_col)
|
|
|
|
|
01:AGGREGATE
|
|
| group by: int_col
|
|
|
|
|
00:SCAN HDFS [functional.alltypesagg]
|
|
partitions=11/11 size=814.73KB
|
|
---- DISTRIBUTEDPLAN
|
|
06:AGGREGATE [MERGE FINALIZE]
|
|
| output: sum(sum(int_col))
|
|
|
|
|
05:EXCHANGE [UNPARTITIONED]
|
|
|
|
|
02:AGGREGATE [MERGE]
|
|
| output: sum(int_col)
|
|
|
|
|
04:AGGREGATE [MERGE]
|
|
| group by: int_col
|
|
|
|
|
03:EXCHANGE [HASH(int_col)]
|
|
|
|
|
01:AGGREGATE
|
|
| group by: int_col
|
|
|
|
|
00:SCAN HDFS [functional.alltypesagg]
|
|
partitions=11/11 size=814.73KB
|
|
====
|
|
# count(distinct) w/ grouping; distinct in min() and max()
|
|
# is ignored
|
|
select tinyint_col, count(distinct int_col),
|
|
min(distinct smallint_col), max(distinct string_col)
|
|
from functional.alltypesagg group by 1
|
|
---- PLAN
|
|
02:AGGREGATE [MERGE FINALIZE]
|
|
| output: count(int_col), min(min(smallint_col)), max(max(string_col))
|
|
| group by: tinyint_col
|
|
|
|
|
01:AGGREGATE
|
|
| output: min(smallint_col), max(string_col)
|
|
| group by: tinyint_col, int_col
|
|
|
|
|
00:SCAN HDFS [functional.alltypesagg]
|
|
partitions=11/11 size=814.73KB
|
|
---- DISTRIBUTEDPLAN
|
|
05:EXCHANGE [UNPARTITIONED]
|
|
|
|
|
02:AGGREGATE [MERGE FINALIZE]
|
|
| output: count(int_col), min(min(smallint_col)), max(max(string_col))
|
|
| group by: tinyint_col
|
|
|
|
|
04:AGGREGATE [MERGE]
|
|
| output: min(min(smallint_col)), max(max(string_col))
|
|
| group by: tinyint_col, int_col
|
|
|
|
|
03:EXCHANGE [HASH(tinyint_col)]
|
|
|
|
|
01:AGGREGATE
|
|
| output: min(smallint_col), max(string_col)
|
|
| group by: tinyint_col, int_col
|
|
|
|
|
00:SCAN HDFS [functional.alltypesagg]
|
|
partitions=11/11 size=814.73KB
|
|
====
|
|
# aggregate fns with and without distinct
|
|
select tinyint_col, count(distinct int_col), count(*), sum(distinct int_col),
|
|
sum(int_col), min(smallint_col), max(bigint_col)
|
|
from functional.alltypesagg group by 1
|
|
---- PLAN
|
|
02:AGGREGATE [MERGE FINALIZE]
|
|
| output: count(int_col), sum(int_col), sum(count(*)), sum(sum(int_col)), min(min(smallint_col)), max(max(bigint_col))
|
|
| group by: tinyint_col
|
|
|
|
|
01:AGGREGATE
|
|
| output: count(*), sum(int_col), min(smallint_col), max(bigint_col)
|
|
| group by: tinyint_col, int_col
|
|
|
|
|
00:SCAN HDFS [functional.alltypesagg]
|
|
partitions=11/11 size=814.73KB
|
|
---- DISTRIBUTEDPLAN
|
|
05:EXCHANGE [UNPARTITIONED]
|
|
|
|
|
02:AGGREGATE [MERGE FINALIZE]
|
|
| output: count(int_col), sum(int_col), sum(count(*)), sum(sum(int_col)), min(min(smallint_col)), max(max(bigint_col))
|
|
| group by: tinyint_col
|
|
|
|
|
04:AGGREGATE [MERGE]
|
|
| output: sum(count(*)), sum(sum(int_col)), min(min(smallint_col)), max(max(bigint_col))
|
|
| group by: tinyint_col, int_col
|
|
|
|
|
03:EXCHANGE [HASH(tinyint_col)]
|
|
|
|
|
01:AGGREGATE
|
|
| output: count(*), sum(int_col), min(smallint_col), max(bigint_col)
|
|
| group by: tinyint_col, int_col
|
|
|
|
|
00:SCAN HDFS [functional.alltypesagg]
|
|
partitions=11/11 size=814.73KB
|
|
====
|
|
# test join on inline views containing distinct aggregates to make sure
|
|
# the aggregation info reports the correct tuple ids (from the 2nd phase
|
|
# distinct aggregation) for the inline-view expression substitution
|
|
select t1.c, t2.c from
|
|
(select count(distinct int_col) as c from functional.alltypestiny) t1 inner join
|
|
(select count(distinct bigint_col) as c from functional.alltypestiny) t2 on (t1.c = t2.c)
|
|
---- PLAN
|
|
06:HASH JOIN [INNER JOIN]
|
|
| hash predicates: count(int_col) = count(bigint_col)
|
|
|
|
|
|--05:AGGREGATE [MERGE FINALIZE]
|
|
| | output: count(bigint_col)
|
|
| |
|
|
| 04:AGGREGATE
|
|
| | group by: bigint_col
|
|
| |
|
|
| 03:SCAN HDFS [functional.alltypestiny]
|
|
| partitions=4/4 size=460B
|
|
|
|
|
02:AGGREGATE [MERGE FINALIZE]
|
|
| output: count(int_col)
|
|
|
|
|
01:AGGREGATE
|
|
| group by: int_col
|
|
|
|
|
00:SCAN HDFS [functional.alltypestiny]
|
|
partitions=4/4 size=460B
|
|
---- DISTRIBUTEDPLAN
|
|
06:HASH JOIN [INNER JOIN, BROADCAST]
|
|
| hash predicates: count(int_col) = count(bigint_col)
|
|
|
|
|
|--15:EXCHANGE [UNPARTITIONED]
|
|
| |
|
|
| 14:AGGREGATE [MERGE FINALIZE]
|
|
| | output: sum(count(bigint_col))
|
|
| |
|
|
| 13:EXCHANGE [UNPARTITIONED]
|
|
| |
|
|
| 05:AGGREGATE [MERGE]
|
|
| | output: count(bigint_col)
|
|
| |
|
|
| 12:AGGREGATE [MERGE]
|
|
| | group by: bigint_col
|
|
| |
|
|
| 11:EXCHANGE [HASH(bigint_col)]
|
|
| |
|
|
| 04:AGGREGATE
|
|
| | group by: bigint_col
|
|
| |
|
|
| 03:SCAN HDFS [functional.alltypestiny]
|
|
| partitions=4/4 size=460B
|
|
|
|
|
10:AGGREGATE [MERGE FINALIZE]
|
|
| output: sum(count(int_col))
|
|
|
|
|
09:EXCHANGE [UNPARTITIONED]
|
|
|
|
|
02:AGGREGATE [MERGE]
|
|
| output: count(int_col)
|
|
|
|
|
08:AGGREGATE [MERGE]
|
|
| group by: int_col
|
|
|
|
|
07:EXCHANGE [HASH(int_col)]
|
|
|
|
|
01:AGGREGATE
|
|
| group by: int_col
|
|
|
|
|
00:SCAN HDFS [functional.alltypestiny]
|
|
partitions=4/4 size=460B
|
|
====
|
|
# Test placement of having predicate into 2nd phase merge agg for
|
|
# distinct + non-distinct aggregates without group by (IMPALA-845).
|
|
# TODO: Fix the incorrect labels for non-distinct agg expr after the
|
|
# 1st phase merge. We'd need to create more smaps during analysis
|
|
# because there are more than two levels of merging for the
|
|
# non-distinct agg expr.
|
|
select count(distinct tinyint_col) from functional.alltypes
|
|
having count(bigint_col) > 0
|
|
---- PLAN
|
|
02:AGGREGATE [MERGE FINALIZE]
|
|
| output: count(tinyint_col), sum(count(bigint_col))
|
|
| having: zeroifnull(sum(count(bigint_col))) > 0
|
|
|
|
|
01:AGGREGATE
|
|
| output: count(bigint_col)
|
|
| group by: tinyint_col
|
|
|
|
|
00:SCAN HDFS [functional.alltypes]
|
|
partitions=24/24 size=478.45KB
|
|
---- DISTRIBUTEDPLAN
|
|
06:AGGREGATE [MERGE FINALIZE]
|
|
| output: sum(count(tinyint_col)), sum(sum(count(bigint_col)))
|
|
| having: zeroifnull(sum(count(bigint_col))) > 0
|
|
|
|
|
05:EXCHANGE [UNPARTITIONED]
|
|
|
|
|
02:AGGREGATE [MERGE]
|
|
| output: count(tinyint_col), sum(count(bigint_col))
|
|
|
|
|
04:AGGREGATE [MERGE]
|
|
| output: sum(count(bigint_col))
|
|
| group by: tinyint_col
|
|
|
|
|
03:EXCHANGE [HASH(tinyint_col)]
|
|
|
|
|
01:AGGREGATE
|
|
| output: count(bigint_col)
|
|
| group by: tinyint_col
|
|
|
|
|
00:SCAN HDFS [functional.alltypes]
|
|
partitions=24/24 size=478.45KB
|
|
====
|
|
# test slot materialization on a distinct agg inside an inline view
|
|
# triggered by a predicate in an outer query block (IMPALA-861)
|
|
select 1 from
|
|
(select count(distinct 1) x from functional.alltypes) t
|
|
where t.x is not null
|
|
---- PLAN
|
|
02:AGGREGATE [MERGE FINALIZE]
|
|
| output: count(1)
|
|
| having: count(1) IS NOT NULL
|
|
|
|
|
01:AGGREGATE
|
|
| group by: 1
|
|
|
|
|
00:SCAN HDFS [functional.alltypes]
|
|
partitions=24/24 size=478.45KB
|
|
---- DISTRIBUTEDPLAN
|
|
06:AGGREGATE [MERGE FINALIZE]
|
|
| output: sum(count(1))
|
|
| having: count(1) IS NOT NULL
|
|
|
|
|
05:EXCHANGE [UNPARTITIONED]
|
|
|
|
|
02:AGGREGATE [MERGE]
|
|
| output: count(1)
|
|
|
|
|
04:AGGREGATE [MERGE]
|
|
| group by: 1
|
|
|
|
|
03:EXCHANGE [HASH(1)]
|
|
|
|
|
01:AGGREGATE
|
|
| group by: 1
|
|
|
|
|
00:SCAN HDFS [functional.alltypes]
|
|
partitions=24/24 size=478.45KB
|
|
====
|
|
# test slot materialization on distinct and non-distinct aggs inside an inline
|
|
# view triggered by multiple predicates in an outer query block (IMPALA-861)
|
|
select 1 from
|
|
(select count(distinct 1) x, count(1) y from functional.alltypes) t
|
|
where t.x + t.y > 10 and t.x > 0 and t.y > 1
|
|
---- PLAN
|
|
02:AGGREGATE [MERGE FINALIZE]
|
|
| output: count(1), sum(count(1))
|
|
| having: count(1) > 0, count(1) + zeroifnull(sum(count(1))) > 10, zeroifnull(sum(count(1))) > 1
|
|
|
|
|
01:AGGREGATE
|
|
| output: count(1)
|
|
| group by: 1
|
|
|
|
|
00:SCAN HDFS [functional.alltypes]
|
|
partitions=24/24 size=478.45KB
|
|
---- DISTRIBUTEDPLAN
|
|
06:AGGREGATE [MERGE FINALIZE]
|
|
| output: sum(count(1)), sum(sum(count(1)))
|
|
| having: count(1) > 0, count(1) + zeroifnull(sum(count(1))) > 10, zeroifnull(sum(count(1))) > 1
|
|
|
|
|
05:EXCHANGE [UNPARTITIONED]
|
|
|
|
|
02:AGGREGATE [MERGE]
|
|
| output: count(1), sum(count(1))
|
|
|
|
|
04:AGGREGATE [MERGE]
|
|
| output: sum(count(1))
|
|
| group by: 1
|
|
|
|
|
03:EXCHANGE [HASH(1)]
|
|
|
|
|
01:AGGREGATE
|
|
| output: count(1)
|
|
| group by: 1
|
|
|
|
|
00:SCAN HDFS [functional.alltypes]
|
|
partitions=24/24 size=478.45KB
|
|
====
|