mirror of
https://github.com/apache/impala.git
synced 2026-02-03 00:00:40 -05:00
This adds a new version of the pre-existing partition key scan optimization that always returns correct results, even when files have zero rows. This new version is always enabled by default. The old existing optimization, which does a metadata-only query, is still enabled behind the OPTIMIZE_PARTITION_KEY_SCANS query option. The new version of the optimization must scan the files to see if they are non-empty. Instead of using metadata only, the planner instructs the backend to short-circuit HDFS scans after a single row has been returned from each file. This gives results equivalent to returning all the rows from each file, because all rows in the file belong to the same partition and therefore have identical values for any columns that are partition key values. Planner cardinality estimates are adjusted accordingly to enable potentially better plans and other optimisations like disabling codegen. We make some effort to avoid generating extra scan ranges for remote scans by only generating one range per remote file. The backend optimisation is implemented by constructing a row batch with capacity for a single row only and then terminating each scan range once a single row has been produced. Both Parquet and ORC have optimized code paths for zero slot table scans that mean this will only result in a footer read. (Other file formats still need to read some portion of the file, but can terminate early once one row has been produced.) This should be quite efficient in practice with file handle caching and data caching enabled, because it then only requires reading the footer from the cache for each file. The partition key scan optimization is also slightly generalised to apply to scans of unpartitioned tables where no slots are materialized. A limitation of the optimization where it did not apply to multiple grouping classes was also fixed. Limitations: * This still scans every file in the partition. I.e. 
there is no short-circuiting if a row has already been found in the partition by the current scan node. * Resource reservations and estimates for the scan node do not all take into account this optimisation, so are conservative - they assume the whole file is scanned. Testing: * Added end-to-end tests that execute the query on all HDFS file formats and verify that the correct number of rows flow through the plan. * Added planner test based on the existing test partition key scan test. * Added test to make sure single node optimisation kicks in when expected. * Add test for cardinality estimates with and without stats * Added test for unpartitioned tables. * Added planner test that checks that optimisation is enabled for multiple aggregation classes. * Added a targeted perf test. Change-Id: I26c87525a4f75ffeb654267b89948653b2e1ff8c Reviewed-on: http://gerrit.cloudera.org:8080/13993 Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
1115 lines
35 KiB
Plaintext
1115 lines
35 KiB
Plaintext
# A subset of aggregation classes are materialized. No group by.
|
|
select a, c, e1, e2, e3 from
|
|
(select count(distinct tinyint_col) a, avg(distinct smallint_col) b,
|
|
count(distinct int_col) c, avg(distinct bigint_col) d,
|
|
min(float_col) e1, max(float_col) e2, sum(double_col) e3
|
|
from functional.alltypes) v
|
|
---- PLAN
|
|
PLAN-ROOT SINK
|
|
|
|
|
03:AGGREGATE [FINALIZE]
|
|
| output: aggif(valid_tid(2,5,7,10,11) = 2, count(tinyint_col)), aggif(valid_tid(2,5,7,10,11) = 7, count(int_col)), aggif(valid_tid(2,5,7,10,11) = 11, min(float_col)), aggif(valid_tid(2,5,7,10,11) = 11, max(float_col)), aggif(valid_tid(2,5,7,10,11) = 11, sum(double_col))
|
|
| row-size=32B cardinality=1
|
|
|
|
|
02:AGGREGATE [FINALIZE]
|
|
| Class 0
|
|
| output: count(tinyint_col)
|
|
| Class 1
|
|
| output: count(int_col)
|
|
| Class 2
|
|
| output: min:merge(float_col), max:merge(float_col), sum:merge(double_col)
|
|
| row-size=32B cardinality=3
|
|
|
|
|
01:AGGREGATE
|
|
| Class 0
|
|
| group by: tinyint_col
|
|
| Class 1
|
|
| group by: int_col
|
|
| Class 2
|
|
| output: min(float_col), max(float_col), sum(double_col)
|
|
| row-size=21B cardinality=21
|
|
|
|
|
00:SCAN HDFS [functional.alltypes]
|
|
HDFS partitions=24/24 files=24 size=478.45KB
|
|
row-size=17B cardinality=7.30K
|
|
---- DISTRIBUTEDPLAN
|
|
PLAN-ROOT SINK
|
|
|
|
|
03:AGGREGATE [FINALIZE]
|
|
| output: aggif(valid_tid(2,5,7,10,11) = 2, count(tinyint_col)), aggif(valid_tid(2,5,7,10,11) = 7, count(int_col)), aggif(valid_tid(2,5,7,10,11) = 11, min(float_col)), aggif(valid_tid(2,5,7,10,11) = 11, max(float_col)), aggif(valid_tid(2,5,7,10,11) = 11, sum(double_col))
|
|
| row-size=32B cardinality=1
|
|
|
|
|
07:AGGREGATE [FINALIZE]
|
|
| Class 0
|
|
| output: count:merge(tinyint_col)
|
|
| Class 1
|
|
| output: count:merge(int_col)
|
|
| Class 2
|
|
| output: min:merge(float_col), max:merge(float_col), sum:merge(double_col)
|
|
| row-size=32B cardinality=3
|
|
|
|
|
06:EXCHANGE [UNPARTITIONED]
|
|
|
|
|
02:AGGREGATE
|
|
| Class 0
|
|
| output: count(tinyint_col)
|
|
| Class 1
|
|
| output: count(int_col)
|
|
| Class 2
|
|
| output: min:merge(float_col), max:merge(float_col), sum:merge(double_col)
|
|
| row-size=32B cardinality=3
|
|
|
|
|
05:AGGREGATE
|
|
| Class 0
|
|
| group by: tinyint_col
|
|
| Class 1
|
|
| group by: int_col
|
|
| Class 2
|
|
| output: min:merge(float_col), max:merge(float_col), sum:merge(double_col)
|
|
| row-size=21B cardinality=21
|
|
|
|
|
04:EXCHANGE [HASH(CASE valid_tid(1,6,11) WHEN 1 THEN murmur_hash(tinyint_col) WHEN 6 THEN murmur_hash(int_col) WHEN 11 THEN 0 END)]
|
|
|
|
|
01:AGGREGATE [STREAMING]
|
|
| Class 0
|
|
| group by: tinyint_col
|
|
| Class 1
|
|
| group by: int_col
|
|
| Class 2
|
|
| output: min(float_col), max(float_col), sum(double_col)
|
|
| row-size=21B cardinality=21
|
|
|
|
|
00:SCAN HDFS [functional.alltypes]
|
|
HDFS partitions=24/24 files=24 size=478.45KB
|
|
row-size=17B cardinality=7.30K
|
|
====
|
|
# A subset of aggregation classes are materialized. A subset of aggregation functions
|
|
# of the surviving classes are materialized. No group by.
|
|
select a1, c2, e2 from
|
|
(select count(distinct tinyint_col) a1, avg(distinct tinyint_col) a2,
|
|
count(distinct smallint_col) b1, avg(distinct smallint_col) b2,
|
|
count(distinct int_col) c1, avg(distinct int_col) c2,
|
|
count(distinct bigint_col) d1, avg(distinct bigint_col) d2,
|
|
min(float_col) e1, max(float_col) e2, sum(double_col) e3
|
|
from functional.alltypes) v
|
|
---- PLAN
|
|
PLAN-ROOT SINK
|
|
|
|
|
03:AGGREGATE [FINALIZE]
|
|
| output: aggif(valid_tid(3,6,9,12,13) = 3, count(tinyint_col)), aggif(valid_tid(3,6,9,12,13) = 9, avg(int_col)), aggif(valid_tid(3,6,9,12,13) = 13, max(float_col))
|
|
| row-size=20B cardinality=1
|
|
|
|
|
02:AGGREGATE [FINALIZE]
|
|
| Class 0
|
|
| output: count(tinyint_col)
|
|
| Class 1
|
|
| output: avg(int_col)
|
|
| Class 2
|
|
| output: max:merge(float_col)
|
|
| row-size=20B cardinality=3
|
|
|
|
|
01:AGGREGATE
|
|
| Class 0
|
|
| group by: tinyint_col
|
|
| Class 1
|
|
| group by: int_col
|
|
| Class 2
|
|
| output: max(float_col)
|
|
| row-size=9B cardinality=21
|
|
|
|
|
00:SCAN HDFS [functional.alltypes]
|
|
HDFS partitions=24/24 files=24 size=478.45KB
|
|
row-size=9B cardinality=7.30K
|
|
---- DISTRIBUTEDPLAN
|
|
PLAN-ROOT SINK
|
|
|
|
|
03:AGGREGATE [FINALIZE]
|
|
| output: aggif(valid_tid(3,6,9,12,13) = 3, count(tinyint_col)), aggif(valid_tid(3,6,9,12,13) = 9, avg(int_col)), aggif(valid_tid(3,6,9,12,13) = 13, max(float_col))
|
|
| row-size=20B cardinality=1
|
|
|
|
|
07:AGGREGATE [FINALIZE]
|
|
| Class 0
|
|
| output: count:merge(tinyint_col)
|
|
| Class 1
|
|
| output: avg:merge(int_col)
|
|
| Class 2
|
|
| output: max:merge(float_col)
|
|
| row-size=20B cardinality=3
|
|
|
|
|
06:EXCHANGE [UNPARTITIONED]
|
|
|
|
|
02:AGGREGATE
|
|
| Class 0
|
|
| output: count(tinyint_col)
|
|
| Class 1
|
|
| output: avg(int_col)
|
|
| Class 2
|
|
| output: max:merge(float_col)
|
|
| row-size=20B cardinality=3
|
|
|
|
|
05:AGGREGATE
|
|
| Class 0
|
|
| group by: tinyint_col
|
|
| Class 1
|
|
| group by: int_col
|
|
| Class 2
|
|
| output: max:merge(float_col)
|
|
| row-size=9B cardinality=21
|
|
|
|
|
04:EXCHANGE [HASH(CASE valid_tid(1,7,13) WHEN 1 THEN murmur_hash(tinyint_col) WHEN 7 THEN murmur_hash(int_col) WHEN 13 THEN 0 END)]
|
|
|
|
|
01:AGGREGATE [STREAMING]
|
|
| Class 0
|
|
| group by: tinyint_col
|
|
| Class 1
|
|
| group by: int_col
|
|
| Class 2
|
|
| output: max(float_col)
|
|
| row-size=9B cardinality=21
|
|
|
|
|
00:SCAN HDFS [functional.alltypes]
|
|
HDFS partitions=24/24 files=24 size=478.45KB
|
|
row-size=9B cardinality=7.30K
|
|
====
|
|
# A subset of aggregation classes are materialized. With group by.
|
|
select a, c, e1, e2, e3, gby2 from
|
|
(select count(distinct tinyint_col) a, avg(distinct smallint_col) b,
|
|
count(distinct int_col) c, avg(distinct bigint_col) d,
|
|
min(float_col) e1, max(float_col) e2, sum(double_col) e3,
|
|
string_col gby1, date_string_col gby2
|
|
from functional.alltypes
|
|
group by gby1, gby2) v
|
|
---- PLAN
|
|
PLAN-ROOT SINK
|
|
|
|
|
03:AGGREGATE [FINALIZE]
|
|
| output: aggif(valid_tid(2,5,7,10,11) = 2, count(tinyint_col)), aggif(valid_tid(2,5,7,10,11) = 7, count(int_col)), aggif(valid_tid(2,5,7,10,11) = 11, min(float_col)), aggif(valid_tid(2,5,7,10,11) = 11, max(float_col)), aggif(valid_tid(2,5,7,10,11) = 11, sum(double_col))
|
|
| group by: CASE valid_tid(2,7,11) WHEN 2 THEN string_col WHEN 7 THEN string_col WHEN 11 THEN string_col END, CASE valid_tid(2,7,11) WHEN 2 THEN date_string_col WHEN 7 THEN date_string_col WHEN 11 THEN date_string_col END
|
|
| row-size=56B cardinality=8.11K
|
|
|
|
|
02:AGGREGATE [FINALIZE]
|
|
| Class 0
|
|
| output: count(tinyint_col)
|
|
| group by: string_col, date_string_col
|
|
| Class 1
|
|
| output: count(int_col)
|
|
| group by: string_col, date_string_col
|
|
| Class 2
|
|
| output: min:merge(float_col), max:merge(float_col), sum:merge(double_col)
|
|
| group by: string_col, date_string_col
|
|
| row-size=131B cardinality=22.08K
|
|
|
|
|
01:AGGREGATE
|
|
| Class 0
|
|
| group by: string_col, date_string_col, tinyint_col
|
|
| Class 1
|
|
| group by: string_col, date_string_col, int_col
|
|
| Class 2
|
|
| output: min(float_col), max(float_col), sum(double_col)
|
|
| group by: string_col, date_string_col
|
|
| row-size=120B cardinality=21.90K
|
|
|
|
|
00:SCAN HDFS [functional.alltypes]
|
|
HDFS partitions=24/24 files=24 size=478.45KB
|
|
row-size=50B cardinality=7.30K
|
|
---- DISTRIBUTEDPLAN
|
|
PLAN-ROOT SINK
|
|
|
|
|
08:EXCHANGE [UNPARTITIONED]
|
|
|
|
|
03:AGGREGATE [FINALIZE]
|
|
| output: aggif(valid_tid(2,5,7,10,11) = 2, count(tinyint_col)), aggif(valid_tid(2,5,7,10,11) = 7, count(int_col)), aggif(valid_tid(2,5,7,10,11) = 11, min(float_col)), aggif(valid_tid(2,5,7,10,11) = 11, max(float_col)), aggif(valid_tid(2,5,7,10,11) = 11, sum(double_col))
|
|
| group by: CASE valid_tid(2,7,11) WHEN 2 THEN string_col WHEN 7 THEN string_col WHEN 11 THEN string_col END, CASE valid_tid(2,7,11) WHEN 2 THEN date_string_col WHEN 7 THEN date_string_col WHEN 11 THEN date_string_col END
|
|
| row-size=56B cardinality=8.11K
|
|
|
|
|
07:AGGREGATE [FINALIZE]
|
|
| Class 0
|
|
| output: count:merge(tinyint_col)
|
|
| group by: string_col, date_string_col
|
|
| Class 1
|
|
| output: count:merge(int_col)
|
|
| group by: string_col, date_string_col
|
|
| Class 2
|
|
| output: min:merge(float_col), max:merge(float_col), sum:merge(double_col)
|
|
| group by: string_col, date_string_col
|
|
| row-size=131B cardinality=22.08K
|
|
|
|
|
06:EXCHANGE [HASH(CASE valid_tid(2,7,11) WHEN 2 THEN murmur_hash(string_col) WHEN 7 THEN murmur_hash(string_col) WHEN 11 THEN murmur_hash(string_col) END,CASE valid_tid(2,7,11) WHEN 2 THEN murmur_hash(date_string_col) WHEN 7 THEN murmur_hash(date_string_col) WHEN 11 THEN murmur_hash(date_string_col) END)]
|
|
|
|
|
02:AGGREGATE [STREAMING]
|
|
| Class 0
|
|
| output: count(tinyint_col)
|
|
| group by: string_col, date_string_col
|
|
| Class 1
|
|
| output: count(int_col)
|
|
| group by: string_col, date_string_col
|
|
| Class 2
|
|
| output: min:merge(float_col), max:merge(float_col), sum:merge(double_col)
|
|
| group by: string_col, date_string_col
|
|
| row-size=131B cardinality=22.08K
|
|
|
|
|
05:AGGREGATE
|
|
| Class 0
|
|
| group by: string_col, date_string_col, tinyint_col
|
|
| Class 1
|
|
| group by: string_col, date_string_col, int_col
|
|
| Class 2
|
|
| output: min:merge(float_col), max:merge(float_col), sum:merge(double_col)
|
|
| group by: string_col, date_string_col
|
|
| row-size=120B cardinality=21.90K
|
|
|
|
|
04:EXCHANGE [HASH(CASE valid_tid(1,6,11) WHEN 1 THEN murmur_hash(string_col) WHEN 6 THEN murmur_hash(string_col) WHEN 11 THEN murmur_hash(string_col) END,CASE valid_tid(1,6,11) WHEN 1 THEN murmur_hash(date_string_col) WHEN 6 THEN murmur_hash(date_string_col) WHEN 11 THEN murmur_hash(date_string_col) END,CASE valid_tid(1,6,11) WHEN 1 THEN murmur_hash(tinyint_col) WHEN 6 THEN murmur_hash(int_col) WHEN 11 THEN 0 END)]
|
|
|
|
|
01:AGGREGATE [STREAMING]
|
|
| Class 0
|
|
| group by: string_col, date_string_col, tinyint_col
|
|
| Class 1
|
|
| group by: string_col, date_string_col, int_col
|
|
| Class 2
|
|
| output: min(float_col), max(float_col), sum(double_col)
|
|
| group by: string_col, date_string_col
|
|
| row-size=120B cardinality=21.90K
|
|
|
|
|
00:SCAN HDFS [functional.alltypes]
|
|
HDFS partitions=24/24 files=24 size=478.45KB
|
|
row-size=50B cardinality=7.30K
|
|
====
|
|
# A subset of aggregation classes are materialized. A subset of aggregation functions
|
|
# of the surviving classes are materialized. With group by.
|
|
select a1, c2, e2, gby1 from
|
|
(select count(distinct tinyint_col) a1, avg(distinct tinyint_col) a2,
|
|
count(distinct smallint_col) b1, avg(distinct smallint_col) b2,
|
|
count(distinct int_col) c1, avg(distinct int_col) c2,
|
|
count(distinct bigint_col) d1, avg(distinct bigint_col) d2,
|
|
min(float_col) e1, max(float_col) e2, sum(double_col) e3,
|
|
string_col gby1, date_string_col gby2
|
|
from functional.alltypes
|
|
group by gby1, gby2) v
|
|
---- PLAN
|
|
PLAN-ROOT SINK
|
|
|
|
|
03:AGGREGATE [FINALIZE]
|
|
| output: aggif(valid_tid(3,6,9,12,13) = 3, count(tinyint_col)), aggif(valid_tid(3,6,9,12,13) = 9, avg(int_col)), aggif(valid_tid(3,6,9,12,13) = 13, max(float_col))
|
|
| group by: CASE valid_tid(3,9,13) WHEN 3 THEN string_col WHEN 9 THEN string_col WHEN 13 THEN string_col END, CASE valid_tid(3,9,13) WHEN 3 THEN date_string_col WHEN 9 THEN date_string_col WHEN 13 THEN date_string_col END
|
|
| row-size=44B cardinality=8.11K
|
|
|
|
|
02:AGGREGATE [FINALIZE]
|
|
| Class 0
|
|
| output: count(tinyint_col)
|
|
| group by: string_col, date_string_col
|
|
| Class 1
|
|
| output: avg(int_col)
|
|
| group by: string_col, date_string_col
|
|
| Class 2
|
|
| output: max:merge(float_col)
|
|
| group by: string_col, date_string_col
|
|
| row-size=119B cardinality=22.08K
|
|
|
|
|
01:AGGREGATE
|
|
| Class 0
|
|
| group by: string_col, date_string_col, tinyint_col
|
|
| Class 1
|
|
| group by: string_col, date_string_col, int_col
|
|
| Class 2
|
|
| output: max(float_col)
|
|
| group by: string_col, date_string_col
|
|
| row-size=108B cardinality=21.90K
|
|
|
|
|
00:SCAN HDFS [functional.alltypes]
|
|
HDFS partitions=24/24 files=24 size=478.45KB
|
|
row-size=42B cardinality=7.30K
|
|
---- DISTRIBUTEDPLAN
|
|
PLAN-ROOT SINK
|
|
|
|
|
08:EXCHANGE [UNPARTITIONED]
|
|
|
|
|
03:AGGREGATE [FINALIZE]
|
|
| output: aggif(valid_tid(3,6,9,12,13) = 3, count(tinyint_col)), aggif(valid_tid(3,6,9,12,13) = 9, avg(int_col)), aggif(valid_tid(3,6,9,12,13) = 13, max(float_col))
|
|
| group by: CASE valid_tid(3,9,13) WHEN 3 THEN string_col WHEN 9 THEN string_col WHEN 13 THEN string_col END, CASE valid_tid(3,9,13) WHEN 3 THEN date_string_col WHEN 9 THEN date_string_col WHEN 13 THEN date_string_col END
|
|
| row-size=44B cardinality=8.11K
|
|
|
|
|
07:AGGREGATE [FINALIZE]
|
|
| Class 0
|
|
| output: count:merge(tinyint_col)
|
|
| group by: string_col, date_string_col
|
|
| Class 1
|
|
| output: avg:merge(int_col)
|
|
| group by: string_col, date_string_col
|
|
| Class 2
|
|
| output: max:merge(float_col)
|
|
| group by: string_col, date_string_col
|
|
| row-size=119B cardinality=22.08K
|
|
|
|
|
06:EXCHANGE [HASH(CASE valid_tid(2,8,13) WHEN 2 THEN murmur_hash(string_col) WHEN 8 THEN murmur_hash(string_col) WHEN 13 THEN murmur_hash(string_col) END,CASE valid_tid(2,8,13) WHEN 2 THEN murmur_hash(date_string_col) WHEN 8 THEN murmur_hash(date_string_col) WHEN 13 THEN murmur_hash(date_string_col) END)]
|
|
|
|
|
02:AGGREGATE [STREAMING]
|
|
| Class 0
|
|
| output: count(tinyint_col)
|
|
| group by: string_col, date_string_col
|
|
| Class 1
|
|
| output: avg(int_col)
|
|
| group by: string_col, date_string_col
|
|
| Class 2
|
|
| output: max:merge(float_col)
|
|
| group by: string_col, date_string_col
|
|
| row-size=119B cardinality=22.08K
|
|
|
|
|
05:AGGREGATE
|
|
| Class 0
|
|
| group by: string_col, date_string_col, tinyint_col
|
|
| Class 1
|
|
| group by: string_col, date_string_col, int_col
|
|
| Class 2
|
|
| output: max:merge(float_col)
|
|
| group by: string_col, date_string_col
|
|
| row-size=108B cardinality=21.90K
|
|
|
|
|
04:EXCHANGE [HASH(CASE valid_tid(1,7,13) WHEN 1 THEN murmur_hash(string_col) WHEN 7 THEN murmur_hash(string_col) WHEN 13 THEN murmur_hash(string_col) END,CASE valid_tid(1,7,13) WHEN 1 THEN murmur_hash(date_string_col) WHEN 7 THEN murmur_hash(date_string_col) WHEN 13 THEN murmur_hash(date_string_col) END,CASE valid_tid(1,7,13) WHEN 1 THEN murmur_hash(tinyint_col) WHEN 7 THEN murmur_hash(int_col) WHEN 13 THEN 0 END)]
|
|
|
|
|
01:AGGREGATE [STREAMING]
|
|
| Class 0
|
|
| group by: string_col, date_string_col, tinyint_col
|
|
| Class 1
|
|
| group by: string_col, date_string_col, int_col
|
|
| Class 2
|
|
| output: max(float_col)
|
|
| group by: string_col, date_string_col
|
|
| row-size=108B cardinality=21.90K
|
|
|
|
|
00:SCAN HDFS [functional.alltypes]
|
|
HDFS partitions=24/24 files=24 size=478.45KB
|
|
row-size=42B cardinality=7.30K
|
|
====
|
|
# Simplifies to a single aggregation class. Only first distinct agg is materialized.
|
|
select a from
|
|
(select count(distinct tinyint_col) a, count(distinct smallint_col) b
|
|
from functional.alltypes) v
|
|
---- PLAN
|
|
PLAN-ROOT SINK
|
|
|
|
|
02:AGGREGATE [FINALIZE]
|
|
| output: count(tinyint_col)
|
|
| row-size=8B cardinality=1
|
|
|
|
|
01:AGGREGATE
|
|
| group by: tinyint_col
|
|
| row-size=1B cardinality=10
|
|
|
|
|
00:SCAN HDFS [functional.alltypes]
|
|
HDFS partitions=24/24 files=24 size=478.45KB
|
|
row-size=1B cardinality=7.30K
|
|
---- DISTRIBUTEDPLAN
|
|
PLAN-ROOT SINK
|
|
|
|
|
07:AGGREGATE [FINALIZE]
|
|
| output: count:merge(tinyint_col)
|
|
| row-size=8B cardinality=1
|
|
|
|
|
06:EXCHANGE [UNPARTITIONED]
|
|
|
|
|
02:AGGREGATE
|
|
| output: count(tinyint_col)
|
|
| row-size=8B cardinality=1
|
|
|
|
|
05:AGGREGATE
|
|
| group by: tinyint_col
|
|
| row-size=1B cardinality=10
|
|
|
|
|
04:EXCHANGE [HASH(tinyint_col)]
|
|
|
|
|
01:AGGREGATE [STREAMING]
|
|
| group by: tinyint_col
|
|
| row-size=1B cardinality=10
|
|
|
|
|
00:SCAN HDFS [functional.alltypes]
|
|
HDFS partitions=24/24 files=24 size=478.45KB
|
|
row-size=1B cardinality=7.30K
|
|
====
|
|
# Simplifies to a single aggregation class. Only second distinct agg is materialized.
|
|
select b from
|
|
(select count(distinct tinyint_col) a, count(distinct smallint_col) b
|
|
from functional.alltypes) v
|
|
---- PLAN
|
|
PLAN-ROOT SINK
|
|
|
|
|
02:AGGREGATE [FINALIZE]
|
|
| output: count(smallint_col)
|
|
| row-size=8B cardinality=1
|
|
|
|
|
01:AGGREGATE
|
|
| group by: smallint_col
|
|
| row-size=2B cardinality=10
|
|
|
|
|
00:SCAN HDFS [functional.alltypes]
|
|
HDFS partitions=24/24 files=24 size=478.45KB
|
|
row-size=2B cardinality=7.30K
|
|
---- DISTRIBUTEDPLAN
|
|
PLAN-ROOT SINK
|
|
|
|
|
07:AGGREGATE [FINALIZE]
|
|
| output: count:merge(smallint_col)
|
|
| row-size=8B cardinality=1
|
|
|
|
|
06:EXCHANGE [UNPARTITIONED]
|
|
|
|
|
02:AGGREGATE
|
|
| output: count(smallint_col)
|
|
| row-size=8B cardinality=1
|
|
|
|
|
05:AGGREGATE
|
|
| group by: smallint_col
|
|
| row-size=2B cardinality=10
|
|
|
|
|
04:EXCHANGE [HASH(smallint_col)]
|
|
|
|
|
01:AGGREGATE [STREAMING]
|
|
| group by: smallint_col
|
|
| row-size=2B cardinality=10
|
|
|
|
|
00:SCAN HDFS [functional.alltypes]
|
|
HDFS partitions=24/24 files=24 size=478.45KB
|
|
row-size=2B cardinality=7.30K
|
|
====
|
|
# Some aggs only referenced in HAVING clause.
|
|
select count(distinct tinyint_col), min(timestamp_col) from functional.alltypes
|
|
having count(distinct smallint_col) < 10 and max(date_string_col) = 'test'
|
|
---- PLAN
|
|
PLAN-ROOT SINK
|
|
|
|
|
03:AGGREGATE [FINALIZE]
|
|
| output: aggif(valid_tid(2,4,5) = 2, count(tinyint_col)), aggif(valid_tid(2,4,5) = 4, count(smallint_col)), aggif(valid_tid(2,4,5) = 5, min(timestamp_col)), aggif(valid_tid(2,4,5) = 5, max(date_string_col))
|
|
| having: aggif(valid_tid(2,4,5) = 4, count(smallint_col)) < 10, aggif(valid_tid(2,4,5) = 5, max(date_string_col)) = 'test'
|
|
| row-size=44B cardinality=0
|
|
|
|
|
02:AGGREGATE [FINALIZE]
|
|
| Class 0
|
|
| output: count(tinyint_col)
|
|
| Class 1
|
|
| output: count(smallint_col)
|
|
| Class 2
|
|
| output: min:merge(timestamp_col), max:merge(date_string_col)
|
|
| row-size=44B cardinality=3
|
|
|
|
|
01:AGGREGATE
|
|
| Class 0
|
|
| group by: tinyint_col
|
|
| Class 1
|
|
| group by: smallint_col
|
|
| Class 2
|
|
| output: min(timestamp_col), max(date_string_col)
|
|
| row-size=31B cardinality=21
|
|
|
|
|
00:SCAN HDFS [functional.alltypes]
|
|
HDFS partitions=24/24 files=24 size=478.45KB
|
|
row-size=39B cardinality=7.30K
|
|
---- DISTRIBUTEDPLAN
|
|
PLAN-ROOT SINK
|
|
|
|
|
03:AGGREGATE [FINALIZE]
|
|
| output: aggif(valid_tid(2,4,5) = 2, count(tinyint_col)), aggif(valid_tid(2,4,5) = 4, count(smallint_col)), aggif(valid_tid(2,4,5) = 5, min(timestamp_col)), aggif(valid_tid(2,4,5) = 5, max(date_string_col))
|
|
| having: aggif(valid_tid(2,4,5) = 4, count(smallint_col)) < 10, aggif(valid_tid(2,4,5) = 5, max(date_string_col)) = 'test'
|
|
| row-size=44B cardinality=0
|
|
|
|
|
07:AGGREGATE [FINALIZE]
|
|
| Class 0
|
|
| output: count:merge(tinyint_col)
|
|
| Class 1
|
|
| output: count:merge(smallint_col)
|
|
| Class 2
|
|
| output: min:merge(timestamp_col), max:merge(date_string_col)
|
|
| row-size=44B cardinality=3
|
|
|
|
|
06:EXCHANGE [UNPARTITIONED]
|
|
|
|
|
02:AGGREGATE
|
|
| Class 0
|
|
| output: count(tinyint_col)
|
|
| Class 1
|
|
| output: count(smallint_col)
|
|
| Class 2
|
|
| output: min:merge(timestamp_col), max:merge(date_string_col)
|
|
| row-size=44B cardinality=3
|
|
|
|
|
05:AGGREGATE
|
|
| Class 0
|
|
| group by: tinyint_col
|
|
| Class 1
|
|
| group by: smallint_col
|
|
| Class 2
|
|
| output: min:merge(timestamp_col), max:merge(date_string_col)
|
|
| row-size=31B cardinality=21
|
|
|
|
|
04:EXCHANGE [HASH(CASE valid_tid(1,3,5) WHEN 1 THEN murmur_hash(tinyint_col) WHEN 3 THEN murmur_hash(smallint_col) WHEN 5 THEN 0 END)]
|
|
|
|
|
01:AGGREGATE [STREAMING]
|
|
| Class 0
|
|
| group by: tinyint_col
|
|
| Class 1
|
|
| group by: smallint_col
|
|
| Class 2
|
|
| output: min(timestamp_col), max(date_string_col)
|
|
| row-size=31B cardinality=21
|
|
|
|
|
00:SCAN HDFS [functional.alltypes]
|
|
HDFS partitions=24/24 files=24 size=478.45KB
|
|
row-size=39B cardinality=7.30K
|
|
====
|
|
# Some aggs only referenced in ORDER BY clause.
|
|
select count(distinct tinyint_col), min(timestamp_col) from functional.alltypes
|
|
group by bigint_col
|
|
order by count(distinct smallint_col), max(date_string_col)
|
|
---- PLAN
|
|
PLAN-ROOT SINK
|
|
|
|
|
04:SORT
|
|
| order by: aggif(valid_tid(2,4,5) = 4, count(smallint_col)) ASC, aggif(valid_tid(2,4,5) = 5, max(date_string_col)) ASC
|
|
| row-size=44B cardinality=11
|
|
|
|
|
03:AGGREGATE [FINALIZE]
|
|
| output: aggif(valid_tid(2,4,5) = 2, count(tinyint_col)), aggif(valid_tid(2,4,5) = 4, count(smallint_col)), aggif(valid_tid(2,4,5) = 5, min(timestamp_col)), aggif(valid_tid(2,4,5) = 5, max(date_string_col))
|
|
| group by: CASE valid_tid(2,4,5) WHEN 2 THEN bigint_col WHEN 4 THEN bigint_col WHEN 5 THEN bigint_col END
|
|
| row-size=52B cardinality=11
|
|
|
|
|
02:AGGREGATE [FINALIZE]
|
|
| Class 0
|
|
| output: count(tinyint_col)
|
|
| group by: bigint_col
|
|
| Class 1
|
|
| output: count(smallint_col)
|
|
| group by: bigint_col
|
|
| Class 2
|
|
| output: min:merge(timestamp_col), max:merge(date_string_col)
|
|
| group by: bigint_col
|
|
| row-size=68B cardinality=30
|
|
|
|
|
01:AGGREGATE
|
|
| Class 0
|
|
| group by: bigint_col, tinyint_col
|
|
| Class 1
|
|
| group by: bigint_col, smallint_col
|
|
| Class 2
|
|
| output: min(timestamp_col), max(date_string_col)
|
|
| group by: bigint_col
|
|
| row-size=55B cardinality=210
|
|
|
|
|
00:SCAN HDFS [functional.alltypes]
|
|
HDFS partitions=24/24 files=24 size=478.45KB
|
|
row-size=47B cardinality=7.30K
|
|
---- DISTRIBUTEDPLAN
|
|
PLAN-ROOT SINK
|
|
|
|
|
09:MERGING-EXCHANGE [UNPARTITIONED]
|
|
| order by: aggif(valid_tid(2,4,5) = 4, count(smallint_col)) ASC, aggif(valid_tid(2,4,5) = 5, max(date_string_col)) ASC
|
|
|
|
|
04:SORT
|
|
| order by: aggif(valid_tid(2,4,5) = 4, count(smallint_col)) ASC, aggif(valid_tid(2,4,5) = 5, max(date_string_col)) ASC
|
|
| row-size=44B cardinality=11
|
|
|
|
|
03:AGGREGATE [FINALIZE]
|
|
| output: aggif(valid_tid(2,4,5) = 2, count(tinyint_col)), aggif(valid_tid(2,4,5) = 4, count(smallint_col)), aggif(valid_tid(2,4,5) = 5, min(timestamp_col)), aggif(valid_tid(2,4,5) = 5, max(date_string_col))
|
|
| group by: CASE valid_tid(2,4,5) WHEN 2 THEN bigint_col WHEN 4 THEN bigint_col WHEN 5 THEN bigint_col END
|
|
| row-size=52B cardinality=11
|
|
|
|
|
08:AGGREGATE [FINALIZE]
|
|
| Class 0
|
|
| output: count:merge(tinyint_col)
|
|
| group by: bigint_col
|
|
| Class 1
|
|
| output: count:merge(smallint_col)
|
|
| group by: bigint_col
|
|
| Class 2
|
|
| output: min:merge(timestamp_col), max:merge(date_string_col)
|
|
| group by: bigint_col
|
|
| row-size=68B cardinality=30
|
|
|
|
|
07:EXCHANGE [HASH(CASE valid_tid(2,4,5) WHEN 2 THEN murmur_hash(bigint_col) WHEN 4 THEN murmur_hash(bigint_col) WHEN 5 THEN murmur_hash(bigint_col) END)]
|
|
|
|
|
02:AGGREGATE [STREAMING]
|
|
| Class 0
|
|
| output: count(tinyint_col)
|
|
| group by: bigint_col
|
|
| Class 1
|
|
| output: count(smallint_col)
|
|
| group by: bigint_col
|
|
| Class 2
|
|
| output: min:merge(timestamp_col), max:merge(date_string_col)
|
|
| group by: bigint_col
|
|
| row-size=68B cardinality=30
|
|
|
|
|
06:AGGREGATE
|
|
| Class 0
|
|
| group by: bigint_col, tinyint_col
|
|
| Class 1
|
|
| group by: bigint_col, smallint_col
|
|
| Class 2
|
|
| output: min:merge(timestamp_col), max:merge(date_string_col)
|
|
| group by: bigint_col
|
|
| row-size=55B cardinality=210
|
|
|
|
|
05:EXCHANGE [HASH(CASE valid_tid(1,3,5) WHEN 1 THEN murmur_hash(bigint_col) WHEN 3 THEN murmur_hash(bigint_col) WHEN 5 THEN murmur_hash(bigint_col) END,CASE valid_tid(1,3,5) WHEN 1 THEN murmur_hash(tinyint_col) WHEN 3 THEN murmur_hash(smallint_col) WHEN 5 THEN 0 END)]
|
|
|
|
|
01:AGGREGATE [STREAMING]
|
|
| Class 0
|
|
| group by: bigint_col, tinyint_col
|
|
| Class 1
|
|
| group by: bigint_col, smallint_col
|
|
| Class 2
|
|
| output: min(timestamp_col), max(date_string_col)
|
|
| group by: bigint_col
|
|
| row-size=55B cardinality=210
|
|
|
|
|
00:SCAN HDFS [functional.alltypes]
|
|
HDFS partitions=24/24 files=24 size=478.45KB
|
|
row-size=47B cardinality=7.30K
|
|
====
|
|
# Mixed distinct and non-distinct aggs. No materialized aggregations. No group by.
|
|
select 1 from
|
|
(select min(string_col) a, count(distinct tinyint_col) b,
|
|
max(string_col) c, count(distinct smallint_col) d
|
|
from functional.alltypes) v
|
|
---- PLAN
|
|
PLAN-ROOT SINK
|
|
|
|
|
01:AGGREGATE [FINALIZE]
|
|
| row-size=0B cardinality=1
|
|
|
|
|
00:SCAN HDFS [functional.alltypes]
|
|
HDFS partitions=24/24 files=24 size=478.45KB
|
|
partition key scan
|
|
row-size=0B cardinality=24
|
|
====
|
|
# Mixed distinct and non-distinct aggs. No materialized aggregations. With group by.
|
|
select v.gby1 from
|
|
(select min(string_col) a, count(distinct tinyint_col) b,
|
|
max(string_col) c, count(distinct smallint_col) d,
|
|
string_col gby1, date_string_col gby2
|
|
from functional.alltypes
|
|
group by gby1, gby2) v
|
|
---- PLAN
|
|
PLAN-ROOT SINK
|
|
|
|
|
01:AGGREGATE [FINALIZE]
|
|
| group by: string_col, date_string_col
|
|
| row-size=33B cardinality=7.30K
|
|
|
|
|
00:SCAN HDFS [functional.alltypes]
|
|
HDFS partitions=24/24 files=24 size=478.45KB
|
|
row-size=33B cardinality=7.30K
|
|
====
|
|
# Only distinct aggs. No materialized aggregations. No group by.
|
|
select 1 from
|
|
(select count(distinct tinyint_col) a, count(distinct smallint_col) b
|
|
from functional.alltypes) v
|
|
---- PLAN
|
|
PLAN-ROOT SINK
|
|
|
|
|
02:AGGREGATE [FINALIZE]
|
|
| row-size=0B cardinality=1
|
|
|
|
|
01:AGGREGATE
|
|
| group by: smallint_col
|
|
| row-size=2B cardinality=10
|
|
|
|
|
00:SCAN HDFS [functional.alltypes]
|
|
HDFS partitions=24/24 files=24 size=478.45KB
|
|
row-size=2B cardinality=7.30K
|
|
====
|
|
# Only distinct aggs. No materialized aggregations. With group by.
|
|
select v.gby2 from
|
|
(select count(distinct tinyint_col) a, count(distinct smallint_col) b,
|
|
string_col gby1, date_string_col gby2
|
|
from functional.alltypes
|
|
group by gby1, gby2) v
|
|
---- PLAN
|
|
PLAN-ROOT SINK
|
|
|
|
|
02:AGGREGATE [FINALIZE]
|
|
| group by: string_col, date_string_col
|
|
| row-size=33B cardinality=7.30K
|
|
|
|
|
01:AGGREGATE
|
|
| group by: string_col, date_string_col, smallint_col
|
|
| row-size=35B cardinality=7.30K
|
|
|
|
|
00:SCAN HDFS [functional.alltypes]
|
|
HDFS partitions=24/24 files=24 size=478.45KB
|
|
row-size=35B cardinality=7.30K
|
|
====
|
|
# Simplifies to a single aggregation class. Only first distinct agg is materialized.
|
|
# No group by.
|
|
select b from
|
|
(select min(string_col) a, count(distinct tinyint_col) b,
|
|
max(string_col) c, count(distinct smallint_col) d
|
|
from functional.alltypes
|
|
having count(distinct tinyint_col) < 9) v
|
|
---- PLAN
|
|
PLAN-ROOT SINK
|
|
|
|
|
02:AGGREGATE [FINALIZE]
|
|
| output: count(tinyint_col)
|
|
| having: count(tinyint_col) < 9
|
|
| row-size=8B cardinality=0
|
|
|
|
|
01:AGGREGATE
|
|
| group by: tinyint_col
|
|
| row-size=1B cardinality=10
|
|
|
|
|
00:SCAN HDFS [functional.alltypes]
|
|
HDFS partitions=24/24 files=24 size=478.45KB
|
|
row-size=1B cardinality=7.30K
|
|
---- DISTRIBUTEDPLAN
|
|
PLAN-ROOT SINK
|
|
|
|
|
07:AGGREGATE [FINALIZE]
|
|
| output: count:merge(tinyint_col)
|
|
| having: count(tinyint_col) < 9
|
|
| row-size=8B cardinality=1
|
|
|
|
|
06:EXCHANGE [UNPARTITIONED]
|
|
|
|
|
02:AGGREGATE
|
|
| output: count(tinyint_col)
|
|
| row-size=8B cardinality=0
|
|
|
|
|
05:AGGREGATE
|
|
| group by: tinyint_col
|
|
| row-size=1B cardinality=10
|
|
|
|
|
04:EXCHANGE [HASH(tinyint_col)]
|
|
|
|
|
01:AGGREGATE [STREAMING]
|
|
| group by: tinyint_col
|
|
| row-size=1B cardinality=10
|
|
|
|
|
00:SCAN HDFS [functional.alltypes]
|
|
HDFS partitions=24/24 files=24 size=478.45KB
|
|
row-size=1B cardinality=7.30K
|
|
====
|
|
# Simplifies to a single aggregation class. Only second distinct agg is materialized.
# No group by.
select d from
(select min(string_col) a, count(distinct tinyint_col) b,
 max(string_col) c, count(distinct smallint_col) d
 from functional.alltypes
 having count(distinct smallint_col) < 9) v
---- PLAN
PLAN-ROOT SINK
|
02:AGGREGATE [FINALIZE]
|  output: count(smallint_col)
|  having: count(smallint_col) < 9
|  row-size=8B cardinality=0
|
01:AGGREGATE
|  group by: smallint_col
|  row-size=2B cardinality=10
|
00:SCAN HDFS [functional.alltypes]
   HDFS partitions=24/24 files=24 size=478.45KB
   row-size=2B cardinality=7.30K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
07:AGGREGATE [FINALIZE]
|  output: count:merge(smallint_col)
|  having: count(smallint_col) < 9
|  row-size=8B cardinality=1
|
06:EXCHANGE [UNPARTITIONED]
|
02:AGGREGATE
|  output: count(smallint_col)
|  row-size=8B cardinality=0
|
05:AGGREGATE
|  group by: smallint_col
|  row-size=2B cardinality=10
|
04:EXCHANGE [HASH(smallint_col)]
|
01:AGGREGATE [STREAMING]
|  group by: smallint_col
|  row-size=2B cardinality=10
|
00:SCAN HDFS [functional.alltypes]
   HDFS partitions=24/24 files=24 size=478.45KB
   row-size=2B cardinality=7.30K
====
# Simplifies to a single aggregation class. Only non-distinct aggs remain.
# No group by.
select a, c from
(select min(string_col) a, count(distinct tinyint_col) b,
 max(string_col) c, count(distinct smallint_col) d
 from functional.alltypes
 having min(string_col) < '9') v
where c > '0' and c < a
---- PLAN
PLAN-ROOT SINK
|
01:AGGREGATE [FINALIZE]
|  output: min(string_col), max(string_col)
|  having: max(string_col) > '0', min(string_col) < '9', max(string_col) < min(string_col)
|  row-size=24B cardinality=0
|
00:SCAN HDFS [functional.alltypes]
   HDFS partitions=24/24 files=24 size=478.45KB
   row-size=13B cardinality=7.30K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
04:AGGREGATE [FINALIZE]
|  output: min:merge(string_col), max:merge(string_col)
|  having: max(string_col) > '0', min(string_col) < '9', max(string_col) < min(string_col)
|  row-size=24B cardinality=0
|
03:EXCHANGE [UNPARTITIONED]
|
01:AGGREGATE
|  output: min(string_col), max(string_col)
|  row-size=24B cardinality=1
|
00:SCAN HDFS [functional.alltypes]
   HDFS partitions=24/24 files=24 size=478.45KB
   row-size=13B cardinality=7.30K
====
# Simplifies to a single aggregation class with distinct and non-distinct aggs.
# No group by.
select a, b, c from
(select min(string_col) a, count(distinct tinyint_col) b,
 max(string_col) c, count(distinct smallint_col) d
 from functional.alltypes
 having min(string_col) < '9' and count(distinct tinyint_col) = 10) v
where c > '0' and c < a
---- PLAN
PLAN-ROOT SINK
|
02:AGGREGATE [FINALIZE]
|  output: count(tinyint_col), min:merge(string_col), max:merge(string_col)
|  having: count(tinyint_col) = 10, max(string_col) > '0', min(string_col) < '9', max(string_col) < min(string_col)
|  row-size=32B cardinality=0
|
01:AGGREGATE
|  output: min(string_col), max(string_col)
|  group by: tinyint_col
|  row-size=25B cardinality=10
|
00:SCAN HDFS [functional.alltypes]
   HDFS partitions=24/24 files=24 size=478.45KB
   row-size=14B cardinality=7.30K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
07:AGGREGATE [FINALIZE]
|  output: count:merge(tinyint_col), min:merge(string_col), max:merge(string_col)
|  having: count(tinyint_col) = 10, max(string_col) > '0', min(string_col) < '9', max(string_col) < min(string_col)
|  row-size=32B cardinality=1
|
06:EXCHANGE [UNPARTITIONED]
|
02:AGGREGATE
|  output: count(tinyint_col), min:merge(string_col), max:merge(string_col)
|  row-size=32B cardinality=0
|
05:AGGREGATE
|  output: min:merge(string_col), max:merge(string_col)
|  group by: tinyint_col
|  row-size=25B cardinality=10
|
04:EXCHANGE [HASH(tinyint_col)]
|
01:AGGREGATE [STREAMING]
|  output: min(string_col), max(string_col)
|  group by: tinyint_col
|  row-size=25B cardinality=10
|
00:SCAN HDFS [functional.alltypes]
   HDFS partitions=24/24 files=24 size=478.45KB
   row-size=14B cardinality=7.30K
====
# Simplifies to a single aggregation class with one distinct agg.
# With group by.
select b, gby1 from
(select min(string_col) a, count(distinct tinyint_col) b,
 max(string_col) c, count(distinct smallint_col) d,
 date_string_col gby1, timestamp_col gby2
 from functional.alltypes
 group by gby1, gby2
 having count(distinct tinyint_col) < 10) v
where gby1 = 'test1'
---- PLAN
PLAN-ROOT SINK
|
02:AGGREGATE [FINALIZE]
|  output: count(tinyint_col)
|  group by: date_string_col, timestamp_col
|  having: count(tinyint_col) < 10
|  row-size=44B cardinality=1
|
01:AGGREGATE
|  group by: date_string_col, timestamp_col, tinyint_col
|  row-size=37B cardinality=10
|
00:SCAN HDFS [functional.alltypes]
   HDFS partitions=24/24 files=24 size=478.45KB
   predicates: functional.alltypes.date_string_col = 'test1'
   row-size=37B cardinality=10
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
08:EXCHANGE [UNPARTITIONED]
|
07:AGGREGATE [FINALIZE]
|  output: count:merge(tinyint_col)
|  group by: date_string_col, timestamp_col
|  having: count(tinyint_col) < 10
|  row-size=44B cardinality=10
|
06:EXCHANGE [HASH(date_string_col,timestamp_col)]
|
02:AGGREGATE [STREAMING]
|  output: count(tinyint_col)
|  group by: date_string_col, timestamp_col
|  row-size=44B cardinality=1
|
05:AGGREGATE
|  group by: date_string_col, timestamp_col, tinyint_col
|  row-size=37B cardinality=10
|
04:EXCHANGE [HASH(date_string_col,timestamp_col,tinyint_col)]
|
01:AGGREGATE [STREAMING]
|  group by: date_string_col, timestamp_col, tinyint_col
|  row-size=37B cardinality=10
|
00:SCAN HDFS [functional.alltypes]
   HDFS partitions=24/24 files=24 size=478.45KB
   predicates: functional.alltypes.date_string_col = 'test1'
   row-size=37B cardinality=10
====
# Simplifies to a single aggregation class with two non-distinct aggs.
# With group by.
select a, c, gby1, gby2 from
(select min(string_col) a, count(distinct tinyint_col) b,
 max(string_col) c, count(distinct smallint_col) d,
 date_string_col gby1, timestamp_col gby2
 from functional.alltypes
 group by gby1, gby2
 having count(distinct tinyint_col) < 10) v
where gby1 = 'test1' and gby1 < gby2
---- PLAN
PLAN-ROOT SINK
|
02:AGGREGATE [FINALIZE]
|  output: count(tinyint_col), min:merge(string_col), max:merge(string_col)
|  group by: date_string_col, timestamp_col
|  having: count(tinyint_col) < 10, date_string_col < timestamp_col
|  row-size=68B cardinality=0
|
01:AGGREGATE
|  output: min(string_col), max(string_col)
|  group by: date_string_col, timestamp_col, tinyint_col
|  row-size=61B cardinality=3
|
00:SCAN HDFS [functional.alltypes]
   HDFS partitions=24/24 files=24 size=478.45KB
   predicates: functional.alltypes.timestamp_col > NULL, functional.alltypes.date_string_col = 'test1'
   row-size=50B cardinality=3
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
08:EXCHANGE [UNPARTITIONED]
|
07:AGGREGATE [FINALIZE]
|  output: count:merge(tinyint_col), min:merge(string_col), max:merge(string_col)
|  group by: date_string_col, timestamp_col
|  having: count(tinyint_col) < 10, date_string_col < timestamp_col
|  row-size=68B cardinality=3
|
06:EXCHANGE [HASH(date_string_col,timestamp_col)]
|
02:AGGREGATE [STREAMING]
|  output: count(tinyint_col), min:merge(string_col), max:merge(string_col)
|  group by: date_string_col, timestamp_col
|  row-size=68B cardinality=0
|
05:AGGREGATE
|  output: min:merge(string_col), max:merge(string_col)
|  group by: date_string_col, timestamp_col, tinyint_col
|  row-size=61B cardinality=3
|
04:EXCHANGE [HASH(date_string_col,timestamp_col,tinyint_col)]
|
01:AGGREGATE [STREAMING]
|  output: min(string_col), max(string_col)
|  group by: date_string_col, timestamp_col, tinyint_col
|  row-size=61B cardinality=3
|
00:SCAN HDFS [functional.alltypes]
   HDFS partitions=24/24 files=24 size=478.45KB
   predicates: functional.alltypes.timestamp_col > NULL, functional.alltypes.date_string_col = 'test1'
   row-size=50B cardinality=3
====
# Simplifies to a single aggregation class with one distinct and one non-distinct agg.
# With group by.
select c, d, gby1, gby2 from
(select min(string_col) a, count(distinct tinyint_col) b,
 max(string_col) c, count(distinct smallint_col) d,
 date_string_col gby1, timestamp_col gby2
 from functional.alltypes
 group by gby1, gby2
 having count(distinct smallint_col) < 20) v
where gby1 = 'test1' and gby1 < gby2
---- PLAN
PLAN-ROOT SINK
|
02:AGGREGATE [FINALIZE]
|  output: count(smallint_col), max:merge(string_col)
|  group by: date_string_col, timestamp_col
|  having: count(smallint_col) < 20, date_string_col < timestamp_col
|  row-size=56B cardinality=0
|
01:AGGREGATE
|  output: max(string_col)
|  group by: date_string_col, timestamp_col, smallint_col
|  row-size=50B cardinality=3
|
00:SCAN HDFS [functional.alltypes]
   HDFS partitions=24/24 files=24 size=478.45KB
   predicates: functional.alltypes.timestamp_col > NULL, functional.alltypes.date_string_col = 'test1'
   row-size=51B cardinality=3
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
08:EXCHANGE [UNPARTITIONED]
|
07:AGGREGATE [FINALIZE]
|  output: count:merge(smallint_col), max:merge(string_col)
|  group by: date_string_col, timestamp_col
|  having: count(smallint_col) < 20, date_string_col < timestamp_col
|  row-size=56B cardinality=3
|
06:EXCHANGE [HASH(date_string_col,timestamp_col)]
|
02:AGGREGATE [STREAMING]
|  output: count(smallint_col), max:merge(string_col)
|  group by: date_string_col, timestamp_col
|  row-size=56B cardinality=0
|
05:AGGREGATE
|  output: max:merge(string_col)
|  group by: date_string_col, timestamp_col, smallint_col
|  row-size=50B cardinality=3
|
04:EXCHANGE [HASH(date_string_col,timestamp_col,smallint_col)]
|
01:AGGREGATE [STREAMING]
|  output: max(string_col)
|  group by: date_string_col, timestamp_col, smallint_col
|  row-size=50B cardinality=3
|
00:SCAN HDFS [functional.alltypes]
   HDFS partitions=24/24 files=24 size=478.45KB
   predicates: functional.alltypes.timestamp_col > NULL, functional.alltypes.date_string_col = 'test1'
   row-size=51B cardinality=3
====