impala/testdata/workloads/functional-planner/queries/PlannerTest/multiple-distinct-materialization.test
Tim Armstrong e5777f0eb8 IMPALA-8834: Short-circuit partition key scan
This adds a new version of the pre-existing partition
key scan optimization that always returns correct
results, even when files have zero rows. The new
version is enabled by default. The old
optimization, which does a metadata-only
query, remains available behind the
OPTIMIZE_PARTITION_KEY_SCANS query option.

The new version of the optimization must scan the files
to see if they are non-empty. Instead of using metadata
only, the planner instructs the backend to short-circuit HDFS
scans after a single row has been returned from each
file. This gives results equivalent to returning all
the rows from each file, because all rows in a file
belong to the same partition and therefore have identical
values for the partition key columns.
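The equivalence argument above can be illustrated with a minimal Python sketch (not Impala code; the partition layout below is a made-up example): because every row in a file carries the same partition-key values, a scan that stops after one row per file produces the same set of distinct partition keys as a full scan, while still correctly omitting partitions whose files are all empty.

```python
# Hypothetical partition layout: partition key -> per-file row counts.
partitions = {
    ("2009", "1"): [310, 0],   # one file is empty
    ("2009", "2"): [280],
    ("2010", "1"): [0],        # all files empty: must not appear in results
}

def full_scan(parts):
    """Distinct partition keys when every row of every file is read."""
    return {key for key, files in parts.items()
            for n in files for _ in range(n)}

def short_circuit_scan(parts):
    """Same result, but each file contributes at most one row."""
    return {key for key, files in parts.items()
            if any(n > 0 for n in files)}

assert full_scan(partitions) == short_circuit_scan(partitions)
# ("2010", "1") is absent from both results; a metadata-only
# optimization would have returned it incorrectly.
```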

Planner cardinality estimates are adjusted accordingly
to enable potentially better plans and other optimizations
like disabling codegen.

We make some effort to avoid generating extra scan ranges
for remote scans by only generating one range per remote
file.

The backend optimization is implemented by constructing a
row batch with capacity for a single row only and then
terminating each scan range once a single row has been
produced. Both Parquet and ORC have optimized code paths
for zero-slot table scans, so this will only result
in a footer read. (Other file formats still need to read
some portion of the file, but can terminate early once
one row has been produced.)
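The mechanism can be sketched as follows (a simplified Python model, not Impala's actual C++ scanner classes): the scanner fills a batch whose capacity is one row, and the scan range stops as soon as that batch is full, so the rest of the file is never read.

```python
def scan_range_first_row(rows):
    """Return at most one row from an iterable of rows, mimicking a scan
    range that terminates once a capacity-1 row batch has been filled."""
    batch = []
    for row in rows:
        batch.append(row)
        if len(batch) >= 1:   # batch capacity reached: short-circuit the scan
            break
    return batch

def expensive_rows():
    """Simulates a file whose remaining rows are costly to materialize."""
    yield "row0"
    raise RuntimeError("unreachable: the scan stops after the first row")

assert scan_range_first_row(expensive_rows()) == ["row0"]
assert scan_range_first_row([]) == []   # an empty file yields no rows
```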

This should be quite efficient in practice with file handle
caching and data caching enabled, because it then only
requires reading the footer from the cache for each file.

The partition key scan optimization is also slightly
generalized to apply to scans of unpartitioned tables
where no slots are materialized.

A limitation where the optimization did not apply to
multiple grouping classes was also fixed.

Limitations:
* This still scans every file in the partition. I.e. there is
  no short-circuiting if a row has already been found in the
  partition by the current scan node.
* Resource reservations and estimates for the scan node do
  not fully account for this optimization, so they are
  conservative: they assume the whole file is scanned.

Testing:
* Added end-to-end tests that execute the query on all
  HDFS file formats and verify that the correct number of rows
  flow through the plan.
* Added planner test based on the existing test partition key
  scan test.
* Added test to make sure the single node optimization kicks in
  when expected.
* Added test for cardinality estimates with and without stats.
* Added test for unpartitioned tables.
* Added planner test that checks that the optimization is enabled
  for multiple aggregation classes.
* Added a targeted perf test.

Change-Id: I26c87525a4f75ffeb654267b89948653b2e1ff8c
Reviewed-on: http://gerrit.cloudera.org:8080/13993
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
2020-05-20 23:03:23 +00:00


# A subset of aggregation classes are materialized. No group by.
select a, c, e1, e2, e3 from
(select count(distinct tinyint_col) a, avg(distinct smallint_col) b,
count(distinct int_col) c, avg(distinct bigint_col) d,
min(float_col) e1, max(float_col) e2, sum(double_col) e3
from functional.alltypes) v
---- PLAN
PLAN-ROOT SINK
|
03:AGGREGATE [FINALIZE]
| output: aggif(valid_tid(2,5,7,10,11) = 2, count(tinyint_col)), aggif(valid_tid(2,5,7,10,11) = 7, count(int_col)), aggif(valid_tid(2,5,7,10,11) = 11, min(float_col)), aggif(valid_tid(2,5,7,10,11) = 11, max(float_col)), aggif(valid_tid(2,5,7,10,11) = 11, sum(double_col))
| row-size=32B cardinality=1
|
02:AGGREGATE [FINALIZE]
| Class 0
| output: count(tinyint_col)
| Class 1
| output: count(int_col)
| Class 2
| output: min:merge(float_col), max:merge(float_col), sum:merge(double_col)
| row-size=32B cardinality=3
|
01:AGGREGATE
| Class 0
| group by: tinyint_col
| Class 1
| group by: int_col
| Class 2
| output: min(float_col), max(float_col), sum(double_col)
| row-size=21B cardinality=21
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=17B cardinality=7.30K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
03:AGGREGATE [FINALIZE]
| output: aggif(valid_tid(2,5,7,10,11) = 2, count(tinyint_col)), aggif(valid_tid(2,5,7,10,11) = 7, count(int_col)), aggif(valid_tid(2,5,7,10,11) = 11, min(float_col)), aggif(valid_tid(2,5,7,10,11) = 11, max(float_col)), aggif(valid_tid(2,5,7,10,11) = 11, sum(double_col))
| row-size=32B cardinality=1
|
07:AGGREGATE [FINALIZE]
| Class 0
| output: count:merge(tinyint_col)
| Class 1
| output: count:merge(int_col)
| Class 2
| output: min:merge(float_col), max:merge(float_col), sum:merge(double_col)
| row-size=32B cardinality=3
|
06:EXCHANGE [UNPARTITIONED]
|
02:AGGREGATE
| Class 0
| output: count(tinyint_col)
| Class 1
| output: count(int_col)
| Class 2
| output: min:merge(float_col), max:merge(float_col), sum:merge(double_col)
| row-size=32B cardinality=3
|
05:AGGREGATE
| Class 0
| group by: tinyint_col
| Class 1
| group by: int_col
| Class 2
| output: min:merge(float_col), max:merge(float_col), sum:merge(double_col)
| row-size=21B cardinality=21
|
04:EXCHANGE [HASH(CASE valid_tid(1,6,11) WHEN 1 THEN murmur_hash(tinyint_col) WHEN 6 THEN murmur_hash(int_col) WHEN 11 THEN 0 END)]
|
01:AGGREGATE [STREAMING]
| Class 0
| group by: tinyint_col
| Class 1
| group by: int_col
| Class 2
| output: min(float_col), max(float_col), sum(double_col)
| row-size=21B cardinality=21
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=17B cardinality=7.30K
====
# A subset of aggregation classes are materialized. A subset of aggregation functions
# of the surviving classes are materialized. No group by.
select a1, c2, e2 from
(select count(distinct tinyint_col) a1, avg(distinct tinyint_col) a2,
count(distinct smallint_col) b1, avg(distinct smallint_col) b2,
count(distinct int_col) c1, avg(distinct int_col) c2,
count(distinct bigint_col) d1, avg(distinct bigint_col) d2,
min(float_col) e1, max(float_col) e2, sum(double_col) e3
from functional.alltypes) v
---- PLAN
PLAN-ROOT SINK
|
03:AGGREGATE [FINALIZE]
| output: aggif(valid_tid(3,6,9,12,13) = 3, count(tinyint_col)), aggif(valid_tid(3,6,9,12,13) = 9, avg(int_col)), aggif(valid_tid(3,6,9,12,13) = 13, max(float_col))
| row-size=20B cardinality=1
|
02:AGGREGATE [FINALIZE]
| Class 0
| output: count(tinyint_col)
| Class 1
| output: avg(int_col)
| Class 2
| output: max:merge(float_col)
| row-size=20B cardinality=3
|
01:AGGREGATE
| Class 0
| group by: tinyint_col
| Class 1
| group by: int_col
| Class 2
| output: max(float_col)
| row-size=9B cardinality=21
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=9B cardinality=7.30K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
03:AGGREGATE [FINALIZE]
| output: aggif(valid_tid(3,6,9,12,13) = 3, count(tinyint_col)), aggif(valid_tid(3,6,9,12,13) = 9, avg(int_col)), aggif(valid_tid(3,6,9,12,13) = 13, max(float_col))
| row-size=20B cardinality=1
|
07:AGGREGATE [FINALIZE]
| Class 0
| output: count:merge(tinyint_col)
| Class 1
| output: avg:merge(int_col)
| Class 2
| output: max:merge(float_col)
| row-size=20B cardinality=3
|
06:EXCHANGE [UNPARTITIONED]
|
02:AGGREGATE
| Class 0
| output: count(tinyint_col)
| Class 1
| output: avg(int_col)
| Class 2
| output: max:merge(float_col)
| row-size=20B cardinality=3
|
05:AGGREGATE
| Class 0
| group by: tinyint_col
| Class 1
| group by: int_col
| Class 2
| output: max:merge(float_col)
| row-size=9B cardinality=21
|
04:EXCHANGE [HASH(CASE valid_tid(1,7,13) WHEN 1 THEN murmur_hash(tinyint_col) WHEN 7 THEN murmur_hash(int_col) WHEN 13 THEN 0 END)]
|
01:AGGREGATE [STREAMING]
| Class 0
| group by: tinyint_col
| Class 1
| group by: int_col
| Class 2
| output: max(float_col)
| row-size=9B cardinality=21
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=9B cardinality=7.30K
====
# A subset of aggregation classes are materialized. With group by.
select a, c, e1, e2, e3, gby2 from
(select count(distinct tinyint_col) a, avg(distinct smallint_col) b,
count(distinct int_col) c, avg(distinct bigint_col) d,
min(float_col) e1, max(float_col) e2, sum(double_col) e3,
string_col gby1, date_string_col gby2
from functional.alltypes
group by gby1, gby2) v
---- PLAN
PLAN-ROOT SINK
|
03:AGGREGATE [FINALIZE]
| output: aggif(valid_tid(2,5,7,10,11) = 2, count(tinyint_col)), aggif(valid_tid(2,5,7,10,11) = 7, count(int_col)), aggif(valid_tid(2,5,7,10,11) = 11, min(float_col)), aggif(valid_tid(2,5,7,10,11) = 11, max(float_col)), aggif(valid_tid(2,5,7,10,11) = 11, sum(double_col))
| group by: CASE valid_tid(2,7,11) WHEN 2 THEN string_col WHEN 7 THEN string_col WHEN 11 THEN string_col END, CASE valid_tid(2,7,11) WHEN 2 THEN date_string_col WHEN 7 THEN date_string_col WHEN 11 THEN date_string_col END
| row-size=56B cardinality=8.11K
|
02:AGGREGATE [FINALIZE]
| Class 0
| output: count(tinyint_col)
| group by: string_col, date_string_col
| Class 1
| output: count(int_col)
| group by: string_col, date_string_col
| Class 2
| output: min:merge(float_col), max:merge(float_col), sum:merge(double_col)
| group by: string_col, date_string_col
| row-size=131B cardinality=22.08K
|
01:AGGREGATE
| Class 0
| group by: string_col, date_string_col, tinyint_col
| Class 1
| group by: string_col, date_string_col, int_col
| Class 2
| output: min(float_col), max(float_col), sum(double_col)
| group by: string_col, date_string_col
| row-size=120B cardinality=21.90K
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=50B cardinality=7.30K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
08:EXCHANGE [UNPARTITIONED]
|
03:AGGREGATE [FINALIZE]
| output: aggif(valid_tid(2,5,7,10,11) = 2, count(tinyint_col)), aggif(valid_tid(2,5,7,10,11) = 7, count(int_col)), aggif(valid_tid(2,5,7,10,11) = 11, min(float_col)), aggif(valid_tid(2,5,7,10,11) = 11, max(float_col)), aggif(valid_tid(2,5,7,10,11) = 11, sum(double_col))
| group by: CASE valid_tid(2,7,11) WHEN 2 THEN string_col WHEN 7 THEN string_col WHEN 11 THEN string_col END, CASE valid_tid(2,7,11) WHEN 2 THEN date_string_col WHEN 7 THEN date_string_col WHEN 11 THEN date_string_col END
| row-size=56B cardinality=8.11K
|
07:AGGREGATE [FINALIZE]
| Class 0
| output: count:merge(tinyint_col)
| group by: string_col, date_string_col
| Class 1
| output: count:merge(int_col)
| group by: string_col, date_string_col
| Class 2
| output: min:merge(float_col), max:merge(float_col), sum:merge(double_col)
| group by: string_col, date_string_col
| row-size=131B cardinality=22.08K
|
06:EXCHANGE [HASH(CASE valid_tid(2,7,11) WHEN 2 THEN murmur_hash(string_col) WHEN 7 THEN murmur_hash(string_col) WHEN 11 THEN murmur_hash(string_col) END,CASE valid_tid(2,7,11) WHEN 2 THEN murmur_hash(date_string_col) WHEN 7 THEN murmur_hash(date_string_col) WHEN 11 THEN murmur_hash(date_string_col) END)]
|
02:AGGREGATE [STREAMING]
| Class 0
| output: count(tinyint_col)
| group by: string_col, date_string_col
| Class 1
| output: count(int_col)
| group by: string_col, date_string_col
| Class 2
| output: min:merge(float_col), max:merge(float_col), sum:merge(double_col)
| group by: string_col, date_string_col
| row-size=131B cardinality=22.08K
|
05:AGGREGATE
| Class 0
| group by: string_col, date_string_col, tinyint_col
| Class 1
| group by: string_col, date_string_col, int_col
| Class 2
| output: min:merge(float_col), max:merge(float_col), sum:merge(double_col)
| group by: string_col, date_string_col
| row-size=120B cardinality=21.90K
|
04:EXCHANGE [HASH(CASE valid_tid(1,6,11) WHEN 1 THEN murmur_hash(string_col) WHEN 6 THEN murmur_hash(string_col) WHEN 11 THEN murmur_hash(string_col) END,CASE valid_tid(1,6,11) WHEN 1 THEN murmur_hash(date_string_col) WHEN 6 THEN murmur_hash(date_string_col) WHEN 11 THEN murmur_hash(date_string_col) END,CASE valid_tid(1,6,11) WHEN 1 THEN murmur_hash(tinyint_col) WHEN 6 THEN murmur_hash(int_col) WHEN 11 THEN 0 END)]
|
01:AGGREGATE [STREAMING]
| Class 0
| group by: string_col, date_string_col, tinyint_col
| Class 1
| group by: string_col, date_string_col, int_col
| Class 2
| output: min(float_col), max(float_col), sum(double_col)
| group by: string_col, date_string_col
| row-size=120B cardinality=21.90K
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=50B cardinality=7.30K
====
# A subset of aggregation classes are materialized. A subset of aggregation functions
# of the surviving classes are materialized. With group by.
select a1, c2, e2, gby1 from
(select count(distinct tinyint_col) a1, avg(distinct tinyint_col) a2,
count(distinct smallint_col) b1, avg(distinct smallint_col) b2,
count(distinct int_col) c1, avg(distinct int_col) c2,
count(distinct bigint_col) d1, avg(distinct bigint_col) d2,
min(float_col) e1, max(float_col) e2, sum(double_col) e3,
string_col gby1, date_string_col gby2
from functional.alltypes
group by gby1, gby2) v
---- PLAN
PLAN-ROOT SINK
|
03:AGGREGATE [FINALIZE]
| output: aggif(valid_tid(3,6,9,12,13) = 3, count(tinyint_col)), aggif(valid_tid(3,6,9,12,13) = 9, avg(int_col)), aggif(valid_tid(3,6,9,12,13) = 13, max(float_col))
| group by: CASE valid_tid(3,9,13) WHEN 3 THEN string_col WHEN 9 THEN string_col WHEN 13 THEN string_col END, CASE valid_tid(3,9,13) WHEN 3 THEN date_string_col WHEN 9 THEN date_string_col WHEN 13 THEN date_string_col END
| row-size=44B cardinality=8.11K
|
02:AGGREGATE [FINALIZE]
| Class 0
| output: count(tinyint_col)
| group by: string_col, date_string_col
| Class 1
| output: avg(int_col)
| group by: string_col, date_string_col
| Class 2
| output: max:merge(float_col)
| group by: string_col, date_string_col
| row-size=119B cardinality=22.08K
|
01:AGGREGATE
| Class 0
| group by: string_col, date_string_col, tinyint_col
| Class 1
| group by: string_col, date_string_col, int_col
| Class 2
| output: max(float_col)
| group by: string_col, date_string_col
| row-size=108B cardinality=21.90K
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=42B cardinality=7.30K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
08:EXCHANGE [UNPARTITIONED]
|
03:AGGREGATE [FINALIZE]
| output: aggif(valid_tid(3,6,9,12,13) = 3, count(tinyint_col)), aggif(valid_tid(3,6,9,12,13) = 9, avg(int_col)), aggif(valid_tid(3,6,9,12,13) = 13, max(float_col))
| group by: CASE valid_tid(3,9,13) WHEN 3 THEN string_col WHEN 9 THEN string_col WHEN 13 THEN string_col END, CASE valid_tid(3,9,13) WHEN 3 THEN date_string_col WHEN 9 THEN date_string_col WHEN 13 THEN date_string_col END
| row-size=44B cardinality=8.11K
|
07:AGGREGATE [FINALIZE]
| Class 0
| output: count:merge(tinyint_col)
| group by: string_col, date_string_col
| Class 1
| output: avg:merge(int_col)
| group by: string_col, date_string_col
| Class 2
| output: max:merge(float_col)
| group by: string_col, date_string_col
| row-size=119B cardinality=22.08K
|
06:EXCHANGE [HASH(CASE valid_tid(2,8,13) WHEN 2 THEN murmur_hash(string_col) WHEN 8 THEN murmur_hash(string_col) WHEN 13 THEN murmur_hash(string_col) END,CASE valid_tid(2,8,13) WHEN 2 THEN murmur_hash(date_string_col) WHEN 8 THEN murmur_hash(date_string_col) WHEN 13 THEN murmur_hash(date_string_col) END)]
|
02:AGGREGATE [STREAMING]
| Class 0
| output: count(tinyint_col)
| group by: string_col, date_string_col
| Class 1
| output: avg(int_col)
| group by: string_col, date_string_col
| Class 2
| output: max:merge(float_col)
| group by: string_col, date_string_col
| row-size=119B cardinality=22.08K
|
05:AGGREGATE
| Class 0
| group by: string_col, date_string_col, tinyint_col
| Class 1
| group by: string_col, date_string_col, int_col
| Class 2
| output: max:merge(float_col)
| group by: string_col, date_string_col
| row-size=108B cardinality=21.90K
|
04:EXCHANGE [HASH(CASE valid_tid(1,7,13) WHEN 1 THEN murmur_hash(string_col) WHEN 7 THEN murmur_hash(string_col) WHEN 13 THEN murmur_hash(string_col) END,CASE valid_tid(1,7,13) WHEN 1 THEN murmur_hash(date_string_col) WHEN 7 THEN murmur_hash(date_string_col) WHEN 13 THEN murmur_hash(date_string_col) END,CASE valid_tid(1,7,13) WHEN 1 THEN murmur_hash(tinyint_col) WHEN 7 THEN murmur_hash(int_col) WHEN 13 THEN 0 END)]
|
01:AGGREGATE [STREAMING]
| Class 0
| group by: string_col, date_string_col, tinyint_col
| Class 1
| group by: string_col, date_string_col, int_col
| Class 2
| output: max(float_col)
| group by: string_col, date_string_col
| row-size=108B cardinality=21.90K
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=42B cardinality=7.30K
====
# Simplifies to a single aggregation class. Only first distinct agg is materialized.
select a from
(select count(distinct tinyint_col) a, count(distinct smallint_col) b
from functional.alltypes) v
---- PLAN
PLAN-ROOT SINK
|
02:AGGREGATE [FINALIZE]
| output: count(tinyint_col)
| row-size=8B cardinality=1
|
01:AGGREGATE
| group by: tinyint_col
| row-size=1B cardinality=10
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=1B cardinality=7.30K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
07:AGGREGATE [FINALIZE]
| output: count:merge(tinyint_col)
| row-size=8B cardinality=1
|
06:EXCHANGE [UNPARTITIONED]
|
02:AGGREGATE
| output: count(tinyint_col)
| row-size=8B cardinality=1
|
05:AGGREGATE
| group by: tinyint_col
| row-size=1B cardinality=10
|
04:EXCHANGE [HASH(tinyint_col)]
|
01:AGGREGATE [STREAMING]
| group by: tinyint_col
| row-size=1B cardinality=10
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=1B cardinality=7.30K
====
# Simplifies to a single aggregation class. Only second distinct agg is materialized.
select b from
(select count(distinct tinyint_col) a, count(distinct smallint_col) b
from functional.alltypes) v
---- PLAN
PLAN-ROOT SINK
|
02:AGGREGATE [FINALIZE]
| output: count(smallint_col)
| row-size=8B cardinality=1
|
01:AGGREGATE
| group by: smallint_col
| row-size=2B cardinality=10
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=2B cardinality=7.30K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
07:AGGREGATE [FINALIZE]
| output: count:merge(smallint_col)
| row-size=8B cardinality=1
|
06:EXCHANGE [UNPARTITIONED]
|
02:AGGREGATE
| output: count(smallint_col)
| row-size=8B cardinality=1
|
05:AGGREGATE
| group by: smallint_col
| row-size=2B cardinality=10
|
04:EXCHANGE [HASH(smallint_col)]
|
01:AGGREGATE [STREAMING]
| group by: smallint_col
| row-size=2B cardinality=10
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=2B cardinality=7.30K
====
# Some aggs only referenced in HAVING clause.
select count(distinct tinyint_col), min(timestamp_col) from functional.alltypes
having count(distinct smallint_col) < 10 and max(date_string_col) = 'test'
---- PLAN
PLAN-ROOT SINK
|
03:AGGREGATE [FINALIZE]
| output: aggif(valid_tid(2,4,5) = 2, count(tinyint_col)), aggif(valid_tid(2,4,5) = 4, count(smallint_col)), aggif(valid_tid(2,4,5) = 5, min(timestamp_col)), aggif(valid_tid(2,4,5) = 5, max(date_string_col))
| having: aggif(valid_tid(2,4,5) = 4, count(smallint_col)) < 10, aggif(valid_tid(2,4,5) = 5, max(date_string_col)) = 'test'
| row-size=44B cardinality=0
|
02:AGGREGATE [FINALIZE]
| Class 0
| output: count(tinyint_col)
| Class 1
| output: count(smallint_col)
| Class 2
| output: min:merge(timestamp_col), max:merge(date_string_col)
| row-size=44B cardinality=3
|
01:AGGREGATE
| Class 0
| group by: tinyint_col
| Class 1
| group by: smallint_col
| Class 2
| output: min(timestamp_col), max(date_string_col)
| row-size=31B cardinality=21
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=39B cardinality=7.30K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
03:AGGREGATE [FINALIZE]
| output: aggif(valid_tid(2,4,5) = 2, count(tinyint_col)), aggif(valid_tid(2,4,5) = 4, count(smallint_col)), aggif(valid_tid(2,4,5) = 5, min(timestamp_col)), aggif(valid_tid(2,4,5) = 5, max(date_string_col))
| having: aggif(valid_tid(2,4,5) = 4, count(smallint_col)) < 10, aggif(valid_tid(2,4,5) = 5, max(date_string_col)) = 'test'
| row-size=44B cardinality=0
|
07:AGGREGATE [FINALIZE]
| Class 0
| output: count:merge(tinyint_col)
| Class 1
| output: count:merge(smallint_col)
| Class 2
| output: min:merge(timestamp_col), max:merge(date_string_col)
| row-size=44B cardinality=3
|
06:EXCHANGE [UNPARTITIONED]
|
02:AGGREGATE
| Class 0
| output: count(tinyint_col)
| Class 1
| output: count(smallint_col)
| Class 2
| output: min:merge(timestamp_col), max:merge(date_string_col)
| row-size=44B cardinality=3
|
05:AGGREGATE
| Class 0
| group by: tinyint_col
| Class 1
| group by: smallint_col
| Class 2
| output: min:merge(timestamp_col), max:merge(date_string_col)
| row-size=31B cardinality=21
|
04:EXCHANGE [HASH(CASE valid_tid(1,3,5) WHEN 1 THEN murmur_hash(tinyint_col) WHEN 3 THEN murmur_hash(smallint_col) WHEN 5 THEN 0 END)]
|
01:AGGREGATE [STREAMING]
| Class 0
| group by: tinyint_col
| Class 1
| group by: smallint_col
| Class 2
| output: min(timestamp_col), max(date_string_col)
| row-size=31B cardinality=21
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=39B cardinality=7.30K
====
# Some aggs only referenced in ORDER BY clause.
select count(distinct tinyint_col), min(timestamp_col) from functional.alltypes
group by bigint_col
order by count(distinct smallint_col), max(date_string_col)
---- PLAN
PLAN-ROOT SINK
|
04:SORT
| order by: aggif(valid_tid(2,4,5) = 4, count(smallint_col)) ASC, aggif(valid_tid(2,4,5) = 5, max(date_string_col)) ASC
| row-size=44B cardinality=11
|
03:AGGREGATE [FINALIZE]
| output: aggif(valid_tid(2,4,5) = 2, count(tinyint_col)), aggif(valid_tid(2,4,5) = 4, count(smallint_col)), aggif(valid_tid(2,4,5) = 5, min(timestamp_col)), aggif(valid_tid(2,4,5) = 5, max(date_string_col))
| group by: CASE valid_tid(2,4,5) WHEN 2 THEN bigint_col WHEN 4 THEN bigint_col WHEN 5 THEN bigint_col END
| row-size=52B cardinality=11
|
02:AGGREGATE [FINALIZE]
| Class 0
| output: count(tinyint_col)
| group by: bigint_col
| Class 1
| output: count(smallint_col)
| group by: bigint_col
| Class 2
| output: min:merge(timestamp_col), max:merge(date_string_col)
| group by: bigint_col
| row-size=68B cardinality=30
|
01:AGGREGATE
| Class 0
| group by: bigint_col, tinyint_col
| Class 1
| group by: bigint_col, smallint_col
| Class 2
| output: min(timestamp_col), max(date_string_col)
| group by: bigint_col
| row-size=55B cardinality=210
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=47B cardinality=7.30K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
09:MERGING-EXCHANGE [UNPARTITIONED]
| order by: aggif(valid_tid(2,4,5) = 4, count(smallint_col)) ASC, aggif(valid_tid(2,4,5) = 5, max(date_string_col)) ASC
|
04:SORT
| order by: aggif(valid_tid(2,4,5) = 4, count(smallint_col)) ASC, aggif(valid_tid(2,4,5) = 5, max(date_string_col)) ASC
| row-size=44B cardinality=11
|
03:AGGREGATE [FINALIZE]
| output: aggif(valid_tid(2,4,5) = 2, count(tinyint_col)), aggif(valid_tid(2,4,5) = 4, count(smallint_col)), aggif(valid_tid(2,4,5) = 5, min(timestamp_col)), aggif(valid_tid(2,4,5) = 5, max(date_string_col))
| group by: CASE valid_tid(2,4,5) WHEN 2 THEN bigint_col WHEN 4 THEN bigint_col WHEN 5 THEN bigint_col END
| row-size=52B cardinality=11
|
08:AGGREGATE [FINALIZE]
| Class 0
| output: count:merge(tinyint_col)
| group by: bigint_col
| Class 1
| output: count:merge(smallint_col)
| group by: bigint_col
| Class 2
| output: min:merge(timestamp_col), max:merge(date_string_col)
| group by: bigint_col
| row-size=68B cardinality=30
|
07:EXCHANGE [HASH(CASE valid_tid(2,4,5) WHEN 2 THEN murmur_hash(bigint_col) WHEN 4 THEN murmur_hash(bigint_col) WHEN 5 THEN murmur_hash(bigint_col) END)]
|
02:AGGREGATE [STREAMING]
| Class 0
| output: count(tinyint_col)
| group by: bigint_col
| Class 1
| output: count(smallint_col)
| group by: bigint_col
| Class 2
| output: min:merge(timestamp_col), max:merge(date_string_col)
| group by: bigint_col
| row-size=68B cardinality=30
|
06:AGGREGATE
| Class 0
| group by: bigint_col, tinyint_col
| Class 1
| group by: bigint_col, smallint_col
| Class 2
| output: min:merge(timestamp_col), max:merge(date_string_col)
| group by: bigint_col
| row-size=55B cardinality=210
|
05:EXCHANGE [HASH(CASE valid_tid(1,3,5) WHEN 1 THEN murmur_hash(bigint_col) WHEN 3 THEN murmur_hash(bigint_col) WHEN 5 THEN murmur_hash(bigint_col) END,CASE valid_tid(1,3,5) WHEN 1 THEN murmur_hash(tinyint_col) WHEN 3 THEN murmur_hash(smallint_col) WHEN 5 THEN 0 END)]
|
01:AGGREGATE [STREAMING]
| Class 0
| group by: bigint_col, tinyint_col
| Class 1
| group by: bigint_col, smallint_col
| Class 2
| output: min(timestamp_col), max(date_string_col)
| group by: bigint_col
| row-size=55B cardinality=210
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=47B cardinality=7.30K
====
# Mixed distinct and non-distinct aggs. No materialized aggregations. No group by.
select 1 from
(select min(string_col) a, count(distinct tinyint_col) b,
max(string_col) c, count(distinct smallint_col) d
from functional.alltypes) v
---- PLAN
PLAN-ROOT SINK
|
01:AGGREGATE [FINALIZE]
| row-size=0B cardinality=1
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
partition key scan
row-size=0B cardinality=24
====
# Mixed distinct and non-distinct aggs. No materialized aggregations. With group by.
select v.gby1 from
(select min(string_col) a, count(distinct tinyint_col) b,
max(string_col) c, count(distinct smallint_col) d,
string_col gby1, date_string_col gby2
from functional.alltypes
group by gby1, gby2) v
---- PLAN
PLAN-ROOT SINK
|
01:AGGREGATE [FINALIZE]
| group by: string_col, date_string_col
| row-size=33B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=33B cardinality=7.30K
====
# Only distinct aggs. No materialized aggregations. No group by.
select 1 from
(select count(distinct tinyint_col) a, count(distinct smallint_col) b
from functional.alltypes) v
---- PLAN
PLAN-ROOT SINK
|
02:AGGREGATE [FINALIZE]
| row-size=0B cardinality=1
|
01:AGGREGATE
| group by: smallint_col
| row-size=2B cardinality=10
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=2B cardinality=7.30K
====
# Only distinct aggs. No materialized aggregations. With group by.
select v.gby2 from
(select count(distinct tinyint_col) a, count(distinct smallint_col) b,
string_col gby1, date_string_col gby2
from functional.alltypes
group by gby1, gby2) v
---- PLAN
PLAN-ROOT SINK
|
02:AGGREGATE [FINALIZE]
| group by: string_col, date_string_col
| row-size=33B cardinality=7.30K
|
01:AGGREGATE
| group by: string_col, date_string_col, smallint_col
| row-size=35B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=35B cardinality=7.30K
====
# Simplifies to a single aggregation class. Only first distinct agg is materialized.
# No group by.
select b from
(select min(string_col) a, count(distinct tinyint_col) b,
max(string_col) c, count(distinct smallint_col) d
from functional.alltypes
having count(distinct tinyint_col) < 9) v
---- PLAN
PLAN-ROOT SINK
|
02:AGGREGATE [FINALIZE]
| output: count(tinyint_col)
| having: count(tinyint_col) < 9
| row-size=8B cardinality=0
|
01:AGGREGATE
| group by: tinyint_col
| row-size=1B cardinality=10
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=1B cardinality=7.30K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
07:AGGREGATE [FINALIZE]
| output: count:merge(tinyint_col)
| having: count(tinyint_col) < 9
| row-size=8B cardinality=1
|
06:EXCHANGE [UNPARTITIONED]
|
02:AGGREGATE
| output: count(tinyint_col)
| row-size=8B cardinality=0
|
05:AGGREGATE
| group by: tinyint_col
| row-size=1B cardinality=10
|
04:EXCHANGE [HASH(tinyint_col)]
|
01:AGGREGATE [STREAMING]
| group by: tinyint_col
| row-size=1B cardinality=10
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=1B cardinality=7.30K
====
# Simplifies to a single aggregation class. Only second distinct agg is materialized.
# No group by.
select d from
(select min(string_col) a, count(distinct tinyint_col) b,
max(string_col) c, count(distinct smallint_col) d
from functional.alltypes
having count(distinct smallint_col) < 9) v
---- PLAN
PLAN-ROOT SINK
|
02:AGGREGATE [FINALIZE]
| output: count(smallint_col)
| having: count(smallint_col) < 9
| row-size=8B cardinality=0
|
01:AGGREGATE
| group by: smallint_col
| row-size=2B cardinality=10
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=2B cardinality=7.30K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
07:AGGREGATE [FINALIZE]
| output: count:merge(smallint_col)
| having: count(smallint_col) < 9
| row-size=8B cardinality=1
|
06:EXCHANGE [UNPARTITIONED]
|
02:AGGREGATE
| output: count(smallint_col)
| row-size=8B cardinality=0
|
05:AGGREGATE
| group by: smallint_col
| row-size=2B cardinality=10
|
04:EXCHANGE [HASH(smallint_col)]
|
01:AGGREGATE [STREAMING]
| group by: smallint_col
| row-size=2B cardinality=10
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=2B cardinality=7.30K
====
# Simplifies to a single aggregation class. Only non-distinct aggs remain.
# No group by.
select a, c from
(select min(string_col) a, count(distinct tinyint_col) b,
max(string_col) c, count(distinct smallint_col) d
from functional.alltypes
having min(string_col) < '9') v
where c > '0' and c < a
---- PLAN
PLAN-ROOT SINK
|
01:AGGREGATE [FINALIZE]
| output: min(string_col), max(string_col)
| having: max(string_col) > '0', min(string_col) < '9', max(string_col) < min(string_col)
| row-size=24B cardinality=0
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=13B cardinality=7.30K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
04:AGGREGATE [FINALIZE]
| output: min:merge(string_col), max:merge(string_col)
| having: max(string_col) > '0', min(string_col) < '9', max(string_col) < min(string_col)
| row-size=24B cardinality=0
|
03:EXCHANGE [UNPARTITIONED]
|
01:AGGREGATE
| output: min(string_col), max(string_col)
| row-size=24B cardinality=1
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=13B cardinality=7.30K
====
# Simplifies to a single aggregation class with distinct and non-distinct aggs.
# No group by.
select a, b, c from
(select min(string_col) a, count(distinct tinyint_col) b,
max(string_col) c, count(distinct smallint_col) d
from functional.alltypes
having min(string_col) < '9' and count(distinct tinyint_col) = 10) v
where c > '0' and c < a
---- PLAN
PLAN-ROOT SINK
|
02:AGGREGATE [FINALIZE]
| output: count(tinyint_col), min:merge(string_col), max:merge(string_col)
| having: count(tinyint_col) = 10, max(string_col) > '0', min(string_col) < '9', max(string_col) < min(string_col)
| row-size=32B cardinality=0
|
01:AGGREGATE
| output: min(string_col), max(string_col)
| group by: tinyint_col
| row-size=25B cardinality=10
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=14B cardinality=7.30K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
07:AGGREGATE [FINALIZE]
| output: count:merge(tinyint_col), min:merge(string_col), max:merge(string_col)
| having: count(tinyint_col) = 10, max(string_col) > '0', min(string_col) < '9', max(string_col) < min(string_col)
| row-size=32B cardinality=1
|
06:EXCHANGE [UNPARTITIONED]
|
02:AGGREGATE
| output: count(tinyint_col), min:merge(string_col), max:merge(string_col)
| row-size=32B cardinality=0
|
05:AGGREGATE
| output: min:merge(string_col), max:merge(string_col)
| group by: tinyint_col
| row-size=25B cardinality=10
|
04:EXCHANGE [HASH(tinyint_col)]
|
01:AGGREGATE [STREAMING]
| output: min(string_col), max(string_col)
| group by: tinyint_col
| row-size=25B cardinality=10
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=14B cardinality=7.30K
====
# Simplifies to a single aggregation class with one distinct agg.
# With group by.
select b, gby1 from
(select min(string_col) a, count(distinct tinyint_col) b,
max(string_col) c, count(distinct smallint_col) d,
date_string_col gby1, timestamp_col gby2
from functional.alltypes
group by gby1, gby2
having count(distinct tinyint_col) < 10) v
where gby1 = 'test1'
---- PLAN
PLAN-ROOT SINK
|
02:AGGREGATE [FINALIZE]
| output: count(tinyint_col)
| group by: date_string_col, timestamp_col
| having: count(tinyint_col) < 10
| row-size=44B cardinality=1
|
01:AGGREGATE
| group by: date_string_col, timestamp_col, tinyint_col
| row-size=37B cardinality=10
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
predicates: functional.alltypes.date_string_col = 'test1'
row-size=37B cardinality=10
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
08:EXCHANGE [UNPARTITIONED]
|
07:AGGREGATE [FINALIZE]
| output: count:merge(tinyint_col)
| group by: date_string_col, timestamp_col
| having: count(tinyint_col) < 10
| row-size=44B cardinality=10
|
06:EXCHANGE [HASH(date_string_col,timestamp_col)]
|
02:AGGREGATE [STREAMING]
| output: count(tinyint_col)
| group by: date_string_col, timestamp_col
| row-size=44B cardinality=1
|
05:AGGREGATE
| group by: date_string_col, timestamp_col, tinyint_col
| row-size=37B cardinality=10
|
04:EXCHANGE [HASH(date_string_col,timestamp_col,tinyint_col)]
|
01:AGGREGATE [STREAMING]
| group by: date_string_col, timestamp_col, tinyint_col
| row-size=37B cardinality=10
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
predicates: functional.alltypes.date_string_col = 'test1'
row-size=37B cardinality=10
====
# Simplifies to a single aggregation class with two non-distinct aggs.
# With group by.
select a, c, gby1, gby2 from
(select min(string_col) a, count(distinct tinyint_col) b,
max(string_col) c, count(distinct smallint_col) d,
date_string_col gby1, timestamp_col gby2
from functional.alltypes
group by gby1, gby2
having count(distinct tinyint_col) < 10) v
where gby1 = 'test1' and gby1 < gby2
---- PLAN
PLAN-ROOT SINK
|
02:AGGREGATE [FINALIZE]
| output: count(tinyint_col), min:merge(string_col), max:merge(string_col)
| group by: date_string_col, timestamp_col
| having: count(tinyint_col) < 10, date_string_col < timestamp_col
| row-size=68B cardinality=0
|
01:AGGREGATE
| output: min(string_col), max(string_col)
| group by: date_string_col, timestamp_col, tinyint_col
| row-size=61B cardinality=3
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
predicates: functional.alltypes.timestamp_col > NULL, functional.alltypes.date_string_col = 'test1'
row-size=50B cardinality=3
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
08:EXCHANGE [UNPARTITIONED]
|
07:AGGREGATE [FINALIZE]
| output: count:merge(tinyint_col), min:merge(string_col), max:merge(string_col)
| group by: date_string_col, timestamp_col
| having: count(tinyint_col) < 10, date_string_col < timestamp_col
| row-size=68B cardinality=3
|
06:EXCHANGE [HASH(date_string_col,timestamp_col)]
|
02:AGGREGATE [STREAMING]
| output: count(tinyint_col), min:merge(string_col), max:merge(string_col)
| group by: date_string_col, timestamp_col
| row-size=68B cardinality=0
|
05:AGGREGATE
| output: min:merge(string_col), max:merge(string_col)
| group by: date_string_col, timestamp_col, tinyint_col
| row-size=61B cardinality=3
|
04:EXCHANGE [HASH(date_string_col,timestamp_col,tinyint_col)]
|
01:AGGREGATE [STREAMING]
| output: min(string_col), max(string_col)
| group by: date_string_col, timestamp_col, tinyint_col
| row-size=61B cardinality=3
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
predicates: functional.alltypes.timestamp_col > NULL, functional.alltypes.date_string_col = 'test1'
row-size=50B cardinality=3
====
# Simplifies to a single aggregation class with one distinct and one non-distinct agg.
# With group by.
select c, d, gby1, gby2 from
(select min(string_col) a, count(distinct tinyint_col) b,
max(string_col) c, count(distinct smallint_col) d,
date_string_col gby1, timestamp_col gby2
from functional.alltypes
group by gby1, gby2
having count(distinct smallint_col) < 20) v
where gby1 = 'test1' and gby1 < gby2
---- PLAN
PLAN-ROOT SINK
|
02:AGGREGATE [FINALIZE]
| output: count(smallint_col), max:merge(string_col)
| group by: date_string_col, timestamp_col
| having: count(smallint_col) < 20, date_string_col < timestamp_col
| row-size=56B cardinality=0
|
01:AGGREGATE
| output: max(string_col)
| group by: date_string_col, timestamp_col, smallint_col
| row-size=50B cardinality=3
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
predicates: functional.alltypes.timestamp_col > NULL, functional.alltypes.date_string_col = 'test1'
row-size=51B cardinality=3
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
08:EXCHANGE [UNPARTITIONED]
|
07:AGGREGATE [FINALIZE]
| output: count:merge(smallint_col), max:merge(string_col)
| group by: date_string_col, timestamp_col
| having: count(smallint_col) < 20, date_string_col < timestamp_col
| row-size=56B cardinality=3
|
06:EXCHANGE [HASH(date_string_col,timestamp_col)]
|
02:AGGREGATE [STREAMING]
| output: count(smallint_col), max:merge(string_col)
| group by: date_string_col, timestamp_col
| row-size=56B cardinality=0
|
05:AGGREGATE
| output: max:merge(string_col)
| group by: date_string_col, timestamp_col, smallint_col
| row-size=50B cardinality=3
|
04:EXCHANGE [HASH(date_string_col,timestamp_col,smallint_col)]
|
01:AGGREGATE [STREAMING]
| output: max(string_col)
| group by: date_string_col, timestamp_col, smallint_col
| row-size=50B cardinality=3
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
predicates: functional.alltypes.timestamp_col > NULL, functional.alltypes.date_string_col = 'test1'
row-size=51B cardinality=3
====