mirror of
https://github.com/apache/impala.git
synced 2026-01-08 12:02:54 -05:00
385 lines
11 KiB
Plaintext
385 lines
11 KiB
Plaintext
# distinct *
|
|
select distinct *
|
|
from testtbl
|
|
---- PLAN
|
|
Plan Fragment 0
|
|
UNPARTITIONED
|
|
AGGREGATE
|
|
OUTPUT:
|
|
GROUP BY: testtbl.id, testtbl.name, testtbl.zip
|
|
TUPLE IDS: 1
|
|
SCAN HDFS table=default.testtbl (0)
|
|
TUPLE IDS: 0
|
|
---- DISTRIBUTEDPLAN
|
|
Plan Fragment 0
|
|
UNPARTITIONED
|
|
AGGREGATE
|
|
OUTPUT:
|
|
GROUP BY: <slot 3>, <slot 4>, <slot 5>
|
|
TUPLE IDS: 1
|
|
EXCHANGE (2)
|
|
TUPLE IDS: 1
|
|
|
|
Plan Fragment 1
|
|
RANDOM
|
|
STREAM DATA SINK
|
|
EXCHANGE ID: 2
|
|
UNPARTITIONED
|
|
|
|
AGGREGATE
|
|
OUTPUT:
|
|
GROUP BY: testtbl.id, testtbl.name, testtbl.zip
|
|
TUPLE IDS: 1
|
|
SCAN HDFS table=default.testtbl (0)
|
|
TUPLE IDS: 0
|
|
---- SCANRANGELOCATIONS
|
|
NODE 0:
|
|
====
|
|
# distinct w/ explicit select list
|
|
select distinct id, zip
|
|
from testtbl
|
|
---- PLAN
|
|
Plan Fragment 0
|
|
UNPARTITIONED
|
|
AGGREGATE
|
|
OUTPUT:
|
|
GROUP BY: id, zip
|
|
TUPLE IDS: 1
|
|
SCAN HDFS table=default.testtbl (0)
|
|
TUPLE IDS: 0
|
|
---- DISTRIBUTEDPLAN
|
|
Plan Fragment 0
|
|
UNPARTITIONED
|
|
AGGREGATE
|
|
OUTPUT:
|
|
GROUP BY: <slot 2>, <slot 3>
|
|
TUPLE IDS: 1
|
|
EXCHANGE (2)
|
|
TUPLE IDS: 1
|
|
|
|
Plan Fragment 1
|
|
RANDOM
|
|
STREAM DATA SINK
|
|
EXCHANGE ID: 2
|
|
UNPARTITIONED
|
|
|
|
AGGREGATE
|
|
OUTPUT:
|
|
GROUP BY: id, zip
|
|
TUPLE IDS: 1
|
|
SCAN HDFS table=default.testtbl (0)
|
|
TUPLE IDS: 0
|
|
---- SCANRANGELOCATIONS
|
|
NODE 0:
|
|
====
|
|
# count(distinct)
|
|
select count(distinct id, zip)
|
|
from testtbl
|
|
---- PLAN
|
|
Plan Fragment 0
|
|
UNPARTITIONED
|
|
AGGREGATE
|
|
OUTPUT: COUNT(*)
|
|
GROUP BY:
|
|
TUPLE IDS: 2
|
|
AGGREGATE
|
|
OUTPUT:
|
|
GROUP BY: id, zip
|
|
TUPLE IDS: 1
|
|
SCAN HDFS table=default.testtbl (0)
|
|
TUPLE IDS: 0
|
|
---- DISTRIBUTEDPLAN
|
|
Plan Fragment 0
|
|
UNPARTITIONED
|
|
AGGREGATE
|
|
OUTPUT: COUNT(*)
|
|
GROUP BY:
|
|
TUPLE IDS: 2
|
|
AGGREGATE
|
|
OUTPUT:
|
|
GROUP BY: <slot 2>, <slot 3>
|
|
TUPLE IDS: 1
|
|
EXCHANGE (3)
|
|
TUPLE IDS: 1
|
|
|
|
Plan Fragment 1
|
|
RANDOM
|
|
STREAM DATA SINK
|
|
EXCHANGE ID: 3
|
|
UNPARTITIONED
|
|
|
|
AGGREGATE
|
|
OUTPUT:
|
|
GROUP BY: id, zip
|
|
TUPLE IDS: 1
|
|
SCAN HDFS table=default.testtbl (0)
|
|
TUPLE IDS: 0
|
|
---- SCANRANGELOCATIONS
|
|
NODE 0:
|
|
====
|
|
# count(distinct) w/ grouping
|
|
select tinyint_col, count(distinct int_col, bigint_col)
|
|
from alltypesagg
|
|
group by 1
|
|
---- PLAN
|
|
Plan Fragment 0
|
|
UNPARTITIONED
|
|
AGGREGATE
|
|
OUTPUT: COUNT(*)
|
|
GROUP BY: <slot 3>
|
|
TUPLE IDS: 2
|
|
AGGREGATE
|
|
OUTPUT:
|
|
GROUP BY: tinyint_col, int_col, bigint_col
|
|
TUPLE IDS: 1
|
|
SCAN HDFS table=default.alltypesagg (0)
|
|
TUPLE IDS: 0
|
|
---- DISTRIBUTEDPLAN
|
|
Plan Fragment 0
|
|
UNPARTITIONED
|
|
AGGREGATE
|
|
OUTPUT: COUNT(*)
|
|
GROUP BY: <slot 3>
|
|
TUPLE IDS: 2
|
|
AGGREGATE
|
|
OUTPUT:
|
|
GROUP BY: <slot 3>, <slot 4>, <slot 5>
|
|
TUPLE IDS: 1
|
|
EXCHANGE (3)
|
|
TUPLE IDS: 1
|
|
|
|
Plan Fragment 1
|
|
RANDOM
|
|
STREAM DATA SINK
|
|
EXCHANGE ID: 3
|
|
UNPARTITIONED
|
|
|
|
AGGREGATE
|
|
OUTPUT:
|
|
GROUP BY: tinyint_col, int_col, bigint_col
|
|
TUPLE IDS: 1
|
|
SCAN HDFS table=default.alltypesagg (0)
|
|
TUPLE IDS: 0
|
|
---- SCANRANGELOCATIONS
|
|
NODE 0:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=1/100101.txt 0:75153
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=10/100110.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=2/100102.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=3/100103.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=4/100104.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=5/100105.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=6/100106.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=7/100107.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=8/100108.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=9/100109.txt 0:76263
|
|
LOCATIONS:
|
|
====
|
|
# count(distinct) and sum(distinct) w/ grouping
|
|
select tinyint_col, count(distinct int_col), sum(distinct int_col)
|
|
from alltypesagg
|
|
group by 1
|
|
---- PLAN
|
|
Plan Fragment 0
|
|
UNPARTITIONED
|
|
AGGREGATE
|
|
OUTPUT: COUNT(*), SUM(<slot 3>)
|
|
GROUP BY: <slot 2>
|
|
TUPLE IDS: 2
|
|
AGGREGATE
|
|
OUTPUT:
|
|
GROUP BY: tinyint_col, int_col
|
|
TUPLE IDS: 1
|
|
SCAN HDFS table=default.alltypesagg (0)
|
|
TUPLE IDS: 0
|
|
---- DISTRIBUTEDPLAN
|
|
Plan Fragment 0
|
|
UNPARTITIONED
|
|
AGGREGATE
|
|
OUTPUT: COUNT(*), SUM(<slot 3>)
|
|
GROUP BY: <slot 2>
|
|
TUPLE IDS: 2
|
|
AGGREGATE
|
|
OUTPUT:
|
|
GROUP BY: <slot 2>, <slot 3>
|
|
TUPLE IDS: 1
|
|
EXCHANGE (3)
|
|
TUPLE IDS: 1
|
|
|
|
Plan Fragment 1
|
|
RANDOM
|
|
STREAM DATA SINK
|
|
EXCHANGE ID: 3
|
|
UNPARTITIONED
|
|
|
|
AGGREGATE
|
|
OUTPUT:
|
|
GROUP BY: tinyint_col, int_col
|
|
TUPLE IDS: 1
|
|
SCAN HDFS table=default.alltypesagg (0)
|
|
TUPLE IDS: 0
|
|
---- SCANRANGELOCATIONS
|
|
NODE 0:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=1/100101.txt 0:75153
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=10/100110.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=2/100102.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=3/100103.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=4/100104.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=5/100105.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=6/100106.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=7/100107.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=8/100108.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=9/100109.txt 0:76263
|
|
LOCATIONS:
|
|
====
|
|
# count(distinct) w/ grouping; distinct in min() and max()
|
|
# is ignored
|
|
select tinyint_col, count(distinct int_col),
|
|
min(distinct smallint_col), max(distinct string_col)
|
|
from alltypesagg group by 1
|
|
---- PLAN
|
|
Plan Fragment 0
|
|
UNPARTITIONED
|
|
AGGREGATE
|
|
OUTPUT: COUNT(*), MIN(<slot 6>), MAX(<slot 7>)
|
|
GROUP BY: <slot 4>
|
|
TUPLE IDS: 2
|
|
AGGREGATE
|
|
OUTPUT: MIN(smallint_col), MAX(string_col)
|
|
GROUP BY: tinyint_col, int_col
|
|
TUPLE IDS: 1
|
|
SCAN HDFS table=default.alltypesagg (0)
|
|
TUPLE IDS: 0
|
|
---- DISTRIBUTEDPLAN
|
|
Plan Fragment 0
|
|
UNPARTITIONED
|
|
AGGREGATE
|
|
OUTPUT: COUNT(*), MIN(<slot 6>), MAX(<slot 7>)
|
|
GROUP BY: <slot 4>
|
|
TUPLE IDS: 2
|
|
AGGREGATE
|
|
OUTPUT: MIN(<slot 6>), MAX(<slot 7>)
|
|
GROUP BY: <slot 4>, <slot 5>
|
|
TUPLE IDS: 1
|
|
EXCHANGE (3)
|
|
TUPLE IDS: 1
|
|
|
|
Plan Fragment 1
|
|
RANDOM
|
|
STREAM DATA SINK
|
|
EXCHANGE ID: 3
|
|
UNPARTITIONED
|
|
|
|
AGGREGATE
|
|
OUTPUT: MIN(smallint_col), MAX(string_col)
|
|
GROUP BY: tinyint_col, int_col
|
|
TUPLE IDS: 1
|
|
SCAN HDFS table=default.alltypesagg (0)
|
|
TUPLE IDS: 0
|
|
---- SCANRANGELOCATIONS
|
|
NODE 0:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=1/100101.txt 0:75153
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=10/100110.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=2/100102.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=3/100103.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=4/100104.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=5/100105.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=6/100106.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=7/100107.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=8/100108.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=9/100109.txt 0:76263
|
|
LOCATIONS:
|
|
====
|
|
# aggregate fns with and without distinct
|
|
select tinyint_col, count(distinct int_col), count(*), sum(distinct int_col),
|
|
sum(int_col), min(smallint_col), max(bigint_col)
|
|
from alltypesagg group by 1
|
|
---- PLAN
|
|
Plan Fragment 0
|
|
UNPARTITIONED
|
|
AGGREGATE
|
|
OUTPUT: COUNT(*), SUM(<slot 5>), SUM(<slot 6>), SUM(<slot 7>), MIN(<slot 8>), MAX(<slot 9>)
|
|
GROUP BY: <slot 4>
|
|
TUPLE IDS: 2
|
|
AGGREGATE
|
|
OUTPUT: COUNT(*), SUM(int_col), MIN(smallint_col), MAX(bigint_col)
|
|
GROUP BY: tinyint_col, int_col
|
|
TUPLE IDS: 1
|
|
SCAN HDFS table=default.alltypesagg (0)
|
|
TUPLE IDS: 0
|
|
---- DISTRIBUTEDPLAN
|
|
Plan Fragment 0
|
|
UNPARTITIONED
|
|
AGGREGATE
|
|
OUTPUT: COUNT(*), SUM(<slot 5>), SUM(<slot 6>), SUM(<slot 7>), MIN(<slot 8>), MAX(<slot 9>)
|
|
GROUP BY: <slot 4>
|
|
TUPLE IDS: 2
|
|
AGGREGATE
|
|
OUTPUT: SUM(<slot 6>), SUM(<slot 7>), MIN(<slot 8>), MAX(<slot 9>)
|
|
GROUP BY: <slot 4>, <slot 5>
|
|
TUPLE IDS: 1
|
|
EXCHANGE (3)
|
|
TUPLE IDS: 1
|
|
|
|
Plan Fragment 1
|
|
RANDOM
|
|
STREAM DATA SINK
|
|
EXCHANGE ID: 3
|
|
UNPARTITIONED
|
|
|
|
AGGREGATE
|
|
OUTPUT: COUNT(*), SUM(int_col), MIN(smallint_col), MAX(bigint_col)
|
|
GROUP BY: tinyint_col, int_col
|
|
TUPLE IDS: 1
|
|
SCAN HDFS table=default.alltypesagg (0)
|
|
TUPLE IDS: 0
|
|
---- SCANRANGELOCATIONS
|
|
NODE 0:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=1/100101.txt 0:75153
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=10/100110.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=2/100102.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=3/100103.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=4/100104.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=5/100105.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=6/100106.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=7/100107.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=8/100108.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=9/100109.txt 0:76263
|
|
LOCATIONS:
|
|
====
|