# impala/testdata/workloads/functional-planner/queries/PlannerTest/distinct.test
# Planner test cases for DISTINCT select lists and DISTINCT aggregate functions
# (count/sum/min/max), with single-node and distributed plans plus scan range
# locations. Sections are delimited by "====".

# distinct *
select distinct *
from testtbl
---- PLAN
Plan Fragment 0
UNPARTITIONED
AGGREGATE
OUTPUT:
GROUP BY: testtbl.id, testtbl.name, testtbl.zip
TUPLE IDS: 1
SCAN HDFS table=default.testtbl (0)
TUPLE IDS: 0
---- DISTRIBUTEDPLAN
Plan Fragment 0
UNPARTITIONED
AGGREGATE
OUTPUT:
GROUP BY: <slot 3>, <slot 4>, <slot 5>
TUPLE IDS: 1
EXCHANGE (2)
TUPLE IDS: 1
Plan Fragment 1
RANDOM
STREAM DATA SINK
EXCHANGE ID: 2
UNPARTITIONED
AGGREGATE
OUTPUT:
GROUP BY: testtbl.id, testtbl.name, testtbl.zip
TUPLE IDS: 1
SCAN HDFS table=default.testtbl (0)
TUPLE IDS: 0
---- SCANRANGELOCATIONS
NODE 0:
====
# distinct w/ explicit select list
select distinct id, zip
from testtbl
---- PLAN
Plan Fragment 0
UNPARTITIONED
AGGREGATE
OUTPUT:
GROUP BY: id, zip
TUPLE IDS: 1
SCAN HDFS table=default.testtbl (0)
TUPLE IDS: 0
---- DISTRIBUTEDPLAN
Plan Fragment 0
UNPARTITIONED
AGGREGATE
OUTPUT:
GROUP BY: <slot 2>, <slot 3>
TUPLE IDS: 1
EXCHANGE (2)
TUPLE IDS: 1
Plan Fragment 1
RANDOM
STREAM DATA SINK
EXCHANGE ID: 2
UNPARTITIONED
AGGREGATE
OUTPUT:
GROUP BY: id, zip
TUPLE IDS: 1
SCAN HDFS table=default.testtbl (0)
TUPLE IDS: 0
---- SCANRANGELOCATIONS
NODE 0:
====
# count(distinct)
select count(distinct id, zip)
from testtbl
---- PLAN
Plan Fragment 0
UNPARTITIONED
AGGREGATE
OUTPUT: COUNT(*)
GROUP BY:
TUPLE IDS: 2
AGGREGATE
OUTPUT:
GROUP BY: id, zip
TUPLE IDS: 1
SCAN HDFS table=default.testtbl (0)
TUPLE IDS: 0
---- DISTRIBUTEDPLAN
Plan Fragment 0
UNPARTITIONED
AGGREGATE
OUTPUT: COUNT(*)
GROUP BY:
TUPLE IDS: 2
AGGREGATE
OUTPUT:
GROUP BY: <slot 2>, <slot 3>
TUPLE IDS: 1
EXCHANGE (3)
TUPLE IDS: 1
Plan Fragment 1
RANDOM
STREAM DATA SINK
EXCHANGE ID: 3
UNPARTITIONED
AGGREGATE
OUTPUT:
GROUP BY: id, zip
TUPLE IDS: 1
SCAN HDFS table=default.testtbl (0)
TUPLE IDS: 0
---- SCANRANGELOCATIONS
NODE 0:
====
# count(distinct) w/ grouping
select tinyint_col, count(distinct int_col, bigint_col)
from alltypesagg
group by 1
---- PLAN
Plan Fragment 0
UNPARTITIONED
AGGREGATE
OUTPUT: COUNT(*)
GROUP BY: <slot 3>
TUPLE IDS: 2
AGGREGATE
OUTPUT:
GROUP BY: tinyint_col, int_col, bigint_col
TUPLE IDS: 1
SCAN HDFS table=default.alltypesagg (0)
TUPLE IDS: 0
---- DISTRIBUTEDPLAN
Plan Fragment 0
UNPARTITIONED
AGGREGATE
OUTPUT: COUNT(*)
GROUP BY: <slot 3>
TUPLE IDS: 2
AGGREGATE
OUTPUT:
GROUP BY: <slot 3>, <slot 4>, <slot 5>
TUPLE IDS: 1
EXCHANGE (3)
TUPLE IDS: 1
Plan Fragment 1
RANDOM
STREAM DATA SINK
EXCHANGE ID: 3
UNPARTITIONED
AGGREGATE
OUTPUT:
GROUP BY: tinyint_col, int_col, bigint_col
TUPLE IDS: 1
SCAN HDFS table=default.alltypesagg (0)
TUPLE IDS: 0
---- SCANRANGELOCATIONS
NODE 0:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=1/100101.txt 0:75153
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=10/100110.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=2/100102.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=3/100103.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=4/100104.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=5/100105.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=6/100106.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=7/100107.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=8/100108.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=9/100109.txt 0:76263
LOCATIONS:
====
# count(distinct) and sum(distinct) w/ grouping
select tinyint_col, count(distinct int_col), sum(distinct int_col)
from alltypesagg
group by 1
---- PLAN
Plan Fragment 0
UNPARTITIONED
AGGREGATE
OUTPUT: COUNT(*), SUM(<slot 3>)
GROUP BY: <slot 2>
TUPLE IDS: 2
AGGREGATE
OUTPUT:
GROUP BY: tinyint_col, int_col
TUPLE IDS: 1
SCAN HDFS table=default.alltypesagg (0)
TUPLE IDS: 0
---- DISTRIBUTEDPLAN
Plan Fragment 0
UNPARTITIONED
AGGREGATE
OUTPUT: COUNT(*), SUM(<slot 3>)
GROUP BY: <slot 2>
TUPLE IDS: 2
AGGREGATE
OUTPUT:
GROUP BY: <slot 2>, <slot 3>
TUPLE IDS: 1
EXCHANGE (3)
TUPLE IDS: 1
Plan Fragment 1
RANDOM
STREAM DATA SINK
EXCHANGE ID: 3
UNPARTITIONED
AGGREGATE
OUTPUT:
GROUP BY: tinyint_col, int_col
TUPLE IDS: 1
SCAN HDFS table=default.alltypesagg (0)
TUPLE IDS: 0
---- SCANRANGELOCATIONS
NODE 0:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=1/100101.txt 0:75153
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=10/100110.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=2/100102.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=3/100103.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=4/100104.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=5/100105.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=6/100106.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=7/100107.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=8/100108.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=9/100109.txt 0:76263
LOCATIONS:
====
# count(distinct) and sum(distinct) w/ grouping; distinct in min() and max()
# is ignored
select tinyint_col, count(distinct int_col),
min(distinct smallint_col), max(distinct string_col)
from alltypesagg group by 1
---- PLAN
Plan Fragment 0
UNPARTITIONED
AGGREGATE
OUTPUT: COUNT(*), MIN(<slot 6>), MAX(<slot 7>)
GROUP BY: <slot 4>
TUPLE IDS: 2
AGGREGATE
OUTPUT: MIN(smallint_col), MAX(string_col)
GROUP BY: tinyint_col, int_col
TUPLE IDS: 1
SCAN HDFS table=default.alltypesagg (0)
TUPLE IDS: 0
---- DISTRIBUTEDPLAN
Plan Fragment 0
UNPARTITIONED
AGGREGATE
OUTPUT: COUNT(*), MIN(<slot 6>), MAX(<slot 7>)
GROUP BY: <slot 4>
TUPLE IDS: 2
AGGREGATE
OUTPUT: MIN(<slot 6>), MAX(<slot 7>)
GROUP BY: <slot 4>, <slot 5>
TUPLE IDS: 1
EXCHANGE (3)
TUPLE IDS: 1
Plan Fragment 1
RANDOM
STREAM DATA SINK
EXCHANGE ID: 3
UNPARTITIONED
AGGREGATE
OUTPUT: MIN(smallint_col), MAX(string_col)
GROUP BY: tinyint_col, int_col
TUPLE IDS: 1
SCAN HDFS table=default.alltypesagg (0)
TUPLE IDS: 0
---- SCANRANGELOCATIONS
NODE 0:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=1/100101.txt 0:75153
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=10/100110.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=2/100102.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=3/100103.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=4/100104.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=5/100105.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=6/100106.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=7/100107.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=8/100108.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=9/100109.txt 0:76263
LOCATIONS:
====
# aggregate fns with and without distinct
select tinyint_col, count(distinct int_col), count(*), sum(distinct int_col),
sum(int_col), min(smallint_col), max(bigint_col)
from alltypesagg group by 1
---- PLAN
Plan Fragment 0
UNPARTITIONED
AGGREGATE
OUTPUT: COUNT(*), SUM(<slot 5>), SUM(<slot 6>), SUM(<slot 7>), MIN(<slot 8>), MAX(<slot 9>)
GROUP BY: <slot 4>
TUPLE IDS: 2
AGGREGATE
OUTPUT: COUNT(*), SUM(int_col), MIN(smallint_col), MAX(bigint_col)
GROUP BY: tinyint_col, int_col
TUPLE IDS: 1
SCAN HDFS table=default.alltypesagg (0)
TUPLE IDS: 0
---- DISTRIBUTEDPLAN
Plan Fragment 0
UNPARTITIONED
AGGREGATE
OUTPUT: COUNT(*), SUM(<slot 5>), SUM(<slot 6>), SUM(<slot 7>), MIN(<slot 8>), MAX(<slot 9>)
GROUP BY: <slot 4>
TUPLE IDS: 2
AGGREGATE
OUTPUT: SUM(<slot 6>), SUM(<slot 7>), MIN(<slot 8>), MAX(<slot 9>)
GROUP BY: <slot 4>, <slot 5>
TUPLE IDS: 1
EXCHANGE (3)
TUPLE IDS: 1
Plan Fragment 1
RANDOM
STREAM DATA SINK
EXCHANGE ID: 3
UNPARTITIONED
AGGREGATE
OUTPUT: COUNT(*), SUM(int_col), MIN(smallint_col), MAX(bigint_col)
GROUP BY: tinyint_col, int_col
TUPLE IDS: 1
SCAN HDFS table=default.alltypesagg (0)
TUPLE IDS: 0
---- SCANRANGELOCATIONS
NODE 0:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=1/100101.txt 0:75153
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=10/100110.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=2/100102.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=3/100103.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=4/100104.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=5/100105.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=6/100106.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=7/100107.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=8/100108.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=9/100109.txt 0:76263
LOCATIONS:
====