Files
impala/testdata/workloads/functional-query/queries/QueryTest/explain-level3.test
Alex Behm 15e05082c0 IMPALA-831: Distributed aggregation and top-n over unions.
Change-Id: I056e8271421008378db93e8b2393861cc9dd4b90
Reviewed-on: http://gerrit.ent.cloudera.com:8080/1840
Reviewed-by: Alex Behm <alex.behm@cloudera.com>
Tested-by: jenkins
Reviewed-on: http://gerrit.ent.cloudera.com:8080/1886
2014-03-13 15:42:31 -07:00

405 lines
15 KiB
Plaintext

====
---- QUERY
# Tests explaining a query (TPCDS-Q19)
# Joins store_sales with item, customer, customer_address and store, then computes a
# grouped sum with ORDER BY ... LIMIT 100. The expected plan contains a two-phase
# aggregation (09:AGGREGATE merged by 17:AGGREGATE [MERGE FINALIZE]) and a two-phase
# top-n (10:TOP-N merged by 19:TOP-N in the unpartitioned fragment).
explain
select
i_brand_id,
i_brand,
i_manufact_id,
i_manufact,
sum(ss_ext_sales_price) ext_price
from
tpcds.store_sales
join tpcds.item on (store_sales.ss_item_sk = item.i_item_sk)
join tpcds.customer on (store_sales.ss_customer_sk = customer.c_customer_sk)
join tpcds.customer_address on (customer.c_current_addr_sk = customer_address.ca_address_sk)
join tpcds.store on (store_sales.ss_store_sk = store.s_store_sk)
where
ss_date between '1999-11-01' and '1999-11-30'
and i_manager_id = 7
and substr(ca_zip, 1, 5) <> substr(s_zip, 1, 5)
group by
i_brand,
i_brand_id,
i_manufact_id,
i_manufact
order by
ext_price desc,
i_brand,
i_brand_id,
i_manufact_id,
i_manufact
limit 100
---- RESULTS
'Estimated Per-Host Requirements: Memory=72.66MB VCores=5'
''
'F07:PLAN FRAGMENT [PARTITION=UNPARTITIONED]'
' 19:TOP-N [LIMIT=100]'
' | order by: sum(ss_ext_sales_price) DESC, i_brand ASC, i_brand_id ASC, i_manufact_id ASC, i_manufact ASC'
' | hosts=3 per-host-mem=unavailable'
' | tuple-ids=5 row-size=48B cardinality=100'
' |'
' 18:EXCHANGE [PARTITION=UNPARTITIONED]'
' hosts=3 per-host-mem=unavailable'
' tuple-ids=5 row-size=48B cardinality=100'
''
'F06:PLAN FRAGMENT [PARTITION=HASH(i_brand,i_brand_id,i_manufact_id,i_manufact)]'
' DATASTREAM SINK [FRAGMENT=F07, EXCHANGE=18, PARTITION=UNPARTITIONED]'
' 10:TOP-N [LIMIT=100]'
' | order by: sum(ss_ext_sales_price) DESC, i_brand ASC, i_brand_id ASC, i_manufact_id ASC, i_manufact ASC'
' | hosts=3 per-host-mem=4.69KB'
' | tuple-ids=5 row-size=48B cardinality=100'
' |'
' 17:AGGREGATE [MERGE FINALIZE]'
' | output: sum(sum(ss_ext_sales_price))'
' | group by: i_brand, i_brand_id, i_manufact_id, i_manufact'
' | hosts=3 per-host-mem=10.00MB'
' | tuple-ids=5 row-size=48B cardinality=87208'
' |'
' 16:EXCHANGE [PARTITION=HASH(i_brand,i_brand_id,i_manufact_id,i_manufact)]'
' hosts=3 per-host-mem=0B'
' tuple-ids=5 row-size=48B cardinality=87208'
''
'F02:PLAN FRAGMENT [PARTITION=HASH(customer_address.ca_address_sk)]'
' DATASTREAM SINK [FRAGMENT=F06, EXCHANGE=16, PARTITION=HASH(i_brand,i_brand_id,i_manufact_id,i_manufact)]'
' 09:AGGREGATE'
' | output: sum(ss_ext_sales_price)'
' | group by: i_brand, i_brand_id, i_manufact_id, i_manufact'
' | hosts=3 per-host-mem=10.00MB'
' | tuple-ids=5 row-size=48B cardinality=87208'
' |'
' 08:HASH JOIN [INNER JOIN, BROADCAST]'
' | hash predicates: store_sales.ss_store_sk = store.s_store_sk'
' | other predicates: substr(ca_zip, 1, 5) != substr(s_zip, 1, 5)'
' | hosts=3 per-host-mem=383B'
' | tuple-ids=3,2,0,1,4 row-size=192B cardinality=87208'
' |'
' |--15:EXCHANGE [BROADCAST]'
' | hosts=3 per-host-mem=0B'
' | tuple-ids=4 row-size=29B cardinality=12'
' |'
' 07:HASH JOIN [INNER JOIN, BROADCAST]'
' | hash predicates: store_sales.ss_item_sk = item.i_item_sk'
' | hosts=3 per-host-mem=14.60KB'
' | tuple-ids=3,2,0,1 row-size=163B cardinality=87208'
' |'
' |--14:EXCHANGE [BROADCAST]'
' | hosts=3 per-host-mem=0B'
' | tuple-ids=1 row-size=79B cardinality=171'
' |'
' 06:HASH JOIN [INNER JOIN, BROADCAST]'
' | hash predicates: customer.c_customer_sk = store_sales.ss_customer_sk'
' | hosts=3 per-host-mem=234.75KB'
' | tuple-ids=3,2,0 row-size=84B cardinality=87208'
' |'
' |--13:EXCHANGE [BROADCAST]'
' | hosts=3 per-host-mem=0B'
' | tuple-ids=0 row-size=43B cardinality=5082'
' |'
' 05:HASH JOIN [INNER JOIN, PARTITIONED]'
' | hash predicates: customer_address.ca_address_sk = customer.c_current_addr_sk'
' | hosts=3 per-host-mem=429.69KB'
' | tuple-ids=3,2 row-size=41B cardinality=87208'
' |'
' |--12:EXCHANGE [PARTITION=HASH(customer.c_current_addr_sk)]'
' | hosts=3 per-host-mem=0B'
' | tuple-ids=2 row-size=12B cardinality=100000'
' |'
' 11:EXCHANGE [PARTITION=HASH(customer_address.ca_address_sk)]'
' hosts=3 per-host-mem=0B'
' tuple-ids=3 row-size=29B cardinality=50000'
''
'F05:PLAN FRAGMENT [PARTITION=RANDOM]'
' DATASTREAM SINK [FRAGMENT=F02, EXCHANGE=15, BROADCAST]'
' 04:SCAN HDFS [tpcds.store, PARTITION=RANDOM]'
' partitions=1/1 size=3.08KB'
' table stats: 12 rows total'
' column stats: all'
' hosts=3 per-host-mem=16.00MB'
' tuple-ids=4 row-size=29B cardinality=12'
''
'F04:PLAN FRAGMENT [PARTITION=RANDOM]'
' DATASTREAM SINK [FRAGMENT=F02, EXCHANGE=14, BROADCAST]'
' 01:SCAN HDFS [tpcds.item, PARTITION=RANDOM]'
' partitions=1/1 size=4.82MB'
' predicates: i_manager_id = 7'
' table stats: 18000 rows total'
' column stats: all'
' hosts=3 per-host-mem=16.00MB'
' tuple-ids=1 row-size=79B cardinality=171'
''
'F03:PLAN FRAGMENT [PARTITION=RANDOM]'
' DATASTREAM SINK [FRAGMENT=F02, EXCHANGE=13, BROADCAST]'
' 00:SCAN HDFS [tpcds.store_sales, PARTITION=RANDOM]'
' partitions=2/120 size=663.52KB'
' table stats: 183592 rows total'
' column stats: all'
' hosts=3 per-host-mem=16.00MB'
' tuple-ids=0 row-size=43B cardinality=5082'
''
'F01:PLAN FRAGMENT [PARTITION=RANDOM]'
' DATASTREAM SINK [FRAGMENT=F02, EXCHANGE=12, PARTITION=HASH(customer.c_current_addr_sk)]'
' 02:SCAN HDFS [tpcds.customer, PARTITION=RANDOM]'
' partitions=1/1 size=12.60MB'
' table stats: 100000 rows total'
' column stats: all'
' hosts=3 per-host-mem=24.00MB'
' tuple-ids=2 row-size=12B cardinality=100000'
''
'F00:PLAN FRAGMENT [PARTITION=RANDOM]'
' DATASTREAM SINK [FRAGMENT=F02, EXCHANGE=11, PARTITION=HASH(customer_address.ca_address_sk)]'
' 03:SCAN HDFS [tpcds.customer_address, PARTITION=RANDOM]'
' partitions=1/1 size=5.25MB'
' table stats: 50000 rows total'
' column stats: all'
' hosts=3 per-host-mem=16.00MB'
' tuple-ids=3 row-size=29B cardinality=50000'
====
---- QUERY
# Tests explaining an insert query
# INSERT OVERWRITE into a table partitioned by (year, month), with both partition
# columns supplied by the select list. The source is a UNION DISTINCT of a left semi
# join and a plain scan; the expected plan pre-aggregates each union operand
# (05:AGGREGATE, 11:AGGREGATE) and merges them in 13:AGGREGATE [MERGE FINALIZE]
# before repartitioning on (year, month) for the HDFS write.
explain insert overwrite functional.alltypessmall (id, string_col)
partition (year, month)
select a.id, a.string_col, a.year, a.month from functional.alltypes a
left semi join functional.alltypesagg b on (a.id = b.id)
where a.year > 2009 and a.month = 4
union distinct
select id, string_col, year, month from functional.alltypes
---- RESULTS
'Estimated Per-Host Requirements: Memory=30.00MB VCores=3'
''
'F05:PLAN FRAGMENT [PARTITION=HASH(year,month)]'
' WRITE TO HDFS [functional.alltypessmall, OVERWRITE=true, PARTITION-KEYS=(year,month)]'
' | partitions=96'
' | hosts=3 per-host-mem=69.26KB'
' |'
' 14:EXCHANGE [PARTITION=HASH(year,month)]'
' hosts=3 per-host-mem=0B'
' tuple-ids=3 row-size=28B cardinality=7600'
''
'F04:PLAN FRAGMENT [PARTITION=HASH(id,string_col,year,month)]'
' DATASTREAM SINK [FRAGMENT=F05, EXCHANGE=14, PARTITION=HASH(year,month)]'
' 13:AGGREGATE [MERGE FINALIZE]'
' | group by: id, string_col, year, month'
' | hosts=3 per-host-mem=10.00MB'
' | tuple-ids=3 row-size=28B cardinality=7600'
' |'
' 12:EXCHANGE [PARTITION=HASH(id,string_col,year,month)]'
' hosts=3 per-host-mem=0B'
' tuple-ids=3 row-size=28B cardinality=7600'
''
'F03:PLAN FRAGMENT [PARTITION=RANDOM]'
' DATASTREAM SINK [FRAGMENT=F04, EXCHANGE=12, PARTITION=HASH(id,string_col,year,month)]'
' 11:AGGREGATE'
' | group by: id, string_col, year, month'
' | hosts=3 per-host-mem=10.00MB'
' | tuple-ids=3 row-size=28B cardinality=7300'
' |'
' 10:MERGE'
' | hosts=3 per-host-mem=0B'
' | tuple-ids=3 row-size=28B cardinality=7300'
' |'
' 04:SCAN HDFS [functional.alltypes, PARTITION=RANDOM]'
' partitions=24/24 size=478.45KB'
' table stats: 7300 rows total'
' column stats: all'
' hosts=3 per-host-mem=16.00MB'
' tuple-ids=2 row-size=29B cardinality=7300'
''
'F02:PLAN FRAGMENT [PARTITION=HASH(a.id)]'
' DATASTREAM SINK [FRAGMENT=F04, EXCHANGE=12, PARTITION=HASH(id,string_col,year,month)]'
' 05:AGGREGATE'
' | group by: id, string_col, year, month'
' | hosts=3 per-host-mem=10.00MB'
' | tuple-ids=3 row-size=28B cardinality=300'
' |'
' 09:MERGE'
' | hosts=3 per-host-mem=0B'
' | tuple-ids=3 row-size=28B cardinality=300'
' |'
' 03:HASH JOIN [LEFT SEMI JOIN, PARTITIONED]'
' | hash predicates: a.id = b.id'
' | hosts=3 per-host-mem=14.32KB'
' | tuple-ids=0,1 row-size=33B cardinality=300'
' |'
' |--07:EXCHANGE [PARTITION=HASH(b.id)]'
' | hosts=3 per-host-mem=0B'
' | tuple-ids=1 row-size=4B cardinality=10000'
' |'
' 06:EXCHANGE [PARTITION=HASH(a.id)]'
' hosts=3 per-host-mem=0B'
' tuple-ids=0 row-size=29B cardinality=300'
''
'F01:PLAN FRAGMENT [PARTITION=RANDOM]'
' DATASTREAM SINK [FRAGMENT=F02, EXCHANGE=07, PARTITION=HASH(b.id)]'
' 02:SCAN HDFS [functional.alltypesagg b, PARTITION=RANDOM]'
' partitions=10/10 size=743.67KB'
' table stats: 10000 rows total'
' column stats: all'
' hosts=3 per-host-mem=16.00MB'
' tuple-ids=1 row-size=4B cardinality=10000'
''
'F00:PLAN FRAGMENT [PARTITION=RANDOM]'
' DATASTREAM SINK [FRAGMENT=F02, EXCHANGE=06, PARTITION=HASH(a.id)]'
' 01:SCAN HDFS [functional.alltypes a, PARTITION=RANDOM]'
' partitions=1/24 size=19.71KB'
' table stats: 7300 rows total'
' column stats: all'
' hosts=3 per-host-mem=16.00MB'
' tuple-ids=0 row-size=29B cardinality=300'
====
---- QUERY
# Tests explaining an insert query to/from an HBase table
# The source is a UNION ALL of (an HBase scan cross-joined with an HDFS table) and a
# second HBase scan. In the expected plan both union operands send their rows through
# 06:EXCHANGE to a single unpartitioned fragment that holds the WRITE TO HBASE sink.
explain insert into functional_hbase.alltypes
select a.* from functional_hbase.alltypessmall a
cross join functional.alltypessmall b
where a.year > 2009 and a.month = 4
union all
select * from functional_hbase.alltypessmall
---- RESULTS
'Estimated Per-Host Requirements: Memory=1.02GB VCores=3'
''
'F03:PLAN FRAGMENT [PARTITION=UNPARTITIONED]'
' WRITE TO HBASE table=functional_hbase.alltypes'
' | hosts=1 per-host-mem=unavailable'
' |'
' 06:EXCHANGE [PARTITION=UNPARTITIONED]'
' hosts=100 per-host-mem=unavailable'
' tuple-ids=3 row-size=88B cardinality=596'
''
'F02:PLAN FRAGMENT [PARTITION=RANDOM]'
' DATASTREAM SINK [FRAGMENT=F03, EXCHANGE=06, PARTITION=UNPARTITIONED]'
' 08:MERGE'
' | hosts=100 per-host-mem=0B'
' | tuple-ids=3 row-size=88B cardinality=196'
' |'
' 04:SCAN HBASE [functional_hbase.alltypessmall]'
' table stats: 100 rows total'
' column stats: all'
' hosts=100 per-host-mem=1.00GB'
' tuple-ids=2 row-size=97B cardinality=196'
''
'F00:PLAN FRAGMENT [PARTITION=RANDOM]'
' DATASTREAM SINK [FRAGMENT=F03, EXCHANGE=06, PARTITION=UNPARTITIONED]'
' 07:MERGE'
' | hosts=100 per-host-mem=0B'
' | tuple-ids=3 row-size=88B cardinality=400'
' |'
' 03:CROSS JOIN [BROADCAST]'
' | hosts=100 per-host-mem=0B'
' | tuple-ids=0,1 row-size=97B cardinality=400'
' |'
' |--05:EXCHANGE [BROADCAST]'
' | hosts=3 per-host-mem=0B'
' | tuple-ids=1 row-size=0B cardinality=100'
' |'
' 01:SCAN HBASE [functional_hbase.alltypessmall a]'
' predicates: a.year > 2009, a.month = 4'
' table stats: 100 rows total'
' column stats: all'
' hosts=100 per-host-mem=1.00GB'
' tuple-ids=0 row-size=97B cardinality=4'
''
'F01:PLAN FRAGMENT [PARTITION=RANDOM]'
' DATASTREAM SINK [FRAGMENT=F00, EXCHANGE=05, BROADCAST]'
' 02:SCAN HDFS [functional.alltypessmall b, PARTITION=RANDOM]'
' partitions=4/4 size=6.32KB'
' table stats: 100 rows total'
' column stats: all'
' hosts=3 per-host-mem=16.00MB'
' tuple-ids=1 row-size=0B cardinality=100'
====
---- QUERY
# Tests explaining a CTAS (CREATE TABLE AS SELECT) statement.
# The expected plan is a single fragment: the HDFS scan of functional.alltypes feeds
# the WRITE TO HDFS sink for the new table directly, with no exchange.
explain create table t as
select * from functional.alltypes
where month = 2
---- RESULTS
'Estimated Per-Host Requirements: Memory=16.02MB VCores=1'
''
'F00:PLAN FRAGMENT [PARTITION=RANDOM]'
' WRITE TO HDFS [functional.t, OVERWRITE=false]'
' | partitions=1'
' | hosts=3 per-host-mem=17.62KB'
' |'
' 00:SCAN HDFS [functional.alltypes, PARTITION=RANDOM]'
' partitions=2/24 size=36.51KB'
' table stats: 7300 rows total'
' column stats: all'
' hosts=3 per-host-mem=16.00MB'
' tuple-ids=1 row-size=97B cardinality=560'
====
---- QUERY
# Tests the warning about missing table stats in the explain header.
# The expected header lists functional_avro.alltypes and
# functional_parquet.alltypessmall as missing statistics, and the plan nodes that
# depend on them report cardinality=unavailable.
# NOTE(review): the WHERE predicate on t3 (the outer-joined side) is expected to show
# up both as an "other predicates" entry on the left outer join and as a scan
# predicate; presumably intentional for this test - confirm before changing it.
explain select count(t1.int_col), avg(t2.float_col), sum(t3.bigint_col)
from functional_avro.alltypes t1
inner join functional_parquet.alltypessmall t2 on (t1.id = t2.id)
left outer join functional_avro.alltypes t3 on (t2.id = t3.id)
where t1.month = 1 and t2.year = 2009 and t3.bool_col = false
---- RESULTS
'Estimated Per-Host Requirements: Memory=4.03GB VCores=3'
'WARNING: The following tables are missing relevant table and/or column statistics.'
'functional_avro.alltypes, functional_parquet.alltypessmall'
''
'F03:PLAN FRAGMENT [PARTITION=UNPARTITIONED]'
' 09:AGGREGATE [MERGE FINALIZE]'
' | output: sum(count(t1.int_col)), sum(sum(t2.float_col)), sum(count(t2.float_col)), sum(sum(t3.bigint_col))'
' | hosts=3 per-host-mem=unavailable'
' | tuple-ids=3 row-size=32B cardinality=1'
' |'
' 08:EXCHANGE [PARTITION=UNPARTITIONED]'
' hosts=3 per-host-mem=unavailable'
' tuple-ids=3 row-size=32B cardinality=1'
''
'F00:PLAN FRAGMENT [PARTITION=RANDOM]'
' DATASTREAM SINK [FRAGMENT=F03, EXCHANGE=08, PARTITION=UNPARTITIONED]'
' 05:AGGREGATE'
' | output: count(t1.int_col), sum(t2.float_col), count(t2.float_col), sum(t3.bigint_col)'
' | hosts=3 per-host-mem=10.00MB'
' | tuple-ids=3 row-size=32B cardinality=1'
' |'
' 04:HASH JOIN [LEFT OUTER JOIN, BROADCAST]'
' | hash predicates: t2.id = t3.id'
' | other predicates: t3.bool_col = FALSE'
' | hosts=3 per-host-mem=2.00GB'
' | tuple-ids=0,1,2N row-size=37B cardinality=unavailable'
' |'
' |--07:EXCHANGE [BROADCAST]'
' | hosts=3 per-host-mem=0B'
' | tuple-ids=2 row-size=13B cardinality=unavailable'
' |'
' 03:HASH JOIN [INNER JOIN, BROADCAST]'
' | hash predicates: t1.id = t2.id'
' | hosts=3 per-host-mem=2.00GB'
' | tuple-ids=0,1 row-size=24B cardinality=unavailable'
' |'
' |--06:EXCHANGE [BROADCAST]'
' | hosts=3 per-host-mem=0B'
' | tuple-ids=1 row-size=12B cardinality=unavailable'
' |'
' 00:SCAN HDFS [functional_avro.alltypes t1, PARTITION=RANDOM]'
' partitions=2/24 size=39.87KB'
' table stats: unavailable'
' columns missing stats: id, int_col'
' hosts=3 per-host-mem=16.00MB'
' tuple-ids=0 row-size=12B cardinality=unavailable'
''
'F02:PLAN FRAGMENT [PARTITION=RANDOM]'
' DATASTREAM SINK [FRAGMENT=F00, EXCHANGE=07, BROADCAST]'
' 02:SCAN HDFS [functional_avro.alltypes t3, PARTITION=RANDOM]'
' partitions=24/24 size=470.35KB'
' predicates: t3.bool_col = FALSE'
' table stats: unavailable'
' column stats: unavailable'
' hosts=3 per-host-mem=16.00MB'
' tuple-ids=2 row-size=13B cardinality=unavailable'
''
'F01:PLAN FRAGMENT [PARTITION=RANDOM]'
' DATASTREAM SINK [FRAGMENT=F00, EXCHANGE=06, BROADCAST]'
' 01:SCAN HDFS [functional_parquet.alltypessmall t2, PARTITION=RANDOM]'
' partitions=4/4 size=9.63KB'
' table stats: unavailable'
' columns missing stats: id, float_col'
' hosts=3 per-host-mem=16.00MB'
' tuple-ids=1 row-size=12B cardinality=unavailable'
====