==== ---- QUERY # Tests explaining a query (TPCDS-Q19) explain select i_brand_id, i_brand, i_manufact_id, i_manufact, sum(ss_ext_sales_price) ext_price from tpcds.store_sales join tpcds.item on (store_sales.ss_item_sk = item.i_item_sk) join tpcds.customer on (store_sales.ss_customer_sk = customer.c_customer_sk) join tpcds.customer_address on (customer.c_current_addr_sk = customer_address.ca_address_sk) join tpcds.store on (store_sales.ss_store_sk = store.s_store_sk) where ss_date between '1999-11-01' and '1999-11-30' and i_manager_id = 7 and substr(ca_zip, 1, 5) <> substr(s_zip, 1, 5) group by i_brand, i_brand_id, i_manufact_id, i_manufact order by ext_price desc, i_brand, i_brand_id, i_manufact_id, i_manufact limit 100 ---- RESULTS 'Estimated Per-Host Requirements: Memory=72.66MB VCores=5' '' 'F07:PLAN FRAGMENT [UNPARTITIONED]' ' 19:TOP-N [LIMIT=100]' ' | order by: sum(ss_ext_sales_price) DESC, i_brand ASC, i_brand_id ASC, i_manufact_id ASC, i_manufact ASC' ' | hosts=3 per-host-mem=unavailable' ' | tuple-ids=5 row-size=48B cardinality=100' ' |' ' 18:EXCHANGE [UNPARTITIONED]' ' hosts=3 per-host-mem=unavailable' ' tuple-ids=5 row-size=48B cardinality=100' '' 'F06:PLAN FRAGMENT [HASH(i_brand,i_brand_id,i_manufact_id,i_manufact)]' ' DATASTREAM SINK [FRAGMENT=F07, EXCHANGE=18, UNPARTITIONED]' ' 10:TOP-N [LIMIT=100]' ' | order by: sum(ss_ext_sales_price) DESC, i_brand ASC, i_brand_id ASC, i_manufact_id ASC, i_manufact ASC' ' | hosts=3 per-host-mem=4.69KB' ' | tuple-ids=5 row-size=48B cardinality=100' ' |' ' 17:AGGREGATE [MERGE FINALIZE]' ' | output: sum(sum(ss_ext_sales_price))' ' | group by: i_brand, i_brand_id, i_manufact_id, i_manufact' ' | hosts=3 per-host-mem=10.00MB' ' | tuple-ids=5 row-size=48B cardinality=87208' ' |' ' 16:EXCHANGE [HASH(i_brand,i_brand_id,i_manufact_id,i_manufact)]' ' hosts=3 per-host-mem=0B' ' tuple-ids=5 row-size=48B cardinality=87208' '' 'F02:PLAN FRAGMENT [HASH(customer_address.ca_address_sk)]' ' DATASTREAM SINK [FRAGMENT=F06, EXCHANGE=16, 
HASH(i_brand,i_brand_id,i_manufact_id,i_manufact)]' ' 09:AGGREGATE' ' | output: sum(ss_ext_sales_price)' ' | group by: i_brand, i_brand_id, i_manufact_id, i_manufact' ' | hosts=3 per-host-mem=10.00MB' ' | tuple-ids=5 row-size=48B cardinality=87208' ' |' ' 08:HASH JOIN [INNER JOIN, BROADCAST]' ' | hash predicates: store_sales.ss_store_sk = store.s_store_sk' ' | other predicates: substr(ca_zip, 1, 5) != substr(s_zip, 1, 5)' ' | hosts=3 per-host-mem=383B' ' | tuple-ids=3,2,0,1,4 row-size=192B cardinality=87208' ' |' ' |--15:EXCHANGE [BROADCAST]' ' | hosts=3 per-host-mem=0B' ' | tuple-ids=4 row-size=29B cardinality=12' ' |' ' 07:HASH JOIN [INNER JOIN, BROADCAST]' ' | hash predicates: store_sales.ss_item_sk = item.i_item_sk' ' | hosts=3 per-host-mem=14.60KB' ' | tuple-ids=3,2,0,1 row-size=163B cardinality=87208' ' |' ' |--14:EXCHANGE [BROADCAST]' ' | hosts=3 per-host-mem=0B' ' | tuple-ids=1 row-size=79B cardinality=171' ' |' ' 06:HASH JOIN [INNER JOIN, BROADCAST]' ' | hash predicates: customer.c_customer_sk = store_sales.ss_customer_sk' ' | hosts=3 per-host-mem=234.75KB' ' | tuple-ids=3,2,0 row-size=84B cardinality=87208' ' |' ' |--13:EXCHANGE [BROADCAST]' ' | hosts=3 per-host-mem=0B' ' | tuple-ids=0 row-size=43B cardinality=5082' ' |' ' 05:HASH JOIN [INNER JOIN, PARTITIONED]' ' | hash predicates: customer_address.ca_address_sk = customer.c_current_addr_sk' ' | hosts=3 per-host-mem=429.69KB' ' | tuple-ids=3,2 row-size=41B cardinality=87208' ' |' ' |--12:EXCHANGE [HASH(customer.c_current_addr_sk)]' ' | hosts=3 per-host-mem=0B' ' | tuple-ids=2 row-size=12B cardinality=100000' ' |' ' 11:EXCHANGE [HASH(customer_address.ca_address_sk)]' ' hosts=3 per-host-mem=0B' ' tuple-ids=3 row-size=29B cardinality=50000' '' 'F05:PLAN FRAGMENT [RANDOM]' ' DATASTREAM SINK [FRAGMENT=F02, EXCHANGE=15, BROADCAST]' ' 04:SCAN HDFS [tpcds.store, RANDOM]' ' partitions=1/1 size=3.08KB' ' table stats: 12 rows total' ' column stats: all' ' hosts=3 per-host-mem=16.00MB' ' tuple-ids=4 row-size=29B 
cardinality=12' '' 'F04:PLAN FRAGMENT [RANDOM]' ' DATASTREAM SINK [FRAGMENT=F02, EXCHANGE=14, BROADCAST]' ' 01:SCAN HDFS [tpcds.item, RANDOM]' ' partitions=1/1 size=4.82MB' ' predicates: i_manager_id = 7' ' table stats: 18000 rows total' ' column stats: all' ' hosts=3 per-host-mem=16.00MB' ' tuple-ids=1 row-size=79B cardinality=171' '' 'F03:PLAN FRAGMENT [RANDOM]' ' DATASTREAM SINK [FRAGMENT=F02, EXCHANGE=13, BROADCAST]' ' 00:SCAN HDFS [tpcds.store_sales, RANDOM]' ' partitions=2/120 size=663.52KB' ' table stats: 183592 rows total' ' column stats: all' ' hosts=3 per-host-mem=16.00MB' ' tuple-ids=0 row-size=43B cardinality=5082' '' 'F01:PLAN FRAGMENT [RANDOM]' ' DATASTREAM SINK [FRAGMENT=F02, EXCHANGE=12, HASH(customer.c_current_addr_sk)]' ' 02:SCAN HDFS [tpcds.customer, RANDOM]' ' partitions=1/1 size=12.60MB' ' table stats: 100000 rows total' ' column stats: all' ' hosts=3 per-host-mem=24.00MB' ' tuple-ids=2 row-size=12B cardinality=100000' '' 'F00:PLAN FRAGMENT [RANDOM]' ' DATASTREAM SINK [FRAGMENT=F02, EXCHANGE=11, HASH(customer_address.ca_address_sk)]' ' 03:SCAN HDFS [tpcds.customer_address, RANDOM]' ' partitions=1/1 size=5.25MB' ' table stats: 50000 rows total' ' column stats: all' ' hosts=3 per-host-mem=16.00MB' ' tuple-ids=3 row-size=29B cardinality=50000' ==== ---- QUERY # Tests explaining an insert query explain insert overwrite functional.alltypessmall (id, string_col) partition (year, month) select a.id, a.string_col, a.year, a.month from functional.alltypes a left semi join functional.alltypesagg b on (a.id = b.id) where a.year > 2009 and a.month = 4 union distinct select id, string_col, year, month from functional.alltypes ---- RESULTS 'Estimated Per-Host Requirements: Memory=30.00MB VCores=3' '' 'F05:PLAN FRAGMENT [HASH(year,month)]' ' WRITE TO HDFS [functional.alltypessmall, OVERWRITE=true, PARTITION-KEYS=(year,month)]' ' | partitions=96' ' | hosts=3 per-host-mem=69.26KB' ' |' ' 14:EXCHANGE [HASH(year,month)]' ' hosts=3 per-host-mem=0B' ' tuple-ids=3 
row-size=28B cardinality=7600' '' 'F04:PLAN FRAGMENT [HASH(id,string_col,year,month)]' ' DATASTREAM SINK [FRAGMENT=F05, EXCHANGE=14, HASH(year,month)]' ' 13:AGGREGATE [MERGE FINALIZE]' ' | group by: id, string_col, year, month' ' | hosts=3 per-host-mem=10.00MB' ' | tuple-ids=3 row-size=28B cardinality=7600' ' |' ' 12:EXCHANGE [HASH(id,string_col,year,month)]' ' hosts=3 per-host-mem=0B' ' tuple-ids=3 row-size=28B cardinality=7600' '' 'F03:PLAN FRAGMENT [RANDOM]' ' DATASTREAM SINK [FRAGMENT=F04, EXCHANGE=12, HASH(id,string_col,year,month)]' ' 11:AGGREGATE' ' | group by: id, string_col, year, month' ' | hosts=3 per-host-mem=10.00MB' ' | tuple-ids=3 row-size=28B cardinality=7300' ' |' ' 10:MERGE' ' | hosts=3 per-host-mem=0B' ' | tuple-ids=3 row-size=28B cardinality=7300' ' |' ' 04:SCAN HDFS [functional.alltypes, RANDOM]' ' partitions=24/24 size=478.45KB' ' table stats: 7300 rows total' ' column stats: all' ' hosts=3 per-host-mem=16.00MB' ' tuple-ids=2 row-size=29B cardinality=7300' '' 'F02:PLAN FRAGMENT [HASH(a.id)]' ' DATASTREAM SINK [FRAGMENT=F04, EXCHANGE=12, HASH(id,string_col,year,month)]' ' 05:AGGREGATE' ' | group by: id, string_col, year, month' ' | hosts=3 per-host-mem=10.00MB' ' | tuple-ids=3 row-size=28B cardinality=300' ' |' ' 09:MERGE' ' | hosts=3 per-host-mem=0B' ' | tuple-ids=3 row-size=28B cardinality=300' ' |' ' 03:HASH JOIN [LEFT SEMI JOIN, PARTITIONED]' ' | hash predicates: a.id = b.id' ' | hosts=3 per-host-mem=14.32KB' ' | tuple-ids=0,1 row-size=33B cardinality=300' ' |' ' |--07:EXCHANGE [HASH(b.id)]' ' | hosts=3 per-host-mem=0B' ' | tuple-ids=1 row-size=4B cardinality=10000' ' |' ' 06:EXCHANGE [HASH(a.id)]' ' hosts=3 per-host-mem=0B' ' tuple-ids=0 row-size=29B cardinality=300' '' 'F01:PLAN FRAGMENT [RANDOM]' ' DATASTREAM SINK [FRAGMENT=F02, EXCHANGE=07, HASH(b.id)]' ' 02:SCAN HDFS [functional.alltypesagg b, RANDOM]' ' partitions=10/10 size=743.67KB' ' table stats: 10000 rows total' ' column stats: all' ' hosts=3 per-host-mem=16.00MB' ' tuple-ids=1 
row-size=4B cardinality=10000' '' 'F00:PLAN FRAGMENT [RANDOM]' ' DATASTREAM SINK [FRAGMENT=F02, EXCHANGE=06, HASH(a.id)]' ' 01:SCAN HDFS [functional.alltypes a, RANDOM]' ' partitions=1/24 size=19.71KB' ' table stats: 7300 rows total' ' column stats: all' ' hosts=3 per-host-mem=16.00MB' ' tuple-ids=0 row-size=29B cardinality=300' ==== ---- QUERY # Tests explaining an insert query to/from an HBase table explain insert into functional_hbase.alltypes select a.* from functional_hbase.alltypessmall a cross join functional.alltypessmall b where a.year > 2009 and a.month = 4 union all select * from functional_hbase.alltypessmall ---- RESULTS 'Estimated Per-Host Requirements: Memory=1.02GB VCores=3' '' 'F03:PLAN FRAGMENT [UNPARTITIONED]' ' WRITE TO HBASE table=functional_hbase.alltypes' ' | hosts=1 per-host-mem=unavailable' ' |' ' 06:EXCHANGE [UNPARTITIONED]' ' hosts=100 per-host-mem=unavailable' ' tuple-ids=3 row-size=88B cardinality=596' '' 'F02:PLAN FRAGMENT [RANDOM]' ' DATASTREAM SINK [FRAGMENT=F03, EXCHANGE=06, UNPARTITIONED]' ' 08:MERGE' ' | hosts=100 per-host-mem=0B' ' | tuple-ids=3 row-size=88B cardinality=196' ' |' ' 04:SCAN HBASE [functional_hbase.alltypessmall]' ' table stats: 100 rows total' ' column stats: all' ' hosts=100 per-host-mem=1.00GB' ' tuple-ids=2 row-size=97B cardinality=196' '' 'F00:PLAN FRAGMENT [RANDOM]' ' DATASTREAM SINK [FRAGMENT=F03, EXCHANGE=06, UNPARTITIONED]' ' 07:MERGE' ' | hosts=100 per-host-mem=0B' ' | tuple-ids=3 row-size=88B cardinality=400' ' |' ' 03:CROSS JOIN [BROADCAST]' ' | hosts=100 per-host-mem=0B' ' | tuple-ids=0,1 row-size=97B cardinality=400' ' |' ' |--05:EXCHANGE [BROADCAST]' ' | hosts=3 per-host-mem=0B' ' | tuple-ids=1 row-size=0B cardinality=100' ' |' ' 01:SCAN HBASE [functional_hbase.alltypessmall a]' ' predicates: a.year > 2009, a.month = 4' ' table stats: 100 rows total' ' column stats: all' ' hosts=100 per-host-mem=1.00GB' ' tuple-ids=0 row-size=97B cardinality=4' '' 'F01:PLAN FRAGMENT [RANDOM]' ' DATASTREAM SINK 
[FRAGMENT=F00, EXCHANGE=05, BROADCAST]' ' 02:SCAN HDFS [functional.alltypessmall b, RANDOM]' ' partitions=4/4 size=6.32KB' ' table stats: 100 rows total' ' column stats: all' ' hosts=3 per-host-mem=16.00MB' ' tuple-ids=1 row-size=0B cardinality=100' ==== ---- QUERY # Tests explaining a CTAS statement. explain create table t as select * from functional.alltypes where month = 2 ---- RESULTS 'Estimated Per-Host Requirements: Memory=16.02MB VCores=1' '' 'F00:PLAN FRAGMENT [RANDOM]' ' WRITE TO HDFS [functional.t, OVERWRITE=false]' ' | partitions=1' ' | hosts=3 per-host-mem=17.62KB' ' |' ' 00:SCAN HDFS [functional.alltypes, RANDOM]' ' partitions=2/24 size=36.51KB' ' table stats: 7300 rows total' ' column stats: all' ' hosts=3 per-host-mem=16.00MB' ' tuple-ids=1 row-size=97B cardinality=560' ==== ---- QUERY # Tests the warning about missing table stats in the explain header. explain select count(t1.int_col), avg(t2.float_col), sum(t3.bigint_col) from functional_avro.alltypes t1 inner join functional_parquet.alltypessmall t2 on (t1.id = t2.id) left outer join functional_avro.alltypes t3 on (t2.id = t3.id) where t1.month = 1 and t2.year = 2009 and t3.bool_col = false ---- RESULTS 'Estimated Per-Host Requirements: Memory=4.03GB VCores=3' 'WARNING: The following tables are missing relevant table and/or column statistics.' 
'functional_avro.alltypes, functional_parquet.alltypessmall' '' 'F03:PLAN FRAGMENT [UNPARTITIONED]' ' 09:AGGREGATE [MERGE FINALIZE]' ' | output: sum(count(t1.int_col)), sum(sum(t2.float_col)), sum(count(t2.float_col)), sum(sum(t3.bigint_col))' ' | hosts=3 per-host-mem=unavailable' ' | tuple-ids=3 row-size=32B cardinality=1' ' |' ' 08:EXCHANGE [UNPARTITIONED]' ' hosts=3 per-host-mem=unavailable' ' tuple-ids=3 row-size=32B cardinality=1' '' 'F00:PLAN FRAGMENT [RANDOM]' ' DATASTREAM SINK [FRAGMENT=F03, EXCHANGE=08, UNPARTITIONED]' ' 05:AGGREGATE' ' | output: count(t1.int_col), sum(t2.float_col), count(t2.float_col), sum(t3.bigint_col)' ' | hosts=3 per-host-mem=10.00MB' ' | tuple-ids=3 row-size=32B cardinality=1' ' |' ' 04:HASH JOIN [LEFT OUTER JOIN, BROADCAST]' ' | hash predicates: t2.id = t3.id' ' | other predicates: t3.bool_col = FALSE' ' | hosts=3 per-host-mem=2.00GB' ' | tuple-ids=0,1,2N row-size=37B cardinality=unavailable' ' |' ' |--07:EXCHANGE [BROADCAST]' ' | hosts=3 per-host-mem=0B' ' | tuple-ids=2 row-size=13B cardinality=unavailable' ' |' ' 03:HASH JOIN [INNER JOIN, BROADCAST]' ' | hash predicates: t1.id = t2.id' ' | hosts=3 per-host-mem=2.00GB' ' | tuple-ids=0,1 row-size=24B cardinality=unavailable' ' |' ' |--06:EXCHANGE [BROADCAST]' ' | hosts=3 per-host-mem=0B' ' | tuple-ids=1 row-size=12B cardinality=unavailable' ' |' ' 00:SCAN HDFS [functional_avro.alltypes t1, RANDOM]' ' partitions=2/24 size=39.87KB' ' table stats: unavailable' ' columns missing stats: id, int_col' ' hosts=3 per-host-mem=16.00MB' ' tuple-ids=0 row-size=12B cardinality=unavailable' '' 'F02:PLAN FRAGMENT [RANDOM]' ' DATASTREAM SINK [FRAGMENT=F00, EXCHANGE=07, BROADCAST]' ' 02:SCAN HDFS [functional_avro.alltypes t3, RANDOM]' ' partitions=24/24 size=470.35KB' ' predicates: t3.bool_col = FALSE' ' table stats: unavailable' ' column stats: unavailable' ' hosts=3 per-host-mem=16.00MB' ' tuple-ids=2 row-size=13B cardinality=unavailable' '' 'F01:PLAN FRAGMENT [RANDOM]' ' DATASTREAM SINK 
[FRAGMENT=F00, EXCHANGE=06, BROADCAST]' ' 01:SCAN HDFS [functional_parquet.alltypessmall t2, RANDOM]' ' partitions=4/4 size=9.63KB' ' table stats: unavailable' ' columns missing stats: id, float_col' ' hosts=3 per-host-mem=16.00MB' ' tuple-ids=1 row-size=12B cardinality=unavailable' ====