====
---- QUERY
# EXPLAIN of TPCDS-Q19: store_sales joined to item, customer, customer_address
# and store, then grouped, ordered and limited to 100 rows.
# NOTE(review): the quoted plan rows in every RESULTS section of this file are
# kept exactly as found; the spacing inside them looks whitespace-collapsed, so
# confirm it against real EXPLAIN output before relying on an exact match.
explain
select
  i_brand_id,
  i_brand,
  i_manufact_id,
  i_manufact,
  sum(ss_ext_sales_price) ext_price
from tpcds.store_sales
join tpcds.item
  on (store_sales.ss_item_sk = item.i_item_sk)
join tpcds.customer
  on (store_sales.ss_customer_sk = customer.c_customer_sk)
join tpcds.customer_address
  on (customer.c_current_addr_sk = customer_address.ca_address_sk)
join tpcds.store
  on (store_sales.ss_store_sk = store.s_store_sk)
where ss_date between '1999-11-01' and '1999-11-30'
  and i_manager_id = 7
  and substr(ca_zip, 1, 5) <> substr(s_zip, 1, 5)
group by
  i_brand,
  i_brand_id,
  i_manufact_id,
  i_manufact
order by
  ext_price desc,
  i_brand,
  i_brand_id,
  i_manufact_id,
  i_manufact
limit 100
---- RESULTS
'Estimated Per-Host Requirements: Memory=72.66MB VCores=5'
''
'19:TOP-N [LIMIT=100]'
'| order by: sum(ss_ext_sales_price) DESC, i_brand ASC, i_brand_id ASC, i_manufact_id ASC, i_manufact ASC'
'|'
'18:EXCHANGE [UNPARTITIONED]'
'|'
'10:TOP-N [LIMIT=100]'
'| order by: sum(ss_ext_sales_price) DESC, i_brand ASC, i_brand_id ASC, i_manufact_id ASC, i_manufact ASC'
'|'
'17:AGGREGATE [MERGE FINALIZE]'
'| output: sum(sum(ss_ext_sales_price))'
'| group by: i_brand, i_brand_id, i_manufact_id, i_manufact'
'|'
'16:EXCHANGE [HASH(i_brand,i_brand_id,i_manufact_id,i_manufact)]'
'|'
'09:AGGREGATE'
'| output: sum(ss_ext_sales_price)'
'| group by: i_brand, i_brand_id, i_manufact_id, i_manufact'
'|'
'08:HASH JOIN [INNER JOIN, BROADCAST]'
'| hash predicates: store_sales.ss_store_sk = store.s_store_sk'
'| other predicates: substr(ca_zip, 1, 5) != substr(s_zip, 1, 5)'
'|'
'|--15:EXCHANGE [BROADCAST]'
'| |'
'| 04:SCAN HDFS [tpcds.store]'
'| partitions=1/1 size=3.08KB'
'|'
'07:HASH JOIN [INNER JOIN, BROADCAST]'
'| hash predicates: store_sales.ss_item_sk = item.i_item_sk'
'|'
'|--14:EXCHANGE [BROADCAST]'
'| |'
'| 01:SCAN HDFS [tpcds.item]'
'| partitions=1/1 size=4.82MB'
'| predicates: i_manager_id = 7'
'|'
'06:HASH JOIN [INNER JOIN, BROADCAST]'
'| hash predicates: customer.c_customer_sk = store_sales.ss_customer_sk'
'|'
'|--13:EXCHANGE [BROADCAST]'
'| |'
'| 00:SCAN HDFS [tpcds.store_sales]'
'| partitions=2/120 size=663.52KB'
'|'
'05:HASH JOIN [INNER JOIN, PARTITIONED]'
'| hash predicates: customer_address.ca_address_sk = customer.c_current_addr_sk'
'|'
'|--12:EXCHANGE [HASH(customer.c_current_addr_sk)]'
'| |'
'| 02:SCAN HDFS [tpcds.customer]'
'| partitions=1/1 size=12.60MB'
'|'
'11:EXCHANGE [HASH(customer_address.ca_address_sk)]'
'|'
'03:SCAN HDFS [tpcds.customer_address]'
' partitions=1/1 size=5.25MB'
====
---- QUERY
# EXPLAIN of an INSERT OVERWRITE into a table partitioned by (year, month),
# fed by a left semi join that is UNION DISTINCT-ed with a plain scan.
explain
insert overwrite functional.alltypessmall (id, string_col)
partition (year, month)
select
  a.id,
  a.string_col,
  a.year,
  a.month
from functional.alltypes a
left semi join functional.alltypesagg b
  on (a.id = b.id)
where a.year > 2009
  and a.month = 4
union distinct
select
  id,
  string_col,
  year,
  month
from functional.alltypes
---- RESULTS
'Estimated Per-Host Requirements: Memory=30.00MB VCores=3'
''
'WRITE TO HDFS [functional.alltypessmall, OVERWRITE=true, PARTITION-KEYS=(year,month)]'
'| partitions=96'
'|'
'14:EXCHANGE [HASH(year,month)]'
'|'
'13:AGGREGATE [MERGE FINALIZE]'
'| group by: id, string_col, year, month'
'|'
'12:EXCHANGE [HASH(id,string_col,year,month)]'
'|'
'|--11:AGGREGATE'
'| | group by: id, string_col, year, month'
'| |'
'| 10:MERGE'
'| |'
'| 04:SCAN HDFS [functional.alltypes]'
'| partitions=24/24 size=478.45KB'
'|'
'05:AGGREGATE'
'| group by: id, string_col, year, month'
'|'
'09:MERGE'
'|'
'03:HASH JOIN [LEFT SEMI JOIN, PARTITIONED]'
'| hash predicates: a.id = b.id'
'|'
'|--07:EXCHANGE [HASH(b.id)]'
'| |'
'| 02:SCAN HDFS [functional.alltypesagg b]'
'| partitions=10/10 size=743.67KB'
'|'
'06:EXCHANGE [HASH(a.id)]'
'|'
'01:SCAN HDFS [functional.alltypes a]'
' partitions=1/24 size=19.71KB'
====
---- QUERY
# EXPLAIN of an INSERT whose target and sources include HBase tables:
# an HBase x HDFS cross join UNION ALL-ed with an HBase scan.
explain
insert into functional_hbase.alltypes
select a.*
from functional_hbase.alltypessmall a
cross join functional.alltypessmall b
where a.year > 2009
  and a.month = 4
union all
select *
from functional_hbase.alltypessmall
---- RESULTS
'Estimated Per-Host Requirements: Memory=1.02GB VCores=3'
''
'WRITE TO HBASE table=functional_hbase.alltypes'
'|'
'06:EXCHANGE [UNPARTITIONED]'
'|'
'|--08:MERGE'
'| |'
'| 04:SCAN HBASE [functional_hbase.alltypessmall]'
'|'
'07:MERGE'
'|'
'03:CROSS JOIN [BROADCAST]'
'|'
'|--05:EXCHANGE [BROADCAST]'
'| |'
'| 02:SCAN HDFS [functional.alltypessmall b]'
'| partitions=4/4 size=6.32KB'
'|'
'01:SCAN HBASE [functional_hbase.alltypessmall a]'
' predicates: a.year > 2009, a.month = 4'
====
---- QUERY
# EXPLAIN of a CTAS (CREATE TABLE ... AS SELECT) statement.
explain
create table t as
select *
from functional.alltypes
where month = 2
---- RESULTS
'Estimated Per-Host Requirements: Memory=16.02MB VCores=1'
''
'WRITE TO HDFS [functional.t, OVERWRITE=false]'
'| partitions=1'
'|'
'00:SCAN HDFS [functional.alltypes]'
' partitions=2/24 size=36.51KB'
====
---- QUERY
# Checks that the EXPLAIN header carries the missing-statistics warning
# and lists the affected tables.
explain
select
  count(t1.int_col),
  avg(t2.float_col),
  sum(t3.bigint_col)
from functional_avro.alltypes t1
inner join functional_parquet.alltypessmall t2
  on (t1.id = t2.id)
left outer join functional_avro.alltypes t3
  on (t2.id = t3.id)
where t1.month = 1
  and t2.year = 2009
  and t3.bool_col = false
---- RESULTS
'Estimated Per-Host Requirements: Memory=4.03GB VCores=3'
'WARNING: The following tables are missing relevant table and/or column statistics.'
'functional_avro.alltypes, functional_parquet.alltypessmall'
''
'09:AGGREGATE [MERGE FINALIZE]'
'| output: sum(count(t1.int_col)), sum(sum(t2.float_col)), sum(count(t2.float_col)), sum(sum(t3.bigint_col))'
'|'
'08:EXCHANGE [UNPARTITIONED]'
'|'
'05:AGGREGATE'
'| output: count(t1.int_col), sum(t2.float_col), count(t2.float_col), sum(t3.bigint_col)'
'|'
'04:HASH JOIN [LEFT OUTER JOIN, BROADCAST]'
'| hash predicates: t2.id = t3.id'
'| other predicates: t3.bool_col = FALSE'
'|'
'|--07:EXCHANGE [BROADCAST]'
'| |'
'| 02:SCAN HDFS [functional_avro.alltypes t3]'
'| partitions=24/24 size=470.35KB'
'| predicates: t3.bool_col = FALSE'
'|'
'03:HASH JOIN [INNER JOIN, BROADCAST]'
'| hash predicates: t1.id = t2.id'
'|'
'|--06:EXCHANGE [BROADCAST]'
'| |'
'| 01:SCAN HDFS [functional_parquet.alltypessmall t2]'
'| partitions=4/4 size=9.63KB'
'|'
'00:SCAN HDFS [functional_avro.alltypes t1]'
' partitions=2/24 size=39.87KB'
====