Files
impala/testdata/workloads/functional-query/queries/QueryTest/explain-level2.test
Nong Li 5d903efca3 ExecSummary
The runtime profile as we present it is not very useful, and I think its structure
makes it hard to consume. This patch adds a new client-facing, schemed set of
counters that are collected from the runtime profiles. For example, with this structure
it would be easy to have the shell get the stats of a running query and print a useful
progress report, or to check the most relevant metrics for diagnosing issues.

Here's an example of the output for one of the tpch queries:
Operator              #Hosts   Avg Time   Max Time    #Rows  Est. #Rows  Peak Mem  Est. Peak Mem  Detail
------------------------------------------------------------------------------------------------------------------------
09:MERGING-EXCHANGE        1   79.738us   79.738us        5           5         0        -1.00 B  UNPARTITIONED
05:TOP-N                   3   84.693us   88.810us        5           5  12.00 KB       120.00 B
04:AGGREGATE               3    5.263ms    6.432ms        5           5  44.00 KB       10.00 MB  MERGE FINALIZE
08:AGGREGATE               3   16.659ms   27.444ms   52.52K     600.12K   3.20 MB       15.11 MB  MERGE
07:EXCHANGE                3    2.644ms      5.1ms   52.52K     600.12K         0              0  HASH(o_orderpriority)
03:AGGREGATE               3  342.913ms  966.291ms   52.52K     600.12K  10.80 MB       15.11 MB
02:HASH JOIN               3    2s165ms    2s171ms  144.87K     600.12K  13.63 MB      941.01 KB  INNER JOIN, BROADCAST
|--06:EXCHANGE             3    8.296ms    8.692ms   57.22K      15.00K         0              0  BROADCAST
|  01:SCAN HDFS            2    1s412ms    1s978ms   57.22K      15.00K  24.21 MB      176.00 MB  tpch.orders o
00:SCAN HDFS               3    8s032ms    8s558ms    3.79M     600.12K  32.29 MB      264.00 MB  tpch.lineitem l

Change-Id: Iaad4b9dd577c375006313f19442bee6d3e27246a
Reviewed-on: http://gerrit.ent.cloudera.com:8080/2964
Reviewed-by: Nong Li <nong@cloudera.com>
Tested-by: jenkins
2014-06-11 03:10:11 -07:00

364 lines
12 KiB
Plaintext

====
---- QUERY
# Tests explaining a query (TPCDS-Q19)
explain
select
i_brand_id,
i_brand,
i_manufact_id,
i_manufact,
sum(ss_ext_sales_price) ext_price
from
tpcds.store_sales
join tpcds.item on (store_sales.ss_item_sk = item.i_item_sk)
join tpcds.customer on (store_sales.ss_customer_sk = customer.c_customer_sk)
join tpcds.customer_address on (customer.c_current_addr_sk = customer_address.ca_address_sk)
join tpcds.store on (store_sales.ss_store_sk = store.s_store_sk)
where
ss_date between '1999-11-01' and '1999-11-30'
and i_manager_id = 7
and substr(ca_zip, 1, 5) <> substr(s_zip, 1, 5)
group by
i_brand,
i_brand_id,
i_manufact_id,
i_manufact
order by
ext_price desc,
i_brand,
i_brand_id,
i_manufact_id,
i_manufact
limit 100
---- RESULTS
'Estimated Per-Host Requirements: Memory=72.66MB VCores=5'
''
'19:TOP-N [LIMIT=100]'
'| order by: sum(ss_ext_sales_price) DESC, i_brand ASC, i_brand_id ASC, i_manufact_id ASC, i_manufact ASC'
'| hosts=3 per-host-mem=unavailable'
'| tuple-ids=5 row-size=48B cardinality=100'
'|'
'18:EXCHANGE [UNPARTITIONED]'
'| hosts=3 per-host-mem=unavailable'
'| tuple-ids=5 row-size=48B cardinality=100'
'|'
'10:TOP-N [LIMIT=100]'
'| order by: sum(ss_ext_sales_price) DESC, i_brand ASC, i_brand_id ASC, i_manufact_id ASC, i_manufact ASC'
'| hosts=3 per-host-mem=4.69KB'
'| tuple-ids=5 row-size=48B cardinality=100'
'|'
'17:AGGREGATE [MERGE FINALIZE]'
'| output: sum(sum(ss_ext_sales_price))'
'| group by: i_brand, i_brand_id, i_manufact_id, i_manufact'
'| hosts=3 per-host-mem=10.00MB'
'| tuple-ids=5 row-size=48B cardinality=87208'
'|'
'16:EXCHANGE [HASH(i_brand,i_brand_id,i_manufact_id,i_manufact)]'
'| hosts=3 per-host-mem=0B'
'| tuple-ids=5 row-size=48B cardinality=87208'
'|'
'09:AGGREGATE'
'| output: sum(ss_ext_sales_price)'
'| group by: i_brand, i_brand_id, i_manufact_id, i_manufact'
'| hosts=3 per-host-mem=10.00MB'
'| tuple-ids=5 row-size=48B cardinality=87208'
'|'
'08:HASH JOIN [INNER JOIN, BROADCAST]'
'| hash predicates: store_sales.ss_store_sk = store.s_store_sk'
'| other predicates: substr(ca_zip, 1, 5) != substr(s_zip, 1, 5)'
'| hosts=3 per-host-mem=383B'
'| tuple-ids=3,2,0,1,4 row-size=192B cardinality=87208'
'|'
'|--15:EXCHANGE [BROADCAST]'
'| | hosts=3 per-host-mem=0B'
'| | tuple-ids=4 row-size=29B cardinality=12'
'| |'
'| 04:SCAN HDFS [tpcds.store, RANDOM]'
'| partitions=1/1 size=3.08KB'
'| table stats: 12 rows total'
'| column stats: all'
'| hosts=3 per-host-mem=16.00MB'
'| tuple-ids=4 row-size=29B cardinality=12'
'|'
'07:HASH JOIN [INNER JOIN, BROADCAST]'
'| hash predicates: store_sales.ss_item_sk = item.i_item_sk'
'| hosts=3 per-host-mem=14.60KB'
'| tuple-ids=3,2,0,1 row-size=163B cardinality=87208'
'|'
'|--14:EXCHANGE [BROADCAST]'
'| | hosts=3 per-host-mem=0B'
'| | tuple-ids=1 row-size=79B cardinality=171'
'| |'
'| 01:SCAN HDFS [tpcds.item, RANDOM]'
'| partitions=1/1 size=4.82MB'
'| predicates: i_manager_id = 7'
'| table stats: 18000 rows total'
'| column stats: all'
'| hosts=3 per-host-mem=16.00MB'
'| tuple-ids=1 row-size=79B cardinality=171'
'|'
'06:HASH JOIN [INNER JOIN, BROADCAST]'
'| hash predicates: customer.c_customer_sk = store_sales.ss_customer_sk'
'| hosts=3 per-host-mem=234.75KB'
'| tuple-ids=3,2,0 row-size=84B cardinality=87208'
'|'
'|--13:EXCHANGE [BROADCAST]'
'| | hosts=3 per-host-mem=0B'
'| | tuple-ids=0 row-size=43B cardinality=5082'
'| |'
'| 00:SCAN HDFS [tpcds.store_sales, RANDOM]'
'| partitions=2/120 size=663.52KB'
'| table stats: 183592 rows total'
'| column stats: all'
'| hosts=3 per-host-mem=16.00MB'
'| tuple-ids=0 row-size=43B cardinality=5082'
'|'
'05:HASH JOIN [INNER JOIN, PARTITIONED]'
'| hash predicates: customer_address.ca_address_sk = customer.c_current_addr_sk'
'| hosts=3 per-host-mem=429.69KB'
'| tuple-ids=3,2 row-size=41B cardinality=87208'
'|'
'|--12:EXCHANGE [HASH(customer.c_current_addr_sk)]'
'| | hosts=3 per-host-mem=0B'
'| | tuple-ids=2 row-size=12B cardinality=100000'
'| |'
'| 02:SCAN HDFS [tpcds.customer, RANDOM]'
'| partitions=1/1 size=12.60MB'
'| table stats: 100000 rows total'
'| column stats: all'
'| hosts=3 per-host-mem=24.00MB'
'| tuple-ids=2 row-size=12B cardinality=100000'
'|'
'11:EXCHANGE [HASH(customer_address.ca_address_sk)]'
'| hosts=3 per-host-mem=0B'
'| tuple-ids=3 row-size=29B cardinality=50000'
'|'
'03:SCAN HDFS [tpcds.customer_address, RANDOM]'
' partitions=1/1 size=5.25MB'
' table stats: 50000 rows total'
' column stats: all'
' hosts=3 per-host-mem=16.00MB'
' tuple-ids=3 row-size=29B cardinality=50000'
====
---- QUERY
# Tests explaining an insert query
# The target is functional.alltypessmall, written with INSERT OVERWRITE and a
# partition clause on (year, month). The source is a UNION DISTINCT of two branches:
# a left semi join of functional.alltypes against functional.alltypesagg, and a plain
# select from functional.alltypes.
# The expected plan starts with a WRITE TO HDFS sink and shows the two union branches
# feeding a final MERGE FINALIZE aggregation.
explain insert overwrite functional.alltypessmall (id, string_col)
partition (year, month)
select a.id, a.string_col, a.year, a.month from functional.alltypes a
left semi join functional.alltypesagg b on (a.id = b.id)
where a.year > 2009 and a.month = 4
union distinct
select id, string_col, year, month from functional.alltypes
---- RESULTS
'Estimated Per-Host Requirements: Memory=30.00MB VCores=3'
''
'WRITE TO HDFS [functional.alltypessmall, OVERWRITE=true, PARTITION-KEYS=(year,month)]'
'| partitions=96'
'| hosts=3 per-host-mem=69.26KB'
'|'
'14:EXCHANGE [HASH(year,month)]'
'| hosts=3 per-host-mem=0B'
'| tuple-ids=3 row-size=28B cardinality=7600'
'|'
'13:AGGREGATE [MERGE FINALIZE]'
'| group by: id, string_col, year, month'
'| hosts=3 per-host-mem=10.00MB'
'| tuple-ids=3 row-size=28B cardinality=7600'
'|'
'12:EXCHANGE [HASH(id,string_col,year,month)]'
'| hosts=3 per-host-mem=0B'
'| tuple-ids=3 row-size=28B cardinality=7600'
'|'
'|--11:AGGREGATE'
'| | group by: id, string_col, year, month'
'| | hosts=3 per-host-mem=10.00MB'
'| | tuple-ids=3 row-size=28B cardinality=7300'
'| |'
'| 10:MERGE'
'| | hosts=3 per-host-mem=0B'
'| | tuple-ids=3 row-size=28B cardinality=7300'
'| |'
'| 04:SCAN HDFS [functional.alltypes, RANDOM]'
'| partitions=24/24 size=478.45KB'
'| table stats: 7300 rows total'
'| column stats: all'
'| hosts=3 per-host-mem=16.00MB'
'| tuple-ids=2 row-size=29B cardinality=7300'
'|'
'05:AGGREGATE'
'| group by: id, string_col, year, month'
'| hosts=3 per-host-mem=10.00MB'
'| tuple-ids=3 row-size=28B cardinality=300'
'|'
'09:MERGE'
'| hosts=3 per-host-mem=0B'
'| tuple-ids=3 row-size=28B cardinality=300'
'|'
'03:HASH JOIN [LEFT SEMI JOIN, PARTITIONED]'
'| hash predicates: a.id = b.id'
'| hosts=3 per-host-mem=14.32KB'
'| tuple-ids=0,1 row-size=33B cardinality=300'
'|'
'|--07:EXCHANGE [HASH(b.id)]'
'| | hosts=3 per-host-mem=0B'
'| | tuple-ids=1 row-size=4B cardinality=10000'
'| |'
'| 02:SCAN HDFS [functional.alltypesagg b, RANDOM]'
'| partitions=10/10 size=743.67KB'
'| table stats: 10000 rows total'
'| column stats: all'
'| hosts=3 per-host-mem=16.00MB'
'| tuple-ids=1 row-size=4B cardinality=10000'
'|'
'06:EXCHANGE [HASH(a.id)]'
'| hosts=3 per-host-mem=0B'
'| tuple-ids=0 row-size=29B cardinality=300'
'|'
'01:SCAN HDFS [functional.alltypes a, RANDOM]'
' partitions=1/24 size=19.71KB'
' table stats: 7300 rows total'
' column stats: all'
' hosts=3 per-host-mem=16.00MB'
' tuple-ids=0 row-size=29B cardinality=300'
====
---- QUERY
# Tests explaining an insert query to/from an HBase table
# The source is a UNION ALL of two branches: a cross join between an HBase table and
# an HDFS table (filtered on a.year and a.month), and a plain select from the HBase
# table. The target is functional_hbase.alltypes.
# The expected plan starts with a WRITE TO HBASE sink and contains SCAN HBASE,
# SCAN HDFS and a BROADCAST CROSS JOIN node.
explain insert into functional_hbase.alltypes
select a.* from functional_hbase.alltypessmall a
cross join functional.alltypessmall b
where a.year > 2009 and a.month = 4
union all
select * from functional_hbase.alltypessmall
---- RESULTS
'Estimated Per-Host Requirements: Memory=1.02GB VCores=3'
''
'WRITE TO HBASE table=functional_hbase.alltypes'
'| hosts=1 per-host-mem=unavailable'
'|'
'06:EXCHANGE [UNPARTITIONED]'
'| hosts=100 per-host-mem=unavailable'
'| tuple-ids=3 row-size=88B cardinality=596'
'|'
'|--08:MERGE'
'| | hosts=100 per-host-mem=0B'
'| | tuple-ids=3 row-size=88B cardinality=196'
'| |'
'| 04:SCAN HBASE [functional_hbase.alltypessmall]'
'| table stats: 100 rows total'
'| column stats: all'
'| hosts=100 per-host-mem=1.00GB'
'| tuple-ids=2 row-size=97B cardinality=196'
'|'
'07:MERGE'
'| hosts=100 per-host-mem=0B'
'| tuple-ids=3 row-size=88B cardinality=400'
'|'
'03:CROSS JOIN [BROADCAST]'
'| hosts=100 per-host-mem=0B'
'| tuple-ids=0,1 row-size=97B cardinality=400'
'|'
'|--05:EXCHANGE [BROADCAST]'
'| | hosts=3 per-host-mem=0B'
'| | tuple-ids=1 row-size=0B cardinality=100'
'| |'
'| 02:SCAN HDFS [functional.alltypessmall b, RANDOM]'
'| partitions=4/4 size=6.32KB'
'| table stats: 100 rows total'
'| column stats: all'
'| hosts=3 per-host-mem=16.00MB'
'| tuple-ids=1 row-size=0B cardinality=100'
'|'
'01:SCAN HBASE [functional_hbase.alltypessmall a]'
' predicates: a.year > 2009, a.month = 4'
' table stats: 100 rows total'
' column stats: all'
' hosts=100 per-host-mem=1.00GB'
' tuple-ids=0 row-size=97B cardinality=4'
====
---- QUERY
# Tests explaining a CTAS (CREATE TABLE AS SELECT) statement.
# The select reads functional.alltypes filtered on month = 2; the expected plan is a
# single SCAN HDFS node feeding a WRITE TO HDFS sink for the new table.
explain create table t as
select * from functional.alltypes
where month = 2
---- RESULTS
'Estimated Per-Host Requirements: Memory=16.02MB VCores=1'
''
'WRITE TO HDFS [functional.t, OVERWRITE=false]'
'| partitions=1'
'| hosts=3 per-host-mem=17.62KB'
'|'
'00:SCAN HDFS [functional.alltypes, RANDOM]'
' partitions=2/24 size=36.51KB'
' table stats: 7300 rows total'
' column stats: all'
' hosts=3 per-host-mem=16.00MB'
' tuple-ids=1 row-size=97B cardinality=560'
====
---- QUERY
# Tests the warning about missing table stats in the explain header.
# The query joins Avro and Parquet tables whose scans report
# 'table stats: unavailable' in the expected plan, so the header is expected to
# carry a WARNING line naming functional_avro.alltypes and
# functional_parquet.alltypessmall.
# The filter on t3.bool_col is expected to appear both as an 'other predicates'
# entry on the LEFT OUTER JOIN and as a scan predicate on t3.
explain select count(t1.int_col), avg(t2.float_col), sum(t3.bigint_col)
from functional_avro.alltypes t1
inner join functional_parquet.alltypessmall t2 on (t1.id = t2.id)
left outer join functional_avro.alltypes t3 on (t2.id = t3.id)
where t1.month = 1 and t2.year = 2009 and t3.bool_col = false
---- RESULTS
'Estimated Per-Host Requirements: Memory=4.03GB VCores=3'
'WARNING: The following tables are missing relevant table and/or column statistics.'
'functional_avro.alltypes, functional_parquet.alltypessmall'
''
'09:AGGREGATE [MERGE FINALIZE]'
'| output: sum(count(t1.int_col)), sum(sum(t2.float_col)), sum(count(t2.float_col)), sum(sum(t3.bigint_col))'
'| hosts=3 per-host-mem=unavailable'
'| tuple-ids=3 row-size=32B cardinality=1'
'|'
'08:EXCHANGE [UNPARTITIONED]'
'| hosts=3 per-host-mem=unavailable'
'| tuple-ids=3 row-size=32B cardinality=1'
'|'
'05:AGGREGATE'
'| output: count(t1.int_col), sum(t2.float_col), count(t2.float_col), sum(t3.bigint_col)'
'| hosts=3 per-host-mem=10.00MB'
'| tuple-ids=3 row-size=32B cardinality=1'
'|'
'04:HASH JOIN [LEFT OUTER JOIN, BROADCAST]'
'| hash predicates: t2.id = t3.id'
'| other predicates: t3.bool_col = FALSE'
'| hosts=3 per-host-mem=2.00GB'
'| tuple-ids=0,1,2N row-size=37B cardinality=unavailable'
'|'
'|--07:EXCHANGE [BROADCAST]'
'| | hosts=3 per-host-mem=0B'
'| | tuple-ids=2 row-size=13B cardinality=unavailable'
'| |'
'| 02:SCAN HDFS [functional_avro.alltypes t3, RANDOM]'
'| partitions=24/24 size=470.35KB'
'| predicates: t3.bool_col = FALSE'
'| table stats: unavailable'
'| column stats: unavailable'
'| hosts=3 per-host-mem=16.00MB'
'| tuple-ids=2 row-size=13B cardinality=unavailable'
'|'
'03:HASH JOIN [INNER JOIN, BROADCAST]'
'| hash predicates: t1.id = t2.id'
'| hosts=3 per-host-mem=2.00GB'
'| tuple-ids=0,1 row-size=24B cardinality=unavailable'
'|'
'|--06:EXCHANGE [BROADCAST]'
'| | hosts=3 per-host-mem=0B'
'| | tuple-ids=1 row-size=12B cardinality=unavailable'
'| |'
'| 01:SCAN HDFS [functional_parquet.alltypessmall t2, RANDOM]'
'| partitions=4/4 size=9.63KB'
'| table stats: unavailable'
'| columns missing stats: id, float_col'
'| hosts=3 per-host-mem=16.00MB'
'| tuple-ids=1 row-size=12B cardinality=unavailable'
'|'
'00:SCAN HDFS [functional_avro.alltypes t1, RANDOM]'
' partitions=2/24 size=39.87KB'
' table stats: unavailable'
' columns missing stats: id, int_col'
' hosts=3 per-host-mem=16.00MB'
' tuple-ids=0 row-size=12B cardinality=unavailable'
====