mirror of
https://github.com/apache/impala.git
synced 2025-12-30 12:02:10 -05:00
The runtime profile as we present it is not very useful and I think the structure of it makes it hard to consume. This patch adds a new client facing schemed set of counters that are collected from the runtime profiles. For example, with this structure it would be easy to have the shell get the stats of a running query and print a useful progress report or to check the most relevant metrics for diagnosing issues. Here's an example of the output for one of the tpch queries: Operator #Hosts Avg Time Max Time #Rows Est. #Rows Peak Mem Est. Peak Mem Detail ------------------------------------------------------------------------------------------------------------------------ 09:MERGING-EXCHANGE 1 79.738us 79.738us 5 5 0 -1.00 B UNPARTITIONED 05:TOP-N 3 84.693us 88.810us 5 5 12.00 KB 120.00 B 04:AGGREGATE 3 5.263ms 6.432ms 5 5 44.00 KB 10.00 MB MERGE FINALIZE 08:AGGREGATE 3 16.659ms 27.444ms 52.52K 600.12K 3.20 MB 15.11 MB MERGE 07:EXCHANGE 3 2.644ms 5.1ms 52.52K 600.12K 0 0 HASH(o_orderpriority) 03:AGGREGATE 3 342.913ms 966.291ms 52.52K 600.12K 10.80 MB 15.11 MB 02:HASH JOIN 3 2s165ms 2s171ms 144.87K 600.12K 13.63 MB 941.01 KB INNER JOIN, BROADCAST |--06:EXCHANGE 3 8.296ms 8.692ms 57.22K 15.00K 0 0 BROADCAST | 01:SCAN HDFS 2 1s412ms 1s978ms 57.22K 15.00K 24.21 MB 176.00 MB tpch.orders o 00:SCAN HDFS 3 8s032ms 8s558ms 3.79M 600.12K 32.29 MB 264.00 MB tpch.lineitem l Change-Id: Iaad4b9dd577c375006313f19442bee6d3e27246a Reviewed-on: http://gerrit.ent.cloudera.com:8080/2964 Reviewed-by: Nong Li <nong@cloudera.com> Tested-by: jenkins
227 lines
6.4 KiB
Plaintext
227 lines
6.4 KiB
Plaintext
====
|
|
---- QUERY
|
|
# Tests explaining a query (TPCDS-Q19)
|
|
explain
|
|
select
|
|
i_brand_id,
|
|
i_brand,
|
|
i_manufact_id,
|
|
i_manufact,
|
|
sum(ss_ext_sales_price) ext_price
|
|
from
|
|
tpcds.store_sales
|
|
join tpcds.item on (store_sales.ss_item_sk = item.i_item_sk)
|
|
join tpcds.customer on (store_sales.ss_customer_sk = customer.c_customer_sk)
|
|
join tpcds.customer_address on (customer.c_current_addr_sk = customer_address.ca_address_sk)
|
|
join tpcds.store on (store_sales.ss_store_sk = store.s_store_sk)
|
|
where
|
|
ss_date between '1999-11-01' and '1999-11-30'
|
|
and i_manager_id = 7
|
|
and substr(ca_zip, 1, 5) <> substr(s_zip, 1, 5)
|
|
group by
|
|
i_brand,
|
|
i_brand_id,
|
|
i_manufact_id,
|
|
i_manufact
|
|
order by
|
|
ext_price desc,
|
|
i_brand,
|
|
i_brand_id,
|
|
i_manufact_id,
|
|
i_manufact
|
|
limit 100
|
|
---- RESULTS
|
|
'Estimated Per-Host Requirements: Memory=72.66MB VCores=5'
|
|
''
|
|
'19:TOP-N [LIMIT=100]'
|
|
'| order by: sum(ss_ext_sales_price) DESC, i_brand ASC, i_brand_id ASC, i_manufact_id ASC, i_manufact ASC'
|
|
'|'
|
|
'18:EXCHANGE [UNPARTITIONED]'
|
|
'|'
|
|
'10:TOP-N [LIMIT=100]'
|
|
'| order by: sum(ss_ext_sales_price) DESC, i_brand ASC, i_brand_id ASC, i_manufact_id ASC, i_manufact ASC'
|
|
'|'
|
|
'17:AGGREGATE [MERGE FINALIZE]'
|
|
'| output: sum(sum(ss_ext_sales_price))'
|
|
'| group by: i_brand, i_brand_id, i_manufact_id, i_manufact'
|
|
'|'
|
|
'16:EXCHANGE [HASH(i_brand,i_brand_id,i_manufact_id,i_manufact)]'
|
|
'|'
|
|
'09:AGGREGATE'
|
|
'| output: sum(ss_ext_sales_price)'
|
|
'| group by: i_brand, i_brand_id, i_manufact_id, i_manufact'
|
|
'|'
|
|
'08:HASH JOIN [INNER JOIN, BROADCAST]'
|
|
'| hash predicates: store_sales.ss_store_sk = store.s_store_sk'
|
|
'| other predicates: substr(ca_zip, 1, 5) != substr(s_zip, 1, 5)'
|
|
'|'
|
|
'|--15:EXCHANGE [BROADCAST]'
|
|
'| |'
|
|
'| 04:SCAN HDFS [tpcds.store]'
|
|
'| partitions=1/1 size=3.08KB'
|
|
'|'
|
|
'07:HASH JOIN [INNER JOIN, BROADCAST]'
|
|
'| hash predicates: store_sales.ss_item_sk = item.i_item_sk'
|
|
'|'
|
|
'|--14:EXCHANGE [BROADCAST]'
|
|
'| |'
|
|
'| 01:SCAN HDFS [tpcds.item]'
|
|
'| partitions=1/1 size=4.82MB'
|
|
'| predicates: i_manager_id = 7'
|
|
'|'
|
|
'06:HASH JOIN [INNER JOIN, BROADCAST]'
|
|
'| hash predicates: customer.c_customer_sk = store_sales.ss_customer_sk'
|
|
'|'
|
|
'|--13:EXCHANGE [BROADCAST]'
|
|
'| |'
|
|
'| 00:SCAN HDFS [tpcds.store_sales]'
|
|
'| partitions=2/120 size=663.52KB'
|
|
'|'
|
|
'05:HASH JOIN [INNER JOIN, PARTITIONED]'
|
|
'| hash predicates: customer_address.ca_address_sk = customer.c_current_addr_sk'
|
|
'|'
|
|
'|--12:EXCHANGE [HASH(customer.c_current_addr_sk)]'
|
|
'| |'
|
|
'| 02:SCAN HDFS [tpcds.customer]'
|
|
'| partitions=1/1 size=12.60MB'
|
|
'|'
|
|
'11:EXCHANGE [HASH(customer_address.ca_address_sk)]'
|
|
'|'
|
|
'03:SCAN HDFS [tpcds.customer_address]'
|
|
' partitions=1/1 size=5.25MB'
|
|
====
|
|
---- QUERY
|
|
# Tests explaining an insert query
|
|
explain insert overwrite functional.alltypessmall (id, string_col)
|
|
partition (year, month)
|
|
select a.id, a.string_col, a.year, a.month from functional.alltypes a
|
|
left semi join functional.alltypesagg b on (a.id = b.id)
|
|
where a.year > 2009 and a.month = 4
|
|
union distinct
|
|
select id, string_col, year, month from functional.alltypes
|
|
---- RESULTS
|
|
'Estimated Per-Host Requirements: Memory=30.00MB VCores=3'
|
|
''
|
|
'WRITE TO HDFS [functional.alltypessmall, OVERWRITE=true, PARTITION-KEYS=(year,month)]'
|
|
'| partitions=96'
|
|
'|'
|
|
'14:EXCHANGE [HASH(year,month)]'
|
|
'|'
|
|
'13:AGGREGATE [MERGE FINALIZE]'
|
|
'| group by: id, string_col, year, month'
|
|
'|'
|
|
'12:EXCHANGE [HASH(id,string_col,year,month)]'
|
|
'|'
|
|
'|--11:AGGREGATE'
|
|
'| | group by: id, string_col, year, month'
|
|
'| |'
|
|
'| 10:MERGE'
|
|
'| |'
|
|
'| 04:SCAN HDFS [functional.alltypes]'
|
|
'| partitions=24/24 size=478.45KB'
|
|
'|'
|
|
'05:AGGREGATE'
|
|
'| group by: id, string_col, year, month'
|
|
'|'
|
|
'09:MERGE'
|
|
'|'
|
|
'03:HASH JOIN [LEFT SEMI JOIN, PARTITIONED]'
|
|
'| hash predicates: a.id = b.id'
|
|
'|'
|
|
'|--07:EXCHANGE [HASH(b.id)]'
|
|
'| |'
|
|
'| 02:SCAN HDFS [functional.alltypesagg b]'
|
|
'| partitions=10/10 size=743.67KB'
|
|
'|'
|
|
'06:EXCHANGE [HASH(a.id)]'
|
|
'|'
|
|
'01:SCAN HDFS [functional.alltypes a]'
|
|
' partitions=1/24 size=19.71KB'
|
|
====
|
|
---- QUERY
|
|
# Tests explaining an insert query to/from an HBase table
|
|
explain insert into functional_hbase.alltypes
|
|
select a.* from functional_hbase.alltypessmall a
|
|
cross join functional.alltypessmall b
|
|
where a.year > 2009 and a.month = 4
|
|
union all
|
|
select * from functional_hbase.alltypessmall
|
|
---- RESULTS
|
|
'Estimated Per-Host Requirements: Memory=1.02GB VCores=3'
|
|
''
|
|
'WRITE TO HBASE table=functional_hbase.alltypes'
|
|
'|'
|
|
'06:EXCHANGE [UNPARTITIONED]'
|
|
'|'
|
|
'|--08:MERGE'
|
|
'| |'
|
|
'| 04:SCAN HBASE [functional_hbase.alltypessmall]'
|
|
'|'
|
|
'07:MERGE'
|
|
'|'
|
|
'03:CROSS JOIN [BROADCAST]'
|
|
'|'
|
|
'|--05:EXCHANGE [BROADCAST]'
|
|
'| |'
|
|
'| 02:SCAN HDFS [functional.alltypessmall b]'
|
|
'| partitions=4/4 size=6.32KB'
|
|
'|'
|
|
'01:SCAN HBASE [functional_hbase.alltypessmall a]'
|
|
' predicates: a.year > 2009, a.month = 4'
|
|
====
|
|
---- QUERY
|
|
# Tests explaining a CTAS statement.
|
|
explain create table t as
|
|
select * from functional.alltypes
|
|
where month = 2
|
|
---- RESULTS
|
|
'Estimated Per-Host Requirements: Memory=16.02MB VCores=1'
|
|
''
|
|
'WRITE TO HDFS [functional.t, OVERWRITE=false]'
|
|
'| partitions=1'
|
|
'|'
|
|
'00:SCAN HDFS [functional.alltypes]'
|
|
' partitions=2/24 size=36.51KB'
|
|
====
|
|
---- QUERY
|
|
# Tests the warning about missing table stats in the explain header.
|
|
explain select count(t1.int_col), avg(t2.float_col), sum(t3.bigint_col)
|
|
from functional_avro.alltypes t1
|
|
inner join functional_parquet.alltypessmall t2 on (t1.id = t2.id)
|
|
left outer join functional_avro.alltypes t3 on (t2.id = t3.id)
|
|
where t1.month = 1 and t2.year = 2009 and t3.bool_col = false
|
|
---- RESULTS
|
|
'Estimated Per-Host Requirements: Memory=4.03GB VCores=3'
|
|
'WARNING: The following tables are missing relevant table and/or column statistics.'
|
|
'functional_avro.alltypes, functional_parquet.alltypessmall'
|
|
''
|
|
'09:AGGREGATE [MERGE FINALIZE]'
|
|
'| output: sum(count(t1.int_col)), sum(sum(t2.float_col)), sum(count(t2.float_col)), sum(sum(t3.bigint_col))'
|
|
'|'
|
|
'08:EXCHANGE [UNPARTITIONED]'
|
|
'|'
|
|
'05:AGGREGATE'
|
|
'| output: count(t1.int_col), sum(t2.float_col), count(t2.float_col), sum(t3.bigint_col)'
|
|
'|'
|
|
'04:HASH JOIN [LEFT OUTER JOIN, BROADCAST]'
|
|
'| hash predicates: t2.id = t3.id'
|
|
'| other predicates: t3.bool_col = FALSE'
|
|
'|'
|
|
'|--07:EXCHANGE [BROADCAST]'
|
|
'| |'
|
|
'| 02:SCAN HDFS [functional_avro.alltypes t3]'
|
|
'| partitions=24/24 size=470.35KB'
|
|
'| predicates: t3.bool_col = FALSE'
|
|
'|'
|
|
'03:HASH JOIN [INNER JOIN, BROADCAST]'
|
|
'| hash predicates: t1.id = t2.id'
|
|
'|'
|
|
'|--06:EXCHANGE [BROADCAST]'
|
|
'| |'
|
|
'| 01:SCAN HDFS [functional_parquet.alltypessmall t2]'
|
|
'| partitions=4/4 size=9.63KB'
|
|
'|'
|
|
'00:SCAN HDFS [functional_avro.alltypes t1]'
|
|
' partitions=2/24 size=39.87KB'
|
|
====
|