mirror of
https://github.com/apache/impala.git
synced 2026-01-25 09:01:08 -05:00
This only factors in fragment execution threads. E.g. this does *not*
try to account for the number of threads on the old Thrift RPC
code path if that is enabled.
This is loosely related to the old VCores estimate, but is different in
that it:
* Directly ties into the notion of required threads in
ThreadResourceMgr.
* Is a strict upper bound on the number of such threads, rather than
an estimate.
The thread reservation does not include "optional" threads. ThreadResourceMgr
in the backend bounds the number of "optional" threads per impalad, so the
number of execution threads on a backend is limited by
sum(required threads per query) +
CpuInfo::num_cores() * FLAGS_num_threads_per_core
DCHECKs in the backend enforce that the calculation is correct. They
were actually hit in KuduScanNode because of some races in thread
management that led to multiple "required" threads running. Now the
first thread in the multithreaded scans never exits, which means
that it is always safe for any of the other threads to exit early,
which simplifies the logic a lot.
Testing:
Updated planner tests.
Ran core tests.
Change-Id: I982837ef883457fa4d2adc3bdbdc727353469140
Reviewed-on: http://gerrit.cloudera.org:8080/10256
Reviewed-by: Tim Armstrong <tarmstrong@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
66 lines
2.8 KiB
Plaintext
66 lines
2.8 KiB
Plaintext
====
|
|
---- QUERY
|
|
# Explain a simple hash join query (tpch.lineitem joined to tpch.orders on the
# order key) and verify the plan's resource lines, including the per-fragment
# and per-node thread-reservation values.
# Literal '|' characters inside row_regex patterns are escaped as '\|': an
# unescaped '|' is regex alternation, so '.*| foo' would match every row and
# the resource values after the pipe would never be checked.
|
|
explain
|
|
select *
|
|
from tpch.lineitem join tpch.orders on l_orderkey = o_orderkey;
|
|
---- RESULTS: VERIFY_IS_EQUAL
|
|
row_regex:.*Max Per-Host Resource Reservation: Memory=[0-9.]*MB Threads=[0-9]*.*
|
|
row_regex:.*Per-Host Resource Estimates: Memory=[0-9.]*MB.*
|
|
''
|
|
'F02:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1'
|
|
'| Per-Host Resources: mem-estimate=0B mem-reservation=0B thread-reservation=1'
|
|
'PLAN-ROOT SINK'
|
|
'| mem-estimate=0B mem-reservation=0B thread-reservation=0'
|
|
'|'
|
|
'04:EXCHANGE [UNPARTITIONED]'
|
|
'| mem-estimate=0B mem-reservation=0B thread-reservation=0'
|
|
'| tuple-ids=0,1 row-size=454B cardinality=5757710'
|
|
'|'
|
|
'F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=3'
|
|
row_regex:.*Per-Host Resources: mem-estimate=[0-9.]*MB mem-reservation=[0-9.]*MB thread-reservation=.*
|
|
'02:HASH JOIN [INNER JOIN, BROADCAST]'
|
|
'| hash predicates: l_orderkey = o_orderkey'
|
|
'| fk/pk conjuncts: l_orderkey = o_orderkey'
|
|
'| runtime filters: RF000[bloom] <- o_orderkey'
|
|
row_regex:.*\|\s+mem-estimate=[0-9.]*MB mem-reservation=[0-9.]*MB spill-buffer=[0-9.]*MB thread-reservation=0.*
|
|
'| tuple-ids=0,1 row-size=454B cardinality=5757710'
|
|
'|'
|
|
'|--03:EXCHANGE [BROADCAST]'
|
|
'| | mem-estimate=0B mem-reservation=0B thread-reservation=0'
|
|
'| | tuple-ids=1 row-size=191B cardinality=1500000'
|
|
'| |'
|
|
'| F01:PLAN FRAGMENT [RANDOM] hosts=2 instances=2'
|
|
row_regex:.*\|\s+Per-Host Resources: mem-estimate=[0-9.]*MB mem-reservation=[0-9.]*MB thread-reservation=.*
|
|
'| 01:SCAN HDFS [tpch.orders, RANDOM]'
|
|
row_regex:.*partitions=1/1 files=1 size=.*
|
|
'| stored statistics:'
|
|
row_regex:.*table: rows=1500000 size=.*
|
|
'| columns: all'
|
|
row_regex:.*\|\s+extrapolated-rows=disabled max-scan-range-rows=[0-9]*.*
|
|
row_regex:.*\|\s+mem-estimate=[0-9.]*MB mem-reservation=[0-9.]*MB thread-reservation=1.*
|
|
'| tuple-ids=1 row-size=191B cardinality=1500000'
|
|
'|'
|
|
'00:SCAN HDFS [tpch.lineitem, RANDOM]'
|
|
row_regex:.*partitions=1/1 files=1 size=.*
|
|
' runtime filters: RF000[bloom] -> l_orderkey'
|
|
' stored statistics:'
|
|
row_regex:.*table: rows=6001215 size=.*
|
|
' columns: all'
|
|
row_regex:.* extrapolated-rows=disabled max-scan-range-rows=[0-9]*.*
|
|
row_regex:.* mem-estimate=[0-9.]*MB mem-reservation=[0-9.]*MB thread-reservation=1.*
|
|
' tuple-ids=0 row-size=263B cardinality=6001215'
|
|
====
|
|
---- QUERY
|
|
# Tests the warning about missing table stats in the explain header.
# The query joins functional_avro.alltypes (aliased t1 and t3) with
# functional_parquet.alltypessmall (t2); the expected rows below list exactly
# those two tables in the warning.
# NOTE(review): the RESULTS section pins an exact estimate ('Memory=4.07GB'),
# whereas the hash join test above matches the same header line with a
# row_regex. Presumably this value changes whenever planner estimates change;
# confirm whether a row_regex would be more appropriate here.
|
|
explain select count(t1.int_col), avg(t2.float_col), sum(t3.bigint_col)
|
|
from functional_avro.alltypes t1
|
|
inner join functional_parquet.alltypessmall t2 on (t1.id = t2.id)
|
|
left outer join functional_avro.alltypes t3 on (t2.id = t3.id)
|
|
where t1.month = 1 and t2.year = 2009 and t3.bool_col = false
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
'Per-Host Resource Estimates: Memory=4.07GB'
|
|
'WARNING: The following tables are missing relevant table and/or column statistics.'
|
|
'functional_avro.alltypes, functional_parquet.alltypessmall'
|
|
====
|