mirror of
https://github.com/apache/impala.git
synced 2026-01-22 09:01:58 -05:00
This only factors in fragment execution threads. E.g. this does *not*
try to account for the number of threads on the old Thrift RPC
code path if that is enabled.
This is loosely related to the old VCores estimate, but is different in
that it:
* Directly ties into the notion of required threads in
ThreadResourceMgr.
* Is a strict upper bound on the number of such threads, rather than
an estimate.
This count does not include "optional" threads. ThreadResourceMgr in the
backend bounds the number of "optional" threads per impalad, so the number
of execution threads on a backend is limited by:
    sum(required threads per query) +
    CpuInfo::num_cores() * FLAGS_num_threads_per_core
DCHECKs in the backend enforce that the calculation is correct. They
were actually hit in KuduScanNode because of some races in thread
management that led to multiple "required" threads running. Now the
first thread in the multithreaded scans never exits, which means
that it's always safe for any of the other threads to exit early,
which simplifies the logic a lot.
Testing:
Updated planner tests.
Ran core tests.
Change-Id: I982837ef883457fa4d2adc3bdbdc727353469140
Reviewed-on: http://gerrit.cloudera.org:8080/10256
Reviewed-by: Tim Armstrong <tarmstrong@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
29 lines
1.0 KiB
Plaintext
29 lines
1.0 KiB
Plaintext
====
|
|
---- QUERY
|
|
# Explain a simple hash join query.
# Joins tpch.lineitem to tpch.orders on l_orderkey = o_orderkey.
# The two resource header rows are matched with row_regex because their
# Memory/Threads values are written as numeric patterns rather than fixed
# numbers; the remaining rows list the expected plan nodes literally.
# The results section is checked with VERIFY_IS_EQUAL.
|
|
explain
|
|
select *
|
|
from tpch.lineitem join tpch.orders on l_orderkey = o_orderkey;
|
|
---- RESULTS: VERIFY_IS_EQUAL
|
|
row_regex:.*Max Per-Host Resource Reservation: Memory=[0-9.]*MB Threads=[0-9]*.*
|
|
row_regex:.*Per-Host Resource Estimates: Memory=[0-9.]*MB.*
|
|
''
|
|
'PLAN-ROOT SINK'
|
|
'04:EXCHANGE [UNPARTITIONED]'
|
|
'02:HASH JOIN [INNER JOIN, BROADCAST]'
|
|
'|--03:EXCHANGE [BROADCAST]'
|
|
'| 01:SCAN HDFS [tpch.orders]'
|
|
'00:SCAN HDFS [tpch.lineitem]'
|
|
====
|
|
---- QUERY
|
|
# Tests the warning about missing table stats in the explain header.
# t1 and t3 both reference functional_avro.alltypes, and the expected
# table list below names that table only once, alongside
# functional_parquet.alltypessmall (t2).
# Only the two warning rows are listed; the results section is checked
# with VERIFY_IS_SUBSET, so the rest of the explain output is not
# spelled out here.
|
|
explain select count(t1.int_col), avg(t2.float_col), sum(t3.bigint_col)
|
|
from functional_avro.alltypes t1
|
|
inner join functional_parquet.alltypessmall t2 on (t1.id = t2.id)
|
|
left outer join functional_avro.alltypes t3 on (t2.id = t3.id)
|
|
where t1.month = 1 and t2.year = 2009 and t3.bool_col = false
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
'WARNING: The following tables are missing relevant table and/or column statistics.'
|
|
'functional_avro.alltypes, functional_parquet.alltypessmall'
|
|
====
|