mirror of
https://github.com/apache/impala.git
synced 2026-02-01 21:00:29 -05:00
PlanNode does not consider some factors when estimating memory,
which causes large errors in the estimates.
AggregationNode
1.MemoryEstimate = Ndv * (AvgRowSize + SizeOfBucket)
2.When estimating the Ndv of merge aggregation, Ndv should be
divided only once.
3.If there is no grouping exprs, MemoryEstimate =
MIN_PLAIN_AGG_MEM
SortNode
1.MemoryEstimate = Cardinality * AvgRowSize. This is the memory used
when there is enough memory available
HashJoinNode
1.MemoryEstimate = DataRows + Buckets + DuplicateNodes,
DataRows = RightTableCardinality * AvgRowSize,
Buckets = roundUpToPowerOf2(RightTableCardinality) *
SizeOfBucket,
DuplicateNodes = (RightTableCardinality - RightNdv) *
SizeOfDuplicateNode
KuduScanNode
1.MemoryEstimate = Columns * BytesPerColumn * MaxScannerThreads,
where Columns counts the columns scanned in the query, not all the columns of the table
UnitTest
1.CardinalityTest adds test cases to test memory estimation.
Existing test cases related to memory estimation are modified
Change-Id: Ic01db168ff2c6d6de33ee553a8175599f035d7a1
Reviewed-on: http://gerrit.cloudera.org:8080/16842
Reviewed-by: Zoltan Borok-Nagy <boroknagyz@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
97 lines
3.2 KiB
Plaintext
97 lines
3.2 KiB
Plaintext
====
|
|
---- QUERY
|
|
# CTAS
|
|
create table test as select id from functional.alltypes where id > 1
|
|
---- RUNTIME_PROFILE
|
|
row_regex: .*Per-Host Resource Estimates: Memory=16MB.*
|
|
row_regex: .*Dedicated Coordinator Resource Estimate: Memory=100MB.*
|
|
row_regex: .*Cluster Memory Admitted: 132.00 MB.*
|
|
====
|
|
---- QUERY
|
|
# Truncate table to run the following inserts.
|
|
truncate table test
|
|
====
|
|
---- QUERY
|
|
# Small insert (i.e. values list, runs on coordinator only).
|
|
insert into test values (1), (2), (3)
|
|
---- RUNTIME_PROFILE
|
|
row_regex: .*Per-Host Resource Estimates: Memory=10MB.*
|
|
row_regex: .*Dedicated Coordinator Resource Estimate: Memory=100MB.*
|
|
row_regex: .*Cluster Memory Admitted: 10.00 MB.*
|
|
====
|
|
---- QUERY
|
|
# Large insert where it doesn't run on the coordinator.
|
|
insert into test select id from functional.alltypes where id > 3
|
|
---- RUNTIME_PROFILE
|
|
row_regex: .*Per-Host Resource Estimates: Memory=16MB.*
|
|
row_regex: .*Dedicated Coordinator Resource Estimate: Memory=100MB.*
|
|
row_regex: .*Cluster Memory Admitted: 132.00 MB.*
|
|
====
|
|
---- QUERY
|
|
# SELECT with merging exchange (i.e. order by).
|
|
select * from functional.alltypes order by int_col;
|
|
---- RUNTIME_PROFILE
|
|
row_regex: .*Per-Host Resource Estimates: Memory=32MB.*
|
|
row_regex: .*Dedicated Coordinator Resource Estimate: Memory=104MB.*
|
|
row_regex: .*Cluster Memory Admitted: 169.47 MB.*
|
|
====
|
|
---- QUERY
|
|
# SELECT with non-merging exchange.
|
|
select * from functional.alltypes;
|
|
---- RUNTIME_PROFILE
|
|
row_regex: .*Per-Host Resource Estimates: Memory=20MB.*
|
|
row_regex: .*Dedicated Coordinator Resource Estimate: Memory=104MB.*
|
|
row_regex: .*Cluster Memory Admitted: 145.47 MB.*
|
|
====
|
|
---- QUERY
|
|
# SELECT with a non-grouping aggregate in the coordinator fragment.
|
|
select avg(int_col) from functional.alltypes;
|
|
---- RUNTIME_PROFILE
|
|
row_regex: .*Per-Host Resource Estimates: Memory=20MB.*
|
|
row_regex: .*Dedicated Coordinator Resource Estimate: Memory=104MB.*
|
|
row_regex: .*Cluster Memory Admitted: 144.08 MB.*
|
|
====
|
|
---- QUERY
|
|
# SELECT with num_nodes=1 and a complex plan in the coordinator.
|
|
set num_nodes=1;
|
|
select
|
|
l_returnflag,
|
|
l_linestatus,
|
|
sum(l_quantity) as sum_qty,
|
|
sum(l_extendedprice) as sum_base_price,
|
|
sum(l_extendedprice * (1 - l_discount)) as sum_disc_price,
|
|
sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge,
|
|
avg(l_quantity) as avg_qty,
|
|
avg(l_extendedprice) as avg_price,
|
|
avg(l_discount) as avg_disc,
|
|
count(*) as count_order
|
|
from
|
|
tpch.lineitem
|
|
where
|
|
l_shipdate <= '1998-09-02'
|
|
group by
|
|
l_returnflag,
|
|
l_linestatus
|
|
order by
|
|
l_returnflag,
|
|
l_linestatus
|
|
---- RUNTIME_PROFILE
|
|
row_regex: .*Per-Host Resource Estimates: Memory=98MB.*
|
|
row_regex: .*Dedicated Coordinator Resource Estimate: Memory=198MB.*
|
|
row_regex: .*Cluster Memory Admitted: 198.00 MB.*
|
|
====
|
|
---- QUERY
|
|
# SELECT with multiple unpartitioned analytic functions to force the sort and analytics
|
|
# into the coordinator fragment.
|
|
select id,
|
|
min(int_col) over (order by year),
|
|
min(int_col) over (order by bigint_col),
|
|
avg(smallint_col) over (order by int_col),
|
|
max(int_col) over (order by smallint_col rows between unbounded preceding and 1 following)
|
|
from functional.alltypes;
|
|
---- RUNTIME_PROFILE
|
|
row_regex: .*Per-Host Resource Estimates: Memory=46MB.*
|
|
row_regex: .*Dedicated Coordinator Resource Estimate: Memory=124MB.*
|
|
row_regex: .*Cluster Memory Admitted: 216.00 MB.*
|
|
====
|