Files
impala/testdata/workloads/functional-query/queries/QueryTest/dedicated-coord-mem-estimates.test
liuyao 1a01bfe831 IMPALA-10377: Improve the accuracy of resource estimation
PlanNode does not consider some factors when estimating memory,
which causes a large error rate.

AggregationNode
1.MemoryEstimate = Ndv * (AvgRowSize + SizeOfBucket)
2.When estimating the Ndv of merge aggregation, Ndv should be
  divided only once.
3.If there is no grouping exprs, MemoryEstimate =
  MIN_PLAIN_AGG_MEM

SortNode
1.MemoryEstimate = Cardinality * AvgRowSize, i.e. the memory used
  when there is enough memory

HashJoinNode
1.MemoryEstimate = DataRows + Buckets + DuplicateNodes,
  DataRows = RightTableCardinality * AvgRowSize,
  Buckets = roundUpToPowerOf2(RightTableCardinality) *
            SizeOfBucket,
  DuplicateNodes = (RightTableCardinality - RightNdv) *
                   SizeOfDuplicateNode

KuduScanNode
1.MemoryEstimate = Columns * BytesPerColumn * MaxScannerThreads,
  Columns are scanned in query, not all the columns of the table

UnitTest
1.CardinalityTest adds test cases to test memory estimation.
  Existing test cases related to memory estimation are modified.

Change-Id: Ic01db168ff2c6d6de33ee553a8175599f035d7a1
Reviewed-on: http://gerrit.cloudera.org:8080/16842
Reviewed-by: Zoltan Borok-Nagy <boroknagyz@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
2021-03-12 14:23:04 +00:00

97 lines
3.2 KiB
Plaintext

====
---- QUERY
# CTAS (create table as select) fed by a filtered scan of functional.alltypes.
# NOTE(review): the admitted 132 MB equals 2 x 16 MB + 100 MB, i.e. presumably two
# executors plus the dedicated coordinator -- confirm against the test cluster layout.
create table test as
select id
from functional.alltypes
where id > 1
---- RUNTIME_PROFILE
row_regex: .*Per-Host Resource Estimates: Memory=16MB.*
row_regex: .*Dedicated Coordinator Resource Estimate: Memory=100MB.*
row_regex: .*Cluster Memory Admitted: 132.00 MB.*
====
---- QUERY
# Truncate table to run the following inserts.
# This step carries no RUNTIME_PROFILE expectations; it only empties the 'test'
# table that the preceding CTAS populated.
truncate table test
====
---- QUERY
# Small insert from a literal values list; such a statement runs on the
# coordinator only.
insert into test
values (1), (2), (3)
---- RUNTIME_PROFILE
row_regex: .*Per-Host Resource Estimates: Memory=10MB.*
row_regex: .*Dedicated Coordinator Resource Estimate: Memory=100MB.*
row_regex: .*Cluster Memory Admitted: 10.00 MB.*
====
---- QUERY
# Large insert (insert ... select) that does not run on the coordinator.
insert into test
select id
from functional.alltypes
where id > 3
---- RUNTIME_PROFILE
row_regex: .*Per-Host Resource Estimates: Memory=16MB.*
row_regex: .*Dedicated Coordinator Resource Estimate: Memory=100MB.*
row_regex: .*Cluster Memory Admitted: 132.00 MB.*
====
---- QUERY
# Ordered SELECT: the order by clause yields a merging exchange.
select *
from functional.alltypes
order by int_col;
---- RUNTIME_PROFILE
row_regex: .*Per-Host Resource Estimates: Memory=32MB.*
row_regex: .*Dedicated Coordinator Resource Estimate: Memory=104MB.*
row_regex: .*Cluster Memory Admitted: 169.47 MB.*
====
---- QUERY
# Plain SELECT without ordering, so the exchange is non-merging.
select *
from functional.alltypes;
---- RUNTIME_PROFILE
row_regex: .*Per-Host Resource Estimates: Memory=20MB.*
row_regex: .*Dedicated Coordinator Resource Estimate: Memory=104MB.*
row_regex: .*Cluster Memory Admitted: 145.47 MB.*
====
---- QUERY
# Aggregate with no group by clause (non-grouping aggregate) placed in the
# coordinator fragment.
select avg(int_col)
from functional.alltypes;
---- RUNTIME_PROFILE
row_regex: .*Per-Host Resource Estimates: Memory=20MB.*
row_regex: .*Dedicated Coordinator Resource Estimate: Memory=104MB.*
row_regex: .*Cluster Memory Admitted: 144.08 MB.*
====
---- QUERY
# Single-node run (num_nodes=1) of a query with a complex plan, which therefore
# sits in the coordinator: filtered scan of tpch.lineitem, grouping aggregation
# and a final sort.
set num_nodes=1;
select
  l_returnflag,
  l_linestatus,
  sum(l_quantity) as sum_qty,
  sum(l_extendedprice) as sum_base_price,
  sum(l_extendedprice * (1 - l_discount)) as sum_disc_price,
  sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge,
  avg(l_quantity) as avg_qty,
  avg(l_extendedprice) as avg_price,
  avg(l_discount) as avg_disc,
  count(*) as count_order
from
  tpch.lineitem
where
  l_shipdate <= '1998-09-02'
group by
  l_returnflag,
  l_linestatus
order by
  l_returnflag,
  l_linestatus
---- RUNTIME_PROFILE
row_regex: .*Per-Host Resource Estimates: Memory=98MB.*
row_regex: .*Dedicated Coordinator Resource Estimate: Memory=198MB.*
row_regex: .*Cluster Memory Admitted: 198.00 MB.*
====
---- QUERY
# Several analytic functions without a partition by clause; being unpartitioned,
# they force the sort and analytic operators into the coordinator fragment.
select
  id,
  min(int_col) over (order by year),
  min(int_col) over (order by bigint_col),
  avg(smallint_col) over (order by int_col),
  max(int_col) over (
    order by smallint_col
    rows between unbounded preceding and 1 following
  )
from functional.alltypes;
---- RUNTIME_PROFILE
row_regex: .*Per-Host Resource Estimates: Memory=46MB.*
row_regex: .*Dedicated Coordinator Resource Estimate: Memory=124MB.*
row_regex: .*Cluster Memory Admitted: 216.00 MB.*
====