Files
impala/testdata/workloads/functional-query/queries/QueryTest/stats-extrapolation.test
Tim Armstrong 64fd0115e5 IMPALA-4862: make resource profile consistent with backend behaviour
This moves away from the PipelinedPlanNodeSet approach of enumerating
sets of concurrently-executing nodes because unions would force
creating many overlapping sets of nodes. The new approach computes
the peak resources during Open() and the peak resources between Open()
and Close() (i.e. while calling GetNext()) bottom-up for each plan node
in a fragment. The fragment resources are then combined to produce the
query resources.

The basic assumptions for the new resource estimates are:
* resources are acquired during or after the first call to Open()
  and released in Close().
* Blocking nodes call Open() on their child before acquiring
  their own resources (this required some backend changes).
* Blocking nodes call Close() on their children before returning
  from Open().
* The peak resource consumption of the query is the sum of the
  independent fragments (except for the parallel join build plans
  where we can assume there will be synchronisation). This is
  conservative but we don't synchronise fragment Open() and Close()
  across exchanges so can't make stronger assumptions in general.

Also compute the sum of minimum reservations. This will be useful
in the backend to determine exactly when all of the initial
reservations have been claimed from a shared pool of initial reservations.

Testing:
* Updated planner tests to reflect behavioural changes.
* Added extra resource requirement planner tests for unions, subplans,
  pipelines of blocking operators, and bushy join plans.
* Added single-node plans to resource-requirements tests. These have
  more complex plan trees inside a single fragment, which is useful
  for testing the peak resource requirement logic.

Change-Id: I492cf5052bb27e4e335395e2a8f8a3b07248ec9d
Reviewed-on: http://gerrit.cloudera.org:8080/7223
Reviewed-by: Tim Armstrong <tarmstrong@cloudera.com>
Tested-by: Impala Public Jenkins
2017-07-12 01:17:24 +00:00

158 lines
4.8 KiB
Plaintext

====
---- QUERY
# Setup: create alltypes in the current database with the schema of
# functional_parquet.alltypes, then load only the year = 2009 rows using a
# dynamic-partition insert on (year, month). No stats are computed in this section.
create table alltypes like functional_parquet.alltypes;
insert into alltypes partition(year, month)
select * from functional_parquet.alltypes where year = 2009;
====
---- QUERY
# No stats are available.
# The table has been loaded but "compute stats" has not been run yet, so the plan is
# expected to report stats-rows, extrapolated-rows, table stats, column stats and
# cardinality as unavailable.
# NOTE(review): VERIFY_IS_SUBSET presumably only requires the listed lines to appear
# somewhere in the explain output - confirm against the test harness.
explain select id from alltypes;
---- RESULTS: VERIFY_IS_SUBSET
' stats-rows=unavailable extrapolated-rows=unavailable'
' table stats: rows=unavailable size=unavailable'
' column stats: unavailable'
' mem-estimate=16.00MB mem-reservation=0B'
' tuple-ids=0 row-size=4B cardinality=unavailable'
---- TYPES
STRING
====
---- QUERY
# Compute stats for the freshly loaded table. The expected status message reports
# 12 updated partitions and 11 updated columns.
compute stats alltypes
---- RESULTS
'Updated 12 partition(s) and 11 column(s).'
---- TYPES
STRING
====
---- QUERY
# Stats are available now.
# Directly after "compute stats" the stored row count and the extrapolated row count
# are expected to agree (3650), and the scan cardinality shows the same value.
# The partitions/files line and the table-stats line are matched with row_regex
# because their size= values are not pinned by this test.
explain select id from alltypes;
---- RESULTS: VERIFY_IS_EQUAL
'Per-Host Resource Reservation: Memory=0B'
'Per-Host Resource Estimates: Memory=16.00MB'
'Codegen disabled by planner'
''
'F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1'
'| Per-Host Resources: mem-estimate=16.00MB mem-reservation=0B'
'PLAN-ROOT SINK'
'| mem-estimate=0B mem-reservation=0B'
'|'
'00:SCAN HDFS [$DATABASE.alltypes]'
row_regex:.*partitions=12/12 files=12 size=.*
' stats-rows=3650 extrapolated-rows=3650'
row_regex:.*table stats: rows=3650 size=.*
' column stats: all'
' mem-estimate=16.00MB mem-reservation=0B'
' tuple-ids=0 row-size=4B cardinality=3650'
---- TYPES
STRING
====
---- QUERY
# Select a subset of partitions.
# The month predicate keeps 3 of the 12 partitions (3 files). The expected plan shows
# stats-rows=900 for the selected partitions, while the extrapolated row count, which
# is also the scan cardinality, is 904. Table-level stats still report 3650 rows.
explain select id from alltypes where month in (1, 2, 3);
---- RESULTS: VERIFY_IS_EQUAL
'Per-Host Resource Reservation: Memory=0B'
'Per-Host Resource Estimates: Memory=16.00MB'
'Codegen disabled by planner'
''
'F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1'
'| Per-Host Resources: mem-estimate=16.00MB mem-reservation=0B'
'PLAN-ROOT SINK'
'| mem-estimate=0B mem-reservation=0B'
'|'
'00:SCAN HDFS [$DATABASE.alltypes]'
row_regex:.*partitions=3/12 files=3 size=.*
' stats-rows=900 extrapolated-rows=904'
row_regex:.*table stats: rows=3650 size=.*
' column stats: all'
' mem-estimate=16.00MB mem-reservation=0B'
' tuple-ids=0 row-size=4B cardinality=904'
---- TYPES
STRING
====
---- QUERY
# Double the data in existing partitions.
# The year = 2009 rows are inserted a second time without recomputing stats: the
# file count goes from 12 to 24 while the stored stats stay at 3650 rows, so the
# extrapolated row count and the scan cardinality are expected to double to 7300.
# The plan header is expected to match the other exact-match plans in this file,
# including the 'Codegen disabled by planner' line.
insert into alltypes partition(year, month)
select * from functional_parquet.alltypes where year = 2009;
explain select id from alltypes;
---- RESULTS: VERIFY_IS_EQUAL
'Per-Host Resource Reservation: Memory=0B'
'Per-Host Resource Estimates: Memory=16.00MB'
'Codegen disabled by planner'
''
'F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1'
'| Per-Host Resources: mem-estimate=16.00MB mem-reservation=0B'
'PLAN-ROOT SINK'
'| mem-estimate=0B mem-reservation=0B'
'|'
'00:SCAN HDFS [$DATABASE.alltypes]'
row_regex:.*partitions=12/12 files=24 size=.*
' stats-rows=3650 extrapolated-rows=7300'
row_regex:.*table stats: rows=3650 size=.*
' column stats: all'
' mem-estimate=16.00MB mem-reservation=0B'
' tuple-ids=0 row-size=4B cardinality=7300'
---- TYPES
STRING
====
---- QUERY
# Create new partitions and extrapolate their row count.
# The year = 2010 rows land in 12 new partitions (12 of 24 selected) that were added
# after the last "compute stats", so stats-rows is unavailable for them. The plan is
# still expected to show an extrapolated row count (3651), which is also used as the
# scan cardinality. Table-level stats remain at the previously computed 3650 rows.
insert into alltypes partition(year, month)
select * from functional_parquet.alltypes where year = 2010;
explain select id from alltypes where year = 2010;
---- RESULTS: VERIFY_IS_EQUAL
'Per-Host Resource Reservation: Memory=0B'
'Per-Host Resource Estimates: Memory=16.00MB'
'Codegen disabled by planner'
''
'F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1'
'| Per-Host Resources: mem-estimate=16.00MB mem-reservation=0B'
'PLAN-ROOT SINK'
'| mem-estimate=0B mem-reservation=0B'
'|'
'00:SCAN HDFS [$DATABASE.alltypes]'
row_regex:.*partitions=12/24 files=12 size=.*
' stats-rows=unavailable extrapolated-rows=3651'
row_regex:.*table stats: rows=3650 size=.*
' column stats: all'
' mem-estimate=16.00MB mem-reservation=0B'
' tuple-ids=0 row-size=4B cardinality=3651'
---- TYPES
STRING
====
---- QUERY
# Compute stats and run the same query again.
# After recomputing, the selected year = 2010 partitions are expected to report
# stats-rows=3650 and the table-level row count becomes 10950. The extrapolated row
# count and the scan cardinality are expected to remain 3651.
compute stats alltypes;
explain select id from alltypes where year = 2010;
---- RESULTS: VERIFY_IS_EQUAL
'Per-Host Resource Reservation: Memory=0B'
'Per-Host Resource Estimates: Memory=16.00MB'
'Codegen disabled by planner'
''
'F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1'
'| Per-Host Resources: mem-estimate=16.00MB mem-reservation=0B'
'PLAN-ROOT SINK'
'| mem-estimate=0B mem-reservation=0B'
'|'
'00:SCAN HDFS [$DATABASE.alltypes]'
row_regex:.*partitions=12/24 files=12 size=.*
' stats-rows=3650 extrapolated-rows=3651'
row_regex:.*table stats: rows=10950 size=.*
' column stats: all'
' mem-estimate=16.00MB mem-reservation=0B'
' tuple-ids=0 row-size=4B cardinality=3651'
---- TYPES
STRING
====
---- QUERY
# Test that dropping stats resets everything.
# After "drop stats" the plan is expected to contain the same "unavailable" lines as
# the first explain in this file, which ran before any stats had been computed.
drop stats alltypes;
explain select id from alltypes;
---- RESULTS: VERIFY_IS_SUBSET
' stats-rows=unavailable extrapolated-rows=unavailable'
' table stats: rows=unavailable size=unavailable'
' column stats: unavailable'
' mem-estimate=16.00MB mem-reservation=0B'
' tuple-ids=0 row-size=4B cardinality=unavailable'
---- TYPES
STRING
====