Files
impala/testdata/workloads/functional-query/queries/QueryTest/stats-extrapolation.test
Alex Behm 1a1927b07d IMPALA-6228: Control stats extrapolation via tbl prop.
Introduces a new TBLPROPERTY for controlling stats
extrapolation on a per-table basis:

impala.enable.stats.extrapolation=true/false

The property key was chosen to be consistent with
the impalad startup flag --enable_stats_extrapolation
and to indicate that the property was set and is used
by Impala.

Behavior:
- If the property is not set, then the extrapolation
  behavior is determined by the impalad startup flag.
- If the property is set, it overrides the impalad
  startup flag, i.e., extrapolation can be explicitly
  enabled or disabled regardless of the startup flag.

Testing:
- added new unit tests
- core/hdfs run passed

Change-Id: Ie49597bf1b93b7572106abc620d91f199cba0cfd
Reviewed-on: http://gerrit.cloudera.org:8080/9139
Reviewed-by: Alex Behm <alex.behm@cloudera.com>
Tested-by: Impala Public Jenkins
2018-02-03 22:56:13 +00:00

196 lines
7.3 KiB
Plaintext

====
---- QUERY
# Setup: create a local copy of functional_parquet.alltypes, enable stats
# extrapolation for it via the impala.enable.stats.extrapolation table property,
# and load only the year=2009 rows using a dynamic partition insert (year, month).
# This test relies on a deterministic row order so we use "sort by (id)".
create table alltypes sort by (id) like functional_parquet.alltypes;
alter table alltypes set tblproperties("impala.enable.stats.extrapolation"="true");
insert into alltypes partition(year, month)
select * from functional_parquet.alltypes where year = 2009;
====
---- QUERY
# No stats are available.
# Baseline before any COMPUTE STATS: the listed plan lines only need to be a subset
# of the explain output (VERIFY_IS_SUBSET). Table rows/size, column stats,
# extrapolated-rows and cardinality are all expected to be 'unavailable' across the
# 12 partitions.
explain select id from alltypes;
---- RESULTS: VERIFY_IS_SUBSET
' stored statistics:'
' table: rows=unavailable size=unavailable'
' partitions: 0/12 rows=unavailable'
' columns: unavailable'
' extrapolated-rows=unavailable'
' mem-estimate=16.00MB mem-reservation=0B'
' tuple-ids=0 row-size=4B cardinality=unavailable'
---- TYPES
STRING
====
---- QUERY
# Compute stats on the table while it holds only the year=2009 data.
# NOTE(review): the expected message reports 1 updated partition although the table
# has 12 partitions -- presumably because only table-level stats are stored when
# extrapolation is enabled; confirm.
compute stats alltypes
---- RESULTS
'Updated 1 partition(s) and 11 column(s).'
---- TYPES
STRING
====
---- QUERY
# Only the table-level row count is stored. The partition row counts are extrapolated.
# Every partition row is expected to show #ROWS = -1 (nothing stored per partition)
# with a value in EXTRAP #ROWS, while the 'Total' row reports 3650 in both columns.
# SIZE is matched with a regex, so the exact byte size is not pinned by this test.
show table stats alltypes
---- LABELS
YEAR, MONTH, #ROWS, EXTRAP #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
---- RESULTS
'2009','1',-1,308,1,regex:.*B,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/$DATABASE.db/alltypes/year=2009/month=1'
'2009','2',-1,288,1,regex:.*B,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/$DATABASE.db/alltypes/year=2009/month=2'
'2009','3',-1,308,1,regex:.*B,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/$DATABASE.db/alltypes/year=2009/month=3'
'2009','4',-1,302,1,regex:.*B,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/$DATABASE.db/alltypes/year=2009/month=4'
'2009','5',-1,308,1,regex:.*B,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/$DATABASE.db/alltypes/year=2009/month=5'
'2009','6',-1,302,1,regex:.*B,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/$DATABASE.db/alltypes/year=2009/month=6'
'2009','7',-1,308,1,regex:.*B,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/$DATABASE.db/alltypes/year=2009/month=7'
'2009','8',-1,308,1,regex:.*B,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/$DATABASE.db/alltypes/year=2009/month=8'
'2009','9',-1,302,1,regex:.*B,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/$DATABASE.db/alltypes/year=2009/month=9'
'2009','10',-1,308,1,regex:.*B,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/$DATABASE.db/alltypes/year=2009/month=10'
'2009','11',-1,302,1,regex:.*B,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/$DATABASE.db/alltypes/year=2009/month=11'
'2009','12',-1,308,1,regex:.*B,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/$DATABASE.db/alltypes/year=2009/month=12'
'Total','',3650,3650,12,regex:.*B,'0B','','','',''
---- TYPES
STRING,STRING,BIGINT,BIGINT,BIGINT,STRING,STRING,STRING,STRING,STRING,STRING
====
---- QUERY
# Stats are available now.
# The whole plan is compared exactly (VERIFY_IS_EQUAL); only the size values are
# left open through row_regex. With all 12 partitions scanned, extrapolated-rows and
# cardinality are both expected to equal the stored table row count of 3650, while
# the per-partition line still reports 0/12 partitions with rows available.
explain select id from alltypes;
---- RESULTS: VERIFY_IS_EQUAL
'Max Per-Host Resource Reservation: Memory=0B'
'Per-Host Resource Estimates: Memory=16.00MB'
'Codegen disabled by planner'
''
'F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1'
'| Per-Host Resources: mem-estimate=16.00MB mem-reservation=0B'
'PLAN-ROOT SINK'
'| mem-estimate=0B mem-reservation=0B'
'|'
'00:SCAN HDFS [$DATABASE.alltypes]'
row_regex:.*partitions=12/12 files=12 size=.*
' stored statistics:'
row_regex:.*table: rows=3650 size=.*
' partitions: 0/12 rows=unavailable'
' columns: all'
' extrapolated-rows=3650'
' mem-estimate=16.00MB mem-reservation=0B'
' tuple-ids=0 row-size=4B cardinality=3650'
---- TYPES
STRING
====
---- QUERY
# Select a subset of partitions.
# The predicate keeps 3 of the 12 partitions (3 files). The stored table row count
# stays 3650, but the extrapolated row count and cardinality for the selected
# partitions are expected to be 904.
explain select id from alltypes where month in (1, 2, 3);
---- RESULTS: VERIFY_IS_EQUAL
'Max Per-Host Resource Reservation: Memory=0B'
'Per-Host Resource Estimates: Memory=16.00MB'
'Codegen disabled by planner'
''
'F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1'
'| Per-Host Resources: mem-estimate=16.00MB mem-reservation=0B'
'PLAN-ROOT SINK'
'| mem-estimate=0B mem-reservation=0B'
'|'
'00:SCAN HDFS [$DATABASE.alltypes]'
row_regex:.*partitions=3/12 files=3 size=.*
' stored statistics:'
row_regex:.*table: rows=3650 size=.*
' partitions: 0/3 rows=unavailable'
' columns: all'
' extrapolated-rows=904'
' mem-estimate=16.00MB mem-reservation=0B'
' tuple-ids=0 row-size=4B cardinality=904'
---- TYPES
STRING
====
---- QUERY
# Double the data in existing partitions.
# The same year=2009 rows are inserted a second time without recomputing stats, so
# the stored table row count stays 3650 while the file count (24) and the
# extrapolated row count / cardinality (7300) are expected to double.
# NOTE(review): unlike the other exact-match plans in this file, the expected plan
# here has no 'Codegen disabled by planner' line -- presumably because the estimated
# cardinality (7300) is above the planner's codegen-disabling threshold; confirm.
insert into alltypes partition(year, month)
select * from functional_parquet.alltypes where year = 2009;
explain select id from alltypes;
---- RESULTS: VERIFY_IS_EQUAL
'Max Per-Host Resource Reservation: Memory=0B'
'Per-Host Resource Estimates: Memory=16.00MB'
''
'F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1'
'| Per-Host Resources: mem-estimate=16.00MB mem-reservation=0B'
'PLAN-ROOT SINK'
'| mem-estimate=0B mem-reservation=0B'
'|'
'00:SCAN HDFS [$DATABASE.alltypes]'
row_regex:.*partitions=12/12 files=24 size=.*
' stored statistics:'
row_regex:.*table: rows=3650 size=.*
' partitions: 0/12 rows=unavailable'
' columns: all'
' extrapolated-rows=7300'
' mem-estimate=16.00MB mem-reservation=0B'
' tuple-ids=0 row-size=4B cardinality=7300'
---- TYPES
STRING
====
---- QUERY
# Create new partitions and extrapolate their row count.
# The year=2010 partitions are added after the last COMPUTE STATS, so the stored
# table row count is still 3650. The query scans 12 of the now 24 partitions and the
# expected extrapolated row count / cardinality for them is 3651.
insert into alltypes partition(year, month)
select * from functional_parquet.alltypes where year = 2010;
explain select id from alltypes where year = 2010;
---- RESULTS: VERIFY_IS_EQUAL
'Max Per-Host Resource Reservation: Memory=0B'
'Per-Host Resource Estimates: Memory=16.00MB'
'Codegen disabled by planner'
''
'F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1'
'| Per-Host Resources: mem-estimate=16.00MB mem-reservation=0B'
'PLAN-ROOT SINK'
'| mem-estimate=0B mem-reservation=0B'
'|'
'00:SCAN HDFS [$DATABASE.alltypes]'
row_regex:.*partitions=12/24 files=12 size=.*
' stored statistics:'
row_regex:.*table: rows=3650 size=.*
' partitions: 0/12 rows=unavailable'
' columns: all'
' extrapolated-rows=3651'
' mem-estimate=16.00MB mem-reservation=0B'
' tuple-ids=0 row-size=4B cardinality=3651'
---- TYPES
STRING
====
---- QUERY
# Compute stats and run the same query again.
# After recomputing, the stored table row count is expected to be 10950, while the
# extrapolated row count / cardinality for the year=2010 partitions stays at 3651.
# Per-partition row counts are still reported as unavailable (0/12).
compute stats alltypes;
explain select id from alltypes where year = 2010;
---- RESULTS: VERIFY_IS_EQUAL
'Max Per-Host Resource Reservation: Memory=0B'
'Per-Host Resource Estimates: Memory=16.00MB'
'Codegen disabled by planner'
''
'F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1'
'| Per-Host Resources: mem-estimate=16.00MB mem-reservation=0B'
'PLAN-ROOT SINK'
'| mem-estimate=0B mem-reservation=0B'
'|'
'00:SCAN HDFS [$DATABASE.alltypes]'
row_regex:.*partitions=12/24 files=12 size=.*
' stored statistics:'
row_regex:.*table: rows=10950 size=.*
' partitions: 0/12 rows=unavailable'
' columns: all'
' extrapolated-rows=3651'
' mem-estimate=16.00MB mem-reservation=0B'
' tuple-ids=0 row-size=4B cardinality=3651'
---- TYPES
STRING
====
---- QUERY
# Test that dropping stats resets everything.
# After DROP STATS the plan is expected to report table, column, extrapolated-rows
# and cardinality values as 'unavailable' again, now across all 24 partitions. Only
# the listed lines are checked (VERIFY_IS_SUBSET).
drop stats alltypes;
explain select id from alltypes;
---- RESULTS: VERIFY_IS_SUBSET
' stored statistics:'
' table: rows=unavailable size=unavailable'
' partitions: 0/24 rows=unavailable'
' columns: unavailable'
' extrapolated-rows=unavailable'
' mem-estimate=16.00MB mem-reservation=0B'
' tuple-ids=0 row-size=4B cardinality=unavailable'
---- TYPES
STRING
====