Files
impala/testdata/workloads/functional-query/queries/QueryTest/stats-extrapolation.test
Lars Volker fc529b7f9f IMPALA-5293: Turn insert clustering on by default
This change enables clustering by default. IMPALA-2521 introduced the
'clustered' hint, which adds a local sort by the partitioning columns
to a query plan. The hint is only effective for HDFS and Kudu tables.

As before, the 'noclustered' hint prevents clustering. If a table has
ordering columns defined, the 'noclustered' hint is ignored and we
issue a warning.

This change removes some tests that were added specifically to test
that clustering can be enabled using the 'clustered' hint. It changes
some tests to use the 'noclustered' hint to make sure that clustering
can be disabled. It also adds tests to make sure that we cover the
'noclustered' case properly.

Cherry-picks: not for 2.x.

Change-Id: Idbf2368cf4415e6ecfa65058daf6ff87ef62f9d9
Reviewed-on: http://gerrit.cloudera.org:8080/9153
Reviewed-by: Lars Volker <lv@cloudera.com>
Tested-by: Impala Public Jenkins
2018-02-03 05:58:50 +00:00

195 lines
7.3 KiB
Plaintext

====
---- QUERY
# Setup: create a local 'alltypes' table with the same layout as
# functional_parquet.alltypes and load only the year = 2009 rows into it,
# letting the insert derive the (year, month) partition values from the data.
# This test relies on a deterministic row order so we use "sort by (id)".
create table alltypes sort by (id) like functional_parquet.alltypes;
insert into alltypes partition(year, month)
select * from functional_parquet.alltypes where year = 2009;
====
---- QUERY
# No stats are available.
# Nothing has been computed yet, so the scan node is expected to report the
# table, partition (0/12), column and extrapolated row counts as unavailable.
# Only the statistics-related plan lines are checked (VERIFY_IS_SUBSET).
explain select id from alltypes;
---- RESULTS: VERIFY_IS_SUBSET
' stored statistics:'
' table: rows=unavailable size=unavailable'
' partitions: 0/12 rows=unavailable'
' columns: unavailable'
' extrapolated-rows=unavailable'
' mem-estimate=16.00MB mem-reservation=0B'
' tuple-ids=0 row-size=4B cardinality=unavailable'
---- TYPES
STRING
====
---- QUERY
# Compute stats for the whole table. The expected status message reports a
# single updated partition and 11 columns, even though the table holds 12
# partitions at this point.
compute stats alltypes;
---- RESULTS
'Updated 1 partition(s) and 11 column(s).'
---- TYPES
STRING
====
---- QUERY
# Only the table-level row count is stored. The partition row counts are extrapolated.
# Every partition row is expected to show #ROWS = -1 together with a
# non-negative EXTRAP #ROWS estimate; the 'Total' row reports 3650 in both
# columns.
show table stats alltypes;
---- LABELS
YEAR, MONTH, #ROWS, EXTRAP #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
---- RESULTS
'2009','1',-1,308,1,regex:.*B,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/$DATABASE.db/alltypes/year=2009/month=1'
'2009','2',-1,288,1,regex:.*B,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/$DATABASE.db/alltypes/year=2009/month=2'
'2009','3',-1,308,1,regex:.*B,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/$DATABASE.db/alltypes/year=2009/month=3'
'2009','4',-1,302,1,regex:.*B,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/$DATABASE.db/alltypes/year=2009/month=4'
'2009','5',-1,308,1,regex:.*B,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/$DATABASE.db/alltypes/year=2009/month=5'
'2009','6',-1,302,1,regex:.*B,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/$DATABASE.db/alltypes/year=2009/month=6'
'2009','7',-1,308,1,regex:.*B,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/$DATABASE.db/alltypes/year=2009/month=7'
'2009','8',-1,308,1,regex:.*B,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/$DATABASE.db/alltypes/year=2009/month=8'
'2009','9',-1,302,1,regex:.*B,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/$DATABASE.db/alltypes/year=2009/month=9'
'2009','10',-1,308,1,regex:.*B,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/$DATABASE.db/alltypes/year=2009/month=10'
'2009','11',-1,302,1,regex:.*B,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/$DATABASE.db/alltypes/year=2009/month=11'
'2009','12',-1,308,1,regex:.*B,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/$DATABASE.db/alltypes/year=2009/month=12'
'Total','',3650,3650,12,regex:.*B,'0B','','','',''
---- TYPES
STRING,STRING,BIGINT,BIGINT,BIGINT,STRING,STRING,STRING,STRING,STRING,STRING
====
---- QUERY
# Stats are available now.
# The full plan is compared (VERIFY_IS_EQUAL). The stored table row count, the
# extrapolated row count and the scan cardinality are all expected to be 3650,
# while per-partition row counts remain unavailable (0/12).
explain select id from alltypes;
---- RESULTS: VERIFY_IS_EQUAL
'Max Per-Host Resource Reservation: Memory=0B'
'Per-Host Resource Estimates: Memory=16.00MB'
'Codegen disabled by planner'
''
'F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1'
'| Per-Host Resources: mem-estimate=16.00MB mem-reservation=0B'
'PLAN-ROOT SINK'
'| mem-estimate=0B mem-reservation=0B'
'|'
'00:SCAN HDFS [$DATABASE.alltypes]'
row_regex:.*partitions=12/12 files=12 size=.*
' stored statistics:'
row_regex:.*table: rows=3650 size=.*
' partitions: 0/12 rows=unavailable'
' columns: all'
' extrapolated-rows=3650'
' mem-estimate=16.00MB mem-reservation=0B'
' tuple-ids=0 row-size=4B cardinality=3650'
---- TYPES
STRING
====
---- QUERY
# Select a subset of partitions.
# The predicate keeps 3 of the 12 partitions. The expected extrapolated row
# count of 904 matches the sum of the EXTRAP #ROWS values that SHOW TABLE STATS
# reports for months 1, 2 and 3 (308 + 288 + 308).
explain select id from alltypes where month in (1, 2, 3);
---- RESULTS: VERIFY_IS_EQUAL
'Max Per-Host Resource Reservation: Memory=0B'
'Per-Host Resource Estimates: Memory=16.00MB'
'Codegen disabled by planner'
''
'F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1'
'| Per-Host Resources: mem-estimate=16.00MB mem-reservation=0B'
'PLAN-ROOT SINK'
'| mem-estimate=0B mem-reservation=0B'
'|'
'00:SCAN HDFS [$DATABASE.alltypes]'
row_regex:.*partitions=3/12 files=3 size=.*
' stored statistics:'
row_regex:.*table: rows=3650 size=.*
' partitions: 0/3 rows=unavailable'
' columns: all'
' extrapolated-rows=904'
' mem-estimate=16.00MB mem-reservation=0B'
' tuple-ids=0 row-size=4B cardinality=904'
---- TYPES
STRING
====
---- QUERY
# Double the data in existing partitions.
# The same year = 2009 rows are inserted a second time and stats are NOT
# recomputed: the stored table row count stays at 3650, while the file count
# (24) and the extrapolated row count / cardinality (7300) are expected to
# double.
# NOTE(review): unlike the other VERIFY_IS_EQUAL plans in this file, the
# expected output here has no 'Codegen disabled by planner' line. Presumably
# the 7300-row estimate exceeds the codegen-disabling row threshold the test is
# run with -- confirm against the test's exec options before editing.
insert into alltypes partition(year, month)
select * from functional_parquet.alltypes where year = 2009;
explain select id from alltypes;
---- RESULTS: VERIFY_IS_EQUAL
'Max Per-Host Resource Reservation: Memory=0B'
'Per-Host Resource Estimates: Memory=16.00MB'
''
'F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1'
'| Per-Host Resources: mem-estimate=16.00MB mem-reservation=0B'
'PLAN-ROOT SINK'
'| mem-estimate=0B mem-reservation=0B'
'|'
'00:SCAN HDFS [$DATABASE.alltypes]'
row_regex:.*partitions=12/12 files=24 size=.*
' stored statistics:'
row_regex:.*table: rows=3650 size=.*
' partitions: 0/12 rows=unavailable'
' columns: all'
' extrapolated-rows=7300'
' mem-estimate=16.00MB mem-reservation=0B'
' tuple-ids=0 row-size=4B cardinality=7300'
---- TYPES
STRING
====
---- QUERY
# Create new partitions and extrapolate their row count.
# The year = 2010 partitions are added after the last COMPUTE STATS, so the
# stored table row count is still 3650. The scan selects 12 of the now 24
# partitions and the expected extrapolated row count / cardinality is 3651.
insert into alltypes partition(year, month)
select * from functional_parquet.alltypes where year = 2010;
explain select id from alltypes where year = 2010;
---- RESULTS: VERIFY_IS_EQUAL
'Max Per-Host Resource Reservation: Memory=0B'
'Per-Host Resource Estimates: Memory=16.00MB'
'Codegen disabled by planner'
''
'F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1'
'| Per-Host Resources: mem-estimate=16.00MB mem-reservation=0B'
'PLAN-ROOT SINK'
'| mem-estimate=0B mem-reservation=0B'
'|'
'00:SCAN HDFS [$DATABASE.alltypes]'
row_regex:.*partitions=12/24 files=12 size=.*
' stored statistics:'
row_regex:.*table: rows=3650 size=.*
' partitions: 0/12 rows=unavailable'
' columns: all'
' extrapolated-rows=3651'
' mem-estimate=16.00MB mem-reservation=0B'
' tuple-ids=0 row-size=4B cardinality=3651'
---- TYPES
STRING
====
---- QUERY
# Compute stats and run the same query again.
# After recomputing, the stored table row count is expected to be 10950. The
# extrapolated row count / cardinality for the year = 2010 partitions stays at
# 3651, and per-partition row counts are still reported as unavailable.
compute stats alltypes;
explain select id from alltypes where year = 2010;
---- RESULTS: VERIFY_IS_EQUAL
'Max Per-Host Resource Reservation: Memory=0B'
'Per-Host Resource Estimates: Memory=16.00MB'
'Codegen disabled by planner'
''
'F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1'
'| Per-Host Resources: mem-estimate=16.00MB mem-reservation=0B'
'PLAN-ROOT SINK'
'| mem-estimate=0B mem-reservation=0B'
'|'
'00:SCAN HDFS [$DATABASE.alltypes]'
row_regex:.*partitions=12/24 files=12 size=.*
' stored statistics:'
row_regex:.*table: rows=10950 size=.*
' partitions: 0/12 rows=unavailable'
' columns: all'
' extrapolated-rows=3651'
' mem-estimate=16.00MB mem-reservation=0B'
' tuple-ids=0 row-size=4B cardinality=3651'
---- TYPES
STRING
====
---- QUERY
# Test that dropping stats resets everything.
# After DROP STATS the plan is expected to report the same "unavailable"
# values as before the first COMPUTE STATS, now across all 24 partitions.
# Only the statistics-related plan lines are checked (VERIFY_IS_SUBSET).
drop stats alltypes;
explain select id from alltypes;
---- RESULTS: VERIFY_IS_SUBSET
' stored statistics:'
' table: rows=unavailable size=unavailable'
' partitions: 0/24 rows=unavailable'
' columns: unavailable'
' extrapolated-rows=unavailable'
' mem-estimate=16.00MB mem-reservation=0B'
' tuple-ids=0 row-size=4B cardinality=unavailable'
---- TYPES
STRING
====