mirror of
https://github.com/apache/impala.git
synced 2026-01-07 09:02:19 -05:00
This change enables clustering by default. IMPALA-2521 introduced the 'clustered' hint, which inserts a local sort by the partitioning columns into a query plan. The hint is only effective for HDFS and Kudu tables. As before, the 'noclustered' hint prevents clustering. If a table has ordering columns defined, the 'noclustered' hint is ignored and we issue a warning. This change removes some tests that were added specifically to test that clustering can be enabled using the 'clustered' hint. It changes some tests to use the 'noclustered' hint to make sure that clustering can be disabled. It also adds tests to make sure that we cover the 'noclustered' case properly. Cherry-picks: not for 2.x. Change-Id: Idbf2368cf4415e6ecfa65058daf6ff87ef62f9d9 Reviewed-on: http://gerrit.cloudera.org:8080/9153 Reviewed-by: Lars Volker <lv@cloudera.com> Tested-by: Impala Public Jenkins
195 lines
7.3 KiB
Plaintext
195 lines
7.3 KiB
Plaintext
====
|
|
---- QUERY
|
|
# This test relies on a deterministic row order so we use "sort by (id)".
# Setup: create a local 'alltypes' table with the layout of functional_parquet.alltypes
# and load only the year = 2009 rows, letting the insert choose the (year, month)
# partition for each row. The later test cases expect this to produce 12 partitions.
|
|
create table alltypes sort by (id) like functional_parquet.alltypes;
|
|
insert into alltypes partition(year, month)
|
|
select * from functional_parquet.alltypes where year = 2009;
|
|
====
|
|
---- QUERY
|
|
# No stats are available.
# Only a subset of the explain output is checked (VERIFY_IS_SUBSET): the table row
# count and size, the per-partition row counts (0 of 12 known), the column stats, the
# extrapolated row count and the scan cardinality must all be reported as unavailable.
|
|
explain select id from alltypes;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
' stored statistics:'
|
|
' table: rows=unavailable size=unavailable'
|
|
' partitions: 0/12 rows=unavailable'
|
|
' columns: unavailable'
|
|
' extrapolated-rows=unavailable'
|
|
' mem-estimate=16.00MB mem-reservation=0B'
|
|
' tuple-ids=0 row-size=4B cardinality=unavailable'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
# Compute stats on the freshly loaded table. The expected status message reports
# 1 partition and 11 columns updated.
# NOTE(review): only 1 partition is reported although 12 were loaded; presumably this
# is because, with stats extrapolation, only the table-level row count is stored
# (see the following test case) -- confirm.
|
|
compute stats alltypes
|
|
---- RESULTS
|
|
'Updated 1 partition(s) and 11 column(s).'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
# Only the table-level row count is stored. The partition row counts are extrapolated.
# Accordingly every partition row shows #ROWS = -1 next to a non-negative EXTRAP #ROWS
# value, while the 'Total' row shows 3650 in both columns. SIZE is matched with
# regex:.*B instead of an exact value.
|
|
show table stats alltypes
|
|
---- LABELS
|
|
YEAR, MONTH, #ROWS, EXTRAP #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
|
|
---- RESULTS
|
|
'2009','1',-1,308,1,regex:.*B,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/$DATABASE.db/alltypes/year=2009/month=1'
|
|
'2009','2',-1,288,1,regex:.*B,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/$DATABASE.db/alltypes/year=2009/month=2'
|
|
'2009','3',-1,308,1,regex:.*B,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/$DATABASE.db/alltypes/year=2009/month=3'
|
|
'2009','4',-1,302,1,regex:.*B,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/$DATABASE.db/alltypes/year=2009/month=4'
|
|
'2009','5',-1,308,1,regex:.*B,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/$DATABASE.db/alltypes/year=2009/month=5'
|
|
'2009','6',-1,302,1,regex:.*B,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/$DATABASE.db/alltypes/year=2009/month=6'
|
|
'2009','7',-1,308,1,regex:.*B,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/$DATABASE.db/alltypes/year=2009/month=7'
|
|
'2009','8',-1,308,1,regex:.*B,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/$DATABASE.db/alltypes/year=2009/month=8'
|
|
'2009','9',-1,302,1,regex:.*B,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/$DATABASE.db/alltypes/year=2009/month=9'
|
|
'2009','10',-1,308,1,regex:.*B,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/$DATABASE.db/alltypes/year=2009/month=10'
|
|
'2009','11',-1,302,1,regex:.*B,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/$DATABASE.db/alltypes/year=2009/month=11'
|
|
'2009','12',-1,308,1,regex:.*B,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/$DATABASE.db/alltypes/year=2009/month=12'
|
|
'Total','',3650,3650,12,regex:.*B,'0B','','','',''
|
|
---- TYPES
|
|
STRING,STRING,BIGINT,BIGINT,BIGINT,STRING,STRING,STRING,STRING,STRING,STRING
|
|
====
|
|
---- QUERY
|
|
# Stats are available now.
# The complete explain output is compared (VERIFY_IS_EQUAL); lines containing size=
# are matched with row_regex. The table-level row count is 3650 and column stats are
# present, per-partition row counts are still unavailable (0/12), and both the
# extrapolated row count and the scan cardinality are expected to be 3650.
|
|
explain select id from alltypes;
|
|
---- RESULTS: VERIFY_IS_EQUAL
|
|
'Max Per-Host Resource Reservation: Memory=0B'
|
|
'Per-Host Resource Estimates: Memory=16.00MB'
|
|
'Codegen disabled by planner'
|
|
''
|
|
'F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1'
|
|
'| Per-Host Resources: mem-estimate=16.00MB mem-reservation=0B'
|
|
'PLAN-ROOT SINK'
|
|
'| mem-estimate=0B mem-reservation=0B'
|
|
'|'
|
|
'00:SCAN HDFS [$DATABASE.alltypes]'
|
|
row_regex:.*partitions=12/12 files=12 size=.*
|
|
' stored statistics:'
|
|
row_regex:.*table: rows=3650 size=.*
|
|
' partitions: 0/12 rows=unavailable'
|
|
' columns: all'
|
|
' extrapolated-rows=3650'
|
|
' mem-estimate=16.00MB mem-reservation=0B'
|
|
' tuple-ids=0 row-size=4B cardinality=3650'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
# Select a subset of partitions.
# The month predicate keeps 3 of the 12 partitions (3 files). The stored table-level
# row count stays 3650, while the extrapolated row count and cardinality are expected
# to be 904, which equals the sum of the EXTRAP #ROWS values listed by
# 'show table stats' for months 1-3 (308 + 288 + 308).
|
|
explain select id from alltypes where month in (1, 2, 3);
|
|
---- RESULTS: VERIFY_IS_EQUAL
|
|
'Max Per-Host Resource Reservation: Memory=0B'
|
|
'Per-Host Resource Estimates: Memory=16.00MB'
|
|
'Codegen disabled by planner'
|
|
''
|
|
'F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1'
|
|
'| Per-Host Resources: mem-estimate=16.00MB mem-reservation=0B'
|
|
'PLAN-ROOT SINK'
|
|
'| mem-estimate=0B mem-reservation=0B'
|
|
'|'
|
|
'00:SCAN HDFS [$DATABASE.alltypes]'
|
|
row_regex:.*partitions=3/12 files=3 size=.*
|
|
' stored statistics:'
|
|
row_regex:.*table: rows=3650 size=.*
|
|
' partitions: 0/3 rows=unavailable'
|
|
' columns: all'
|
|
' extrapolated-rows=904'
|
|
' mem-estimate=16.00MB mem-reservation=0B'
|
|
' tuple-ids=0 row-size=4B cardinality=904'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
# Double the data in existing partitions.
# The same year = 2009 rows are inserted a second time without recomputing stats, so
# the stored table-level row count is stale (still 3650) while the scan now covers
# 24 files in the same 12 partitions. The extrapolated row count and the cardinality
# are expected to double to 7300.
|
|
insert into alltypes partition(year, month)
|
|
select * from functional_parquet.alltypes where year = 2009;
|
|
explain select id from alltypes;
|
|
---- RESULTS: VERIFY_IS_EQUAL
|
|
'Max Per-Host Resource Reservation: Memory=0B'
|
|
'Per-Host Resource Estimates: Memory=16.00MB'
|
|
'Codegen disabled by planner'
|
|
''
|
|
'F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1'
|
|
'| Per-Host Resources: mem-estimate=16.00MB mem-reservation=0B'
|
|
'PLAN-ROOT SINK'
|
|
'| mem-estimate=0B mem-reservation=0B'
|
|
'|'
|
|
'00:SCAN HDFS [$DATABASE.alltypes]'
|
|
row_regex:.*partitions=12/12 files=24 size=.*
|
|
' stored statistics:'
|
|
row_regex:.*table: rows=3650 size=.*
|
|
' partitions: 0/12 rows=unavailable'
|
|
' columns: all'
|
|
' extrapolated-rows=7300'
|
|
' mem-estimate=16.00MB mem-reservation=0B'
|
|
' tuple-ids=0 row-size=4B cardinality=7300'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
# Create new partitions and extrapolate their row count.
# The year = 2010 rows are inserted without recomputing stats, so the stored
# table-level row count is still 3650. The scan is expected to select 12 of the now
# 24 partitions (12 files), with an extrapolated row count and cardinality of 3651.
|
|
insert into alltypes partition(year, month)
|
|
select * from functional_parquet.alltypes where year = 2010;
|
|
explain select id from alltypes where year = 2010;
|
|
---- RESULTS: VERIFY_IS_EQUAL
|
|
'Max Per-Host Resource Reservation: Memory=0B'
|
|
'Per-Host Resource Estimates: Memory=16.00MB'
|
|
'Codegen disabled by planner'
|
|
''
|
|
'F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1'
|
|
'| Per-Host Resources: mem-estimate=16.00MB mem-reservation=0B'
|
|
'PLAN-ROOT SINK'
|
|
'| mem-estimate=0B mem-reservation=0B'
|
|
'|'
|
|
'00:SCAN HDFS [$DATABASE.alltypes]'
|
|
row_regex:.*partitions=12/24 files=12 size=.*
|
|
' stored statistics:'
|
|
row_regex:.*table: rows=3650 size=.*
|
|
' partitions: 0/12 rows=unavailable'
|
|
' columns: all'
|
|
' extrapolated-rows=3651'
|
|
' mem-estimate=16.00MB mem-reservation=0B'
|
|
' tuple-ids=0 row-size=4B cardinality=3651'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
# Compute stats and run the same query again.
# After recomputing, the stored table-level row count is expected to be 10950, while
# the extrapolated row count and cardinality for the year = 2010 partitions are
# expected to stay at 3651. Per-partition row counts remain unavailable (0/12).
|
|
compute stats alltypes;
|
|
explain select id from alltypes where year = 2010;
|
|
---- RESULTS: VERIFY_IS_EQUAL
|
|
'Max Per-Host Resource Reservation: Memory=0B'
|
|
'Per-Host Resource Estimates: Memory=16.00MB'
|
|
'Codegen disabled by planner'
|
|
''
|
|
'F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1'
|
|
'| Per-Host Resources: mem-estimate=16.00MB mem-reservation=0B'
|
|
'PLAN-ROOT SINK'
|
|
'| mem-estimate=0B mem-reservation=0B'
|
|
'|'
|
|
'00:SCAN HDFS [$DATABASE.alltypes]'
|
|
row_regex:.*partitions=12/24 files=12 size=.*
|
|
' stored statistics:'
|
|
row_regex:.*table: rows=10950 size=.*
|
|
' partitions: 0/12 rows=unavailable'
|
|
' columns: all'
|
|
' extrapolated-rows=3651'
|
|
' mem-estimate=16.00MB mem-reservation=0B'
|
|
' tuple-ids=0 row-size=4B cardinality=3651'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
# Test that dropping stats resets everything.
# Same subset check (VERIFY_IS_SUBSET) as the initial no-stats case, now over all 24
# partitions: table, partition and column stats, the extrapolated row count and the
# cardinality must all be reported as unavailable again.
|
|
drop stats alltypes;
|
|
explain select id from alltypes;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
' stored statistics:'
|
|
' table: rows=unavailable size=unavailable'
|
|
' partitions: 0/24 rows=unavailable'
|
|
' columns: unavailable'
|
|
' extrapolated-rows=unavailable'
|
|
' mem-estimate=16.00MB mem-reservation=0B'
|
|
' tuple-ids=0 row-size=4B cardinality=unavailable'
|
|
---- TYPES
|
|
STRING
|
|
====
|