mirror of
https://github.com/apache/impala.git
synced 2025-12-25 02:03:09 -05:00
Adds erasure coding policy to introspection commands SHOW FILES, SHOW PARTITIONS, SHOW TABLE STATS, and DESCRIBE EXTENDED. Remove `throws IOException` for methods that don't throw. Removes null check for getSd because getStorageDescriptorInfo - which is called right after getTableMetaDataInformation - uses it without checking for null. Adds '$ERASURECODE_POLICY' for runtime test substitution. The test suite replaces this with the current erasure code policy - from HDFS_ERASURECODE_POLICY - or NONE to match expected output. Testing: - ran backend, end-to-end, and custom cluster tests with erasure coding - ran backend, end-to-end, and custom cluster tests with exhaustive strategy Change-Id: Idd95f2d18b3980581788c92993b6d2f53504b5e0 Reviewed-on: http://gerrit.cloudera.org:8080/19268 Reviewed-by: Michael Smith <michael.smith@cloudera.com> Tested-by: Michael Smith <michael.smith@cloudera.com>
245 lines
9.7 KiB
Plaintext
245 lines
9.7 KiB
Plaintext
====
|
|
---- QUERY
|
|
# This test relies on a deterministic row order so we use "sort by (id)".
|
|
create table alltypes sort by (id) like functional_parquet.alltypes;
|
|
alter table alltypes set tblproperties("impala.enable.stats.extrapolation"="true");
|
|
insert into alltypes partition(year, month)
|
|
select * from functional_parquet.alltypes where year = 2009;
|
|
====
|
|
---- QUERY
|
|
explain select id from alltypes;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
' stored statistics:'
|
|
' table: rows=unavailable size=unavailable'
|
|
' partitions: 0/12 rows=5.97K'
|
|
' columns: unavailable'
|
|
row_regex:.* extrapolated-rows=unavailable.*
|
|
' tuple-ids=0 row-size=4B cardinality=5.97K'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
# Disable the estimation of cardinality for an HDFS table without stats.
|
|
SET DISABLE_HDFS_NUM_ROWS_ESTIMATE=1;
|
|
explain select id from alltypes;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
' stored statistics:'
|
|
' table: rows=unavailable size=unavailable'
|
|
' partitions: 0/12 rows=unavailable'
|
|
' columns: unavailable'
|
|
row_regex:.* extrapolated-rows=unavailable.*
|
|
' tuple-ids=0 row-size=4B cardinality=unavailable'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
compute stats alltypes
|
|
---- RESULTS
|
|
'Updated 1 partition(s) and 11 column(s).'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
# Only the table-level row count is stored. The partition row counts are extrapolated.
|
|
show table stats alltypes
|
|
---- LABELS
|
|
YEAR, MONTH, #ROWS, EXTRAP #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION, EC POLICY
|
|
---- RESULTS
|
|
'2009','1',-1,307,1,regex:.*B,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/$DATABASE.db/alltypes/year=2009/month=1','$ERASURECODE_POLICY'
|
|
'2009','10',-1,307,1,regex:.*B,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/$DATABASE.db/alltypes/year=2009/month=10','$ERASURECODE_POLICY'
|
|
'2009','11',-1,302,1,regex:.*B,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/$DATABASE.db/alltypes/year=2009/month=11','$ERASURECODE_POLICY'
|
|
'2009','12',-1,307,1,regex:.*B,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/$DATABASE.db/alltypes/year=2009/month=12','$ERASURECODE_POLICY'
|
|
'2009','2',-1,290,1,regex:.*B,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/$DATABASE.db/alltypes/year=2009/month=2','$ERASURECODE_POLICY'
|
|
'2009','3',-1,307,1,regex:.*B,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/$DATABASE.db/alltypes/year=2009/month=3','$ERASURECODE_POLICY'
|
|
'2009','4',-1,302,1,regex:.*B,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/$DATABASE.db/alltypes/year=2009/month=4','$ERASURECODE_POLICY'
|
|
'2009','5',-1,307,1,regex:.*B,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/$DATABASE.db/alltypes/year=2009/month=5','$ERASURECODE_POLICY'
|
|
'2009','6',-1,302,1,regex:.*B,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/$DATABASE.db/alltypes/year=2009/month=6','$ERASURECODE_POLICY'
|
|
'2009','7',-1,307,1,regex:.*B,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/$DATABASE.db/alltypes/year=2009/month=7','$ERASURECODE_POLICY'
|
|
'2009','8',-1,307,1,regex:.*B,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/$DATABASE.db/alltypes/year=2009/month=8','$ERASURECODE_POLICY'
|
|
'2009','9',-1,302,1,regex:.*B,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/$DATABASE.db/alltypes/year=2009/month=9','$ERASURECODE_POLICY'
|
|
'Total','',3650,3650,12,regex:.*B,'0B','','','','',''
|
|
---- TYPES
|
|
STRING,STRING,BIGINT,BIGINT,BIGINT,STRING,STRING,STRING,STRING,STRING,STRING,STRING
|
|
====
|
|
---- QUERY
|
|
# Stats are available now.
|
|
explain select id from alltypes;
|
|
---- RESULTS: VERIFY_IS_EQUAL
|
|
row_regex:.*Max Per-Host Resource Reservation: Memory=.*
|
|
row_regex:.*Per-Host Resource Estimates: Memory=.*
|
|
'Codegen disabled by planner'
|
|
row_regex:.*Analyzed query: SELECT id FROM test_stats_extrapolation_.*.alltypes.*
|
|
''
|
|
'F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1'
|
|
row_regex:.*Per-Host Resources: mem-estimate=.* mem-reservation=.*
|
|
'PLAN-ROOT SINK'
|
|
'| output exprs: id'
|
|
row_regex:.*mem-estimate=.* mem-reservation=.*
|
|
'|'
|
|
'00:SCAN $FILESYSTEM_NAME [$DATABASE.alltypes]'
|
|
row_regex:.*partitions=12/12 files=12 size=.*
|
|
' stored statistics:'
|
|
row_regex:.*table: rows=3.65K size=.*
|
|
' partitions: 0/12 rows=unavailable'
|
|
' columns: all'
|
|
row_regex:.* extrapolated-rows=3.65K .*
|
|
row_regex:.*mem-estimate=.* mem-reservation=.*
|
|
' tuple-ids=0 row-size=4B cardinality=3.65K'
|
|
' in pipelines: 00(GETNEXT)'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
# Select a subset of partitions.
|
|
explain select id from alltypes where month in (1, 2, 3);
|
|
---- RESULTS: VERIFY_IS_EQUAL
|
|
row_regex:.*Max Per-Host Resource Reservation: Memory=.*
|
|
row_regex:.*Per-Host Resource Estimates: Memory=.*
|
|
'Codegen disabled by planner'
|
|
row_regex:.*Analyzed query: SELECT id FROM test_stats_extrapolation_.*.alltypes WHERE.*
|
|
'`month` IN (CAST(1 AS INT), CAST(2 AS INT), CAST(3 AS INT))'
|
|
''
|
|
'F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1'
|
|
row_regex:.*mem-estimate=.* mem-reservation=.*
|
|
'PLAN-ROOT SINK'
|
|
'| output exprs: id'
|
|
row_regex:.*mem-estimate=.* mem-reservation=.*
|
|
'|'
|
|
'00:SCAN $FILESYSTEM_NAME [$DATABASE.alltypes]'
|
|
' partition predicates: `month` IN (CAST(1 AS INT), CAST(2 AS INT), CAST(3 AS INT))'
|
|
row_regex:.*partitions=3/12 files=3 size=.*
|
|
' stored statistics:'
|
|
row_regex:.*table: rows=3.65K size=.*
|
|
' partitions: 0/3 rows=unavailable'
|
|
' columns: all'
|
|
row_regex:.* extrapolated-rows=904.*
|
|
row_regex:.*mem-estimate=.* mem-reservation=.*
|
|
' tuple-ids=0 row-size=4B cardinality=904'
|
|
' in pipelines: 00(GETNEXT)'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
# Double the data in existing partitions.
|
|
insert into alltypes partition(year, month)
|
|
select * from functional_parquet.alltypes where year = 2009;
|
|
explain select id from alltypes;
|
|
---- RESULTS: VERIFY_IS_EQUAL
|
|
row_regex:.*Max Per-Host Resource Reservation: Memory=.*
|
|
row_regex:.*Per-Host Resource Estimates: Memory=.*
|
|
row_regex:.*Analyzed query: SELECT id FROM test_stats_extrapolation_.*.alltypes.*
|
|
''
|
|
'F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1'
|
|
row_regex:.*Per-Host Resources: mem-estimate=.* mem-reservation=.*
|
|
'PLAN-ROOT SINK'
|
|
'| output exprs: id'
|
|
row_regex:.*mem-estimate=.* mem-reservation=.*
|
|
'|'
|
|
'00:SCAN $FILESYSTEM_NAME [$DATABASE.alltypes]'
|
|
row_regex:.*partitions=12/12 files=24 size=.*
|
|
' stored statistics:'
|
|
row_regex:.*table: rows=3.65K size=.*
|
|
' partitions: 0/12 rows=unavailable'
|
|
' columns: all'
|
|
row_regex:.* extrapolated-rows=7.30K .*
|
|
row_regex:.*mem-estimate=.* mem-reservation=.*
|
|
' tuple-ids=0 row-size=4B cardinality=7.30K'
|
|
' in pipelines: 00(GETNEXT)'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
# Create new partitions and extrapolate their row count.
|
|
insert into alltypes partition(year, month)
|
|
select * from functional_parquet.alltypes where year = 2010;
|
|
explain select id from alltypes where year = 2010;
|
|
---- RESULTS: VERIFY_IS_EQUAL
|
|
row_regex:.*Max Per-Host Resource Reservation: Memory=.*
|
|
row_regex:.*Per-Host Resource Estimates: Memory=20MB.*
|
|
'Codegen disabled by planner'
|
|
row_regex:.*Analyzed query: SELECT id FROM test_stats_extrapolation_.*.alltypes WHERE.*
|
|
'`year` = CAST(2010 AS INT)'
|
|
''
|
|
'F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1'
|
|
row_regex:.*Per-Host Resources: mem-estimate=.* mem-reservation=.*
|
|
'PLAN-ROOT SINK'
|
|
'| output exprs: id'
|
|
row_regex:.*mem-estimate=.* mem-reservation=.*
|
|
'|'
|
|
'00:SCAN $FILESYSTEM_NAME [$DATABASE.alltypes]'
|
|
' partition predicates: `year` = CAST(2010 AS INT)'
|
|
row_regex:.*partitions=12/24 files=12 size=.*
|
|
' stored statistics:'
|
|
row_regex:.*table: rows=3.65K size=.*
|
|
' partitions: 0/12 rows=unavailable'
|
|
' columns: all'
|
|
row_regex:.* extrapolated-rows=3.65K .*
|
|
row_regex:.*mem-estimate=.* mem-reservation=.*
|
|
' tuple-ids=0 row-size=4B cardinality=3.65K'
|
|
' in pipelines: 00(GETNEXT)'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
# Compute stats and run the same query again.
|
|
compute stats alltypes;
|
|
explain select id from alltypes where year = 2010;
|
|
---- RESULTS: VERIFY_IS_EQUAL
|
|
row_regex:.*Max Per-Host Resource Reservation: Memory=.*
|
|
row_regex:.*Per-Host Resource Estimates: Memory=20MB.*
|
|
'Codegen disabled by planner'
|
|
row_regex:.*Analyzed query: SELECT id FROM test_stats_extrapolation_.*.alltypes WHERE.*
|
|
'`year` = CAST(2010 AS INT)'
|
|
''
|
|
'F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1'
|
|
row_regex:.*Per-Host Resources: mem-estimate=.* mem-reservation=.*
|
|
'PLAN-ROOT SINK'
|
|
'| output exprs: id'
|
|
row_regex:.*mem-estimate=.* mem-reservation=.*
|
|
'|'
|
|
'00:SCAN $FILESYSTEM_NAME [$DATABASE.alltypes]'
|
|
' partition predicates: `year` = CAST(2010 AS INT)'
|
|
row_regex:.*partitions=12/24 files=12 size=.*
|
|
' stored statistics:'
|
|
row_regex:.*table: rows=10.95K size=.*
|
|
' partitions: 0/12 rows=unavailable'
|
|
' columns: all'
|
|
row_regex:.* extrapolated-rows=3.65K .*
|
|
row_regex:.*mem-estimate=.* mem-reservation=.*
|
|
' tuple-ids=0 row-size=4B cardinality=3.65K'
|
|
' in pipelines: 00(GETNEXT)'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
# Test that dropping stats resets everything.
|
|
drop stats alltypes;
|
|
explain select id from alltypes;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
' stored statistics:'
|
|
' table: rows=unavailable size=unavailable'
|
|
row_regex:.* partitions: 0/24 rows=17\.9.*K
|
|
' columns: unavailable'
|
|
row_regex:.* extrapolated-rows=unavailable.*
|
|
row_regex:.* tuple-ids=0 row-size=4B cardinality=17\.9.*K
|
|
' in pipelines: 00(GETNEXT)'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
# Test that dropping stats resets everything, with HDFS row-count estimation disabled.
|
|
SET DISABLE_HDFS_NUM_ROWS_ESTIMATE=1;
|
|
drop stats alltypes;
|
|
explain select id from alltypes;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
' stored statistics:'
|
|
' table: rows=unavailable size=unavailable'
|
|
' partitions: 0/24 rows=unavailable'
|
|
' columns: unavailable'
|
|
row_regex:.* extrapolated-rows=unavailable.*
|
|
' tuple-ids=0 row-size=4B cardinality=unavailable'
|
|
' in pipelines: 00(GETNEXT)'
|
|
---- TYPES
|
|
STRING
|
|
====
|