mirror of
https://github.com/apache/impala.git
synced 2026-01-08 12:02:54 -05:00
IMPALA-1983: Warn if table stats are potentially corrupt.
When the `numRows` parameter stored in the table properties is
erroneously set to 0 while one or more non-empty files are present,
the table statistics are considered to be corrupt.
To hint that there might be a problem, the explain statement will emit
an additional warning if it detects potentially corrupt table stats like
in the following example:
Estimated Per-Host Requirements: Memory=42.00MB VCores=1
WARNING: The following tables have potentially corrupt table
statistics. Drop and re-compute statistics to resolve this problem.
compute_stats_db.corrupted
03:AGGREGATE [FINALIZE]
| output: count:merge(*)
|
02:EXCHANGE [UNPARTITIONED]
|
01:AGGREGATE
| output: count(*)
|
00:SCAN HDFS [compute_stats_db.corrupted]
partitions=1/2 files=1 size=24B
In addition, the small query optimization is disabled for such queries.
Change-Id: I0fa911f5132aa62195b854248663a94dcd8b14de
Reviewed-on: http://gerrit.cloudera.org:8080/689
Reviewed-by: Martin Grund <mgrund@cloudera.com>
Tested-by: Internal Jenkins
This commit is contained in:
committed by
Internal Jenkins
parent
9dbdf81fbd
commit
60c5140ea7
184
testdata/workloads/functional-query/queries/QueryTest/corrupt_stats.test
vendored
Normal file
184
testdata/workloads/functional-query/queries/QueryTest/corrupt_stats.test
vendored
Normal file
@@ -0,0 +1,184 @@
|
||||
====
|
||||
---- QUERY
|
||||
use compute_stats_db;
|
||||
====
|
||||
---- QUERY
|
||||
create table corrupted (id int, name string) partitioned by (org int);
|
||||
====
|
||||
---- QUERY
|
||||
insert into corrupted partition (org=1) values (1, "Martin"), (2, "Hans"), (3, "Peter");
|
||||
====
|
||||
---- QUERY
|
||||
insert into corrupted partition (org=2) values (4, "Martin"), (5, "Hans"), (6, "Peter");
|
||||
====
|
||||
---- QUERY
|
||||
show table stats corrupted;
|
||||
---- LABELS
|
||||
ORG, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
|
||||
---- RESULTS
|
||||
'1',-1,1,'24B','NOT CACHED','NOT CACHED','TEXT','false','hdfs://localhost:20500/test-warehouse/compute_stats_db.db/corrupted/org=1'
|
||||
'2',-1,1,'24B','NOT CACHED','NOT CACHED','TEXT','false','hdfs://localhost:20500/test-warehouse/compute_stats_db.db/corrupted/org=2'
|
||||
'Total',-1,2,'48B','0B','','','',''
|
||||
---- TYPES
|
||||
STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
|
||||
====
|
||||
---- QUERY
|
||||
compute stats corrupted;
|
||||
====
|
||||
---- QUERY
|
||||
show table stats corrupted;
|
||||
---- LABELS
|
||||
ORG, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
|
||||
---- RESULTS
|
||||
'1',3,1,'24B','NOT CACHED','NOT CACHED','TEXT','false','hdfs://localhost:20500/test-warehouse/compute_stats_db.db/corrupted/org=1'
|
||||
'2',3,1,'24B','NOT CACHED','NOT CACHED','TEXT','false','hdfs://localhost:20500/test-warehouse/compute_stats_db.db/corrupted/org=2'
|
||||
'Total',6,2,'48B','0B','','','',''
|
||||
---- TYPES
|
||||
STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
|
||||
====
|
||||
---- QUERY
|
||||
alter table corrupted partition(org=1) set tblproperties('numRows'='0');
|
||||
====
|
||||
---- QUERY
|
||||
invalidate metadata corrupted;
|
||||
====
|
||||
---- QUERY
|
||||
show table stats corrupted;
|
||||
---- LABELS
|
||||
ORG, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
|
||||
---- RESULTS
|
||||
'1',0,1,'24B','NOT CACHED','NOT CACHED','TEXT','false','hdfs://localhost:20500/test-warehouse/compute_stats_db.db/corrupted/org=1'
|
||||
'2',3,1,'24B','NOT CACHED','NOT CACHED','TEXT','false','hdfs://localhost:20500/test-warehouse/compute_stats_db.db/corrupted/org=2'
|
||||
'Total',6,2,'48B','0B','','','',''
|
||||
---- TYPES
|
||||
STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
|
||||
====
|
||||
---- QUERY
|
||||
explain select count(*) from corrupted where org = 1;
|
||||
---- RESULTS: VERIFY_IS_SUBSET
|
||||
'WARNING: The following tables have potentially corrupt table'
|
||||
'statistics. Drop and re-compute statistics to resolve this problem.'
|
||||
'compute_stats_db.corrupted'
|
||||
''
|
||||
'03:AGGREGATE [FINALIZE]'
|
||||
'| output: count:merge(*)'
|
||||
'|'
|
||||
'02:EXCHANGE [UNPARTITIONED]'
|
||||
'|'
|
||||
'01:AGGREGATE'
|
||||
'| output: count(*)'
|
||||
'|'
|
||||
'00:SCAN HDFS [compute_stats_db.corrupted]'
|
||||
' partitions=1/2 files=1 size=24B'
|
||||
---- TYPES
|
||||
STRING
|
||||
====
|
||||
---- QUERY
|
||||
alter table corrupted partition(org=1) set tblproperties('numRows'='3');
|
||||
alter table corrupted set tblproperties('numRows'='0');
|
||||
====
|
||||
---- QUERY
|
||||
show table stats corrupted;
|
||||
---- LABELS
|
||||
ORG, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
|
||||
---- RESULTS
|
||||
'1',3,1,'24B','NOT CACHED','NOT CACHED','TEXT','false','hdfs://localhost:20500/test-warehouse/compute_stats_db.db/corrupted/org=1'
|
||||
'2',3,1,'24B','NOT CACHED','NOT CACHED','TEXT','false','hdfs://localhost:20500/test-warehouse/compute_stats_db.db/corrupted/org=2'
|
||||
'Total',0,2,'48B','0B','','','',''
|
||||
---- TYPES
|
||||
STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
|
||||
====
|
||||
---- QUERY
|
||||
explain select count(*) from corrupted;
|
||||
---- RESULTS: VERIFY_IS_SUBSET
|
||||
'01:AGGREGATE [FINALIZE]'
|
||||
'| output: count(*)'
|
||||
'|'
|
||||
'00:SCAN HDFS [compute_stats_db.corrupted]'
|
||||
' partitions=2/2 files=2 size=48B'
|
||||
---- TYPES
|
||||
STRING
|
||||
====
|
||||
---- QUERY
|
||||
alter table corrupted set tblproperties('numRows'='6');
|
||||
====
|
||||
---- QUERY
|
||||
show table stats corrupted;
|
||||
---- LABELS
|
||||
ORG, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
|
||||
---- RESULTS
|
||||
'1',3,1,'24B','NOT CACHED','NOT CACHED','TEXT','false','hdfs://localhost:20500/test-warehouse/compute_stats_db.db/corrupted/org=1'
|
||||
'2',3,1,'24B','NOT CACHED','NOT CACHED','TEXT','false','hdfs://localhost:20500/test-warehouse/compute_stats_db.db/corrupted/org=2'
|
||||
'Total',6,2,'48B','0B','','','',''
|
||||
---- TYPES
|
||||
STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
|
||||
====
|
||||
---- QUERY
|
||||
explain select count(*) from corrupted;
|
||||
---- RESULTS: VERIFY_IS_SUBSET
|
||||
'01:AGGREGATE [FINALIZE]'
|
||||
'| output: count(*)'
|
||||
'|'
|
||||
'00:SCAN HDFS [compute_stats_db.corrupted]'
|
||||
' partitions=2/2 files=2 size=48B'
|
||||
---- TYPES
|
||||
STRING
|
||||
====
|
||||
---- QUERY
|
||||
create table corrupted_no_part (id int);
|
||||
insert into corrupted_no_part values (1),(2),(3);
|
||||
compute stats corrupted_no_part;
|
||||
====
|
||||
---- QUERY
|
||||
show table stats corrupted_no_part;
|
||||
---- LABELS
|
||||
#ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
|
||||
---- RESULTS
|
||||
3,1,'6B','NOT CACHED','NOT CACHED','TEXT','false','hdfs://localhost:20500/test-warehouse/compute_stats_db.db/corrupted_no_part'
|
||||
---- TYPES
|
||||
BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
|
||||
====
|
||||
---- QUERY
|
||||
-- Check that the small query optimization is applied.
|
||||
explain select count(*) from corrupted_no_part;
|
||||
---- RESULTS: VERIFY_IS_SUBSET
|
||||
'01:AGGREGATE [FINALIZE]'
|
||||
'| output: count(*)'
|
||||
'|'
|
||||
'00:SCAN HDFS [compute_stats_db.corrupted_no_part]'
|
||||
' partitions=1/1 files=1 size=6B'
|
||||
---- TYPES
|
||||
STRING
|
||||
====
|
||||
---- QUERY
|
||||
alter table corrupted_no_part set tblproperties('numRows'='0');
|
||||
====
|
||||
---- QUERY
|
||||
show table stats corrupted_no_part;
|
||||
---- LABELS
|
||||
#ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
|
||||
---- RESULTS
|
||||
-1,1,'6B','NOT CACHED','NOT CACHED','TEXT','false','hdfs://localhost:20500/test-warehouse/compute_stats_db.db/corrupted_no_part'
|
||||
---- TYPES
|
||||
BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
|
||||
====
|
||||
---- QUERY
|
||||
-- After setting numRows to 0, the HMS resets it to -1, which avoids the bad behavior.
|
||||
explain select count(*) from corrupted_no_part;
|
||||
---- RESULTS: VERIFY_IS_SUBSET
|
||||
'WARNING: The following tables are missing relevant table and/or column statistics.'
|
||||
'compute_stats_db.corrupted_no_part'
|
||||
''
|
||||
'03:AGGREGATE [FINALIZE]'
|
||||
'| output: count:merge(*)'
|
||||
'|'
|
||||
'02:EXCHANGE [UNPARTITIONED]'
|
||||
'|'
|
||||
'01:AGGREGATE'
|
||||
'| output: count(*)'
|
||||
'|'
|
||||
'00:SCAN HDFS [compute_stats_db.corrupted_no_part]'
|
||||
' partitions=1/1 files=1 size=6B'
|
||||
---- TYPES
|
||||
STRING
|
||||
====
|
||||
Reference in New Issue
Block a user