Files
impala/testdata/workloads/functional-query/queries/QueryTest/alter-table-set-column-stats.test
Chang Wu a93f2c2675 IMPALA-8205: Support number of true and false statistics for boolean column
This change computes the real number of true and false values as
statistics for boolean columns. Before this, Impala set numTrues and
numFalses to a hardcoded -1 to indicate that the statistics were
missing.

Test Done:
Added numTrues and numFalses checks to all the statistics-related
test cases, including the non-incremental, incremental and other test
cases.

Change-Id: I991bee8e7fdc644d908289f5fe2ee8032cc2c431
Reviewed-on: http://gerrit.cloudera.org:8080/14666
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
2020-05-12 23:29:04 +00:00

152 lines
5.8 KiB
Plaintext

====
---- QUERY
# Set up the table under test: an external table with the same definition as
# functional_parquet.alltypes, pointed at the alltypes_parquet warehouse directory.
# RECOVER PARTITIONS then registers the partitions found under that location.
create external table alltypes_clone like functional_parquet.alltypes
location '$FILESYSTEM_PREFIX/test-warehouse/alltypes_parquet';
alter table alltypes_clone recover partitions;
====
---- QUERY
# Set various column stats. Each statement sets a different subset of the keys
# (numDVs, numNulls, maxSize, avgSize), including two keys in a single statement.
alter table alltypes_clone set column stats double_col ('numDVs'='2');
alter table alltypes_clone set column stats timestamp_col ('numNulls'='9');
alter table alltypes_clone set column stats int_col ('numDVs'='100','numNulls'='20');
alter table alltypes_clone set column stats string_col ('maxSize'='555','avgSize'='60');
====
---- QUERY
# Expect exactly the values set manually for int_col, double_col, string_col and
# timestamp_col. Every other distinct-value/null count is still -1 (unknown),
# apart from the partition columns year and month, which already report counts
# even though COMPUTE STATS has not been run yet.
# #TRUES/#FALSES are -1 for every column, bool_col included.
show column stats alltypes_clone
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE, #TRUES, #FALSES
---- RESULTS
'id','INT',-1,-1,4,4,-1,-1
'bool_col','BOOLEAN',-1,-1,1,1,-1,-1
'tinyint_col','TINYINT',-1,-1,1,1,-1,-1
'smallint_col','SMALLINT',-1,-1,2,2,-1,-1
'int_col','INT',100,20,4,4,-1,-1
'bigint_col','BIGINT',-1,-1,8,8,-1,-1
'float_col','FLOAT',-1,-1,4,4,-1,-1
'double_col','DOUBLE',2,-1,8,8,-1,-1
'date_string_col','STRING',-1,-1,-1,-1,-1,-1
'string_col','STRING',-1,-1,555,60,-1,-1
'timestamp_col','TIMESTAMP',-1,9,16,16,-1,-1
'year','INT',2,0,4,4,-1,-1
'month','INT',12,0,4,4,-1,-1
---- TYPES
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
====
---- QUERY
# Make sure compute stats still works on a table whose column stats were set
# manually. The expected message reports 24 partitions and 11 columns
# (presumably the 13 table columns minus the partition columns year and month).
compute stats alltypes_clone
---- RESULTS
'Updated 24 partition(s) and 11 column(s).'
---- TYPES
STRING
====
---- QUERY
# Reset the column stats to an unknown state by setting the values to -1.
# Only the keys that were set manually earlier in this file are reset here.
alter table alltypes_clone set column stats double_col ('numDVs'='-1');
alter table alltypes_clone set column stats timestamp_col ('numNulls'='-1');
alter table alltypes_clone set column stats int_col ('numDVs'='-1','numNulls'='-1');
alter table alltypes_clone set column stats string_col ('maxSize'='-1','avgSize'='-1');
====
---- QUERY
# Values explicitly reset to -1 (double_col NDV, timestamp_col nulls, int_col NDV
# and nulls, string_col sizes) are unknown again; everything else keeps what
# COMPUTE STATS produced. bool_col is the only column with #TRUES/#FALSES set.
show column stats alltypes_clone
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE, #TRUES, #FALSES
---- RESULTS
'id','INT',7300,0,4,4,-1,-1
'bool_col','BOOLEAN',2,0,1,1,3650,3650
'tinyint_col','TINYINT',10,0,1,1,-1,-1
'smallint_col','SMALLINT',10,0,2,2,-1,-1
'int_col','INT',-1,-1,4,4,-1,-1
'bigint_col','BIGINT',10,0,8,8,-1,-1
'float_col','FLOAT',10,0,4,4,-1,-1
'double_col','DOUBLE',-1,0,8,8,-1,-1
'date_string_col','STRING',736,0,8,8,-1,-1
'string_col','STRING',10,0,-1,-1,-1,-1
'timestamp_col','TIMESTAMP',7300,-1,16,16,-1,-1
'year','INT',2,0,4,4,-1,-1
'month','INT',12,0,4,4,-1,-1
---- TYPES
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
====
---- QUERY
# Also alter a few 'numRows' parameters to make sure manually setting all stats works.
# 'numRows' is set once for a single partition and once for the table as a whole.
alter table alltypes_clone partition(year=2009,month=2) set tblproperties('numRows'='280');
alter table alltypes_clone set tblproperties('numRows'='7300');
====
---- QUERY
# Check that we can query the table after its column and table stats were altered
# manually. The predicate covers both partition columns (year, month) and regular
# columns (int_col, id), and the select list includes the columns whose stats were
# reset to -1.
select id, int_col, double_col, string_col, timestamp_col from alltypes_clone
where year = 2009 and month between 2 and 3 and int_col = 9 and id between 300 and 400
---- RESULTS
319,9,90.89999999999999,'9',2009-02-01 00:09:00.360000000
329,9,90.89999999999999,'9',2009-02-02 00:19:00.810000000
339,9,90.89999999999999,'9',2009-02-03 00:29:01.260000000
349,9,90.89999999999999,'9',2009-02-04 00:39:01.710000000
359,9,90.89999999999999,'9',2009-02-05 00:49:02.160000000
369,9,90.89999999999999,'9',2009-02-06 00:59:02.610000000
379,9,90.89999999999999,'9',2009-02-07 01:09:03.600000000
389,9,90.89999999999999,'9',2009-02-08 01:19:03.510000000
399,9,90.89999999999999,'9',2009-02-09 01:29:03.960000000
---- TYPES
INT, INT, DOUBLE, STRING, TIMESTAMP
====
---- QUERY
# Similar test on an HBase table. The clone is created from functional_hbase.alltypes;
# unlike the alltypes_clone setup, no location is given and no partitions are
# recovered.
create external table alltypes_hbase_clone like functional_hbase.alltypes
====
---- QUERY
# Set the same column stats on the HBase clone as were set on alltypes_clone.
alter table alltypes_hbase_clone set column stats double_col ('numDVs'='2');
alter table alltypes_hbase_clone set column stats timestamp_col ('numNulls'='9');
alter table alltypes_hbase_clone set column stats int_col ('numDVs'='100','numNulls'='20');
alter table alltypes_hbase_clone set column stats string_col ('maxSize'='555','avgSize'='60');
====
---- QUERY
# Expect the manually set values for int_col, double_col, string_col and
# timestamp_col, with every other distinct-value/null count at -1 (unknown).
# Unlike alltypes_clone, year and month also report -1 here
# (presumably because they are not partition columns in the HBase table -- confirm).
show column stats alltypes_hbase_clone
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE, #TRUES, #FALSES
---- RESULTS
'id','INT',-1,-1,4,4,-1,-1
'bool_col','BOOLEAN',-1,-1,1,1,-1,-1
'tinyint_col','TINYINT',-1,-1,1,1,-1,-1
'smallint_col','SMALLINT',-1,-1,2,2,-1,-1
'int_col','INT',100,20,4,4,-1,-1
'bigint_col','BIGINT',-1,-1,8,8,-1,-1
'float_col','FLOAT',-1,-1,4,4,-1,-1
'double_col','DOUBLE',2,-1,8,8,-1,-1
'date_string_col','STRING',-1,-1,-1,-1,-1,-1
'string_col','STRING',-1,-1,555,60,-1,-1
'timestamp_col','TIMESTAMP',-1,9,16,16,-1,-1
'year','INT',-1,-1,4,4,-1,-1
'month','INT',-1,-1,4,4,-1,-1
---- TYPES
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
====
---- QUERY
# Reset the column stats to an unknown state by setting the values to -1.
# These are the same keys that were set manually on the HBase clone.
alter table alltypes_hbase_clone set column stats double_col ('numDVs'='-1');
alter table alltypes_hbase_clone set column stats timestamp_col ('numNulls'='-1');
alter table alltypes_hbase_clone set column stats int_col ('numDVs'='-1','numNulls'='-1');
alter table alltypes_hbase_clone set column stats string_col ('maxSize'='-1','avgSize'='-1');
====
---- QUERY
# COMPUTE STATS was never run on the HBase clone in this file, so after the reset
# every distinct-value, null, true and false count is -1 and the STRING columns
# report -1 sizes again; only the sizes of the non-STRING columns are known.
show column stats alltypes_hbase_clone
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE, #TRUES, #FALSES
---- RESULTS
'id','INT',-1,-1,4,4,-1,-1
'bool_col','BOOLEAN',-1,-1,1,1,-1,-1
'tinyint_col','TINYINT',-1,-1,1,1,-1,-1
'smallint_col','SMALLINT',-1,-1,2,2,-1,-1
'int_col','INT',-1,-1,4,4,-1,-1
'bigint_col','BIGINT',-1,-1,8,8,-1,-1
'float_col','FLOAT',-1,-1,4,4,-1,-1
'double_col','DOUBLE',-1,-1,8,8,-1,-1
'date_string_col','STRING',-1,-1,-1,-1,-1,-1
'string_col','STRING',-1,-1,-1,-1,-1,-1
'timestamp_col','TIMESTAMP',-1,-1,16,16,-1,-1
'year','INT',-1,-1,4,4,-1,-1
'month','INT',-1,-1,4,4,-1,-1
---- TYPES
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
====