Files
impala/testdata/workloads/functional-query/queries/QueryTest/alter-table-set-column-stats.test
Taras Bobrovytsky d07580c171 IMPALA-4962: Fix SHOW COLUMN STATS for HS2
Impala incorrectly returned NULLs in the "Max Size" column of the SHOW
COLUMN STATS result when executed through the HS2 interface. The issue
was that the column was specified to be type INT in the result schema,
but the actual type of the contents that we inserted into it was
"long". The reason why this is not an issue in Impala shell is because
we stringify the contents without inspecting the metadata for beeswax
results.

The issue was fixed by changing the type from INT to BIGINT.

Change-Id: I419657744635dfdc2e1562fe60a597617fff446e
Reviewed-on: http://gerrit.cloudera.org:8080/6109
Reviewed-by: Alex Behm <alex.behm@cloudera.com>
Tested-by: Impala Public Jenkins
2017-02-22 23:10:34 +00:00

152 lines
5.4 KiB
Plaintext

====
---- QUERY
create external table alltypes_clone like functional_parquet.alltypes
location '$FILESYSTEM_PREFIX/test-warehouse/alltypes_parquet';
alter table alltypes_clone recover partitions;
====
---- QUERY
# Set various column stats.
alter table alltypes_clone set column stats double_col ('numDVs'='2');
alter table alltypes_clone set column stats timestamp_col ('numNulls'='9');
alter table alltypes_clone set column stats int_col ('numDVs'='100','numNulls'='20');
alter table alltypes_clone set column stats string_col ('maxSize'='555','avgSize'='60');
====
---- QUERY
show column stats alltypes_clone
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',-1,-1,4,4
'bool_col','BOOLEAN',-1,-1,1,1
'tinyint_col','TINYINT',-1,-1,1,1
'smallint_col','SMALLINT',-1,-1,2,2
'int_col','INT',100,20,4,4
'bigint_col','BIGINT',-1,-1,8,8
'float_col','FLOAT',-1,-1,4,4
'double_col','DOUBLE',2,-1,8,8
'date_string_col','STRING',-1,-1,-1,-1
'string_col','STRING',-1,-1,555,60
'timestamp_col','TIMESTAMP',-1,9,16,16
'year','INT',2,0,4,4
'month','INT',12,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
====
---- QUERY
# Make sure compute stats still works.
compute stats alltypes_clone
---- RESULTS
'Updated 24 partition(s) and 11 column(s).'
---- TYPES
STRING
====
---- QUERY
# Reset the column stats to an unknown state by setting the values to -1
alter table alltypes_clone set column stats double_col ('numDVs'='-1');
alter table alltypes_clone set column stats timestamp_col ('numNulls'='-1');
alter table alltypes_clone set column stats int_col ('numDVs'='-1','numNulls'='-1');
alter table alltypes_clone set column stats string_col ('maxSize'='-1','avgSize'='-1');
====
---- QUERY
show column stats alltypes_clone
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',7505,-1,4,4
'bool_col','BOOLEAN',2,-1,1,1
'tinyint_col','TINYINT',10,-1,1,1
'smallint_col','SMALLINT',10,-1,2,2
'int_col','INT',-1,-1,4,4
'bigint_col','BIGINT',10,-1,8,8
'float_col','FLOAT',10,-1,4,4
'double_col','DOUBLE',-1,-1,8,8
'date_string_col','STRING',736,-1,8,8
'string_col','STRING',10,-1,-1,-1
'timestamp_col','TIMESTAMP',7554,-1,16,16
'year','INT',2,0,4,4
'month','INT',12,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
====
---- QUERY
# Also alter a few 'numRows' parameters to make sure manually setting all stats works.
alter table alltypes_clone partition(year=2009,month=2) set tblproperties('numRows'='280');
alter table alltypes_clone set tblproperties('numRows'='7300');
====
---- QUERY
# Check that we can query the table.
select id, int_col, double_col, string_col, timestamp_col from alltypes_clone
where year = 2009 and month between 2 and 3 and int_col = 9 and id between 300 and 400
---- RESULTS
319,9,90.89999999999999,'9',2009-02-01 00:09:00.360000000
329,9,90.89999999999999,'9',2009-02-02 00:19:00.810000000
339,9,90.89999999999999,'9',2009-02-03 00:29:01.260000000
349,9,90.89999999999999,'9',2009-02-04 00:39:01.710000000
359,9,90.89999999999999,'9',2009-02-05 00:49:02.160000000
369,9,90.89999999999999,'9',2009-02-06 00:59:02.610000000
379,9,90.89999999999999,'9',2009-02-07 01:09:03.600000000
389,9,90.89999999999999,'9',2009-02-08 01:19:03.510000000
399,9,90.89999999999999,'9',2009-02-09 01:29:03.960000000
---- TYPES
INT, INT, DOUBLE, STRING, TIMESTAMP
====
---- QUERY
# Similar test on an HBase table.
create external table alltypes_hbase_clone like functional_hbase.alltypes
====
---- QUERY
alter table alltypes_hbase_clone set column stats double_col ('numDVs'='2');
alter table alltypes_hbase_clone set column stats timestamp_col ('numNulls'='9');
alter table alltypes_hbase_clone set column stats int_col ('numDVs'='100','numNulls'='20');
alter table alltypes_hbase_clone set column stats string_col ('maxSize'='555','avgSize'='60');
====
---- QUERY
show column stats alltypes_hbase_clone
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',-1,-1,4,4
'bool_col','BOOLEAN',-1,-1,1,1
'tinyint_col','TINYINT',-1,-1,1,1
'smallint_col','SMALLINT',-1,-1,2,2
'int_col','INT',100,20,4,4
'bigint_col','BIGINT',-1,-1,8,8
'float_col','FLOAT',-1,-1,4,4
'double_col','DOUBLE',2,-1,8,8
'date_string_col','STRING',-1,-1,-1,-1
'string_col','STRING',-1,-1,555,60
'timestamp_col','TIMESTAMP',-1,9,16,16
'year','INT',-1,-1,4,4
'month','INT',-1,-1,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
====
---- QUERY
# Reset the column stats to an unknown state by setting the values to -1
alter table alltypes_hbase_clone set column stats double_col ('numDVs'='-1');
alter table alltypes_hbase_clone set column stats timestamp_col ('numNulls'='-1');
alter table alltypes_hbase_clone set column stats int_col ('numDVs'='-1','numNulls'='-1');
alter table alltypes_hbase_clone set column stats string_col ('maxSize'='-1','avgSize'='-1');
====
---- QUERY
show column stats alltypes_hbase_clone
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',-1,-1,4,4
'bool_col','BOOLEAN',-1,-1,1,1
'tinyint_col','TINYINT',-1,-1,1,1
'smallint_col','SMALLINT',-1,-1,2,2
'int_col','INT',-1,-1,4,4
'bigint_col','BIGINT',-1,-1,8,8
'float_col','FLOAT',-1,-1,4,4
'double_col','DOUBLE',-1,-1,8,8
'date_string_col','STRING',-1,-1,-1,-1
'string_col','STRING',-1,-1,-1,-1
'timestamp_col','TIMESTAMP',-1,-1,16,16
'year','INT',-1,-1,4,4
'month','INT',-1,-1,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
====