Files
impala/testdata/workloads/functional-query/queries/QueryTest/compute-stats.test
Alex Behm adb19deece Re-enable tests that had been temporarily removed to unblock the full data load.
The following commits disabled tests to unblock the full data load:
a00a9a5e53f7a8e7a1e3c931ea0e4b7db21c6f00
bf29d06f2e53bb924d250275d51f5ccd1213531d

This patch re-enables those tests and adds new tests to guard against
regressions to HIVE-6308.

Unfortunately, we cannot completely remove the analysis check for HIVE-6308
in our code, because there is still one case where COMPUTE STATS will fail on
a Hive-created Avro table: If there is a mismatch in column names between
the Avro schema and the column defs given to a CREATE TABLE in Hive.

Change-Id: I81ae6b526db02fdfc634e09eeb9d12036e2adfdd
Reviewed-on: http://gerrit.cloudera.org:8080/180
Reviewed-by: Alex Behm <alex.behm@cloudera.com>
Tested-by: Internal Jenkins
2015-03-11 16:39:38 -07:00

888 lines
29 KiB
Plaintext

====
---- QUERY
# test computing stats on a partitioned text table with all types
create table compute_stats_db.alltypes like functional.alltypes;
insert into compute_stats_db.alltypes partition(year, month)
select * from functional.alltypes;
====
---- QUERY
# Expect every one of the 24 partitions (2 years x 12 months) and the 11
# non-partition columns to be reported as updated.
compute stats compute_stats_db.alltypes
---- RESULTS
'Updated 24 partition(s) and 11 column(s).'
---- TYPES
STRING
====
---- QUERY
show table stats compute_stats_db.alltypes
---- LABELS
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS
---- RESULTS
'2009','1',310,1,'24.56KB','NOT CACHED','NOT CACHED','TEXT','false'
'2009','2',280,1,'22.27KB','NOT CACHED','NOT CACHED','TEXT','false'
'2009','3',310,1,'24.67KB','NOT CACHED','NOT CACHED','TEXT','false'
'2009','4',300,1,'24.06KB','NOT CACHED','NOT CACHED','TEXT','false'
'2009','5',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false'
'2009','6',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false'
'2009','7',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false'
'2009','8',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false'
'2009','9',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false'
'2009','10',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false'
'2009','11',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false'
'2009','12',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false'
'2010','1',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false'
'2010','2',280,1,'22.54KB','NOT CACHED','NOT CACHED','TEXT','false'
'2010','3',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false'
'2010','4',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false'
'2010','5',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false'
'2010','6',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false'
'2010','7',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false'
'2010','8',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false'
'2010','9',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false'
'2010','10',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false'
'2010','11',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false'
'2010','12',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false'
'Total','',7300,24,'586.84KB','0B','','',''
---- TYPES
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
show column stats compute_stats_db.alltypes
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',8161,-1,4,4
'bool_col','BOOLEAN',2,-1,1,1
'tinyint_col','TINYINT',10,-1,1,1
'smallint_col','SMALLINT',10,-1,2,2
'int_col','INT',10,-1,4,4
'bigint_col','BIGINT',10,-1,8,8
'float_col','FLOAT',10,-1,4,4
'double_col','DOUBLE',10,-1,8,8
'date_string_col','STRING',666,-1,8,8
'string_col','STRING',10,-1,1,1
'timestamp_col','TIMESTAMP',5678,-1,16,16
'year','INT',2,0,4,4
'month','INT',12,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
====
---- QUERY
# drop stats from this table
drop stats compute_stats_db.alltypes
====
---- QUERY
show table stats compute_stats_db.alltypes
---- LABELS
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS
---- RESULTS
'2009','1',-1,1,'24.56KB','NOT CACHED','NOT CACHED','TEXT','false'
'2009','10',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false'
'2009','11',-1,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false'
'2009','12',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false'
'2009','2',-1,1,'22.27KB','NOT CACHED','NOT CACHED','TEXT','false'
'2009','3',-1,1,'24.67KB','NOT CACHED','NOT CACHED','TEXT','false'
'2009','4',-1,1,'24.06KB','NOT CACHED','NOT CACHED','TEXT','false'
'2009','5',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false'
'2009','6',-1,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false'
'2009','7',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false'
'2009','8',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false'
'2009','9',-1,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false'
'2010','1',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false'
'2010','10',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false'
'2010','11',-1,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false'
'2010','12',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false'
'2010','2',-1,1,'22.54KB','NOT CACHED','NOT CACHED','TEXT','false'
'2010','3',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false'
'2010','4',-1,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false'
'2010','5',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false'
'2010','6',-1,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false'
'2010','7',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false'
'2010','8',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false'
'2010','9',-1,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false'
'Total','',-1,24,'586.84KB','0B','','',''
---- TYPES
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
# Note - the NDV for partition columns is read from the table metadata.
show column stats compute_stats_db.alltypes
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',-1,-1,4,4
'bool_col','BOOLEAN',-1,-1,1,1
'tinyint_col','TINYINT',-1,-1,1,1
'smallint_col','SMALLINT',-1,-1,2,2
'int_col','INT',-1,-1,4,4
'bigint_col','BIGINT',-1,-1,8,8
'float_col','FLOAT',-1,-1,4,4
'double_col','DOUBLE',-1,-1,8,8
'date_string_col','STRING',-1,-1,-1,-1
'string_col','STRING',-1,-1,-1,-1
'timestamp_col','TIMESTAMP',-1,-1,16,16
'year','INT',2,0,4,4
'month','INT',12,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
====
---- QUERY
# Add partitions with NULL values and check for stats.
alter table compute_stats_db.alltypes add partition (year=NULL, month=NULL)
---- RESULTS
====
---- QUERY
# The (NULL, NULL) partition adds one distinct value to each partition column
# (year 2 -> 3, month 12 -> 13) and is counted once in #NULLS. The stats of the
# non-partition columns remain dropped.
show column stats compute_stats_db.alltypes
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',-1,-1,4,4
'bool_col','BOOLEAN',-1,-1,1,1
'tinyint_col','TINYINT',-1,-1,1,1
'smallint_col','SMALLINT',-1,-1,2,2
'int_col','INT',-1,-1,4,4
'bigint_col','BIGINT',-1,-1,8,8
'float_col','FLOAT',-1,-1,4,4
'double_col','DOUBLE',-1,-1,8,8
'date_string_col','STRING',-1,-1,-1,-1
'string_col','STRING',-1,-1,-1,-1
'timestamp_col','TIMESTAMP',-1,-1,16,16
'year','INT',3,1,4,4
'month','INT',13,1,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
====
---- QUERY
# Add a partition with a new non-NULL year and a NULL month.
alter table compute_stats_db.alltypes add partition (year=2011, month=NULL)
---- RESULTS
====
---- QUERY
# year=2011 is a new distinct value (3 -> 4). The second NULL month only
# increases #NULLS (1 -> 2); the distinct count of month stays at 13.
show column stats compute_stats_db.alltypes
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',-1,-1,4,4
'bool_col','BOOLEAN',-1,-1,1,1
'tinyint_col','TINYINT',-1,-1,1,1
'smallint_col','SMALLINT',-1,-1,2,2
'int_col','INT',-1,-1,4,4
'bigint_col','BIGINT',-1,-1,8,8
'float_col','FLOAT',-1,-1,4,4
'double_col','DOUBLE',-1,-1,8,8
'date_string_col','STRING',-1,-1,-1,-1
'string_col','STRING',-1,-1,-1,-1
'timestamp_col','TIMESTAMP',-1,-1,16,16
'year','INT',4,1,4,4
'month','INT',13,2,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
====
---- QUERY
# Drop the partitions with NULL values and check for stats.
alter table compute_stats_db.alltypes drop partition (year=NULL, month=NULL)
---- RESULTS
====
---- QUERY
# With (NULL, NULL) dropped, year has no NULL partition value left (3 distinct
# values, 0 NULLs), while month still has the NULL from the (2011, NULL)
# partition (13 distinct values, 1 NULL).
show column stats compute_stats_db.alltypes
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',-1,-1,4,4
'bool_col','BOOLEAN',-1,-1,1,1
'tinyint_col','TINYINT',-1,-1,1,1
'smallint_col','SMALLINT',-1,-1,2,2
'int_col','INT',-1,-1,4,4
'bigint_col','BIGINT',-1,-1,8,8
'float_col','FLOAT',-1,-1,4,4
'double_col','DOUBLE',-1,-1,8,8
'date_string_col','STRING',-1,-1,-1,-1
'string_col','STRING',-1,-1,-1,-1
'timestamp_col','TIMESTAMP',-1,-1,16,16
'year','INT',3,0,4,4
'month','INT',13,1,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
====
---- QUERY
# Drop the remaining added partition, the one with a NULL month only.
alter table compute_stats_db.alltypes drop partition (year=2011, month=NULL)
---- RESULTS
====
---- QUERY
# Both added partitions are gone, so the partition-column stats are back to
# 2 distinct years and 12 distinct months with no NULLs.
show column stats compute_stats_db.alltypes
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',-1,-1,4,4
'bool_col','BOOLEAN',-1,-1,1,1
'tinyint_col','TINYINT',-1,-1,1,1
'smallint_col','SMALLINT',-1,-1,2,2
'int_col','INT',-1,-1,4,4
'bigint_col','BIGINT',-1,-1,8,8
'float_col','FLOAT',-1,-1,4,4
'double_col','DOUBLE',-1,-1,8,8
'date_string_col','STRING',-1,-1,-1,-1
'string_col','STRING',-1,-1,-1,-1
'timestamp_col','TIMESTAMP',-1,-1,16,16
'year','INT',2,0,4,4
'month','INT',12,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
====
---- QUERY
# drop stats from this table a second time, should not throw an error.
drop stats compute_stats_db.alltypes
====
---- QUERY
# test computing stats on an unpartitioned text table with all types
create table compute_stats_db.alltypesnopart like functional.alltypesnopart;
insert into compute_stats_db.alltypesnopart
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col,
double_col, date_string_col, string_col, timestamp_col
from functional.alltypessmall;
====
---- QUERY
compute stats compute_stats_db.alltypesnopart
---- RESULTS
'Updated 1 partition(s) and 11 column(s).'
---- TYPES
STRING
====
---- QUERY
show table stats compute_stats_db.alltypesnopart
---- LABELS
#ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS
---- RESULTS
100,3,'7.73KB','NOT CACHED','NOT CACHED','TEXT','false'
---- TYPES
BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
show column stats compute_stats_db.alltypesnopart
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',105,-1,4,4
'bool_col','BOOLEAN',2,-1,1,1
'tinyint_col','TINYINT',10,-1,1,1
'smallint_col','SMALLINT',10,-1,2,2
'int_col','INT',10,-1,4,4
'bigint_col','BIGINT',10,-1,8,8
'float_col','FLOAT',10,-1,4,4
'double_col','DOUBLE',10,-1,8,8
'date_string_col','STRING',12,-1,8,8
'string_col','STRING',10,-1,1,1
'timestamp_col','TIMESTAMP',101,-1,16,16
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
====
---- QUERY
# test computing stats on a partitioned parquet table with all types
create table compute_stats_db.alltypes_parquet
like functional_parquet.alltypes;
insert into compute_stats_db.alltypes_parquet partition(year, month)
select * from functional.alltypes;
====
---- QUERY
compute stats compute_stats_db.alltypes_parquet
---- RESULTS
'Updated 24 partition(s) and 11 column(s).'
---- TYPES
STRING
====
---- QUERY
show table stats compute_stats_db.alltypes_parquet
---- LABELS
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS
---- RESULTS
'2009','1',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false'
'2009','2',280,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false'
'2009','3',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false'
'2009','4',300,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false'
'2009','5',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false'
'2009','6',300,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false'
'2009','7',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false'
'2009','8',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false'
'2009','9',300,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false'
'2009','10',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false'
'2009','11',300,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false'
'2009','12',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false'
'2010','1',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false'
'2010','2',280,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false'
'2010','3',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false'
'2010','4',300,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false'
'2010','5',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false'
'2010','6',300,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false'
'2010','7',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false'
'2010','8',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false'
'2010','9',300,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false'
'2010','10',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false'
'2010','11',300,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false'
'2010','12',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false'
'Total','',7300,24,regex:.+KB,'0B','','',''
---- TYPES
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
show column stats compute_stats_db.alltypes_parquet
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',8161,-1,4,4
'bool_col','BOOLEAN',2,-1,1,1
'tinyint_col','TINYINT',10,-1,1,1
'smallint_col','SMALLINT',10,-1,2,2
'int_col','INT',10,-1,4,4
'bigint_col','BIGINT',10,-1,8,8
'float_col','FLOAT',10,-1,4,4
'double_col','DOUBLE',10,-1,8,8
'date_string_col','STRING',666,-1,8,8
'string_col','STRING',10,-1,1,1
'timestamp_col','TIMESTAMP',5678,-1,16,16
'year','INT',2,0,4,4
'month','INT',12,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
====
---- QUERY
# test computing stats on an empty table
create table compute_stats_db.alltypes_empty like functional_rc_snap.alltypes
====
---- QUERY
compute stats compute_stats_db.alltypes_empty
---- RESULTS
'Updated 0 partition(s) and 11 column(s).'
---- TYPES
STRING
====
---- QUERY
show table stats compute_stats_db.alltypes_empty
---- LABELS
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS
---- RESULTS
'Total','',0,0,'0B','0B','','',''
---- TYPES
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
show column stats compute_stats_db.alltypes_empty
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',0,-1,4,4
'bool_col','BOOLEAN',2,-1,1,1
'tinyint_col','TINYINT',0,-1,1,1
'smallint_col','SMALLINT',0,-1,2,2
'int_col','INT',0,-1,4,4
'bigint_col','BIGINT',0,-1,8,8
'float_col','FLOAT',0,-1,4,4
'double_col','DOUBLE',0,-1,8,8
'date_string_col','STRING',0,-1,0,0
'string_col','STRING',0,-1,0,0
'timestamp_col','TIMESTAMP',0,-1,16,16
'year','INT',0,0,4,4
'month','INT',0,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
====
---- QUERY
# IMPALA-867: Test computing stats on Avro tables created by Hive with
# matching/mismatched column definitions and Avro schema.
# Clone the used tables here.
create table compute_stats_db.avro_hive_alltypes
like functional_avro_snap.alltypes;
create table compute_stats_db.avro_hive_alltypes_extra_coldef
like functional_avro_snap.alltypes_extra_coldef;
create table compute_stats_db.avro_hive_alltypes_missing_coldef
like functional_avro_snap.alltypes_missing_coldef;
create table compute_stats_db.avro_hive_alltypes_type_mismatch
like functional_avro_snap.alltypes_type_mismatch;
====
---- QUERY
# Avro table with matching column definitions and Avro schema
compute stats compute_stats_db.avro_hive_alltypes
---- RESULTS
'Updated 0 partition(s) and 11 column(s).'
---- TYPES
STRING
====
---- QUERY
show table stats compute_stats_db.avro_hive_alltypes
---- LABELS
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS
---- RESULTS
'Total','',0,0,'0B','0B','','',''
---- TYPES
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
show column stats compute_stats_db.avro_hive_alltypes
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',0,-1,4,4
'bool_col','BOOLEAN',2,-1,1,1
'tinyint_col','INT',0,-1,4,4
'smallint_col','INT',0,-1,4,4
'int_col','INT',0,-1,4,4
'bigint_col','BIGINT',0,-1,8,8
'float_col','FLOAT',0,-1,4,4
'double_col','DOUBLE',0,-1,8,8
'date_string_col','STRING',0,-1,0,0
'string_col','STRING',0,-1,0,0
'timestamp_col','STRING',0,-1,0,0
'year','INT',0,0,4,4
'month','INT',0,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
====
---- QUERY
# Avro table with an extra column definition.
compute stats compute_stats_db.avro_hive_alltypes_extra_coldef
---- RESULTS
'Updated 0 partition(s) and 12 column(s).'
---- TYPES
STRING
====
---- QUERY
show table stats compute_stats_db.avro_hive_alltypes_extra_coldef
---- LABELS
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS
---- RESULTS
'Total','',0,0,'0B','0B','','',''
---- TYPES
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
show column stats compute_stats_db.avro_hive_alltypes_extra_coldef
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',0,-1,4,4
'bool_col','BOOLEAN',2,-1,1,1
'tinyint_col','TINYINT',0,-1,1,1
'smallint_col','SMALLINT',0,-1,2,2
'int_col','INT',0,-1,4,4
'bigint_col','BIGINT',0,-1,8,8
'float_col','FLOAT',0,-1,4,4
'double_col','DOUBLE',0,-1,8,8
'date_string_col','STRING',0,-1,0,0
'string_col','STRING',0,-1,0,0
'timestamp_col','TIMESTAMP',0,-1,16,16
'extra_col','STRING',0,-1,0,0
'year','INT',0,0,4,4
'month','INT',0,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
====
---- QUERY
# Avro table with two missing column definitions.
compute stats compute_stats_db.avro_hive_alltypes_missing_coldef
---- RESULTS
'Updated 0 partition(s) and 9 column(s).'
---- TYPES
STRING
====
---- QUERY
show table stats compute_stats_db.avro_hive_alltypes_missing_coldef
---- LABELS
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS
---- RESULTS
'Total','',0,0,'0B','0B','','',''
---- TYPES
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
show column stats compute_stats_db.avro_hive_alltypes_missing_coldef
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',0,-1,4,4
'bool_col','BOOLEAN',2,-1,1,1
'smallint_col','SMALLINT',0,-1,2,2
'int_col','INT',0,-1,4,4
'bigint_col','BIGINT',0,-1,8,8
'float_col','FLOAT',0,-1,4,4
'double_col','DOUBLE',0,-1,8,8
'date_string_col','STRING',0,-1,0,0
'string_col','STRING',0,-1,0,0
'year','INT',0,0,4,4
'month','INT',0,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
====
---- QUERY
# Avro table with one column definition having a different
# type than the Avro schema (bigint_col is a string).
compute stats compute_stats_db.avro_hive_alltypes_type_mismatch
---- RESULTS
'Updated 0 partition(s) and 11 column(s).'
---- TYPES
STRING
====
---- QUERY
show table stats compute_stats_db.avro_hive_alltypes_type_mismatch
---- LABELS
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS
---- RESULTS
'Total','',0,0,'0B','0B','','',''
---- TYPES
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
show column stats compute_stats_db.avro_hive_alltypes_type_mismatch
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',0,-1,4,4
'bool_col','BOOLEAN',2,-1,1,1
'tinyint_col','TINYINT',0,-1,1,1
'smallint_col','SMALLINT',0,-1,2,2
'int_col','INT',0,-1,4,4
'bigint_col','STRING',0,-1,0,0
'float_col','FLOAT',0,-1,4,4
'double_col','DOUBLE',0,-1,8,8
'date_string_col','STRING',0,-1,0,0
'string_col','STRING',0,-1,0,0
'timestamp_col','TIMESTAMP',0,-1,16,16
'year','INT',0,0,4,4
'month','INT',0,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
====
---- QUERY
# Test Avro table created without any column definitions.
create table compute_stats_db.avro_impala_alltypes_no_coldefs
partitioned by (year int, month int)
with serdeproperties
('avro.schema.url'='$FILESYSTEM_PREFIX/test-warehouse/avro_schemas/functional/alltypes.json')
stored as avro;
====
---- QUERY
compute stats compute_stats_db.avro_impala_alltypes_no_coldefs
---- RESULTS
'Updated 0 partition(s) and 11 column(s).'
---- TYPES
STRING
====
---- QUERY
show table stats compute_stats_db.avro_impala_alltypes_no_coldefs
---- LABELS
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS
---- RESULTS
'Total','',0,0,'0B','0B','','',''
---- TYPES
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
show column stats compute_stats_db.avro_impala_alltypes_no_coldefs
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',0,-1,4,4
'bool_col','BOOLEAN',2,-1,1,1
'tinyint_col','INT',0,-1,4,4
'smallint_col','INT',0,-1,4,4
'int_col','INT',0,-1,4,4
'bigint_col','BIGINT',0,-1,8,8
'float_col','FLOAT',0,-1,4,4
'double_col','DOUBLE',0,-1,8,8
'date_string_col','STRING',0,-1,0,0
'string_col','STRING',0,-1,0,0
'timestamp_col','STRING',0,-1,0,0
'year','INT',0,0,4,4
'month','INT',0,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
====
---- QUERY
# IMPALA-1104: Test computing stats on Avro tables created by Impala
# with mismatched column definitions and Avro schema. Mismatched column name.
create table compute_stats_db.avro_impala_alltypes_bad_colname
(id int, bool_col boolean, tinyint_col int, smallint_col int, bad_int_col int,
bigint_col bigint, float_col float, double_col double, date_string_col string,
string_col string, timestamp_col timestamp)
partitioned by (year int, month int)
with serdeproperties
('avro.schema.url'='$FILESYSTEM_PREFIX/test-warehouse/avro_schemas/functional/alltypes.json')
stored as avro;
====
---- QUERY
compute stats compute_stats_db.avro_impala_alltypes_bad_colname
---- RESULTS
'Updated 0 partition(s) and 11 column(s).'
---- TYPES
STRING
====
---- QUERY
show table stats compute_stats_db.avro_impala_alltypes_bad_colname
---- LABELS
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS
---- RESULTS
'Total','',0,0,'0B','0B','','',''
---- TYPES
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
show column stats compute_stats_db.avro_impala_alltypes_bad_colname
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',0,-1,4,4
'bool_col','BOOLEAN',2,-1,1,1
'tinyint_col','INT',0,-1,4,4
'smallint_col','INT',0,-1,4,4
'int_col','INT',0,-1,4,4
'bigint_col','BIGINT',0,-1,8,8
'float_col','FLOAT',0,-1,4,4
'double_col','DOUBLE',0,-1,8,8
'date_string_col','STRING',0,-1,0,0
'string_col','STRING',0,-1,0,0
'timestamp_col','STRING',0,-1,0,0
'year','INT',0,0,4,4
'month','INT',0,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
====
---- QUERY
# IMPALA-1104: Test computing stats on Avro tables created by Impala
# with mismatched column definitions and Avro schema. Mismatched column type.
create table compute_stats_db.avro_impala_alltypes_bad_coltype
(id int, bool_col boolean, tinyint_col int, smallint_col int, int_col int,
bigint_col bigint, float_col float, double_col bigint, date_string_col string,
string_col string, timestamp_col timestamp)
partitioned by (year int, month int)
with serdeproperties
('avro.schema.url'='$FILESYSTEM_PREFIX/test-warehouse/avro_schemas/functional/alltypes.json')
stored as avro;
====
---- QUERY
compute stats compute_stats_db.avro_impala_alltypes_bad_coltype
---- RESULTS
'Updated 0 partition(s) and 11 column(s).'
---- TYPES
STRING
====
---- QUERY
show table stats compute_stats_db.avro_impala_alltypes_bad_coltype
---- LABELS
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS
---- RESULTS
'Total','',0,0,'0B','0B','','',''
---- TYPES
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
show column stats compute_stats_db.avro_impala_alltypes_bad_coltype
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',0,-1,4,4
'bool_col','BOOLEAN',2,-1,1,1
'tinyint_col','INT',0,-1,4,4
'smallint_col','INT',0,-1,4,4
'int_col','INT',0,-1,4,4
'bigint_col','BIGINT',0,-1,8,8
'float_col','FLOAT',0,-1,4,4
'double_col','DOUBLE',0,-1,8,8
'date_string_col','STRING',0,-1,0,0
'string_col','STRING',0,-1,0,0
'timestamp_col','STRING',0,-1,0,0
'year','INT',0,0,4,4
'month','INT',0,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
====
---- QUERY
# IMPALA-1055: Use a database called `parquet` to test the case where the name of
# the database is a keyword.
CREATE TABLE `parquet`.billion_parquet(id INT);
====
---- QUERY
COMPUTE STATS `parquet`.billion_parquet
---- RESULTS
'Updated 1 partition(s) and 1 column(s).'
---- TYPES
STRING
====
---- QUERY
# Here the table name is `parquet` as well, not only the database name.
CREATE TABLE `parquet`.`parquet`(id INT)
====
---- QUERY
COMPUTE STATS `parquet`.`parquet`
---- RESULTS
'Updated 1 partition(s) and 1 column(s).'
---- TYPES
STRING
====
---- QUERY
# IMPALA-883: Compute table stats for an empty partition.
create table compute_stats_db.empty_partitioned (i int) partitioned by (j int);
alter table compute_stats_db.empty_partitioned add partition (j=1);
====
---- QUERY
compute stats compute_stats_db.empty_partitioned
---- RESULTS
'Updated 1 partition(s) and 1 column(s).'
---- TYPES
STRING
====
---- QUERY
show table stats compute_stats_db.empty_partitioned
---- RESULTS
'1',0,0,'0B','NOT CACHED','NOT CACHED','TEXT','false'
'Total',0,0,'0B','0B','','',''
---- TYPES
STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
# Insert non empty partition to the table with empty partition.
insert into table compute_stats_db.empty_partitioned partition (j=2) select 1;
====
---- QUERY
# Verify partition stats work with empty and non-empty partition.
drop stats compute_stats_db.empty_partitioned;
compute stats compute_stats_db.empty_partitioned;
---- RESULTS
'Updated 2 partition(s) and 1 column(s).'
---- TYPES
STRING
====
---- QUERY
show table stats compute_stats_db.empty_partitioned
---- RESULTS
'1',0,0,'0B','NOT CACHED','NOT CACHED','TEXT','false'
'2',1,1,'2B','NOT CACHED','NOT CACHED','TEXT','false'
'Total',1,1,'2B','0B','','',''
---- TYPES
STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
# Verify partition stats work with empty and non-empty partition (repeated
# drop stats / compute stats cycle; expects the same result as the previous run).
drop stats compute_stats_db.empty_partitioned;
compute stats compute_stats_db.empty_partitioned;
---- RESULTS
'Updated 2 partition(s) and 1 column(s).'
---- TYPES
STRING
====
---- QUERY
show table stats compute_stats_db.empty_partitioned
---- RESULTS
'1',0,0,'0B','NOT CACHED','NOT CACHED','TEXT','false'
'2',1,1,'2B','NOT CACHED','NOT CACHED','TEXT','false'
'Total',1,1,'2B','0B','','',''
---- TYPES
STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
# IMPALA-1614: Verify that COMPUTE STATS works on a table whose table, column, and
# partition-column names start with numbers.
create table compute_stats_db.`123_table` (i int, 1p int) partitioned by (2j int);
alter table compute_stats_db.`123_table` add partition (2j=1);
====
---- QUERY
compute stats compute_stats_db.`123_table`
---- RESULTS
'Updated 1 partition(s) and 2 column(s).'
---- TYPES
STRING
====
---- QUERY
show table stats compute_stats_db.`123_table`
---- RESULTS
'1',0,0,'0B','NOT CACHED','NOT CACHED','TEXT','false'
'Total',0,0,'0B','0B','','',''
---- TYPES
STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING
====
====
---- QUERY
# IMPALA-1629: Verify that the column stats for CHAR/VARCHAR columns are populated.
# The values of date_string_col always have exactly 8 characters. The CHAR/VARCHAR
# sizes below are chosen such that they are smaller, equal, and greater than the
# source data values, in particular, to test the CHAR padding behavior.
create table compute_stats_db.chars_tbl (
id int,
ch1 char(1),
ch2 char(8),
ch3 char(20),
ts timestamp,
vc1 varchar(1),
vc2 varchar(8),
vc3 varchar(20)
)
partitioned by (
year char(5),
day varchar(13)
);
insert overwrite compute_stats_db.chars_tbl partition(year, day)
select
id,
cast(date_string_col as char(1)),
cast(date_string_col as char(8)),
cast(date_string_col as char(20)),
timestamp_col,
cast(date_string_col as varchar(1)),
cast(date_string_col as varchar(8)),
cast(date_string_col as varchar(20)),
cast(year as char(5)),
cast(day as varchar(13))
from functional.alltypesagg
where day is null or day in (3, 7);
---- RESULTS: VERIFY_IS_EQUAL_SORTED
year=2010 /day=7/: 1000
year=2010 /day=3/: 1000
year=2010 /day=__HIVE_DEFAULT_PARTITION__/: 1000
====
---- QUERY
compute stats compute_stats_db.chars_tbl
---- RESULTS
'Updated 3 partition(s) and 8 column(s).'
---- TYPES
STRING
====
---- QUERY
show column stats compute_stats_db.chars_tbl
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',4030,-1,4,4
'ch1','CHAR(1)',1,-1,1,1
'ch2','CHAR(8)',10,-1,8,8
'ch3','CHAR(20)',10,-1,8,8
'ts','TIMESTAMP',2994,-1,16,16
'vc1','VARCHAR(1)',1,-1,1,1
'vc2','VARCHAR(8)',10,-1,8,8
'vc3','VARCHAR(20)',10,-1,8,8
'year','CHAR(5)',1,0,5,5
'day','VARCHAR(13)',3,1,-1,-1
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
====
---- QUERY
# Test that compute stats on a Hive-created Avro table without column defs
# works (HIVE-6308, IMPALA-867).
create table compute_stats_db.alltypes_no_coldef like functional_avro_snap.alltypes_no_coldef;
compute stats compute_stats_db.alltypes_no_coldef
---- RESULTS
'Updated 1 partition(s) and 11 column(s).'
---- TYPES
STRING
====
---- QUERY
# Column stats of the cloned Hive-created Avro table without column defs.
# The column labels are verified too, as in the other SHOW COLUMN STATS tests
# in this file.
show column stats compute_stats_db.alltypes_no_coldef
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',0,-1,4,4
'bool_col','BOOLEAN',2,-1,1,1
'tinyint_col','INT',0,-1,4,4
'smallint_col','INT',0,-1,4,4
'int_col','INT',0,-1,4,4
'bigint_col','BIGINT',0,-1,8,8
'float_col','FLOAT',0,-1,4,4
'double_col','DOUBLE',0,-1,8,8
'date_string_col','STRING',0,-1,0,0
'string_col','STRING',0,-1,0,0
'timestamp_col','STRING',0,-1,0,0
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
====