====
---- QUERY
# Partitioned text table covering all types: clone the schema of
# functional.alltypes and copy its rows using dynamic partitioning.
create table alltypes like functional.alltypes;
insert into alltypes partition(year, month)
select * from functional.alltypes;
====
---- QUERY
# Expect all 24 partitions and the 11 non-partition columns to be updated.
compute stats alltypes
---- RESULTS
'Updated 24 partition(s) and 11 column(s).'
---- TYPES
STRING
====
---- QUERY
# After compute stats every partition reports a row count.
show table stats alltypes
---- LABELS
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
---- RESULTS
'2009','1',310,1,'24.56KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2009','2',280,1,'22.27KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2009','3',310,1,'24.67KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2009','4',300,1,'24.06KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2009','5',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2009','6',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2009','7',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2009','8',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2009','9',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2009','10',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2009','11',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2009','12',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2010','1',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2010','2',280,1,'22.54KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2010','3',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2010','4',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2010','5',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2010','6',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2010','7',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2010','8',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2010','9',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2010','10',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2010','11',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2010','12',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'Total','',7300,24,'586.84KB','0B','','','',''
---- TYPES
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
show column stats alltypes
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',7505,-1,4,4
'bool_col','BOOLEAN',2,-1,1,1
'tinyint_col','TINYINT',10,-1,1,1
'smallint_col','SMALLINT',10,-1,2,2
'int_col','INT',10,-1,4,4
'bigint_col','BIGINT',10,-1,8,8
'float_col','FLOAT',10,-1,4,4
'double_col','DOUBLE',10,-1,8,8
'date_string_col','STRING',736,-1,8,8
'string_col','STRING',10,-1,1,1
'timestamp_col','TIMESTAMP',7554,-1,16,16
'year','INT',2,0,4,4
'month','INT',12,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
====
---- QUERY
# Remove the table and column stats that were just computed.
drop stats alltypes
====
---- QUERY
# Row counts are expected to fall back to -1. File counts and sizes are unchanged.
show table stats alltypes
---- LABELS
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
---- RESULTS
'2009','1',-1,1,'24.56KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2009','10',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2009','11',-1,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2009','12',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2009','2',-1,1,'22.27KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2009','3',-1,1,'24.67KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2009','4',-1,1,'24.06KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2009','5',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2009','6',-1,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2009','7',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2009','8',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2009','9',-1,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2010','1',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2010','10',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2010','11',-1,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2010','12',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2010','2',-1,1,'22.54KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2010','3',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2010','4',-1,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2010','5',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2010','6',-1,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2010','7',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2010','8',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2010','9',-1,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'Total','',-1,24,'586.84KB','0B','','','',''
---- TYPES
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
# Note - the NDV for partition columns is read from the table metadata.
show column stats alltypes
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',-1,-1,4,4
'bool_col','BOOLEAN',-1,-1,1,1
'tinyint_col','TINYINT',-1,-1,1,1
'smallint_col','SMALLINT',-1,-1,2,2
'int_col','INT',-1,-1,4,4
'bigint_col','BIGINT',-1,-1,8,8
'float_col','FLOAT',-1,-1,4,4
'double_col','DOUBLE',-1,-1,8,8
'date_string_col','STRING',-1,-1,-1,-1
'string_col','STRING',-1,-1,-1,-1
'timestamp_col','TIMESTAMP',-1,-1,16,16
'year','INT',2,0,4,4
'month','INT',12,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
====
---- QUERY
# Add a partition whose keys are both NULL, then check the partition column stats.
alter table alltypes add partition (year=NULL, month=NULL)
---- RESULTS
====
---- QUERY
# Both partition columns are expected to gain one distinct value and one NULL.
show column stats alltypes
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',-1,-1,4,4
'bool_col','BOOLEAN',-1,-1,1,1
'tinyint_col','TINYINT',-1,-1,1,1
'smallint_col','SMALLINT',-1,-1,2,2
'int_col','INT',-1,-1,4,4
'bigint_col','BIGINT',-1,-1,8,8
'float_col','FLOAT',-1,-1,4,4
'double_col','DOUBLE',-1,-1,8,8
'date_string_col','STRING',-1,-1,-1,-1
'string_col','STRING',-1,-1,-1,-1
'timestamp_col','TIMESTAMP',-1,-1,16,16
'year','INT',3,1,4,4
'month','INT',13,1,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
====
---- QUERY
# Add a partition with a new year value and a NULL month.
alter table alltypes add partition (year=2011, month=NULL)
---- RESULTS
====
---- QUERY
show column stats alltypes
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',-1,-1,4,4
'bool_col','BOOLEAN',-1,-1,1,1
'tinyint_col','TINYINT',-1,-1,1,1
'smallint_col','SMALLINT',-1,-1,2,2
'int_col','INT',-1,-1,4,4
'bigint_col','BIGINT',-1,-1,8,8
'float_col','FLOAT',-1,-1,4,4
'double_col','DOUBLE',-1,-1,8,8
'date_string_col','STRING',-1,-1,-1,-1
'string_col','STRING',-1,-1,-1,-1
'timestamp_col','TIMESTAMP',-1,-1,16,16
'year','INT',4,1,4,4
'month','INT',13,2,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
====
---- QUERY
# Drop the partitions with NULL values and check for stats.
alter table alltypes drop partition (year=NULL, month=NULL)
---- RESULTS
====
---- QUERY
show column stats alltypes
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',-1,-1,4,4
'bool_col','BOOLEAN',-1,-1,1,1
'tinyint_col','TINYINT',-1,-1,1,1
'smallint_col','SMALLINT',-1,-1,2,2
'int_col','INT',-1,-1,4,4
'bigint_col','BIGINT',-1,-1,8,8
'float_col','FLOAT',-1,-1,4,4
'double_col','DOUBLE',-1,-1,8,8
'date_string_col','STRING',-1,-1,-1,-1
'string_col','STRING',-1,-1,-1,-1
'timestamp_col','TIMESTAMP',-1,-1,16,16
'year','INT',3,0,4,4
'month','INT',13,1,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
====
---- QUERY
alter table alltypes drop partition (year=2011, month=NULL)
---- RESULTS
====
---- QUERY
# With both extra partitions gone, the partition column stats are expected
# to match the values seen before any partition was added.
show column stats alltypes
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',-1,-1,4,4
'bool_col','BOOLEAN',-1,-1,1,1
'tinyint_col','TINYINT',-1,-1,1,1
'smallint_col','SMALLINT',-1,-1,2,2
'int_col','INT',-1,-1,4,4
'bigint_col','BIGINT',-1,-1,8,8
'float_col','FLOAT',-1,-1,4,4
'double_col','DOUBLE',-1,-1,8,8
'date_string_col','STRING',-1,-1,-1,-1
'string_col','STRING',-1,-1,-1,-1
'timestamp_col','TIMESTAMP',-1,-1,16,16
'year','INT',2,0,4,4
'month','INT',12,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
====
---- QUERY
# drop stats from this table a second time, should not throw an error.
drop stats alltypes
====
---- QUERY
# Test computing stats on an unpartitioned text table with all types.
# The insert lists the 11 non-partition columns explicitly.
create table alltypesnopart like functional.alltypesnopart;
insert into alltypesnopart
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
  float_col, double_col, date_string_col, string_col, timestamp_col
from functional.alltypessmall;
====
---- QUERY
compute stats alltypesnopart
---- RESULTS
'Updated 1 partition(s) and 11 column(s).'
---- TYPES
STRING
====
---- QUERY
# Unpartitioned table: a single stats row with no partition key columns.
# NOTE(review): the LABELS line below starts with '#' - confirm the test
# parser does not drop it as a comment.
show table stats alltypesnopart
---- LABELS
#ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
---- RESULTS
100,3,'7.73KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
---- TYPES
BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
show column stats alltypesnopart
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',99,-1,4,4
'bool_col','BOOLEAN',2,-1,1,1
'tinyint_col','TINYINT',10,-1,1,1
'smallint_col','SMALLINT',10,-1,2,2
'int_col','INT',10,-1,4,4
'bigint_col','BIGINT',10,-1,8,8
'float_col','FLOAT',10,-1,4,4
'double_col','DOUBLE',10,-1,8,8
'date_string_col','STRING',12,-1,8,8
'string_col','STRING',10,-1,1,1
'timestamp_col','TIMESTAMP',101,-1,16,16
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
====
---- QUERY
# Partitioned Parquet table covering all types, loaded from the text table
# functional.alltypes using dynamic partitioning.
create table alltypes_parquet like functional_parquet.alltypes;
insert into alltypes_parquet partition(year, month)
select * from functional.alltypes;
====
---- QUERY
compute stats alltypes_parquet
---- RESULTS
'Updated 24 partition(s) and 11 column(s).'
---- TYPES
STRING
====
---- QUERY
# Partition sizes are matched with a regex instead of exact values.
show table stats alltypes_parquet
---- LABELS
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
---- RESULTS
'2009','1',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
'2009','2',280,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
'2009','3',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
'2009','4',300,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
'2009','5',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
'2009','6',300,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
'2009','7',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
'2009','8',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
'2009','9',300,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
'2009','10',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
'2009','11',300,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
'2009','12',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
'2010','1',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
'2010','2',280,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
'2010','3',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
'2010','4',300,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
'2010','5',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
'2010','6',300,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
'2010','7',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
'2010','8',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
'2010','9',300,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
'2010','10',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
'2010','11',300,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
'2010','12',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
'Total','',7300,24,regex:.+KB,'0B','','','',''
---- TYPES
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
show column stats alltypes_parquet
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',7505,-1,4,4
'bool_col','BOOLEAN',2,-1,1,1
'tinyint_col','TINYINT',10,-1,1,1
'smallint_col','SMALLINT',10,-1,2,2
'int_col','INT',10,-1,4,4
'bigint_col','BIGINT',10,-1,8,8
'float_col','FLOAT',10,-1,4,4
'double_col','DOUBLE',10,-1,8,8
'date_string_col','STRING',736,-1,8,8
'string_col','STRING',10,-1,1,1
'timestamp_col','TIMESTAMP',7554,-1,16,16
'year','INT',2,0,4,4
'month','INT',12,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
====
---- QUERY
# Stats on an empty table: the schema is cloned and no rows are inserted.
create table alltypes_empty like functional_rc_snap.alltypes
====
---- QUERY
compute stats alltypes_empty
---- RESULTS
'Updated 0 partition(s) and 11 column(s).'
---- TYPES
STRING
====
---- QUERY
# No partitions exist, so only the 'Total' row is expected.
show table stats alltypes_empty
---- LABELS
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
---- RESULTS
'Total','',0,0,'0B','0B','','','',''
---- TYPES
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
show column stats alltypes_empty
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',0,-1,4,4
'bool_col','BOOLEAN',2,-1,1,1
'tinyint_col','TINYINT',0,-1,1,1
'smallint_col','SMALLINT',0,-1,2,2
'int_col','INT',0,-1,4,4
'bigint_col','BIGINT',0,-1,8,8
'float_col','FLOAT',0,-1,4,4
'double_col','DOUBLE',0,-1,8,8
'date_string_col','STRING',0,-1,0,0
'string_col','STRING',0,-1,0,0
'timestamp_col','TIMESTAMP',0,-1,16,16
'year','INT',0,0,4,4
'month','INT',0,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
====
---- QUERY
# IMPALA-867: Test computing stats on Avro tables created by Hive with
# matching/mismatched column definitions and Avro schema.
# The source tables are cloned here and the clones are used by the tests below.
create table avro_hive_alltypes like functional_avro_snap.alltypes;
create table avro_hive_alltypes_extra_coldef like functional_avro_snap.alltypes_extra_coldef;
create table avro_hive_alltypes_missing_coldef like functional_avro_snap.alltypes_missing_coldef;
create table avro_hive_alltypes_type_mismatch like functional_avro_snap.alltypes_type_mismatch;
create table avro_hive_no_avro_schema like functional_avro_snap.no_avro_schema;
====
---- QUERY
# Avro table with matching column definitions and Avro schema
compute stats avro_hive_alltypes
---- RESULTS
'Updated 0 partition(s) and 11 column(s).'
---- TYPES
STRING
====
---- QUERY
show table stats avro_hive_alltypes
---- LABELS
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
---- RESULTS
'Total','',0,0,'0B','0B','','','',''
---- TYPES
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
# tinyint_col and smallint_col are reported as INT, and timestamp_col as STRING.
show column stats avro_hive_alltypes
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',0,-1,4,4
'bool_col','BOOLEAN',2,-1,1,1
'tinyint_col','INT',0,-1,4,4
'smallint_col','INT',0,-1,4,4
'int_col','INT',0,-1,4,4
'bigint_col','BIGINT',0,-1,8,8
'float_col','FLOAT',0,-1,4,4
'double_col','DOUBLE',0,-1,8,8
'date_string_col','STRING',0,-1,0,0
'string_col','STRING',0,-1,0,0
'timestamp_col','STRING',0,-1,0,0
'year','INT',0,0,4,4
'month','INT',0,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
====
---- QUERY
# Avro table with an extra column definition.
compute stats avro_hive_alltypes_extra_coldef
---- RESULTS
'Updated 0 partition(s) and 12 column(s).'
---- TYPES
STRING
====
---- QUERY
show table stats avro_hive_alltypes_extra_coldef
---- LABELS
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
---- RESULTS
'Total','',0,0,'0B','0B','','','',''
---- TYPES
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
# The additional column 'extra_col' is listed ahead of the partition columns.
show column stats avro_hive_alltypes_extra_coldef
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',0,-1,4,4
'bool_col','BOOLEAN',2,-1,1,1
'tinyint_col','TINYINT',0,-1,1,1
'smallint_col','SMALLINT',0,-1,2,2
'int_col','INT',0,-1,4,4
'bigint_col','BIGINT',0,-1,8,8
'float_col','FLOAT',0,-1,4,4
'double_col','DOUBLE',0,-1,8,8
'date_string_col','STRING',0,-1,0,0
'string_col','STRING',0,-1,0,0
'timestamp_col','TIMESTAMP',0,-1,16,16
'extra_col','STRING',0,-1,0,0
'year','INT',0,0,4,4
'month','INT',0,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
====
---- QUERY
# Avro table with two missing column definitions.
compute stats avro_hive_alltypes_missing_coldef
---- RESULTS
'Updated 0 partition(s) and 9 column(s).'
---- TYPES
STRING
====
---- QUERY
show table stats avro_hive_alltypes_missing_coldef
---- LABELS
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
---- RESULTS
'Total','',0,0,'0B','0B','','','',''
---- TYPES
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
# tinyint_col and timestamp_col do not appear in the expected output.
show column stats avro_hive_alltypes_missing_coldef
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',0,-1,4,4
'bool_col','BOOLEAN',2,-1,1,1
'smallint_col','SMALLINT',0,-1,2,2
'int_col','INT',0,-1,4,4
'bigint_col','BIGINT',0,-1,8,8
'float_col','FLOAT',0,-1,4,4
'double_col','DOUBLE',0,-1,8,8
'date_string_col','STRING',0,-1,0,0
'string_col','STRING',0,-1,0,0
'year','INT',0,0,4,4
'month','INT',0,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
====
---- QUERY
# Avro table with one column definition having a different
# type than the Avro schema (bigint_col is a string).
compute stats avro_hive_alltypes_type_mismatch
---- RESULTS
'Updated 0 partition(s) and 11 column(s).'
---- TYPES
STRING
====
---- QUERY
show table stats avro_hive_alltypes_type_mismatch
---- LABELS
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
---- RESULTS
'Total','',0,0,'0B','0B','','','',''
---- TYPES
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
# bigint_col is expected to be reported as STRING.
show column stats avro_hive_alltypes_type_mismatch
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',0,-1,4,4
'bool_col','BOOLEAN',2,-1,1,1
'tinyint_col','TINYINT',0,-1,1,1
'smallint_col','SMALLINT',0,-1,2,2
'int_col','INT',0,-1,4,4
'bigint_col','STRING',0,-1,0,0
'float_col','FLOAT',0,-1,4,4
'double_col','DOUBLE',0,-1,8,8
'date_string_col','STRING',0,-1,0,0
'string_col','STRING',0,-1,0,0
'timestamp_col','TIMESTAMP',0,-1,16,16
'year','INT',0,0,4,4
'month','INT',0,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
====
---- QUERY
# Avro table without an Avro schema created by Hive.
# The Avro schema is inferred from the column definitions.
compute stats avro_hive_no_avro_schema
---- RESULTS
'Updated 0 partition(s) and 11 column(s).'
---- TYPES
STRING
====
---- QUERY
show table stats avro_hive_no_avro_schema
---- LABELS
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
---- RESULTS
'Total','',0,0,'0B','0B','','','',''
---- TYPES
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
show column stats avro_hive_no_avro_schema
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',0,-1,4,4
'bool_col','BOOLEAN',2,-1,1,1
'tinyint_col','INT',0,-1,4,4
'smallint_col','INT',0,-1,4,4
'int_col','INT',0,-1,4,4
'bigint_col','BIGINT',0,-1,8,8
'float_col','FLOAT',0,-1,4,4
'double_col','DOUBLE',0,-1,8,8
'date_string_col','STRING',0,-1,0,0
'string_col','STRING',0,-1,0,0
'timestamp_col','STRING',0,-1,0,0
'year','INT',0,0,4,4
'month','INT',0,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
====
---- QUERY
# Avro table declared with no column definitions at all. Only the partition
# columns and the 'avro.schema.url' serde property are specified.
create table avro_impala_alltypes_no_coldefs
partitioned by (year int, month int)
with serdeproperties
('avro.schema.url'='$FILESYSTEM_PREFIX/test-warehouse/avro_schemas/functional/alltypes.json')
stored as avro;
====
---- QUERY
compute stats avro_impala_alltypes_no_coldefs
---- RESULTS
'Updated 0 partition(s) and 11 column(s).'
---- TYPES
STRING
====
---- QUERY
show table stats avro_impala_alltypes_no_coldefs
---- LABELS
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
---- RESULTS
'Total','',0,0,'0B','0B','','','',''
---- TYPES
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
show column stats avro_impala_alltypes_no_coldefs
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',0,-1,4,4
'bool_col','BOOLEAN',2,-1,1,1
'tinyint_col','INT',0,-1,4,4
'smallint_col','INT',0,-1,4,4
'int_col','INT',0,-1,4,4
'bigint_col','BIGINT',0,-1,8,8
'float_col','FLOAT',0,-1,4,4
'double_col','DOUBLE',0,-1,8,8
'date_string_col','STRING',0,-1,0,0
'string_col','STRING',0,-1,0,0
'timestamp_col','STRING',0,-1,0,0
'year','INT',0,0,4,4
'month','INT',0,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
====
---- QUERY
# IMPALA-1104: Test computing stats on Avro tables created by Impala
# with mismatched column definitions and Avro schema. Mismatched column name:
# the fifth column is declared as bad_int_col.
create table avro_impala_alltypes_bad_colname
(id int, bool_col boolean, tinyint_col int, smallint_col int, bad_int_col int,
 bigint_col bigint, float_col float, double_col double, date_string_col string,
 string_col string, timestamp_col timestamp)
partitioned by (year int, month int)
with serdeproperties
('avro.schema.url'='$FILESYSTEM_PREFIX/test-warehouse/avro_schemas/functional/alltypes.json')
stored as avro;
====
---- QUERY
compute stats avro_impala_alltypes_bad_colname
---- RESULTS
'Updated 0 partition(s) and 11 column(s).'
---- TYPES
STRING
====
---- QUERY
show table stats avro_impala_alltypes_bad_colname
---- LABELS
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
---- RESULTS
'Total','',0,0,'0B','0B','','','',''
---- TYPES
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
# The expected output lists int_col, not the declared name bad_int_col.
show column stats avro_impala_alltypes_bad_colname
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',0,-1,4,4
'bool_col','BOOLEAN',2,-1,1,1
'tinyint_col','INT',0,-1,4,4
'smallint_col','INT',0,-1,4,4
'int_col','INT',0,-1,4,4
'bigint_col','BIGINT',0,-1,8,8
'float_col','FLOAT',0,-1,4,4
'double_col','DOUBLE',0,-1,8,8
'date_string_col','STRING',0,-1,0,0
'string_col','STRING',0,-1,0,0
'timestamp_col','STRING',0,-1,0,0
'year','INT',0,0,4,4
'month','INT',0,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
====
---- QUERY
# IMPALA-1104: Test computing stats on Avro tables created by Impala
# with mismatched column definitions and Avro schema. Mismatched column type:
# double_col is declared as bigint.
create table avro_impala_alltypes_bad_coltype
(id int, bool_col boolean, tinyint_col int, smallint_col int, int_col int,
 bigint_col bigint, float_col float, double_col bigint, date_string_col string,
 string_col string, timestamp_col timestamp)
partitioned by (year int, month int)
with serdeproperties
('avro.schema.url'='$FILESYSTEM_PREFIX/test-warehouse/avro_schemas/functional/alltypes.json')
stored as avro;
====
---- QUERY
compute stats avro_impala_alltypes_bad_coltype
---- RESULTS
'Updated 0 partition(s) and 11 column(s).'
---- TYPES
STRING
====
---- QUERY
show table stats avro_impala_alltypes_bad_coltype
---- LABELS
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
---- RESULTS
'Total','',0,0,'0B','0B','','','',''
---- TYPES
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
# double_col is expected to be reported as DOUBLE, not the declared bigint.
show column stats avro_impala_alltypes_bad_coltype
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',0,-1,4,4
'bool_col','BOOLEAN',2,-1,1,1
'tinyint_col','INT',0,-1,4,4
'smallint_col','INT',0,-1,4,4
'int_col','INT',0,-1,4,4
'bigint_col','BIGINT',0,-1,8,8
'float_col','FLOAT',0,-1,4,4
'double_col','DOUBLE',0,-1,8,8
'date_string_col','STRING',0,-1,0,0
'string_col','STRING',0,-1,0,0
'timestamp_col','STRING',0,-1,0,0
'year','INT',0,0,4,4
'month','INT',0,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
====
---- QUERY
# IMPALA-883: Compute table stats for an empty partition.
create table empty_partitioned (i int) partitioned by (j int);
alter table empty_partitioned add partition (j=1);
====
---- QUERY
compute stats empty_partitioned
---- RESULTS
'Updated 1 partition(s) and 1 column(s).'
---- TYPES
STRING
====
---- QUERY
# The empty partition is expected to report zero rows and zero files.
show table stats empty_partitioned
---- RESULTS
'1',0,0,'0B','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'Total',0,0,'0B','0B','','','',''
---- TYPES
STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
# Add a second, non-empty partition (j=2) holding a single row.
insert into table empty_partitioned partition (j=2) select 1;
====
---- QUERY
# Verify partition stats work with empty and non-empty partition.
drop stats empty_partitioned;
compute stats empty_partitioned;
---- RESULTS
'Updated 2 partition(s) and 1 column(s).'
---- TYPES
STRING
====
---- QUERY
# Partition j=1 is empty and partition j=2 holds the single inserted row.
show table stats empty_partitioned
---- RESULTS
'1',0,0,'0B','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2',1,1,'2B','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'Total',1,1,'2B','0B','','','',''
---- TYPES
STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
# Same drop/compute cycle as the preceding test, run a second time.
# The expected results are unchanged.
drop stats empty_partitioned;
compute stats empty_partitioned;
---- RESULTS
'Updated 2 partition(s) and 1 column(s).'
---- TYPES
STRING
====
---- QUERY
show table stats empty_partitioned
---- RESULTS
'1',0,0,'0B','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2',1,1,'2B','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'Total',1,1,'2B','0B','','','',''
---- TYPES
STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
# IMPALA-1614 Verify that COMPUTE STATS works on a table whose name starts with numbers.
# A column (1p) and the partition key (2j) also start with a digit.
create table `123_table` (i int, 1p int) partitioned by (2j int);
alter table `123_table` add partition (2j=1);
====
---- QUERY
compute stats `123_table`
---- RESULTS
'Updated 1 partition(s) and 2 column(s).'
---- TYPES
STRING
====
---- QUERY
show table stats `123_table`
---- RESULTS
'1',0,0,'0B','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'Total',0,0,'0B','0B','','','',''
---- TYPES
STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
# IMPALA-1629: Verify that the column stats for CHAR/VARCHAR columns are populated.
# The values of date_string_col always have exactly 8 characters. The CHAR/VARCHAR
# sizes below are chosen such that they are smaller, equal, and greater than the
# source data values, in particular, to test the CHAR padding behavior.
create table chars_tbl (
  id int,
  ch1 char(1),
  ch2 char(8),
  ch3 char(20),
  ts timestamp,
  vc1 varchar(1),
  vc2 varchar(8),
  vc3 varchar(20)
)
partitioned by (
  year char(5),
  day varchar(13)
);
insert overwrite chars_tbl partition(year, day)
select
  id,
  cast(date_string_col as char(1)),
  cast(date_string_col as char(8)),
  cast(date_string_col as char(20)),
  timestamp_col,
  cast(date_string_col as varchar(1)),
  cast(date_string_col as varchar(8)),
  cast(date_string_col as varchar(20)),
  cast(year as char(5)),
  cast(day as varchar(13))
from functional.alltypesagg
where day is null or day in (3, 7);
---- RESULTS: VERIFY_IS_EQUAL_SORTED
year=2010 /day=7/: 1000
year=2010 /day=3/: 1000
year=2010 /day=__HIVE_DEFAULT_PARTITION__/: 1000
====
---- QUERY
# Three partitions were written above, and the table has 8 non-partition columns.
compute stats chars_tbl
---- RESULTS
'Updated 3 partition(s) and 8 column(s).'
---- TYPES
STRING
====
---- QUERY
show column stats chars_tbl
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',2915,-1,4,4
'ch1','CHAR(1)',1,-1,1,1
'ch2','CHAR(8)',10,-1,8,8
'ch3','CHAR(20)',10,-1,8,8
'ts','TIMESTAMP',2871,-1,16,16
'vc1','VARCHAR(1)',1,-1,1,1
'vc2','VARCHAR(8)',10,-1,8,8
'vc3','VARCHAR(20)',10,-1,8,8
'year','CHAR(5)',1,0,5,5
'day','VARCHAR(13)',3,1,-1,-1
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
====
---- QUERY
# Test that compute stats on a Hive-created Avro table without column defs
# works (HIVE-6308, IMPALA-867).
create table alltypes_no_coldef like functional_avro_snap.alltypes_no_coldef;
compute stats alltypes_no_coldef
---- RESULTS
'Updated 1 partition(s) and 11 column(s).'
---- TYPES
STRING
====
---- QUERY
# Column stats for the cloned Avro table that has no column definitions.
# The expected output has no partition-column rows.
show column stats alltypes_no_coldef
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',0,-1,4,4
'bool_col','BOOLEAN',2,-1,1,1
'tinyint_col','INT',0,-1,4,4
'smallint_col','INT',0,-1,4,4
'int_col','INT',0,-1,4,4
'bigint_col','BIGINT',0,-1,8,8
'float_col','FLOAT',0,-1,4,4
'double_col','DOUBLE',0,-1,8,8
'date_string_col','STRING',0,-1,0,0
'string_col','STRING',0,-1,0,0
'timestamp_col','STRING',0,-1,0,0
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
====