====
---- QUERY
# Test computing stats on a partitioned text table with all types.
# The table is cloned from functional.alltypes and filled through a
# dynamic-partition insert (partition(year, month)).
create table compute_stats_db.alltypes like functional.alltypes;
insert into compute_stats_db.alltypes partition(year, month)
select * from functional.alltypes;
====
---- QUERY
compute stats compute_stats_db.alltypes
---- RESULTS
'Updated 24 partition(s) and 11 column(s).'
---- TYPES
STRING
====
---- QUERY
# One row per (year, month) partition followed by a 'Total' summary row.
show table stats compute_stats_db.alltypes
---- LABELS
YEAR, MONTH, #ROWS, #FILES, SIZE, FORMAT
---- RESULTS
2009,1,310,1,'24.56KB','TEXT'
2009,2,280,1,'22.27KB','TEXT'
2009,3,310,1,'24.67KB','TEXT'
2009,4,300,1,'24.06KB','TEXT'
2009,5,310,1,'24.97KB','TEXT'
2009,6,300,1,'24.16KB','TEXT'
2009,7,310,1,'24.97KB','TEXT'
2009,8,310,1,'24.97KB','TEXT'
2009,9,300,1,'24.16KB','TEXT'
2009,10,310,1,'24.97KB','TEXT'
2009,11,300,1,'24.16KB','TEXT'
2009,12,310,1,'24.97KB','TEXT'
2010,1,310,1,'24.97KB','TEXT'
2010,2,280,1,'22.54KB','TEXT'
2010,3,310,1,'24.97KB','TEXT'
2010,4,300,1,'24.16KB','TEXT'
2010,5,310,1,'24.97KB','TEXT'
2010,6,300,1,'24.16KB','TEXT'
2010,7,310,1,'24.97KB','TEXT'
2010,8,310,1,'24.97KB','TEXT'
2010,9,300,1,'24.16KB','TEXT'
2010,10,310,1,'24.97KB','TEXT'
2010,11,300,1,'24.16KB','TEXT'
2010,12,310,1,'24.97KB','TEXT'
Total,,7300,24,'586.84KB',''
---- TYPES
INT, INT, BIGINT, BIGINT, STRING, STRING
====
---- QUERY
# The 11 table columns are listed first, then the partition columns
# (year, month).
show column stats compute_stats_db.alltypes
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',8161,0,4,4
'bool_col','BOOLEAN',2,0,1,1
'tinyint_col','TINYINT',10,0,1,1
'smallint_col','SMALLINT',10,0,2,2
'int_col','INT',10,0,4,4
'bigint_col','BIGINT',10,0,8,8
'float_col','FLOAT',10,0,4,4
'double_col','DOUBLE',10,0,8,8
'date_string_col','STRING',666,0,8,8
'string_col','STRING',10,0,1,1
'timestamp_col','TIMESTAMP',5678,0,16,16
'year','INT',2,0,4,4
'month','INT',12,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, DOUBLE, DOUBLE
====
---- QUERY
# Test computing stats on an unpartitioned text table with all types.
# The rows come from functional.alltypessmall with the partition columns
# left out of the select list.
create table compute_stats_db.alltypesnopart like functional.alltypesnopart;
insert into compute_stats_db.alltypesnopart
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
float_col, double_col, date_string_col, string_col, timestamp_col
from functional.alltypessmall;
====
---- QUERY
compute stats compute_stats_db.alltypesnopart
---- RESULTS
'Updated 1 partition(s) and 11 column(s).'
---- TYPES
STRING
====
---- QUERY
# Unpartitioned table: a single row and no partition-key columns.
show table stats compute_stats_db.alltypesnopart
---- LABELS
#ROWS, #FILES, SIZE, FORMAT
---- RESULTS
100,3,'7.73KB','TEXT'
---- TYPES
BIGINT, BIGINT, STRING, STRING
====
---- QUERY
show column stats compute_stats_db.alltypesnopart
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',105,0,4,4
'bool_col','BOOLEAN',2,0,1,1
'tinyint_col','TINYINT',10,0,1,1
'smallint_col','SMALLINT',10,0,2,2
'int_col','INT',10,0,4,4
'bigint_col','BIGINT',10,0,8,8
'float_col','FLOAT',10,0,4,4
'double_col','DOUBLE',10,0,8,8
'date_string_col','STRING',12,0,8,8
'string_col','STRING',10,0,1,1
'timestamp_col','TIMESTAMP',101,0,16,16
---- TYPES
STRING, STRING, BIGINT, BIGINT, DOUBLE, DOUBLE
====
---- QUERY
# Test computing stats on a partitioned parquet table with all types.
# The table definition is cloned from functional_parquet.alltypes and the
# rows are inserted from the text table functional.alltypes.
create table compute_stats_db.alltypes_parquet
like functional_parquet.alltypes;
insert into compute_stats_db.alltypes_parquet partition(year, month)
select * from functional.alltypes;
====
---- QUERY
compute stats compute_stats_db.alltypes_parquet
---- RESULTS
'Updated 24 partition(s) and 11 column(s).'
---- TYPES
STRING
====
---- QUERY
# Row and file counts are checked exactly; the SIZE column is only
# matched against the pattern '.+KB'.
show table stats compute_stats_db.alltypes_parquet
---- LABELS
YEAR, MONTH, #ROWS, #FILES, SIZE, FORMAT
---- RESULTS
2009,1,310,1,regex:.+KB,'PARQUET'
2009,2,280,1,regex:.+KB,'PARQUET'
2009,3,310,1,regex:.+KB,'PARQUET'
2009,4,300,1,regex:.+KB,'PARQUET'
2009,5,310,1,regex:.+KB,'PARQUET'
2009,6,300,1,regex:.+KB,'PARQUET'
2009,7,310,1,regex:.+KB,'PARQUET'
2009,8,310,1,regex:.+KB,'PARQUET'
2009,9,300,1,regex:.+KB,'PARQUET'
2009,10,310,1,regex:.+KB,'PARQUET'
2009,11,300,1,regex:.+KB,'PARQUET'
2009,12,310,1,regex:.+KB,'PARQUET'
2010,1,310,1,regex:.+KB,'PARQUET'
2010,2,280,1,regex:.+KB,'PARQUET'
2010,3,310,1,regex:.+KB,'PARQUET'
2010,4,300,1,regex:.+KB,'PARQUET'
2010,5,310,1,regex:.+KB,'PARQUET'
2010,6,300,1,regex:.+KB,'PARQUET'
2010,7,310,1,regex:.+KB,'PARQUET'
2010,8,310,1,regex:.+KB,'PARQUET'
2010,9,300,1,regex:.+KB,'PARQUET'
2010,10,310,1,regex:.+KB,'PARQUET'
2010,11,300,1,regex:.+KB,'PARQUET'
2010,12,310,1,regex:.+KB,'PARQUET'
Total,,7300,24,regex:.+KB,''
---- TYPES
INT, INT, BIGINT, BIGINT, STRING, STRING
====
---- QUERY
show column stats compute_stats_db.alltypes_parquet
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',8161,0,4,4
'bool_col','BOOLEAN',2,0,1,1
'tinyint_col','TINYINT',10,0,1,1
'smallint_col','SMALLINT',10,0,2,2
'int_col','INT',10,0,4,4
'bigint_col','BIGINT',10,0,8,8
'float_col','FLOAT',10,0,4,4
'double_col','DOUBLE',10,0,8,8
'date_string_col','STRING',666,0,8,8
'string_col','STRING',10,0,1,1
'timestamp_col','TIMESTAMP',5678,0,16,16
'year','INT',2,0,4,4
'month','INT',12,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, DOUBLE, DOUBLE
====
---- QUERY
# Test computing stats on an HBase table.
create table compute_stats_db.alltypessmall_hbase
like functional_hbase.alltypessmall;
====
---- QUERY
compute stats compute_stats_db.alltypessmall_hbase
---- RESULTS
'Updated 1 partition(s) and 13 column(s).'
---- TYPES
STRING
====
---- QUERY
# Only the start row keys are checked exactly; region location, estimated
# row count and size are matched with regexes.
show table stats compute_stats_db.alltypessmall_hbase
---- LABELS
REGION LOCATION, START ROWKEY, EST. #ROWS, SIZE
---- RESULTS: VERIFY_IS_EQUAL
regex:.+,'',regex:.+,regex:.+KB
regex:.+,'1',regex:.+,regex:.+KB
regex:.+,'3',regex:.+,regex:.+KB
regex:.+,'5',regex:.+,regex:.+KB
regex:.+,'7',regex:.+,regex:.+KB
regex:.+,'9',regex:.+,regex:.+KB
'Total','',regex:.+,regex:.+KB
---- TYPES
STRING, STRING, BIGINT, STRING
====
---- QUERY
# 'id' is listed first, followed by the remaining columns in alphabetical
# order.
show column stats compute_stats_db.alltypessmall_hbase
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',105,0,4,4
'bigint_col','BIGINT',10,0,8,8
'bool_col','BOOLEAN',2,0,1,1
'date_string_col','STRING',12,0,8,8
'double_col','DOUBLE',10,0,8,8
'float_col','FLOAT',10,0,4,4
'int_col','INT',10,0,4,4
'month','INT',4,0,4,4
'smallint_col','SMALLINT',10,0,2,2
'string_col','STRING',10,0,1,1
'timestamp_col','TIMESTAMP',101,0,16,16
'tinyint_col','TINYINT',10,0,1,1
'year','INT',1,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, DOUBLE, DOUBLE
====
---- QUERY
# Test computing stats on a binary HBase table.
create table compute_stats_db.alltypessmall_hbase_bin
like functional_hbase.alltypessmallbinary;
====
---- QUERY
compute stats compute_stats_db.alltypessmall_hbase_bin
---- RESULTS
'Updated 1 partition(s) and 13 column(s).'
---- TYPES
STRING
====
---- QUERY
# A single row is expected; every field except the empty start row key is
# matched with a regex.
show table stats compute_stats_db.alltypessmall_hbase_bin
---- LABELS
REGION LOCATION, START ROWKEY, EST. #ROWS, SIZE
---- RESULTS: VERIFY_IS_EQUAL
regex:.+,'',regex:.+,regex:.+
---- TYPES
STRING, STRING, BIGINT, STRING
====
---- QUERY
show column stats compute_stats_db.alltypessmall_hbase_bin
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',105,0,4,4
'bigint_col','BIGINT',10,0,8,8
'bool_col','BOOLEAN',2,0,1,1
'date_string_col','STRING',12,0,8,8
'double_col','DOUBLE',10,0,8,8
'float_col','FLOAT',10,0,4,4
'int_col','INT',10,0,4,4
'month','INT',4,0,4,4
'smallint_col','SMALLINT',10,0,2,2
'string_col','STRING',10,0,1,1
'timestamp_col','TIMESTAMP',101,0,16,16
'tinyint_col','TINYINT',10,0,1,1
'year','INT',1,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, DOUBLE, DOUBLE
====
---- QUERY
# Test computing stats on an empty table (cloned definition, no insert).
create table compute_stats_db.alltypes_empty like functional_rc_snap.alltypes
====
---- QUERY
compute stats compute_stats_db.alltypes_empty
---- RESULTS
'Updated 0 partition(s) and 11 column(s).'
---- TYPES
STRING
====
---- QUERY
# No partitions exist, so only the 'Total' row is expected.
show table stats compute_stats_db.alltypes_empty
---- LABELS
YEAR, MONTH, #ROWS, #FILES, SIZE, FORMAT
---- RESULTS
Total,,0,0,'0B',''
---- TYPES
INT, INT, BIGINT, BIGINT, STRING, STRING
====
---- QUERY
# Expected values for the empty table: 0 distinct values for every column
# except bool_col (2), type-width sizes for fixed-size types and 0 sizes
# for STRING columns.
show column stats compute_stats_db.alltypes_empty
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',0,0,4,4
'bool_col','BOOLEAN',2,0,1,1
'tinyint_col','TINYINT',0,0,1,1
'smallint_col','SMALLINT',0,0,2,2
'int_col','INT',0,0,4,4
'bigint_col','BIGINT',0,0,8,8
'float_col','FLOAT',0,0,4,4
'double_col','DOUBLE',0,0,8,8
'date_string_col','STRING',0,0,0,0
'string_col','STRING',0,0,0,0
'timestamp_col','TIMESTAMP',0,0,16,16
'year','INT',0,0,4,4
'month','INT',0,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, DOUBLE, DOUBLE
====
---- QUERY
# IMP-1227: Test computing stats on an HBase table that has a
# complex-typed column that Impala does not yet support.
create table compute_stats_db.map_table like functional_hbase.map_table_hbase;
====
---- QUERY
compute stats compute_stats_db.map_table
---- RESULTS
'Updated 1 partition(s) and 1 column(s).'
---- TYPES
STRING
====
---- QUERY
# A single row is expected; every field except the empty start row key is
# matched with a regex.
show table stats compute_stats_db.map_table
---- LABELS
REGION LOCATION, START ROWKEY, EST. #ROWS, SIZE
---- RESULTS: VERIFY_IS_EQUAL
regex:.+,'',regex:.+,regex:.+
---- TYPES
STRING, STRING, BIGINT, STRING
====
---- QUERY
# map_col is expected to be reported as INVALID_TYPE with -1 for every
# statistic; only 'key' carries real stats.
show column stats compute_stats_db.map_table
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'key','STRING',0,0,0,0
'map_col','INVALID_TYPE',-1,-1,-1,-1
---- TYPES
STRING, STRING, BIGINT, BIGINT, DOUBLE, DOUBLE
====
---- QUERY
# IMPALA-867: Test computing stats on Avro tables with matching/mismatched
# column definitions and Avro schema. Clone the used tables here.
create table compute_stats_db.avro_alltypes
like functional_avro_snap.alltypes;
create table compute_stats_db.avro_alltypes_extra_coldef
like functional_avro_snap.alltypes_extra_coldef;
create table compute_stats_db.avro_alltypes_missing_coldef
like functional_avro_snap.alltypes_missing_coldef;
create table compute_stats_db.avro_alltypes_type_mismatch
like functional_avro_snap.alltypes_type_mismatch;
====
---- QUERY
# Avro table with matching column definitions and Avro schema
compute stats compute_stats_db.avro_alltypes
---- RESULTS
'Updated 0 partition(s) and 11 column(s).'
---- TYPES
STRING
====
---- QUERY
show table stats compute_stats_db.avro_alltypes
---- LABELS
YEAR, MONTH, #ROWS, #FILES, SIZE, FORMAT
---- RESULTS
Total,,0,0,'0B',''
---- TYPES
INT, INT, BIGINT, BIGINT, STRING, STRING
====
---- QUERY
# In this table tinyint_col and smallint_col are expected as INT and
# timestamp_col as STRING.
show column stats compute_stats_db.avro_alltypes
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',0,0,4,4
'bool_col','BOOLEAN',2,0,1,1
'tinyint_col','INT',0,0,4,4
'smallint_col','INT',0,0,4,4
'int_col','INT',0,0,4,4
'bigint_col','BIGINT',0,0,8,8
'float_col','FLOAT',0,0,4,4
'double_col','DOUBLE',0,0,8,8
'date_string_col','STRING',0,0,0,0
'string_col','STRING',0,0,0,0
'timestamp_col','STRING',0,0,0,0
'year','INT',0,0,4,4
'month','INT',0,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, DOUBLE, DOUBLE
====
---- QUERY
# Avro table with an extra column definition.
compute stats compute_stats_db.avro_alltypes_extra_coldef
---- RESULTS
'Updated 0 partition(s) and 12 column(s).'
---- TYPES
STRING
====
---- QUERY
show table stats compute_stats_db.avro_alltypes_extra_coldef
---- LABELS
YEAR, MONTH, #ROWS, #FILES, SIZE, FORMAT
---- RESULTS
Total,,0,0,'0B',''
---- TYPES
INT, INT, BIGINT, BIGINT, STRING, STRING
====
---- QUERY
# extra_col is expected after timestamp_col and before the partition
# columns.
show column stats compute_stats_db.avro_alltypes_extra_coldef
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',0,0,4,4
'bool_col','BOOLEAN',2,0,1,1
'tinyint_col','TINYINT',0,0,1,1
'smallint_col','SMALLINT',0,0,2,2
'int_col','INT',0,0,4,4
'bigint_col','BIGINT',0,0,8,8
'float_col','FLOAT',0,0,4,4
'double_col','DOUBLE',0,0,8,8
'date_string_col','STRING',0,0,0,0
'string_col','STRING',0,0,0,0
'timestamp_col','TIMESTAMP',0,0,16,16
'extra_col','STRING',0,0,0,0
'year','INT',0,0,4,4
'month','INT',0,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, DOUBLE, DOUBLE
====
---- QUERY
# Avro table with two missing column definitions.
compute stats compute_stats_db.avro_alltypes_missing_coldef
---- RESULTS
'Updated 0 partition(s) and 9 column(s).'
---- TYPES
STRING
====
---- QUERY
show table stats compute_stats_db.avro_alltypes_missing_coldef
---- LABELS
YEAR, MONTH, #ROWS, #FILES, SIZE, FORMAT
---- RESULTS
Total,,0,0,'0B',''
---- TYPES
INT, INT, BIGINT, BIGINT, STRING, STRING
====
---- QUERY
# tinyint_col and timestamp_col are the two columns absent from the
# expected output.
show column stats compute_stats_db.avro_alltypes_missing_coldef
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',0,0,4,4
'bool_col','BOOLEAN',2,0,1,1
'smallint_col','SMALLINT',0,0,2,2
'int_col','INT',0,0,4,4
'bigint_col','BIGINT',0,0,8,8
'float_col','FLOAT',0,0,4,4
'double_col','DOUBLE',0,0,8,8
'date_string_col','STRING',0,0,0,0
'string_col','STRING',0,0,0,0
'year','INT',0,0,4,4
'month','INT',0,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, DOUBLE, DOUBLE
====
---- QUERY
# Avro table with one column definition having a different
# type than the Avro schema (bigint_col is a string).
compute stats compute_stats_db.avro_alltypes_type_mismatch
---- RESULTS
'Updated 0 partition(s) and 11 column(s).'
---- TYPES
STRING
====
---- QUERY
show table stats compute_stats_db.avro_alltypes_type_mismatch
---- LABELS
YEAR, MONTH, #ROWS, #FILES, SIZE, FORMAT
---- RESULTS
Total,,0,0,'0B',''
---- TYPES
INT, INT, BIGINT, BIGINT, STRING, STRING
====
---- QUERY
# bigint_col is expected to be reported as STRING here.
show column stats compute_stats_db.avro_alltypes_type_mismatch
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',0,0,4,4
'bool_col','BOOLEAN',2,0,1,1
'tinyint_col','TINYINT',0,0,1,1
'smallint_col','SMALLINT',0,0,2,2
'int_col','INT',0,0,4,4
'bigint_col','STRING',0,0,0,0
'float_col','FLOAT',0,0,4,4
'double_col','DOUBLE',0,0,8,8
'date_string_col','STRING',0,0,0,0
'string_col','STRING',0,0,0,0
'timestamp_col','TIMESTAMP',0,0,16,16
'year','INT',0,0,4,4
'month','INT',0,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, DOUBLE, DOUBLE
====