Files
impala/testdata/workloads/functional-query/queries/QueryTest/compute-stats.test
Alex Behm c908ba1b7e IMPALA-1136: Support loading Avro tables without an explicit Avro schema
Hive allows creating Avro tables without an explicit Avro schema since 0.14.0.
For such tables, the Avro schema is inferred from the column definitions,
and not stored in the metadata at all (no Avro schema literal or Avro schema file).

This patch adds support for loading the metadata of such tables, although Impala
currently cannot create such tables (expect a follow-on patch).

Change-Id: I9e66921ffbeff7ce6db9619bcfb30278b571cd95
Reviewed-on: http://gerrit.cloudera.org:8080/538
Reviewed-by: Alex Behm <alex.behm@cloudera.com>
Tested-by: Internal Jenkins
2015-07-31 12:13:37 +00:00

929 lines
31 KiB
Plaintext

====
---- QUERY
# test computing stats on a partitioned text table with all types
create table compute_stats_db.alltypes like functional.alltypes;
insert into compute_stats_db.alltypes partition(year, month)
select * from functional.alltypes;
====
---- QUERY
compute stats compute_stats_db.alltypes
---- RESULTS
'Updated 24 partition(s) and 11 column(s).'
---- TYPES
STRING
====
---- QUERY
show table stats compute_stats_db.alltypes
---- LABELS
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
---- RESULTS
'2009','1',310,1,'24.56KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2009','2',280,1,'22.27KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2009','3',310,1,'24.67KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2009','4',300,1,'24.06KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2009','5',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2009','6',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2009','7',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2009','8',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2009','9',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2009','10',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2009','11',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2009','12',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2010','1',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2010','2',280,1,'22.54KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2010','3',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2010','4',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2010','5',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2010','6',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2010','7',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2010','8',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2010','9',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2010','10',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2010','11',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2010','12',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'Total','',7300,24,'586.84KB','0B','','','',''
---- TYPES
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
show column stats compute_stats_db.alltypes
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',7505,-1,4,4
'bool_col','BOOLEAN',2,-1,1,1
'tinyint_col','TINYINT',10,-1,1,1
'smallint_col','SMALLINT',10,-1,2,2
'int_col','INT',10,-1,4,4
'bigint_col','BIGINT',10,-1,8,8
'float_col','FLOAT',10,-1,4,4
'double_col','DOUBLE',10,-1,8,8
'date_string_col','STRING',736,-1,8,8
'string_col','STRING',10,-1,1,1
'timestamp_col','TIMESTAMP',7554,-1,16,16
'year','INT',2,0,4,4
'month','INT',12,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
====
---- QUERY
# drop stats from this table
drop stats compute_stats_db.alltypes
====
---- QUERY
show table stats compute_stats_db.alltypes
---- LABELS
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
---- RESULTS
'2009','1',-1,1,'24.56KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2009','10',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2009','11',-1,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2009','12',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2009','2',-1,1,'22.27KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2009','3',-1,1,'24.67KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2009','4',-1,1,'24.06KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2009','5',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2009','6',-1,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2009','7',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2009','8',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2009','9',-1,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2010','1',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2010','10',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2010','11',-1,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2010','12',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2010','2',-1,1,'22.54KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2010','3',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2010','4',-1,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2010','5',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2010','6',-1,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2010','7',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2010','8',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2010','9',-1,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'Total','',-1,24,'586.84KB','0B','','','',''
---- TYPES
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
# Note - the NDV for partition columns is read from the table metadata.
show column stats compute_stats_db.alltypes
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',-1,-1,4,4
'bool_col','BOOLEAN',-1,-1,1,1
'tinyint_col','TINYINT',-1,-1,1,1
'smallint_col','SMALLINT',-1,-1,2,2
'int_col','INT',-1,-1,4,4
'bigint_col','BIGINT',-1,-1,8,8
'float_col','FLOAT',-1,-1,4,4
'double_col','DOUBLE',-1,-1,8,8
'date_string_col','STRING',-1,-1,-1,-1
'string_col','STRING',-1,-1,-1,-1
'timestamp_col','TIMESTAMP',-1,-1,16,16
'year','INT',2,0,4,4
'month','INT',12,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
====
---- QUERY
# Add partitions with NULL values and check for stats.
alter table compute_stats_db.alltypes add partition (year=NULL, month=NULL)
---- RESULTS
====
---- QUERY
show column stats compute_stats_db.alltypes
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',-1,-1,4,4
'bool_col','BOOLEAN',-1,-1,1,1
'tinyint_col','TINYINT',-1,-1,1,1
'smallint_col','SMALLINT',-1,-1,2,2
'int_col','INT',-1,-1,4,4
'bigint_col','BIGINT',-1,-1,8,8
'float_col','FLOAT',-1,-1,4,4
'double_col','DOUBLE',-1,-1,8,8
'date_string_col','STRING',-1,-1,-1,-1
'string_col','STRING',-1,-1,-1,-1
'timestamp_col','TIMESTAMP',-1,-1,16,16
'year','INT',3,1,4,4
'month','INT',13,1,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
====
---- QUERY
alter table compute_stats_db.alltypes add partition (year=2011, month=NULL)
---- RESULTS
====
---- QUERY
show column stats compute_stats_db.alltypes
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',-1,-1,4,4
'bool_col','BOOLEAN',-1,-1,1,1
'tinyint_col','TINYINT',-1,-1,1,1
'smallint_col','SMALLINT',-1,-1,2,2
'int_col','INT',-1,-1,4,4
'bigint_col','BIGINT',-1,-1,8,8
'float_col','FLOAT',-1,-1,4,4
'double_col','DOUBLE',-1,-1,8,8
'date_string_col','STRING',-1,-1,-1,-1
'string_col','STRING',-1,-1,-1,-1
'timestamp_col','TIMESTAMP',-1,-1,16,16
'year','INT',4,1,4,4
'month','INT',13,2,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
====
---- QUERY
# Drop the partitions with NULL values and check for stats.
alter table compute_stats_db.alltypes drop partition (year=NULL, month=NULL)
---- RESULTS
====
---- QUERY
show column stats compute_stats_db.alltypes
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',-1,-1,4,4
'bool_col','BOOLEAN',-1,-1,1,1
'tinyint_col','TINYINT',-1,-1,1,1
'smallint_col','SMALLINT',-1,-1,2,2
'int_col','INT',-1,-1,4,4
'bigint_col','BIGINT',-1,-1,8,8
'float_col','FLOAT',-1,-1,4,4
'double_col','DOUBLE',-1,-1,8,8
'date_string_col','STRING',-1,-1,-1,-1
'string_col','STRING',-1,-1,-1,-1
'timestamp_col','TIMESTAMP',-1,-1,16,16
'year','INT',3,0,4,4
'month','INT',13,1,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
====
---- QUERY
alter table compute_stats_db.alltypes drop partition (year=2011, month=NULL)
---- RESULTS
====
---- QUERY
show column stats compute_stats_db.alltypes
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',-1,-1,4,4
'bool_col','BOOLEAN',-1,-1,1,1
'tinyint_col','TINYINT',-1,-1,1,1
'smallint_col','SMALLINT',-1,-1,2,2
'int_col','INT',-1,-1,4,4
'bigint_col','BIGINT',-1,-1,8,8
'float_col','FLOAT',-1,-1,4,4
'double_col','DOUBLE',-1,-1,8,8
'date_string_col','STRING',-1,-1,-1,-1
'string_col','STRING',-1,-1,-1,-1
'timestamp_col','TIMESTAMP',-1,-1,16,16
'year','INT',2,0,4,4
'month','INT',12,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
====
---- QUERY
# drop stats from this table a second time, should not throw an error.
drop stats compute_stats_db.alltypes
====
---- QUERY
# test computing stats on an partitioned text table with all types
create table compute_stats_db.alltypesnopart like functional.alltypesnopart;
insert into compute_stats_db.alltypesnopart
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col,
double_col, date_string_col, string_col, timestamp_col
from functional.alltypessmall;
====
---- QUERY
compute stats compute_stats_db.alltypesnopart
---- RESULTS
'Updated 1 partition(s) and 11 column(s).'
---- TYPES
STRING
====
---- QUERY
show table stats compute_stats_db.alltypesnopart
---- LABELS
#ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
---- RESULTS
100,3,'7.73KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
---- TYPES
BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
show column stats compute_stats_db.alltypesnopart
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',99,-1,4,4
'bool_col','BOOLEAN',2,-1,1,1
'tinyint_col','TINYINT',10,-1,1,1
'smallint_col','SMALLINT',10,-1,2,2
'int_col','INT',10,-1,4,4
'bigint_col','BIGINT',10,-1,8,8
'float_col','FLOAT',10,-1,4,4
'double_col','DOUBLE',10,-1,8,8
'date_string_col','STRING',12,-1,8,8
'string_col','STRING',10,-1,1,1
'timestamp_col','TIMESTAMP',101,-1,16,16
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
====
---- QUERY
# test computing stats on a partitioned parquet table with all types
create table compute_stats_db.alltypes_parquet
like functional_parquet.alltypes;
insert into compute_stats_db.alltypes_parquet partition(year, month)
select * from functional.alltypes;
====
---- QUERY
compute stats compute_stats_db.alltypes_parquet
---- RESULTS
'Updated 24 partition(s) and 11 column(s).'
---- TYPES
STRING
====
---- QUERY
show table stats compute_stats_db.alltypes_parquet
---- LABELS
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
---- RESULTS
'2009','1',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
'2009','2',280,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
'2009','3',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
'2009','4',300,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
'2009','5',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
'2009','6',300,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
'2009','7',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
'2009','8',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
'2009','9',300,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
'2009','10',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
'2009','11',300,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
'2009','12',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
'2010','1',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
'2010','2',280,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
'2010','3',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
'2010','4',300,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
'2010','5',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
'2010','6',300,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
'2010','7',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
'2010','8',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
'2010','9',300,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
'2010','10',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
'2010','11',300,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
'2010','12',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
'Total','',7300,24,regex:.+KB,'0B','','','',''
---- TYPES
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
show column stats compute_stats_db.alltypes_parquet
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',7505,-1,4,4
'bool_col','BOOLEAN',2,-1,1,1
'tinyint_col','TINYINT',10,-1,1,1
'smallint_col','SMALLINT',10,-1,2,2
'int_col','INT',10,-1,4,4
'bigint_col','BIGINT',10,-1,8,8
'float_col','FLOAT',10,-1,4,4
'double_col','DOUBLE',10,-1,8,8
'date_string_col','STRING',736,-1,8,8
'string_col','STRING',10,-1,1,1
'timestamp_col','TIMESTAMP',7554,-1,16,16
'year','INT',2,0,4,4
'month','INT',12,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
====
---- QUERY
# test computing stats on an empty table
create table compute_stats_db.alltypes_empty like functional_rc_snap.alltypes
====
---- QUERY
compute stats compute_stats_db.alltypes_empty
---- RESULTS
'Updated 0 partition(s) and 11 column(s).'
---- TYPES
STRING
====
---- QUERY
show table stats compute_stats_db.alltypes_empty
---- LABELS
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
---- RESULTS
'Total','',0,0,'0B','0B','','','',''
---- TYPES
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
show column stats compute_stats_db.alltypes_empty
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',0,-1,4,4
'bool_col','BOOLEAN',2,-1,1,1
'tinyint_col','TINYINT',0,-1,1,1
'smallint_col','SMALLINT',0,-1,2,2
'int_col','INT',0,-1,4,4
'bigint_col','BIGINT',0,-1,8,8
'float_col','FLOAT',0,-1,4,4
'double_col','DOUBLE',0,-1,8,8
'date_string_col','STRING',0,-1,0,0
'string_col','STRING',0,-1,0,0
'timestamp_col','TIMESTAMP',0,-1,16,16
'year','INT',0,0,4,4
'month','INT',0,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
====
---- QUERY
# IMPALA-867: Test computing stats on Avro tables created by Hive with
# matching/mismatched column definitions and Avro schema.
# Clone the used tables here.
create table compute_stats_db.avro_hive_alltypes
like functional_avro_snap.alltypes;
create table compute_stats_db.avro_hive_alltypes_extra_coldef
like functional_avro_snap.alltypes_extra_coldef;
create table compute_stats_db.avro_hive_alltypes_missing_coldef
like functional_avro_snap.alltypes_missing_coldef;
create table compute_stats_db.avro_hive_alltypes_type_mismatch
like functional_avro_snap.alltypes_type_mismatch;
create table compute_stats_db.avro_hive_no_avro_schema
like functional_avro_snap.no_avro_schema;
====
---- QUERY
# Avro table with matching column definitions and Avro schema
compute stats compute_stats_db.avro_hive_alltypes
---- RESULTS
'Updated 0 partition(s) and 11 column(s).'
---- TYPES
STRING
====
---- QUERY
show table stats compute_stats_db.avro_hive_alltypes
---- LABELS
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
---- RESULTS
'Total','',0,0,'0B','0B','','','',''
---- TYPES
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
show column stats compute_stats_db.avro_hive_alltypes
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',0,-1,4,4
'bool_col','BOOLEAN',2,-1,1,1
'tinyint_col','INT',0,-1,4,4
'smallint_col','INT',0,-1,4,4
'int_col','INT',0,-1,4,4
'bigint_col','BIGINT',0,-1,8,8
'float_col','FLOAT',0,-1,4,4
'double_col','DOUBLE',0,-1,8,8
'date_string_col','STRING',0,-1,0,0
'string_col','STRING',0,-1,0,0
'timestamp_col','STRING',0,-1,0,0
'year','INT',0,0,4,4
'month','INT',0,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
====
---- QUERY
# Avro table with an extra column definition.
compute stats compute_stats_db.avro_hive_alltypes_extra_coldef
---- RESULTS
'Updated 0 partition(s) and 12 column(s).'
---- TYPES
STRING
====
---- QUERY
show table stats compute_stats_db.avro_hive_alltypes_extra_coldef
---- LABELS
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
---- RESULTS
'Total','',0,0,'0B','0B','','','',''
---- TYPES
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
show column stats compute_stats_db.avro_hive_alltypes_extra_coldef
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',0,-1,4,4
'bool_col','BOOLEAN',2,-1,1,1
'tinyint_col','TINYINT',0,-1,1,1
'smallint_col','SMALLINT',0,-1,2,2
'int_col','INT',0,-1,4,4
'bigint_col','BIGINT',0,-1,8,8
'float_col','FLOAT',0,-1,4,4
'double_col','DOUBLE',0,-1,8,8
'date_string_col','STRING',0,-1,0,0
'string_col','STRING',0,-1,0,0
'timestamp_col','TIMESTAMP',0,-1,16,16
'extra_col','STRING',0,-1,0,0
'year','INT',0,0,4,4
'month','INT',0,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
====
---- QUERY
# Avro table with missing two column definitions.
compute stats compute_stats_db.avro_hive_alltypes_missing_coldef
---- RESULTS
'Updated 0 partition(s) and 9 column(s).'
---- TYPES
STRING
====
---- QUERY
show table stats compute_stats_db.avro_hive_alltypes_missing_coldef
---- LABELS
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
---- RESULTS
'Total','',0,0,'0B','0B','','','',''
---- TYPES
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
show column stats compute_stats_db.avro_hive_alltypes_missing_coldef
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',0,-1,4,4
'bool_col','BOOLEAN',2,-1,1,1
'smallint_col','SMALLINT',0,-1,2,2
'int_col','INT',0,-1,4,4
'bigint_col','BIGINT',0,-1,8,8
'float_col','FLOAT',0,-1,4,4
'double_col','DOUBLE',0,-1,8,8
'date_string_col','STRING',0,-1,0,0
'string_col','STRING',0,-1,0,0
'year','INT',0,0,4,4
'month','INT',0,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
====
---- QUERY
# Avro table with one column definition having a different
# type than the Avro schema (bigint_col is a string).
compute stats compute_stats_db.avro_hive_alltypes_type_mismatch
---- RESULTS
'Updated 0 partition(s) and 11 column(s).'
---- TYPES
STRING
====
---- QUERY
show table stats compute_stats_db.avro_hive_alltypes_type_mismatch
---- LABELS
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
---- RESULTS
'Total','',0,0,'0B','0B','','','',''
---- TYPES
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
show column stats compute_stats_db.avro_hive_alltypes_type_mismatch
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',0,-1,4,4
'bool_col','BOOLEAN',2,-1,1,1
'tinyint_col','TINYINT',0,-1,1,1
'smallint_col','SMALLINT',0,-1,2,2
'int_col','INT',0,-1,4,4
'bigint_col','STRING',0,-1,0,0
'float_col','FLOAT',0,-1,4,4
'double_col','DOUBLE',0,-1,8,8
'date_string_col','STRING',0,-1,0,0
'string_col','STRING',0,-1,0,0
'timestamp_col','TIMESTAMP',0,-1,16,16
'year','INT',0,0,4,4
'month','INT',0,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
====
---- QUERY
# Avro table without an Avro schema created by Hive.
# The Avro schema is inferred from the column definitions,
compute stats compute_stats_db.avro_hive_no_avro_schema
---- RESULTS
'Updated 0 partition(s) and 11 column(s).'
---- TYPES
STRING
====
---- QUERY
show table stats compute_stats_db.avro_hive_no_avro_schema
---- LABELS
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
---- RESULTS
'Total','',0,0,'0B','0B','','','',''
---- TYPES
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
show column stats compute_stats_db.avro_hive_no_avro_schema
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',0,-1,4,4
'bool_col','BOOLEAN',2,-1,1,1
'tinyint_col','INT',0,-1,4,4
'smallint_col','INT',0,-1,4,4
'int_col','INT',0,-1,4,4
'bigint_col','BIGINT',0,-1,8,8
'float_col','FLOAT',0,-1,4,4
'double_col','DOUBLE',0,-1,8,8
'date_string_col','STRING',0,-1,0,0
'string_col','STRING',0,-1,0,0
'timestamp_col','STRING',0,-1,0,0
'year','INT',0,0,4,4
'month','INT',0,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
====
---- QUERY
# Test Avro table created without any column definitions.
create table compute_stats_db.avro_impala_alltypes_no_coldefs
partitioned by (year int, month int)
with serdeproperties
('avro.schema.url'='$FILESYSTEM_PREFIX/test-warehouse/avro_schemas/functional/alltypes.json')
stored as avro;
====
---- QUERY
compute stats compute_stats_db.avro_impala_alltypes_no_coldefs
---- RESULTS
'Updated 0 partition(s) and 11 column(s).'
---- TYPES
STRING
====
---- QUERY
show table stats compute_stats_db.avro_impala_alltypes_no_coldefs
---- LABELS
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
---- RESULTS
'Total','',0,0,'0B','0B','','','',''
---- TYPES
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
show column stats compute_stats_db.avro_impala_alltypes_no_coldefs
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',0,-1,4,4
'bool_col','BOOLEAN',2,-1,1,1
'tinyint_col','INT',0,-1,4,4
'smallint_col','INT',0,-1,4,4
'int_col','INT',0,-1,4,4
'bigint_col','BIGINT',0,-1,8,8
'float_col','FLOAT',0,-1,4,4
'double_col','DOUBLE',0,-1,8,8
'date_string_col','STRING',0,-1,0,0
'string_col','STRING',0,-1,0,0
'timestamp_col','STRING',0,-1,0,0
'year','INT',0,0,4,4
'month','INT',0,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
====
---- QUERY
# IMPALA-1104: Test computing stats on Avro tables created by Impala
# with mismatched column definitions and Avro schema. Mismatched column name.
create table compute_stats_db.avro_impala_alltypes_bad_colname
(id int, bool_col boolean, tinyint_col int, smallint_col int, bad_int_col int,
bigint_col bigint, float_col float, double_col double, date_string_col string,
string_col string, timestamp_col timestamp)
partitioned by (year int, month int)
with serdeproperties
('avro.schema.url'='$FILESYSTEM_PREFIX/test-warehouse/avro_schemas/functional/alltypes.json')
stored as avro;
====
---- QUERY
compute stats compute_stats_db.avro_impala_alltypes_bad_colname
---- RESULTS
'Updated 0 partition(s) and 11 column(s).'
---- TYPES
STRING
====
---- QUERY
show table stats compute_stats_db.avro_impala_alltypes_bad_colname
---- LABELS
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
---- RESULTS
'Total','',0,0,'0B','0B','','','',''
---- TYPES
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
show column stats compute_stats_db.avro_impala_alltypes_bad_colname
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',0,-1,4,4
'bool_col','BOOLEAN',2,-1,1,1
'tinyint_col','INT',0,-1,4,4
'smallint_col','INT',0,-1,4,4
'int_col','INT',0,-1,4,4
'bigint_col','BIGINT',0,-1,8,8
'float_col','FLOAT',0,-1,4,4
'double_col','DOUBLE',0,-1,8,8
'date_string_col','STRING',0,-1,0,0
'string_col','STRING',0,-1,0,0
'timestamp_col','STRING',0,-1,0,0
'year','INT',0,0,4,4
'month','INT',0,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
====
---- QUERY
# IMPALA-1104: Test computing stats on Avro tables created by Impala
# with mismatched column definitions and Avro schema. Mismatched column type.
create table compute_stats_db.avro_impala_alltypes_bad_coltype
(id int, bool_col boolean, tinyint_col int, smallint_col int, int_col int,
bigint_col bigint, float_col float, double_col bigint, date_string_col string,
string_col string, timestamp_col timestamp)
partitioned by (year int, month int)
with serdeproperties
('avro.schema.url'='$FILESYSTEM_PREFIX/test-warehouse/avro_schemas/functional/alltypes.json')
stored as avro;
====
---- QUERY
compute stats compute_stats_db.avro_impala_alltypes_bad_coltype
---- RESULTS
'Updated 0 partition(s) and 11 column(s).'
---- TYPES
STRING
====
---- QUERY
show table stats compute_stats_db.avro_impala_alltypes_bad_coltype
---- LABELS
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
---- RESULTS
'Total','',0,0,'0B','0B','','','',''
---- TYPES
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
show column stats compute_stats_db.avro_impala_alltypes_bad_coltype
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',0,-1,4,4
'bool_col','BOOLEAN',2,-1,1,1
'tinyint_col','INT',0,-1,4,4
'smallint_col','INT',0,-1,4,4
'int_col','INT',0,-1,4,4
'bigint_col','BIGINT',0,-1,8,8
'float_col','FLOAT',0,-1,4,4
'double_col','DOUBLE',0,-1,8,8
'date_string_col','STRING',0,-1,0,0
'string_col','STRING',0,-1,0,0
'timestamp_col','STRING',0,-1,0,0
'year','INT',0,0,4,4
'month','INT',0,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
====
---- QUERY
# For IMPALA-1055, using a database called `parquet` to test cases where the name of
# the database is a keyword.
CREATE TABLE `parquet`.billion_parquet(id INT);
====
---- QUERY
COMPUTE STATS `parquet`.billion_parquet
---- RESULTS
'Updated 1 partition(s) and 1 column(s).'
---- TYPES
STRING
====
---- QUERY
CREATE TABLE `parquet`.`parquet`(id INT)
====
---- QUERY
COMPUTE STATS `parquet`.`parquet`
---- RESULTS
'Updated 1 partition(s) and 1 column(s).'
---- TYPES
STRING
====
---- QUERY
# IMPALA-883: Compute table stats for an empty partition.
create table compute_stats_db.empty_partitioned (i int) partitioned by (j int);
alter table compute_stats_db.empty_partitioned add partition (j=1);
====
---- QUERY
compute stats compute_stats_db.empty_partitioned
---- RESULTS
'Updated 1 partition(s) and 1 column(s).'
---- TYPES
STRING
====
---- QUERY
show table stats compute_stats_db.empty_partitioned
---- RESULTS
'1',0,0,'0B','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'Total',0,0,'0B','0B','','','',''
---- TYPES
STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
# Insert non empty partition to the table with empty partition.
insert into table compute_stats_db.empty_partitioned partition (j=2) select 1;
====
---- QUERY
# Verify partition stats work with empty and non-empty partition.
drop stats compute_stats_db.empty_partitioned;
compute stats compute_stats_db.empty_partitioned;
---- RESULTS
'Updated 2 partition(s) and 1 column(s).'
---- TYPES
STRING
====
---- QUERY
show table stats compute_stats_db.empty_partitioned
---- RESULTS
'1',0,0,'0B','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2',1,1,'2B','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'Total',1,1,'2B','0B','','','',''
---- TYPES
STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
# Verify partition stats work with empty and non-empty partition.
drop stats compute_stats_db.empty_partitioned;
compute stats compute_stats_db.empty_partitioned;
---- RESULTS
'Updated 2 partition(s) and 1 column(s).'
---- TYPES
STRING
====
---- QUERY
show table stats compute_stats_db.empty_partitioned
---- RESULTS
'1',0,0,'0B','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2',1,1,'2B','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'Total',1,1,'2B','0B','','','',''
---- TYPES
STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
# IMPALA-1614 Verify that COMPUTE STATS works on a table whose name starts with numbers.
create table compute_stats_db.`123_table` (i int, 1p int) partitioned by (2j int);
alter table compute_stats_db.`123_table` add partition (2j=1);
====
---- QUERY
compute stats compute_stats_db.`123_table`
---- RESULTS
'Updated 1 partition(s) and 2 column(s).'
---- TYPES
STRING
====
---- QUERY
show table stats compute_stats_db.`123_table`
---- RESULTS
'1',0,0,'0B','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'Total',0,0,'0B','0B','','','',''
---- TYPES
STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
====
---- QUERY
# IMPALA-1629: Verify that the column stats for CHAR/VARCHAR columns are populated.
# The values of date_string_col always have exactly 8 characters. The CHAR/VARCHAR
# sizes below are chosen such that they are smaller, equal, and greater than the
# source data values, in particular, to test the CHAR padding behavior.
create table compute_stats_db.chars_tbl (
id int,
ch1 char(1),
ch2 char(8),
ch3 char(20),
ts timestamp,
vc1 varchar(1),
vc2 varchar(8),
vc3 varchar(20)
)
partitioned by (
year char(5),
day varchar(13)
);
insert overwrite compute_stats_db.chars_tbl partition(year, day)
select
id,
cast(date_string_col as char(1)),
cast(date_string_col as char(8)),
cast(date_string_col as char(20)),
timestamp_col,
cast(date_string_col as varchar(1)),
cast(date_string_col as varchar(8)),
cast(date_string_col as varchar(20)),
cast(year as char(5)),
cast(day as varchar(13))
from functional.alltypesagg
where day is null or day in (3, 7);
---- RESULTS: VERIFY_IS_EQUAL_SORTED
year=2010 /day=7/: 1000
year=2010 /day=3/: 1000
year=2010 /day=__HIVE_DEFAULT_PARTITION__/: 1000
====
---- QUERY
compute stats compute_stats_db.chars_tbl
---- RESULTS
'Updated 3 partition(s) and 8 column(s).'
---- TYPES
STRING
====
---- QUERY
show column stats compute_stats_db.chars_tbl
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',2915,-1,4,4
'ch1','CHAR(1)',1,-1,1,1
'ch2','CHAR(8)',10,-1,8,8
'ch3','CHAR(20)',10,-1,8,8
'ts','TIMESTAMP',2871,-1,16,16
'vc1','VARCHAR(1)',1,-1,1,1
'vc2','VARCHAR(8)',10,-1,8,8
'vc3','VARCHAR(20)',10,-1,8,8
'year','CHAR(5)',1,0,5,5
'day','VARCHAR(13)',3,1,-1,-1
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
====
---- QUERY
# Test that compute stats on a Hive-created Avro table without column defs
# works (HIVE-6308, IMPALA-867).
create table compute_stats_db.alltypes_no_coldef like functional_avro_snap.alltypes_no_coldef;
compute stats compute_stats_db.alltypes_no_coldef
---- RESULTS
'Updated 1 partition(s) and 11 column(s).'
---- TYPES
STRING
====
---- QUERY
show column stats compute_stats_db.alltypes_no_coldef
---- RESULTS
'id','INT',0,-1,4,4
'bool_col','BOOLEAN',2,-1,1,1
'tinyint_col','INT',0,-1,4,4
'smallint_col','INT',0,-1,4,4
'int_col','INT',0,-1,4,4
'bigint_col','BIGINT',0,-1,8,8
'float_col','FLOAT',0,-1,4,4
'double_col','DOUBLE',0,-1,8,8
'date_string_col','STRING',0,-1,0,0
'string_col','STRING',0,-1,0,0
'timestamp_col','STRING',0,-1,0,0
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
====