mirror of
https://github.com/apache/impala.git
synced 2026-01-03 15:00:52 -05:00
Hive allows creating Avro tables without an explicit Avro schema since 0.14.0. For such tables, the Avro schema is inferred from the column definitions, and not stored in the metadata at all (no Avro schema literal or Avro schema file). This patch adds support for loading the metadata of such tables, although Impala currently cannot create such tables (expect a follow-on patch). Change-Id: I9e66921ffbeff7ce6db9619bcfb30278b571cd95 Reviewed-on: http://gerrit.cloudera.org:8080/538 Reviewed-by: Alex Behm <alex.behm@cloudera.com> Tested-by: Internal Jenkins
929 lines
31 KiB
Plaintext
929 lines
31 KiB
Plaintext
====
|
|
---- QUERY
|
|
# test computing stats on a partitioned text table with all types
|
|
create table compute_stats_db.alltypes like functional.alltypes;
|
|
insert into compute_stats_db.alltypes partition(year, month)
|
|
select * from functional.alltypes;
|
|
====
|
|
---- QUERY
|
|
compute stats compute_stats_db.alltypes
|
|
---- RESULTS
|
|
'Updated 24 partition(s) and 11 column(s).'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
show table stats compute_stats_db.alltypes
|
|
---- LABELS
|
|
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
|
|
---- RESULTS
|
|
'2009','1',310,1,'24.56KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2009','2',280,1,'22.27KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2009','3',310,1,'24.67KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2009','4',300,1,'24.06KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2009','5',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2009','6',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2009','7',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2009','8',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2009','9',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2009','10',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2009','11',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2009','12',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2010','1',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2010','2',280,1,'22.54KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2010','3',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2010','4',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2010','5',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2010','6',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2010','7',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2010','8',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2010','9',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2010','10',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2010','11',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2010','12',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'Total','',7300,24,'586.84KB','0B','','','',''
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
show column stats compute_stats_db.alltypes
|
|
---- LABELS
|
|
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
|
|
---- RESULTS
|
|
'id','INT',7505,-1,4,4
|
|
'bool_col','BOOLEAN',2,-1,1,1
|
|
'tinyint_col','TINYINT',10,-1,1,1
|
|
'smallint_col','SMALLINT',10,-1,2,2
|
|
'int_col','INT',10,-1,4,4
|
|
'bigint_col','BIGINT',10,-1,8,8
|
|
'float_col','FLOAT',10,-1,4,4
|
|
'double_col','DOUBLE',10,-1,8,8
|
|
'date_string_col','STRING',736,-1,8,8
|
|
'string_col','STRING',10,-1,1,1
|
|
'timestamp_col','TIMESTAMP',7554,-1,16,16
|
|
'year','INT',2,0,4,4
|
|
'month','INT',12,0,4,4
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
|
|
====
|
|
---- QUERY
|
|
# drop stats from this table
|
|
drop stats compute_stats_db.alltypes
|
|
====
|
|
---- QUERY
|
|
show table stats compute_stats_db.alltypes
|
|
---- LABELS
|
|
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
|
|
---- RESULTS
|
|
'2009','1',-1,1,'24.56KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2009','10',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2009','11',-1,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2009','12',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2009','2',-1,1,'22.27KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2009','3',-1,1,'24.67KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2009','4',-1,1,'24.06KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2009','5',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2009','6',-1,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2009','7',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2009','8',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2009','9',-1,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2010','1',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2010','10',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2010','11',-1,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2010','12',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2010','2',-1,1,'22.54KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2010','3',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2010','4',-1,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2010','5',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2010','6',-1,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2010','7',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2010','8',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2010','9',-1,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'Total','',-1,24,'586.84KB','0B','','','',''
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Note - the NDV for partition columns is read from the table metadata.
|
|
show column stats compute_stats_db.alltypes
|
|
---- LABELS
|
|
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
|
|
---- RESULTS
|
|
'id','INT',-1,-1,4,4
|
|
'bool_col','BOOLEAN',-1,-1,1,1
|
|
'tinyint_col','TINYINT',-1,-1,1,1
|
|
'smallint_col','SMALLINT',-1,-1,2,2
|
|
'int_col','INT',-1,-1,4,4
|
|
'bigint_col','BIGINT',-1,-1,8,8
|
|
'float_col','FLOAT',-1,-1,4,4
|
|
'double_col','DOUBLE',-1,-1,8,8
|
|
'date_string_col','STRING',-1,-1,-1,-1
|
|
'string_col','STRING',-1,-1,-1,-1
|
|
'timestamp_col','TIMESTAMP',-1,-1,16,16
|
|
'year','INT',2,0,4,4
|
|
'month','INT',12,0,4,4
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
|
|
====
|
|
---- QUERY
|
|
# Add partitions with NULL values and check for stats.
|
|
alter table compute_stats_db.alltypes add partition (year=NULL, month=NULL)
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
show column stats compute_stats_db.alltypes
|
|
---- LABELS
|
|
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
|
|
---- RESULTS
|
|
'id','INT',-1,-1,4,4
|
|
'bool_col','BOOLEAN',-1,-1,1,1
|
|
'tinyint_col','TINYINT',-1,-1,1,1
|
|
'smallint_col','SMALLINT',-1,-1,2,2
|
|
'int_col','INT',-1,-1,4,4
|
|
'bigint_col','BIGINT',-1,-1,8,8
|
|
'float_col','FLOAT',-1,-1,4,4
|
|
'double_col','DOUBLE',-1,-1,8,8
|
|
'date_string_col','STRING',-1,-1,-1,-1
|
|
'string_col','STRING',-1,-1,-1,-1
|
|
'timestamp_col','TIMESTAMP',-1,-1,16,16
|
|
'year','INT',3,1,4,4
|
|
'month','INT',13,1,4,4
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
|
|
====
|
|
---- QUERY
|
|
alter table compute_stats_db.alltypes add partition (year=2011, month=NULL)
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
show column stats compute_stats_db.alltypes
|
|
---- LABELS
|
|
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
|
|
---- RESULTS
|
|
'id','INT',-1,-1,4,4
|
|
'bool_col','BOOLEAN',-1,-1,1,1
|
|
'tinyint_col','TINYINT',-1,-1,1,1
|
|
'smallint_col','SMALLINT',-1,-1,2,2
|
|
'int_col','INT',-1,-1,4,4
|
|
'bigint_col','BIGINT',-1,-1,8,8
|
|
'float_col','FLOAT',-1,-1,4,4
|
|
'double_col','DOUBLE',-1,-1,8,8
|
|
'date_string_col','STRING',-1,-1,-1,-1
|
|
'string_col','STRING',-1,-1,-1,-1
|
|
'timestamp_col','TIMESTAMP',-1,-1,16,16
|
|
'year','INT',4,1,4,4
|
|
'month','INT',13,2,4,4
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
|
|
====
|
|
---- QUERY
|
|
# Drop the partitions with NULL values and check for stats.
|
|
alter table compute_stats_db.alltypes drop partition (year=NULL, month=NULL)
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
show column stats compute_stats_db.alltypes
|
|
---- LABELS
|
|
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
|
|
---- RESULTS
|
|
'id','INT',-1,-1,4,4
|
|
'bool_col','BOOLEAN',-1,-1,1,1
|
|
'tinyint_col','TINYINT',-1,-1,1,1
|
|
'smallint_col','SMALLINT',-1,-1,2,2
|
|
'int_col','INT',-1,-1,4,4
|
|
'bigint_col','BIGINT',-1,-1,8,8
|
|
'float_col','FLOAT',-1,-1,4,4
|
|
'double_col','DOUBLE',-1,-1,8,8
|
|
'date_string_col','STRING',-1,-1,-1,-1
|
|
'string_col','STRING',-1,-1,-1,-1
|
|
'timestamp_col','TIMESTAMP',-1,-1,16,16
|
|
'year','INT',3,0,4,4
|
|
'month','INT',13,1,4,4
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
|
|
====
|
|
---- QUERY
|
|
alter table compute_stats_db.alltypes drop partition (year=2011, month=NULL)
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
show column stats compute_stats_db.alltypes
|
|
---- LABELS
|
|
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
|
|
---- RESULTS
|
|
'id','INT',-1,-1,4,4
|
|
'bool_col','BOOLEAN',-1,-1,1,1
|
|
'tinyint_col','TINYINT',-1,-1,1,1
|
|
'smallint_col','SMALLINT',-1,-1,2,2
|
|
'int_col','INT',-1,-1,4,4
|
|
'bigint_col','BIGINT',-1,-1,8,8
|
|
'float_col','FLOAT',-1,-1,4,4
|
|
'double_col','DOUBLE',-1,-1,8,8
|
|
'date_string_col','STRING',-1,-1,-1,-1
|
|
'string_col','STRING',-1,-1,-1,-1
|
|
'timestamp_col','TIMESTAMP',-1,-1,16,16
|
|
'year','INT',2,0,4,4
|
|
'month','INT',12,0,4,4
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
|
|
====
|
|
---- QUERY
|
|
# drop stats from this table a second time, should not throw an error.
|
|
drop stats compute_stats_db.alltypes
|
|
====
|
|
---- QUERY
|
|
# test computing stats on an partitioned text table with all types
|
|
create table compute_stats_db.alltypesnopart like functional.alltypesnopart;
|
|
insert into compute_stats_db.alltypesnopart
|
|
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col,
|
|
double_col, date_string_col, string_col, timestamp_col
|
|
from functional.alltypessmall;
|
|
====
|
|
---- QUERY
|
|
compute stats compute_stats_db.alltypesnopart
|
|
---- RESULTS
|
|
'Updated 1 partition(s) and 11 column(s).'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
show table stats compute_stats_db.alltypesnopart
|
|
---- LABELS
|
|
#ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
|
|
---- RESULTS
|
|
100,3,'7.73KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
---- TYPES
|
|
BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
show column stats compute_stats_db.alltypesnopart
|
|
---- LABELS
|
|
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
|
|
---- RESULTS
|
|
'id','INT',99,-1,4,4
|
|
'bool_col','BOOLEAN',2,-1,1,1
|
|
'tinyint_col','TINYINT',10,-1,1,1
|
|
'smallint_col','SMALLINT',10,-1,2,2
|
|
'int_col','INT',10,-1,4,4
|
|
'bigint_col','BIGINT',10,-1,8,8
|
|
'float_col','FLOAT',10,-1,4,4
|
|
'double_col','DOUBLE',10,-1,8,8
|
|
'date_string_col','STRING',12,-1,8,8
|
|
'string_col','STRING',10,-1,1,1
|
|
'timestamp_col','TIMESTAMP',101,-1,16,16
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
|
|
====
|
|
---- QUERY
|
|
# test computing stats on a partitioned parquet table with all types
|
|
create table compute_stats_db.alltypes_parquet
|
|
like functional_parquet.alltypes;
|
|
insert into compute_stats_db.alltypes_parquet partition(year, month)
|
|
select * from functional.alltypes;
|
|
====
|
|
---- QUERY
|
|
compute stats compute_stats_db.alltypes_parquet
|
|
---- RESULTS
|
|
'Updated 24 partition(s) and 11 column(s).'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
show table stats compute_stats_db.alltypes_parquet
|
|
---- LABELS
|
|
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
|
|
---- RESULTS
|
|
'2009','1',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
|
|
'2009','2',280,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
|
|
'2009','3',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
|
|
'2009','4',300,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
|
|
'2009','5',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
|
|
'2009','6',300,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
|
|
'2009','7',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
|
|
'2009','8',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
|
|
'2009','9',300,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
|
|
'2009','10',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
|
|
'2009','11',300,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
|
|
'2009','12',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
|
|
'2010','1',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
|
|
'2010','2',280,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
|
|
'2010','3',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
|
|
'2010','4',300,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
|
|
'2010','5',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
|
|
'2010','6',300,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
|
|
'2010','7',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
|
|
'2010','8',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
|
|
'2010','9',300,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
|
|
'2010','10',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
|
|
'2010','11',300,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
|
|
'2010','12',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
|
|
'Total','',7300,24,regex:.+KB,'0B','','','',''
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
show column stats compute_stats_db.alltypes_parquet
|
|
---- LABELS
|
|
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
|
|
---- RESULTS
|
|
'id','INT',7505,-1,4,4
|
|
'bool_col','BOOLEAN',2,-1,1,1
|
|
'tinyint_col','TINYINT',10,-1,1,1
|
|
'smallint_col','SMALLINT',10,-1,2,2
|
|
'int_col','INT',10,-1,4,4
|
|
'bigint_col','BIGINT',10,-1,8,8
|
|
'float_col','FLOAT',10,-1,4,4
|
|
'double_col','DOUBLE',10,-1,8,8
|
|
'date_string_col','STRING',736,-1,8,8
|
|
'string_col','STRING',10,-1,1,1
|
|
'timestamp_col','TIMESTAMP',7554,-1,16,16
|
|
'year','INT',2,0,4,4
|
|
'month','INT',12,0,4,4
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
|
|
====
|
|
---- QUERY
|
|
# test computing stats on an empty table
|
|
create table compute_stats_db.alltypes_empty like functional_rc_snap.alltypes
|
|
====
|
|
---- QUERY
|
|
compute stats compute_stats_db.alltypes_empty
|
|
---- RESULTS
|
|
'Updated 0 partition(s) and 11 column(s).'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
show table stats compute_stats_db.alltypes_empty
|
|
---- LABELS
|
|
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
|
|
---- RESULTS
|
|
'Total','',0,0,'0B','0B','','','',''
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
show column stats compute_stats_db.alltypes_empty
|
|
---- LABELS
|
|
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
|
|
---- RESULTS
|
|
'id','INT',0,-1,4,4
|
|
'bool_col','BOOLEAN',2,-1,1,1
|
|
'tinyint_col','TINYINT',0,-1,1,1
|
|
'smallint_col','SMALLINT',0,-1,2,2
|
|
'int_col','INT',0,-1,4,4
|
|
'bigint_col','BIGINT',0,-1,8,8
|
|
'float_col','FLOAT',0,-1,4,4
|
|
'double_col','DOUBLE',0,-1,8,8
|
|
'date_string_col','STRING',0,-1,0,0
|
|
'string_col','STRING',0,-1,0,0
|
|
'timestamp_col','TIMESTAMP',0,-1,16,16
|
|
'year','INT',0,0,4,4
|
|
'month','INT',0,0,4,4
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
|
|
====
|
|
---- QUERY
|
|
# IMPALA-867: Test computing stats on Avro tables created by Hive with
|
|
# matching/mismatched column definitions and Avro schema.
|
|
# Clone the used tables here.
|
|
create table compute_stats_db.avro_hive_alltypes
|
|
like functional_avro_snap.alltypes;
|
|
create table compute_stats_db.avro_hive_alltypes_extra_coldef
|
|
like functional_avro_snap.alltypes_extra_coldef;
|
|
create table compute_stats_db.avro_hive_alltypes_missing_coldef
|
|
like functional_avro_snap.alltypes_missing_coldef;
|
|
create table compute_stats_db.avro_hive_alltypes_type_mismatch
|
|
like functional_avro_snap.alltypes_type_mismatch;
|
|
create table compute_stats_db.avro_hive_no_avro_schema
|
|
like functional_avro_snap.no_avro_schema;
|
|
====
|
|
---- QUERY
|
|
# Avro table with matching column definitions and Avro schema
|
|
compute stats compute_stats_db.avro_hive_alltypes
|
|
---- RESULTS
|
|
'Updated 0 partition(s) and 11 column(s).'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
show table stats compute_stats_db.avro_hive_alltypes
|
|
---- LABELS
|
|
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
|
|
---- RESULTS
|
|
'Total','',0,0,'0B','0B','','','',''
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
show column stats compute_stats_db.avro_hive_alltypes
|
|
---- LABELS
|
|
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
|
|
---- RESULTS
|
|
'id','INT',0,-1,4,4
|
|
'bool_col','BOOLEAN',2,-1,1,1
|
|
'tinyint_col','INT',0,-1,4,4
|
|
'smallint_col','INT',0,-1,4,4
|
|
'int_col','INT',0,-1,4,4
|
|
'bigint_col','BIGINT',0,-1,8,8
|
|
'float_col','FLOAT',0,-1,4,4
|
|
'double_col','DOUBLE',0,-1,8,8
|
|
'date_string_col','STRING',0,-1,0,0
|
|
'string_col','STRING',0,-1,0,0
|
|
'timestamp_col','STRING',0,-1,0,0
|
|
'year','INT',0,0,4,4
|
|
'month','INT',0,0,4,4
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
|
|
====
|
|
---- QUERY
|
|
# Avro table with an extra column definition.
|
|
compute stats compute_stats_db.avro_hive_alltypes_extra_coldef
|
|
---- RESULTS
|
|
'Updated 0 partition(s) and 12 column(s).'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
show table stats compute_stats_db.avro_hive_alltypes_extra_coldef
|
|
---- LABELS
|
|
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
|
|
---- RESULTS
|
|
'Total','',0,0,'0B','0B','','','',''
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
show column stats compute_stats_db.avro_hive_alltypes_extra_coldef
|
|
---- LABELS
|
|
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
|
|
---- RESULTS
|
|
'id','INT',0,-1,4,4
|
|
'bool_col','BOOLEAN',2,-1,1,1
|
|
'tinyint_col','TINYINT',0,-1,1,1
|
|
'smallint_col','SMALLINT',0,-1,2,2
|
|
'int_col','INT',0,-1,4,4
|
|
'bigint_col','BIGINT',0,-1,8,8
|
|
'float_col','FLOAT',0,-1,4,4
|
|
'double_col','DOUBLE',0,-1,8,8
|
|
'date_string_col','STRING',0,-1,0,0
|
|
'string_col','STRING',0,-1,0,0
|
|
'timestamp_col','TIMESTAMP',0,-1,16,16
|
|
'extra_col','STRING',0,-1,0,0
|
|
'year','INT',0,0,4,4
|
|
'month','INT',0,0,4,4
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
|
|
====
|
|
---- QUERY
|
|
# Avro table with missing two column definitions.
|
|
compute stats compute_stats_db.avro_hive_alltypes_missing_coldef
|
|
---- RESULTS
|
|
'Updated 0 partition(s) and 9 column(s).'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
show table stats compute_stats_db.avro_hive_alltypes_missing_coldef
|
|
---- LABELS
|
|
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
|
|
---- RESULTS
|
|
'Total','',0,0,'0B','0B','','','',''
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
show column stats compute_stats_db.avro_hive_alltypes_missing_coldef
|
|
---- LABELS
|
|
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
|
|
---- RESULTS
|
|
'id','INT',0,-1,4,4
|
|
'bool_col','BOOLEAN',2,-1,1,1
|
|
'smallint_col','SMALLINT',0,-1,2,2
|
|
'int_col','INT',0,-1,4,4
|
|
'bigint_col','BIGINT',0,-1,8,8
|
|
'float_col','FLOAT',0,-1,4,4
|
|
'double_col','DOUBLE',0,-1,8,8
|
|
'date_string_col','STRING',0,-1,0,0
|
|
'string_col','STRING',0,-1,0,0
|
|
'year','INT',0,0,4,4
|
|
'month','INT',0,0,4,4
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
|
|
====
|
|
---- QUERY
|
|
# Avro table with one column definition having a different
|
|
# type than the Avro schema (bigint_col is a string).
|
|
compute stats compute_stats_db.avro_hive_alltypes_type_mismatch
|
|
---- RESULTS
|
|
'Updated 0 partition(s) and 11 column(s).'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
show table stats compute_stats_db.avro_hive_alltypes_type_mismatch
|
|
---- LABELS
|
|
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
|
|
---- RESULTS
|
|
'Total','',0,0,'0B','0B','','','',''
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
show column stats compute_stats_db.avro_hive_alltypes_type_mismatch
|
|
---- LABELS
|
|
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
|
|
---- RESULTS
|
|
'id','INT',0,-1,4,4
|
|
'bool_col','BOOLEAN',2,-1,1,1
|
|
'tinyint_col','TINYINT',0,-1,1,1
|
|
'smallint_col','SMALLINT',0,-1,2,2
|
|
'int_col','INT',0,-1,4,4
|
|
'bigint_col','STRING',0,-1,0,0
|
|
'float_col','FLOAT',0,-1,4,4
|
|
'double_col','DOUBLE',0,-1,8,8
|
|
'date_string_col','STRING',0,-1,0,0
|
|
'string_col','STRING',0,-1,0,0
|
|
'timestamp_col','TIMESTAMP',0,-1,16,16
|
|
'year','INT',0,0,4,4
|
|
'month','INT',0,0,4,4
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
|
|
====
|
|
---- QUERY
|
|
# Avro table without an Avro schema created by Hive.
|
|
# The Avro schema is inferred from the column definitions,
|
|
compute stats compute_stats_db.avro_hive_no_avro_schema
|
|
---- RESULTS
|
|
'Updated 0 partition(s) and 11 column(s).'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
show table stats compute_stats_db.avro_hive_no_avro_schema
|
|
---- LABELS
|
|
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
|
|
---- RESULTS
|
|
'Total','',0,0,'0B','0B','','','',''
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
show column stats compute_stats_db.avro_hive_no_avro_schema
|
|
---- LABELS
|
|
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
|
|
---- RESULTS
|
|
'id','INT',0,-1,4,4
|
|
'bool_col','BOOLEAN',2,-1,1,1
|
|
'tinyint_col','INT',0,-1,4,4
|
|
'smallint_col','INT',0,-1,4,4
|
|
'int_col','INT',0,-1,4,4
|
|
'bigint_col','BIGINT',0,-1,8,8
|
|
'float_col','FLOAT',0,-1,4,4
|
|
'double_col','DOUBLE',0,-1,8,8
|
|
'date_string_col','STRING',0,-1,0,0
|
|
'string_col','STRING',0,-1,0,0
|
|
'timestamp_col','STRING',0,-1,0,0
|
|
'year','INT',0,0,4,4
|
|
'month','INT',0,0,4,4
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
|
|
====
|
|
---- QUERY
|
|
# Test Avro table created without any column definitions.
|
|
create table compute_stats_db.avro_impala_alltypes_no_coldefs
|
|
partitioned by (year int, month int)
|
|
with serdeproperties
|
|
('avro.schema.url'='$FILESYSTEM_PREFIX/test-warehouse/avro_schemas/functional/alltypes.json')
|
|
stored as avro;
|
|
====
|
|
---- QUERY
|
|
compute stats compute_stats_db.avro_impala_alltypes_no_coldefs
|
|
---- RESULTS
|
|
'Updated 0 partition(s) and 11 column(s).'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
show table stats compute_stats_db.avro_impala_alltypes_no_coldefs
|
|
---- LABELS
|
|
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
|
|
---- RESULTS
|
|
'Total','',0,0,'0B','0B','','','',''
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
show column stats compute_stats_db.avro_impala_alltypes_no_coldefs
|
|
---- LABELS
|
|
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
|
|
---- RESULTS
|
|
'id','INT',0,-1,4,4
|
|
'bool_col','BOOLEAN',2,-1,1,1
|
|
'tinyint_col','INT',0,-1,4,4
|
|
'smallint_col','INT',0,-1,4,4
|
|
'int_col','INT',0,-1,4,4
|
|
'bigint_col','BIGINT',0,-1,8,8
|
|
'float_col','FLOAT',0,-1,4,4
|
|
'double_col','DOUBLE',0,-1,8,8
|
|
'date_string_col','STRING',0,-1,0,0
|
|
'string_col','STRING',0,-1,0,0
|
|
'timestamp_col','STRING',0,-1,0,0
|
|
'year','INT',0,0,4,4
|
|
'month','INT',0,0,4,4
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
|
|
====
|
|
---- QUERY
|
|
# IMPALA-1104: Test computing stats on Avro tables created by Impala
|
|
# with mismatched column definitions and Avro schema. Mismatched column name.
|
|
create table compute_stats_db.avro_impala_alltypes_bad_colname
|
|
(id int, bool_col boolean, tinyint_col int, smallint_col int, bad_int_col int,
|
|
bigint_col bigint, float_col float, double_col double, date_string_col string,
|
|
string_col string, timestamp_col timestamp)
|
|
partitioned by (year int, month int)
|
|
with serdeproperties
|
|
('avro.schema.url'='$FILESYSTEM_PREFIX/test-warehouse/avro_schemas/functional/alltypes.json')
|
|
stored as avro;
|
|
====
|
|
---- QUERY
|
|
compute stats compute_stats_db.avro_impala_alltypes_bad_colname
|
|
---- RESULTS
|
|
'Updated 0 partition(s) and 11 column(s).'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
show table stats compute_stats_db.avro_impala_alltypes_bad_colname
|
|
---- LABELS
|
|
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
|
|
---- RESULTS
|
|
'Total','',0,0,'0B','0B','','','',''
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
show column stats compute_stats_db.avro_impala_alltypes_bad_colname
|
|
---- LABELS
|
|
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
|
|
---- RESULTS
|
|
'id','INT',0,-1,4,4
|
|
'bool_col','BOOLEAN',2,-1,1,1
|
|
'tinyint_col','INT',0,-1,4,4
|
|
'smallint_col','INT',0,-1,4,4
|
|
'int_col','INT',0,-1,4,4
|
|
'bigint_col','BIGINT',0,-1,8,8
|
|
'float_col','FLOAT',0,-1,4,4
|
|
'double_col','DOUBLE',0,-1,8,8
|
|
'date_string_col','STRING',0,-1,0,0
|
|
'string_col','STRING',0,-1,0,0
|
|
'timestamp_col','STRING',0,-1,0,0
|
|
'year','INT',0,0,4,4
|
|
'month','INT',0,0,4,4
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
|
|
====
|
|
---- QUERY
|
|
# IMPALA-1104: Test computing stats on Avro tables created by Impala
|
|
# with mismatched column definitions and Avro schema. Mismatched column type.
|
|
create table compute_stats_db.avro_impala_alltypes_bad_coltype
|
|
(id int, bool_col boolean, tinyint_col int, smallint_col int, int_col int,
|
|
bigint_col bigint, float_col float, double_col bigint, date_string_col string,
|
|
string_col string, timestamp_col timestamp)
|
|
partitioned by (year int, month int)
|
|
with serdeproperties
|
|
('avro.schema.url'='$FILESYSTEM_PREFIX/test-warehouse/avro_schemas/functional/alltypes.json')
|
|
stored as avro;
|
|
====
|
|
---- QUERY
|
|
compute stats compute_stats_db.avro_impala_alltypes_bad_coltype
|
|
---- RESULTS
|
|
'Updated 0 partition(s) and 11 column(s).'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
show table stats compute_stats_db.avro_impala_alltypes_bad_coltype
|
|
---- LABELS
|
|
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
|
|
---- RESULTS
|
|
'Total','',0,0,'0B','0B','','','',''
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
show column stats compute_stats_db.avro_impala_alltypes_bad_coltype
|
|
---- LABELS
|
|
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
|
|
---- RESULTS
|
|
'id','INT',0,-1,4,4
|
|
'bool_col','BOOLEAN',2,-1,1,1
|
|
'tinyint_col','INT',0,-1,4,4
|
|
'smallint_col','INT',0,-1,4,4
|
|
'int_col','INT',0,-1,4,4
|
|
'bigint_col','BIGINT',0,-1,8,8
|
|
'float_col','FLOAT',0,-1,4,4
|
|
'double_col','DOUBLE',0,-1,8,8
|
|
'date_string_col','STRING',0,-1,0,0
|
|
'string_col','STRING',0,-1,0,0
|
|
'timestamp_col','STRING',0,-1,0,0
|
|
'year','INT',0,0,4,4
|
|
'month','INT',0,0,4,4
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
|
|
====
|
|
---- QUERY
|
|
# For IMPALA-1055, using a database called `parquet` to test cases where the name of
|
|
# the database is a keyword.
|
|
CREATE TABLE `parquet`.billion_parquet(id INT);
|
|
====
|
|
---- QUERY
|
|
COMPUTE STATS `parquet`.billion_parquet
|
|
---- RESULTS
|
|
'Updated 1 partition(s) and 1 column(s).'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
CREATE TABLE `parquet`.`parquet`(id INT)
|
|
====
|
|
---- QUERY
|
|
COMPUTE STATS `parquet`.`parquet`
|
|
---- RESULTS
|
|
'Updated 1 partition(s) and 1 column(s).'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
# IMPALA-883: Compute table stats for an empty partition.
|
|
create table compute_stats_db.empty_partitioned (i int) partitioned by (j int);
|
|
alter table compute_stats_db.empty_partitioned add partition (j=1);
|
|
====
|
|
---- QUERY
|
|
compute stats compute_stats_db.empty_partitioned
|
|
---- RESULTS
|
|
'Updated 1 partition(s) and 1 column(s).'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
show table stats compute_stats_db.empty_partitioned
|
|
---- RESULTS
|
|
'1',0,0,'0B','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'Total',0,0,'0B','0B','','','',''
|
|
---- TYPES
|
|
STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Insert non empty partition to the table with empty partition.
|
|
insert into table compute_stats_db.empty_partitioned partition (j=2) select 1;
|
|
====
|
|
---- QUERY
|
|
# Verify partition stats work with empty and non-empty partition.
|
|
drop stats compute_stats_db.empty_partitioned;
|
|
compute stats compute_stats_db.empty_partitioned;
|
|
---- RESULTS
|
|
'Updated 2 partition(s) and 1 column(s).'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
show table stats compute_stats_db.empty_partitioned
|
|
---- RESULTS
|
|
'1',0,0,'0B','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2',1,1,'2B','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'Total',1,1,'2B','0B','','','',''
|
|
---- TYPES
|
|
STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Verify partition stats work with empty and non-empty partition.
|
|
drop stats compute_stats_db.empty_partitioned;
|
|
compute stats compute_stats_db.empty_partitioned;
|
|
---- RESULTS
|
|
'Updated 2 partition(s) and 1 column(s).'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
show table stats compute_stats_db.empty_partitioned
|
|
---- RESULTS
|
|
'1',0,0,'0B','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2',1,1,'2B','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'Total',1,1,'2B','0B','','','',''
|
|
---- TYPES
|
|
STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# IMPALA-1614 Verify that COMPUTE STATS works on a table whose name starts with numbers.
|
|
create table compute_stats_db.`123_table` (i int, 1p int) partitioned by (2j int);
|
|
alter table compute_stats_db.`123_table` add partition (2j=1);
|
|
====
|
|
---- QUERY
|
|
compute stats compute_stats_db.`123_table`
|
|
---- RESULTS
|
|
'Updated 1 partition(s) and 2 column(s).'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
show table stats compute_stats_db.`123_table`
|
|
---- RESULTS
|
|
'1',0,0,'0B','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'Total',0,0,'0B','0B','','','',''
|
|
---- TYPES
|
|
STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
|
|
====
|
|
====
|
|
---- QUERY
|
|
# IMPALA-1629: Verify that the column stats for CHAR/VARCHAR columns are populated.
|
|
# The values of date_string_col always have exactly 8 characters. The CHAR/VARCHAR
|
|
# sizes below are chosen such that they are smaller, equal, and greater than the
|
|
# source data values, in particular, to test the CHAR padding behavior.
|
|
create table compute_stats_db.chars_tbl (
|
|
id int,
|
|
ch1 char(1),
|
|
ch2 char(8),
|
|
ch3 char(20),
|
|
ts timestamp,
|
|
vc1 varchar(1),
|
|
vc2 varchar(8),
|
|
vc3 varchar(20)
|
|
)
|
|
partitioned by (
|
|
year char(5),
|
|
day varchar(13)
|
|
);
|
|
|
|
insert overwrite compute_stats_db.chars_tbl partition(year, day)
|
|
select
|
|
id,
|
|
cast(date_string_col as char(1)),
|
|
cast(date_string_col as char(8)),
|
|
cast(date_string_col as char(20)),
|
|
timestamp_col,
|
|
cast(date_string_col as varchar(1)),
|
|
cast(date_string_col as varchar(8)),
|
|
cast(date_string_col as varchar(20)),
|
|
cast(year as char(5)),
|
|
cast(day as varchar(13))
|
|
from functional.alltypesagg
|
|
where day is null or day in (3, 7);
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
year=2010 /day=7/: 1000
|
|
year=2010 /day=3/: 1000
|
|
year=2010 /day=__HIVE_DEFAULT_PARTITION__/: 1000
|
|
====
|
|
---- QUERY
|
|
compute stats compute_stats_db.chars_tbl
|
|
---- RESULTS
|
|
'Updated 3 partition(s) and 8 column(s).'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
show column stats compute_stats_db.chars_tbl
|
|
---- LABELS
|
|
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
|
|
---- RESULTS
|
|
'id','INT',2915,-1,4,4
|
|
'ch1','CHAR(1)',1,-1,1,1
|
|
'ch2','CHAR(8)',10,-1,8,8
|
|
'ch3','CHAR(20)',10,-1,8,8
|
|
'ts','TIMESTAMP',2871,-1,16,16
|
|
'vc1','VARCHAR(1)',1,-1,1,1
|
|
'vc2','VARCHAR(8)',10,-1,8,8
|
|
'vc3','VARCHAR(20)',10,-1,8,8
|
|
'year','CHAR(5)',1,0,5,5
|
|
'day','VARCHAR(13)',3,1,-1,-1
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
|
|
====
|
|
---- QUERY
|
|
# Test that compute stats on a Hive-created Avro table without column defs
|
|
# works (HIVE-6308, IMPALA-867).
|
|
create table compute_stats_db.alltypes_no_coldef like functional_avro_snap.alltypes_no_coldef;
|
|
compute stats compute_stats_db.alltypes_no_coldef
|
|
---- RESULTS
|
|
'Updated 1 partition(s) and 11 column(s).'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
show column stats compute_stats_db.alltypes_no_coldef
|
|
---- RESULTS
|
|
'id','INT',0,-1,4,4
|
|
'bool_col','BOOLEAN',2,-1,1,1
|
|
'tinyint_col','INT',0,-1,4,4
|
|
'smallint_col','INT',0,-1,4,4
|
|
'int_col','INT',0,-1,4,4
|
|
'bigint_col','BIGINT',0,-1,8,8
|
|
'float_col','FLOAT',0,-1,4,4
|
|
'double_col','DOUBLE',0,-1,8,8
|
|
'date_string_col','STRING',0,-1,0,0
|
|
'string_col','STRING',0,-1,0,0
|
|
'timestamp_col','STRING',0,-1,0,0
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
|
|
====
|