Files
impala/testdata/workloads/functional-query/queries/QueryTest/compute-stats-avro.test
Chang Wu a93f2c2675 IMPALA-8205: Support number of true and false statistics for boolean column
This change computes the real number of true and false values as
statistics for boolean columns. Before this, Impala set numTrues and
numFalses to a hardcoded -1 to indicate that the statistics were
missing.

Test Done:
Appended numTrues and numFalses checks to all the statistics-related
test cases, including the non-incremental, incremental and other test
cases.

Change-Id: I991bee8e7fdc644d908289f5fe2ee8032cc2c431
Reviewed-on: http://gerrit.cloudera.org:8080/14666
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
2020-05-12 23:29:04 +00:00

378 lines
12 KiB
Plaintext

====
---- QUERY
# IMPALA-867: Test computing stats on Avro tables created by Hive with
# matching/mismatched column definitions and Avro schema.
# Clone the tables used by the test cases below, so that stats are computed
# on the clones rather than on the functional_avro_snap source tables.
# NOTE(review): the clones are presumably empty ('create table ... like'
# copies only the table definition), which matches the 0-row expectations
# in the later test cases -- confirm.
create table avro_hive_alltypes
like functional_avro_snap.alltypes;
create table avro_hive_alltypes_extra_coldef
like functional_avro_snap.alltypes_extra_coldef;
create table avro_hive_alltypes_missing_coldef
like functional_avro_snap.alltypes_missing_coldef;
create table avro_hive_alltypes_type_mismatch
like functional_avro_snap.alltypes_type_mismatch;
create table avro_hive_no_avro_schema
like functional_avro_snap.no_avro_schema;
====
---- QUERY
# Avro table with matching column definitions and Avro schema.
compute stats avro_hive_alltypes
---- RESULTS
'Updated 0 partition(s) and 11 column(s).'
---- TYPES
STRING
====
---- QUERY
# compute stats reported 0 partitions, so only the 'Total' summary row
# with 0 rows and 0 files is expected.
show table stats avro_hive_alltypes
---- LABELS
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
---- RESULTS
'Total','',0,0,'0B','0B','','','',''
---- TYPES
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
# Expect the 11 table columns followed by the partition columns year and
# month. Only bool_col carries #TRUES/#FALSES counts (both 0 here); every
# other column reports -1 for them.
show column stats avro_hive_alltypes
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE, #TRUES, #FALSES
---- RESULTS
'id','INT',0,0,4,4,-1,-1
'bool_col','BOOLEAN',2,0,1,1,0,0
'tinyint_col','INT',0,0,4,4,-1,-1
'smallint_col','INT',0,0,4,4,-1,-1
'int_col','INT',0,0,4,4,-1,-1
'bigint_col','BIGINT',0,0,8,8,-1,-1
'float_col','FLOAT',0,0,4,4,-1,-1
'double_col','DOUBLE',0,0,8,8,-1,-1
'date_string_col','STRING',0,0,0,0,-1,-1
'string_col','STRING',0,0,0,0,-1,-1
'timestamp_col','STRING',0,0,0,0,-1,-1
'year','INT',0,0,4,4,-1,-1
'month','INT',0,0,4,4,-1,-1
---- TYPES
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
====
---- QUERY
# Avro table with an extra column definition (extra_col), hence 12 columns
# are updated instead of 11.
compute stats avro_hive_alltypes_extra_coldef
---- RESULTS
'Updated 0 partition(s) and 12 column(s).'
---- TYPES
STRING
====
---- QUERY
# compute stats reported 0 partitions, so only the 'Total' summary row
# with 0 rows and 0 files is expected.
show table stats avro_hive_alltypes_extra_coldef
---- LABELS
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
---- RESULTS
'Total','',0,0,'0B','0B','','','',''
---- TYPES
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
# extra_col is expected right before the partition columns year and month.
# Only bool_col carries #TRUES/#FALSES counts (both 0 here); every other
# column reports -1 for them.
show column stats avro_hive_alltypes_extra_coldef
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE, #TRUES, #FALSES
---- RESULTS
'id','INT',0,0,4,4,-1,-1
'bool_col','BOOLEAN',2,0,1,1,0,0
'tinyint_col','TINYINT',0,0,1,1,-1,-1
'smallint_col','SMALLINT',0,0,2,2,-1,-1
'int_col','INT',0,0,4,4,-1,-1
'bigint_col','BIGINT',0,0,8,8,-1,-1
'float_col','FLOAT',0,0,4,4,-1,-1
'double_col','DOUBLE',0,0,8,8,-1,-1
'date_string_col','STRING',0,0,0,0,-1,-1
'string_col','STRING',0,0,0,0,-1,-1
'timestamp_col','TIMESTAMP',0,0,16,16,-1,-1
'extra_col','STRING',0,0,0,0,-1,-1
'year','INT',0,0,4,4,-1,-1
'month','INT',0,0,4,4,-1,-1
---- TYPES
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
====
---- QUERY
# Avro table with two missing column definitions, hence only 9 columns
# are updated instead of 11.
compute stats avro_hive_alltypes_missing_coldef
---- RESULTS
'Updated 0 partition(s) and 9 column(s).'
---- TYPES
STRING
====
---- QUERY
# compute stats reported 0 partitions, so only the 'Total' summary row
# with 0 rows and 0 files is expected.
show table stats avro_hive_alltypes_missing_coldef
---- LABELS
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
---- RESULTS
'Total','',0,0,'0B','0B','','','',''
---- TYPES
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
# tinyint_col and timestamp_col are the two columns absent from the expected
# output. Only bool_col carries #TRUES/#FALSES counts (both 0 here); every
# other column reports -1 for them.
show column stats avro_hive_alltypes_missing_coldef
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE, #TRUES, #FALSES
---- RESULTS
'id','INT',0,0,4,4,-1,-1
'bool_col','BOOLEAN',2,0,1,1,0,0
'smallint_col','SMALLINT',0,0,2,2,-1,-1
'int_col','INT',0,0,4,4,-1,-1
'bigint_col','BIGINT',0,0,8,8,-1,-1
'float_col','FLOAT',0,0,4,4,-1,-1
'double_col','DOUBLE',0,0,8,8,-1,-1
'date_string_col','STRING',0,0,0,0,-1,-1
'string_col','STRING',0,0,0,0,-1,-1
'year','INT',0,0,4,4,-1,-1
'month','INT',0,0,4,4,-1,-1
---- TYPES
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
====
---- QUERY
# Avro table with one column definition having a different
# type than the Avro schema (bigint_col is a string).
compute stats avro_hive_alltypes_type_mismatch
---- RESULTS
'Updated 0 partition(s) and 11 column(s).'
---- TYPES
STRING
====
---- QUERY
# compute stats reported 0 partitions, so only the 'Total' summary row
# with 0 rows and 0 files is expected.
show table stats avro_hive_alltypes_type_mismatch
---- LABELS
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
---- RESULTS
'Total','',0,0,'0B','0B','','','',''
---- TYPES
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
# bigint_col is expected to be reported as STRING (the mismatched type).
# Only bool_col carries #TRUES/#FALSES counts (both 0 here); every other
# column reports -1 for them.
show column stats avro_hive_alltypes_type_mismatch
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE, #TRUES, #FALSES
---- RESULTS
'id','INT',0,0,4,4,-1,-1
'bool_col','BOOLEAN',2,0,1,1,0,0
'tinyint_col','TINYINT',0,0,1,1,-1,-1
'smallint_col','SMALLINT',0,0,2,2,-1,-1
'int_col','INT',0,0,4,4,-1,-1
'bigint_col','STRING',0,0,0,0,-1,-1
'float_col','FLOAT',0,0,4,4,-1,-1
'double_col','DOUBLE',0,0,8,8,-1,-1
'date_string_col','STRING',0,0,0,0,-1,-1
'string_col','STRING',0,0,0,0,-1,-1
'timestamp_col','TIMESTAMP',0,0,16,16,-1,-1
'year','INT',0,0,4,4,-1,-1
'month','INT',0,0,4,4,-1,-1
---- TYPES
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
====
---- QUERY
# Avro table without an Avro schema created by Hive.
# The Avro schema is inferred from the column definitions.
compute stats avro_hive_no_avro_schema
---- RESULTS
'Updated 0 partition(s) and 11 column(s).'
---- TYPES
STRING
====
---- QUERY
# compute stats reported 0 partitions, so only the 'Total' summary row
# with 0 rows and 0 files is expected.
show table stats avro_hive_no_avro_schema
---- LABELS
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
---- RESULTS
'Total','',0,0,'0B','0B','','','',''
---- TYPES
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
# Expect the 11 table columns followed by the partition columns year and
# month. Only bool_col carries #TRUES/#FALSES counts (both 0 here); every
# other column reports -1 for them.
show column stats avro_hive_no_avro_schema
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE, #TRUES, #FALSES
---- RESULTS
'id','INT',0,0,4,4,-1,-1
'bool_col','BOOLEAN',2,0,1,1,0,0
'tinyint_col','INT',0,0,4,4,-1,-1
'smallint_col','INT',0,0,4,4,-1,-1
'int_col','INT',0,0,4,4,-1,-1
'bigint_col','BIGINT',0,0,8,8,-1,-1
'float_col','FLOAT',0,0,4,4,-1,-1
'double_col','DOUBLE',0,0,8,8,-1,-1
'date_string_col','STRING',0,0,0,0,-1,-1
'string_col','STRING',0,0,0,0,-1,-1
'timestamp_col','STRING',0,0,0,0,-1,-1
'year','INT',0,0,4,4,-1,-1
'month','INT',0,0,4,4,-1,-1
---- TYPES
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
====
---- QUERY
# Test Avro table created without any column definitions.
# Only the partition columns and the Avro schema URL are given.
create table avro_impala_alltypes_no_coldefs
partitioned by (year int, month int)
with serdeproperties
('avro.schema.url'='$FILESYSTEM_PREFIX/test-warehouse/avro_schemas/functional/alltypes.json')
stored as avro;
====
---- QUERY
# The table was just created and has no partitions yet.
compute stats avro_impala_alltypes_no_coldefs
---- RESULTS
'Updated 0 partition(s) and 11 column(s).'
---- TYPES
STRING
====
---- QUERY
# With no partitions, only the 'Total' summary row with 0 rows and 0 files
# is expected.
show table stats avro_impala_alltypes_no_coldefs
---- LABELS
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
---- RESULTS
'Total','',0,0,'0B','0B','','','',''
---- TYPES
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
# Expect 11 non-partition columns (none were declared in the create
# statement) followed by the partition columns year and month.
# Only bool_col carries #TRUES/#FALSES counts (both 0 here); every other
# column reports -1 for them.
show column stats avro_impala_alltypes_no_coldefs
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE, #TRUES, #FALSES
---- RESULTS
'id','INT',0,0,4,4,-1,-1
'bool_col','BOOLEAN',2,0,1,1,0,0
'tinyint_col','INT',0,0,4,4,-1,-1
'smallint_col','INT',0,0,4,4,-1,-1
'int_col','INT',0,0,4,4,-1,-1
'bigint_col','BIGINT',0,0,8,8,-1,-1
'float_col','FLOAT',0,0,4,4,-1,-1
'double_col','DOUBLE',0,0,8,8,-1,-1
'date_string_col','STRING',0,0,0,0,-1,-1
'string_col','STRING',0,0,0,0,-1,-1
'timestamp_col','STRING',0,0,0,0,-1,-1
'year','INT',0,0,4,4,-1,-1
'month','INT',0,0,4,4,-1,-1
---- TYPES
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
====
---- QUERY
# IMPALA-1104: Test computing stats on Avro tables created by Impala
# with mismatched column definitions and Avro schema. Mismatched column name.
# The fifth column is declared as bad_int_col here, while the expected
# column stats further below list it as int_col.
create table avro_impala_alltypes_bad_colname
(id int, bool_col boolean, tinyint_col int, smallint_col int, bad_int_col int,
bigint_col bigint, float_col float, double_col double, date_string_col string,
string_col string, timestamp_col timestamp)
partitioned by (year int, month int)
with serdeproperties
('avro.schema.url'='$FILESYSTEM_PREFIX/test-warehouse/avro_schemas/functional/alltypes.json')
stored as avro;
====
---- QUERY
# The table was just created and has no partitions yet.
compute stats avro_impala_alltypes_bad_colname
---- RESULTS
'Updated 0 partition(s) and 11 column(s).'
---- TYPES
STRING
====
---- QUERY
# With no partitions, only the 'Total' summary row with 0 rows and 0 files
# is expected.
show table stats avro_impala_alltypes_bad_colname
---- LABELS
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
---- RESULTS
'Total','',0,0,'0B','0B','','','',''
---- TYPES
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
# int_col (not the declared bad_int_col) is expected, and timestamp_col is
# expected as STRING although it was declared as timestamp.
# Only bool_col carries #TRUES/#FALSES counts (both 0 here); every other
# column reports -1 for them.
show column stats avro_impala_alltypes_bad_colname
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE, #TRUES, #FALSES
---- RESULTS
'id','INT',0,0,4,4,-1,-1
'bool_col','BOOLEAN',2,0,1,1,0,0
'tinyint_col','INT',0,0,4,4,-1,-1
'smallint_col','INT',0,0,4,4,-1,-1
'int_col','INT',0,0,4,4,-1,-1
'bigint_col','BIGINT',0,0,8,8,-1,-1
'float_col','FLOAT',0,0,4,4,-1,-1
'double_col','DOUBLE',0,0,8,8,-1,-1
'date_string_col','STRING',0,0,0,0,-1,-1
'string_col','STRING',0,0,0,0,-1,-1
'timestamp_col','STRING',0,0,0,0,-1,-1
'year','INT',0,0,4,4,-1,-1
'month','INT',0,0,4,4,-1,-1
---- TYPES
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
====
---- QUERY
# IMPALA-1104: Test computing stats on Avro tables created by Impala
# with mismatched column definitions and Avro schema. Mismatched column type.
# double_col is declared as bigint here, while the expected column stats
# further below list it as DOUBLE.
create table avro_impala_alltypes_bad_coltype
(id int, bool_col boolean, tinyint_col int, smallint_col int, int_col int,
bigint_col bigint, float_col float, double_col bigint, date_string_col string,
string_col string, timestamp_col timestamp)
partitioned by (year int, month int)
with serdeproperties
('avro.schema.url'='$FILESYSTEM_PREFIX/test-warehouse/avro_schemas/functional/alltypes.json')
stored as avro;
====
---- QUERY
# The table was just created and has no partitions yet.
compute stats avro_impala_alltypes_bad_coltype
---- RESULTS
'Updated 0 partition(s) and 11 column(s).'
---- TYPES
STRING
====
---- QUERY
# With no partitions, only the 'Total' summary row with 0 rows and 0 files
# is expected.
show table stats avro_impala_alltypes_bad_coltype
---- LABELS
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
---- RESULTS
'Total','',0,0,'0B','0B','','','',''
---- TYPES
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
# double_col is expected as DOUBLE (not the declared bigint), and
# timestamp_col as STRING although it was declared as timestamp.
# Only bool_col carries #TRUES/#FALSES counts (both 0 here); every other
# column reports -1 for them.
show column stats avro_impala_alltypes_bad_coltype
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE, #TRUES, #FALSES
---- RESULTS
'id','INT',0,0,4,4,-1,-1
'bool_col','BOOLEAN',2,0,1,1,0,0
'tinyint_col','INT',0,0,4,4,-1,-1
'smallint_col','INT',0,0,4,4,-1,-1
'int_col','INT',0,0,4,4,-1,-1
'bigint_col','BIGINT',0,0,8,8,-1,-1
'float_col','FLOAT',0,0,4,4,-1,-1
'double_col','DOUBLE',0,0,8,8,-1,-1
'date_string_col','STRING',0,0,0,0,-1,-1
'string_col','STRING',0,0,0,0,-1,-1
'timestamp_col','STRING',0,0,0,0,-1,-1
'year','INT',0,0,4,4,-1,-1
'month','INT',0,0,4,4,-1,-1
---- TYPES
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
====
---- QUERY
# Test that compute stats on a Hive-created Avro table without column defs
# works (HIVE-6308, IMPALA-867).
# The table is cloned first; the expected result below is the output of the
# final statement (compute stats).
create table alltypes_no_coldef like functional_avro_snap.alltypes_no_coldef;
compute stats alltypes_no_coldef
---- RESULTS
'Updated 1 partition(s) and 11 column(s).'
---- TYPES
STRING
====
---- QUERY
# Expect 11 columns and no partition columns in the output.
# The LABELS section makes this case verify the #TRUES/#FALSES column
# headers, like every other 'show column stats' case in this file.
# Only bool_col carries #TRUES/#FALSES counts (both 0 here); every other
# column reports -1 for them.
show column stats alltypes_no_coldef
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE, #TRUES, #FALSES
---- RESULTS
'id','INT',0,0,4,4,-1,-1
'bool_col','BOOLEAN',2,0,1,1,0,0
'tinyint_col','INT',0,0,4,4,-1,-1
'smallint_col','INT',0,0,4,4,-1,-1
'int_col','INT',0,0,4,4,-1,-1
'bigint_col','BIGINT',0,0,8,8,-1,-1
'float_col','FLOAT',0,0,4,4,-1,-1
'double_col','DOUBLE',0,0,8,8,-1,-1
'date_string_col','STRING',0,0,0,0,-1,-1
'string_col','STRING',0,0,0,0,-1,-1
'timestamp_col','STRING',0,0,0,0,-1,-1
---- TYPES
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
====