mirror of
https://github.com/apache/impala.git
synced 2026-02-02 06:00:36 -05:00
Adds erasure coding policy to introspection commands SHOW FILES, SHOW PARTITIONS, SHOW TABLE STATS, and DESCRIBE EXTENDED. Remove `throws IOException` for methods that don't throw. Removes null check for getSd because getStorageDescriptorInfo - which is called right after getTableMetaDataInformation - uses it without checking for null. Adds '$ERASURECODE_POLICY' for runtime test substitution. The test suite replaces this with the current erasure code policy - from HDFS_ERASURECODE_POLICY - or NONE to match expected output. Testing: - ran backend, end-to-end, and custom cluster tests with erasure coding - ran backend, end-to-end, and custom cluster tests with exhaustive strategy Change-Id: Idd95f2d18b3980581788c92993b6d2f53504b5e0 Reviewed-on: http://gerrit.cloudera.org:8080/19268 Reviewed-by: Michael Smith <michael.smith@cloudera.com> Tested-by: Michael Smith <michael.smith@cloudera.com>
529 lines
20 KiB
Plaintext
529 lines
20 KiB
Plaintext
====
|
|
---- QUERY
|
|
# IMPALA-867: Test computing stats on Avro tables created by Hive with
|
|
# matching/mismatched column definitions and Avro schema.
|
|
# Clone the used tables here.
|
|
create table avro_hive_alltypes
|
|
like functional_avro_snap.alltypes;
|
|
create table avro_hive_alltypes_extra_coldef
|
|
like functional_avro_snap.alltypes_extra_coldef;
|
|
create table avro_hive_alltypes_missing_coldef
|
|
like functional_avro_snap.alltypes_missing_coldef;
|
|
create table avro_hive_alltypes_type_mismatch
|
|
like functional_avro_snap.alltypes_type_mismatch;
|
|
create table avro_hive_no_avro_schema
|
|
like functional_avro_snap.no_avro_schema;
|
|
====
|
|
---- QUERY
|
|
# Avro table with matching column definitions and Avro schema
|
|
compute stats avro_hive_alltypes
|
|
---- RESULTS
|
|
'Updated 0 partition(s) and 11 column(s).'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
show table stats avro_hive_alltypes
|
|
---- LABELS
|
|
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION, EC POLICY
|
|
---- RESULTS
|
|
'Total','',0,0,'0B','0B','','','','',''
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
show column stats avro_hive_alltypes
|
|
---- LABELS
|
|
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE, #TRUES, #FALSES
|
|
---- RESULTS
|
|
'id','INT',0,0,4,4,-1,-1
|
|
'bool_col','BOOLEAN',2,0,1,1,0,0
|
|
'tinyint_col','INT',0,0,4,4,-1,-1
|
|
'smallint_col','INT',0,0,4,4,-1,-1
|
|
'int_col','INT',0,0,4,4,-1,-1
|
|
'bigint_col','BIGINT',0,0,8,8,-1,-1
|
|
'float_col','FLOAT',0,0,4,4,-1,-1
|
|
'double_col','DOUBLE',0,0,8,8,-1,-1
|
|
'date_string_col','STRING',0,0,0,0,-1,-1
|
|
'string_col','STRING',0,0,0,0,-1,-1
|
|
'timestamp_col','STRING',0,0,0,0,-1,-1
|
|
'year','INT',0,0,4,4,-1,-1
|
|
'month','INT',0,0,4,4,-1,-1
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
|
|
====
|
|
---- QUERY
|
|
# Non-empty Avro table with matching column definitions and Avro schema
|
|
create external table avro_hive_alltypes_ext
|
|
like functional_avro_snap.alltypes;
|
|
alter table avro_hive_alltypes_ext
|
|
set location '$FILESYSTEM_PREFIX/test-warehouse/alltypes_avro_snap';
|
|
alter table avro_hive_alltypes_ext recover partitions;
|
|
compute stats avro_hive_alltypes_ext;
|
|
---- RESULTS
|
|
'Updated 24 partition(s) and 11 column(s).'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
show table stats avro_hive_alltypes_ext
|
|
---- LABELS
|
|
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION, EC POLICY
|
|
---- RESULTS
|
|
'2009','1',310,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY'
|
|
'2009','2',280,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY'
|
|
'2009','3',310,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY'
|
|
'2009','4',300,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY'
|
|
'2009','5',310,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY'
|
|
'2009','6',300,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY'
|
|
'2009','7',310,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY'
|
|
'2009','8',310,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY'
|
|
'2009','9',300,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY'
|
|
'2009','10',310,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY'
|
|
'2009','11',300,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY'
|
|
'2009','12',310,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY'
|
|
'2010','1',310,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY'
|
|
'2010','2',280,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY'
|
|
'2010','3',310,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY'
|
|
'2010','4',300,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY'
|
|
'2010','5',310,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY'
|
|
'2010','6',300,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY'
|
|
'2010','7',310,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY'
|
|
'2010','8',310,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY'
|
|
'2010','9',300,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY'
|
|
'2010','10',310,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY'
|
|
'2010','11',300,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY'
|
|
'2010','12',310,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY'
|
|
'Total','',7300,24,regex:.*,'0B','','','','',''
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
show column stats avro_hive_alltypes_ext
|
|
---- LABELS
|
|
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE, #TRUES, #FALSES
|
|
---- RESULTS
|
|
'id','INT',7300,0,4,4,-1,-1
|
|
'bool_col','BOOLEAN',2,0,1,1,3650,3650
|
|
'tinyint_col','INT',10,0,4,4,-1,-1
|
|
'smallint_col','INT',10,0,4,4,-1,-1
|
|
'int_col','INT',10,0,4,4,-1,-1
|
|
'bigint_col','BIGINT',10,0,8,8,-1,-1
|
|
'float_col','FLOAT',10,0,4,4,-1,-1
|
|
'double_col','DOUBLE',10,0,8,8,-1,-1
|
|
'date_string_col','STRING',736,0,8,8,-1,-1
|
|
'string_col','STRING',10,0,1,1,-1,-1
|
|
'timestamp_col','STRING',7224,0,22,21.66438293457031,-1,-1
|
|
'year','INT',2,0,4,4,-1,-1
|
|
'month','INT',12,0,4,4,-1,-1
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
|
|
====
|
|
---- QUERY
|
|
# Non-empty Avro table with matching column definitions and Avro schema, but with
|
|
# different partition schema. Note that we use INT for tinyint_col and smallint_col,
|
|
# and STRING for timestamp_col. See HIVE_TO_AVRO_TYPE_MAP in
|
|
# testdata/bin/generate-schema-statements.py
|
|
create external table avro_hive_alltypes_str_part (
|
|
id int,
|
|
bool_col boolean,
|
|
tinyint_col int,
|
|
smallint_col int,
|
|
int_col int,
|
|
bigint_col bigint,
|
|
float_col float,
|
|
double_col double,
|
|
date_string_col string,
|
|
string_col string,
|
|
timestamp_col string
|
|
) partitioned by (
|
|
year string,
|
|
month string
|
|
)
|
|
stored as avro
|
|
location '$FILESYSTEM_PREFIX/test-warehouse/alltypes_avro_snap';
|
|
alter table avro_hive_alltypes_str_part recover partitions;
|
|
compute stats avro_hive_alltypes_str_part;
|
|
---- RESULTS
|
|
'Updated 24 partition(s) and 11 column(s).'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
show table stats avro_hive_alltypes_str_part
|
|
---- LABELS
|
|
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION, EC POLICY
|
|
---- RESULTS
|
|
'2009','1',310,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY'
|
|
'2009','2',280,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY'
|
|
'2009','3',310,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY'
|
|
'2009','4',300,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY'
|
|
'2009','5',310,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY'
|
|
'2009','6',300,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY'
|
|
'2009','7',310,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY'
|
|
'2009','8',310,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY'
|
|
'2009','9',300,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY'
|
|
'2009','10',310,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY'
|
|
'2009','11',300,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY'
|
|
'2009','12',310,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY'
|
|
'2010','1',310,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY'
|
|
'2010','2',280,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY'
|
|
'2010','3',310,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY'
|
|
'2010','4',300,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY'
|
|
'2010','5',310,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY'
|
|
'2010','6',300,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY'
|
|
'2010','7',310,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY'
|
|
'2010','8',310,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY'
|
|
'2010','9',300,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY'
|
|
'2010','10',310,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY'
|
|
'2010','11',300,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY'
|
|
'2010','12',310,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY'
|
|
'Total','',7300,24,regex:.*,'0B','','','','',''
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
show column stats avro_hive_alltypes_str_part
|
|
---- LABELS
|
|
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE, #TRUES, #FALSES
|
|
---- RESULTS
|
|
'id','INT',7300,0,4,4,-1,-1
|
|
'bool_col','BOOLEAN',2,0,1,1,3650,3650
|
|
'tinyint_col','INT',10,0,4,4,-1,-1
|
|
'smallint_col','INT',10,0,4,4,-1,-1
|
|
'int_col','INT',10,0,4,4,-1,-1
|
|
'bigint_col','BIGINT',10,0,8,8,-1,-1
|
|
'float_col','FLOAT',10,0,4,4,-1,-1
|
|
'double_col','DOUBLE',10,0,8,8,-1,-1
|
|
'date_string_col','STRING',736,0,8,8,-1,-1
|
|
'string_col','STRING',10,0,1,1,-1,-1
|
|
'timestamp_col','STRING',7224,0,22,21.66438293457031,-1,-1
|
|
'year','STRING',2,0,-1,-1,-1,-1
|
|
'month','STRING',12,0,-1,-1,-1,-1
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
|
|
====
|
|
---- QUERY
|
|
# Avro table with an extra column definition.
|
|
compute stats avro_hive_alltypes_extra_coldef
|
|
---- RESULTS
|
|
'Updated 0 partition(s) and 12 column(s).'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
show table stats avro_hive_alltypes_extra_coldef
|
|
---- LABELS
|
|
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION, EC POLICY
|
|
---- RESULTS
|
|
'Total','',0,0,'0B','0B','','','','',''
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
show column stats avro_hive_alltypes_extra_coldef
|
|
---- LABELS
|
|
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE, #TRUES, #FALSES
|
|
---- RESULTS
|
|
'id','INT',0,0,4,4,-1,-1
|
|
'bool_col','BOOLEAN',2,0,1,1,0,0
|
|
'tinyint_col','TINYINT',0,0,1,1,-1,-1
|
|
'smallint_col','SMALLINT',0,0,2,2,-1,-1
|
|
'int_col','INT',0,0,4,4,-1,-1
|
|
'bigint_col','BIGINT',0,0,8,8,-1,-1
|
|
'float_col','FLOAT',0,0,4,4,-1,-1
|
|
'double_col','DOUBLE',0,0,8,8,-1,-1
|
|
'date_string_col','STRING',0,0,0,0,-1,-1
|
|
'string_col','STRING',0,0,0,0,-1,-1
|
|
'timestamp_col','TIMESTAMP',0,0,16,16,-1,-1
|
|
'extra_col','STRING',0,0,0,0,-1,-1
|
|
'year','INT',0,0,4,4,-1,-1
|
|
'month','INT',0,0,4,4,-1,-1
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
|
|
====
|
|
---- QUERY
|
|
# Avro table with missing two column definitions.
|
|
compute stats avro_hive_alltypes_missing_coldef
|
|
---- RESULTS
|
|
'Updated 0 partition(s) and 9 column(s).'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
show table stats avro_hive_alltypes_missing_coldef
|
|
---- LABELS
|
|
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION, EC POLICY
|
|
---- RESULTS
|
|
'Total','',0,0,'0B','0B','','','','',''
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
show column stats avro_hive_alltypes_missing_coldef
|
|
---- LABELS
|
|
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE, #TRUES, #FALSES
|
|
---- RESULTS
|
|
'id','INT',0,0,4,4,-1,-1
|
|
'bool_col','BOOLEAN',2,0,1,1,0,0
|
|
'smallint_col','SMALLINT',0,0,2,2,-1,-1
|
|
'int_col','INT',0,0,4,4,-1,-1
|
|
'bigint_col','BIGINT',0,0,8,8,-1,-1
|
|
'float_col','FLOAT',0,0,4,4,-1,-1
|
|
'double_col','DOUBLE',0,0,8,8,-1,-1
|
|
'date_string_col','STRING',0,0,0,0,-1,-1
|
|
'string_col','STRING',0,0,0,0,-1,-1
|
|
'year','INT',0,0,4,4,-1,-1
|
|
'month','INT',0,0,4,4,-1,-1
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
|
|
====
|
|
---- QUERY
|
|
# Avro table with one column definition having a different
|
|
# type than the Avro schema (bigint_col is a string).
|
|
compute stats avro_hive_alltypes_type_mismatch
|
|
---- RESULTS
|
|
'Updated 0 partition(s) and 11 column(s).'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
show table stats avro_hive_alltypes_type_mismatch
|
|
---- LABELS
|
|
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION, EC POLICY
|
|
---- RESULTS
|
|
'Total','',0,0,'0B','0B','','','','',''
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
show column stats avro_hive_alltypes_type_mismatch
|
|
---- LABELS
|
|
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE, #TRUES, #FALSES
|
|
---- RESULTS
|
|
'id','INT',0,0,4,4,-1,-1
|
|
'bool_col','BOOLEAN',2,0,1,1,0,0
|
|
'tinyint_col','TINYINT',0,0,1,1,-1,-1
|
|
'smallint_col','SMALLINT',0,0,2,2,-1,-1
|
|
'int_col','INT',0,0,4,4,-1,-1
|
|
'bigint_col','STRING',0,0,0,0,-1,-1
|
|
'float_col','FLOAT',0,0,4,4,-1,-1
|
|
'double_col','DOUBLE',0,0,8,8,-1,-1
|
|
'date_string_col','STRING',0,0,0,0,-1,-1
|
|
'string_col','STRING',0,0,0,0,-1,-1
|
|
'timestamp_col','TIMESTAMP',0,0,16,16,-1,-1
|
|
'year','INT',0,0,4,4,-1,-1
|
|
'month','INT',0,0,4,4,-1,-1
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
|
|
====
|
|
---- QUERY
|
|
# Avro table without an Avro schema created by Hive.
|
|
# The Avro schema is inferred from the column definitions,
|
|
compute stats avro_hive_no_avro_schema
|
|
---- RESULTS
|
|
'Updated 0 partition(s) and 11 column(s).'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
show table stats avro_hive_no_avro_schema
|
|
---- LABELS
|
|
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION, EC POLICY
|
|
---- RESULTS
|
|
'Total','',0,0,'0B','0B','','','','',''
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
show column stats avro_hive_no_avro_schema
|
|
---- LABELS
|
|
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE, #TRUES, #FALSES
|
|
---- RESULTS
|
|
'id','INT',0,0,4,4,-1,-1
|
|
'bool_col','BOOLEAN',2,0,1,1,0,0
|
|
'tinyint_col','INT',0,0,4,4,-1,-1
|
|
'smallint_col','INT',0,0,4,4,-1,-1
|
|
'int_col','INT',0,0,4,4,-1,-1
|
|
'bigint_col','BIGINT',0,0,8,8,-1,-1
|
|
'float_col','FLOAT',0,0,4,4,-1,-1
|
|
'double_col','DOUBLE',0,0,8,8,-1,-1
|
|
'date_string_col','STRING',0,0,0,0,-1,-1
|
|
'string_col','STRING',0,0,0,0,-1,-1
|
|
'timestamp_col','STRING',0,0,0,0,-1,-1
|
|
'year','INT',0,0,4,4,-1,-1
|
|
'month','INT',0,0,4,4,-1,-1
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
|
|
====
|
|
---- QUERY
|
|
# Test Avro table created without any column definitions.
|
|
create table avro_impala_alltypes_no_coldefs
|
|
partitioned by (year int, month int)
|
|
with serdeproperties
|
|
('avro.schema.url'='$FILESYSTEM_PREFIX/test-warehouse/avro_schemas/functional/alltypes.json')
|
|
stored as avro;
|
|
====
|
|
---- QUERY
|
|
compute stats avro_impala_alltypes_no_coldefs
|
|
---- RESULTS
|
|
'Updated 0 partition(s) and 11 column(s).'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
show table stats avro_impala_alltypes_no_coldefs
|
|
---- LABELS
|
|
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION, EC POLICY
|
|
---- RESULTS
|
|
'Total','',0,0,'0B','0B','','','','',''
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
show column stats avro_impala_alltypes_no_coldefs
|
|
---- LABELS
|
|
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE, #TRUES, #FALSES
|
|
---- RESULTS
|
|
'id','INT',0,0,4,4,-1,-1
|
|
'bool_col','BOOLEAN',2,0,1,1,0,0
|
|
'tinyint_col','INT',0,0,4,4,-1,-1
|
|
'smallint_col','INT',0,0,4,4,-1,-1
|
|
'int_col','INT',0,0,4,4,-1,-1
|
|
'bigint_col','BIGINT',0,0,8,8,-1,-1
|
|
'float_col','FLOAT',0,0,4,4,-1,-1
|
|
'double_col','DOUBLE',0,0,8,8,-1,-1
|
|
'date_string_col','STRING',0,0,0,0,-1,-1
|
|
'string_col','STRING',0,0,0,0,-1,-1
|
|
'timestamp_col','STRING',0,0,0,0,-1,-1
|
|
'year','INT',0,0,4,4,-1,-1
|
|
'month','INT',0,0,4,4,-1,-1
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
|
|
====
|
|
---- QUERY
|
|
# IMPALA-1104: Test computing stats on Avro tables created by Impala
|
|
# with mismatched column definitions and Avro schema. Mismatched column name.
|
|
create table avro_impala_alltypes_bad_colname
|
|
(id int, bool_col boolean, tinyint_col int, smallint_col int, bad_int_col int,
|
|
bigint_col bigint, float_col float, double_col double, date_string_col string,
|
|
string_col string, timestamp_col timestamp)
|
|
partitioned by (year int, month int)
|
|
with serdeproperties
|
|
('avro.schema.url'='$FILESYSTEM_PREFIX/test-warehouse/avro_schemas/functional/alltypes.json')
|
|
stored as avro;
|
|
====
|
|
---- QUERY
|
|
compute stats avro_impala_alltypes_bad_colname
|
|
---- RESULTS
|
|
'Updated 0 partition(s) and 11 column(s).'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
show table stats avro_impala_alltypes_bad_colname
|
|
---- LABELS
|
|
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION, EC POLICY
|
|
---- RESULTS
|
|
'Total','',0,0,'0B','0B','','','','',''
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
show column stats avro_impala_alltypes_bad_colname
|
|
---- LABELS
|
|
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE, #TRUES, #FALSES
|
|
---- RESULTS
|
|
'id','INT',0,0,4,4,-1,-1
|
|
'bool_col','BOOLEAN',2,0,1,1,0,0
|
|
'tinyint_col','INT',0,0,4,4,-1,-1
|
|
'smallint_col','INT',0,0,4,4,-1,-1
|
|
'int_col','INT',0,0,4,4,-1,-1
|
|
'bigint_col','BIGINT',0,0,8,8,-1,-1
|
|
'float_col','FLOAT',0,0,4,4,-1,-1
|
|
'double_col','DOUBLE',0,0,8,8,-1,-1
|
|
'date_string_col','STRING',0,0,0,0,-1,-1
|
|
'string_col','STRING',0,0,0,0,-1,-1
|
|
'timestamp_col','STRING',0,0,0,0,-1,-1
|
|
'year','INT',0,0,4,4,-1,-1
|
|
'month','INT',0,0,4,4,-1,-1
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
|
|
====
|
|
---- QUERY
|
|
# IMPALA-1104: Test computing stats on Avro tables created by Impala
|
|
# with mismatched column definitions and Avro schema. Mismatched column type.
|
|
create table avro_impala_alltypes_bad_coltype
|
|
(id int, bool_col boolean, tinyint_col int, smallint_col int, int_col int,
|
|
bigint_col bigint, float_col float, double_col bigint, date_string_col string,
|
|
string_col string, timestamp_col timestamp)
|
|
partitioned by (year int, month int)
|
|
with serdeproperties
|
|
('avro.schema.url'='$FILESYSTEM_PREFIX/test-warehouse/avro_schemas/functional/alltypes.json')
|
|
stored as avro;
|
|
====
|
|
---- QUERY
|
|
compute stats avro_impala_alltypes_bad_coltype
|
|
---- RESULTS
|
|
'Updated 0 partition(s) and 11 column(s).'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
show table stats avro_impala_alltypes_bad_coltype
|
|
---- LABELS
|
|
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION, EC POLICY
|
|
---- RESULTS
|
|
'Total','',0,0,'0B','0B','','','','',''
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
show column stats avro_impala_alltypes_bad_coltype
|
|
---- LABELS
|
|
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE, #TRUES, #FALSES
|
|
---- RESULTS
|
|
'id','INT',0,0,4,4,-1,-1
|
|
'bool_col','BOOLEAN',2,0,1,1,0,0
|
|
'tinyint_col','INT',0,0,4,4,-1,-1
|
|
'smallint_col','INT',0,0,4,4,-1,-1
|
|
'int_col','INT',0,0,4,4,-1,-1
|
|
'bigint_col','BIGINT',0,0,8,8,-1,-1
|
|
'float_col','FLOAT',0,0,4,4,-1,-1
|
|
'double_col','DOUBLE',0,0,8,8,-1,-1
|
|
'date_string_col','STRING',0,0,0,0,-1,-1
|
|
'string_col','STRING',0,0,0,0,-1,-1
|
|
'timestamp_col','STRING',0,0,0,0,-1,-1
|
|
'year','INT',0,0,4,4,-1,-1
|
|
'month','INT',0,0,4,4,-1,-1
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
|
|
====
|
|
---- QUERY
|
|
# Test that compute stats on a Hive-created Avro table without column defs
|
|
# works (HIVE-6308, IMPALA-867).
|
|
create table alltypes_no_coldef like functional_avro_snap.alltypes_no_coldef;
|
|
compute stats alltypes_no_coldef
|
|
---- RESULTS
|
|
'Updated 1 partition(s) and 11 column(s).'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
show column stats alltypes_no_coldef
|
|
---- RESULTS
|
|
'id','INT',0,0,4,4,-1,-1
|
|
'bool_col','BOOLEAN',2,0,1,1,0,0
|
|
'tinyint_col','INT',0,0,4,4,-1,-1
|
|
'smallint_col','INT',0,0,4,4,-1,-1
|
|
'int_col','INT',0,0,4,4,-1,-1
|
|
'bigint_col','BIGINT',0,0,8,8,-1,-1
|
|
'float_col','FLOAT',0,0,4,4,-1,-1
|
|
'double_col','DOUBLE',0,0,8,8,-1,-1
|
|
'date_string_col','STRING',0,0,0,0,-1,-1
|
|
'string_col','STRING',0,0,0,0,-1,-1
|
|
'timestamp_col','STRING',0,0,0,0,-1,-1
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
|
|
====
|