mirror of
https://github.com/apache/impala.git
synced 2026-01-04 18:00:57 -05:00
This patch improves the performance of the DDL queries
"Alter table add partition" and "Alter table drop partition"
as the number of partitions is scaled up.
The issue was that every time a partition was added or dropped,
the entire block metadata for that table was reloaded. This
operation was highly expensive especially as the number
of partitions became larger.
This patch handles this by adding/dropping only the added/dropped
partition's metadata to the hdfsTable (adding/dropping it to/from
the internal partition list), and incrementally updating the
corresponding data structures instead of refreshing them from scratch.
The following are the time improvements observed.
Number of partitions Time taken to add/drop Time to add/drop
(existing) a new partition (before) a new partition (now)
1 1.02s 1.02s
10 0.27s 0.27s
100 0.14s 0.14s
500 0.35s 0.35s
1000 0.91s 0.51s
10000 11.72s 0.85s
20000 21.92s 0.87s
Out of this total time (for the worst case), around 0.50s is spent in
adding and dropping the partition to the hive meta store and rest of the
time is spent in updating the catalog.
Change-Id: I359ab0af921543c0fdcb975c14b05f80f93fe803
Reviewed-on: http://gerrit.sjc.cloudera.com:8080/3291
Reviewed-by: Anusha Dasarakothapalli <anusha.dasarakothapalli@cloudera.com>
Tested-by: jenkins
707 lines
21 KiB
Plaintext
707 lines
21 KiB
Plaintext
====
|
|
---- QUERY
|
|
# test computing stats on a partitioned text table with all types
|
|
create table compute_stats_db.alltypes like functional.alltypes;
|
|
insert into compute_stats_db.alltypes partition(year, month)
|
|
select * from functional.alltypes;
|
|
====
|
|
---- QUERY
|
|
compute stats compute_stats_db.alltypes
|
|
---- RESULTS
|
|
'Updated 24 partition(s) and 11 column(s).'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
show table stats compute_stats_db.alltypes
|
|
---- LABELS
|
|
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, FORMAT
|
|
---- RESULTS
|
|
2009,1,310,1,'24.56KB','NOT CACHED','TEXT'
|
|
2009,2,280,1,'22.27KB','NOT CACHED','TEXT'
|
|
2009,3,310,1,'24.67KB','NOT CACHED','TEXT'
|
|
2009,4,300,1,'24.06KB','NOT CACHED','TEXT'
|
|
2009,5,310,1,'24.97KB','NOT CACHED','TEXT'
|
|
2009,6,300,1,'24.16KB','NOT CACHED','TEXT'
|
|
2009,7,310,1,'24.97KB','NOT CACHED','TEXT'
|
|
2009,8,310,1,'24.97KB','NOT CACHED','TEXT'
|
|
2009,9,300,1,'24.16KB','NOT CACHED','TEXT'
|
|
2009,10,310,1,'24.97KB','NOT CACHED','TEXT'
|
|
2009,11,300,1,'24.16KB','NOT CACHED','TEXT'
|
|
2009,12,310,1,'24.97KB','NOT CACHED','TEXT'
|
|
2010,1,310,1,'24.97KB','NOT CACHED','TEXT'
|
|
2010,2,280,1,'22.54KB','NOT CACHED','TEXT'
|
|
2010,3,310,1,'24.97KB','NOT CACHED','TEXT'
|
|
2010,4,300,1,'24.16KB','NOT CACHED','TEXT'
|
|
2010,5,310,1,'24.97KB','NOT CACHED','TEXT'
|
|
2010,6,300,1,'24.16KB','NOT CACHED','TEXT'
|
|
2010,7,310,1,'24.97KB','NOT CACHED','TEXT'
|
|
2010,8,310,1,'24.97KB','NOT CACHED','TEXT'
|
|
2010,9,300,1,'24.16KB','NOT CACHED','TEXT'
|
|
2010,10,310,1,'24.97KB','NOT CACHED','TEXT'
|
|
2010,11,300,1,'24.16KB','NOT CACHED','TEXT'
|
|
2010,12,310,1,'24.97KB','NOT CACHED','TEXT'
|
|
Total,,7300,24,'586.84KB','0B',''
|
|
---- TYPES
|
|
INT, INT, BIGINT, BIGINT, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
show column stats compute_stats_db.alltypes
|
|
---- LABELS
|
|
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
|
|
---- RESULTS
|
|
'id','INT',8161,-1,4,4
|
|
'bool_col','BOOLEAN',2,-1,1,1
|
|
'tinyint_col','TINYINT',10,-1,1,1
|
|
'smallint_col','SMALLINT',10,-1,2,2
|
|
'int_col','INT',10,-1,4,4
|
|
'bigint_col','BIGINT',10,-1,8,8
|
|
'float_col','FLOAT',10,-1,4,4
|
|
'double_col','DOUBLE',10,-1,8,8
|
|
'date_string_col','STRING',666,-1,8,8
|
|
'string_col','STRING',10,-1,1,1
|
|
'timestamp_col','TIMESTAMP',5678,-1,16,16
|
|
'year','INT',2,0,4,4
|
|
'month','INT',12,0,4,4
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, DOUBLE, DOUBLE
|
|
====
|
|
---- QUERY
|
|
# drop stats from this table
|
|
drop stats compute_stats_db.alltypes
|
|
====
|
|
---- QUERY
|
|
show table stats compute_stats_db.alltypes
|
|
---- LABELS
|
|
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, FORMAT
|
|
---- RESULTS
|
|
2009,1,-1,1,'24.56KB','NOT CACHED','TEXT'
|
|
2009,2,-1,1,'22.27KB','NOT CACHED','TEXT'
|
|
2009,3,-1,1,'24.67KB','NOT CACHED','TEXT'
|
|
2009,4,-1,1,'24.06KB','NOT CACHED','TEXT'
|
|
2009,5,-1,1,'24.97KB','NOT CACHED','TEXT'
|
|
2009,6,-1,1,'24.16KB','NOT CACHED','TEXT'
|
|
2009,7,-1,1,'24.97KB','NOT CACHED','TEXT'
|
|
2009,8,-1,1,'24.97KB','NOT CACHED','TEXT'
|
|
2009,9,-1,1,'24.16KB','NOT CACHED','TEXT'
|
|
2009,10,-1,1,'24.97KB','NOT CACHED','TEXT'
|
|
2009,11,-1,1,'24.16KB','NOT CACHED','TEXT'
|
|
2009,12,-1,1,'24.97KB','NOT CACHED','TEXT'
|
|
2010,1,-1,1,'24.97KB','NOT CACHED','TEXT'
|
|
2010,2,-1,1,'22.54KB','NOT CACHED','TEXT'
|
|
2010,3,-1,1,'24.97KB','NOT CACHED','TEXT'
|
|
2010,4,-1,1,'24.16KB','NOT CACHED','TEXT'
|
|
2010,5,-1,1,'24.97KB','NOT CACHED','TEXT'
|
|
2010,6,-1,1,'24.16KB','NOT CACHED','TEXT'
|
|
2010,7,-1,1,'24.97KB','NOT CACHED','TEXT'
|
|
2010,8,-1,1,'24.97KB','NOT CACHED','TEXT'
|
|
2010,9,-1,1,'24.16KB','NOT CACHED','TEXT'
|
|
2010,10,-1,1,'24.97KB','NOT CACHED','TEXT'
|
|
2010,11,-1,1,'24.16KB','NOT CACHED','TEXT'
|
|
2010,12,-1,1,'24.97KB','NOT CACHED','TEXT'
|
|
Total,,-1,24,'586.84KB','0B',''
|
|
---- TYPES
|
|
INT, INT, BIGINT, BIGINT, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Note - the NDV for partition columns is read from the table metadata.
|
|
show column stats compute_stats_db.alltypes
|
|
---- LABELS
|
|
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
|
|
---- RESULTS
|
|
'id','INT',-1,-1,4,4
|
|
'bool_col','BOOLEAN',-1,-1,1,1
|
|
'tinyint_col','TINYINT',-1,-1,1,1
|
|
'smallint_col','SMALLINT',-1,-1,2,2
|
|
'int_col','INT',-1,-1,4,4
|
|
'bigint_col','BIGINT',-1,-1,8,8
|
|
'float_col','FLOAT',-1,-1,4,4
|
|
'double_col','DOUBLE',-1,-1,8,8
|
|
'date_string_col','STRING',-1,-1,-1,-1
|
|
'string_col','STRING',-1,-1,-1,-1
|
|
'timestamp_col','TIMESTAMP',-1,-1,16,16
|
|
'year','INT',2,0,4,4
|
|
'month','INT',12,0,4,4
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, DOUBLE, DOUBLE
|
|
====
|
|
# Add partitions with NULL values and check for stats.
|
|
---- QUERY
|
|
alter table compute_stats_db.alltypes add partition (year=NULL, month=NULL)
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
show column stats compute_stats_db.alltypes
|
|
---- LABELS
|
|
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
|
|
---- RESULTS
|
|
'id','INT',-1,-1,4,4
|
|
'bool_col','BOOLEAN',-1,-1,1,1
|
|
'tinyint_col','TINYINT',-1,-1,1,1
|
|
'smallint_col','SMALLINT',-1,-1,2,2
|
|
'int_col','INT',-1,-1,4,4
|
|
'bigint_col','BIGINT',-1,-1,8,8
|
|
'float_col','FLOAT',-1,-1,4,4
|
|
'double_col','DOUBLE',-1,-1,8,8
|
|
'date_string_col','STRING',-1,-1,-1,-1
|
|
'string_col','STRING',-1,-1,-1,-1
|
|
'timestamp_col','TIMESTAMP',-1,-1,16,16
|
|
'year','INT',3,1,4,4
|
|
'month','INT',13,1,4,4
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, DOUBLE, DOUBLE
|
|
====
|
|
---- QUERY
|
|
alter table compute_stats_db.alltypes add partition (year=2011, month=NULL)
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
show column stats compute_stats_db.alltypes
|
|
---- LABELS
|
|
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
|
|
---- RESULTS
|
|
'id','INT',-1,-1,4,4
|
|
'bool_col','BOOLEAN',-1,-1,1,1
|
|
'tinyint_col','TINYINT',-1,-1,1,1
|
|
'smallint_col','SMALLINT',-1,-1,2,2
|
|
'int_col','INT',-1,-1,4,4
|
|
'bigint_col','BIGINT',-1,-1,8,8
|
|
'float_col','FLOAT',-1,-1,4,4
|
|
'double_col','DOUBLE',-1,-1,8,8
|
|
'date_string_col','STRING',-1,-1,-1,-1
|
|
'string_col','STRING',-1,-1,-1,-1
|
|
'timestamp_col','TIMESTAMP',-1,-1,16,16
|
|
'year','INT',4,1,4,4
|
|
'month','INT',13,2,4,4
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, DOUBLE, DOUBLE
|
|
====
|
|
# Drop the partitions with NULL values and check for stats.
|
|
---- QUERY
|
|
alter table compute_stats_db.alltypes drop partition (year=NULL, month=NULL)
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
show column stats compute_stats_db.alltypes
|
|
---- LABELS
|
|
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
|
|
---- RESULTS
|
|
'id','INT',-1,-1,4,4
|
|
'bool_col','BOOLEAN',-1,-1,1,1
|
|
'tinyint_col','TINYINT',-1,-1,1,1
|
|
'smallint_col','SMALLINT',-1,-1,2,2
|
|
'int_col','INT',-1,-1,4,4
|
|
'bigint_col','BIGINT',-1,-1,8,8
|
|
'float_col','FLOAT',-1,-1,4,4
|
|
'double_col','DOUBLE',-1,-1,8,8
|
|
'date_string_col','STRING',-1,-1,-1,-1
|
|
'string_col','STRING',-1,-1,-1,-1
|
|
'timestamp_col','TIMESTAMP',-1,-1,16,16
|
|
'year','INT',3,0,4,4
|
|
'month','INT',13,1,4,4
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, DOUBLE, DOUBLE
|
|
====
|
|
---- QUERY
|
|
alter table compute_stats_db.alltypes drop partition (year=2011, month=NULL)
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
show column stats compute_stats_db.alltypes
|
|
---- LABELS
|
|
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
|
|
---- RESULTS
|
|
'id','INT',-1,-1,4,4
|
|
'bool_col','BOOLEAN',-1,-1,1,1
|
|
'tinyint_col','TINYINT',-1,-1,1,1
|
|
'smallint_col','SMALLINT',-1,-1,2,2
|
|
'int_col','INT',-1,-1,4,4
|
|
'bigint_col','BIGINT',-1,-1,8,8
|
|
'float_col','FLOAT',-1,-1,4,4
|
|
'double_col','DOUBLE',-1,-1,8,8
|
|
'date_string_col','STRING',-1,-1,-1,-1
|
|
'string_col','STRING',-1,-1,-1,-1
|
|
'timestamp_col','TIMESTAMP',-1,-1,16,16
|
|
'year','INT',2,0,4,4
|
|
'month','INT',12,0,4,4
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, DOUBLE, DOUBLE
|
|
====
|
|
---- QUERY
|
|
# drop stats from this table a second time, should not throw an error.
|
|
drop stats compute_stats_db.alltypes
|
|
====
|
|
---- QUERY
|
|
# test computing stats on an partitioned text table with all types
|
|
create table compute_stats_db.alltypesnopart like functional.alltypesnopart;
|
|
insert into compute_stats_db.alltypesnopart
|
|
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col,
|
|
double_col, date_string_col, string_col, timestamp_col
|
|
from functional.alltypessmall;
|
|
====
|
|
---- QUERY
|
|
compute stats compute_stats_db.alltypesnopart
|
|
---- RESULTS
|
|
'Updated 1 partition(s) and 11 column(s).'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
show table stats compute_stats_db.alltypesnopart
|
|
---- LABELS
|
|
#ROWS, #FILES, SIZE, BYTES CACHED, FORMAT
|
|
---- RESULTS
|
|
100,3,'7.73KB','NOT CACHED','TEXT'
|
|
---- TYPES
|
|
BIGINT, BIGINT, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
show column stats compute_stats_db.alltypesnopart
|
|
---- LABELS
|
|
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
|
|
---- RESULTS
|
|
'id','INT',105,-1,4,4
|
|
'bool_col','BOOLEAN',2,-1,1,1
|
|
'tinyint_col','TINYINT',10,-1,1,1
|
|
'smallint_col','SMALLINT',10,-1,2,2
|
|
'int_col','INT',10,-1,4,4
|
|
'bigint_col','BIGINT',10,-1,8,8
|
|
'float_col','FLOAT',10,-1,4,4
|
|
'double_col','DOUBLE',10,-1,8,8
|
|
'date_string_col','STRING',12,-1,8,8
|
|
'string_col','STRING',10,-1,1,1
|
|
'timestamp_col','TIMESTAMP',101,-1,16,16
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, DOUBLE, DOUBLE
|
|
====
|
|
---- QUERY
|
|
# test computing stats on a partitioned parquet table with all types
|
|
create table compute_stats_db.alltypes_parquet
|
|
like functional_parquet.alltypes;
|
|
insert into compute_stats_db.alltypes_parquet partition(year, month)
|
|
select * from functional.alltypes;
|
|
====
|
|
---- QUERY
|
|
compute stats compute_stats_db.alltypes_parquet
|
|
---- RESULTS
|
|
'Updated 24 partition(s) and 11 column(s).'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
show table stats compute_stats_db.alltypes_parquet
|
|
---- LABELS
|
|
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, FORMAT
|
|
---- RESULTS
|
|
2009,1,310,1,regex:.+KB,'NOT CACHED','PARQUET'
|
|
2009,2,280,1,regex:.+KB,'NOT CACHED','PARQUET'
|
|
2009,3,310,1,regex:.+KB,'NOT CACHED','PARQUET'
|
|
2009,4,300,1,regex:.+KB,'NOT CACHED','PARQUET'
|
|
2009,5,310,1,regex:.+KB,'NOT CACHED','PARQUET'
|
|
2009,6,300,1,regex:.+KB,'NOT CACHED','PARQUET'
|
|
2009,7,310,1,regex:.+KB,'NOT CACHED','PARQUET'
|
|
2009,8,310,1,regex:.+KB,'NOT CACHED','PARQUET'
|
|
2009,9,300,1,regex:.+KB,'NOT CACHED','PARQUET'
|
|
2009,10,310,1,regex:.+KB,'NOT CACHED','PARQUET'
|
|
2009,11,300,1,regex:.+KB,'NOT CACHED','PARQUET'
|
|
2009,12,310,1,regex:.+KB,'NOT CACHED','PARQUET'
|
|
2010,1,310,1,regex:.+KB,'NOT CACHED','PARQUET'
|
|
2010,2,280,1,regex:.+KB,'NOT CACHED','PARQUET'
|
|
2010,3,310,1,regex:.+KB,'NOT CACHED','PARQUET'
|
|
2010,4,300,1,regex:.+KB,'NOT CACHED','PARQUET'
|
|
2010,5,310,1,regex:.+KB,'NOT CACHED','PARQUET'
|
|
2010,6,300,1,regex:.+KB,'NOT CACHED','PARQUET'
|
|
2010,7,310,1,regex:.+KB,'NOT CACHED','PARQUET'
|
|
2010,8,310,1,regex:.+KB,'NOT CACHED','PARQUET'
|
|
2010,9,300,1,regex:.+KB,'NOT CACHED','PARQUET'
|
|
2010,10,310,1,regex:.+KB,'NOT CACHED','PARQUET'
|
|
2010,11,300,1,regex:.+KB,'NOT CACHED','PARQUET'
|
|
2010,12,310,1,regex:.+KB,'NOT CACHED','PARQUET'
|
|
Total,,7300,24,regex:.+KB,'0B',''
|
|
---- TYPES
|
|
INT, INT, BIGINT, BIGINT, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
show column stats compute_stats_db.alltypes_parquet
|
|
---- LABELS
|
|
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
|
|
---- RESULTS
|
|
'id','INT',8161,-1,4,4
|
|
'bool_col','BOOLEAN',2,-1,1,1
|
|
'tinyint_col','TINYINT',10,-1,1,1
|
|
'smallint_col','SMALLINT',10,-1,2,2
|
|
'int_col','INT',10,-1,4,4
|
|
'bigint_col','BIGINT',10,-1,8,8
|
|
'float_col','FLOAT',10,-1,4,4
|
|
'double_col','DOUBLE',10,-1,8,8
|
|
'date_string_col','STRING',666,-1,8,8
|
|
'string_col','STRING',10,-1,1,1
|
|
'timestamp_col','TIMESTAMP',5678,-1,16,16
|
|
'year','INT',2,0,4,4
|
|
'month','INT',12,0,4,4
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, DOUBLE, DOUBLE
|
|
====
|
|
---- QUERY
|
|
# test computing stats on an HBase table
|
|
create table compute_stats_db.alltypessmall_hbase
|
|
like functional_hbase.alltypessmall;
|
|
====
|
|
---- QUERY
|
|
compute stats compute_stats_db.alltypessmall_hbase
|
|
---- RESULTS
|
|
'Updated 1 partition(s) and 13 column(s).'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
show table stats compute_stats_db.alltypessmall_hbase
|
|
---- LABELS
|
|
REGION LOCATION, START ROWKEY, EST. #ROWS, SIZE
|
|
---- RESULTS: VERIFY_IS_EQUAL
|
|
regex:.+,'',regex:.+,regex:.+KB
|
|
regex:.+,'1',regex:.+,regex:.+KB
|
|
regex:.+,'3',regex:.+,regex:.+KB
|
|
regex:.+,'5',regex:.+,regex:.+KB
|
|
regex:.+,'7',regex:.+,regex:.+KB
|
|
regex:.+,'9',regex:.+,regex:.+KB
|
|
'Total','',regex:.+,regex:.+KB
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, STRING
|
|
====
|
|
---- QUERY
|
|
show column stats compute_stats_db.alltypessmall_hbase
|
|
---- LABELS
|
|
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
|
|
---- RESULTS
|
|
'id','INT',105,-1,4,4
|
|
'bigint_col','BIGINT',10,-1,8,8
|
|
'bool_col','BOOLEAN',2,-1,1,1
|
|
'date_string_col','STRING',12,-1,8,8
|
|
'double_col','DOUBLE',10,-1,8,8
|
|
'float_col','FLOAT',10,-1,4,4
|
|
'int_col','INT',10,-1,4,4
|
|
'month','INT',4,-1,4,4
|
|
'smallint_col','SMALLINT',10,-1,2,2
|
|
'string_col','STRING',10,-1,1,1
|
|
'timestamp_col','TIMESTAMP',101,-1,16,16
|
|
'tinyint_col','TINYINT',10,-1,1,1
|
|
'year','INT',1,-1,4,4
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, DOUBLE, DOUBLE
|
|
====
|
|
---- QUERY
|
|
# test computing stats on an binary HBase table
|
|
create table compute_stats_db.alltypessmall_hbase_bin
|
|
like functional_hbase.alltypessmallbinary;
|
|
====
|
|
---- QUERY
|
|
compute stats compute_stats_db.alltypessmall_hbase_bin
|
|
---- RESULTS
|
|
'Updated 1 partition(s) and 13 column(s).'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY: VERIFY_IS_EQUAL
|
|
show table stats compute_stats_db.alltypessmall_hbase_bin
|
|
---- LABELS
|
|
REGION LOCATION, START ROWKEY, EST. #ROWS, SIZE
|
|
---- RESULTS
|
|
regex:.+,'',regex:.+,regex:.+
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, STRING
|
|
====
|
|
---- QUERY
|
|
show column stats compute_stats_db.alltypessmall_hbase_bin
|
|
---- LABELS
|
|
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
|
|
---- RESULTS
|
|
'id','INT',105,-1,4,4
|
|
'bigint_col','BIGINT',10,-1,8,8
|
|
'bool_col','BOOLEAN',2,-1,1,1
|
|
'date_string_col','STRING',12,-1,8,8
|
|
'double_col','DOUBLE',10,-1,8,8
|
|
'float_col','FLOAT',10,-1,4,4
|
|
'int_col','INT',10,-1,4,4
|
|
'month','INT',4,-1,4,4
|
|
'smallint_col','SMALLINT',10,-1,2,2
|
|
'string_col','STRING',10,-1,1,1
|
|
'timestamp_col','TIMESTAMP',101,-1,16,16
|
|
'tinyint_col','TINYINT',10,-1,1,1
|
|
'year','INT',1,-1,4,4
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, DOUBLE, DOUBLE
|
|
====
|
|
---- QUERY
|
|
# test computing stats on an empty table
|
|
create table compute_stats_db.alltypes_empty like functional_rc_snap.alltypes
|
|
====
|
|
---- QUERY
|
|
compute stats compute_stats_db.alltypes_empty
|
|
---- RESULTS
|
|
'Updated 0 partition(s) and 11 column(s).'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
show table stats compute_stats_db.alltypes_empty
|
|
---- LABELS
|
|
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, FORMAT
|
|
---- RESULTS
|
|
Total,,0,0,'0B','0B',''
|
|
---- TYPES
|
|
INT, INT, BIGINT, BIGINT, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
show column stats compute_stats_db.alltypes_empty
|
|
---- LABELS
|
|
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
|
|
---- RESULTS
|
|
'id','INT',0,-1,4,4
|
|
'bool_col','BOOLEAN',2,-1,1,1
|
|
'tinyint_col','TINYINT',0,-1,1,1
|
|
'smallint_col','SMALLINT',0,-1,2,2
|
|
'int_col','INT',0,-1,4,4
|
|
'bigint_col','BIGINT',0,-1,8,8
|
|
'float_col','FLOAT',0,-1,4,4
|
|
'double_col','DOUBLE',0,-1,8,8
|
|
'date_string_col','STRING',0,-1,0,0
|
|
'string_col','STRING',0,-1,0,0
|
|
'timestamp_col','TIMESTAMP',0,-1,16,16
|
|
'year','INT',0,0,4,4
|
|
'month','INT',0,0,4,4
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, DOUBLE, DOUBLE
|
|
====
|
|
---- QUERY
|
|
# IMP-1227: Test computing stats on an HBase table that has a
|
|
# complex-typed column that Impala does not yet support.
|
|
create table compute_stats_db.allcomplextypes
|
|
like functional_hbase.allcomplextypes;
|
|
====
|
|
---- QUERY
|
|
compute stats compute_stats_db.allcomplextypes
|
|
---- RESULTS
|
|
'Updated 1 partition(s) and 3 column(s).'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY: VERIFY_IS_EQUAL
|
|
show table stats compute_stats_db.allcomplextypes
|
|
---- LABELS
|
|
REGION LOCATION, START ROWKEY, EST. #ROWS, SIZE
|
|
---- RESULTS
|
|
regex:.+,'',regex:.+,regex:.+
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, STRING
|
|
====
|
|
---- QUERY
|
|
show column stats compute_stats_db.allcomplextypes
|
|
---- LABELS
|
|
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
|
|
---- RESULTS
|
|
'id','INT',0,-1,4,4
|
|
'array_array_col','ARRAY<ARRAY<INT>>',-1,-1,-1,-1
|
|
'array_map_col','MAP<STRING,ARRAY<INT>>',-1,-1,-1,-1
|
|
'complex_nested_struct_col','STRUCT<f1:INT,f2:ARRAY<STRUCT<f11:BIGINT,f12:MAP<STRING,STRUCT<f21:BIGINT>>>>>',-1,-1,-1,-1
|
|
'complex_struct_col','STRUCT<f1:INT,f2:ARRAY<INT>,f3:MAP<STRING,INT>>',-1,-1,-1,-1
|
|
'int_array_col','ARRAY<INT>',-1,-1,-1,-1
|
|
'int_map_col','MAP<STRING,INT>',-1,-1,-1,-1
|
|
'int_struct_col','STRUCT<f1:INT,f2:INT>',-1,-1,-1,-1
|
|
'map_array_col','ARRAY<MAP<STRING,INT>>',-1,-1,-1,-1
|
|
'map_map_col','MAP<STRING,MAP<STRING,INT>>',-1,-1,-1,-1
|
|
'month','INT',0,-1,4,4
|
|
'nested_struct_col','STRUCT<f1:INT,f2:STRUCT<f11:BIGINT,f12:STRUCT<f21:BIGINT>>>',-1,-1,-1,-1
|
|
'struct_array_col','ARRAY<STRUCT<f1:BIGINT,f2:STRING>>',-1,-1,-1,-1
|
|
'struct_map_col','MAP<STRING,STRUCT<f1:BIGINT,f2:STRING>>',-1,-1,-1,-1
|
|
'year','INT',0,-1,4,4
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, DOUBLE, DOUBLE
|
|
====
|
|
---- QUERY
|
|
# IMPALA-867: Test computing stats on Avro tables with matching/mismatched
|
|
# column definitions and Avro schema. Clone the used tables here.
|
|
create table compute_stats_db.avro_alltypes
|
|
like functional_avro_snap.alltypes;
|
|
create table compute_stats_db.avro_alltypes_extra_coldef
|
|
like functional_avro_snap.alltypes_extra_coldef;
|
|
create table compute_stats_db.avro_alltypes_missing_coldef
|
|
like functional_avro_snap.alltypes_missing_coldef;
|
|
create table compute_stats_db.avro_alltypes_type_mismatch
|
|
like functional_avro_snap.alltypes_type_mismatch;
|
|
====
|
|
---- QUERY
|
|
# Avro table with matching column definitions and Avro schema
|
|
compute stats compute_stats_db.avro_alltypes
|
|
---- RESULTS
|
|
'Updated 0 partition(s) and 11 column(s).'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
show table stats compute_stats_db.avro_alltypes
|
|
---- LABELS
|
|
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, FORMAT
|
|
---- RESULTS
|
|
Total,,0,0,'0B','0B',''
|
|
---- TYPES
|
|
INT, INT, BIGINT, BIGINT, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
show column stats compute_stats_db.avro_alltypes
|
|
---- LABELS
|
|
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
|
|
---- RESULTS
|
|
'id','INT',0,-1,4,4
|
|
'bool_col','BOOLEAN',2,-1,1,1
|
|
'tinyint_col','INT',0,-1,4,4
|
|
'smallint_col','INT',0,-1,4,4
|
|
'int_col','INT',0,-1,4,4
|
|
'bigint_col','BIGINT',0,-1,8,8
|
|
'float_col','FLOAT',0,-1,4,4
|
|
'double_col','DOUBLE',0,-1,8,8
|
|
'date_string_col','STRING',0,-1,0,0
|
|
'string_col','STRING',0,-1,0,0
|
|
'timestamp_col','STRING',0,-1,0,0
|
|
'year','INT',0,0,4,4
|
|
'month','INT',0,0,4,4
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, DOUBLE, DOUBLE
|
|
====
|
|
---- QUERY
|
|
# Avro table with an extra column definition.
|
|
compute stats compute_stats_db.avro_alltypes_extra_coldef
|
|
---- RESULTS
|
|
'Updated 0 partition(s) and 12 column(s).'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
show table stats compute_stats_db.avro_alltypes_extra_coldef
|
|
---- LABELS
|
|
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, FORMAT
|
|
---- RESULTS
|
|
Total,,0,0,'0B','0B',''
|
|
---- TYPES
|
|
INT, INT, BIGINT, BIGINT, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
show column stats compute_stats_db.avro_alltypes_extra_coldef
|
|
---- LABELS
|
|
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
|
|
---- RESULTS
|
|
'id','INT',0,-1,4,4
|
|
'bool_col','BOOLEAN',2,-1,1,1
|
|
'tinyint_col','TINYINT',0,-1,1,1
|
|
'smallint_col','SMALLINT',0,-1,2,2
|
|
'int_col','INT',0,-1,4,4
|
|
'bigint_col','BIGINT',0,-1,8,8
|
|
'float_col','FLOAT',0,-1,4,4
|
|
'double_col','DOUBLE',0,-1,8,8
|
|
'date_string_col','STRING',0,-1,0,0
|
|
'string_col','STRING',0,-1,0,0
|
|
'timestamp_col','TIMESTAMP',0,-1,16,16
|
|
'extra_col','STRING',0,-1,0,0
|
|
'year','INT',0,0,4,4
|
|
'month','INT',0,0,4,4
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, DOUBLE, DOUBLE
|
|
====
|
|
---- QUERY
|
|
# Avro table with missing two column definitions.
|
|
compute stats compute_stats_db.avro_alltypes_missing_coldef
|
|
---- RESULTS
|
|
'Updated 0 partition(s) and 9 column(s).'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
show table stats compute_stats_db.avro_alltypes_missing_coldef
|
|
---- LABELS
|
|
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, FORMAT
|
|
---- RESULTS
|
|
Total,,0,0,'0B','0B',''
|
|
---- TYPES
|
|
INT, INT, BIGINT, BIGINT, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
show column stats compute_stats_db.avro_alltypes_missing_coldef
|
|
---- LABELS
|
|
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
|
|
---- RESULTS
|
|
'id','INT',0,-1,4,4
|
|
'bool_col','BOOLEAN',2,-1,1,1
|
|
'smallint_col','SMALLINT',0,-1,2,2
|
|
'int_col','INT',0,-1,4,4
|
|
'bigint_col','BIGINT',0,-1,8,8
|
|
'float_col','FLOAT',0,-1,4,4
|
|
'double_col','DOUBLE',0,-1,8,8
|
|
'date_string_col','STRING',0,-1,0,0
|
|
'string_col','STRING',0,-1,0,0
|
|
'year','INT',0,0,4,4
|
|
'month','INT',0,0,4,4
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, DOUBLE, DOUBLE
|
|
====
|
|
---- QUERY
|
|
# Avro table with one column definition having a different
|
|
# type than the Avro schema (bigint_col is a string).
|
|
compute stats compute_stats_db.avro_alltypes_type_mismatch
|
|
---- RESULTS
|
|
'Updated 0 partition(s) and 11 column(s).'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
show table stats compute_stats_db.avro_alltypes_type_mismatch
|
|
---- LABELS
|
|
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, FORMAT
|
|
---- RESULTS
|
|
Total,,0,0,'0B','0B',''
|
|
---- TYPES
|
|
INT, INT, BIGINT, BIGINT, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
show column stats compute_stats_db.avro_alltypes_type_mismatch
|
|
---- LABELS
|
|
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
|
|
---- RESULTS
|
|
'id','INT',0,-1,4,4
|
|
'bool_col','BOOLEAN',2,-1,1,1
|
|
'tinyint_col','TINYINT',0,-1,1,1
|
|
'smallint_col','SMALLINT',0,-1,2,2
|
|
'int_col','INT',0,-1,4,4
|
|
'bigint_col','STRING',0,-1,0,0
|
|
'float_col','FLOAT',0,-1,4,4
|
|
'double_col','DOUBLE',0,-1,8,8
|
|
'date_string_col','STRING',0,-1,0,0
|
|
'string_col','STRING',0,-1,0,0
|
|
'timestamp_col','TIMESTAMP',0,-1,16,16
|
|
'year','INT',0,0,4,4
|
|
'month','INT',0,0,4,4
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, DOUBLE, DOUBLE
|
|
====
|
|
---- QUERY
|
|
# For IMPALA-1055, using a database called `parquet` to test cases where the name of
|
|
# the database is a keyword.
|
|
CREATE TABLE `parquet`.billion_parquet(id INT);
|
|
====
|
|
---- QUERY
|
|
COMPUTE STATS `parquet`.billion_parquet
|
|
---- RESULTS
|
|
'Updated 1 partition(s) and 1 column(s).'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
CREATE TABLE `parquet`.`parquet`(id INT)
|
|
====
|
|
---- QUERY
|
|
COMPUTE STATS `parquet`.`parquet`
|
|
---- RESULTS
|
|
'Updated 1 partition(s) and 1 column(s).'
|
|
---- TYPES
|
|
STRING
|
|
====
|