mirror of
https://github.com/apache/impala.git
synced 2026-01-01 18:00:30 -05:00
Adds the TABLESAMPLE clause for COMPUTE STATS.
Syntax:
COMPUTE STATS <table> TABLESAMPLE SYSTEM(<number>) [REPEATABLE(<number>)]
Computes and replaces the table-level row count and total file size,
as well as all table-level column statistics. Existing partition-level
row counts are not modified.
The TABLESAMPLE clause can be used to limit the scanned data volume to
a desired percentage. When sampling, the unmodified results of the
COMPUTE STATS queries are sent to the CatalogServer. There, the stats
are extrapolated before storing them into the HMS so as not to confuse
other engines like Hive/SparkSQL which may rely on the shared HMS
fields being accurate.
Limitations
- Only works for HDFS tables
- TABLESAMPLE is not supported for COMPUTE INCREMENTAL STATS
- TABLESAMPLE requires --enable_stats_extrapolation=true
Changes to EXPLAIN
The stored statistics from the HMS are more clearly displayed under
a 'stored statistics' section. Example:
00:SCAN HDFS [functional.alltypes, RANDOM]
partitions=24/24 files=24 size=478.45KB
stored statistics:
table: rows=7300 size=478.45KB
partitions: 24/24 rows=7300
columns: all
Testing:
- added new functional tests
- core/hdfs run passed
Change-Id: I7f3e72471ac563adada4a4156033a85852b7c8b7
Reviewed-on: http://gerrit.cloudera.org:8080/8136
Reviewed-by: Alex Behm <alex.behm@cloudera.com>
Tested-by: Impala Public Jenkins
625 lines
26 KiB
Plaintext
625 lines
26 KiB
Plaintext
====
|
|
---- QUERY
|
|
# test computing stats on a partitioned text table with all types
|
|
create table alltypes_incremental like functional.alltypes;
|
|
insert into alltypes_incremental partition(year, month)
|
|
select * from functional.alltypes;
|
|
====
|
|
---- QUERY
|
|
compute incremental stats alltypes_incremental
|
|
---- RESULTS
|
|
'Updated 24 partition(s) and 11 column(s).'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
show table stats alltypes_incremental
|
|
---- RESULTS
|
|
'2009','1',310,1,'24.56KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2009','2',280,1,'22.27KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2009','3',310,1,'24.67KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2009','4',300,1,'24.06KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2009','5',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2009','6',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2009','7',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2009','8',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2009','9',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2009','10',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2009','11',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2009','12',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2010','1',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2010','2',280,1,'22.54KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2010','3',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2010','4',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2010','5',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2010','6',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2010','7',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2010','8',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2010','9',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2010','10',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2010','11',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2010','12',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'Total','',7300,24,'586.84KB','0B','','','',''
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
show column stats alltypes_incremental
|
|
---- LABELS
|
|
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
|
|
---- RESULTS
|
|
'id','INT',7300,-1,4,4
|
|
'bool_col','BOOLEAN',2,-1,1,1
|
|
'tinyint_col','TINYINT',10,-1,1,1
|
|
'smallint_col','SMALLINT',10,-1,2,2
|
|
'int_col','INT',10,-1,4,4
|
|
'bigint_col','BIGINT',10,-1,8,8
|
|
'float_col','FLOAT',10,-1,4,4
|
|
'double_col','DOUBLE',10,-1,8,8
|
|
'date_string_col','STRING',736,-1,8,8
|
|
'string_col','STRING',10,-1,1,1
|
|
'timestamp_col','TIMESTAMP',7300,-1,16,16
|
|
'year','INT',2,0,4,4
|
|
'month','INT',12,0,4,4
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
|
|
====
|
|
---- QUERY
|
|
drop incremental stats alltypes_incremental partition(year=2010, month=12)
|
|
====
|
|
---- QUERY
|
|
show table stats alltypes_incremental;
|
|
---- RESULTS
|
|
'2009','1',310,1,'24.56KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2009','2',280,1,'22.27KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2009','3',310,1,'24.67KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2009','4',300,1,'24.06KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2009','5',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2009','6',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2009','7',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2009','8',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2009','9',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2009','10',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2009','11',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2009','12',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2010','1',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2010','2',280,1,'22.54KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2010','3',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2010','4',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2010','5',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2010','6',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2010','7',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2010','8',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2010','9',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2010','10',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2010','11',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2010','12',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'Total','',7300,24,'586.84KB','0B','','','',''
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
compute incremental stats alltypes_incremental
|
|
---- RESULTS
|
|
'Updated 1 partition(s) and 11 column(s).'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
show table stats alltypes_incremental;
|
|
---- RESULTS
|
|
'2009','1',310,1,'24.56KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2009','2',280,1,'22.27KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2009','3',310,1,'24.67KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2009','4',300,1,'24.06KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2009','5',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2009','6',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2009','7',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2009','8',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2009','9',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2009','10',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2009','11',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2009','12',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2010','1',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2010','2',280,1,'22.54KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2010','3',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2010','4',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2010','5',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2010','6',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2010','7',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2010','8',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2010','9',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2010','10',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2010','11',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2010','12',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'Total','',7300,24,'586.84KB','0B','','','',''
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
show column stats alltypes_incremental
|
|
---- LABELS
|
|
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
|
|
---- RESULTS
|
|
'id','INT',7300,-1,4,4
|
|
'bool_col','BOOLEAN',2,-1,1,1
|
|
'tinyint_col','TINYINT',10,-1,1,1
|
|
'smallint_col','SMALLINT',10,-1,2,2
|
|
'int_col','INT',10,-1,4,4
|
|
'bigint_col','BIGINT',10,-1,8,8
|
|
'float_col','FLOAT',10,-1,4,4
|
|
'double_col','DOUBLE',10,-1,8,8
|
|
'date_string_col','STRING',736,-1,8,8
|
|
'string_col','STRING',10,-1,1,1
|
|
'timestamp_col','TIMESTAMP',7300,-1,16,16
|
|
'year','INT',2,0,4,4
|
|
'month','INT',12,0,4,4
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
|
|
====
|
|
---- QUERY
|
|
create table incremental_empty_partitioned (i int) partitioned by (j int);
|
|
alter table incremental_empty_partitioned add partition (j=1);
|
|
====
|
|
---- QUERY
|
|
compute incremental stats incremental_empty_partitioned;
|
|
---- RESULTS
|
|
'Updated 1 partition(s) and 1 column(s).'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
show table stats incremental_empty_partitioned;
|
|
---- RESULTS
|
|
'1',0,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'Total',0,0,'0B','0B','','','',''
|
|
---- TYPES
|
|
STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
drop stats incremental_empty_partitioned;
|
|
====
|
|
---- QUERY
|
|
# IMPALA-2199: Test that compute incremental stats with a partition spec on an empty partition populates the row count.
|
|
compute incremental stats incremental_empty_partitioned partition(j=1);
|
|
---- RESULTS
|
|
'Updated 1 partition(s) and 1 column(s).'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
show table stats incremental_empty_partitioned;
|
|
---- RESULTS
|
|
'1',0,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'Total',0,0,'0B','0B','','','',''
|
|
---- TYPES
|
|
STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
drop incremental stats alltypes_incremental partition(year=2010, month=1);
|
|
drop incremental stats alltypes_incremental partition(year=2010, month=2);
|
|
====
|
|
---- QUERY
|
|
compute incremental stats alltypes_incremental partition(year=2010, month=2);
|
|
---- RESULTS
|
|
'Updated 1 partition(s) and 11 column(s).'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
show table stats alltypes_incremental;
|
|
---- RESULTS
|
|
'2009','1',310,1,'24.56KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2009','2',280,1,'22.27KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2009','3',310,1,'24.67KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2009','4',300,1,'24.06KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2009','5',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2009','6',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2009','7',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2009','8',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2009','9',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2009','10',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2009','11',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2009','12',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2010','1',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2010','2',280,1,'22.54KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2010','3',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2010','4',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2010','5',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2010','6',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2010','7',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2010','8',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2010','9',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2010','10',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2010','11',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2010','12',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'Total','',6990,24,'586.84KB','0B','','','',''
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
show column stats alltypes_incremental
|
|
---- LABELS
|
|
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
|
|
---- RESULTS
|
|
'id','INT',6990,-1,4,4
|
|
'bool_col','BOOLEAN',2,-1,1,1
|
|
'tinyint_col','TINYINT',10,-1,1,1
|
|
'smallint_col','SMALLINT',10,-1,2,2
|
|
'int_col','INT',10,-1,4,4
|
|
'bigint_col','BIGINT',10,-1,8,8
|
|
'float_col','FLOAT',10,-1,4,4
|
|
'double_col','DOUBLE',10,-1,8,8
|
|
'date_string_col','STRING',688,-1,8,8
|
|
'string_col','STRING',10,-1,1,1
|
|
'timestamp_col','TIMESTAMP',6990,-1,16,16
|
|
'year','INT',2,0,4,4
|
|
'month','INT',12,0,4,4
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
|
|
====
|
|
---- QUERY
|
|
# Drop and recompute incremental stats for general partition expressions
|
|
drop incremental stats alltypes_incremental partition(year=2010);
|
|
====
|
|
---- QUERY
|
|
compute incremental stats alltypes_incremental partition(year=2010);
|
|
---- RESULTS
|
|
'Updated 12 partition(s) and 11 column(s).'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
show table stats alltypes_incremental;
|
|
---- RESULTS
|
|
'2009','1',310,1,'24.56KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2009','2',280,1,'22.27KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2009','3',310,1,'24.67KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2009','4',300,1,'24.06KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2009','5',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2009','6',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2009','7',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2009','8',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2009','9',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2009','10',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2009','11',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2009','12',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2010','1',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2010','2',280,1,'22.54KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2010','3',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2010','4',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2010','5',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2010','6',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2010','7',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2010','8',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2010','9',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2010','10',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2010','11',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2010','12',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'Total','',7300,24,'586.84KB','0B','','','',''
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
show column stats alltypes_incremental
|
|
---- LABELS
|
|
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
|
|
---- RESULTS
|
|
'id','INT',7300,-1,4,4
|
|
'bool_col','BOOLEAN',2,-1,1,1
|
|
'tinyint_col','TINYINT',10,-1,1,1
|
|
'smallint_col','SMALLINT',10,-1,2,2
|
|
'int_col','INT',10,-1,4,4
|
|
'bigint_col','BIGINT',10,-1,8,8
|
|
'float_col','FLOAT',10,-1,4,4
|
|
'double_col','DOUBLE',10,-1,8,8
|
|
'date_string_col','STRING',736,-1,8,8
|
|
'string_col','STRING',10,-1,1,1
|
|
'timestamp_col','TIMESTAMP',7300,-1,16,16
|
|
'year','INT',2,0,4,4
|
|
'month','INT',12,0,4,4
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
|
|
====
|
|
---- QUERY
|
|
# Confirm that dropping stats drops incremental stats as well
|
|
drop stats alltypes_incremental;
|
|
show table stats alltypes_incremental;
|
|
---- RESULTS
|
|
'2009','1',-1,1,'24.56KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2009','2',-1,1,'22.27KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2009','3',-1,1,'24.67KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2009','4',-1,1,'24.06KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2009','5',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2009','6',-1,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2009','7',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2009','8',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2009','9',-1,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2009','10',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2009','11',-1,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2009','12',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2010','1',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2010','2',-1,1,'22.54KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2010','3',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2010','4',-1,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2010','5',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2010','6',-1,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2010','7',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2010','8',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2010','9',-1,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2010','10',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2010','11',-1,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'2010','12',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
'Total','',-1,24,'586.84KB','0B','','','',''
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Test that many partition keys work correctly
|
|
create table incremental_many_part_keys(col int)
|
|
partitioned by (p1 int, p2 int, p3 int, p4 int, p5 int, p6 int);
|
|
insert into incremental_many_part_keys
|
|
partition(p1=1, p2=2, p3=3, p4=4, p5=5, p6=6) values(1);
|
|
====
|
|
---- QUERY
|
|
compute incremental stats incremental_many_part_keys;
|
|
show table stats incremental_many_part_keys;
|
|
---- RESULTS
|
|
'1','2','3','4','5','6',1,1,'2B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'Total','','','','','',1,1,'2B','0B','','','',''
|
|
---- TYPES
|
|
STRING, STRING, STRING, STRING, STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
create table incremental_null_part_key(col int) partitioned by (p int);
|
|
insert into incremental_null_part_key partition(p) values(1,NULL), (1,2);
|
|
compute incremental stats incremental_null_part_key partition(p=2);
|
|
====
|
|
---- QUERY
|
|
compute incremental stats incremental_null_part_key partition(p=NULL);
|
|
show table stats incremental_null_part_key;
|
|
---- RESULTS
|
|
'NULL',1,1,'2B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2',1,1,'2B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'Total',2,2,'4B','0B','','','',''
|
|
---- TYPES
|
|
STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Check that incremental stats queries handle partitions with keyword names
|
|
create table incremental_keyword_part_key(col int)
|
|
partitioned by (`date` int, `table` string);
|
|
insert into incremental_keyword_part_key
|
|
partition(`date`=1, `table`='a') values(2);
|
|
compute incremental stats incremental_keyword_part_key
|
|
partition(`date`=1, `table`='a');
|
|
show table stats incremental_keyword_part_key;
|
|
---- RESULTS
|
|
'1','a',1,1,'2B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'Total','',1,1,'2B','0B','','','',''
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# IMPALA-4170: Check that incremental stats queries handles
|
|
# new partitions with keyword names.
|
|
insert into incremental_keyword_part_key
|
|
partition(`date`=2, `table`='b') values(3);
|
|
insert into incremental_keyword_part_key
|
|
partition(`date`=NULL, `table`='') values(4);
|
|
compute incremental stats incremental_keyword_part_key;
|
|
show table stats incremental_keyword_part_key;
|
|
---- RESULTS
|
|
'NULL','NULL',1,1,'2B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'1','a',1,1,'2B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2','b',1,1,'2B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'Total','',3,3,'6B','0B','','','',''
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
drop stats incremental_keyword_part_key;
|
|
compute incremental stats incremental_keyword_part_key;
|
|
show table stats incremental_keyword_part_key;
|
|
---- RESULTS
|
|
'NULL','NULL',1,1,'2B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'1','a',1,1,'2B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2','b',1,1,'2B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'Total','',3,3,'6B','0B','','','',''
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
create table incremental_string_part_value(col int) partitioned by
|
|
(p string);
|
|
insert into incremental_string_part_value partition(p="test_string")
|
|
values(2);
|
|
compute incremental stats incremental_string_part_value
|
|
partition(p="test_string");
|
|
show table stats incremental_string_part_value;
|
|
---- RESULTS
|
|
'test_string',1,1,'2B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'Total',1,1,'2B','0B','','','',''
|
|
---- TYPES
|
|
STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Check that dropping a column still allows stats to be computed
|
|
create table incremental_drop_column (a int, b int, c int)
|
|
partitioned by (d int);
|
|
alter table incremental_drop_column add partition (d=1);
|
|
insert into incremental_drop_column partition(d=1) values (4,4,4);
|
|
compute incremental stats incremental_drop_column;
|
|
alter table incremental_drop_column drop column c;
|
|
alter table incremental_drop_column drop column b;
|
|
compute incremental stats incremental_drop_column;
|
|
show table stats incremental_drop_column;
|
|
---- RESULTS
|
|
'1',1,1,'6B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'Total',1,1,'6B','0B','','','',''
|
|
---- TYPES
|
|
STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Check that adding a column invalidates all incremental stats
|
|
create table incremental_add_column (col int) partitioned by (p int);
|
|
insert into incremental_add_column partition(p) values(1,1),(2,2);
|
|
compute incremental stats incremental_add_column;
|
|
insert into incremental_add_column partition(p) values(1,1);
|
|
alter table incremental_add_column add columns (c int);
|
|
compute incremental stats incremental_add_column;
|
|
show table stats incremental_add_column;
|
|
---- RESULTS
|
|
'1',2,2,'4B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'2',1,1,'2B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
|
|
'Total',3,3,'6B','0B','','','',''
|
|
---- TYPES
|
|
STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
create table incremental_no_partitions (col int) partitioned by (p int);
|
|
compute incremental stats incremental_no_partitions;
|
|
show table stats incremental_no_partitions;
|
|
---- RESULTS
|
|
'Total',0,0,'0B','0B','','','',''
|
|
---- TYPES
|
|
STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
create table incremental_not_partitioned (col int);
|
|
insert into incremental_not_partitioned values(1),(2);
|
|
compute incremental stats incremental_not_partitioned;
|
|
show table stats incremental_not_partitioned;
|
|
---- RESULTS
|
|
2,1,'4B','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
|
|
---- TYPES
|
|
BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
|
|
====
|
|
====
|
|
---- QUERY
|
|
# IMPALA-1629: Verify that the column stats for CHAR/VARCHAR columns are populated.
|
|
# The values of date_string_col always have exactly 8 characters. The CHAR/VARCHAR
|
|
# sizes below are chosen such that they are smaller, equal, and greater than the
|
|
# source data values, in particular, to test the CHAR padding behavior.
|
|
create table chars_tbl (
|
|
id int,
|
|
ch1 char(1),
|
|
ch2 char(8),
|
|
ch3 char(20),
|
|
ts timestamp,
|
|
vc1 varchar(1),
|
|
vc2 varchar(8),
|
|
vc3 varchar(20)
|
|
)
|
|
partitioned by (
|
|
year char(5),
|
|
day varchar(13)
|
|
);
|
|
|
|
insert overwrite chars_tbl partition(year, day)
|
|
select
|
|
id,
|
|
cast(date_string_col as char(1)),
|
|
cast(date_string_col as char(8)),
|
|
cast(date_string_col as char(20)),
|
|
timestamp_col,
|
|
cast(date_string_col as varchar(1)),
|
|
cast(date_string_col as varchar(8)),
|
|
cast(date_string_col as varchar(20)),
|
|
cast(year as char(5)),
|
|
cast(day as varchar(13))
|
|
from functional.alltypesagg
|
|
where day is null or day in (3, 7);
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
year=2010 /day=7/: 1000
|
|
year=2010 /day=3/: 1000
|
|
year=2010 /day=__HIVE_DEFAULT_PARTITION__/: 1000
|
|
====
|
|
---- QUERY
|
|
compute incremental stats chars_tbl
|
|
---- RESULTS
|
|
'Updated 3 partition(s) and 8 column(s).'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
show column stats chars_tbl
|
|
---- LABELS
|
|
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
|
|
---- RESULTS
|
|
'id','INT',2915,-1,4,4
|
|
'ch1','CHAR(1)',1,-1,1,1
|
|
'ch2','CHAR(8)',10,-1,8,8
|
|
'ch3','CHAR(20)',10,-1,8,8
|
|
'ts','TIMESTAMP',2871,-1,16,16
|
|
'vc1','VARCHAR(1)',1,-1,1,1
|
|
'vc2','VARCHAR(8)',10,-1,8,8
|
|
'vc3','VARCHAR(20)',10,-1,8,8
|
|
'year','CHAR(5)',1,0,5,5
|
|
'day','VARCHAR(13)',3,1,-1,-1
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
|
|
====
|
|
---- QUERY
|
|
# Populate a new partition to verify the incremental stats update
|
|
insert into chars_tbl partition(year, day)
|
|
select null, cast('x' as char(1)), cast('x' as char(8)), cast('x' as char(20)),
|
|
null, cast('x' as varchar(1)), cast('x' as varchar(8)), cast('x' as varchar(20)),
|
|
cast('abc' as char(5)), cast('xyz' as varchar(13));
|
|
compute incremental stats chars_tbl;
|
|
---- RESULTS
|
|
'Updated 1 partition(s) and 8 column(s).'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
show column stats chars_tbl
|
|
---- LABELS
|
|
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
|
|
---- RESULTS
|
|
'id','INT',2915,-1,4,4
|
|
'ch1','CHAR(1)',2,-1,1,1
|
|
'ch2','CHAR(8)',11,-1,8,7.99766731262207
|
|
'ch3','CHAR(20)',11,-1,8,7.99766731262207
|
|
'ts','TIMESTAMP',2871,-1,16,16
|
|
'vc1','VARCHAR(1)',2,-1,1,1
|
|
'vc2','VARCHAR(8)',11,-1,8,7.99766731262207
|
|
'vc3','VARCHAR(20)',11,-1,8,7.99766731262207
|
|
'year','CHAR(5)',2,0,5,5
|
|
'day','VARCHAR(13)',4,1,-1,-1
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
|
|
====
|
|
---- QUERY
|
|
# IMPALA-4854: Tests incremental computation in the presence of complex-typed columns.
|
|
create external table complextypestbl_part
|
|
like parquet '$FILESYSTEM_PREFIX/test-warehouse/complextypestbl_parquet/nullable.parq'
|
|
partitioned by (p int) stored as parquet;
|
|
alter table complextypestbl_part add partition (p=0)
|
|
location '$FILESYSTEM_PREFIX/test-warehouse/complextypestbl_parquet/';
|
|
alter table complextypestbl_part add partition (p=1)
|
|
location '$FILESYSTEM_PREFIX/test-warehouse/complextypestbl_parquet/';
|
|
compute incremental stats complextypestbl_part;
|
|
---- RESULTS
|
|
'Updated 2 partition(s) and 1 column(s).'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
# The table was not changed. Validate that the next compute incremental stats is a no-op.
|
|
compute incremental stats complextypestbl_part;
|
|
---- RESULTS
|
|
---- ERRORS
|
|
No partitions selected for incremental stats update
|
|
====
|
|
---- QUERY
|
|
# Add a new partition and check that only stats for the new partition are computed.
|
|
alter table complextypestbl_part add partition (p=2)
|
|
location '$FILESYSTEM_PREFIX/test-warehouse/complextypestbl_parquet/';
|
|
compute incremental stats complextypestbl_part;
|
|
---- RESULTS
|
|
'Updated 1 partition(s) and 1 column(s).'
|
|
---- TYPES
|
|
STRING
|
|
====
|