Files
impala/testdata/workloads/functional-query/queries/QueryTest/compute-stats-incremental.test
Alex Behm b3d8a507cb IMPALA-5310: Add COMPUTE STATS TABLESAMPLE.
Adds the TABLESAMPLE clause for COMPUTE STATS.

Syntax:
COMPUTE STATS <table> TABLESAMPLE SYSTEM(<number>) [REPEATABLE(<number>)]

Computes and replaces the table-level row count and total file size,
as well as all table-level column statistics. Existing partition-level
row counts are not modified.
The TABLESAMPLE clause can be used to limit the scanned data volume to
a desired percentage. When sampling, the unmodified results of the
COMPUTE STATS queries are sent to the CatalogServer. There, the stats
are extrapolated before storing them into the HMS so as not to confuse
other engines like Hive/SparkSQL which may rely on the shared HMS
fields being accurate.

Limitations
- Only works for HDFS tables
- TABLESAMPLE is not supported for COMPUTE INCREMENTAL STATS
- TABLESAMPLE requires --enable_stats_extrapolation=true

Changes to EXPLAIN
The stored statistics from the HMS are more clearly displayed under
a 'stored statistics' section. Example:

00:SCAN HDFS [functional.alltypes, RANDOM]
   partitions=24/24 files=24 size=478.45KB
   stored statistics:
     table: rows=7300 size=478.45KB
     partitions: 24/24 rows=7300
     columns: all

Testing:
- added new functional tests
- core/hdfs run passed

Change-Id: I7f3e72471ac563adada4a4156033a85852b7c8b7
Reviewed-on: http://gerrit.cloudera.org:8080/8136
Reviewed-by: Alex Behm <alex.behm@cloudera.com>
Tested-by: Impala Public Jenkins
2017-11-29 22:37:01 +00:00

625 lines
26 KiB
Plaintext

====
---- QUERY
# test computing stats on a partitioned text table with all types
create table alltypes_incremental like functional.alltypes;
insert into alltypes_incremental partition(year, month)
select * from functional.alltypes;
====
---- QUERY
compute incremental stats alltypes_incremental
---- RESULTS
'Updated 24 partition(s) and 11 column(s).'
---- TYPES
STRING
====
---- QUERY
show table stats alltypes_incremental
---- RESULTS
'2009','1',310,1,'24.56KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2009','2',280,1,'22.27KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2009','3',310,1,'24.67KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2009','4',300,1,'24.06KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2009','5',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2009','6',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2009','7',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2009','8',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2009','9',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2009','10',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2009','11',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2009','12',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2010','1',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2010','2',280,1,'22.54KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2010','3',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2010','4',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2010','5',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2010','6',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2010','7',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2010','8',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2010','9',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2010','10',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2010','11',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2010','12',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'Total','',7300,24,'586.84KB','0B','','','',''
---- TYPES
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
show column stats alltypes_incremental
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',7300,-1,4,4
'bool_col','BOOLEAN',2,-1,1,1
'tinyint_col','TINYINT',10,-1,1,1
'smallint_col','SMALLINT',10,-1,2,2
'int_col','INT',10,-1,4,4
'bigint_col','BIGINT',10,-1,8,8
'float_col','FLOAT',10,-1,4,4
'double_col','DOUBLE',10,-1,8,8
'date_string_col','STRING',736,-1,8,8
'string_col','STRING',10,-1,1,1
'timestamp_col','TIMESTAMP',7300,-1,16,16
'year','INT',2,0,4,4
'month','INT',12,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
====
---- QUERY
drop incremental stats alltypes_incremental partition(year=2010, month=12)
====
---- QUERY
show table stats alltypes_incremental;
---- RESULTS
'2009','1',310,1,'24.56KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2009','2',280,1,'22.27KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2009','3',310,1,'24.67KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2009','4',300,1,'24.06KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2009','5',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2009','6',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2009','7',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2009','8',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2009','9',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2009','10',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2009','11',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2009','12',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2010','1',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2010','2',280,1,'22.54KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2010','3',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2010','4',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2010','5',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2010','6',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2010','7',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2010','8',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2010','9',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2010','10',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2010','11',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2010','12',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'Total','',7300,24,'586.84KB','0B','','','',''
---- TYPES
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
compute incremental stats alltypes_incremental
---- RESULTS
'Updated 1 partition(s) and 11 column(s).'
---- TYPES
STRING
====
---- QUERY
show table stats alltypes_incremental;
---- RESULTS
'2009','1',310,1,'24.56KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2009','2',280,1,'22.27KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2009','3',310,1,'24.67KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2009','4',300,1,'24.06KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2009','5',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2009','6',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2009','7',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2009','8',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2009','9',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2009','10',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2009','11',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2009','12',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2010','1',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2010','2',280,1,'22.54KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2010','3',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2010','4',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2010','5',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2010','6',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2010','7',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2010','8',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2010','9',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2010','10',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2010','11',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2010','12',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'Total','',7300,24,'586.84KB','0B','','','',''
---- TYPES
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
show column stats alltypes_incremental
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',7300,-1,4,4
'bool_col','BOOLEAN',2,-1,1,1
'tinyint_col','TINYINT',10,-1,1,1
'smallint_col','SMALLINT',10,-1,2,2
'int_col','INT',10,-1,4,4
'bigint_col','BIGINT',10,-1,8,8
'float_col','FLOAT',10,-1,4,4
'double_col','DOUBLE',10,-1,8,8
'date_string_col','STRING',736,-1,8,8
'string_col','STRING',10,-1,1,1
'timestamp_col','TIMESTAMP',7300,-1,16,16
'year','INT',2,0,4,4
'month','INT',12,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
====
---- QUERY
create table incremental_empty_partitioned (i int) partitioned by (j int);
alter table incremental_empty_partitioned add partition (j=1);
====
---- QUERY
compute incremental stats incremental_empty_partitioned;
---- RESULTS
'Updated 1 partition(s) and 1 column(s).'
---- TYPES
STRING
====
---- QUERY
show table stats incremental_empty_partitioned;
---- RESULTS
'1',0,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'Total',0,0,'0B','0B','','','',''
---- TYPES
STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
drop stats incremental_empty_partitioned;
====
---- QUERY
# IMPALA-2199: Test that compute incremental stats with a partition spec on an empty partition populates the row count.
compute incremental stats incremental_empty_partitioned partition(j=1);
---- RESULTS
'Updated 1 partition(s) and 1 column(s).'
---- TYPES
STRING
====
---- QUERY
show table stats incremental_empty_partitioned;
---- RESULTS
'1',0,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'Total',0,0,'0B','0B','','','',''
---- TYPES
STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
drop incremental stats alltypes_incremental partition(year=2010, month=1);
drop incremental stats alltypes_incremental partition(year=2010, month=2);
====
---- QUERY
compute incremental stats alltypes_incremental partition(year=2010, month=2);
---- RESULTS
'Updated 1 partition(s) and 11 column(s).'
---- TYPES
STRING
====
---- QUERY
show table stats alltypes_incremental;
---- RESULTS
'2009','1',310,1,'24.56KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2009','2',280,1,'22.27KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2009','3',310,1,'24.67KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2009','4',300,1,'24.06KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2009','5',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2009','6',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2009','7',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2009','8',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2009','9',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2009','10',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2009','11',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2009','12',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2010','1',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2010','2',280,1,'22.54KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2010','3',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2010','4',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2010','5',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2010','6',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2010','7',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2010','8',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2010','9',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2010','10',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2010','11',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2010','12',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'Total','',6990,24,'586.84KB','0B','','','',''
---- TYPES
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
show column stats alltypes_incremental
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',6990,-1,4,4
'bool_col','BOOLEAN',2,-1,1,1
'tinyint_col','TINYINT',10,-1,1,1
'smallint_col','SMALLINT',10,-1,2,2
'int_col','INT',10,-1,4,4
'bigint_col','BIGINT',10,-1,8,8
'float_col','FLOAT',10,-1,4,4
'double_col','DOUBLE',10,-1,8,8
'date_string_col','STRING',688,-1,8,8
'string_col','STRING',10,-1,1,1
'timestamp_col','TIMESTAMP',6990,-1,16,16
'year','INT',2,0,4,4
'month','INT',12,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
====
---- QUERY
# Drop and recompute incremental stats for general partition expressions
drop incremental stats alltypes_incremental partition(year=2010);
====
---- QUERY
compute incremental stats alltypes_incremental partition(year=2010);
---- RESULTS
'Updated 12 partition(s) and 11 column(s).'
---- TYPES
STRING
====
---- QUERY
show table stats alltypes_incremental;
---- RESULTS
'2009','1',310,1,'24.56KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2009','2',280,1,'22.27KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2009','3',310,1,'24.67KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2009','4',300,1,'24.06KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2009','5',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2009','6',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2009','7',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2009','8',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2009','9',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2009','10',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2009','11',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2009','12',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2010','1',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2010','2',280,1,'22.54KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2010','3',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2010','4',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2010','5',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2010','6',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2010','7',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2010','8',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2010','9',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2010','10',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2010','11',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2010','12',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'Total','',7300,24,'586.84KB','0B','','','',''
---- TYPES
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
show column stats alltypes_incremental
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',7300,-1,4,4
'bool_col','BOOLEAN',2,-1,1,1
'tinyint_col','TINYINT',10,-1,1,1
'smallint_col','SMALLINT',10,-1,2,2
'int_col','INT',10,-1,4,4
'bigint_col','BIGINT',10,-1,8,8
'float_col','FLOAT',10,-1,4,4
'double_col','DOUBLE',10,-1,8,8
'date_string_col','STRING',736,-1,8,8
'string_col','STRING',10,-1,1,1
'timestamp_col','TIMESTAMP',7300,-1,16,16
'year','INT',2,0,4,4
'month','INT',12,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
====
---- QUERY
# Confirm that dropping stats drops incremental stats as well
drop stats alltypes_incremental;
show table stats alltypes_incremental;
---- RESULTS
'2009','1',-1,1,'24.56KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2009','2',-1,1,'22.27KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2009','3',-1,1,'24.67KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2009','4',-1,1,'24.06KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2009','5',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2009','6',-1,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2009','7',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2009','8',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2009','9',-1,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2009','10',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2009','11',-1,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2009','12',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2010','1',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2010','2',-1,1,'22.54KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2010','3',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2010','4',-1,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2010','5',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2010','6',-1,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2010','7',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2010','8',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2010','9',-1,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2010','10',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2010','11',-1,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'2010','12',-1,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'Total','',-1,24,'586.84KB','0B','','','',''
---- TYPES
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
# Test that many partition keys work correctly
create table incremental_many_part_keys(col int)
partitioned by (p1 int, p2 int, p3 int, p4 int, p5 int, p6 int);
insert into incremental_many_part_keys
partition(p1=1, p2=2, p3=3, p4=4, p5=5, p6=6) values(1);
====
---- QUERY
compute incremental stats incremental_many_part_keys;
show table stats incremental_many_part_keys;
---- RESULTS
'1','2','3','4','5','6',1,1,'2B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'Total','','','','','',1,1,'2B','0B','','','',''
---- TYPES
STRING, STRING, STRING, STRING, STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
create table incremental_null_part_key(col int) partitioned by (p int);
insert into incremental_null_part_key partition(p) values(1,NULL), (1,2);
compute incremental stats incremental_null_part_key partition(p=2);
====
---- QUERY
compute incremental stats incremental_null_part_key partition(p=NULL);
show table stats incremental_null_part_key;
---- RESULTS
'NULL',1,1,'2B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2',1,1,'2B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'Total',2,2,'4B','0B','','','',''
---- TYPES
STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
# Check that incremental stats queries handle partitions with keyword names
create table incremental_keyword_part_key(col int)
partitioned by (`date` int, `table` string);
insert into incremental_keyword_part_key
partition(`date`=1, `table`='a') values(2);
compute incremental stats incremental_keyword_part_key
partition(`date`=1, `table`='a');
show table stats incremental_keyword_part_key;
---- RESULTS
'1','a',1,1,'2B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'Total','',1,1,'2B','0B','','','',''
---- TYPES
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
# IMPALA-4170: Check that incremental stats queries handles
# new partitions with keyword names.
insert into incremental_keyword_part_key
partition(`date`=2, `table`='b') values(3);
insert into incremental_keyword_part_key
partition(`date`=NULL, `table`='') values(4);
compute incremental stats incremental_keyword_part_key;
show table stats incremental_keyword_part_key;
---- RESULTS
'NULL','NULL',1,1,'2B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'1','a',1,1,'2B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2','b',1,1,'2B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'Total','',3,3,'6B','0B','','','',''
---- TYPES
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
drop stats incremental_keyword_part_key;
compute incremental stats incremental_keyword_part_key;
show table stats incremental_keyword_part_key;
---- RESULTS
'NULL','NULL',1,1,'2B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'1','a',1,1,'2B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2','b',1,1,'2B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'Total','',3,3,'6B','0B','','','',''
---- TYPES
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
create table incremental_string_part_value(col int) partitioned by
(p string);
insert into incremental_string_part_value partition(p="test_string")
values(2);
compute incremental stats incremental_string_part_value
partition(p="test_string");
show table stats incremental_string_part_value;
---- RESULTS
'test_string',1,1,'2B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'Total',1,1,'2B','0B','','','',''
---- TYPES
STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
# Check that dropping a column still allows stats to be computed
create table incremental_drop_column (a int, b int, c int)
partitioned by (d int);
alter table incremental_drop_column add partition (d=1);
insert into incremental_drop_column partition(d=1) values (4,4,4);
compute incremental stats incremental_drop_column;
alter table incremental_drop_column drop column c;
alter table incremental_drop_column drop column b;
compute incremental stats incremental_drop_column;
show table stats incremental_drop_column;
---- RESULTS
'1',1,1,'6B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'Total',1,1,'6B','0B','','','',''
---- TYPES
STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
# Check that adding a column invalidates all incremental stats
create table incremental_add_column (col int) partitioned by (p int);
insert into incremental_add_column partition(p) values(1,1),(2,2);
compute incremental stats incremental_add_column;
insert into incremental_add_column partition(p) values(1,1);
alter table incremental_add_column add columns (c int);
compute incremental stats incremental_add_column;
show table stats incremental_add_column;
---- RESULTS
'1',2,2,'4B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'2',1,1,'2B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
'Total',3,3,'6B','0B','','','',''
---- TYPES
STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
create table incremental_no_partitions (col int) partitioned by (p int);
compute incremental stats incremental_no_partitions;
show table stats incremental_no_partitions;
---- RESULTS
'Total',0,0,'0B','0B','','','',''
---- TYPES
STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
create table incremental_not_partitioned (col int);
insert into incremental_not_partitioned values(1),(2);
compute incremental stats incremental_not_partitioned;
show table stats incremental_not_partitioned;
---- RESULTS
2,1,'4B','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
---- TYPES
BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
====
---- QUERY
# IMPALA-1629: Verify that the column stats for CHAR/VARCHAR columns are populated.
# The values of date_string_col always have exactly 8 characters. The CHAR/VARCHAR
# sizes below are chosen such that they are smaller, equal, and greater than the
# source data values, in particular, to test the CHAR padding behavior.
create table chars_tbl (
id int,
ch1 char(1),
ch2 char(8),
ch3 char(20),
ts timestamp,
vc1 varchar(1),
vc2 varchar(8),
vc3 varchar(20)
)
partitioned by (
year char(5),
day varchar(13)
);
insert overwrite chars_tbl partition(year, day)
select
id,
cast(date_string_col as char(1)),
cast(date_string_col as char(8)),
cast(date_string_col as char(20)),
timestamp_col,
cast(date_string_col as varchar(1)),
cast(date_string_col as varchar(8)),
cast(date_string_col as varchar(20)),
cast(year as char(5)),
cast(day as varchar(13))
from functional.alltypesagg
where day is null or day in (3, 7);
---- RESULTS: VERIFY_IS_EQUAL_SORTED
year=2010 /day=7/: 1000
year=2010 /day=3/: 1000
year=2010 /day=__HIVE_DEFAULT_PARTITION__/: 1000
====
---- QUERY
compute incremental stats chars_tbl
---- RESULTS
'Updated 3 partition(s) and 8 column(s).'
---- TYPES
STRING
====
---- QUERY
show column stats chars_tbl
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',2915,-1,4,4
'ch1','CHAR(1)',1,-1,1,1
'ch2','CHAR(8)',10,-1,8,8
'ch3','CHAR(20)',10,-1,8,8
'ts','TIMESTAMP',2871,-1,16,16
'vc1','VARCHAR(1)',1,-1,1,1
'vc2','VARCHAR(8)',10,-1,8,8
'vc3','VARCHAR(20)',10,-1,8,8
'year','CHAR(5)',1,0,5,5
'day','VARCHAR(13)',3,1,-1,-1
---- TYPES
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
====
---- QUERY
# Populate a new partition to verify the incremental stats update
insert into chars_tbl partition(year, day)
select null, cast('x' as char(1)), cast('x' as char(8)), cast('x' as char(20)),
null, cast('x' as varchar(1)), cast('x' as varchar(8)), cast('x' as varchar(20)),
cast('abc' as char(5)), cast('xyz' as varchar(13));
compute incremental stats chars_tbl;
---- RESULTS
'Updated 1 partition(s) and 8 column(s).'
---- TYPES
STRING
====
---- QUERY
show column stats chars_tbl
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',2915,-1,4,4
'ch1','CHAR(1)',2,-1,1,1
'ch2','CHAR(8)',11,-1,8,7.99766731262207
'ch3','CHAR(20)',11,-1,8,7.99766731262207
'ts','TIMESTAMP',2871,-1,16,16
'vc1','VARCHAR(1)',2,-1,1,1
'vc2','VARCHAR(8)',11,-1,8,7.99766731262207
'vc3','VARCHAR(20)',11,-1,8,7.99766731262207
'year','CHAR(5)',2,0,5,5
'day','VARCHAR(13)',4,1,-1,-1
---- TYPES
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
====
---- QUERY
# IMPALA-4854: Tests incremental computation in the presence of complex-typed columns.
create external table complextypestbl_part
like parquet '$FILESYSTEM_PREFIX/test-warehouse/complextypestbl_parquet/nullable.parq'
partitioned by (p int) stored as parquet;
alter table complextypestbl_part add partition (p=0)
location '$FILESYSTEM_PREFIX/test-warehouse/complextypestbl_parquet/';
alter table complextypestbl_part add partition (p=1)
location '$FILESYSTEM_PREFIX/test-warehouse/complextypestbl_parquet/';
compute incremental stats complextypestbl_part;
---- RESULTS
'Updated 2 partition(s) and 1 column(s).'
---- TYPES
STRING
====
---- QUERY
# The table was not changed. Validate that the next compute incremental stats is a no-op.
compute incremental stats complextypestbl_part;
---- RESULTS
---- ERRORS
No partitions selected for incremental stats update
====
---- QUERY
# Add a new partition and check that only stats for the new partition are computed.
alter table complextypestbl_part add partition (p=2)
location '$FILESYSTEM_PREFIX/test-warehouse/complextypestbl_parquet/';
compute incremental stats complextypestbl_part;
---- RESULTS
'Updated 1 partition(s) and 1 column(s).'
---- TYPES
STRING
====