mirror of
https://github.com/apache/impala.git
synced 2026-01-04 00:00:56 -05:00
This patch adds the ability to compute and drop column and table statistics at partition granularity. The following commands are added. Details about the implementation follow. COMPUTE INCREMENTAL STATS <tbl_name> [PARTITION <partition_spec>] This variant of COMPUTE STATS will, ultimately, do the same thing as the traditional COMPUTE STATS statement, but does so by caching the intermediate state of the computation for each partition in the Hive MetaStore. If the PARTITION clause is added, the computation is performed for only that partition. If the PARTITION clause is omitted, incremental stats are updated only for those partitions with missing incremental stats (e.g. one column does not have stats, or incremental stats were never computed for this partition). In this patch, incremental stats are only invalidated when a DROP STATS variant is executed. Future patches can automatically invalidate the statistics after REFRESH or INSERT queries, etc. DROP INCREMENTAL STATS <tbl_name> PARTITION <part_spec> This variant of DROP STATS removes the incremental statistics for the given partition of the table. It does *not* recalculate the statistics for the whole table, so this should be used only to invalidate the intermediate state for a partition which will shortly be subject to COMPUTE INCREMENTAL STATS. The point of this variant is to allow users to notify Impala when they believe a partition has changed significantly enough to warrant recomputation of its statistics. It is not necessary for new partitions; Impala will detect that they do not have any valid statistics. -------- This is achieved by adapting the existing HLL UDA via swapping its finalize method for a new one which returns the intermediate HLL buckets, rather than aggregating and then disposing of them. This intermediate state is then returned to Impala's catalog-op-executor.cc, which then passes the intermediate state back to the frontend to be ultimately stored in the HMS. 
This intermediate state is computed on a per-partition basis by grouping the input to the UDA by partition. Thus, the incremental computation produces one row for each partition selected (the set of which might be quite small, if there are few partitions without valid incremental stats: this is the point of the new commands). At the same time, the query coordinator aggregates the output of the UDA to produce table-level statistics. This computation incorporates any existing (and not re-computed) intermediate partition state which is passed to the coordinator by the frontend. The resulting statistics are saved to the table as normal. Intermediate statistics are serialised to the HMS by writing a Thrift structure's serialised form to the partition's 'parameters' map. There is a schema-imposed limit of 4000 characters to the serialised string, which is exacerbated by the fact that the Thrift representation must first be base-64 encoded to avoid type errors in the HMS. The current patch breaks the encoded structure into 4k chunks, and then recombines them on read. The alltypes table (11 columns) takes about three of these chunks. This may mean that incremental stats are not suitable for particularly wide tables: these structures could be zipped before encoding for some space savings. In the meantime, the NDV estimates are run-length encoded (since they are generally sparse); this can result in substantial space savings. Change-Id: If82cf4753d19eb532265acb556f798b95fbb0f34 Reviewed-on: http://gerrit.sjc.cloudera.com:8080/4475 Tested-by: jenkins Reviewed-by: Henry Robinson <henry@cloudera.com> Reviewed-on: http://gerrit.sjc.cloudera.com:8080/5408
706 lines
14 KiB
Plaintext
706 lines
14 KiB
Plaintext
====
|
|
---- QUERY
|
|
use alter_table_test_db
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# First create an unpartitioned table
|
|
create external table t1 (i int) location '/test-warehouse/t1_tmp1'
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
describe t1
|
|
---- RESULTS
|
|
'i','int',''
|
|
---- TYPES
|
|
string,string,string
|
|
====
|
|
---- QUERY
|
|
# Add some columns
|
|
alter table t1 add columns (t tinyint, s string comment 'Str Col')
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
describe t1
|
|
---- RESULTS
|
|
'i','int',''
|
|
't','tinyint',''
|
|
's','string','Str Col'
|
|
---- TYPES
|
|
string,string,string
|
|
====
|
|
---- QUERY
|
|
alter table t1 rename to t2
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
show tables
|
|
---- RESULTS
|
|
't2'
|
|
---- TYPES
|
|
string
|
|
====
|
|
---- QUERY
|
|
# Move the table to a different database
|
|
alter table t2 rename to alter_table_test_db2.t1_inotherdb
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# No longer appears in this database
|
|
show tables
|
|
---- RESULTS
|
|
---- TYPES
|
|
string
|
|
====
|
|
---- QUERY
|
|
# Shows up in the second database
|
|
show tables in alter_table_test_db2
|
|
---- RESULTS
|
|
't1_inotherdb'
|
|
---- TYPES
|
|
string
|
|
====
|
|
---- QUERY
|
|
# Move the table back to this database
|
|
alter table alter_table_test_db2.t1_inotherdb rename to t2
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# make sure the new table shows the same columns as the old table
|
|
describe t2
|
|
---- RESULTS
|
|
'i','int',''
|
|
't','tinyint',''
|
|
's','string','Str Col'
|
|
---- TYPES
|
|
string,string,string
|
|
====
|
|
---- QUERY
|
|
alter table t2 drop column t
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# The dropped column no longer shows up
|
|
describe t2
|
|
---- RESULTS
|
|
'i','int',''
|
|
's','string','Str Col'
|
|
---- TYPES
|
|
string,string,string
|
|
====
|
|
---- QUERY
|
|
# Replace the columns with new values
|
|
alter table t2 replace columns (c1 bigint comment 'id col', c2 string, c3 int)
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
describe t2
|
|
---- RESULTS
|
|
'c1','bigint','id col'
|
|
'c2','string',''
|
|
'c3','int',''
|
|
---- TYPES
|
|
string,string,string
|
|
====
|
|
---- QUERY
|
|
# Should be able to read/write using the new column types
|
|
insert overwrite table t2 select 1, '50', 2 from functional.alltypes limit 2
|
|
---- RESULTS
|
|
: 2
|
|
====
|
|
---- QUERY
|
|
select * from t2
|
|
---- RESULTS
|
|
1,'50',2
|
|
1,'50',2
|
|
---- TYPES
|
|
bigint,string,int
|
|
====
|
|
---- QUERY
|
|
alter table t2 change column c2 int_col int comment 'changed to int col'
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
alter table t2 change column c1 id_col bigint
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
alter table t2 change column c3 c3 int comment 'added a comment'
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
describe t2
|
|
---- RESULTS
|
|
'id_col','bigint','id col'
|
|
'int_col','int','changed to int col'
|
|
'c3','int','added a comment'
|
|
---- TYPES
|
|
string,string,string
|
|
====
|
|
---- QUERY
|
|
select * from t2
|
|
---- RESULTS
|
|
1,50,2
|
|
1,50,2
|
|
---- TYPES
|
|
bigint,int,int
|
|
====
|
|
---- QUERY
|
|
# Add some complex-typed columns
|
|
alter table t2 add columns (
|
|
x array<int>,
|
|
y map<string,float> comment 'Map Col',
|
|
z struct<f1:boolean,f2:bigint>)
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
describe t2
|
|
---- RESULTS
|
|
'id_col','bigint','id col'
|
|
'int_col','int','changed to int col'
|
|
'c3','int','added a comment'
|
|
'x','array<int>',''
|
|
'y','map<string,float>','Map Col'
|
|
'z','struct<f1:boolean,f2:bigint>',''
|
|
---- TYPES
|
|
string,string,string
|
|
====
|
|
---- QUERY
|
|
# Replace columns with some complex-typed columns
|
|
alter table t2 replace columns (
|
|
a int comment 'Int Col',
|
|
b struct<f1:array<int>,f2:map<string,struct<f1:bigint>>>,
|
|
c double)
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
describe t2
|
|
---- RESULTS
|
|
'a','int','Int Col'
|
|
'b','struct<f1:array<int>,f2:map<string,struct<f1:bigint>>>',''
|
|
'c','double',''
|
|
---- TYPES
|
|
string,string,string
|
|
====
|
|
---- QUERY
|
|
create external table jointbl_test like functional.jointbl
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Expect new table to be empty
|
|
select * from jointbl_test
|
|
---- RESULTS
|
|
---- TYPES
|
|
bigint,string,int,int
|
|
====
|
|
---- QUERY
|
|
# change the location to point to some data
|
|
alter table jointbl_test set location '/test-warehouse/jointbl'
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# should get some results back now
|
|
select * from jointbl_test order by test_id limit 3
|
|
---- RESULTS
|
|
1001,'Name1',94611,5000
|
|
1002,'Name2',94611,5000
|
|
1003,'Name3',94611,5000
|
|
---- TYPES
|
|
bigint,string,int,int
|
|
====
|
|
---- QUERY
|
|
# change the location to point to some data in another file format
|
|
alter table jointbl_test set location '/test-warehouse/jointbl_seq'
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# change the table fileformat to match the data
|
|
alter table jointbl_test set fileformat sequencefile
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# now the proper data should be returned
|
|
select * from jointbl_test order by test_id limit 3
|
|
---- RESULTS
|
|
1001,'Name1',94611,5000
|
|
1002,'Name2',94611,5000
|
|
1003,'Name3',94611,5000
|
|
---- TYPES
|
|
bigint,string,int,int
|
|
====
|
|
---- QUERY
|
|
# Create a partitioned table. Specify the location so we know what dirs need
|
|
# to be cleaned after the test finishes executing.
|
|
create external table t_part (i int) partitioned by (j int, s string)
|
|
location '/test-warehouse/t_part_tmp'
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Partition key values may be constant expressions; cast(2-1 as int) is j=1.
alter table t_part add partition (j=cast(2-1 as int), s='2012')
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# The (j=1, s='2012') partition was added by the previous statement; IF NOT EXISTS
# is expected to make this repeat succeed without an error.
alter table t_part add if not exists partition (j=1, s='2012')
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
alter table t_part add if not exists partition (j=1, s='2012/withslash')
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Partition values may be constant expressions. substring(str, pos, len) takes a
|
|
# length rather than an end index, so (4, 4) yields '2013', the partition that the
|
|
# statements below populate and drop.
|
|
alter table t_part add partition (j=1, s=substring('foo2013bar', 4, 4))
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Add another partition that points to the same location as another partition.
|
|
# This will cause the data to be read twice, but shouldn't result in an error.
|
|
alter table t_part add partition (j=100, s='same_location')
|
|
location '/test-warehouse/t_part_tmp/j=1/s=2012'
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Add another partition that points to an existing data location that does not
|
|
# follow the key=value directory structure.
|
|
alter table t_part add partition (j=101, s='different_part_dir')
|
|
location '/test-warehouse/part_data/'
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
insert overwrite table t_part partition(j=1, s='2012') select 2 from functional.alltypes limit 2
|
|
---- RESULTS
|
|
j=1/s=2012/: 2
|
|
====
|
|
---- QUERY
|
|
insert overwrite table t_part partition(j=1, s='2013') select 3 from functional.alltypes limit 3
|
|
---- RESULTS
|
|
j=1/s=2013/: 3
|
|
====
|
|
---- QUERY
|
|
insert overwrite table t_part partition(j=1, s='2012/withslash')
|
|
select 1 from functional.alltypes limit 1
|
|
---- RESULTS
|
|
j=1/s=2012%2Fwithslash/: 1
|
|
====
|
|
---- QUERY
|
|
select i, j, s from t_part
|
|
---- RESULTS
|
|
1,1,'2012/withslash'
|
|
2,1,'2012'
|
|
2,1,'2012'
|
|
2,100,'same_location'
|
|
2,100,'same_location'
|
|
3,1,'2013'
|
|
3,1,'2013'
|
|
3,1,'2013'
|
|
1984,101,'different_part_dir'
|
|
---- TYPES
|
|
int,int,string
|
|
====
|
|
---- QUERY
|
|
alter table t_part add partition (j=NULL, s='2013')
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
alter table t_part add partition (j=NULL, s=NULL)
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Drop the partition that points to a duplicate location. The data will no longer
|
|
# be read twice.
|
|
alter table t_part drop partition (j=100, s='same_location')
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
alter table t_part drop partition (j=101, s='different_part_dir')
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
insert overwrite table t_part partition(j=NULL, s=NULL)
|
|
select 4 from functional.alltypes limit 5
|
|
---- RESULTS
|
|
j=__HIVE_DEFAULT_PARTITION__/s=__HIVE_DEFAULT_PARTITION__/: 5
|
|
====
|
|
---- QUERY
|
|
select i, j, s from t_part
|
|
---- RESULTS
|
|
1,1,'2012/withslash'
|
|
2,1,'2012'
|
|
2,1,'2012'
|
|
3,1,'2013'
|
|
3,1,'2013'
|
|
3,1,'2013'
|
|
4,NULL,'NULL'
|
|
4,NULL,'NULL'
|
|
4,NULL,'NULL'
|
|
4,NULL,'NULL'
|
|
4,NULL,'NULL'
|
|
---- TYPES
|
|
int,int,string
|
|
====
|
|
---- QUERY
|
|
insert overwrite table t_part partition(j=NULL, s='2013')
|
|
select 5 from functional.alltypes limit 5
|
|
---- RESULTS
|
|
j=__HIVE_DEFAULT_PARTITION__/s=2013/: 5
|
|
====
|
|
---- QUERY
|
|
select i, j, s from t_part
|
|
---- RESULTS
|
|
1,1,'2012/withslash'
|
|
2,1,'2012'
|
|
2,1,'2012'
|
|
3,1,'2013'
|
|
3,1,'2013'
|
|
3,1,'2013'
|
|
4,NULL,'NULL'
|
|
4,NULL,'NULL'
|
|
4,NULL,'NULL'
|
|
4,NULL,'NULL'
|
|
4,NULL,'NULL'
|
|
5,NULL,'2013'
|
|
5,NULL,'2013'
|
|
5,NULL,'2013'
|
|
5,NULL,'2013'
|
|
5,NULL,'2013'
|
|
---- TYPES
|
|
int,int,string
|
|
====
|
|
---- QUERY
|
|
alter table t_part drop partition (j=NULL, s=NULL)
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
select i, j, s from t_part
|
|
---- RESULTS
|
|
1,1,'2012/withslash'
|
|
2,1,'2012'
|
|
2,1,'2012'
|
|
3,1,'2013'
|
|
3,1,'2013'
|
|
3,1,'2013'
|
|
5,NULL,'2013'
|
|
5,NULL,'2013'
|
|
5,NULL,'2013'
|
|
5,NULL,'2013'
|
|
5,NULL,'2013'
|
|
---- TYPES
|
|
int,int,string
|
|
====
|
|
---- QUERY
|
|
# DROP PARTITION also accepts constant expressions; trim(' 2013 ') is '2013'.
alter table t_part drop partition (j=NULL, s=trim(' 2013 '))
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
select i, j, s from t_part
|
|
---- RESULTS
|
|
1,1,'2012/withslash'
|
|
2,1,'2012'
|
|
2,1,'2012'
|
|
3,1,'2013'
|
|
3,1,'2013'
|
|
3,1,'2013'
|
|
---- TYPES
|
|
int,int,string
|
|
====
|
|
---- QUERY
|
|
alter table t_part drop partition (j=1, s='2013')
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
select i, j, s from t_part
|
|
---- RESULTS
|
|
1,1,'2012/withslash'
|
|
2,1,'2012'
|
|
2,1,'2012'
|
|
---- TYPES
|
|
int,int,string
|
|
====
|
|
---- QUERY
|
|
alter table t_part drop partition (j=1, s='2012/withslash')
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
select i, j, s from t_part
|
|
---- RESULTS
|
|
2,1,'2012'
|
|
2,1,'2012'
|
|
---- TYPES
|
|
int,int,string
|
|
====
|
|
---- QUERY
|
|
# Test that empty string as partition column maps onto NULL
|
|
# using static partition insert
|
|
insert into table t_part partition(j=2, s='')
|
|
select 1 from functional.alltypes limit 1
|
|
---- RESULTS
|
|
j=2/s=__HIVE_DEFAULT_PARTITION__/: 1
|
|
====
|
|
---- QUERY
|
|
# Test that empty string as partition column maps onto NULL
|
|
# using dynamic partition insert
|
|
insert into table t_part partition(j=2, s)
|
|
select 10, '' from functional.alltypes limit 1
|
|
---- RESULTS
|
|
j=2/s=__HIVE_DEFAULT_PARTITION__/: 1
|
|
====
|
|
---- QUERY
|
|
# Validate the previous inserts
|
|
select i, j, s from t_part where s is NULL
|
|
---- RESULTS
|
|
1,2,'NULL'
|
|
10,2,'NULL'
|
|
---- TYPES
|
|
int,int,string
|
|
====
|
|
---- QUERY
|
|
# Drop default partition using empty string as key
|
|
alter table t_part drop partition (j=2, s='')
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Validate previous partition drop
|
|
select i, j, s from t_part where s is NULL
|
|
---- RESULTS
|
|
---- TYPES
|
|
int,int,string
|
|
====
|
|
---- QUERY
|
|
# rename a partitioned table
|
|
alter table t_part rename to t_part2
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# only the new table shows up
|
|
show tables like 't_part*'
|
|
---- RESULTS
|
|
't_part2'
|
|
---- TYPES
|
|
string
|
|
====
|
|
---- QUERY
|
|
# should be able to read the same data from this table
|
|
select i, j, s from t_part2
|
|
---- RESULTS
|
|
2,1,'2012'
|
|
2,1,'2012'
|
|
---- TYPES
|
|
int,int,string
|
|
====
|
|
---- QUERY
|
|
create external table alltypes_test like functional.alltypes
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
alter table alltypes_test add partition(month=4, year=2009)
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
alter table alltypes_test add partition(month=5, year=2009)
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Table is empty
|
|
select int_col from alltypes_test
|
|
---- RESULTS
|
|
---- TYPES
|
|
int
|
|
====
|
|
---- QUERY
|
|
# Point one partition at some data (file format does not match table)
|
|
alter table alltypes_test partition(month=4, year=2009)
|
|
set location '/test-warehouse/alltypes_seq_snap/year=2009/month=4'
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Set the month=4 partition's file format to match the sequence file data that its
# new location points at.
alter table alltypes_test partition(month=4, year=2009)
|
|
set fileformat sequencefile
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Only the month=4 partition points at data so far; count rows per int_col value.
|
|
select int_col, count(*) from alltypes_test
|
|
group by int_col order by int_col limit 100
|
|
---- RESULTS
|
|
0,30
|
|
1,30
|
|
2,30
|
|
3,30
|
|
4,30
|
|
5,30
|
|
6,30
|
|
7,30
|
|
8,30
|
|
9,30
|
|
---- TYPES
|
|
int,bigint
|
|
====
|
|
---- QUERY
|
|
# Point the other partition at some more data. This time in a different
|
|
# file format.
|
|
alter table alltypes_test partition(month=cast(1+4 as int), year=cast(100*20+9 as int))
|
|
set location '/test-warehouse/alltypes_rc/year=2009/month=5'
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Set the month=5 partition's file format to match the RC file data that its new
# location points at; cast(2+3 as int) is month=5.
alter table alltypes_test partition(month=cast(2+3 as int), year=2009)
|
|
set fileformat rcfile
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Both partitions now point at data (sequence file and RC file); count rows per
|
|
# int_col value across them.
|
|
select int_col, count(*) from alltypes_test
|
|
group by int_col order by int_col limit 100
|
|
---- RESULTS
|
|
0,61
|
|
1,61
|
|
2,61
|
|
3,61
|
|
4,61
|
|
5,61
|
|
6,61
|
|
7,61
|
|
8,61
|
|
9,61
|
|
---- TYPES
|
|
int,bigint
|
|
====
|
|
---- QUERY
|
|
# Show the table stats before altering.
|
|
show table stats alltypes_test
|
|
---- LABELS
|
|
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, FORMAT, INCREMENTAL STATS
|
|
---- RESULTS
|
|
'2009','4',-1,1,regex:.+KB,'NOT CACHED','SEQUENCE_FILE','false'
|
|
'2009','5',-1,1,regex:.+KB,'NOT CACHED','RC_FILE','false'
|
|
'Total','',-1,2,regex:.+KB,'0B','',''
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Test altering the 'numRows' table property of a table.
|
|
alter table alltypes_test set tblproperties ('numRows'='200')
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Test altering the 'numRows' table property of a partition.
|
|
alter table alltypes_test partition(year=2009, month=4)
|
|
set tblproperties ('numRows'='30', 'STATS_GENERATED_VIA_STATS_TASK'='true')
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Show the table stats after altering the table and partition stats.
|
|
show table stats alltypes_test
|
|
---- LABELS
|
|
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, FORMAT, INCREMENTAL STATS
|
|
---- RESULTS
|
|
'2009','4',30,1,regex:.+KB,'NOT CACHED','SEQUENCE_FILE','false'
|
|
'2009','5',-1,1,regex:.+KB,'NOT CACHED','RC_FILE','false'
|
|
'Total','',200,2,regex:.+KB,'0B','',''
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# IMPALA-1016: Testing scanning newly added columns
|
|
DROP TABLE IF EXISTS imp1016
|
|
====
|
|
---- QUERY
|
|
CREATE TABLE imp1016 (string1 string)
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
INSERT INTO imp1016 VALUES ('test')
|
|
---- RESULTS
|
|
: 1
|
|
====
|
|
---- QUERY
|
|
ALTER TABLE imp1016 ADD COLUMNS (string2 string)
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
DESCRIBE imp1016
|
|
---- RESULTS
|
|
'string1','string',''
|
|
'string2','string',''
|
|
---- TYPES
|
|
string,string,string
|
|
====
|
|
---- QUERY
|
|
SELECT * FROM imp1016
|
|
---- RESULTS
|
|
'test','NULL'
|
|
---- TYPES
|
|
string,string
|
|
====
|
|
---- QUERY
|
|
SELECT string1 FROM imp1016
|
|
---- RESULTS
|
|
'test'
|
|
---- TYPES
|
|
string
|
|
====
|
|
---- QUERY
|
|
SELECT string2 FROM imp1016
|
|
---- RESULTS
|
|
'NULL'
|
|
---- TYPES
|
|
string
|
|
====
|
|
---- QUERY
|
|
SELECT COUNT(DISTINCT string1) FROM imp1016
|
|
---- RESULTS
|
|
1
|
|
---- TYPES
|
|
bigint
|
|
====
|
|
---- QUERY
|
|
SELECT COUNT(DISTINCT string2) FROM imp1016
|
|
---- RESULTS
|
|
0
|
|
---- TYPES
|
|
bigint
|
|
====
|
|
---- QUERY
|
|
# Create a larger table to test scanning newly added columns
|
|
DROP TABLE IF EXISTS imp1016Large
|
|
====
|
|
---- QUERY
|
|
CREATE TABLE imp1016Large (string1 string)
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# There is a codepath that operates on chunks of 1024 tuples, so insert
|
|
# more than 1024 tuples to exercise it.
|
|
INSERT INTO imp1016Large SELECT 'test' FROM functional.alltypes LIMIT 2000
|
|
---- RESULTS
|
|
: 2000
|
|
====
|
|
---- QUERY
|
|
ALTER TABLE imp1016Large ADD COLUMNS (string2 string)
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
DESCRIBE imp1016Large
|
|
---- RESULTS
|
|
'string1','string',''
|
|
'string2','string',''
|
|
---- TYPES
|
|
string,string,string
|
|
====
|
|
---- QUERY
|
|
SELECT COUNT(string2) FROM imp1016Large
|
|
---- RESULTS
|
|
0
|
|
---- TYPES
|
|
bigint
|
|
====
|
|
---- QUERY
|
|
SELECT COUNT(*), COUNT(DISTINCT string1) FROM imp1016Large
|
|
---- RESULTS
|
|
2000,1
|
|
---- TYPES
|
|
bigint,bigint
|
|
====
|
|
---- QUERY
|
|
SELECT COUNT(*), COUNT(DISTINCT string2) FROM imp1016Large
|
|
---- RESULTS
|
|
2000,0
|
|
---- TYPES
|
|
bigint,bigint
|
|
====
|