mirror of
https://github.com/apache/impala.git
synced 2025-12-31 15:00:10 -05:00
This patch adds parsing of complex types and tests for using complex types in various exprs and create/alter/describe stmts. Change-Id: Ibc211a560c889f5ccfb616813700b923c89d8245 Reviewed-on: http://gerrit.ent.cloudera.com:8080/3577 Reviewed-by: Alex Behm <alex.behm@cloudera.com> Tested-by: jenkins Reviewed-on: http://gerrit.ent.cloudera.com:8080/3594
715 lines
14 KiB
Plaintext
715 lines
14 KiB
Plaintext
====
|
|
---- QUERY
|
|
use alter_table_test_db
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# First create an unpartitioned table
|
|
create external table t1 (i int) location '/test-warehouse/t1_tmp1'
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
describe t1
|
|
---- RESULTS
|
|
'i','int',''
|
|
---- TYPES
|
|
string,string,string
|
|
====
|
|
---- QUERY
|
|
# Add some columns
|
|
alter table t1 add columns (t tinyint, s string comment 'Str Col')
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
describe t1
|
|
---- RESULTS
|
|
'i','int',''
|
|
't','tinyint',''
|
|
's','string','Str Col'
|
|
---- TYPES
|
|
string,string,string
|
|
====
|
|
---- QUERY
|
|
alter table t1 rename to t2
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
show tables
|
|
---- RESULTS
|
|
't2'
|
|
---- TYPES
|
|
string
|
|
====
|
|
---- QUERY
|
|
# Move the table to a different database
|
|
alter table t2 rename to alter_table_test_db2.t1_inotherdb
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# No longer appears in this database
|
|
show tables
|
|
---- RESULTS
|
|
---- TYPES
|
|
string
|
|
====
|
|
---- QUERY
|
|
# Shows up in the second database
|
|
show tables in alter_table_test_db2
|
|
---- RESULTS
|
|
't1_inotherdb'
|
|
---- TYPES
|
|
string
|
|
====
|
|
---- QUERY
|
|
# Move the table back to this database
|
|
alter table alter_table_test_db2.t1_inotherdb rename to t2
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# make sure the new table shows the same columns as the old table
|
|
describe t2
|
|
---- RESULTS
|
|
'i','int',''
|
|
't','tinyint',''
|
|
's','string','Str Col'
|
|
---- TYPES
|
|
string,string,string
|
|
====
|
|
---- QUERY
|
|
alter table t2 drop column t
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# The dropped column no longer shows up
|
|
describe t2
|
|
---- RESULTS
|
|
'i','int',''
|
|
's','string','Str Col'
|
|
---- TYPES
|
|
string,string,string
|
|
====
|
|
---- QUERY
|
|
# Replace the columns with new values
|
|
alter table t2 replace columns (c1 bigint comment 'id col', c2 string, c3 int)
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
describe t2
|
|
---- RESULTS
|
|
'c1','bigint','id col'
|
|
'c2','string',''
|
|
'c3','int',''
|
|
---- TYPES
|
|
string,string,string
|
|
====
|
|
---- QUERY
|
|
# Should be able to read/write using the new column types
|
|
insert overwrite table t2 select 1, '50', 2 from functional.alltypes limit 2
|
|
---- RESULTS
|
|
: 2
|
|
====
|
|
---- QUERY
|
|
select * from t2
|
|
---- RESULTS
|
|
1,'50',2
|
|
1,'50',2
|
|
---- TYPES
|
|
bigint,string,int
|
|
====
|
|
---- QUERY
|
|
alter table t2 change column c2 int_col int comment 'changed to int col'
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
alter table t2 change column c1 id_col bigint
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
alter table t2 change column c3 c3 int comment 'added a comment'
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
describe t2
|
|
---- RESULTS
|
|
'id_col','bigint','id col'
|
|
'int_col','int','changed to int col'
|
|
'c3','int','added a comment'
|
|
---- TYPES
|
|
string,string,string
|
|
====
|
|
---- QUERY
|
|
select * from t2
|
|
---- RESULTS
|
|
1,50,2
|
|
1,50,2
|
|
---- TYPES
|
|
bigint,int,int
|
|
====
|
|
---- QUERY
|
|
# Add some complex-typed columns
|
|
alter table t2 add columns (
|
|
x array<int>,
|
|
y map<string,float> comment 'Map Col',
|
|
z struct<f1:boolean,f2:bigint>)
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
describe t2
|
|
---- RESULTS
|
|
'id_col','bigint','id col'
|
|
'int_col','int','changed to int col'
|
|
'c3','int','added a comment'
|
|
'x','array<int>',''
|
|
'y','map<string,float>','Map Col'
|
|
'z','struct<f1:boolean,f2:bigint>',''
|
|
---- TYPES
|
|
string,string,string
|
|
====
|
|
---- QUERY
|
|
# Replace columns with some complex-typed columns
|
|
alter table t2 replace columns (
|
|
a int comment 'Int Col',
|
|
b struct<f1:array<int>,f2:map<string,struct<f1:bigint>>>,
|
|
c double)
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
describe t2
|
|
---- RESULTS
|
|
'a','int','Int Col'
|
|
'b','struct<f1:array<int>,f2:map<string,struct<f1:bigint>>>',''
|
|
'c','double',''
|
|
---- TYPES
|
|
string,string,string
|
|
====
|
|
---- QUERY
|
|
create external table jointbl_test like functional.jointbl
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Expect new table to be empty
|
|
select * from jointbl_test
|
|
---- RESULTS
|
|
---- TYPES
|
|
bigint,string,int,int
|
|
====
|
|
---- QUERY
|
|
# change the location to point to some data
|
|
alter table jointbl_test set location '/test-warehouse/jointbl'
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# should get some results back now
|
|
select * from jointbl_test order by test_id limit 3
|
|
---- RESULTS
|
|
1001,'Name1',94611,5000
|
|
1002,'Name2',94611,5000
|
|
1003,'Name3',94611,5000
|
|
---- TYPES
|
|
bigint,string,int,int
|
|
====
|
|
---- QUERY
|
|
# change the location to point to some data in another file format
|
|
alter table jointbl_test set location '/test-warehouse/jointbl_seq'
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# change the table fileformat to match the data
|
|
alter table jointbl_test set fileformat sequencefile
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# now the proper data should be returned
|
|
select * from jointbl_test order by test_id limit 3
|
|
---- RESULTS
|
|
1001,'Name1',94611,5000
|
|
1002,'Name2',94611,5000
|
|
1003,'Name3',94611,5000
|
|
---- TYPES
|
|
bigint,string,int,int
|
|
====
|
|
---- QUERY
|
|
# Create a partitioned table. Specify the location so we know what dirs need
|
|
# to be cleaned after the test finishes executing.
|
|
create external table t_part (i int) partitioned by (j int, s string)
|
|
location '/test-warehouse/t_part_tmp'
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
alter table t_part add partition (j=cast(2-1 as int), s='2012')
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
alter table t_part add if not exists partition (j=1, s='2012')
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
alter table t_part add if not exists partition (j=1, s='2012/withslash')
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
alter table t_part add partition (j=1, s=substring('foo2013bar', 4, 8))
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Add another partition that points to the same location as another partition.
|
|
# This will cause the data to be read twice, but shouldn't result in an error.
|
|
alter table t_part add partition (j=100, s='same_location')
|
|
location '/test-warehouse/t_part_tmp/j=1/s=2012'
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Add another partition that points to an existing data location that does not
|
|
# follow the key=value directory structure.
|
|
alter table t_part add partition (j=101, s='different_part_dir')
|
|
location '/test-warehouse/part_data/'
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
insert overwrite table t_part partition(j=1, s='2012') select 2 from functional.alltypes limit 2
|
|
---- RESULTS
|
|
j=1/s=2012/: 2
|
|
====
|
|
---- QUERY
|
|
insert overwrite table t_part partition(j=1, s='2013') select 3 from functional.alltypes limit 3
|
|
---- RESULTS
|
|
j=1/s=2013/: 3
|
|
====
|
|
---- QUERY
|
|
insert overwrite table t_part partition(j=1, s='2012/withslash')
|
|
select 1 from functional.alltypes limit 1
|
|
---- RESULTS
|
|
j=1/s=2012%2Fwithslash/: 1
|
|
====
|
|
---- QUERY
|
|
select i, j, s from t_part
|
|
---- RESULTS
|
|
1,1,'2012/withslash'
|
|
2,1,'2012'
|
|
2,1,'2012'
|
|
2,100,'same_location'
|
|
2,100,'same_location'
|
|
3,1,'2013'
|
|
3,1,'2013'
|
|
3,1,'2013'
|
|
1984,101,'different_part_dir'
|
|
---- TYPES
|
|
int,int,string
|
|
====
|
|
---- QUERY
|
|
alter table t_part add partition (j=NULL, s='2013')
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
alter table t_part add partition (j=NULL, s=NULL)
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Drop the partition that points to a duplication location. The data will no longer
|
|
# be read twice.
|
|
alter table t_part drop partition (j=100, s='same_location')
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
alter table t_part drop partition (j=101, s='different_part_dir')
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
insert overwrite table t_part partition(j=NULL, s=NULL)
|
|
select 4 from functional.alltypes limit 5
|
|
---- RESULTS
|
|
j=__HIVE_DEFAULT_PARTITION__/s=__HIVE_DEFAULT_PARTITION__/: 5
|
|
====
|
|
---- QUERY
|
|
select i, j, s from t_part
|
|
---- RESULTS
|
|
1,1,'2012/withslash'
|
|
2,1,'2012'
|
|
2,1,'2012'
|
|
3,1,'2013'
|
|
3,1,'2013'
|
|
3,1,'2013'
|
|
4,NULL,'NULL'
|
|
4,NULL,'NULL'
|
|
4,NULL,'NULL'
|
|
4,NULL,'NULL'
|
|
4,NULL,'NULL'
|
|
---- TYPES
|
|
int,int,string
|
|
====
|
|
---- QUERY
|
|
insert overwrite table t_part partition(j=NULL, s='2013')
|
|
select 5 from functional.alltypes limit 5
|
|
---- RESULTS
|
|
j=__HIVE_DEFAULT_PARTITION__/s=2013/: 5
|
|
====
|
|
---- QUERY
|
|
select i, j, s from t_part
|
|
---- RESULTS
|
|
1,1,'2012/withslash'
|
|
2,1,'2012'
|
|
2,1,'2012'
|
|
3,1,'2013'
|
|
3,1,'2013'
|
|
3,1,'2013'
|
|
4,NULL,'NULL'
|
|
4,NULL,'NULL'
|
|
4,NULL,'NULL'
|
|
4,NULL,'NULL'
|
|
4,NULL,'NULL'
|
|
5,NULL,'2013'
|
|
5,NULL,'2013'
|
|
5,NULL,'2013'
|
|
5,NULL,'2013'
|
|
5,NULL,'2013'
|
|
---- TYPES
|
|
int,int,string
|
|
====
|
|
---- QUERY
|
|
alter table t_part drop partition (j=NULL, s=NULL)
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
select i, j, s from t_part
|
|
---- RESULTS
|
|
1,1,'2012/withslash'
|
|
2,1,'2012'
|
|
2,1,'2012'
|
|
3,1,'2013'
|
|
3,1,'2013'
|
|
3,1,'2013'
|
|
5,NULL,'2013'
|
|
5,NULL,'2013'
|
|
5,NULL,'2013'
|
|
5,NULL,'2013'
|
|
5,NULL,'2013'
|
|
---- TYPES
|
|
int,int,string
|
|
====
|
|
---- QUERY
|
|
alter table t_part drop partition (j=NULL, s=trim(' 2013 '))
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
select i, j, s from t_part
|
|
---- RESULTS
|
|
1,1,'2012/withslash'
|
|
2,1,'2012'
|
|
2,1,'2012'
|
|
3,1,'2013'
|
|
3,1,'2013'
|
|
3,1,'2013'
|
|
---- TYPES
|
|
int,int,string
|
|
====
|
|
---- QUERY
|
|
alter table t_part drop partition (j=1, s='2013')
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
select i, j, s from t_part
|
|
---- RESULTS
|
|
1,1,'2012/withslash'
|
|
2,1,'2012'
|
|
2,1,'2012'
|
|
---- TYPES
|
|
int,int,string
|
|
====
|
|
---- QUERY
|
|
alter table t_part drop partition (j=1, s='2012/withslash')
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
select i, j, s from t_part
|
|
---- RESULTS
|
|
2,1,'2012'
|
|
2,1,'2012'
|
|
---- TYPES
|
|
int,int,string
|
|
====
|
|
---- QUERY
|
|
# Test that empty string as partition column maps onto NULL
|
|
# using static partition insert
|
|
insert into table t_part partition(j=2, s='')
|
|
select 1 from functional.alltypes limit 1
|
|
---- RESULTS
|
|
j=2/s=__HIVE_DEFAULT_PARTITION__/: 1
|
|
====
|
|
---- QUERY
|
|
# Test that empty string as partition column maps onto NULL
|
|
# using dynamic partition insert
|
|
insert into table t_part partition(j=2, s)
|
|
select 10, '' from functional.alltypes limit 1
|
|
---- RESULTS
|
|
j=2/s=__HIVE_DEFAULT_PARTITION__/: 1
|
|
====
|
|
---- QUERY
|
|
# Validate the previous inserts
|
|
select i, j, s from t_part where s is NULL
|
|
---- RESULTS
|
|
1,2,'NULL'
|
|
10,2,'NULL'
|
|
---- TYPES
|
|
int,int,string
|
|
====
|
|
---- QUERY
|
|
# Drop default partition using empty string as key
|
|
alter table t_part drop partition (j=2, s='')
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Validate previous partition drop
|
|
select i, j, s from t_part where s is NULL
|
|
---- RESULTS
|
|
---- TYPES
|
|
int,int,string
|
|
====
|
|
---- QUERY
|
|
# rename a partitioned table
|
|
alter table t_part rename to t_part2
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# only the new table shows up
|
|
show tables like 't_part*'
|
|
---- RESULTS
|
|
't_part2'
|
|
====
|
|
---- QUERY
|
|
# should be able to read the same data from this table
|
|
select i, j, s from t_part2
|
|
---- RESULTS
|
|
2,1,'2012'
|
|
2,1,'2012'
|
|
---- TYPES
|
|
int,int,string
|
|
====
|
|
---- QUERY
|
|
create external table alltypes_test like functional.alltypes
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
alter table alltypes_test add partition(month=4, year=2009)
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
alter table alltypes_test add partition(month=5, year=2009)
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Table is empty
|
|
select int_col from alltypes_test
|
|
---- RESULTS
|
|
---- TYPES
|
|
int
|
|
====
|
|
---- QUERY
|
|
# Point one partition at some data (file format does not match table)
|
|
alter table alltypes_test partition(month=4, year=2009)
|
|
set location '/test-warehouse/alltypes_seq_snap/year=2009/month=4'
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
select int_col from alltypes_test
|
|
group by int_col order by 1 limit 100
|
|
---- RESULTS
|
|
0
|
|
NULL
|
|
---- TYPES
|
|
int
|
|
====
|
|
---- QUERY
|
|
alter table alltypes_test partition(month=4, year=2009)
|
|
set fileformat sequencefile
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
select int_col, count(*) from alltypes_test
|
|
group by int_col order by 1 limit 100
|
|
---- RESULTS
|
|
0,30
|
|
1,30
|
|
2,30
|
|
3,30
|
|
4,30
|
|
5,30
|
|
6,30
|
|
7,30
|
|
8,30
|
|
9,30
|
|
---- TYPES
|
|
int,bigint
|
|
====
|
|
---- QUERY
|
|
# Point the other partition at some more data. This time in a different
|
|
# file format.
|
|
alter table alltypes_test partition(month=cast(1+4 as int), year=cast(100*20+9 as int))
|
|
set location '/test-warehouse/alltypes_rc/year=2009/month=5'
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
alter table alltypes_test partition(month=cast(2+3 as int), year=2009)
|
|
set fileformat rcfile
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
select int_col, count(*) from alltypes_test
|
|
group by int_col order by 1 limit 100
|
|
---- RESULTS
|
|
0,61
|
|
1,61
|
|
2,61
|
|
3,61
|
|
4,61
|
|
5,61
|
|
6,61
|
|
7,61
|
|
8,61
|
|
9,61
|
|
---- TYPES
|
|
int,bigint
|
|
====
|
|
---- QUERY
|
|
# Show the table stats before altering.
|
|
show table stats alltypes_test
|
|
---- LABELS
|
|
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, FORMAT
|
|
---- RESULTS
|
|
2009,4,-1,1,regex:.+KB,'NOT CACHED','SEQUENCE_FILE'
|
|
2009,5,-1,1,regex:.+KB,'NOT CACHED','RC_FILE'
|
|
Total,,-1,2,regex:.+KB,'0B',''
|
|
---- TYPES
|
|
INT, INT, BIGINT, BIGINT, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Test altering the 'numRows' table property of a table.
|
|
alter table alltypes_test set tblproperties ('numRows'='200')
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Test altering the 'numRows' table property of a partition.
|
|
alter table alltypes_test partition(year=2009, month=4)
|
|
set tblproperties ('numRows'='30')
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Show the table stats after altering the table and partition stats.
|
|
show table stats alltypes_test
|
|
---- LABELS
|
|
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, FORMAT
|
|
---- RESULTS
|
|
2009,4,30,1,regex:.+KB,'NOT CACHED','SEQUENCE_FILE'
|
|
2009,5,-1,1,regex:.+KB,'NOT CACHED','RC_FILE'
|
|
Total,,200,2,regex:.+KB,'0B',''
|
|
---- TYPES
|
|
INT, INT, BIGINT, BIGINT, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# IMPALA-1016: Testing scanning newly added columns
|
|
DROP TABLE IF EXISTS imp1016
|
|
====
|
|
---- QUERY
|
|
CREATE TABLE imp1016 (string1 string)
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
INSERT INTO imp1016 VALUES ('test')
|
|
---- RESULTS
|
|
: 1
|
|
====
|
|
---- QUERY
|
|
ALTER TABLE imp1016 ADD COLUMNS (string2 string)
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
DESCRIBE imp1016
|
|
---- RESULTS
|
|
'string1','string',''
|
|
'string2','string',''
|
|
---- TYPES
|
|
string,string,string
|
|
====
|
|
---- QUERY
|
|
SELECT * FROM imp1016
|
|
---- RESULTS
|
|
'test','NULL'
|
|
---- TYPES
|
|
string,string
|
|
====
|
|
---- QUERY
|
|
SELECT string1 FROM imp1016
|
|
---- RESULTS
|
|
'test'
|
|
---- TYPES
|
|
string
|
|
====
|
|
---- QUERY
|
|
SELECT string2 FROM imp1016
|
|
---- RESULTS
|
|
'NULL'
|
|
---- TYPES
|
|
string
|
|
====
|
|
---- QUERY
|
|
SELECT COUNT(DISTINCT string1) FROM imp1016
|
|
---- RESULTS
|
|
1
|
|
---- TYPES
|
|
bigint
|
|
====
|
|
---- QUERY
|
|
SELECT COUNT(DISTINCT string2) FROM imp1016
|
|
---- RESULTS
|
|
0
|
|
---- TYPES
|
|
bigint
|
|
====
|
|
---- QUERY
|
|
# Create a larger table to test scanning newly added columns
|
|
DROP TABLE IF EXISTS imp1016Large
|
|
====
|
|
---- QUERY
|
|
CREATE TABLE imp1016Large (string1 string)
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# There is a codepath that operates on chunks of 1024 tuples, inserting
|
|
# more than 1024 tuples
|
|
INSERT INTO imp1016Large SELECT 'test' FROM functional.alltypes LIMIT 2000
|
|
---- RESULTS
|
|
: 2000
|
|
====
|
|
---- QUERY
|
|
ALTER TABLE imp1016Large ADD COLUMNS (string2 string)
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
DESCRIBE imp1016Large
|
|
---- RESULTS
|
|
'string1','string',''
|
|
'string2','string',''
|
|
---- TYPES
|
|
string,string,string
|
|
====
|
|
---- QUERY
|
|
SELECT COUNT(string2) FROM imp1016Large
|
|
---- RESULTS
|
|
0
|
|
---- TYPES
|
|
bigint
|
|
====
|
|
---- QUERY
|
|
SELECT COUNT(*), COUNT(DISTINCT string1) FROM imp1016Large
|
|
---- RESULTS
|
|
2000,1
|
|
---- TYPES
|
|
bigint,bigint
|
|
====
|
|
---- QUERY
|
|
SELECT COUNT(*), COUNT(DISTINCT string2) FROM imp1016Large
|
|
---- RESULTS
|
|
2000,0
|
|
---- TYPES
|
|
bigint,bigint
|
|
====
|