Files
impala/testdata/workloads/functional-query/queries/QueryTest/alter-table.test
Lenni Kuff 39f77b8b8f Add support for cluster-synchronized catalog operations
This change adds support for cluster-synchronized catalog operations. This provides the
guaranteethat after a catalog op completes, all other subscribers to the catalog topic have
also processed that update. This is useful when load balancing, because a common workflow
is to target a different impalad for each statement executed.
For example if each of the following were executed sequentially, but targeting
a different node:
1) CREATE TABLE Foo
2) INSERT INTO Foo
3) SELECT * FROM Foo
4) INSERT INTO Foo ....

Since both the INSERT and the CREATE update the catalog, it would not work as expected
without this patch. The user might either get a "table not found" error or would be
missing partition information from the INSERT.

The downside is that this approach to DDL takes a bit longer because we need to wait
until all subscribers have processed an update. If all nodes are healthy, this overhead
should not be significantly longer than the current DDL time. However, a single bad node
might slow down or completely block the completion of all DDL operations. By default
this feature is disabled, but it can be enabled using a new query option: SYNCED_DDL=1

To test this, the base test suite was updated to support selecting a random impalad
to execute each query section in a query test file. This is currently only enabled
for the insert and DDL tests, but could be leveraged by more tests in the future.

TODO: Add additional failure tests around this functionality.
TODO: Add an explicit "sync" statement so users do not need to run all their DDL
in this mode (since it is slower).

Change-Id: I45e757a931bf2a4740cc0cdd1e76ce49a1e22b83
Reviewed-on: http://gerrit.ent.cloudera.com:8080/899
Reviewed-by: Ishaan Joshi <ishaan@cloudera.com>
Tested-by: jenkins
2014-01-08 10:53:58 -08:00

543 lines
11 KiB
Plaintext

====
---- QUERY
use alter_table_test_db
---- RESULTS
====
---- QUERY
# First create an unpartitioned table
create external table t1 (i int) location '/test-warehouse/t1_tmp1'
---- RESULTS
====
---- QUERY
describe t1
---- RESULTS
'i','int',''
---- TYPES
string,string,string
====
---- QUERY
# Add some columns
alter table t1 add columns (t tinyint, s string comment 'Str Col')
---- RESULTS
====
---- QUERY
describe t1
---- RESULTS
'i','int',''
't','tinyint',''
's','string','Str Col'
---- TYPES
string,string,string
====
---- QUERY
alter table t1 rename to t2
---- RESULTS
====
---- QUERY
show tables
---- RESULTS
't2'
---- TYPES
string
====
---- QUERY
# Move the table to a different database
alter table t2 rename to alter_table_test_db2.t1_inotherdb
---- RESULTS
====
---- QUERY
# No longer appears in this database
show tables
---- RESULTS
---- TYPES
string
====
---- QUERY
# Shows up in the second database
show tables in alter_table_test_db2
---- RESULTS
't1_inotherdb'
---- TYPES
string
====
---- QUERY
# Move the table back to this database
alter table alter_table_test_db2.t1_inotherdb rename to t2
---- RESULTS
====
---- QUERY
# make sure the new table shows the same columns as the old table
describe t2
---- RESULTS
'i','int',''
't','tinyint',''
's','string','Str Col'
---- TYPES
string,string,string
====
---- QUERY
alter table t2 drop column t
---- RESULTS
====
---- QUERY
# The dropped column no longer shows up
describe t2
---- RESULTS
'i','int',''
's','string','Str Col'
---- TYPES
string,string,string
====
---- QUERY
# Replace the columns with new values
alter table t2 replace columns (c1 bigint comment 'id col', c2 string, c3 int)
---- RESULTS
====
---- QUERY
describe t2
---- RESULTS
'c1','bigint','id col'
'c2','string',''
'c3','int',''
---- TYPES
string,string,string
====
---- QUERY
# Should be able to read/write using the new column types
insert overwrite table t2 select 1, '50', 2 from functional.alltypes limit 2
---- RESULTS
: 2
====
---- QUERY
select * from t2
---- RESULTS
1,'50',2
1,'50',2
---- TYPES
bigint,string,int
====
---- QUERY
alter table t2 change column c2 int_col int comment 'changed to int col'
---- RESULTS
====
---- QUERY
alter table t2 change column c1 id_col bigint
---- RESULTS
====
---- QUERY
alter table t2 change column c3 c3 int comment 'added a comment'
---- RESULTS
====
---- QUERY
describe t2
---- RESULTS
'id_col','bigint','id col'
'int_col','int','changed to int col'
'c3','int','added a comment'
---- TYPES
string,string,string
====
---- QUERY
select * from t2
---- RESULTS
1,50,2
1,50,2
---- TYPES
bigint,int,int
====
---- QUERY
create external table jointbl_test like functional.jointbl
---- RESULTS
====
---- QUERY
# Expect new table to be empty
select * from jointbl_test
---- RESULTS
---- TYPES
bigint,string,int,int
====
---- QUERY
# change the location to point to some data
alter table jointbl_test set location '/test-warehouse/jointbl'
---- RESULTS
====
---- QUERY
# should get some results back now
select * from jointbl_test order by test_id limit 3
---- RESULTS
1001,'Name1',94611,5000
1002,'Name2',94611,5000
1003,'Name3',94611,5000
---- TYPES
bigint,string,int,int
====
---- QUERY
# change the location to point to some data in another file format
alter table jointbl_test set location '/test-warehouse/jointbl_seq'
---- RESULTS
====
---- QUERY
# since the file format doesn't match, wrong results will be returned
select test_id from jointbl_test order by test_id limit 3
---- RESULTS
NULL
---- TYPES
bigint
====
---- QUERY
# change the table fileformat to match the data
alter table jointbl_test set fileformat sequencefile
---- RESULTS
====
---- QUERY
# now the proper data should be returned
select * from jointbl_test order by test_id limit 3
---- RESULTS
1001,'Name1',94611,5000
1002,'Name2',94611,5000
1003,'Name3',94611,5000
---- TYPES
bigint,string,int,int
====
---- QUERY
# Create a partitioned table. Specify the location so we know what dirs need
# to be cleaned after the test finishes executing.
create external table t_part (i int) partitioned by (j int, s string)
location '/test-warehouse/t_part_tmp'
---- RESULTS
====
---- QUERY
alter table t_part add partition (j=cast(2-1 as int), s='2012')
---- RESULTS
====
---- QUERY
alter table t_part add if not exists partition (j=1, s='2012')
---- RESULTS
====
---- QUERY
alter table t_part add if not exists partition (j=1, s='2012/withslash')
---- RESULTS
====
---- QUERY
alter table t_part add partition (j=1, s=substring('foo2013bar', 4, 8))
---- RESULTS
====
---- QUERY
# Add another partition that points to the same location as another partition.
# This will cause the data to be read twice, but shouldn't result in an error.
alter table t_part add partition (j=100, s='same_location')
location '/test-warehouse/t_part_tmp/j=1/s=2012'
---- RESULTS
====
---- QUERY
# Add another partition that points to an existing data location that does not
# follow the key=value directory structure.
alter table t_part add partition (j=101, s='different_part_dir')
location '/test-warehouse/part_data/'
---- RESULTS
====
---- QUERY
insert overwrite table t_part partition(j=1, s='2012') select 2 from functional.alltypes limit 2
---- RESULTS
j=1/s=2012/: 2
====
---- QUERY
insert overwrite table t_part partition(j=1, s='2013') select 3 from functional.alltypes limit 3
---- RESULTS
j=1/s=2013/: 3
====
---- QUERY
insert overwrite table t_part partition(j=1, s='2012/withslash')
select 1 from functional.alltypes limit 1
---- RESULTS
j=1/s=2012%2Fwithslash/: 1
====
---- QUERY
select i, j, s from t_part
---- RESULTS
1,1,'2012/withslash'
2,1,'2012'
2,1,'2012'
2,100,'same_location'
2,100,'same_location'
3,1,'2013'
3,1,'2013'
3,1,'2013'
1984,101,'different_part_dir'
---- TYPES
int,int,string
====
---- QUERY
alter table t_part add partition (j=NULL, s='2013')
---- RESULTS
====
---- QUERY
alter table t_part add partition (j=NULL, s=NULL)
---- RESULTS
====
---- QUERY
# Drop the partition that points to a duplication location. The data will no longer
# be read twice.
alter table t_part drop partition (j=100, s='same_location')
---- RESULTS
====
---- QUERY
alter table t_part drop partition (j=101, s='different_part_dir')
---- RESULTS
====
---- QUERY
insert overwrite table t_part partition(j=NULL, s=NULL)
select 4 from functional.alltypes limit 5
---- RESULTS
j=__HIVE_DEFAULT_PARTITION__/s=__HIVE_DEFAULT_PARTITION__/: 5
====
---- QUERY
select i, j, s from t_part
---- RESULTS
1,1,'2012/withslash'
2,1,'2012'
2,1,'2012'
3,1,'2013'
3,1,'2013'
3,1,'2013'
4,NULL,'NULL'
4,NULL,'NULL'
4,NULL,'NULL'
4,NULL,'NULL'
4,NULL,'NULL'
---- TYPES
int,int,string
====
---- QUERY
insert overwrite table t_part partition(j=NULL, s='2013')
select 5 from functional.alltypes limit 5
---- RESULTS
j=__HIVE_DEFAULT_PARTITION__/s=2013/: 5
====
---- QUERY
select i, j, s from t_part
---- RESULTS
1,1,'2012/withslash'
2,1,'2012'
2,1,'2012'
3,1,'2013'
3,1,'2013'
3,1,'2013'
4,NULL,'NULL'
4,NULL,'NULL'
4,NULL,'NULL'
4,NULL,'NULL'
4,NULL,'NULL'
5,NULL,'2013'
5,NULL,'2013'
5,NULL,'2013'
5,NULL,'2013'
5,NULL,'2013'
---- TYPES
int,int,string
====
---- QUERY
alter table t_part drop partition (j=NULL, s=NULL)
---- RESULTS
====
---- QUERY
select i, j, s from t_part
---- RESULTS
1,1,'2012/withslash'
2,1,'2012'
2,1,'2012'
3,1,'2013'
3,1,'2013'
3,1,'2013'
5,NULL,'2013'
5,NULL,'2013'
5,NULL,'2013'
5,NULL,'2013'
5,NULL,'2013'
---- TYPES
int,int,string
====
---- QUERY
alter table t_part drop partition (j=NULL, s=trim(' 2013 '))
---- RESULTS
====
---- QUERY
select i, j, s from t_part
---- RESULTS
1,1,'2012/withslash'
2,1,'2012'
2,1,'2012'
3,1,'2013'
3,1,'2013'
3,1,'2013'
---- TYPES
int,int,string
====
---- QUERY
alter table t_part drop partition (j=1, s='2013')
---- RESULTS
====
---- QUERY
select i, j, s from t_part
---- RESULTS
1,1,'2012/withslash'
2,1,'2012'
2,1,'2012'
---- TYPES
int,int,string
====
---- QUERY
alter table t_part drop partition (j=1, s='2012/withslash')
---- RESULTS
====
---- QUERY
select i, j, s from t_part
---- RESULTS
2,1,'2012'
2,1,'2012'
---- TYPES
int,int,string
====
---- QUERY
# Test that empty string as partition column maps onto NULL
# using static partition insert
insert into table t_part partition(j=2, s='')
select 1 from functional.alltypes limit 1
---- RESULTS
j=2/s=__HIVE_DEFAULT_PARTITION__/: 1
====
---- QUERY
# Test that empty string as partition column maps onto NULL
# using dynamic partition insert
insert into table t_part partition(j=2, s)
select 10, '' from functional.alltypes limit 1
---- RESULTS
j=2/s=__HIVE_DEFAULT_PARTITION__/: 1
====
---- QUERY
# Validate the previous inserts
select i, j, s from t_part where s is NULL
---- RESULTS
1,2,'NULL'
10,2,'NULL'
---- TYPES
int,int,string
====
---- QUERY
# Drop default partition using empty string as key
alter table t_part drop partition (j=2, s='')
---- RESULTS
====
---- QUERY
# Validate previous partition drop
select i, j, s from t_part where s is NULL
---- RESULTS
---- TYPES
int,int,string
====
---- QUERY
# rename a partitioned table
alter table t_part rename to t_part2
---- RESULTS
====
---- QUERY
# only the new table shows up
show tables like 't_part*'
---- RESULTS
't_part2'
====
---- QUERY
# should be able to read the same data from this table
select i, j, s from t_part2
---- RESULTS
2,1,'2012'
2,1,'2012'
---- TYPES
int,int,string
====
---- QUERY
create external table alltypes_test like functional.alltypes
---- RESULTS
====
---- QUERY
alter table alltypes_test add partition(month=4, year=2009)
---- RESULTS
====
---- QUERY
alter table alltypes_test add partition(month=5, year=2009)
---- RESULTS
====
---- QUERY
# Table is empty
select int_col from alltypes_test
---- RESULTS
---- TYPES
int
====
---- QUERY
# Point one partition at some data (file format does not match table)
alter table alltypes_test partition(month=4, year=2009)
set location '/test-warehouse/alltypes_seq_snap/year=2009/month=4'
---- RESULTS
====
---- QUERY
select int_col from alltypes_test
group by int_col order by 1 limit 100
---- RESULTS
0
NULL
---- TYPES
int
====
---- QUERY
alter table alltypes_test partition(month=4, year=2009)
set fileformat sequencefile
---- RESULTS
====
---- QUERY
select int_col, count(*) from alltypes_test
group by int_col order by 1 limit 100
---- RESULTS
0,30
1,30
2,30
3,30
4,30
5,30
6,30
7,30
8,30
9,30
---- TYPES
int,bigint
====
---- QUERY
# Point the other partition at some more data. This time in a different
# file format.
alter table alltypes_test partition(month=cast(1+4 as int), year=cast(100*20+9 as int))
set location '/test-warehouse/alltypes_rc/year=2009/month=5'
---- RESULTS
====
---- QUERY
alter table alltypes_test partition(month=cast(2+3 as int), year=2009)
set fileformat rcfile
---- RESULTS
====
---- QUERY
select int_col, count(*) from alltypes_test
group by int_col order by 1 limit 100
---- RESULTS
0,61
1,61
2,61
3,61
4,61
5,61
6,61
7,61
8,61
9,61
---- TYPES
int,bigint
====