mirror of
https://github.com/apache/impala.git
synced 2026-01-01 09:00:42 -05:00
This change adds support for cluster-synchronized catalog operations. This provides the guarantee that after a catalog op completes, all other subscribers to the catalog topic have also processed that update. This is useful when load balancing, because a common workflow is to target a different impalad for each statement executed. For example, if each of the following were executed sequentially, but targeting a different node: 1) CREATE TABLE Foo 2) INSERT INTO Foo 3) SELECT * FROM Foo 4) INSERT INTO Foo .... Since both the INSERT and the CREATE update the catalog, it would not work as expected without this patch. The user might either get a "table not found" error or be missing partition information from the INSERT. The downside is that this approach to DDL takes a bit longer because we need to wait until all subscribers have processed an update. If all nodes are healthy, the resulting DDL time should not be significantly longer than the current DDL time. However, a single bad node might slow down or completely block the completion of all DDL operations. By default this feature is disabled, but it can be enabled using a new query option: SYNCED_DDL=1 To test this, the base test suite was updated to support selecting a random impalad to execute each query section in a query test file. This is currently only enabled for the insert and DDL tests, but could be leveraged by more tests in the future. TODO: Add additional failure tests around this functionality. TODO: Add an explicit "sync" statement so users do not need to run all their DDL in this mode (since it is slower). Change-Id: I45e757a931bf2a4740cc0cdd1e76ce49a1e22b83 Reviewed-on: http://gerrit.ent.cloudera.com:8080/899 Reviewed-by: Ishaan Joshi <ishaan@cloudera.com> Tested-by: jenkins
543 lines
11 KiB
Plaintext
543 lines
11 KiB
Plaintext
====
|
|
---- QUERY
|
|
# Run the statements of this test against the alter_table_test_db database
use alter_table_test_db
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# First create an unpartitioned table
|
|
create external table t1 (i int) location '/test-warehouse/t1_tmp1'
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# The new table has the single column i
describe t1
|
|
---- RESULTS
|
|
'i','int',''
|
|
---- TYPES
|
|
string,string,string
|
|
====
|
|
---- QUERY
|
|
# Add some columns
|
|
alter table t1 add columns (t tinyint, s string comment 'Str Col')
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# The added columns are listed after i, and s carries its comment
describe t1
|
|
---- RESULTS
|
|
'i','int',''
|
|
't','tinyint',''
|
|
's','string','Str Col'
|
|
---- TYPES
|
|
string,string,string
|
|
====
|
|
---- QUERY
|
|
# Rename the table within the current database
alter table t1 rename to t2
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Only the new name is listed
show tables
|
|
---- RESULTS
|
|
't2'
|
|
---- TYPES
|
|
string
|
|
====
|
|
---- QUERY
|
|
# Move the table to a different database
|
|
alter table t2 rename to alter_table_test_db2.t1_inotherdb
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# No longer appears in this database
|
|
show tables
|
|
---- RESULTS
|
|
---- TYPES
|
|
string
|
|
====
|
|
---- QUERY
|
|
# Shows up in the second database
|
|
show tables in alter_table_test_db2
|
|
---- RESULTS
|
|
't1_inotherdb'
|
|
---- TYPES
|
|
string
|
|
====
|
|
---- QUERY
|
|
# Move the table back to this database
|
|
alter table alter_table_test_db2.t1_inotherdb rename to t2
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# make sure the new table shows the same columns as the old table
|
|
describe t2
|
|
---- RESULTS
|
|
'i','int',''
|
|
't','tinyint',''
|
|
's','string','Str Col'
|
|
---- TYPES
|
|
string,string,string
|
|
====
|
|
---- QUERY
|
|
# Drop the tinyint column t
alter table t2 drop column t
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# The dropped column no longer shows up
|
|
describe t2
|
|
---- RESULTS
|
|
'i','int',''
|
|
's','string','Str Col'
|
|
---- TYPES
|
|
string,string,string
|
|
====
|
|
---- QUERY
|
|
# Replace the columns with new values
|
|
alter table t2 replace columns (c1 bigint comment 'id col', c2 string, c3 int)
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Only the replacement columns are listed; c1 carries its comment
describe t2
|
|
---- RESULTS
|
|
'c1','bigint','id col'
|
|
'c2','string',''
|
|
'c3','int',''
|
|
---- TYPES
|
|
string,string,string
|
|
====
|
|
---- QUERY
|
|
# Should be able to read/write using the new column types
|
|
insert overwrite table t2 select 1, '50', 2 from functional.alltypes limit 2
|
|
---- RESULTS
|
|
: 2
|
|
====
|
|
---- QUERY
|
|
# Both inserted rows are returned, with c2 read back as a string
select * from t2
|
|
---- RESULTS
|
|
1,'50',2
|
|
1,'50',2
|
|
---- TYPES
|
|
bigint,string,int
|
|
====
|
|
---- QUERY
|
|
# Rename c2 to int_col, change its type from string to int and set a comment
alter table t2 change column c2 int_col int comment 'changed to int col'
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Rename c1 to id_col, keeping the bigint type
alter table t2 change column c1 id_col bigint
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Keep the name and type of c3 and only add a comment
alter table t2 change column c3 c3 int comment 'added a comment'
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# All three column changes are reflected; id_col still shows the 'id col' comment
describe t2
|
|
---- RESULTS
|
|
'id_col','bigint','id col'
|
|
'int_col','int','changed to int col'
|
|
'c3','int','added a comment'
|
|
---- TYPES
|
|
string,string,string
|
|
====
|
|
---- QUERY
|
|
# The existing rows are still readable; the value written as '50' now comes back as the int 50
select * from t2
|
|
---- RESULTS
|
|
1,50,2
|
|
1,50,2
|
|
---- TYPES
|
|
bigint,int,int
|
|
====
|
|
---- QUERY
|
|
# Create an external table modeled on functional.jointbl (CREATE TABLE ... LIKE)
create external table jointbl_test like functional.jointbl
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Expect new table to be empty
|
|
select * from jointbl_test
|
|
---- RESULTS
|
|
---- TYPES
|
|
bigint,string,int,int
|
|
====
|
|
---- QUERY
|
|
# change the location to point to some data
|
|
alter table jointbl_test set location '/test-warehouse/jointbl'
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# should get some results back now
|
|
select * from jointbl_test order by test_id limit 3
|
|
---- RESULTS
|
|
1001,'Name1',94611,5000
|
|
1002,'Name2',94611,5000
|
|
1003,'Name3',94611,5000
|
|
---- TYPES
|
|
bigint,string,int,int
|
|
====
|
|
---- QUERY
|
|
# change the location to point to some data in another file format
|
|
alter table jointbl_test set location '/test-warehouse/jointbl_seq'
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# since the file format doesn't match, wrong results will be returned
|
|
select test_id from jointbl_test order by test_id limit 3
|
|
---- RESULTS
|
|
NULL
|
|
---- TYPES
|
|
bigint
|
|
====
|
|
---- QUERY
|
|
# change the table fileformat to match the data
|
|
alter table jointbl_test set fileformat sequencefile
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# now the proper data should be returned
|
|
select * from jointbl_test order by test_id limit 3
|
|
---- RESULTS
|
|
1001,'Name1',94611,5000
|
|
1002,'Name2',94611,5000
|
|
1003,'Name3',94611,5000
|
|
---- TYPES
|
|
bigint,string,int,int
|
|
====
|
|
---- QUERY
|
|
# Create a partitioned table. Specify the location so we know what dirs need
|
|
# to be cleaned after the test finishes executing.
|
|
create external table t_part (i int) partitioned by (j int, s string)
|
|
location '/test-warehouse/t_part_tmp'
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# A partition key value may be given as a constant expression (here 2-1 cast to int)
alter table t_part add partition (j=cast(2-1 as int), s='2012')
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# IF NOT EXISTS on a partition spec that was already added by the previous
# statement; no error is expected
alter table t_part add if not exists partition (j=1, s='2012')
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Partition key value that contains a slash
alter table t_part add if not exists partition (j=1, s='2012/withslash')
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Partition key value given as a string function call
alter table t_part add partition (j=1, s=substring('foo2013bar', 4, 8))
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Add another partition that points to the same location as another partition.
|
|
# This will cause the data to be read twice, but shouldn't result in an error.
|
|
alter table t_part add partition (j=100, s='same_location')
|
|
location '/test-warehouse/t_part_tmp/j=1/s=2012'
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Add another partition that points to an existing data location that does not
|
|
# follow the key=value directory structure.
|
|
alter table t_part add partition (j=101, s='different_part_dir')
|
|
location '/test-warehouse/part_data/'
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Write two rows with i=2 into partition (j=1, s='2012')
insert overwrite table t_part partition(j=1, s='2012') select 2 from functional.alltypes limit 2
|
|
---- RESULTS
|
|
j=1/s=2012/: 2
|
|
====
|
|
---- QUERY
|
|
# Write three rows with i=3 into partition (j=1, s='2013')
insert overwrite table t_part partition(j=1, s='2013') select 3 from functional.alltypes limit 3
|
|
---- RESULTS
|
|
j=1/s=2013/: 3
|
|
====
|
|
---- QUERY
|
|
# Write one row into the partition whose key contains a slash; the slash is
# reported as %2F in the partition path
insert overwrite table t_part partition(j=1, s='2012/withslash')
|
|
select 1 from functional.alltypes limit 1
|
|
---- RESULTS
|
|
j=1/s=2012%2Fwithslash/: 1
|
|
====
|
|
---- QUERY
|
|
# The two i=2 rows show up once for (j=1, s='2012') and once more for the
# (j=100, s='same_location') partition that shares its directory; the j=101 row
# comes from the data already present under /test-warehouse/part_data/
select i, j, s from t_part
|
|
---- RESULTS
|
|
1,1,'2012/withslash'
|
|
2,1,'2012'
|
|
2,1,'2012'
|
|
2,100,'same_location'
|
|
2,100,'same_location'
|
|
3,1,'2013'
|
|
3,1,'2013'
|
|
3,1,'2013'
|
|
1984,101,'different_part_dir'
|
|
---- TYPES
|
|
int,int,string
|
|
====
|
|
---- QUERY
|
|
# NULL may be used as a partition key value
alter table t_part add partition (j=NULL, s='2013')
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Both partition keys may be NULL at once
alter table t_part add partition (j=NULL, s=NULL)
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Drop the partition that points to a duplication location. The data will no longer
|
|
# be read twice.
|
|
alter table t_part drop partition (j=100, s='same_location')
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Also drop the partition that was added with location /test-warehouse/part_data/
alter table t_part drop partition (j=101, s='different_part_dir')
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Write five rows with i=4 into the all-NULL partition; NULL keys are reported
# as __HIVE_DEFAULT_PARTITION__ in the partition path
insert overwrite table t_part partition(j=NULL, s=NULL)
|
|
select 4 from functional.alltypes limit 5
|
|
---- RESULTS
|
|
j=__HIVE_DEFAULT_PARTITION__/s=__HIVE_DEFAULT_PARTITION__/: 5
|
|
====
|
|
---- QUERY
|
|
# Rows of the two dropped partitions are gone and the five i=4 rows are present
select i, j, s from t_part
|
|
---- RESULTS
|
|
1,1,'2012/withslash'
|
|
2,1,'2012'
|
|
2,1,'2012'
|
|
3,1,'2013'
|
|
3,1,'2013'
|
|
3,1,'2013'
|
|
4,NULL,'NULL'
|
|
4,NULL,'NULL'
|
|
4,NULL,'NULL'
|
|
4,NULL,'NULL'
|
|
4,NULL,'NULL'
|
|
---- TYPES
|
|
int,int,string
|
|
====
|
|
---- QUERY
|
|
# Write five rows with i=5 into partition (j=NULL, s='2013')
insert overwrite table t_part partition(j=NULL, s='2013')
|
|
select 5 from functional.alltypes limit 5
|
|
---- RESULTS
|
|
j=__HIVE_DEFAULT_PARTITION__/s=2013/: 5
|
|
====
|
|
---- QUERY
|
|
# Rows of both NULL-keyed partitions are returned alongside the earlier rows
select i, j, s from t_part
|
|
---- RESULTS
|
|
1,1,'2012/withslash'
|
|
2,1,'2012'
|
|
2,1,'2012'
|
|
3,1,'2013'
|
|
3,1,'2013'
|
|
3,1,'2013'
|
|
4,NULL,'NULL'
|
|
4,NULL,'NULL'
|
|
4,NULL,'NULL'
|
|
4,NULL,'NULL'
|
|
4,NULL,'NULL'
|
|
5,NULL,'2013'
|
|
5,NULL,'2013'
|
|
5,NULL,'2013'
|
|
5,NULL,'2013'
|
|
5,NULL,'2013'
|
|
---- TYPES
|
|
int,int,string
|
|
====
|
|
---- QUERY
|
|
# Drop the all-NULL partition
alter table t_part drop partition (j=NULL, s=NULL)
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# The i=4 rows are no longer returned
select i, j, s from t_part
|
|
---- RESULTS
|
|
1,1,'2012/withslash'
|
|
2,1,'2012'
|
|
2,1,'2012'
|
|
3,1,'2013'
|
|
3,1,'2013'
|
|
3,1,'2013'
|
|
5,NULL,'2013'
|
|
5,NULL,'2013'
|
|
5,NULL,'2013'
|
|
5,NULL,'2013'
|
|
5,NULL,'2013'
|
|
---- TYPES
|
|
int,int,string
|
|
====
|
|
---- QUERY
|
|
# Drop a partition with the string key given as a function call
alter table t_part drop partition (j=NULL, s=trim(' 2013 '))
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# The i=5 rows of partition (j=NULL, s='2013') are no longer returned
select i, j, s from t_part
|
|
---- RESULTS
|
|
1,1,'2012/withslash'
|
|
2,1,'2012'
|
|
2,1,'2012'
|
|
3,1,'2013'
|
|
3,1,'2013'
|
|
3,1,'2013'
|
|
---- TYPES
|
|
int,int,string
|
|
====
|
|
---- QUERY
|
|
# Drop partition (j=1, s='2013')
alter table t_part drop partition (j=1, s='2013')
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# The i=3 rows are no longer returned
select i, j, s from t_part
|
|
---- RESULTS
|
|
1,1,'2012/withslash'
|
|
2,1,'2012'
|
|
2,1,'2012'
|
|
---- TYPES
|
|
int,int,string
|
|
====
|
|
---- QUERY
|
|
# Drop the partition whose key contains a slash
alter table t_part drop partition (j=1, s='2012/withslash')
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Only the two rows of partition (j=1, s='2012') are left
select i, j, s from t_part
|
|
---- RESULTS
|
|
2,1,'2012'
|
|
2,1,'2012'
|
|
---- TYPES
|
|
int,int,string
|
|
====
|
|
---- QUERY
|
|
# Test that empty string as partition column maps onto NULL
|
|
# using static partition insert
|
|
# (the reported partition path uses __HIVE_DEFAULT_PARTITION__ for s)
insert into table t_part partition(j=2, s='')
|
|
select 1 from functional.alltypes limit 1
|
|
---- RESULTS
|
|
j=2/s=__HIVE_DEFAULT_PARTITION__/: 1
|
|
====
|
|
---- QUERY
|
|
# Test that empty string as partition column maps onto NULL
|
|
# using dynamic partition insert
|
|
# (s is taken from the second select-list item, the empty string)
insert into table t_part partition(j=2, s)
|
|
select 10, '' from functional.alltypes limit 1
|
|
---- RESULTS
|
|
j=2/s=__HIVE_DEFAULT_PARTITION__/: 1
|
|
====
|
|
---- QUERY
|
|
# Validate the previous inserts
|
|
# (i=1 was written by the static insert, i=10 by the dynamic one)
select i, j, s from t_part where s is NULL
|
|
---- RESULTS
|
|
1,2,'NULL'
|
|
10,2,'NULL'
|
|
---- TYPES
|
|
int,int,string
|
|
====
|
|
---- QUERY
|
|
# Drop default partition using empty string as key
|
|
alter table t_part drop partition (j=2, s='')
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Validate previous partition drop
|
|
# (no rows with a NULL s are expected any more)
select i, j, s from t_part where s is NULL
|
|
---- RESULTS
|
|
---- TYPES
|
|
int,int,string
|
|
====
|
|
---- QUERY
|
|
# rename a partitioned table
|
|
alter table t_part rename to t_part2
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# only the new table shows up
|
|
# (the result is a single string column, declared below like the other
# show tables cases in this file)
show tables like 't_part*'
|
|
---- RESULTS
|
|
't_part2'
|
|
---- TYPES
|
|
string
|
|
====
|
|
---- QUERY
|
|
# should be able to read the same data from this table
|
|
select i, j, s from t_part2
|
|
---- RESULTS
|
|
2,1,'2012'
|
|
2,1,'2012'
|
|
---- TYPES
|
|
int,int,string
|
|
====
|
|
---- QUERY
|
|
# Create an external table modeled on functional.alltypes (CREATE TABLE ... LIKE)
create external table alltypes_test like functional.alltypes
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Add a partition for month 4 of 2009
alter table alltypes_test add partition(month=4, year=2009)
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Add a partition for month 5 of 2009
alter table alltypes_test add partition(month=5, year=2009)
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Table is empty
|
|
select int_col from alltypes_test
|
|
---- RESULTS
|
|
---- TYPES
|
|
int
|
|
====
|
|
---- QUERY
|
|
# Point one partition at some data (file format does not match table)
|
|
alter table alltypes_test partition(month=4, year=2009)
|
|
set location '/test-warehouse/alltypes_seq_snap/year=2009/month=4'
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# With the mismatched file format the data is not read correctly: only the
# values 0 and NULL are expected
select int_col from alltypes_test
|
|
group by int_col order by 1 limit 100
|
|
---- RESULTS
|
|
0
|
|
NULL
|
|
---- TYPES
|
|
int
|
|
====
|
|
---- QUERY
|
|
# Change the file format of just this partition to sequencefile
alter table alltypes_test partition(month=4, year=2009)
|
|
set fileformat sequencefile
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Each int_col value 0-9 is now returned, with 30 rows apiece
select int_col, count(*) from alltypes_test
|
|
group by int_col order by 1 limit 100
|
|
---- RESULTS
|
|
0,30
|
|
1,30
|
|
2,30
|
|
3,30
|
|
4,30
|
|
5,30
|
|
6,30
|
|
7,30
|
|
8,30
|
|
9,30
|
|
---- TYPES
|
|
int,bigint
|
|
====
|
|
---- QUERY
|
|
# Point the other partition at some more data. This time in a different
|
|
# file format.
|
|
# (the partition key values are written as constant expressions: 1+4 and 100*20+9)
alter table alltypes_test partition(month=cast(1+4 as int), year=cast(100*20+9 as int))
|
|
set location '/test-warehouse/alltypes_rc/year=2009/month=5'
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Change the file format of the second partition to rcfile; the month key is
# again written as a constant expression
alter table alltypes_test partition(month=cast(2+3 as int), year=2009)
|
|
set fileformat rcfile
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# With both partitions readable, each int_col value is counted 61 times
select int_col, count(*) from alltypes_test
|
|
group by int_col order by 1 limit 100
|
|
---- RESULTS
|
|
0,61
|
|
1,61
|
|
2,61
|
|
3,61
|
|
4,61
|
|
5,61
|
|
6,61
|
|
7,61
|
|
8,61
|
|
9,61
|
|
---- TYPES
|
|
int,bigint
|
|
====
|