mirror of
https://github.com/apache/impala.git
synced 2025-12-30 12:02:10 -05:00
This patch fixes two problems: - Previously a CTAS into a Kudu table where an expr rewrite occurred would create an unpartitioned table, due to the partition info being reset in TableDataLayout and then never reconstructed. Since the Kudu partition info is set by the parser and never changes, the solution is to not reset it. - Previously a CTAS into a Kudu table with a range partition where an expr rewrite occurred would fail with an analysis exception due to a Precondition check in RangePartition.analyze that checked that the RangePartition wasn't already analyzed, as the analysis can't be done twice. Since the state in RangePartition never changes, it doesn't need to be reanalyzed and we can just return instead of failing on the check. Testing: - Added an e2e test that creates a partitioned Kudu table with a CTAS with a rewrite, and checks that the expected partitions are created. Change-Id: I731743bd84cc695119e99342e1b155096147f0ed Reviewed-on: http://gerrit.cloudera.org:8080/10251 Reviewed-by: Alex Behm <alex.behm@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
348 lines
10 KiB
Plaintext
348 lines
10 KiB
Plaintext
====
|
|
---- QUERY
|
|
create table t primary key (id) partition by hash (id) partitions 3
|
|
stored as kudu
|
|
as select id, int_col from functional.alltypestiny;
|
|
select * from t;
|
|
---- RESULTS
|
|
0,0
|
|
1,1
|
|
2,0
|
|
3,1
|
|
4,0
|
|
5,1
|
|
6,0
|
|
7,1
|
|
---- TYPES
|
|
INT,INT
|
|
====
|
|
---- QUERY
|
|
# Boolean primary key column
|
|
create table tab (x int, y boolean, primary key(x, y))
|
|
partition by hash (x) partitions 3 stored as kudu
|
|
---- CATCH
|
|
NonRecoverableException: key column may not have type of BOOL, FLOAT, or DOUBLE
|
|
====
|
|
---- QUERY
|
|
# Float primary key column
|
|
create table tab (x int, y float, primary key(x, y))
|
|
partition by hash (x) partitions 3 stored as kudu
|
|
---- CATCH
|
|
NonRecoverableException: key column may not have type of BOOL, FLOAT, or DOUBLE
|
|
====
|
|
---- QUERY
|
|
# Primary keys should be declared first
|
|
# TODO: See KUDU-1709 for improving Kudu error messages.
|
|
create table tab (x int, y int, primary key(y))
|
|
partition by hash (y) partitions 3 stored as kudu
|
|
---- CATCH
|
|
NonRecoverableException: Got out-of-order key column: name: "y" type: INT32 is_key: true is_nullable: false cfile_block_size: 0
|
|
====
|
|
---- QUERY
|
|
# Small number of hash partitions
|
|
create table tab (a int, b int, c int, d int, primary key(a, b, c))
|
|
partition by hash(a,b) partitions 8, hash(c) partitions 1 stored as kudu
|
|
---- CATCH
|
|
NonRecoverableException: must have at least two hash buckets
|
|
====
|
|
---- QUERY
|
|
# Same column in multiple hash based distributions
|
|
create table tab (a int, b int, primary key (a))
|
|
partition by hash (a) partitions 3, hash (a) partitions 2 stored as kudu
|
|
---- CATCH
|
|
NonRecoverableException: hash bucket schema components must not contain columns in common
|
|
====
|
|
---- QUERY
|
|
# Same column referenced multiple times in the same hash-based distribution
|
|
create table tab (a int primary key) partition by hash (a, a, a) partitions 3
|
|
stored as kudu
|
|
---- CATCH
|
|
NonRecoverableException: hash bucket schema components must not contain columns in common
|
|
====
|
|
---- QUERY
|
|
# Kudu table that uses Impala keywords as table name and column names
|
|
create table `add`(`analytic` int, `function` int, primary key(`analytic`, `function`))
|
|
partition by hash (`analytic`) partitions 4, range (`function`)
|
|
(partition values <= 1, partition 1 < values <= 10, partition 10 < values) stored as kudu;
|
|
insert into `add` select id, int_col from functional.alltypestiny;
|
|
select * from `add`
|
|
---- RESULTS
|
|
0,0
|
|
1,1
|
|
2,0
|
|
3,1
|
|
4,0
|
|
5,1
|
|
6,0
|
|
7,1
|
|
---- TYPES
|
|
INT,INT
|
|
====
|
|
---- QUERY
|
|
# Test implicit casting/folding of partition values.
|
|
create table tab (a int not null primary key)
|
|
partition by range (a) (partition value = false)
|
|
stored as kudu
|
|
---- RESULTS
|
|
'Table has been created.'
|
|
====
|
|
---- QUERY
|
|
# Invalid hostname
|
|
create table tdata_bogus_host (id int primary key, name string, valf float, vali bigint)
|
|
PARTITION BY RANGE (PARTITION 10 <= VALUES <= 30) STORED AS KUDU
|
|
TBLPROPERTIES('kudu.master_addresses' = 'bogus host name')
|
|
---- CATCH
|
|
Couldn't resolve this master's address bogus host name:7051
|
|
====
|
|
---- QUERY
|
|
# Non-existing host
|
|
create table tdata_non_existing_host
|
|
(id int primary key, name string, valf float, vali bigint)
|
|
PARTITION BY RANGE (PARTITION 10 <= VALUES <= 30) STORED AS KUDU
|
|
TBLPROPERTIES('kudu.master_addresses' = 'bogus.host.name')
|
|
---- CATCH
|
|
Couldn't resolve this master's address bogus.host.name:7051
|
|
====
|
|
---- QUERY
|
|
# Valid host with whitespace
|
|
create table tdata_master_addresses_whitespace (id int primary key) stored as kudu
|
|
tblproperties('kudu.master_addresses' = ' localhost ')
|
|
---- RESULTS
|
|
'Table has been created.'
|
|
====
|
|
---- QUERY
|
|
insert into tdata_master_addresses_whitespace values (0), (1)
|
|
---- DML_RESULTS: tdata_master_addresses_whitespace
|
|
0
|
|
1
|
|
====
|
|
---- QUERY
|
|
# Test that string case is ignored
|
|
create table ignore_column_case (Id int, NAME string, vAlf float, vali bigint,
|
|
primary key (Id, NAME)) PARTITION BY RANGE (PARTITION VALUE = (1, 'Martin'))
|
|
STORED AS KUDU
|
|
---- RESULTS
|
|
'Table has been created.'
|
|
====
|
|
---- QUERY
|
|
insert into ignore_column_case values (1, 'Martin', 1.0, 10);
|
|
---- RESULTS
|
|
: 1
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 1
|
|
NumRowErrors: 0
|
|
====
|
|
---- QUERY
|
|
select ID, nAmE, VALF, VALI from ignore_column_case where NaMe = 'Martin';
|
|
---- RESULTS
|
|
1,'Martin',1.0,10
|
|
---- TYPES
|
|
INT,STRING,FLOAT,BIGINT
|
|
====
|
|
---- QUERY
|
|
# Using NULL as default values
|
|
create table tbl_with_null_defaults (x int primary key, i1 tinyint default null,
|
|
i2 smallint default null, i3 int default null, i4 bigint default null,
|
|
vals string default null, valf float default null, vald double default null,
|
|
valb boolean default null, valdec4 decimal(9) default null,
|
|
valdec8 decimal(18) default null, valdec16 decimal(38) default null)
|
|
partition by hash (x) partitions 3 stored as kudu
|
|
---- RESULTS
|
|
'Table has been created.'
|
|
====
|
|
---- QUERY
|
|
insert into tbl_with_null_defaults (x) values (1);
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 1
|
|
NumRowErrors: 0
|
|
---- LABELS
|
|
X, I1, I2, I3, I4, VALS, VALF, VALD, VALB, VALDEC4, VALDEC8, VALDEC16
|
|
---- DML_RESULTS: tbl_with_null_defaults
|
|
1,NULL,NULL,NULL,NULL,'NULL',NULL,NULL,NULL,NULL,NULL,NULL
|
|
---- TYPES
|
|
INT,TINYINT,SMALLINT,INT,BIGINT,STRING,FLOAT,DOUBLE,BOOLEAN,DECIMAL,DECIMAL,DECIMAL
|
|
====
|
|
---- QUERY
|
|
# Overlapping ranges are rejected by the Kudu client
|
|
create table ts_ranges (ts timestamp primary key, i int)
|
|
partition by range (
|
|
partition cast('2009-01-02 00:00:00' as timestamp) <= VALUES <
|
|
cast('2009-01-03 00:00:00' as timestamp),
|
|
partition cast('2009-01-02 10:00:00' as timestamp) <= VALUES
|
|
) stored as kudu
|
|
---- CATCH
|
|
NonRecoverableException: overlapping range partitions: first range partition: 2009-01-02T00:00:00.000000Z <= VALUES < 2009-01-03T00:00:00.000000Z, second range partition: VALUES >= 2009-01-02T10:00:00.000000Z
|
|
====
|
|
---- QUERY
|
|
# Creates a range partitioned Kudu table with a timestamp PK. Note that nanoseconds
|
|
# are rounded (same behavior as when writing timestamp values to Kudu).
|
|
create table ts_ranges (ts timestamp primary key, i int)
|
|
partition by range (
|
|
partition nanoseconds_add(cast('2009-01-01 00:00:00' as timestamp), 999) <= VALUES <
|
|
nanoseconds_add(cast('2009-01-02 00:00:00' as timestamp), 1),
|
|
partition cast('2009-01-02 00:00:00' as timestamp) <= VALUES <
|
|
cast('2009-01-03 00:00:00' as timestamp),
|
|
partition '2009-01-03 00:00:00' <= VALUES
|
|
) stored as kudu
|
|
---- RESULTS
|
|
'Table has been created.'
|
|
====
|
|
---- QUERY
|
|
show range partitions ts_ranges
|
|
---- RESULTS
|
|
'2009-01-01T00:00:00.000001Z <= VALUES < 2009-01-02T00:00:00.000000Z'
|
|
'2009-01-02T00:00:00.000000Z <= VALUES < 2009-01-03T00:00:00.000000Z'
|
|
'VALUES >= 2009-01-03T00:00:00.000000Z'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
create table ts_ranges_ctas
|
|
primary key (ts)
|
|
partition by range (
|
|
partition VALUES < '2009-01-02 00:00:00',
|
|
partition '2009-01-02 00:00:00' <= VALUES <
|
|
'2009-01-03 00:00:00',
|
|
partition '2009-01-03 00:00:00' < VALUES,
|
|
partition VALUE = ('2009-01-03 00:00:00')
|
|
)
|
|
stored as kudu
|
|
as select timestamp_col ts, id from functional.alltypestiny;
|
|
---- RESULTS
|
|
'Inserted 8 row(s)'
|
|
====
|
|
---- QUERY
|
|
show range partitions ts_ranges_ctas
|
|
---- RESULTS
|
|
'VALUES < 2009-01-02T00:00:00.000000Z'
|
|
'2009-01-02T00:00:00.000000Z <= VALUES < 2009-01-03T00:00:00.000000Z'
|
|
'VALUE = 2009-01-03T00:00:00.000000Z'
|
|
'VALUES >= 2009-01-03T00:00:00.000001Z'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
select * from ts_ranges_ctas order by id
|
|
---- RESULTS
|
|
2009-01-01 00:00:00,0
|
|
2009-01-01 00:01:00,1
|
|
2009-02-01 00:00:00,2
|
|
2009-02-01 00:01:00,3
|
|
2009-03-01 00:00:00,4
|
|
2009-03-01 00:01:00,5
|
|
2009-04-01 00:00:00,6
|
|
2009-04-01 00:01:00,7
|
|
---- TYPES
|
|
TIMESTAMP,INT
|
|
====
|
|
---- QUERY
|
|
# Creates a Kudu table with timestamp column default values.
|
|
create table ts_default (i int primary key, ts1 timestamp,
|
|
ts2 timestamp default cast('2009-01-01 00:00:00' as timestamp))
|
|
partition by hash(i) partitions 3 stored as kudu
|
|
---- RESULTS
|
|
'Table has been created.'
|
|
====
|
|
---- QUERY
|
|
insert into ts_default (i) values (1);
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 1
|
|
NumRowErrors: 0
|
|
---- LABELS
|
|
I, TS1, TS2
|
|
---- DML_RESULTS: ts_default
|
|
1,NULL,2009-01-01 00:00:00
|
|
---- TYPES
|
|
INT,TIMESTAMP,TIMESTAMP
|
|
====
|
|
---- QUERY
|
|
insert into ts_default (i, ts1, ts2) values (2, NULL, NULL);
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 1
|
|
NumRowErrors: 0
|
|
---- LABELS
|
|
I, TS1, TS2
|
|
---- DML_RESULTS: ts_default
|
|
1,NULL,2009-01-01 00:00:00
|
|
2,NULL,NULL
|
|
---- TYPES
|
|
INT,TIMESTAMP,TIMESTAMP
|
|
====
|
|
---- QUERY
|
|
# create an unpartitioned table
|
|
create table unpartitioned_kudu_table (col0 bigint primary key, col1 string)
|
|
stored as kudu
|
|
---- RESULTS
|
|
'Table has been created.'
|
|
---- ERRORS
|
|
Unpartitioned Kudu tables are inefficient for large data sizes.
|
|
====
|
|
---- QUERY
|
|
insert into unpartitioned_kudu_table values (0, 'zero'), (1, 'one')
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 2
|
|
NumRowErrors: 0
|
|
---- LABELS
|
|
COL0,COL1
|
|
---- DML_RESULTS: unpartitioned_kudu_table
|
|
0,'zero'
|
|
1,'one'
|
|
---- TYPES
|
|
BIGINT,STRING
|
|
====
|
|
---- QUERY
|
|
create table unpartitioned_kudu_table2 primary key(id) stored as kudu
|
|
as select id from functional.alltypestiny where id > 4
|
|
---- RESULTS
|
|
'Inserted 3 row(s)'
|
|
---- ERRORS
|
|
Unpartitioned Kudu tables are inefficient for large data sizes.
|
|
====
|
|
---- QUERY
|
|
select * from unpartitioned_kudu_table2
|
|
---- RESULTS
|
|
5
|
|
6
|
|
7
|
|
---- LABELS
|
|
ID
|
|
---- TYPES
|
|
INT
|
|
====
|
|
---- QUERY
|
|
# Creates a Kudu table with decimal columns and primary key
|
|
create table create_decimal
|
|
(
|
|
decimal_4 decimal(9, 9),
|
|
decimal_8 decimal(18, 2) not null default 100.00,
|
|
decimal_16 decimal(38, 0) null,
|
|
primary key (decimal_4))
|
|
stored as kudu;
|
|
---- RESULTS
|
|
'Table has been created.'
|
|
====
|
|
---- QUERY
|
|
# Create as select table with decimal columns and primary key
|
|
create table ctas_decimal primary key (d1,d2,d3)
|
|
stored as kudu
|
|
as select * from functional.decimal_tbl;
|
|
select * from ctas_decimal;
|
|
---- RESULTS
|
|
1234,2222,1.2345678900,0.12345678900000000000000000000000000000,12345.78900,1
|
|
2345,111,12.3456789000,0.12345678900000000000000000000000000000,3.14100,1
|
|
12345,333,123.4567890000,0.12345678900000000000000000000000000000,11.22000,1
|
|
12345,333,1234.5678900000,0.12345678900000000000000000000000000000,0.10000,1
|
|
132842,333,12345.6789000000,0.12345678900000000000000000000000000000,0.77889,1
|
|
---- TYPES
|
|
DECIMAL,DECIMAL,DECIMAL,DECIMAL,DECIMAL,DECIMAL
|
|
====
|
|
---- QUERY
|
|
# IMPALA-6954: CTAS with an expr rewrite.
|
|
create table ctas_rewrite primary key(id)
|
|
partition by range(id) (partition 0 <= values < 100) stored as kudu
|
|
as select id, tinyint_col from functional.alltypes
|
|
where id between 0 and 1;
|
|
show range partitions ctas_rewrite;
|
|
---- RESULTS
|
|
'0 <= VALUES < 100'
|
|
==== |