mirror of
https://github.com/apache/impala.git
synced 2026-02-02 06:00:36 -05:00
The Kudu engine recently enabled the auto-incrementing column feature (KUDU-1945). The feature works by appending a system-generated auto-incrementing column to the primary key columns to guarantee the uniqueness of the primary key when the primary key columns can be non-unique. The non-unique primary key columns and the auto-incrementing column together form the effective unique composite primary key. This auto-incrementing column is named 'auto_incrementing_id' and has BIGINT type. Values are assigned to it automatically during insertion, so insert statements should not specify values for the auto-incrementing column. In the current Kudu implementation, there is no central key provider for auto-incrementing columns. It uses a per tablet-server global counter to assign values for auto-incrementing columns. So the values of auto-incrementing columns are not unique across a Kudu table, but are unique within a contiguous region of the table served by a tablet-server. This patch also upgrades the Kudu version to 345fd44ca3 to pick up the Kudu changes needed for supporting non-unique primary keys. It adds syntactic support for creating Kudu tables with a non-unique primary key. When creating a Kudu table, specifying PRIMARY KEY is optional. If no primary key attribute is specified, the partition key columns are promoted to a non-unique primary key if those columns are the leading columns of the table. A new column "key_unique" is added to the output of the 'describe' table command for Kudu tables.
Examples of CREATE TABLE statement with non unique primary key: CREATE TABLE tbl (i INT NON UNIQUE PRIMARY KEY, s STRING) PARTITION BY HASH (i) PARTITIONS 3 STORED as KUDU; CREATE TABLE tbl (i INT, s STRING, NON UNIQUE PRIMARY KEY(i)) PARTITION BY HASH (i) PARTITIONS 3 STORED as KUDU; CREATE TABLE tbl NON UNIQUE PRIMARY KEY(id) PARTITION BY HASH (id) PARTITIONS 3 STORED as KUDU AS SELECT id, string_col FROM functional.alltypes WHERE id = 10; CREATE TABLE tbl NON UNIQUE PRIMARY KEY(id) PARTITION BY RANGE (id) (PARTITION VALUES <= 1000, PARTITION 1000 < VALUES <= 2000, PARTITION 2000 < VALUES <= 3000, PARTITION 3000 < VALUES) STORED as KUDU AS SELECT id, int_col FROM functional.alltypestiny ORDER BY id ASC LIMIT 4000; CREATE TABLE tbl (id INT, name STRING, NON UNIQUE PRIMARY KEY(id)) STORED as KUDU; CREATE TABLE tbl (a INT, b STRING, c FLOAT) PARTITION BY HASH (a, b) PARTITIONS 3 STORED as KUDU; SELECT statement does not show the system generated auto-incrementing column unless the column is explicitly specified in the select list. Auto-incrementing column cannot be added, removed or renamed with ALTER TABLE statements. UPSERT operation is not supported now for Kudu tables with auto incrementing column due to limitation in Kudu engine. Testing: - Ran manual test in impala-shell with queries to create Kudu tables with non unique primary key, and tested insert/update/delete operations for these tables with non unique primary key. - Added front end tests, and end to end unit tests for Kudu tables with non unique primary key. - Passed exhaustive test. Change-Id: I4d7882bf3d01a3492cc9827c072d1f3200d9eebd Reviewed-on: http://gerrit.cloudera.org:8080/19383 Reviewed-by: Riza Suminto <riza.suminto@cloudera.com> Reviewed-by: Wenzhe Zhou <wzhou@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
254 lines
6.7 KiB
Plaintext
254 lines
6.7 KiB
Plaintext
====
|
|
---- QUERY
|
|
# Verify that the LIMIT clause caps the number of rows returned.
SELECT *
FROM functional_kudu.dimtbl
ORDER BY id
LIMIT 1;
|
|
---- RESULTS
|
|
1001,'Name1',94611
|
|
---- TYPES
|
|
BIGINT, STRING, INT
|
|
====
|
|
---- QUERY
|
|
# Columns may be listed in any order, predicates are applied, and a predicate may
# reference a column ('name') that is not used anywhere else in the query.
SELECT zip, id
FROM functional_kudu.dimtbl
WHERE id >= 1000
  AND 1002 >= id
  AND 94611 = zip
  AND 'Name1' = name
ORDER BY id;
|
|
---- RESULTS
|
|
94611,1001
|
|
---- TYPES
|
|
INT, BIGINT
|
|
====
|
|
---- QUERY
|
|
# Regression test for IMPALA-2740: a NULL value from a row that was filtered out must
# not carry over into the next unfiltered row (a buggy scan would return 2,NULL).
CREATE TABLE impala_2740 (
  key INT PRIMARY KEY,
  value INT
)
PARTITION BY HASH (key) PARTITIONS 3
STORED AS KUDU;
INSERT INTO impala_2740
VALUES (1, NULL), (2, -2);
SELECT *
FROM impala_2740
WHERE key != 1;
|
|
---- RESULTS
|
|
2,-2
|
|
---- TYPES
|
|
INT, INT
|
|
====
|
|
---- QUERY
|
|
# Regression test for IMPALA-2635: the Kudu scanner hangs waiting for data from scanner
# threads that are never started. The union and both scans land in the same fragment,
# which is run on all impalads. However, the t1 table has only a single scan range, so
# two of the scan instances get empty scan ranges.
CREATE TABLE impala_2635_t1 (
  id BIGINT PRIMARY KEY,
  name STRING
)
PARTITION BY HASH (id) PARTITIONS 3
STORED AS KUDU;
CREATE TABLE impala_2635_t2 (
  id BIGINT PRIMARY KEY,
  name STRING
)
PARTITION BY HASH(id) PARTITIONS 16
STORED AS KUDU;
INSERT INTO impala_2635_t1
VALUES (0, 'Foo');
INSERT INTO impala_2635_t2
VALUES (1, 'Blah');
SELECT * FROM impala_2635_t1
UNION ALL
SELECT * FROM impala_2635_t2;
|
|
---- RESULTS
|
|
0,'Foo'
|
|
1,'Blah'
|
|
---- TYPES
|
|
BIGINT, STRING
|
|
====
|
|
---- QUERY
|
|
# IMPALA-4408: Kudu scan in which every materialized slot is non-nullable.
SELECT count(int_col)
FROM functional_kudu.tinyinttable
|
|
---- RESULTS
|
|
10
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
# IMPALA-4859: exercise Kudu IS NULL / IS NOT NULL predicate pushdown.
SELECT count(*)
FROM functional_kudu.alltypesagg
WHERE id < 10
  AND float_col IS NULL;
|
|
---- RESULTS
|
|
2
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
# IS NOT NULL predicate on the nullable float_col column.
SELECT count(*)
FROM functional_kudu.alltypesagg
WHERE id < 10
  AND float_col IS NOT NULL;
|
|
---- RESULTS
|
|
9
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
# alltypes.id is the primary key and therefore not nullable; verify that
# IS NOT NULL / IS NULL pushdown works on it.
SELECT count(*)
FROM functional_kudu.alltypes
WHERE id IS NOT NULL;
|
|
---- RESULTS
|
|
7300
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
# IS NULL predicate on the alltypes.id column.
SELECT count(*)
FROM functional_kudu.alltypes
WHERE id IS NULL;
|
|
---- RESULTS
|
|
0
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
# Push down TIMESTAMP binary predicates (inclusive lower and upper bounds).
SELECT id, timestamp_col
FROM functional_kudu.alltypes
WHERE timestamp_col <= CAST('2009-01-01 00:08:00.28' AS TIMESTAMP)
  AND timestamp_col >= CAST('2009-01-01 00:04:00.6' AS TIMESTAMP)
ORDER BY id;
|
|
---- RESULTS
|
|
4,2009-01-01 00:04:00.600000000
|
|
5,2009-01-01 00:05:00.100000000
|
|
6,2009-01-01 00:06:00.150000000
|
|
7,2009-01-01 00:07:00.210000000
|
|
8,2009-01-01 00:08:00.280000000
|
|
---- TYPES
|
|
INT, TIMESTAMP
|
|
====
|
|
---- QUERY
|
|
# TIMESTAMP predicate whose literal is out of range (the cast evaluates to NULL).
SELECT id, timestamp_col
FROM functional_kudu.alltypes
WHERE timestamp_col > CAST('1000-01-01 00:00:00.00' AS TIMESTAMP)
|
|
---- RESULTS
|
|
---- TYPES
|
|
INT, TIMESTAMP
|
|
====
|
|
---- QUERY
|
|
# TIMESTAMP range predicates with exclusive (strict) bounds.
SELECT id, timestamp_col
FROM functional_kudu.alltypes
WHERE timestamp_col < CAST('2009-01-01 00:08:00.28' AS TIMESTAMP)
  AND timestamp_col > CAST('2009-01-01 00:04:00.6' AS TIMESTAMP)
ORDER BY id;
|
|
---- RESULTS
|
|
5,2009-01-01 00:05:00.100000000
|
|
6,2009-01-01 00:06:00.150000000
|
|
7,2009-01-01 00:07:00.210000000
|
|
---- TYPES
|
|
INT, TIMESTAMP
|
|
====
|
|
---- QUERY
|
|
# TIMESTAMP equality predicate.
SELECT id, timestamp_col
FROM functional_kudu.alltypes
WHERE timestamp_col = CAST('2009-01-01 00:08:00.28' AS TIMESTAMP);
|
|
---- RESULTS
|
|
8,2009-01-01 00:08:00.280000000
|
|
---- TYPES
|
|
INT, TIMESTAMP
|
|
====
|
|
---- QUERY
|
|
# Push down TIMESTAMP IN-list predicates.
SELECT id, timestamp_col
FROM functional_kudu.alltypes
WHERE timestamp_col IN (
  CAST('2010-03-01 00:00:00' AS TIMESTAMP),
  CAST('2010-03-01 00:01:00' AS TIMESTAMP)
)
ORDER BY id;
|
|
---- RESULTS
|
|
4240,2010-03-01 00:00:00
|
|
4241,2010-03-01 00:01:00
|
|
---- TYPES
|
|
INT, TIMESTAMP
|
|
====
|
|
---- QUERY
|
|
# Push down VARCHAR predicates. The table holds five single-character values plus one
# NULL; the inclusive range ['b', 'y'] is then selected.
CREATE TABLE kudu_varchar_pred (
  key INT PRIMARY KEY,
  varchar_col VARCHAR(10)
)
PARTITION BY HASH (key) PARTITIONS 4
STORED AS KUDU;
INSERT INTO kudu_varchar_pred
VALUES
  (1, CAST('a' AS VARCHAR(10))),
  (2, CAST('b' AS VARCHAR(10))),
  (3, CAST('m' AS VARCHAR(10))),
  (4, CAST('y' AS VARCHAR(10))),
  (5, CAST('z' AS VARCHAR(10))),
  (6, NULL);
SELECT key, varchar_col
FROM kudu_varchar_pred
WHERE varchar_col >= CAST('b' AS VARCHAR(10))
  AND varchar_col <= CAST('y' AS VARCHAR(10))
ORDER BY key;
|
|
---- RESULTS
|
|
2,'b'
|
|
3,'m'
|
|
4,'y'
|
|
---- TYPES
|
|
INT, STRING
|
|
====
|
|
---- QUERY
|
|
# Regression test for IMPALA-6187: count(*) queries that reference only partition
# columns must not skip conjunct evaluation. 'id' is the partition column here.
SELECT count(*)
FROM functional_kudu.alltypes
WHERE rand() + id < 0.0;
|
|
---- RESULTS
|
|
0
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
# Create a range-partitioned Kudu table with a non-unique primary key; one partition
# per id value 0..7.
CREATE TABLE non_unique_key_scan_tbl1
NON UNIQUE PRIMARY KEY (id)
PARTITION BY RANGE (id) (
  PARTITION VALUE = 0,
  PARTITION VALUE = 1,
  PARTITION VALUE = 2,
  PARTITION VALUE = 3,
  PARTITION VALUE = 4,
  PARTITION VALUE = 5,
  PARTITION VALUE = 6,
  PARTITION VALUE = 7
)
STORED AS KUDU
AS
SELECT id, int_col
FROM functional.alltypestiny;
|
|
---- RESULTS
|
|
'Inserted 8 row(s)'
|
|
====
|
|
---- QUERY
|
|
# SELECT * does not expose the auto-incrementing column.
SELECT *
FROM non_unique_key_scan_tbl1
ORDER BY id ASC;
|
|
---- RESULTS
|
|
0,0
|
|
1,1
|
|
2,0
|
|
3,1
|
|
4,0
|
|
5,1
|
|
6,0
|
|
7,1
|
|
---- TYPES
|
|
INT,INT
|
|
====
|
|
---- QUERY
|
|
# The auto-incrementing column is returned when it is named explicitly in the
# select list.
SELECT id, int_col, auto_incrementing_id
FROM non_unique_key_scan_tbl1
ORDER BY id ASC, auto_incrementing_id DESC;
|
|
---- RESULTS
|
|
0,0,1
|
|
1,1,1
|
|
2,0,1
|
|
3,1,1
|
|
4,0,1
|
|
5,1,1
|
|
6,0,1
|
|
7,1,1
|
|
---- TYPES
|
|
INT,INT,BIGINT
|
|
====
|
|
---- QUERY
|
|
# Auto-incrementing column referenced in the WHERE clause (and in GROUP BY).
SELECT id, int_col, auto_incrementing_id
FROM non_unique_key_scan_tbl1
WHERE auto_incrementing_id = 1
  AND id < 3
GROUP BY id, int_col, auto_incrementing_id;
|
|
---- RESULTS
|
|
0,0,1
|
|
1,1,1
|
|
2,0,1
|
|
---- TYPES
|
|
INT,INT,BIGINT
|
|
====
|
|
---- QUERY
|
|
# Create an unpartitioned Kudu table with a non-unique primary key.
CREATE TABLE non_unique_key_scan_tbl2
NON UNIQUE PRIMARY KEY (id)
STORED AS KUDU
AS
SELECT id, int_col
FROM functional.alltypestiny
ORDER BY id ASC
LIMIT 100;
|
|
---- RESULTS
|
|
'Inserted 8 row(s)'
|
|
====
|
|
---- QUERY
|
|
# Auto-incrementing column referenced in ORDER BY.
# All rows are added to one tablet-server, so auto_incrementing_id shows insertion order.
SELECT id, int_col, auto_incrementing_id
FROM non_unique_key_scan_tbl2
ORDER BY auto_incrementing_id ASC;
|
|
---- RESULTS
|
|
0,0,1
|
|
1,1,2
|
|
2,0,3
|
|
3,1,4
|
|
4,0,5
|
|
5,1,6
|
|
6,0,7
|
|
7,1,8
|
|
---- TYPES
|
|
INT,INT,BIGINT
|
|
==== |