Files
impala/testdata/workloads/functional-query/queries/QueryTest/kudu-scan-node.test
wzhou-code 40da36414f IMPALA-11809: Support non unique primary key for Kudu
The Kudu engine recently enabled the auto-incrementing column feature
(KUDU-1945). The feature works by appending a system-generated
auto-incrementing column to the primary key columns, which guarantees
uniqueness of the primary key even when the declared primary key
columns are non-unique. The non-unique primary key columns and the
auto-incrementing column together form the effective unique composite
primary key.

This auto-incrementing column is named 'auto_incrementing_id' and has
type BIGINT. Its value is assigned automatically during insertion, so
insertion statements should not specify a value for the
auto-incrementing column. In the current Kudu implementation there is
no central key provider for auto-incrementing columns; Kudu uses a per
tablet-server global counter to assign their values. As a result, the
values of the auto-incrementing column are not unique across a Kudu
table, but they are unique within a contiguous region of the table
served by a tablet-server.

This patch also upgrades the Kudu version to 345fd44ca3 to pick up the
Kudu changes needed to support non-unique primary keys, and it adds
syntactic support for creating a Kudu table with a non-unique primary
key. When creating a Kudu table, specifying PRIMARY KEY is optional.
If no primary key attribute is specified, the partition key columns
are promoted to a non-unique primary key, provided those columns are
the leading columns of the table.
A new column "key_unique" is added to the output of the 'describe'
table command for Kudu tables.

Examples of CREATE TABLE statement with non unique primary key:
  CREATE TABLE tbl (i INT NON UNIQUE PRIMARY KEY, s STRING)
  PARTITION BY HASH (i) PARTITIONS 3
  STORED as KUDU;

  CREATE TABLE tbl (i INT, s STRING, NON UNIQUE PRIMARY KEY(i))
  PARTITION BY HASH (i) PARTITIONS 3
  STORED as KUDU;

  CREATE TABLE tbl NON UNIQUE PRIMARY KEY(id)
  PARTITION BY HASH (id) PARTITIONS 3
  STORED as KUDU
  AS SELECT id, string_col FROM functional.alltypes WHERE id = 10;

  CREATE TABLE tbl NON UNIQUE PRIMARY KEY(id)
  PARTITION BY RANGE (id)
  (PARTITION VALUES <= 1000,
   PARTITION 1000 < VALUES <= 2000,
   PARTITION 2000 < VALUES <= 3000,
   PARTITION 3000 < VALUES)
  STORED as KUDU
  AS SELECT id, int_col FROM functional.alltypestiny ORDER BY id ASC
   LIMIT 4000;

  CREATE TABLE tbl (id INT, name STRING, NON UNIQUE PRIMARY KEY(id))
  STORED as KUDU;

  CREATE TABLE tbl (a INT, b STRING, c FLOAT)
  PARTITION BY HASH (a, b) PARTITIONS 3
  STORED as KUDU;

A SELECT statement does not show the system-generated auto-incrementing
column unless the column is explicitly specified in the select list.
The auto-incrementing column cannot be added, removed or renamed with
ALTER TABLE statements.
The UPSERT operation is currently not supported for Kudu tables with an
auto-incrementing column due to a limitation in the Kudu engine.

Testing:
 - Ran manual tests in impala-shell with queries that create Kudu tables
   with a non-unique primary key, and tested insert/update/delete
   operations on these tables.
 - Added front-end tests and end-to-end unit tests for Kudu tables
   with a non-unique primary key.
 - Passed exhaustive tests.

Change-Id: I4d7882bf3d01a3492cc9827c072d1f3200d9eebd
Reviewed-on: http://gerrit.cloudera.org:8080/19383
Reviewed-by: Riza Suminto <riza.suminto@cloudera.com>
Reviewed-by: Wenzhe Zhou <wzhou@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
2023-02-04 07:34:56 +00:00

254 lines
6.7 KiB
Plaintext

====
---- QUERY
# A LIMIT on an ordered Kudu scan must return exactly that many rows.
SELECT *
FROM functional_kudu.dimtbl
ORDER BY id
LIMIT 1;
---- RESULTS
1001,'Name1',94611
---- TYPES
BIGINT, STRING, INT
====
---- QUERY
# The select list may name columns in any order, predicates must be applied, and a
# predicate may reference a column ('name') that is used nowhere else in the query.
SELECT zip, id
FROM functional_kudu.dimtbl
WHERE id >= 1000
  AND 1002 >= id
  AND 94611 = zip
  AND 'Name1' = name
ORDER BY id;
---- RESULTS
94611,1001
---- TYPES
INT, BIGINT
====
---- QUERY
# IMPALA-2740 regression test: a NULL value from a row that was filtered out must not
# leak into the next row that passes the filter (the buggy result was 2,NULL).
CREATE TABLE impala_2740 (
  key INT PRIMARY KEY,
  value INT
)
PARTITION BY HASH (key) PARTITIONS 3
STORED AS KUDU;
INSERT INTO impala_2740
VALUES (1, NULL), (2, -2);
SELECT *
FROM impala_2740
WHERE key != 1;
---- RESULTS
2,-2
---- TYPES
INT, INT
====
---- QUERY
# IMPALA-2635 regression test: the Kudu scanner used to hang waiting for data from
# scanner threads that were never started. The union and both scans are placed in one
# fragment that runs on all impalads, but the t1 table has only a single scan range,
# so two of the scan instances receive empty scan ranges.
CREATE TABLE impala_2635_t1 (
  id BIGINT PRIMARY KEY,
  name STRING
)
PARTITION BY HASH (id) PARTITIONS 3
STORED AS KUDU;
CREATE TABLE impala_2635_t2 (
  id BIGINT PRIMARY KEY,
  name STRING
)
PARTITION BY HASH (id) PARTITIONS 16
STORED AS KUDU;
INSERT INTO impala_2635_t1
VALUES (0, 'Foo');
INSERT INTO impala_2635_t2
VALUES (1, 'Blah');
SELECT * FROM impala_2635_t1
UNION ALL
SELECT * FROM impala_2635_t2;
---- RESULTS
0,'Foo'
1,'Blah'
---- TYPES
BIGINT, STRING
====
---- QUERY
# IMPALA-4408: Kudu scan in which every materialized slot is non-nullable.
SELECT COUNT(int_col)
FROM functional_kudu.tinyinttable
---- RESULTS
10
---- TYPES
BIGINT
====
---- QUERY
# IMPALA-4859: Kudu pushdown of IS NULL / IS NOT NULL predicates (IS NULL case).
SELECT COUNT(*)
FROM functional_kudu.alltypesagg
WHERE id < 10
  AND float_col IS NULL;
---- RESULTS
2
---- TYPES
BIGINT
====
---- QUERY
# IS NOT NULL predicate on a nullable column, combined with a range predicate on id.
SELECT COUNT(*)
FROM functional_kudu.alltypesagg
WHERE id < 10
  AND float_col IS NOT NULL;
---- RESULTS
9
---- TYPES
BIGINT
====
---- QUERY
# alltypes.id is a non-nullable primary key column; check that IS NOT NULL / IS NULL
# pushdown works on it.
SELECT COUNT(*)
FROM functional_kudu.alltypes
WHERE id IS NOT NULL;
---- RESULTS
7300
---- TYPES
BIGINT
====
---- QUERY
# IS NULL on the alltypes.id column is expected to match no rows.
SELECT COUNT(*)
FROM functional_kudu.alltypes
WHERE id IS NULL;
---- RESULTS
0
---- TYPES
BIGINT
====
---- QUERY
# Pushdown of TIMESTAMP binary predicates; both bounds are inclusive.
SELECT id, timestamp_col
FROM functional_kudu.alltypes
WHERE timestamp_col <= CAST('2009-01-01 00:08:00.28' AS TIMESTAMP)
  AND timestamp_col >= CAST('2009-01-01 00:04:00.6' AS TIMESTAMP)
ORDER BY id;
---- RESULTS
4,2009-01-01 00:04:00.600000000
5,2009-01-01 00:05:00.100000000
6,2009-01-01 00:06:00.150000000
7,2009-01-01 00:07:00.210000000
8,2009-01-01 00:08:00.280000000
---- TYPES
INT, TIMESTAMP
====
---- QUERY
# Out-of-range TIMESTAMP literal: the predicate evaluates to NULL, so no rows match.
SELECT id, timestamp_col
FROM functional_kudu.alltypes
WHERE timestamp_col > CAST('1000-01-01 00:00:00.00' AS TIMESTAMP)
---- RESULTS
---- TYPES
INT, TIMESTAMP
====
---- QUERY
# TIMESTAMP range with exclusive bounds: the boundary rows (ids 4 and 8) are excluded.
SELECT id, timestamp_col
FROM functional_kudu.alltypes
WHERE timestamp_col < CAST('2009-01-01 00:08:00.28' AS TIMESTAMP)
  AND timestamp_col > CAST('2009-01-01 00:04:00.6' AS TIMESTAMP)
ORDER BY id;
---- RESULTS
5,2009-01-01 00:05:00.100000000
6,2009-01-01 00:06:00.150000000
7,2009-01-01 00:07:00.210000000
---- TYPES
INT, TIMESTAMP
====
---- QUERY
# TIMESTAMP equality predicate.
SELECT id, timestamp_col
FROM functional_kudu.alltypes
WHERE timestamp_col = CAST('2009-01-01 00:08:00.28' AS TIMESTAMP);
---- RESULTS
8,2009-01-01 00:08:00.280000000
---- TYPES
INT, TIMESTAMP
====
---- QUERY
# Pushdown of TIMESTAMP IN-list predicates.
SELECT id, timestamp_col
FROM functional_kudu.alltypes
WHERE timestamp_col IN (
  CAST('2010-03-01 00:00:00' AS TIMESTAMP),
  CAST('2010-03-01 00:01:00' AS TIMESTAMP)
)
ORDER BY id;
---- RESULTS
4240,2010-03-01 00:00:00
4241,2010-03-01 00:01:00
---- TYPES
INT, TIMESTAMP
====
---- QUERY
# Pushdown of VARCHAR range predicates; the row with a NULL varchar_col must not match.
CREATE TABLE kudu_varchar_pred (
  key INT PRIMARY KEY,
  varchar_col VARCHAR(10)
)
PARTITION BY HASH (key) PARTITIONS 4
STORED AS KUDU;
INSERT INTO kudu_varchar_pred VALUES
  (1, CAST('a' AS VARCHAR(10))),
  (2, CAST('b' AS VARCHAR(10))),
  (3, CAST('m' AS VARCHAR(10))),
  (4, CAST('y' AS VARCHAR(10))),
  (5, CAST('z' AS VARCHAR(10))),
  (6, NULL);
SELECT key, varchar_col
FROM kudu_varchar_pred
WHERE varchar_col >= CAST('b' AS VARCHAR(10))
  AND varchar_col <= CAST('y' AS VARCHAR(10))
ORDER BY key;
---- RESULTS
2,'b'
3,'m'
4,'y'
---- TYPES
INT, STRING
====
---- QUERY
# IMPALA-6187 regression test: a count(*) query that references only partition columns
# must still evaluate its conjuncts. 'id' is the partition column here.
SELECT COUNT(*)
FROM functional_kudu.alltypes
WHERE rand() + id < 0.0;
---- RESULTS
0
---- TYPES
BIGINT
====
---- QUERY
# Create a range-partitioned Kudu table with a non-unique primary key, using one
# single-value partition for each id from 0 through 7.
CREATE TABLE non_unique_key_scan_tbl1
NON UNIQUE PRIMARY KEY (id)
PARTITION BY RANGE (id) (
  PARTITION VALUE = 0,
  PARTITION VALUE = 1,
  PARTITION VALUE = 2,
  PARTITION VALUE = 3,
  PARTITION VALUE = 4,
  PARTITION VALUE = 5,
  PARTITION VALUE = 6,
  PARTITION VALUE = 7
)
STORED AS KUDU
AS
SELECT id, int_col
FROM functional.alltypestiny;
---- RESULTS
'Inserted 8 row(s)'
====
---- QUERY
# SELECT * does not expose the auto-incrementing column.
SELECT *
FROM non_unique_key_scan_tbl1
ORDER BY id ASC;
---- RESULTS
0,0
1,1
2,0
3,1
4,0
5,1
6,0
7,1
---- TYPES
INT,INT
====
---- QUERY
# The auto-incrementing column is returned when it is named explicitly in the
# select list.
SELECT id, int_col, auto_incrementing_id
FROM non_unique_key_scan_tbl1
ORDER BY id ASC, auto_incrementing_id DESC;
---- RESULTS
0,0,1
1,1,1
2,0,1
3,1,1
4,0,1
5,1,1
6,0,1
7,1,1
---- TYPES
INT,INT,BIGINT
====
---- QUERY
# The auto-incrementing column can be used in a WHERE clause (and in GROUP BY).
SELECT id, int_col, auto_incrementing_id
FROM non_unique_key_scan_tbl1
WHERE auto_incrementing_id = 1
  AND id < 3
GROUP BY id, int_col, auto_incrementing_id;
---- RESULTS
0,0,1
1,1,1
2,0,1
---- TYPES
INT,INT,BIGINT
====
---- QUERY
# Create an unpartitioned Kudu table with a non-unique primary key.
CREATE TABLE non_unique_key_scan_tbl2
NON UNIQUE PRIMARY KEY (id)
STORED AS KUDU
AS
SELECT id, int_col
FROM functional.alltypestiny
ORDER BY id ASC
LIMIT 100;
---- RESULTS
'Inserted 8 row(s)'
====
---- QUERY
# The auto-incrementing column can be used in ORDER BY.
# All rows are added to one tablet-server, so auto_incrementing_id shows insertion order.
SELECT id, int_col, auto_incrementing_id
FROM non_unique_key_scan_tbl2
ORDER BY auto_incrementing_id ASC;
---- RESULTS
0,0,1
1,1,2
2,0,3
3,1,4
4,0,5
5,1,6
6,0,7
7,1,8
---- TYPES
INT,INT,BIGINT
====