mirror of
https://github.com/apache/impala.git
synced 2026-02-02 06:00:36 -05:00
This patch adds table sampling functionality for Iceberg tables. It is now possible to
execute SELECT and COMPUTE STATS statements with table sampling.

Predicates in the WHERE clause affect the results of table sampling in the same way as
they do for legacy tables (sampling is applied after static partition and file pruning).

Sampling is repeatable via the REPEATABLE clause.

Testing:
 * planner tests
 * e2e tests for V1 and V2 tables

Change-Id: I5de151747c0e9d9379a4051252175fccf42efd7d
Reviewed-on: http://gerrit.cloudera.org:8080/18989
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
44 lines
1.0 KiB
Plaintext
44 lines
1.0 KiB
Plaintext
====
---- QUERY
# Row-level check of TABLESAMPLE on the non-partitioned Iceberg table.
# REPEATABLE(1234) fixes the sampling seed, so the same rows are expected on every run.
# NOTE(review): TIMEZONE is presumably pinned so that the TIMESTAMP column renders
# identically regardless of the zone of the host running the test -- confirm.
set TIMEZONE='Europe/Budapest';
select * from iceberg_non_partitioned tablesample system(10) repeatable(1234)
---- RESULTS
20,'Alex','view',2020-01-01 09:00:00
9,'Alan','click',2020-01-01 10:00:00
6,'Alex','view',2020-01-01 09:00:00
---- TYPES
INT, STRING, STRING, TIMESTAMP
====
---- QUERY
# Aggregate over the same sample of iceberg_non_partitioned (same percentage and seed
# as the row-level query in this file); the count is expected to equal the number of
# rows that query returns.
select count(*) from iceberg_non_partitioned tablesample system(10) repeatable(1234)
---- RESULTS
3
---- TYPES
BIGINT
====
---- QUERY
# Row-level check of TABLESAMPLE on the partitioned Iceberg table, with no predicate.
# REPEATABLE(1234) fixes the sampling seed, so the same rows are expected on every run.
# NOTE(review): TIMEZONE is presumably pinned so that the TIMESTAMP column renders
# identically regardless of the zone of the host running the test -- confirm.
set TIMEZONE='Europe/Budapest';
select * from iceberg_partitioned tablesample system(10) repeatable(1234)
---- RESULTS
16,'Lisa','download',2020-01-01 11:00:00
3,'Alan','click',2020-01-01 10:00:00
---- TYPES
INT, STRING, STRING, TIMESTAMP
====
---- QUERY
# Aggregate over the same sample of iceberg_partitioned (same percentage and seed as
# the unfiltered row-level query in this file); the count is expected to equal the
# number of rows that query returns.
select count(*) from iceberg_partitioned tablesample system(10) repeatable(1234)
---- RESULTS
2
---- TYPES
BIGINT
====
---- QUERY
# TABLESAMPLE combined with a WHERE predicate. With the same percentage and seed as the
# unfiltered query on iceberg_partitioned, a different row is expected here: sampling is
# applied after static partition and file pruning, so the predicate changes the set of
# files the sample is drawn from.
# NOTE(review): TIMEZONE is presumably pinned so that the TIMESTAMP column renders
# identically regardless of the zone of the host running the test -- confirm.
set TIMEZONE='Europe/Budapest';
select * from iceberg_partitioned tablesample system(10) repeatable(1234)
where action = 'click';
---- RESULTS
10,'Alan','click',2020-01-01 10:00:00
---- TYPES
INT, STRING, STRING, TIMESTAMP
====