mirror of
https://github.com/apache/impala.git
synced 2025-12-25 02:03:09 -05:00
This patch adds table sampling functionality for Iceberg tables. From now on, it is possible to execute SELECT and COMPUTE STATS statements with table sampling. Predicates in the WHERE clause affect the results of table sampling similarly to how legacy tables work (sampling is applied after static partition and file pruning). Sampling is repeatable via the REPEATABLE clause. Testing * planner tests * e2e tests for V1 and V2 tables Change-Id: I5de151747c0e9d9379a4051252175fccf42efd7d Reviewed-on: http://gerrit.cloudera.org:8080/18989 Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
106 lines
2.6 KiB
Plaintext
106 lines
2.6 KiB
Plaintext
====
|
|
---- QUERY
|
|
# Reads iceberg_v2_no_deletes through TABLESAMPLE SYSTEM(10); REPEATABLE(1234)
# pins the sampling seed so the expected rows below stay stable across runs.
select * from iceberg_v2_no_deletes tablesample system(10) repeatable(1234)
|
|
---- RESULTS
|
|
1,'x'
|
|
2,'y'
|
|
3,'z'
|
|
---- TYPES
|
|
INT, STRING
|
|
====
|
|
---- QUERY
|
|
# Row count over the same table, sample size and seed as the SELECT * case;
# the expected count equals the three rows that case returns.
select count(*) from iceberg_v2_no_deletes tablesample system(10) repeatable(1234)
|
|
---- RESULTS
|
|
3
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
# Samples a table whose name indicates that only some data files carry
# positional delete files; fixed seed keeps the single expected row stable.
select * from iceberg_v2_positional_not_all_data_files_have_delete_files tablesample system(10) repeatable(1234)
|
|
---- RESULTS
|
|
5,'X'
|
|
---- TYPES
|
|
INT, STRING
|
|
====
|
|
---- QUERY
|
|
# Row count over the same table, sample size and seed as the SELECT * case;
# the expected count equals the single row that case returns.
select count(*) from iceberg_v2_positional_not_all_data_files_have_delete_files tablesample system(10) repeatable(1234)
|
|
---- RESULTS
|
|
1
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
# Unfiltered TABLESAMPLE SYSTEM(50) read of iceberg_v2_partitioned_position_deletes
# with a fixed seed; serves as the baseline for the filtered case that follows.
select * from iceberg_v2_partitioned_position_deletes tablesample system(50) repeatable(1234);
|
|
---- RESULTS
|
|
18,'Alan','click',2020-01-01 10:00:00
|
|
12,'Alan','click',2020-01-01 10:00:00
|
|
10,'Alan','click',2020-01-01 10:00:00
|
|
6,'Alex','view',2020-01-01 09:00:00
|
|
20,'Alex','view',2020-01-01 09:00:00
|
|
4,'Alex','view',2020-01-01 09:00:00
|
|
---- TYPES
|
|
INT, STRING, STRING, TIMESTAMP
|
|
====
|
|
---- QUERY
|
|
# Same sample size and seed as the unfiltered case, plus a WHERE predicate.
# The expected rows are exactly the 'click' rows of the unfiltered result.
# NOTE(review): whether 'action' is a partition column (so pruning happens
# before sampling) is not visible in this file -- confirm against the schema.
select * from iceberg_v2_partitioned_position_deletes tablesample system(50) repeatable(1234)
|
|
where action = 'click';
|
|
---- RESULTS
|
|
18,'Alan','click',2020-01-01 10:00:00
|
|
12,'Alan','click',2020-01-01 10:00:00
|
|
10,'Alan','click',2020-01-01 10:00:00
|
|
---- TYPES
|
|
INT, STRING, STRING, TIMESTAMP
|
|
====
|
|
---- QUERY
|
|
# _orc-suffixed counterpart of the iceberg_v2_no_deletes SELECT * case:
# same sample size and seed, same three expected rows.
select * from iceberg_v2_no_deletes_orc tablesample system(10) repeatable(1234)
|
|
---- RESULTS
|
|
1,'x'
|
|
2,'y'
|
|
3,'z'
|
|
---- TYPES
|
|
INT, STRING
|
|
====
|
|
---- QUERY
|
|
# Row count over the _orc-suffixed table with the same sample size and seed;
# the expected count equals the three rows returned by the SELECT * case.
select count(*) from iceberg_v2_no_deletes_orc tablesample system(10) repeatable(1234)
|
|
---- RESULTS
|
|
3
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
# _orc-suffixed counterpart of the "not all data files have delete files"
# SELECT * case: same sample size and seed, same single expected row.
select * from iceberg_v2_positional_not_all_data_files_have_delete_files_orc tablesample system(10) repeatable(1234)
|
|
---- RESULTS
|
|
5,'X'
|
|
---- TYPES
|
|
INT, STRING
|
|
====
|
|
---- QUERY
|
|
# Row count over the _orc-suffixed table, mirroring the SELECT * case directly
# above (same sample size and seed); the expected count equals its single row.
select count(*) from iceberg_v2_positional_not_all_data_files_have_delete_files_orc tablesample system(10) repeatable(1234)
|
|
---- RESULTS
|
|
1
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
# Unfiltered TABLESAMPLE SYSTEM(50) read of the _orc-suffixed partitioned
# position-deletes table with a fixed seed; baseline for the filtered case that follows.
select * from iceberg_v2_partitioned_position_deletes_orc tablesample system(50) repeatable(1234);
|
|
---- RESULTS
|
|
20,'Alex','view',2020-01-01 09:00:00
|
|
6,'Alex','view',2020-01-01 09:00:00
|
|
4,'Alex','view',2020-01-01 09:00:00
|
|
12,'Alan','click',2020-01-01 10:00:00
|
|
18,'Alan','click',2020-01-01 10:00:00
|
|
10,'Alan','click',2020-01-01 10:00:00
|
|
---- TYPES
|
|
INT, STRING, STRING, TIMESTAMP
|
|
====
|
|
---- QUERY
|
|
# Same sample size and seed as the unfiltered _orc case, plus a WHERE predicate.
# The expected rows are exactly the 'click' rows of the unfiltered result.
# NOTE(review): whether 'action' is a partition column (so pruning happens
# before sampling) is not visible in this file -- confirm against the schema.
select * from iceberg_v2_partitioned_position_deletes_orc tablesample system(50) repeatable(1234)
|
|
where action = 'click';
|
|
---- RESULTS
|
|
12,'Alan','click',2020-01-01 10:00:00
|
|
18,'Alan','click',2020-01-01 10:00:00
|
|
10,'Alan','click',2020-01-01 10:00:00
|
|
---- TYPES
|
|
INT, STRING, STRING, TIMESTAMP
|
|
====
|