Files
impala/testdata/workloads/functional-query/queries/QueryTest/iceberg-tablesample-v2.test
Zoltan Borok-Nagy b91aa06537 IMPALA-11582: Implement table sampling for Iceberg tables
This patch adds table sampling functionality for Iceberg tables.
From now on, it is possible to execute SELECT and COMPUTE STATS statements
with table sampling.

Predicates in the WHERE clause affect the results of table sampling
similarly to how legacy tables work (sampling is applied after static
partition and file pruning).

Sampling is repeatable via the REPEATABLE clause.

Testing
 * planner tests
 * e2e tests for V1 and V2 tables

Change-Id: I5de151747c0e9d9379a4051252175fccf42efd7d
Reviewed-on: http://gerrit.cloudera.org:8080/18989
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
2022-09-26 15:49:22 +00:00

106 lines
2.6 KiB
Plaintext

====
---- QUERY
# Sample iceberg_v2_no_deletes with TABLESAMPLE SYSTEM(10). REPEATABLE(1234) makes the
# sampling repeatable, so the expected rows below stay stable across runs.
select * from iceberg_v2_no_deletes tablesample system(10) repeatable(1234)
---- RESULTS
1,'x'
2,'y'
3,'z'
---- TYPES
INT, STRING
====
---- QUERY
# count(*) over the same table, percentage and REPEATABLE value as the 'select *' test
# above; the expected count equals the number of rows listed there.
select count(*) from iceberg_v2_no_deletes tablesample system(10) repeatable(1234)
---- RESULTS
3
---- TYPES
BIGINT
====
---- QUERY
# Repeatable 10% sample of a V2 table. NOTE(review): going by the table name, only some
# of its data files have positional delete files -- confirm against the table definition.
select * from iceberg_v2_positional_not_all_data_files_have_delete_files tablesample system(10) repeatable(1234)
---- RESULTS
5,'X'
---- TYPES
INT, STRING
====
---- QUERY
# count(*) over the same table, percentage and REPEATABLE value as the 'select *' test
# above; the expected count equals the single row listed there.
select count(*) from iceberg_v2_positional_not_all_data_files_have_delete_files tablesample system(10) repeatable(1234)
---- RESULTS
1
---- TYPES
BIGINT
====
---- QUERY
# Repeatable 50% sample without a predicate. The expected rows contain both 'click' and
# 'view' actions.
select * from iceberg_v2_partitioned_position_deletes tablesample system(50) repeatable(1234);
---- RESULTS
18,'Alan','click',2020-01-01 10:00:00
12,'Alan','click',2020-01-01 10:00:00
10,'Alan','click',2020-01-01 10:00:00
6,'Alex','view',2020-01-01 09:00:00
20,'Alex','view',2020-01-01 09:00:00
4,'Alex','view',2020-01-01 09:00:00
---- TYPES
INT, STRING, STRING, TIMESTAMP
====
---- QUERY
# Same sample parameters as the test above, plus a WHERE predicate. The expected rows are
# the 'click' rows of the unfiltered result above.
# NOTE(review): presumably 'action' is a partition column, in which case sampling is
# applied after partition pruning -- confirm against the table definition.
select * from iceberg_v2_partitioned_position_deletes tablesample system(50) repeatable(1234)
where action = 'click';
---- RESULTS
18,'Alan','click',2020-01-01 10:00:00
12,'Alan','click',2020-01-01 10:00:00
10,'Alan','click',2020-01-01 10:00:00
---- TYPES
INT, STRING, STRING, TIMESTAMP
====
---- QUERY
# Counterpart of the iceberg_v2_no_deletes sampling test, run against the '_orc' table.
# The expected rows are the same as for iceberg_v2_no_deletes.
select * from iceberg_v2_no_deletes_orc tablesample system(10) repeatable(1234)
---- RESULTS
1,'x'
2,'y'
3,'z'
---- TYPES
INT, STRING
====
---- QUERY
# count(*) over the same table, percentage and REPEATABLE value as the 'select *' test
# above; the expected count equals the number of rows listed there.
select count(*) from iceberg_v2_no_deletes_orc tablesample system(10) repeatable(1234)
---- RESULTS
3
---- TYPES
BIGINT
====
---- QUERY
# Counterpart of the iceberg_v2_positional_not_all_data_files_have_delete_files sampling
# test, run against the '_orc' table with the same percentage and REPEATABLE value.
select * from iceberg_v2_positional_not_all_data_files_have_delete_files_orc tablesample system(10) repeatable(1234)
---- RESULTS
5,'X'
---- TYPES
INT, STRING
====
---- QUERY
# count(*) over the sampled '_orc' table. It must agree with the single row returned by
# the 'select *' test on the same table, percentage and REPEATABLE value.
select count(*) from iceberg_v2_positional_not_all_data_files_have_delete_files_orc tablesample system(10) repeatable(1234)
---- RESULTS
1
---- TYPES
BIGINT
====
---- QUERY
# Counterpart of the iceberg_v2_partitioned_position_deletes sampling test, run against
# the '_orc' table: repeatable 50% sample without a predicate. The expected rows contain
# both 'click' and 'view' actions.
select * from iceberg_v2_partitioned_position_deletes_orc tablesample system(50) repeatable(1234);
---- RESULTS
20,'Alex','view',2020-01-01 09:00:00
6,'Alex','view',2020-01-01 09:00:00
4,'Alex','view',2020-01-01 09:00:00
12,'Alan','click',2020-01-01 10:00:00
18,'Alan','click',2020-01-01 10:00:00
10,'Alan','click',2020-01-01 10:00:00
---- TYPES
INT, STRING, STRING, TIMESTAMP
====
---- QUERY
# Same sample parameters as the test above, plus a WHERE predicate. The expected rows are
# the 'click' rows of the unfiltered result above.
# NOTE(review): presumably 'action' is a partition column, in which case sampling is
# applied after partition pruning -- confirm against the table definition.
select * from iceberg_v2_partitioned_position_deletes_orc tablesample system(50) repeatable(1234)
where action = 'click';
---- RESULTS
12,'Alan','click',2020-01-01 10:00:00
18,'Alan','click',2020-01-01 10:00:00
10,'Alan','click',2020-01-01 10:00:00
---- TYPES
INT, STRING, STRING, TIMESTAMP
====