====
---- QUERY
# Create a table that is a subset of the 'alltypes' table, i.e. it only
# contains the data types supported by Iceberg.
create table iceberg_alltypes(
  id INT COMMENT 'Add a comment',
  bool_col BOOLEAN,
  int_col INT,
  bigint_col BIGINT,
  float_col FLOAT,
  double_col DOUBLE,
  date_col DATE,
  string_col STRING,
  timestamp_col TIMESTAMP
)
stored as iceberg
tblproperties('iceberg.catalog'='hadoop.tables');
---- RESULTS
'Table has been created.'
====
---- QUERY
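# Note: date_string_col is a STRING in functional.alltypes, so it is cast
# to DATE with an explicit format string.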
insert into iceberg_alltypes
select id, bool_col, int_col, bigint_col, float_col, double_col,
CAST(date_string_col as date FORMAT 'MM/DD/YY'), string_col, timestamp_col
from functional.alltypes
order by id
limit 5;
---- RESULTS
: 5
====
---- QUERY
select * from iceberg_alltypes;
---- RESULTS
0,true,0,0,0,0,2009-01-01,'0',2009-01-01 00:00:00
1,false,1,10,1.100000023841858,10.1,2009-01-01,'1',2009-01-01 00:01:00
2,true,2,20,2.200000047683716,20.2,2009-01-01,'2',2009-01-01 00:02:00.100000000
3,false,3,30,3.299999952316284,30.3,2009-01-01,'3',2009-01-01 00:03:00.300000000
4,true,4,40,4.400000095367432,40.4,2009-01-01,'4',2009-01-01 00:04:00.600000000
---- TYPES
INT, BOOLEAN, INT, BIGINT, FLOAT, DOUBLE, DATE, STRING, TIMESTAMP
====
---- QUERY
# Create table with decimal types
CREATE TABLE decimal_tbl (
  d1 DECIMAL(9,0),
  d2 DECIMAL(10,0),
  d3 DECIMAL(20,10),
  d4 DECIMAL(38,38),
  d5 DECIMAL(10,5),
  d6 DECIMAL(9,0)
)
STORED AS iceberg
TBLPROPERTIES('iceberg.catalog'='hadoop.tables');
---- RESULTS
'Table has been created.'
====
---- QUERY
insert into decimal_tbl select * from functional_parquet.decimal_tbl;
select * from decimal_tbl;
---- RESULTS
1234,2222,1.2345678900,0.12345678900000000000000000000000000000,12345.78900,1
2345,111,12.3456789000,0.12345678900000000000000000000000000000,3.14100,1
12345,333,123.4567890000,0.12345678900000000000000000000000000000,11.22000,1
12345,333,1234.5678900000,0.12345678900000000000000000000000000000,0.10000,1
132842,333,12345.6789000000,0.12345678900000000000000000000000000000,0.77889,1
---- TYPES
DECIMAL, DECIMAL, DECIMAL, DECIMAL, DECIMAL, DECIMAL
====
---- QUERY
# Create non-Iceberg table with INT96 nanos.
create table int96_nanos (ts timestamp) stored as parquet;
====
---- QUERY
# Insert edge values as "normal" int96 timestamps that can represent all values.
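# The parquet_timestamp_type query option makes Impala write these values
# as INT96 timestamps with nanosecond precision.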
set parquet_timestamp_type=INT96_NANOS;
insert into int96_nanos values
("1400-01-01"),
("2019-01-18 00:00:00.000000001"),
("2019-01-18 00:00:00.000001"),
("2019-01-18 00:00:00.001"),
("2019-01-18 23:59:59.999"),
("2019-01-18 23:59:59.999999"),
("2019-01-18 23:59:59.999999999")
====
---- QUERY
# Iceberg should write timestamps as INT64 micros.
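# Note: sub-microsecond digits are truncated on write, so the .000000001
# value loses its fraction entirely and the .999999 and .999999999 inputs
# collapse to the same result below.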
create table ts_iceberg (ts timestamp) stored as iceberg
tblproperties('iceberg.catalog'='hadoop.tables');
insert into ts_iceberg select * from int96_nanos;
select * from ts_iceberg;
---- RESULTS
1400-01-01 00:00:00
2019-01-18 00:00:00
2019-01-18 00:00:00.000001000
2019-01-18 00:00:00.001000000
2019-01-18 23:59:59.999000000
2019-01-18 23:59:59.999999000
2019-01-18 23:59:59.999999000
====
---- QUERY
# Insert into hadoop catalog.
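# With 'iceberg.catalog'='hadoop.catalog' the data files are written under
# 'iceberg.catalog_location' rather than the default table location.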
create table iceberg_hadoop_cat (i int)
stored as iceberg
tblproperties('iceberg.catalog'='hadoop.catalog',
  'iceberg.catalog_location'='/test-warehouse/$DATABASE.db/hadoop_catalog_test');
insert into iceberg_hadoop_cat values (1), (2), (3);
---- RESULTS
: 3
====
---- QUERY
select * from iceberg_hadoop_cat;
---- RESULTS
1
2
3
---- TYPES
INT
====
---- QUERY
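# SHOW FILES should list only the current data files of the table.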
show files in iceberg_hadoop_cat;
---- RESULTS
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/hadoop_catalog_test/$DATABASE/iceberg_hadoop_cat/data/.*.0.parq','.*',''
---- TYPES
STRING, STRING, STRING
====
---- QUERY
# Insert into hadoop catalog with custom table identifier.
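# 'iceberg.table_identifier'='test.custom_db.int_table' maps to the
# test/custom_db/int_table directory inside the catalog location, as the
# SHOW FILES output below confirms.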
create table iceberg_hadoop_cat_ti (i int)
stored as iceberg
tblproperties('iceberg.catalog'='hadoop.catalog',
  'iceberg.catalog_location'='/test-warehouse/$DATABASE.db/hadoop_catalog_test',
  'iceberg.table_identifier'='test.custom_db.int_table');
insert into iceberg_hadoop_cat_ti values (1), (2), (3);
---- RESULTS
: 3
====
---- QUERY
select * from iceberg_hadoop_cat_ti;
---- RESULTS
1
2
3
---- TYPES
INT
====
---- QUERY
show files in iceberg_hadoop_cat_ti;
---- RESULTS
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/hadoop_catalog_test/test/custom_db/int_table/data/.*.0.parq','.*',''
---- TYPES
STRING, STRING, STRING
====
---- QUERY
# Insert into table stored in Iceberg's HiveCatalog.
create table iceberg_hive_cat (i int)
stored as iceberg
tblproperties('iceberg.catalog'='hive.catalog');
---- RESULTS
'Table has been created.'
====
---- QUERY
insert into iceberg_hive_cat values (7);
select * from iceberg_hive_cat;
---- RESULTS
7
---- TYPES
INT
====
---- QUERY
# Query external Iceberg table.
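# The external table points at the same underlying HiveCatalog table through
# 'iceberg.table_identifier', so it sees the row written above.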
create external table iceberg_hive_cat_ext (i int)
stored as iceberg
location '/test-warehouse/$DATABASE.db/iceberg_hive_cat'
tblproperties('iceberg.catalog'='hive.catalog',
  'iceberg.table_identifier'='$DATABASE.iceberg_hive_cat');
---- RESULTS
'Table has been created.'
====
---- QUERY
select * from iceberg_hive_cat_ext;
---- RESULTS
7
---- TYPES
INT
====
---- QUERY
# INSERT INTO external Iceberg table stored in HiveCatalog.
insert into iceberg_hive_cat_ext values (8);
select * from iceberg_hive_cat_ext;
---- RESULTS
7
8
---- TYPES
INT
====
---- QUERY
# Query original table.
refresh iceberg_hive_cat;
select * from iceberg_hive_cat;
---- RESULTS
7
8
---- TYPES
INT
====
---- QUERY
# DROP external Iceberg table.
drop table iceberg_hive_cat_ext
---- RESULTS
'Table has been dropped.'
====
---- QUERY
# Original table is not affected after external table drop.
refresh iceberg_hive_cat;
select * from iceberg_hive_cat;
---- RESULTS
7
8
---- TYPES
INT
====
---- QUERY
# Create another external Iceberg table.
create external table iceberg_hive_cat_ext_2 (i int)
stored as iceberg
location '/test-warehouse/$DATABASE.db/iceberg_hive_cat'
tblproperties('iceberg.catalog'='hive.catalog',
  'iceberg.table_identifier'='$DATABASE.iceberg_hive_cat');
select * from iceberg_hive_cat_ext_2
---- RESULTS
7
8
====
---- QUERY
# DROP the synchronized Iceberg table (data is purged).
drop table iceberg_hive_cat
---- RESULTS
'Table has been dropped.'
====
---- QUERY
# The data has been purged, so querying the external table fails.
refresh iceberg_hive_cat_ext_2;
select * from iceberg_hive_cat_ext_2
---- CATCH
Table does not exist
====
---- QUERY
# Insert into hive catalog with custom location.
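# The custom LOCATION overrides the default warehouse path, as the
# SHOW FILES output below confirms.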
create table iceberg_hive_cat_custom_loc (i int)
stored as iceberg
location '/test-warehouse/$DATABASE.db/custom_hive_cat'
tblproperties('iceberg.catalog'='hive.catalog');
insert into iceberg_hive_cat_custom_loc values (1), (2), (3);
---- RESULTS
: 3
====
---- QUERY
select * from iceberg_hive_cat_custom_loc;
---- RESULTS
1
2
3
---- TYPES
INT
====
---- QUERY
show files in iceberg_hive_cat_custom_loc;
---- RESULTS
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/custom_hive_cat/data/.*.0.parq','.*',''
---- TYPES
STRING, STRING, STRING
====
---- QUERY
# Create a table that is a subset of the 'alltypes' table, i.e. it only
# contains the data types supported by Iceberg.
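# The table is created with initial write.parquet.* properties; the
# ALTER TABLE / INSERT pairs that follow exercise the Parquet writer with
# different settings.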
create table iceberg_alltypes_parq_tblprop(
  id INT COMMENT 'Add a comment',
  bool_col BOOLEAN,
  int_col INT,
  bigint_col BIGINT,
  float_col FLOAT,
  double_col DOUBLE,
  date_col DATE,
  string_col STRING,
  timestamp_col TIMESTAMP
)
stored as iceberg
tblproperties('write.format.default'='parquet',
  'write.parquet.row-group-size-bytes'='8388608',
  'write.parquet.compression-codec'='gzip',
  'write.parquet.page-size-bytes'='65536',
  'write.parquet.dict-size-bytes'='1073741824'
);
---- RESULTS
'Table has been created.'
====
---- QUERY
insert into iceberg_alltypes_parq_tblprop
select id, bool_col, int_col, bigint_col, float_col, double_col,
CAST(date_string_col as date FORMAT 'MM/DD/YY'), string_col, timestamp_col
from functional.alltypes;
---- RESULTS
: 7300
====
---- QUERY
alter table iceberg_alltypes_parq_tblprop set tblproperties (
  'write.parquet.row-group-size-bytes'='536870912',
  'write.parquet.compression-codec'='none',
  'write.parquet.page-size-bytes'='134217728',
  'write.parquet.dict-size-bytes'='805306368');
====
---- QUERY
insert into iceberg_alltypes_parq_tblprop
select id, bool_col, int_col, bigint_col, float_col, double_col,
CAST(date_string_col as date FORMAT 'MM/DD/YY'), string_col, timestamp_col
from functional.alltypes;
---- RESULTS
: 7300
====
---- QUERY
alter table iceberg_alltypes_parq_tblprop set tblproperties (
  'write.parquet.row-group-size-bytes'='1073741824',
  'write.parquet.compression-codec'='zstd',
  'write.parquet.compression-level'='1',
  'write.parquet.page-size-bytes'='402653184',
  'write.parquet.dict-size-bytes'='536870912');
====
---- QUERY
insert into iceberg_alltypes_parq_tblprop
select id, bool_col, int_col, bigint_col, float_col, double_col,
CAST(date_string_col as date FORMAT 'MM/DD/YY'), string_col, timestamp_col
from functional.alltypes;
---- RESULTS
: 7300
====
---- QUERY
alter table iceberg_alltypes_parq_tblprop set tblproperties (
  'write.parquet.row-group-size-bytes'='1610612736',
  'write.parquet.compression-codec'='zstd',
  'write.parquet.compression-level'='13',
  'write.parquet.page-size-bytes'='536870912',
  'write.parquet.dict-size-bytes'='402653184');
====
---- QUERY
insert into iceberg_alltypes_parq_tblprop
select id, bool_col, int_col, bigint_col, float_col, double_col,
CAST(date_string_col as date FORMAT 'MM/DD/YY'), string_col, timestamp_col
from functional.alltypes;
---- RESULTS
: 7300
====
---- QUERY
alter table iceberg_alltypes_parq_tblprop set tblproperties (
  'write.parquet.row-group-size-bytes'='1879048192',
  'write.parquet.compression-codec'='zstd',
  'write.parquet.compression-level'='18',
  'write.parquet.page-size-bytes'='805306368',
  'write.parquet.dict-size-bytes'='134217728');
====
---- QUERY
insert into iceberg_alltypes_parq_tblprop
select id, bool_col, int_col, bigint_col, float_col, double_col,
CAST(date_string_col as date FORMAT 'MM/DD/YY'), string_col, timestamp_col
from functional.alltypes;
---- RESULTS
: 7300
====
---- QUERY
alter table iceberg_alltypes_parq_tblprop set tblproperties (
  'write.parquet.row-group-size-bytes'='2146435072',
  'write.parquet.compression-codec'='zstd',
  'write.parquet.compression-level'='22',
  'write.parquet.page-size-bytes'='1073741824',
  'write.parquet.dict-size-bytes'='65536');
====
---- QUERY
insert into iceberg_alltypes_parq_tblprop
select id, bool_col, int_col, bigint_col, float_col, double_col,
CAST(date_string_col as date FORMAT 'MM/DD/YY'), string_col, timestamp_col
from functional.alltypes;
---- RESULTS
: 7300
====
---- QUERY
alter table iceberg_alltypes_parq_tblprop unset tblproperties (
  'write.parquet.row-group-size-bytes',
  'write.parquet.compression-codec',
  'write.parquet.compression-level',
  'write.parquet.page-size-bytes',
  'write.parquet.dict-size-bytes');
====
---- QUERY
insert into iceberg_alltypes_parq_tblprop
select id, bool_col, int_col, bigint_col, float_col, double_col,
CAST(date_string_col as date FORMAT 'MM/DD/YY'), string_col, timestamp_col
from functional.alltypes;
---- RESULTS
: 7300
====
---- QUERY
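# Seven inserts of 7300 rows each: 7 * 7300 = 51100.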
select count(*) from iceberg_alltypes_parq_tblprop;
---- RESULTS
51100
---- TYPES
BIGINT
====