====
---- QUERY
# Create a table that is a subset of 'alltypes' table, i.e. it only
# contains the data types supported by Iceberg.
create table iceberg_alltypes(
id INT COMMENT 'Add a comment',
bool_col BOOLEAN,
int_col INT,
bigint_col BIGINT,
float_col FLOAT,
double_col DOUBLE,
date_col DATE,
string_col STRING,
timestamp_col TIMESTAMP
)
stored as iceberg
tblproperties('iceberg.catalog'='hadoop.tables');
---- RESULTS
'Table has been created.'
====
---- QUERY
insert into iceberg_alltypes
select id, bool_col, int_col, bigint_col, float_col, double_col,
CAST(date_string_col as date FORMAT 'MM/DD/YY'), string_col, timestamp_col
from functional.alltypes
order by id
limit 5;
---- RESULTS
: 5
====
---- QUERY
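# Read back the inserted rows.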
select * from iceberg_alltypes;
---- RESULTS
0,true,0,0,0,0,2009-01-01,'0',2009-01-01 00:00:00
1,false,1,10,1.100000023841858,10.1,2009-01-01,'1',2009-01-01 00:01:00
2,true,2,20,2.200000047683716,20.2,2009-01-01,'2',2009-01-01 00:02:00.100000000
3,false,3,30,3.299999952316284,30.3,2009-01-01,'3',2009-01-01 00:03:00.300000000
4,true,4,40,4.400000095367432,40.4,2009-01-01,'4',2009-01-01 00:04:00.600000000
---- TYPES
INT, BOOLEAN, INT, BIGINT, FLOAT, DOUBLE, DATE, STRING, TIMESTAMP
====
---- QUERY
# Create table with decimal types
CREATE TABLE decimal_tbl (
d1 DECIMAL(9,0),
d2 DECIMAL(10,0),
d3 DECIMAL(20,10),
d4 DECIMAL(38,38),
d5 DECIMAL(10,5),
d6 DECIMAL(9,0)
)
STORED AS iceberg
TBLPROPERTIES('iceberg.catalog'='hadoop.tables');
---- RESULTS
'Table has been created.'
====
---- QUERY
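# Insert decimal values from functional_parquet.decimal_tbl and read them back.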
insert into decimal_tbl select * from functional_parquet.decimal_tbl;
select * from decimal_tbl;
---- RESULTS
1234,2222,1.2345678900,0.12345678900000000000000000000000000000,12345.78900,1
2345,111,12.3456789000,0.12345678900000000000000000000000000000,3.14100,1
12345,333,123.4567890000,0.12345678900000000000000000000000000000,11.22000,1
12345,333,1234.5678900000,0.12345678900000000000000000000000000000,0.10000,1
132842,333,12345.6789000000,0.12345678900000000000000000000000000000,0.77889,1
---- TYPES
DECIMAL, DECIMAL, DECIMAL, DECIMAL, DECIMAL, DECIMAL
====
---- QUERY
# Create non-Iceberg table with INT96 nanos.
create table int96_nanos (ts timestamp) stored as parquet;
====
---- QUERY
# Insert edge values as "normal" int96 timestamps that can represent all values.
set parquet_timestamp_type=INT96_NANOS;
insert into int96_nanos values
("1400-01-01"),
("2019-01-18 00:00:00.000000001"),
("2019-01-18 00:00:00.000001"),
("2019-01-18 00:00:00.001"),
("2019-01-18 23:59:59.999"),
("2019-01-18 23:59:59.999999"),
("2019-01-18 23:59:59.999999999")
====
---- QUERY
# Iceberg should write timestamps as INT64 micros.
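# Values are truncated to microsecond precision, so the last two rows come out identical.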
create table ts_iceberg (ts timestamp) stored as iceberg
tblproperties('iceberg.catalog'='hadoop.tables');
insert into ts_iceberg select * from int96_nanos;
select * from ts_iceberg;
---- RESULTS
1400-01-01 00:00:00
2019-01-18 00:00:00
2019-01-18 00:00:00.000001000
2019-01-18 00:00:00.001000000
2019-01-18 23:59:59.999000000
2019-01-18 23:59:59.999999000
2019-01-18 23:59:59.999999000
====
---- QUERY
# Insert into hadoop catalog.
create table iceberg_hadoop_cat (i int)
stored as iceberg
tblproperties('iceberg.catalog'='hadoop.catalog',
'iceberg.catalog_location'='$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE.db/hadoop_catalog_test');
insert into iceberg_hadoop_cat values (1), (2), (3);
---- RESULTS
: 3
====
---- QUERY
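# Read back the rows from the hadoop catalog table.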
select * from iceberg_hadoop_cat;
---- RESULTS
1
2
3
---- TYPES
INT
====
---- QUERY
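# Data files are stored under the configured 'iceberg.catalog_location'.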
show files in iceberg_hadoop_cat;
---- RESULTS
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/hadoop_catalog_test/$DATABASE/iceberg_hadoop_cat/data/.*.0.parq','.*',''
---- TYPES
STRING, STRING, STRING
====
---- QUERY
# Insert into hadoop catalog with custom table identifier.
create table iceberg_hadoop_cat_ti (i int)
stored as iceberg
tblproperties('iceberg.catalog'='hadoop.catalog',
'iceberg.catalog_location'='$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE.db/hadoop_catalog_test',
'iceberg.table_identifier'='test.custom_db.int_table');
insert into iceberg_hadoop_cat_ti values (1), (2), (3);
---- RESULTS
: 3
====
---- QUERY
select * from iceberg_hadoop_cat_ti;
---- RESULTS
1
2
3
---- TYPES
INT
====
---- QUERY
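# Data files are stored under the path derived from the custom 'iceberg.table_identifier' (test/custom_db/int_table).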
show files in iceberg_hadoop_cat_ti;
---- RESULTS
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/hadoop_catalog_test/test/custom_db/int_table/data/.*.0.parq','.*',''
---- TYPES
STRING, STRING, STRING
====
---- QUERY
# Insert into table stored in Iceberg's HiveCatalog
create table iceberg_hive_cat (i int)
stored as iceberg
tblproperties('iceberg.catalog'='hive.catalog');
---- RESULTS
'Table has been created.'
====
---- QUERY
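# Write a row through the HiveCatalog table and read it back.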
insert into iceberg_hive_cat values (7);
select * from iceberg_hive_cat;
---- RESULTS
7
---- TYPES
INT
====
---- QUERY
# Create an external Iceberg table that points at 'iceberg_hive_cat'.
create external table iceberg_hive_cat_ext (i int)
stored as iceberg
location '$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE.db/iceberg_hive_cat'
tblproperties('iceberg.catalog'='hive.catalog',
'iceberg.table_identifier'='$DATABASE.iceberg_hive_cat');
---- RESULTS
'Table has been created.'
====
---- QUERY
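# The external table sees the row written through 'iceberg_hive_cat'.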
select * from iceberg_hive_cat_ext;
---- RESULTS
7
---- TYPES
INT
====
---- QUERY
# INSERT INTO external Iceberg table stored in HiveCatalog.
insert into iceberg_hive_cat_ext values (8);
select * from iceberg_hive_cat_ext;
---- RESULTS
7
8
---- TYPES
INT
====
---- QUERY
# Query the original table; it sees the row inserted through the external table.
refresh iceberg_hive_cat;
select * from iceberg_hive_cat;
---- RESULTS
7
8
---- TYPES
INT
====
---- QUERY
# DROP external Iceberg table
drop table iceberg_hive_cat_ext
---- RESULTS
'Table has been dropped.'
====
---- QUERY
# Original table is not affected after external table drop.
refresh iceberg_hive_cat;
select * from iceberg_hive_cat;
---- RESULTS
7
8
---- TYPES
INT
====
---- QUERY
# Create another external Iceberg table
create external table iceberg_hive_cat_ext_2 (i int)
stored as iceberg
location '$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE.db/iceberg_hive_cat'
tblproperties('iceberg.catalog'='hive.catalog',
'iceberg.table_identifier'='$DATABASE.iceberg_hive_cat');
select * from iceberg_hive_cat_ext_2
---- RESULTS
7
8
====
---- QUERY
# DROP the synchronized Iceberg table (data is purged).
drop table iceberg_hive_cat
---- RESULTS
'Table has been dropped.'
====
---- QUERY
# The data has been purged, so querying the external table fails.
refresh iceberg_hive_cat_ext_2;
select * from iceberg_hive_cat_ext_2
---- CATCH
Table does not exist
====
---- QUERY
# Insert into hive catalog with custom location.
create table iceberg_hive_cat_custom_loc (i int)
stored as iceberg
location '$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE.db/custom_hive_cat'
tblproperties('iceberg.catalog'='hive.catalog');
insert into iceberg_hive_cat_custom_loc values (1), (2), (3);
---- RESULTS
: 3
====
---- QUERY
select * from iceberg_hive_cat_custom_loc;
---- RESULTS
1
2
3
---- TYPES
INT
====
---- QUERY
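# Data files are stored under the custom table location.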
show files in iceberg_hive_cat_custom_loc;
---- RESULTS
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/custom_hive_cat/data/.*.0.parq','.*',''
---- TYPES
STRING, STRING, STRING
====
---- QUERY
# Create a table that is a subset of 'alltypes' table, i.e. it only
# contains the data types supported by Iceberg.
create table iceberg_alltypes_parq_tblprop(
id INT COMMENT 'Add a comment',
bool_col BOOLEAN,
int_col INT,
bigint_col BIGINT,
float_col FLOAT,
double_col DOUBLE,
date_col DATE,
string_col STRING,
timestamp_col TIMESTAMP
)
stored as iceberg
tblproperties('write.format.default'='parquet',
'write.parquet.row-group-size-bytes'='8388608',
'write.parquet.compression-codec'='gzip',
'write.parquet.page-size-bytes'='65536',
'write.parquet.dict-size-bytes'='1073741824'
);
---- RESULTS
'Table has been created.'
====
---- QUERY
insert into iceberg_alltypes_parq_tblprop
select id, bool_col, int_col, bigint_col, float_col, double_col,
CAST(date_string_col as date FORMAT 'MM/DD/YY'), string_col, timestamp_col
from functional.alltypes;
---- RESULTS
: 7300
====
---- QUERY
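# Change the Parquet write properties before the next insert.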
alter table iceberg_alltypes_parq_tblprop set tblproperties (
'write.parquet.row-group-size-bytes'='536870912',
'write.parquet.compression-codec'='none',
'write.parquet.page-size-bytes'='134217728',
'write.parquet.dict-size-bytes'='805306368');
====
---- QUERY
insert into iceberg_alltypes_parq_tblprop
select id, bool_col, int_col, bigint_col, float_col, double_col,
CAST(date_string_col as date FORMAT 'MM/DD/YY'), string_col, timestamp_col
from functional.alltypes;
---- RESULTS
: 7300
====
---- QUERY
alter table iceberg_alltypes_parq_tblprop set tblproperties (
'write.parquet.row-group-size-bytes'='1073741824',
'write.parquet.compression-codec'='zstd',
'write.parquet.compression-level'='1',
'write.parquet.page-size-bytes'='402653184',
'write.parquet.dict-size-bytes'='536870912');
====
---- QUERY
insert into iceberg_alltypes_parq_tblprop
select id, bool_col, int_col, bigint_col, float_col, double_col,
CAST(date_string_col as date FORMAT 'MM/DD/YY'), string_col, timestamp_col
from functional.alltypes;
---- RESULTS
: 7300
====
---- QUERY
alter table iceberg_alltypes_parq_tblprop set tblproperties (
'write.parquet.row-group-size-bytes'='1610612736',
'write.parquet.compression-codec'='zstd',
'write.parquet.compression-level'='13',
'write.parquet.page-size-bytes'='536870912',
'write.parquet.dict-size-bytes'='402653184');
====
---- QUERY
insert into iceberg_alltypes_parq_tblprop
select id, bool_col, int_col, bigint_col, float_col, double_col,
CAST(date_string_col as date FORMAT 'MM/DD/YY'), string_col, timestamp_col
from functional.alltypes;
---- RESULTS
: 7300
====
---- QUERY
alter table iceberg_alltypes_parq_tblprop set tblproperties (
'write.parquet.row-group-size-bytes'='1879048192',
'write.parquet.compression-codec'='zstd',
'write.parquet.compression-level'='18',
'write.parquet.page-size-bytes'='805306368',
'write.parquet.dict-size-bytes'='134217728');
====
---- QUERY
insert into iceberg_alltypes_parq_tblprop
select id, bool_col, int_col, bigint_col, float_col, double_col,
CAST(date_string_col as date FORMAT 'MM/DD/YY'), string_col, timestamp_col
from functional.alltypes;
---- RESULTS
: 7300
====
---- QUERY
alter table iceberg_alltypes_parq_tblprop set tblproperties (
'write.parquet.row-group-size-bytes'='2146435072',
'write.parquet.compression-codec'='zstd',
'write.parquet.compression-level'='22',
'write.parquet.page-size-bytes'='1073741824',
'write.parquet.dict-size-bytes'='65536');
====
---- QUERY
insert into iceberg_alltypes_parq_tblprop
select id, bool_col, int_col, bigint_col, float_col, double_col,
CAST(date_string_col as date FORMAT 'MM/DD/YY'), string_col, timestamp_col
from functional.alltypes;
---- RESULTS
: 7300
====
---- QUERY
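# Remove the Parquet write properties; the next insert falls back to the defaults.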
alter table iceberg_alltypes_parq_tblprop unset tblproperties (
'write.parquet.row-group-size-bytes',
'write.parquet.compression-codec',
'write.parquet.compression-level',
'write.parquet.page-size-bytes',
'write.parquet.dict-size-bytes');
====
---- QUERY
insert into iceberg_alltypes_parq_tblprop
select id, bool_col, int_col, bigint_col, float_col, double_col,
CAST(date_string_col as date FORMAT 'MM/DD/YY'), string_col, timestamp_col
from functional.alltypes;
---- RESULTS
: 7300
====
---- QUERY
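# Seven inserts of 7300 rows each.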
select count(*) from iceberg_alltypes_parq_tblprop;
---- RESULTS
51100
---- TYPES
BIGINT
====