impala/testdata/workloads/functional-query/queries/QueryTest/iceberg-insert.test
commit 8b83726b8e by Michael Smith
IMPALA-9487: Add erasure coding policy to SHOW, DESCRIBE
Adds erasure coding policy to introspection commands SHOW FILES, SHOW
PARTITIONS, SHOW TABLE STATS, and DESCRIBE EXTENDED.

Removes `throws IOException` from methods that don't throw. Removes the
null check for getSd because getStorageDescriptorInfo - which is called
right after getTableMetaDataInformation - already uses it without
checking for null.

Adds '$ERASURECODE_POLICY' for runtime test substitution. The test
suite replaces it with the current erasure coding policy - taken from
HDFS_ERASURECODE_POLICY - or with NONE, so expected output matches
whether or not erasure coding is enabled.
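
A minimal sketch of how the substitution could work, assuming the suite
expands variables by plain string replacement (the helper name below is
illustrative; only '$ERASURECODE_POLICY' and HDFS_ERASURECODE_POLICY come
from this change):

    import os

    def expand_erasurecode_policy(test_text):
        # If the cluster runs with erasure coding, HDFS_ERASURECODE_POLICY
        # names the active policy; otherwise tests expect the literal
        # string NONE.
        policy = os.environ.get('HDFS_ERASURECODE_POLICY', 'NONE')
        return test_text.replace('$ERASURECODE_POLICY', policy)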

Testing:
- ran backend, end-to-end, and custom cluster tests with erasure coding
- ran backend, end-to-end, and custom cluster tests with exhaustive
  strategy

Change-Id: Idd95f2d18b3980581788c92993b6d2f53504b5e0
Reviewed-on: http://gerrit.cloudera.org:8080/19268
Reviewed-by: Michael Smith <michael.smith@cloudera.com>
Tested-by: Michael Smith <michael.smith@cloudera.com>
2022-12-14 22:37:14 +00:00

====
---- QUERY
# Create a table that is a subset of the 'alltypes' table, i.e. it only
# contains the data types supported by Iceberg.
create table iceberg_alltypes(
id INT COMMENT 'Add a comment',
bool_col BOOLEAN,
int_col INT,
bigint_col BIGINT,
float_col FLOAT,
double_col DOUBLE,
date_col DATE,
string_col STRING,
timestamp_col TIMESTAMP
)
stored as iceberg
tblproperties('iceberg.catalog'='hadoop.tables');
---- RESULTS
'Table has been created.'
====
---- QUERY
insert into iceberg_alltypes
select id, bool_col, int_col, bigint_col, float_col, double_col,
CAST(date_string_col as date FORMAT 'MM/DD/YY'), string_col, timestamp_col
from functional.alltypes
order by id
limit 5;
---- RESULTS
: 5
====
---- QUERY
select * from iceberg_alltypes;
---- RESULTS
0,true,0,0,0,0,2009-01-01,'0',2009-01-01 00:00:00
1,false,1,10,1.100000023841858,10.1,2009-01-01,'1',2009-01-01 00:01:00
2,true,2,20,2.200000047683716,20.2,2009-01-01,'2',2009-01-01 00:02:00.100000000
3,false,3,30,3.299999952316284,30.3,2009-01-01,'3',2009-01-01 00:03:00.300000000
4,true,4,40,4.400000095367432,40.4,2009-01-01,'4',2009-01-01 00:04:00.600000000
---- TYPES
INT, BOOLEAN, INT, BIGINT, FLOAT, DOUBLE, DATE, STRING, TIMESTAMP
====
---- QUERY
# Create table with decimal types
CREATE TABLE decimal_tbl (
d1 DECIMAL(9,0),
d2 DECIMAL(10,0),
d3 DECIMAL(20,10),
d4 DECIMAL(38,38),
d5 DECIMAL(10,5),
d6 DECIMAL(9,0)
)
STORED AS iceberg
TBLPROPERTIES('iceberg.catalog'='hadoop.tables');
---- RESULTS
'Table has been created.'
====
---- QUERY
insert into decimal_tbl select * from functional_parquet.decimal_tbl;
select * from decimal_tbl;
---- RESULTS
1234,2222,1.2345678900,0.12345678900000000000000000000000000000,12345.78900,1
2345,111,12.3456789000,0.12345678900000000000000000000000000000,3.14100,1
12345,333,123.4567890000,0.12345678900000000000000000000000000000,11.22000,1
12345,333,1234.5678900000,0.12345678900000000000000000000000000000,0.10000,1
132842,333,12345.6789000000,0.12345678900000000000000000000000000000,0.77889,1
---- TYPES
DECIMAL, DECIMAL, DECIMAL, DECIMAL, DECIMAL, DECIMAL
====
---- QUERY
# Create non-Iceberg table with INT96 nanos.
create table int96_nanos (ts timestamp) stored as parquet;
====
---- QUERY
# Insert edge values as "normal" int96 timestamps that can represent all values.
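# INT96 stores nanosecond precision, so each of these values round-trips
# exactly in this table.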
set parquet_timestamp_type=INT96_NANOS;
insert into int96_nanos values
("1400-01-01"),
("2019-01-18 00:00:00.000000001"),
("2019-01-18 00:00:00.000001"),
("2019-01-18 00:00:00.001"),
("2019-01-18 23:59:59.999"),
("2019-01-18 23:59:59.999999"),
("2019-01-18 23:59:59.999999999")
====
---- QUERY
# Iceberg should write timestamps as INT64 micros.
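# Sub-microsecond digits are truncated on write, which is why the two
# finest-grained inputs collapse to the same value in the expected output.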
create table ts_iceberg (ts timestamp) stored as iceberg
tblproperties('iceberg.catalog'='hadoop.tables');
insert into ts_iceberg select * from int96_nanos;
select * from ts_iceberg;
---- RESULTS
1400-01-01 00:00:00
2019-01-18 00:00:00
2019-01-18 00:00:00.000001000
2019-01-18 00:00:00.001000000
2019-01-18 23:59:59.999000000
2019-01-18 23:59:59.999999000
2019-01-18 23:59:59.999999000
====
---- QUERY
# Insert into hadoop catalog.
create table iceberg_hadoop_cat (i int)
stored as iceberg
tblproperties('iceberg.catalog'='hadoop.catalog',
'iceberg.catalog_location'='$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE.db/hadoop_catalog_test');
insert into iceberg_hadoop_cat values (1), (2), (3);
---- RESULTS
: 3
====
---- QUERY
select * from iceberg_hadoop_cat;
---- RESULTS
1
2
3
---- TYPES
INT
====
---- QUERY
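# SHOW FILES now reports the erasure coding policy as its final column;
# $ERASURECODE_POLICY is substituted by the test framework at run time.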
show files in iceberg_hadoop_cat;
---- RESULTS
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/hadoop_catalog_test/$DATABASE/iceberg_hadoop_cat/data/.*.0.parq','.*','','$ERASURECODE_POLICY'
---- TYPES
STRING, STRING, STRING, STRING
====
---- QUERY
# Insert into hadoop catalog with custom table identifier.
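# The custom identifier maps to the test/custom_db/int_table path under
# the catalog location, as the SHOW FILES output below confirms.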
create table iceberg_hadoop_cat_ti (i int)
stored as iceberg
tblproperties('iceberg.catalog'='hadoop.catalog',
'iceberg.catalog_location'='$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE.db/hadoop_catalog_test',
'iceberg.table_identifier'='test.custom_db.int_table');
insert into iceberg_hadoop_cat_ti values (1), (2), (3);
---- RESULTS
: 3
====
---- QUERY
select * from iceberg_hadoop_cat_ti;
---- RESULTS
1
2
3
---- TYPES
INT
====
---- QUERY
show files in iceberg_hadoop_cat_ti;
---- RESULTS
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/hadoop_catalog_test/test/custom_db/int_table/data/.*.0.parq','.*','','$ERASURECODE_POLICY'
---- TYPES
STRING, STRING, STRING, STRING
====
---- QUERY
# Insert into a table stored in Iceberg's HiveCatalog.
create table iceberg_hive_cat (i int)
stored as iceberg
tblproperties('iceberg.catalog'='hive.catalog');
---- RESULTS
'Table has been created.'
====
---- QUERY
insert into iceberg_hive_cat values (7);
select * from iceberg_hive_cat;
---- RESULTS
7
---- TYPES
INT
====
---- QUERY
# Query external Iceberg table
create external table iceberg_hive_cat_ext (i int)
stored as iceberg
location '$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE.db/iceberg_hive_cat'
tblproperties('iceberg.catalog'='hive.catalog',
'iceberg.table_identifier'='$DATABASE.iceberg_hive_cat');
---- RESULTS
'Table has been created.'
====
---- QUERY
select * from iceberg_hive_cat_ext;
---- RESULTS
7
---- TYPES
INT
====
---- QUERY
# INSERT INTO external Iceberg table stored in HiveCatalog.
insert into iceberg_hive_cat_ext values (8);
select * from iceberg_hive_cat_ext;
---- RESULTS
7
8
---- TYPES
INT
====
---- QUERY
# Query original table
refresh iceberg_hive_cat;
select * from iceberg_hive_cat;
---- RESULTS
7
8
---- TYPES
INT
====
---- QUERY
# DROP external Iceberg table
drop table iceberg_hive_cat_ext
---- RESULTS
'Table has been dropped.'
====
---- QUERY
# Original table is not affected after external table drop.
refresh iceberg_hive_cat;
select * from iceberg_hive_cat;
---- RESULTS
7
8
---- TYPES
INT
====
---- QUERY
# Create another external Iceberg table
create external table iceberg_hive_cat_ext_2 (i int)
stored as iceberg
location '$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE.db/iceberg_hive_cat'
tblproperties('iceberg.catalog'='hive.catalog',
'iceberg.table_identifier'='$DATABASE.iceberg_hive_cat');
select * from iceberg_hive_cat_ext_2
---- RESULTS
7
8
====
---- QUERY
# DROP the synchronized Iceberg table (data is purged).
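# Synchronized tables are created with 'external.table.purge'='TRUE', so
# DROP TABLE removes their data and metadata files as well.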
drop table iceberg_hive_cat
---- RESULTS
'Table has been dropped.'
====
---- QUERY
# The data has been purged, so querying the external table fails.
refresh iceberg_hive_cat_ext_2;
select * from iceberg_hive_cat_ext_2
---- CATCH
Table does not exist
====
---- QUERY
# Insert into hive catalog with custom location.
create table iceberg_hive_cat_custom_loc (i int)
stored as iceberg
location '$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE.db/custom_hive_cat'
tblproperties('iceberg.catalog'='hive.catalog');
insert into iceberg_hive_cat_custom_loc values (1), (2), (3);
---- RESULTS
: 3
====
---- QUERY
select * from iceberg_hive_cat_custom_loc;
---- RESULTS
1
2
3
---- TYPES
INT
====
---- QUERY
show files in iceberg_hive_cat_custom_loc;
---- RESULTS
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/custom_hive_cat/data/.*.0.parq','.*','','$ERASURECODE_POLICY'
---- TYPES
STRING, STRING, STRING, STRING
====
---- QUERY
# Create a table that is a subset of the 'alltypes' table, i.e. it only
# contains the data types supported by Iceberg.
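# The table sets Parquet writer options (compression codec, row group,
# page, and dictionary sizes) via Iceberg write properties; the ALTERs
# below cycle through further combinations before each insert.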
create table iceberg_alltypes_parq_tblprop(
id INT COMMENT 'Add a comment',
bool_col BOOLEAN,
int_col INT,
bigint_col BIGINT,
float_col FLOAT,
double_col DOUBLE,
date_col DATE,
string_col STRING,
timestamp_col TIMESTAMP
)
stored as iceberg
tblproperties('write.format.default'='parquet',
'write.parquet.row-group-size-bytes'='8388608',
'write.parquet.compression-codec'='gzip',
'write.parquet.page-size-bytes'='65536',
'write.parquet.dict-size-bytes'='1073741824'
);
---- RESULTS
'Table has been created.'
====
---- QUERY
insert into iceberg_alltypes_parq_tblprop
select id, bool_col, int_col, bigint_col, float_col, double_col,
CAST(date_string_col as date FORMAT 'MM/DD/YY'), string_col, timestamp_col
from functional.alltypes;
---- RESULTS
: 7300
====
---- QUERY
alter table iceberg_alltypes_parq_tblprop set tblproperties (
'write.parquet.row-group-size-bytes'='536870912',
'write.parquet.compression-codec'='none',
'write.parquet.page-size-bytes'='134217728',
'write.parquet.dict-size-bytes'='805306368');
====
---- QUERY
insert into iceberg_alltypes_parq_tblprop
select id, bool_col, int_col, bigint_col, float_col, double_col,
CAST(date_string_col as date FORMAT 'MM/DD/YY'), string_col, timestamp_col
from functional.alltypes;
---- RESULTS
: 7300
====
---- QUERY
alter table iceberg_alltypes_parq_tblprop set tblproperties (
'write.parquet.row-group-size-bytes'='1073741824',
'write.parquet.compression-codec'='zstd',
'write.parquet.compression-level'='1',
'write.parquet.page-size-bytes'='402653184',
'write.parquet.dict-size-bytes'='536870912');
====
---- QUERY
insert into iceberg_alltypes_parq_tblprop
select id, bool_col, int_col, bigint_col, float_col, double_col,
CAST(date_string_col as date FORMAT 'MM/DD/YY'), string_col, timestamp_col
from functional.alltypes;
---- RESULTS
: 7300
====
---- QUERY
alter table iceberg_alltypes_parq_tblprop set tblproperties (
'write.parquet.row-group-size-bytes'='1610612736',
'write.parquet.compression-codec'='zstd',
'write.parquet.compression-level'='13',
'write.parquet.page-size-bytes'='536870912',
'write.parquet.dict-size-bytes'='402653184');
====
---- QUERY
insert into iceberg_alltypes_parq_tblprop
select id, bool_col, int_col, bigint_col, float_col, double_col,
CAST(date_string_col as date FORMAT 'MM/DD/YY'), string_col, timestamp_col
from functional.alltypes;
---- RESULTS
: 7300
====
---- QUERY
alter table iceberg_alltypes_parq_tblprop set tblproperties (
'write.parquet.row-group-size-bytes'='1879048192',
'write.parquet.compression-codec'='zstd',
'write.parquet.compression-level'='18',
'write.parquet.page-size-bytes'='805306368',
'write.parquet.dict-size-bytes'='134217728');
====
---- QUERY
insert into iceberg_alltypes_parq_tblprop
select id, bool_col, int_col, bigint_col, float_col, double_col,
CAST(date_string_col as date FORMAT 'MM/DD/YY'), string_col, timestamp_col
from functional.alltypes;
---- RESULTS
: 7300
====
---- QUERY
alter table iceberg_alltypes_parq_tblprop set tblproperties (
'write.parquet.row-group-size-bytes'='2146435072',
'write.parquet.compression-codec'='zstd',
'write.parquet.compression-level'='22',
'write.parquet.page-size-bytes'='1073741824',
'write.parquet.dict-size-bytes'='65536');
====
---- QUERY
insert into iceberg_alltypes_parq_tblprop
select id, bool_col, int_col, bigint_col, float_col, double_col,
CAST(date_string_col as date FORMAT 'MM/DD/YY'), string_col, timestamp_col
from functional.alltypes;
---- RESULTS
: 7300
====
---- QUERY
alter table iceberg_alltypes_parq_tblprop unset tblproperties (
'write.parquet.row-group-size-bytes',
'write.parquet.compression-codec',
'write.parquet.compression-level',
'write.parquet.page-size-bytes',
'write.parquet.dict-size-bytes');
====
---- QUERY
insert into iceberg_alltypes_parq_tblprop
select id, bool_col, int_col, bigint_col, float_col, double_col,
CAST(date_string_col as date FORMAT 'MM/DD/YY'), string_col, timestamp_col
from functional.alltypes;
---- RESULTS
: 7300
====
---- QUERY
select count(*) from iceberg_alltypes_parq_tblprop;
---- RESULTS
51100
---- TYPES
BIGINT
====