Files
impala/testdata/workloads/functional-query/queries/QueryTest/iceberg-insert.test
Zoltan Borok-Nagy a9aaeaa489 IMPALA-12409: Don't allow EXTERNAL Iceberg tables to point another Iceberg table in Hive catalog
This patch forbids creating an EXTERNAL Iceberg table that points
to another Iceberg table in the Hive Catalog. I.e. the following should
be forbidden:

  CREATE EXTERNAL TABLE ice_ext
  STORED BY ICEBERG
  TBLPROPERTIES ('iceberg.table_identifier'='db.tbl');

Loading such tables should also raise an error. Users need to query
the original Iceberg tables. Alternatively they can create VIEWs if
they want to query tables with a different name.

Testing:
 * added e2e tests for CREATE EXTERNAL TABLE
 * added e2e test about loading such table

Change-Id: Ifb0d7f0e7ec40fba356bd58b43f68d070432de71
Reviewed-on: http://gerrit.cloudera.org:8080/20429
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
2023-08-31 17:35:30 +00:00

335 lines
9.7 KiB
Plaintext

====
---- QUERY
# Create a table that is a subset of 'alltypes' table, i.e. it only
# contains the data types supported by Iceberg.
create table iceberg_alltypes(
id INT COMMENT 'Add a comment',
bool_col BOOLEAN,
int_col INT,
bigint_col BIGINT,
float_col FLOAT,
double_col DOUBLE,
date_col DATE,
string_col STRING,
timestamp_col TIMESTAMP
)
stored as iceberg
tblproperties('iceberg.catalog'='hadoop.tables');
---- RESULTS
'Table has been created.'
====
---- QUERY
insert into iceberg_alltypes
select id, bool_col, int_col, bigint_col, float_col, double_col,
CAST(date_string_col as date FORMAT 'MM/DD/YY'), string_col, timestamp_col
from functional.alltypes
order by id
limit 5;
---- RESULTS
: 5
====
---- QUERY
select * from iceberg_alltypes;
---- RESULTS
0,true,0,0,0,0,2009-01-01,'0',2009-01-01 00:00:00
1,false,1,10,1.100000023841858,10.1,2009-01-01,'1',2009-01-01 00:01:00
2,true,2,20,2.200000047683716,20.2,2009-01-01,'2',2009-01-01 00:02:00.100000000
3,false,3,30,3.299999952316284,30.3,2009-01-01,'3',2009-01-01 00:03:00.300000000
4,true,4,40,4.400000095367432,40.4,2009-01-01,'4',2009-01-01 00:04:00.600000000
---- TYPES
INT, BOOLEAN, INT, BIGINT, FLOAT, DOUBLE, DATE, STRING, TIMESTAMP
====
---- QUERY
# Create table with decimal types
CREATE TABLE decimal_tbl (
d1 DECIMAL(9,0),
d2 DECIMAL(10,0),
d3 DECIMAL(20,10),
d4 DECIMAL(38,38),
d5 DECIMAL(10,5),
d6 DECIMAL(9,0)
)
STORED AS iceberg
TBLPROPERTIES('iceberg.catalog'='hadoop.tables');
---- RESULTS
'Table has been created.'
====
---- QUERY
insert into decimal_tbl select * from functional_parquet.decimal_tbl;
select * from decimal_tbl;
---- RESULTS
1234,2222,1.2345678900,0.12345678900000000000000000000000000000,12345.78900,1
2345,111,12.3456789000,0.12345678900000000000000000000000000000,3.14100,1
12345,333,123.4567890000,0.12345678900000000000000000000000000000,11.22000,1
12345,333,1234.5678900000,0.12345678900000000000000000000000000000,0.10000,1
132842,333,12345.6789000000,0.12345678900000000000000000000000000000,0.77889,1
---- TYPES
DECIMAL, DECIMAL, DECIMAL, DECIMAL, DECIMAL, DECIMAL
====
---- QUERY
# Create non-Iceberg table with INT96 nanos.
create table int96_nanos (ts timestamp) stored as parquet;
====
---- QUERY
# Insert edge values as "normal" int96 timestamps that can represent all values.
set parquet_timestamp_type=INT96_NANOS;
insert into int96_nanos values
("1400-01-01"),
("2019-01-18 00:00:00.000000001"),
("2019-01-18 00:00:00.000001"),
("2019-01-18 00:00:00.001"),
("2019-01-18 23:59:59.999"),
("2019-01-18 23:59:59.999999"),
("2019-01-18 23:59:59.999999999")
====
---- QUERY
# Iceberg should write timestamps as INT64 micros.
create table ts_iceberg (ts timestamp) stored as iceberg
tblproperties('iceberg.catalog'='hadoop.tables');
insert into ts_iceberg select * from int96_nanos;
select * from ts_iceberg;
---- RESULTS
1400-01-01 00:00:00
2019-01-18 00:00:00
2019-01-18 00:00:00.000001000
2019-01-18 00:00:00.001000000
2019-01-18 23:59:59.999000000
2019-01-18 23:59:59.999999000
2019-01-18 23:59:59.999999000
====
---- QUERY
# Insert into hadoop catalog.
create table iceberg_hadoop_cat (i int)
stored as iceberg
tblproperties('iceberg.catalog'='hadoop.catalog',
'iceberg.catalog_location'='$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE.db/hadoop_catalog_test');
insert into iceberg_hadoop_cat values (1), (2), (3);
---- RESULTS
: 3
====
---- QUERY
select * from iceberg_hadoop_cat;
---- RESULTS
1
2
3
---- TYPES
INT
====
---- QUERY
show files in iceberg_hadoop_cat;
---- RESULTS
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/hadoop_catalog_test/$DATABASE/iceberg_hadoop_cat/data/.*.0.parq','.*','','$ERASURECODE_POLICY'
---- TYPES
STRING, STRING, STRING, STRING
====
---- QUERY
# Insert into hadoop catalog with custom table identifier.
create table iceberg_hadoop_cat_ti (i int)
stored as iceberg
tblproperties('iceberg.catalog'='hadoop.catalog',
'iceberg.catalog_location'='$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE.db/hadoop_catalog_test',
'iceberg.table_identifier'='test.custom_db.int_table');
insert into iceberg_hadoop_cat_ti values (1), (2), (3);
---- RESULTS
: 3
====
---- QUERY
select * from iceberg_hadoop_cat_ti;
---- RESULTS
1
2
3
---- TYPES
INT
====
---- QUERY
show files in iceberg_hadoop_cat_ti;
---- RESULTS
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/hadoop_catalog_test/test/custom_db/int_table/data/.*.0.parq','.*','','$ERASURECODE_POLICY'
---- TYPES
STRING, STRING, STRING, STRING
====
---- QUERY
# Insert into table stored in Iceberg's HiveCatalog
create table iceberg_hive_cat (i int)
stored as iceberg
tblproperties('iceberg.catalog'='hive.catalog');
---- RESULTS
'Table has been created.'
====
---- QUERY
insert into iceberg_hive_cat values (7);
select * from iceberg_hive_cat;
---- RESULTS
7
---- TYPES
INT
====
---- QUERY
# DROP the synchronized Iceberg table (data is purged).
drop table iceberg_hive_cat
---- RESULTS
'Table has been dropped.'
====
---- QUERY
# Insert into hive catalog with custom location.
create table iceberg_hive_cat_custom_loc (i int)
stored as iceberg
location '$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE.db/custom_hive_cat'
tblproperties('iceberg.catalog'='hive.catalog');
insert into iceberg_hive_cat_custom_loc values (1), (2), (3);
---- RESULTS
: 3
====
---- QUERY
select * from iceberg_hive_cat_custom_loc;
---- RESULTS
1
2
3
---- TYPES
INT
====
---- QUERY
show files in iceberg_hive_cat_custom_loc;
---- RESULTS
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/custom_hive_cat/data/.*.0.parq','.*','','$ERASURECODE_POLICY'
---- TYPES
STRING, STRING, STRING, STRING
====
---- QUERY
# Create a table that is a subset of 'alltypes' table, i.e. it only
# contains the data types supported by Iceberg.
create table iceberg_alltypes_parq_tblprop(
id INT COMMENT 'Add a comment',
bool_col BOOLEAN,
int_col INT,
bigint_col BIGINT,
float_col FLOAT,
double_col DOUBLE,
date_col DATE,
string_col STRING,
timestamp_col TIMESTAMP
)
stored as iceberg
tblproperties('write.format.default'='parquet',
'write.parquet.row-group-size-bytes'='8388608',
'write.parquet.compression-codec'='gzip',
'write.parquet.page-size-bytes'='65536',
'write.parquet.dict-size-bytes'='1073741824'
);
---- RESULTS
'Table has been created.'
====
---- QUERY
insert into iceberg_alltypes_parq_tblprop
select id, bool_col, int_col, bigint_col, float_col, double_col,
CAST(date_string_col as date FORMAT 'MM/DD/YY'), string_col, timestamp_col
from functional.alltypes;
---- RESULTS
: 7300
====
---- QUERY
alter table iceberg_alltypes_parq_tblprop set tblproperties (
'write.parquet.row-group-size-bytes'='536870912',
'write.parquet.compression-codec'='none',
'write.parquet.page-size-bytes'='134217728',
'write.parquet.dict-size-bytes'='805306368');
====
---- QUERY
insert into iceberg_alltypes_parq_tblprop
select id, bool_col, int_col, bigint_col, float_col, double_col,
CAST(date_string_col as date FORMAT 'MM/DD/YY'), string_col, timestamp_col
from functional.alltypes;
---- RESULTS
: 7300
====
---- QUERY
alter table iceberg_alltypes_parq_tblprop set tblproperties (
'write.parquet.row-group-size-bytes'='1073741824',
'write.parquet.compression-codec'='zstd',
'write.parquet.compression-level'='1',
'write.parquet.page-size-bytes'='402653184',
'write.parquet.dict-size-bytes'='536870912');
====
---- QUERY
insert into iceberg_alltypes_parq_tblprop
select id, bool_col, int_col, bigint_col, float_col, double_col,
CAST(date_string_col as date FORMAT 'MM/DD/YY'), string_col, timestamp_col
from functional.alltypes;
---- RESULTS
: 7300
====
---- QUERY
alter table iceberg_alltypes_parq_tblprop set tblproperties (
'write.parquet.row-group-size-bytes'='1610612736',
'write.parquet.compression-codec'='zstd',
'write.parquet.compression-level'='13',
'write.parquet.page-size-bytes'='536870912',
'write.parquet.dict-size-bytes'='402653184');
====
---- QUERY
insert into iceberg_alltypes_parq_tblprop
select id, bool_col, int_col, bigint_col, float_col, double_col,
CAST(date_string_col as date FORMAT 'MM/DD/YY'), string_col, timestamp_col
from functional.alltypes;
---- RESULTS
: 7300
====
---- QUERY
alter table iceberg_alltypes_parq_tblprop set tblproperties (
'write.parquet.row-group-size-bytes'='1879048192',
'write.parquet.compression-codec'='zstd',
'write.parquet.compression-level'='18',
'write.parquet.page-size-bytes'='805306368',
'write.parquet.dict-size-bytes'='134217728');
====
---- QUERY
insert into iceberg_alltypes_parq_tblprop
select id, bool_col, int_col, bigint_col, float_col, double_col,
CAST(date_string_col as date FORMAT 'MM/DD/YY'), string_col, timestamp_col
from functional.alltypes;
---- RESULTS
: 7300
====
---- QUERY
alter table iceberg_alltypes_parq_tblprop set tblproperties (
'write.parquet.row-group-size-bytes'='2146435072',
'write.parquet.compression-codec'='zstd',
'write.parquet.compression-level'='22',
'write.parquet.page-size-bytes'='1073741824',
'write.parquet.dict-size-bytes'='65536');
====
---- QUERY
insert into iceberg_alltypes_parq_tblprop
select id, bool_col, int_col, bigint_col, float_col, double_col,
CAST(date_string_col as date FORMAT 'MM/DD/YY'), string_col, timestamp_col
from functional.alltypes;
---- RESULTS
: 7300
====
---- QUERY
alter table iceberg_alltypes_parq_tblprop unset tblproperties (
'write.parquet.row-group-size-bytes',
'write.parquet.compression-codec',
'write.parquet.compression-level',
'write.parquet.page-size-bytes',
'write.parquet.dict-size-bytes');
====
---- QUERY
insert into iceberg_alltypes_parq_tblprop
select id, bool_col, int_col, bigint_col, float_col, double_col,
CAST(date_string_col as date FORMAT 'MM/DD/YY'), string_col, timestamp_col
from functional.alltypes;
---- RESULTS
: 7300
====
---- QUERY
select count(*) from iceberg_alltypes_parq_tblprop;
---- RESULTS
51100
---- TYPES
BIGINT
====