Files
impala/testdata/workloads/functional-query/queries/QueryTest/iceberg-ctas.test
LPL 78609dca32 IMPALA-11256: Fix SHOW FILES on Iceberg tables lists all files
SHOW FILES on Iceberg tables lists all files in table directory. Even
deleted files and metadata files. We should only shows the current data
files.

Testing:
  - existing tests

Change-Id: If07c2fd6e05e494f7240ccc147b8776a8f217179
Reviewed-on: http://gerrit.cloudera.org:8080/18455
Reviewed-by: Zoltan Borok-Nagy <boroknagyz@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
2022-04-28 17:48:23 +00:00

106 lines
3.4 KiB
Plaintext

====
---- QUERY
CREATE TABLE value_tbl (t TINYINT, i INT, b BIGINT, s STRING, ts TIMESTAMP, d date);
insert into value_tbl values (0, 1, 2, 'impala', '2021-02-26 16:16:59', '2021-02-26');
====
---- QUERY
# Test non-partitioned table in Iceberg HiveCatalog.
CREATE TABLE ice_ctas STORED AS ICEBERG AS SELECT i, b FROM value_tbl;
SELECT * FROM ice_ctas;
---- RESULTS
1,2
---- TYPES
INT,BIGINT
====
---- QUERY
show files in ice_ctas;
---- RESULTS
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_ctas/data/.*.0.parq','.*',''
---- TYPES
STRING, STRING, STRING
====
---- QUERY
# Test partitioned table in Iceberg HiveCatalog.
# Use old PARTITIONED BY syntax.
CREATE TABLE ice_ctas_part PARTITIONED BY(d) STORED AS ICEBERG AS SELECT s, ts, d FROM value_tbl;
SELECT * FROM ice_ctas_part where d='2021-02-26';
---- RESULTS
'impala',2021-02-26 16:16:59,2021-02-26
---- TYPES
STRING,TIMESTAMP,DATE
====
---- QUERY
show files in ice_ctas_part;
---- RESULTS
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_ctas_part/data/d=2021-02-26/.*.0.parq','.*',''
---- TYPES
STRING, STRING, STRING
====
---- QUERY
# INSERT an additional row to a different partition.
INSERT INTO ice_ctas_part VALUES ('fox','2021-02-27 16:16:59','2021-02-27');
SELECT * FROM ice_ctas_part;
---- RESULTS
'impala',2021-02-26 16:16:59,2021-02-26
'fox',2021-02-27 16:16:59,2021-02-27
---- TYPES
STRING,TIMESTAMP,DATE
====
---- QUERY
show files in ice_ctas_part;
---- RESULTS
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_ctas_part/data/d=2021-02-26/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_ctas_part/data/d=2021-02-27/.*.0.parq','.*',''
---- TYPES
STRING, STRING, STRING
====
---- QUERY
# Test CTAS in Iceberg HadoopTables catalog.
# Set table location to custom location.
# Use PARTITIONED BY SPEC
CREATE TABLE ice_ctas_hadoop_tables_part PARTITIONED BY SPEC (month(d))
STORED AS ICEBERG
LOCATION '/test-warehouse/$DATABASE.db/loc_test'
TBLPROPERTIES ('iceberg.catalog'='hadoop.tables') AS SELECT s, ts, d FROM value_tbl;
SELECT * FROM ice_ctas_hadoop_tables_part where d='2021-02-26';
---- RESULTS
'impala',2021-02-26 16:16:59,2021-02-26
---- TYPES
STRING,TIMESTAMP,DATE
====
---- QUERY
show files in ice_ctas_hadoop_tables_part;
---- RESULTS
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/loc_test/data/d_month=2021-02/.*.0.parq','.*',''
---- TYPES
STRING, STRING, STRING
====
---- QUERY
# Test CTAS in Iceberg HadoopCatalog catalog.
# Set 'iceberg.catalog_location' and 'iceberg.table_identifier'
# Partitioned by TRUNCATE
# Cast TINYINT to INT.
# INSERT additional row.
CREATE TABLE ice_ctas_hadoop_catalog_part PARTITIONED BY SPEC (truncate(3, s))
STORED AS ICEBERG
TBLPROPERTIES ('iceberg.catalog'='hadoop.catalog',
'iceberg.catalog_location'='/test-warehouse/$DATABASE.db/cat_loc',
'iceberg.table_identifier'='ns1.ns2.ctas')
AS SELECT cast(t as INT), s, d FROM value_tbl;
INSERT INTO ice_ctas_hadoop_catalog_part VALUES (1, 'lion', '2021-02-27');
SELECT * FROM ice_ctas_hadoop_catalog_part;
---- RESULTS
0,'impala',2021-02-26
1,'lion',2021-02-27
---- TYPES
INT,STRING,DATE
====
---- QUERY
show files in ice_ctas_hadoop_catalog_part;
---- RESULTS
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/cat_loc/ns1/ns2/ctas/data/s_trunc=imp/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/cat_loc/ns1/ns2/ctas/data/s_trunc=lio/.*.0.parq','.*',''
---- TYPES
STRING, STRING, STRING
====