From a9aaeaa4896ce025bd9fff2fd60f230a1c1e3733 Mon Sep 17 00:00:00 2001 From: Zoltan Borok-Nagy Date: Mon, 28 Aug 2023 19:38:03 +0200 Subject: [PATCH] IMPALA-12409: Don't allow EXTERNAL Iceberg tables to point another Iceberg table in Hive catalog This patch forbids creating an EXTERNAL Iceberg table that points to another Iceberg table in the Hive Catalog. I.e. the following should be forbidden: CREATE EXTERNAL TABLE ice_ext STORED BY ICEBERG TBLPROPERTIES ('iceberg.table_identifier'='db.tbl'); Loading such tables should also raise an error. Users need to query the original Iceberg tables. Alternatively they can create VIEWs if they want to query tables with a different name. Testing: * added e2e tests for CREATE EXTERNAL TABLE * added e2e test about loading such table Change-Id: Ifb0d7f0e7ec40fba356bd58b43f68d070432de71 Reviewed-on: http://gerrit.cloudera.org:8080/20429 Reviewed-by: Impala Public Jenkins Tested-by: Impala Public Jenkins --- .../analysis/AlterTableSetTblProperties.java | 4 ++ .../impala/analysis/CreateTableStmt.java | 17 ++--- .../apache/impala/catalog/IcebergTable.java | 29 ++++++++ .../queries/QueryTest/iceberg-catalogs.test | 38 ---------- .../queries/QueryTest/iceberg-insert.test | 72 ------------------- .../queries/QueryTest/iceberg-negative.test | 60 +++++++++++++++- tests/query_test/test_iceberg.py | 27 +++++++ 7 files changed, 123 insertions(+), 124 deletions(-) diff --git a/fe/src/main/java/org/apache/impala/analysis/AlterTableSetTblProperties.java b/fe/src/main/java/org/apache/impala/analysis/AlterTableSetTblProperties.java index 85ed35108..46d6d271e 100644 --- a/fe/src/main/java/org/apache/impala/analysis/AlterTableSetTblProperties.java +++ b/fe/src/main/java/org/apache/impala/analysis/AlterTableSetTblProperties.java @@ -25,6 +25,8 @@ import org.apache.avro.SchemaParseException; import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; import org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils; import org.apache.iceberg.DataFile; +import org.apache.iceberg.mr.Catalogs; +import org.apache.iceberg.mr.InputFormatConfig; import org.apache.impala.authorization.AuthorizationConfig; import org.apache.impala.catalog.FeFsTable; import org.apache.impala.catalog.FeHBaseTable; @@ -155,6 +157,8 @@ public class AlterTableSetTblProperties extends AlterTableSetStmt { icebergPropertyCheck(IcebergTable.ICEBERG_CATALOG); icebergPropertyCheck(IcebergTable.ICEBERG_CATALOG_LOCATION); icebergPropertyCheck(IcebergTable.ICEBERG_TABLE_IDENTIFIER); + icebergPropertyCheck(Catalogs.NAME); + icebergPropertyCheck(InputFormatConfig.TABLE_IDENTIFIER); icebergPropertyCheck(IcebergTable.METADATA_LOCATION); if (tblProperties_.containsKey(IcebergTable.ICEBERG_FILE_FORMAT)) { icebergTableFormatCheck(tblProperties_.get(IcebergTable.ICEBERG_FILE_FORMAT)); diff --git a/fe/src/main/java/org/apache/impala/analysis/CreateTableStmt.java b/fe/src/main/java/org/apache/impala/analysis/CreateTableStmt.java index bc378c5d5..8f8422555 100644 --- a/fe/src/main/java/org/apache/impala/analysis/CreateTableStmt.java +++ b/fe/src/main/java/org/apache/impala/analysis/CreateTableStmt.java @@ -775,6 +775,10 @@ public class CreateTableStmt extends StatementBase { analyzer_.addWarning("The table property 'external.table.purge' will be set " + "to 'TRUE' on newly created managed Iceberg tables."); } + if (isExternalWithNoPurge() && IcebergUtil.isHiveCatalog(getTblProperties())) { + throw new AnalysisException("Cannot create EXTERNAL Iceberg table in the " + + "Hive Catalog."); + } } private void validateTableInHiveCatalog() throws AnalysisException { @@ -782,19 +786,6 @@ public class CreateTableStmt extends StatementBase { throw new AnalysisException(String.format("%s cannot be set for Iceberg table " + "stored in hive.catalog", IcebergTable.ICEBERG_CATALOG_LOCATION)); } - if (isExternalWithNoPurge()) { - String tableId = getTblProperties().get(IcebergTable.ICEBERG_TABLE_IDENTIFIER); - if (tableId == null || tableId.isEmpty()) { - tableId = getTblProperties().get(Catalogs.NAME); - } - if (tableId == null || tableId.isEmpty()) { - throw new AnalysisException(String.format("Table property '%s' is necessary " + - "for external Iceberg tables stored in hive.catalog. " + - "For creating a completely new Iceberg table, use 'CREATE TABLE' " + - "(no EXTERNAL keyword).", - IcebergTable.ICEBERG_TABLE_IDENTIFIER)); - } - } } private void validateTableInHadoopCatalog() throws AnalysisException { diff --git a/fe/src/main/java/org/apache/impala/catalog/IcebergTable.java b/fe/src/main/java/org/apache/impala/catalog/IcebergTable.java index 4c83acf02..2a9f7ade8 100644 --- a/fe/src/main/java/org/apache/impala/catalog/IcebergTable.java +++ b/fe/src/main/java/org/apache/impala/catalog/IcebergTable.java @@ -29,6 +29,10 @@ import org.apache.hadoop.hive.common.StatsSetupConst; import org.apache.hadoop.hive.metastore.IMetaStoreClient; import org.apache.hadoop.hive.metastore.TableType; import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; +import org.apache.iceberg.TableProperties; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.mr.Catalogs; +import org.apache.iceberg.mr.InputFormatConfig; import org.apache.impala.analysis.IcebergPartitionField; import org.apache.impala.analysis.IcebergPartitionSpec; import org.apache.impala.analysis.IcebergPartitionTransform; @@ -339,6 +343,7 @@ public class IcebergTable extends Table implements FeIcebergTable { throws TableLoadingException { final Timer.Context context = getMetrics().getTimer(Table.LOAD_DURATION_METRIC).time(); + verifyTable(msTbl); try { // Copy the table to check later if anything has changed. msTable_ = msTbl.deepCopy(); @@ -394,6 +399,30 @@ public class IcebergTable extends Table implements FeIcebergTable { } } + /** + * @throws TableLoadingException when it is unsafe to load the table. + */ + private void verifyTable(org.apache.hadoop.hive.metastore.api.Table msTbl) + throws TableLoadingException { + if (IcebergUtil.isHiveCatalog(msTbl.getParameters())) { + String tableId = IcebergUtil.getIcebergTableIdentifier( + msTbl.getDbName(), msTbl.getTableName()).toString(); + Map params = msTbl.getParameters(); + if (!tableId.equalsIgnoreCase( + params.getOrDefault(IcebergTable.ICEBERG_TABLE_IDENTIFIER, tableId)) || + !tableId.equalsIgnoreCase( + params.getOrDefault(Catalogs.NAME, tableId)) || + !tableId.equalsIgnoreCase( + params.getOrDefault(InputFormatConfig.TABLE_IDENTIFIER, tableId))) { + throw new TableLoadingException(String.format( + "Table %s cannot be loaded because it is an " + + "EXTERNAL table in the HiveCatalog that points to another table. " + + "Query the original table instead.", + getFullName())); + } + } + } + /** * Load schema and partitioning schemes directly from Iceberg. */ diff --git a/testdata/workloads/functional-query/queries/QueryTest/iceberg-catalogs.test b/testdata/workloads/functional-query/queries/QueryTest/iceberg-catalogs.test index c9430a306..843c12754 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/iceberg-catalogs.test +++ b/testdata/workloads/functional-query/queries/QueryTest/iceberg-catalogs.test @@ -122,41 +122,3 @@ row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/iceberg_hive_catalogs/data/labe ---- TYPES STRING, STRING, STRING, STRING ==== ----- QUERY -CREATE EXTERNAL TABLE iceberg_hive_catalogs_ext -STORED AS ICEBERG -TBLPROPERTIES('iceberg.catalog'='ice_hive_cat', -'iceberg.table_identifier'='$DATABASE.iceberg_hive_catalogs'); ----- RESULTS -'Table has been created.' -==== ----- QUERY -DESCRIBE FORMATTED iceberg_hive_catalogs_ext; ----- RESULTS: VERIFY_IS_SUBSET -'Location: ','$NAMENODE/test-warehouse/$DATABASE.db/iceberg_hive_catalogs','NULL' -'','write.format.default','parquet ' -'','iceberg.table_identifier','$DATABASE.iceberg_hive_catalogs' -'','name ','$DATABASE.iceberg_hive_catalogs' ----- TYPES -string, string, string -==== ----- QUERY -SELECT * FROM iceberg_hive_catalogs_ext; ----- RESULTS -'ice',3.14 ----- TYPES -STRING,DECIMAL -==== ----- QUERY -DROP TABLE iceberg_hive_catalogs_ext; ----- RESULTS -'Table has been dropped.' -==== ----- QUERY -REFRESH iceberg_hive_catalogs; -SELECT * FROM iceberg_hive_catalogs; ----- RESULTS -'ice',3.14 ----- TYPES -STRING,DECIMAL -==== diff --git a/testdata/workloads/functional-query/queries/QueryTest/iceberg-insert.test b/testdata/workloads/functional-query/queries/QueryTest/iceberg-insert.test index 5d72d2cdb..0773f0b63 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/iceberg-insert.test +++ b/testdata/workloads/functional-query/queries/QueryTest/iceberg-insert.test @@ -167,84 +167,12 @@ select * from iceberg_hive_cat; INT ==== ---- QUERY -# Query external Iceberg table -create external table iceberg_hive_cat_ext (i int) -stored as iceberg -location '$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE.db/iceberg_hive_cat' -tblproperties('iceberg.catalog'='hive.catalog', - 'iceberg.table_identifier'='$DATABASE.iceberg_hive_cat'); ----- RESULTS -'Table has been created.' -==== ----- QUERY -select * from iceberg_hive_cat_ext; ----- RESULTS -7 ----- TYPES -INT -==== ----- QUERY -# INSET INTO external Iceberg table stored in HiveCatalog. -insert into iceberg_hive_cat_ext values (8); -select * from iceberg_hive_cat_ext; ----- RESULTS -7 -8 ----- TYPES -INT -==== ----- QUERY -# Query original table -refresh iceberg_hive_cat; -select * from iceberg_hive_cat; ----- RESULTS -7 -8 ----- TYPES -INT -==== ----- QUERY -# DROP external Iceberg table -drop table iceberg_hive_cat_ext ----- RESULTS -'Table has been dropped.' -==== ----- QUERY -# Original table is not affected after external table drop. -refresh iceberg_hive_cat; -select * from iceberg_hive_cat; ----- RESULTS -7 -8 ----- TYPES -INT -==== ----- QUERY -# Create another external Iceberg table -create external table iceberg_hive_cat_ext_2 (i int) -stored as iceberg -location '$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE.db/iceberg_hive_cat' -tblproperties('iceberg.catalog'='hive.catalog', - 'iceberg.table_identifier'='$DATABASE.iceberg_hive_cat'); -select * from iceberg_hive_cat_ext_2 ----- RESULTS -7 -8 -==== ----- QUERY # DROP the synchronized Iceberg table (data is purged). drop table iceberg_hive_cat ---- RESULTS 'Table has been dropped.' ==== ---- QUERY -# The data has been purged, so querying the external table fails. -refresh iceberg_hive_cat_ext_2; -select * from iceberg_hive_cat_ext_2 ----- CATCH -Table does not exist -==== ----- QUERY # Insert into hive catalog with custom location. create table iceberg_hive_cat_custom_loc (i int) stored as iceberg diff --git a/testdata/workloads/functional-query/queries/QueryTest/iceberg-negative.test b/testdata/workloads/functional-query/queries/QueryTest/iceberg-negative.test index a3b55ee67..99ac5d090 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/iceberg-negative.test +++ b/testdata/workloads/functional-query/queries/QueryTest/iceberg-negative.test @@ -93,7 +93,55 @@ CREATE EXTERNAL TABLE iceberg_hive_tbls_external( ) STORED AS ICEBERG; ---- CATCH -Table property 'iceberg.table_identifier' is necessary for external Iceberg tables stored in hive.catalog. For creating a completely new Iceberg table, use 'CREATE TABLE' (no EXTERNAL keyword). +Cannot create EXTERNAL Iceberg table in the Hive Catalog. +==== +---- QUERY +CREATE EXTERNAL TABLE iceberg_hive_tbls_external( + level STRING +) +STORED AS ICEBERG +TBLPROPERTIES ('iceberg.table_identifier'='functional_parquet.iceberg_partitioned'); +---- CATCH +Cannot create EXTERNAL Iceberg table in the Hive Catalog. +==== +---- QUERY +CREATE EXTERNAL TABLE iceberg_hive_tbls_external( + level STRING +) +STORED AS ICEBERG +TBLPROPERTIES ('iceberg.catalog'='hive.catalog', +'iceberg.table_identifier'='functional_parquet.iceberg_partitioned'); +---- CATCH +Cannot create EXTERNAL Iceberg table in the Hive Catalog. +==== +---- QUERY +CREATE TABLE iceberg_hive_alias( + level STRING +) +STORED AS ICEBERG +TBLPROPERTIES ('iceberg.table_identifier'='functional_parquet.iceberg_mixed_file_format'); +---- CATCH +AlreadyExistsException: Table already exists: functional_parquet.iceberg_mixed_file_format +==== +---- QUERY +CREATE EXTERNAL TABLE iceberg_hive_tbls_external( + level STRING +) +STORED AS ICEBERG +TBLPROPERTIES ('iceberg.catalog'='hive.catalog', +'iceberg.table_identifier'='functional_parquet.iceberg_partitioned'); +---- CATCH +Cannot create EXTERNAL Iceberg table in the Hive Catalog. +==== +---- QUERY +CREATE EXTERNAL TABLE iceberg_hive_tbls_external( + level STRING +) +STORED AS ICEBERG +TBLPROPERTIES ('iceberg.catalog'='ice_hive_catalog', +'iceberg.table_identifier'='functional_parquet.iceberg_partitioned'); +---- CATCH +Cannot create EXTERNAL Iceberg table in the Hive Catalog. ==== ---- QUERY CREATE EXTERNAL TABLE fake_iceberg_table_hadoop_catalog @@ -249,6 +297,16 @@ ALTER TABLE iceberg_table_hadoop_catalog set TBLPROPERTIES('iceberg.table_identi AnalysisException: Changing the 'iceberg.table_identifier' table property is not supported for Iceberg table. ==== ---- QUERY +ALTER TABLE iceberg_table_hadoop_catalog set TBLPROPERTIES('name'='fake_db.fake_table'); +---- CATCH +AnalysisException: Changing the 'name' table property is not supported for Iceberg table. +==== +---- QUERY +ALTER TABLE iceberg_table_hadoop_catalog set TBLPROPERTIES('iceberg.mr.table.identifier'='fake_db.fake_table'); +---- CATCH +AnalysisException: Changing the 'iceberg.mr.table.identifier' table property is not supported for Iceberg table. +==== +---- QUERY ALTER TABLE iceberg_table_hadoop_catalog unset TBLPROPERTIES('iceberg.table_identifier'); ---- CATCH AnalysisException: Unsetting the 'iceberg.table_identifier' table property is not supported for Iceberg table. diff --git a/tests/query_test/test_iceberg.py b/tests/query_test/test_iceberg.py index 43f7974ba..02cf985d9 100644 --- a/tests/query_test/test_iceberg.py +++ b/tests/query_test/test_iceberg.py @@ -1086,6 +1086,33 @@ class TestIcebergTable(IcebergTestSuite): assert parquet_column_name_type_list == iceberg_column_name_type_list + @SkipIfFS.hive + def test_hive_external_forbidden(self, vector, unique_database): + tbl_name = unique_database + ".hive_ext" + error_msg = ("cannot be loaded because it is an EXTERNAL table in the HiveCatalog " + "that points to another table. Query the original table instead.") + self.execute_query("create table {0} (i int) stored by iceberg". + format(tbl_name)) + # 'iceberg.table_identifier' can refer to another table + self.run_stmt_in_hive("""alter table {0} set tblproperties + ('external.table.purge'='false', + 'iceberg.table_identifier'='functional_iceberg.iceberg_partitioned')""". + format(tbl_name)) + ex = self.execute_query_expect_failure(self.client, "refresh {0}".format(tbl_name)) + assert error_msg in str(ex) + # 'iceberg.mr.table.identifier' can refer to another table + self.run_stmt_in_hive(""" + alter table {0} unset tblproperties('iceberg.table_identifier')""". + format(tbl_name)) + self.run_stmt_in_hive("""alter table {0} set tblproperties + ('iceberg.mr.table.identifier'='functional_iceberg.iceberg_partitioned')""". + format(tbl_name)) + ex = self.execute_query_expect_failure(self.client, "refresh {0}".format(tbl_name)) + assert error_msg in str(ex) + # 'name' can also refer to another table but cannot be set by Hive/Impala. Also, + # during table migration both Impala and Hive clears existing table properties + # See IMPALA-12410 + @SkipIfFS.incorrent_reported_ec def test_compute_stats(self, vector, unique_database): self.run_test_case('QueryTest/iceberg-compute-stats', vector, unique_database)