diff --git a/fe/src/main/java/org/apache/impala/analysis/AlterTableSetTblProperties.java b/fe/src/main/java/org/apache/impala/analysis/AlterTableSetTblProperties.java
index 85ed35108..46d6d271e 100644
--- a/fe/src/main/java/org/apache/impala/analysis/AlterTableSetTblProperties.java
+++ b/fe/src/main/java/org/apache/impala/analysis/AlterTableSetTblProperties.java
@@ -25,6 +25,8 @@ import org.apache.avro.SchemaParseException;
 import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
 import org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils;
 import org.apache.iceberg.DataFile;
+import org.apache.iceberg.mr.Catalogs;
+import org.apache.iceberg.mr.InputFormatConfig;
 import org.apache.impala.authorization.AuthorizationConfig;
 import org.apache.impala.catalog.FeFsTable;
 import org.apache.impala.catalog.FeHBaseTable;
@@ -155,6 +157,8 @@ public class AlterTableSetTblProperties extends AlterTableSetStmt {
       icebergPropertyCheck(IcebergTable.ICEBERG_CATALOG);
       icebergPropertyCheck(IcebergTable.ICEBERG_CATALOG_LOCATION);
       icebergPropertyCheck(IcebergTable.ICEBERG_TABLE_IDENTIFIER);
+      icebergPropertyCheck(Catalogs.NAME);
+      icebergPropertyCheck(InputFormatConfig.TABLE_IDENTIFIER);
       icebergPropertyCheck(IcebergTable.METADATA_LOCATION);
       if (tblProperties_.containsKey(IcebergTable.ICEBERG_FILE_FORMAT)) {
         icebergTableFormatCheck(tblProperties_.get(IcebergTable.ICEBERG_FILE_FORMAT));
diff --git a/fe/src/main/java/org/apache/impala/analysis/CreateTableStmt.java b/fe/src/main/java/org/apache/impala/analysis/CreateTableStmt.java
index bc378c5d5..8f8422555 100644
--- a/fe/src/main/java/org/apache/impala/analysis/CreateTableStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/CreateTableStmt.java
@@ -775,6 +775,10 @@ public class CreateTableStmt extends StatementBase {
       analyzer_.addWarning("The table property 'external.table.purge' will be set " +
           "to 'TRUE' on newly created managed Iceberg tables.");
     }
+    if (isExternalWithNoPurge() && IcebergUtil.isHiveCatalog(getTblProperties())) {
+      throw new AnalysisException("Cannot create EXTERNAL Iceberg table in the " +
+          "Hive Catalog.");
+    }
   }
 
   private void validateTableInHiveCatalog() throws AnalysisException {
@@ -782,19 +786,6 @@ public class CreateTableStmt extends StatementBase {
       throw new AnalysisException(String.format("%s cannot be set for Iceberg table " +
           "stored in hive.catalog", IcebergTable.ICEBERG_CATALOG_LOCATION));
     }
-    if (isExternalWithNoPurge()) {
-      String tableId = getTblProperties().get(IcebergTable.ICEBERG_TABLE_IDENTIFIER);
-      if (tableId == null || tableId.isEmpty()) {
-        tableId = getTblProperties().get(Catalogs.NAME);
-      }
-      if (tableId == null || tableId.isEmpty()) {
-        throw new AnalysisException(String.format("Table property '%s' is necessary " +
-            "for external Iceberg tables stored in hive.catalog. " +
-            "For creating a completely new Iceberg table, use 'CREATE TABLE' " +
-            "(no EXTERNAL keyword).",
-            IcebergTable.ICEBERG_TABLE_IDENTIFIER));
-      }
-    }
   }
 
   private void validateTableInHadoopCatalog() throws AnalysisException {
diff --git a/fe/src/main/java/org/apache/impala/catalog/IcebergTable.java b/fe/src/main/java/org/apache/impala/catalog/IcebergTable.java
index 4c83acf02..2a9f7ade8 100644
--- a/fe/src/main/java/org/apache/impala/catalog/IcebergTable.java
+++ b/fe/src/main/java/org/apache/impala/catalog/IcebergTable.java
@@ -29,6 +29,10 @@ import org.apache.hadoop.hive.common.StatsSetupConst;
 import org.apache.hadoop.hive.metastore.IMetaStoreClient;
 import org.apache.hadoop.hive.metastore.TableType;
 import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
+import org.apache.iceberg.TableProperties;
+import org.apache.iceberg.catalog.TableIdentifier;
+import org.apache.iceberg.mr.Catalogs;
+import org.apache.iceberg.mr.InputFormatConfig;
 import org.apache.impala.analysis.IcebergPartitionField;
 import org.apache.impala.analysis.IcebergPartitionSpec;
 import org.apache.impala.analysis.IcebergPartitionTransform;
@@ -339,6 +343,7 @@ public class IcebergTable extends Table implements FeIcebergTable {
       throws TableLoadingException {
     final Timer.Context context =
         getMetrics().getTimer(Table.LOAD_DURATION_METRIC).time();
+    verifyTable(msTbl);
     try {
       // Copy the table to check later if anything has changed.
       msTable_ = msTbl.deepCopy();
@@ -394,6 +399,30 @@ public class IcebergTable extends Table implements FeIcebergTable {
     }
   }
 
+  /**
+   * @throws TableLoadingException when it is unsafe to load the table.
+   */
+  private void verifyTable(org.apache.hadoop.hive.metastore.api.Table msTbl)
+      throws TableLoadingException {
+    if (IcebergUtil.isHiveCatalog(msTbl.getParameters())) {
+      String tableId = IcebergUtil.getIcebergTableIdentifier(
+          msTbl.getDbName(), msTbl.getTableName()).toString();
+      Map<String, String> params = msTbl.getParameters();
+      if (!tableId.equalsIgnoreCase(
+              params.getOrDefault(IcebergTable.ICEBERG_TABLE_IDENTIFIER, tableId)) ||
+          !tableId.equalsIgnoreCase(
+              params.getOrDefault(Catalogs.NAME, tableId)) ||
+          !tableId.equalsIgnoreCase(
+              params.getOrDefault(InputFormatConfig.TABLE_IDENTIFIER, tableId))) {
+        throw new TableLoadingException(String.format(
+            "Table %s cannot be loaded because it is an " +
+            "EXTERNAL table in the HiveCatalog that points to another table. " +
+            "Query the original table instead.",
+            getFullName()));
+      }
+    }
+  }
+
   /**
    * Load schema and partitioning schemes directly from Iceberg.
    */
diff --git a/testdata/workloads/functional-query/queries/QueryTest/iceberg-catalogs.test b/testdata/workloads/functional-query/queries/QueryTest/iceberg-catalogs.test
index c9430a306..843c12754 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/iceberg-catalogs.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/iceberg-catalogs.test
@@ -122,41 +122,3 @@ row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/iceberg_hive_catalogs/data/labe
 ---- TYPES
 STRING, STRING, STRING, STRING
 ====
----- QUERY
-CREATE EXTERNAL TABLE iceberg_hive_catalogs_ext
-STORED AS ICEBERG
-TBLPROPERTIES('iceberg.catalog'='ice_hive_cat',
-'iceberg.table_identifier'='$DATABASE.iceberg_hive_catalogs');
----- RESULTS
-'Table has been created.'
-====
----- QUERY
-DESCRIBE FORMATTED iceberg_hive_catalogs_ext;
----- RESULTS: VERIFY_IS_SUBSET
-'Location: ','$NAMENODE/test-warehouse/$DATABASE.db/iceberg_hive_catalogs','NULL'
-'','write.format.default','parquet '
-'','iceberg.table_identifier','$DATABASE.iceberg_hive_catalogs'
-'','name ','$DATABASE.iceberg_hive_catalogs'
----- TYPES
-string, string, string
-====
----- QUERY
-SELECT * FROM iceberg_hive_catalogs_ext;
----- RESULTS
-'ice',3.14
----- TYPES
-STRING,DECIMAL
-====
----- QUERY
-DROP TABLE iceberg_hive_catalogs_ext;
----- RESULTS
-'Table has been dropped.'
-====
----- QUERY
-REFRESH iceberg_hive_catalogs;
-SELECT * FROM iceberg_hive_catalogs;
----- RESULTS
-'ice',3.14
----- TYPES
-STRING,DECIMAL
-====
diff --git a/testdata/workloads/functional-query/queries/QueryTest/iceberg-insert.test b/testdata/workloads/functional-query/queries/QueryTest/iceberg-insert.test
index 5d72d2cdb..0773f0b63 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/iceberg-insert.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/iceberg-insert.test
@@ -167,84 +167,12 @@ select * from iceberg_hive_cat;
 INT
 ====
 ---- QUERY
-# Query external Iceberg table
-create external table iceberg_hive_cat_ext (i int)
-stored as iceberg
-location '$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE.db/iceberg_hive_cat'
-tblproperties('iceberg.catalog'='hive.catalog',
-              'iceberg.table_identifier'='$DATABASE.iceberg_hive_cat');
----- RESULTS
-'Table has been created.'
-====
----- QUERY
-select * from iceberg_hive_cat_ext;
----- RESULTS
-7
----- TYPES
-INT
-====
----- QUERY
-# INSET INTO external Iceberg table stored in HiveCatalog.
-insert into iceberg_hive_cat_ext values (8);
-select * from iceberg_hive_cat_ext;
----- RESULTS
-7
-8
----- TYPES
-INT
-====
----- QUERY
-# Query original table
-refresh iceberg_hive_cat;
-select * from iceberg_hive_cat;
----- RESULTS
-7
-8
----- TYPES
-INT
-====
----- QUERY
-# DROP external Iceberg table
-drop table iceberg_hive_cat_ext
----- RESULTS
-'Table has been dropped.'
-====
----- QUERY
-# Original table is not affected after external table drop.
-refresh iceberg_hive_cat;
-select * from iceberg_hive_cat;
----- RESULTS
-7
-8
----- TYPES
-INT
-====
----- QUERY
-# Create another external Iceberg table
-create external table iceberg_hive_cat_ext_2 (i int)
-stored as iceberg
-location '$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE.db/iceberg_hive_cat'
-tblproperties('iceberg.catalog'='hive.catalog',
-              'iceberg.table_identifier'='$DATABASE.iceberg_hive_cat');
-select * from iceberg_hive_cat_ext_2
----- RESULTS
-7
-8
-====
----- QUERY
 # DROP the synchronized Iceberg table (data is purged).
 drop table iceberg_hive_cat
 ---- RESULTS
 'Table has been dropped.'
 ====
 ---- QUERY
-# The data has been purged, so querying the external table fails.
-refresh iceberg_hive_cat_ext_2;
-select * from iceberg_hive_cat_ext_2
----- CATCH
-Table does not exist
-====
----- QUERY
 # Insert into hive catalog with custom location.
 create table iceberg_hive_cat_custom_loc (i int)
 stored as iceberg
diff --git a/testdata/workloads/functional-query/queries/QueryTest/iceberg-negative.test b/testdata/workloads/functional-query/queries/QueryTest/iceberg-negative.test
index a3b55ee67..99ac5d090 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/iceberg-negative.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/iceberg-negative.test
@@ -93,7 +93,55 @@ CREATE EXTERNAL TABLE iceberg_hive_tbls_external(
 )
 STORED AS ICEBERG;
 ---- CATCH
-Table property 'iceberg.table_identifier' is necessary for external Iceberg tables stored in hive.catalog. For creating a completely new Iceberg table, use 'CREATE TABLE' (no EXTERNAL keyword).
+Cannot create EXTERNAL Iceberg table in the Hive Catalog.
+====
+---- QUERY
+CREATE EXTERNAL TABLE iceberg_hive_tbls_external(
+  level STRING
+)
+STORED AS ICEBERG
+TBLPROPERTIES ('iceberg.table_identifier'='functional_parquet.iceberg_partitioned');
+---- CATCH
+Cannot create EXTERNAL Iceberg table in the Hive Catalog.
+====
+---- QUERY
+CREATE EXTERNAL TABLE iceberg_hive_tbls_external(
+  level STRING
+)
+STORED AS ICEBERG
+TBLPROPERTIES ('iceberg.catalog'='hive.catalog',
+'iceberg.table_identifier'='functional_parquet.iceberg_partitioned');
+---- CATCH
+Cannot create EXTERNAL Iceberg table in the Hive Catalog.
+====
+---- QUERY
+CREATE TABLE iceberg_hive_alias(
+  level STRING
+)
+STORED AS ICEBERG
+TBLPROPERTIES ('iceberg.table_identifier'='functional_parquet.iceberg_mixed_file_format');
+---- CATCH
+AlreadyExistsException: Table already exists: functional_parquet.iceberg_mixed_file_format
+====
+---- QUERY
+CREATE EXTERNAL TABLE iceberg_hive_tbls_external(
+  level STRING
+)
+STORED AS ICEBERG
+TBLPROPERTIES ('iceberg.catalog'='hive.catalog',
+'iceberg.table_identifier'='functional_parquet.iceberg_partitioned');
+---- CATCH
+Cannot create EXTERNAL Iceberg table in the Hive Catalog.
+====
+---- QUERY
+CREATE EXTERNAL TABLE iceberg_hive_tbls_external(
+  level STRING
+)
+STORED AS ICEBERG
+TBLPROPERTIES ('iceberg.catalog'='ice_hive_catalog',
+'iceberg.table_identifier'='functional_parquet.iceberg_partitioned');
+---- CATCH
+Cannot create EXTERNAL Iceberg table in the Hive Catalog.
 ====
 ---- QUERY
 CREATE EXTERNAL TABLE fake_iceberg_table_hadoop_catalog
@@ -249,6 +297,16 @@ ALTER TABLE iceberg_table_hadoop_catalog set TBLPROPERTIES('iceberg.table_identi
 AnalysisException: Changing the 'iceberg.table_identifier' table property is not supported for Iceberg table.
 ====
 ---- QUERY
+ALTER TABLE iceberg_table_hadoop_catalog set TBLPROPERTIES('name'='fake_db.fake_table');
+---- CATCH
+AnalysisException: Changing the 'name' table property is not supported for Iceberg table.
+====
+---- QUERY
+ALTER TABLE iceberg_table_hadoop_catalog set TBLPROPERTIES('iceberg.mr.table.identifier'='fake_db.fake_table');
+---- CATCH
+AnalysisException: Changing the 'iceberg.mr.table.identifier' table property is not supported for Iceberg table.
+====
+---- QUERY
 ALTER TABLE iceberg_table_hadoop_catalog unset TBLPROPERTIES('iceberg.table_identifier');
 ---- CATCH
 AnalysisException: Unsetting the 'iceberg.table_identifier' table property is not supported for Iceberg table.
diff --git a/tests/query_test/test_iceberg.py b/tests/query_test/test_iceberg.py
index 43f7974ba..02cf985d9 100644
--- a/tests/query_test/test_iceberg.py
+++ b/tests/query_test/test_iceberg.py
@@ -1086,6 +1086,33 @@ class TestIcebergTable(IcebergTestSuite):
 
     assert parquet_column_name_type_list == iceberg_column_name_type_list
 
+  @SkipIfFS.hive
+  def test_hive_external_forbidden(self, vector, unique_database):
+    tbl_name = unique_database + ".hive_ext"
+    error_msg = ("cannot be loaded because it is an EXTERNAL table in the HiveCatalog "
+        "that points to another table. Query the original table instead.")
+    self.execute_query("create table {0} (i int) stored by iceberg".
+        format(tbl_name))
+    # 'iceberg.table_identifier' can refer to another table
+    self.run_stmt_in_hive("""alter table {0} set tblproperties
+        ('external.table.purge'='false',
+        'iceberg.table_identifier'='functional_iceberg.iceberg_partitioned')""".
+        format(tbl_name))
+    ex = self.execute_query_expect_failure(self.client, "refresh {0}".format(tbl_name))
+    assert error_msg in str(ex)
+    # 'iceberg.mr.table.identifier' can refer to another table
+    self.run_stmt_in_hive("""
+        alter table {0} unset tblproperties('iceberg.table_identifier')""".
+        format(tbl_name))
+    self.run_stmt_in_hive("""alter table {0} set tblproperties
+        ('iceberg.mr.table.identifier'='functional_iceberg.iceberg_partitioned')""".
+        format(tbl_name))
+    ex = self.execute_query_expect_failure(self.client, "refresh {0}".format(tbl_name))
+    assert error_msg in str(ex)
+    # 'name' can also refer to another table, but it cannot be set by Hive/Impala.
+    # Also, during table migration both Impala and Hive clear existing table
+    # properties. See IMPALA-12410.
+
   @SkipIfFS.incorrent_reported_ec
   def test_compute_stats(self, vector, unique_database):
     self.run_test_case('QueryTest/iceberg-compute-stats', vector, unique_database)