IMPALA-12584: Add backend config to restrict data file locations for Iceberg tables

This change adds the backend flag 'iceberg_restrict_data_file_location'.
When the flag is set to 'true', Impala raises an error if at least one
data file of an Iceberg table is outside of the table directory.
The default value of the flag is 'false'.

Tests:
 - custom-cluster test added to validate both states of the flag

Change-Id: I60e3d93b5039dc977417e7b097b3d6ddeda52de4
Reviewed-on: http://gerrit.cloudera.org:8080/20786
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
This commit is contained in:
Peter Rozsa
2023-12-12 08:56:35 +01:00
committed by Impala Public Jenkins
parent 4114fe8db6
commit bd0ba644e6
6 changed files with 68 additions and 0 deletions

View File

@@ -0,0 +1,52 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import absolute_import, division, print_function
import pytest
from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
# Query issued by the tests in this module.
SELECT_STATEMENT = (
    "SELECT COUNT(1) FROM "
    "functional_parquet.iceberg_multiple_storage_locations"
)

# Substring expected in the error text when the query fails.
EXCEPTION = (
    "IcebergTableLoadingException: "
    "Error loading metadata for Iceberg table"
)
class TestIcebergStrictDataFileLocation(CustomClusterTestSuite):
  """Tests for checking the behaviour of startup flag
  'iceberg_restrict_data_file_location'.

  Each test passes an explicit value of the flag through 'catalogd_args' and
  then runs SELECT_STATEMENT, checking whether the query fails or succeeds.
  """

  @classmethod
  def get_workload(cls):
    """Return the name of the workload these tests belong to."""
    return 'functional-query'

  @CustomClusterTestSuite.with_args(
      catalogd_args='--iceberg_restrict_data_file_location=true')
  @pytest.mark.execute_serially
  def test_restricted_location(self, vector):
    """If the flag is enabled, tables with multiple storage locations will fail
    to load their datafiles."""
    result = self.execute_query_expect_failure(self.client, SELECT_STATEMENT)
    # The failure has to be the Iceberg table loading error specifically, so the
    # test does not pass on an unrelated query failure.
    assert EXCEPTION in str(result)

  @CustomClusterTestSuite.with_args(
      catalogd_args='--iceberg_restrict_data_file_location=false')
  @pytest.mark.execute_serially
  def test_disabled(self, vector):
    """If the flag is disabled, and tables with multiple storage locations
    are configured properly, the tables load successfully."""
    result = self.execute_query_expect_success(self.client, SELECT_STATEMENT)
    # NOTE(review): presumably result.data is a list of row strings, so this
    # checks that COUNT(1) returned exactly '9' - confirm against the test table.
    assert '9' in result.data