mirror of
https://github.com/apache/impala.git
synced 2025-12-19 18:12:08 -05:00
IMPALA-14185: Error unnesting nested array from Iceberg with DELETE files
When trying to doubly unnest a 2D array from an Iceberg table that has delete files, but not for every data file, we run into an error: "Filtering an unnested collection that comes from a UNION [ALL] is not supported yet."

This happens because the Iceberg delete files cause a UNION node to be added to the plan, and there is an added "not-empty" conjunct on the collections.

IMPALA-12753 describes a bug where a conjunct on an unnested collection coming from a UNION ALL is only applied to the first UNION operand. To avoid incorrect results, we disabled this case in the commit for IMPALA-12695, but an unintended consequence is that it leads to this error with Iceberg tables.

However, in this case with Iceberg deletes, the bug described in IMPALA-12753 is not present because both sides of the UNION have the same tuple id, so conjuncts are naturally applied to both sides.

This commit relaxes the check, which now does not fire if all UNION operands have the same tuple ids.

Testing:
- existing tests related to IMPALA-12753 pass
- added a regression test with an Iceberg table with DELETE files

Change-Id: Ifbc6f580586d4b337f33a2f32052aa07f6fca828
Reviewed-on: http://gerrit.cloudera.org:8080/23107
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
This commit is contained in:
committed by
Impala Public Jenkins
parent
bff3561573
commit
191aec5298
@@ -90,6 +90,9 @@ public class UnnestNode extends PlanNode {
|
||||
|
||||
// Filtering an unnested collection that comes from a UNION [ALL] is not supported, see
|
||||
// IMPALA-12753.
|
||||
// The exception is if all children of the UNION node have the same tuple id(s), because
|
||||
// then the conjuncts are naturally applied to all UNION operands. This is the case for
|
||||
// UNION nodes inserted because of Iceberg delete operations. See IMPALA-14185.
|
||||
private void checkUnnestFromUnionWithPredicate(Analyzer analyzer)
|
||||
throws AnalysisException {
|
||||
PlanNode subplanInputNode = containingSubplanNode_.getChild(0);
|
||||
@@ -97,6 +100,8 @@ public class UnnestNode extends PlanNode {
|
||||
|
||||
UnionNode union = (UnionNode) subplanInputNode;
|
||||
|
||||
if (allUnionChildrenHaveSameTupleIds(union)) return;
|
||||
|
||||
// Tuple descriptors of the UNION and their descendants (for complex types).
|
||||
List<TupleDescriptor> unionDescs = new ArrayList<>();
|
||||
for (TupleId tid : union.getTupleIds()) {
|
||||
@@ -125,6 +130,15 @@ public class UnnestNode extends PlanNode {
|
||||
}
|
||||
}
|
||||
|
||||
private static boolean allUnionChildrenHaveSameTupleIds(UnionNode union) {
|
||||
if (union.getChildren().size() < 2) return true;
|
||||
|
||||
final List<TupleId> firstChildTupleIds = union.getChild(0).getTupleIds();
|
||||
return union.getChildren().stream()
|
||||
.map(planNode -> planNode.getTupleIds())
|
||||
.allMatch(tupleIdList -> tupleIdList.equals(firstChildTupleIds));
|
||||
}
|
||||
|
||||
// Returns the TupleDescriptors contained by 'tuple' (includes item tuple descs of
|
||||
// collections).
|
||||
private void getCollTupleDescs(TupleDescriptor tuple,
|
||||
|
||||
@@ -194,6 +194,29 @@ class TestNestedCollectionsInSelectList(ImpalaTestSuite):
|
||||
"""Queries where a map column is in the select list"""
|
||||
self.run_test_case('QueryTest/nested-map-in-select-list', vector)
|
||||
|
||||
@SkipIfFS.hive
|
||||
def test_nested_array_from_iceberg_with_delete(self, unique_database):
|
||||
"""Tests that a 2D array can be unnested from an Iceberg table that has delete files
|
||||
but not for all data files. In this case there is a UNION in the plan.
|
||||
Regression test for IMPALA-14185.
|
||||
"""
|
||||
# Fully qualified table name inside the per-test database.
tbl_name = unique_database + ".nested_arr_in_iceberg_with_delete"
|
||||
# Create the Iceberg table (format-version 2) with a nested ARRAY<ARRAY<int>> column.
self.execute_query("create table {} (id INT, arr ARRAY<ARRAY<int>>) stored by \
|
||||
iceberg tblproperties('format-version'='2')".format(tbl_name))
|
||||
|
||||
# INSERTs are done in Hive as Impala cannot write complex types.
# Two separate INSERT statements add rows id=1 and id=2, each holding the same
# array of five single-element inner arrays.
|
||||
self.run_stmt_in_hive("insert into {} values ( \
|
||||
1, array(array(1), array(2), array(3), array(4), array(5)))".format(tbl_name))
|
||||
self.run_stmt_in_hive("insert into {} values ( \
|
||||
2, array(array(1), array(2), array(3), array(4), array(5)))".format(tbl_name))
|
||||
# Impala can delete rows containing complex types.
# Only the id=2 row is removed; per the docstring this is the "delete files but not
# for all data files" situation the test targets.
|
||||
self.execute_query("delete from {} where id=2".format(tbl_name))
|
||||
|
||||
# Doubly unnest 'arr' (outer array a1, then its items) and order the output so the
# comparison below is deterministic.
result = self.execute_query_expect_success(self.client,
|
||||
"select id, a1.item.item unnested_item from {0}, {0}.arr a1, a1.item \
|
||||
order by id, unnested_item".format(tbl_name))
|
||||
# Only the id=1 row should remain, yielding one tab-separated output row per inner item.
assert result.data == ['1\t1', '1\t2', '1\t3', '1\t4', '1\t5']
|
||||
|
||||
|
||||
class TestMixedCollectionsAndStructsInSelectList(ImpalaTestSuite):
|
||||
"""Functional tests for the case where collections and structs are embedded into one
|
||||
|
||||
Reference in New Issue
Block a user