mirror of
https://github.com/apache/impala.git
synced 2025-12-19 18:12:08 -05:00
IMPALA-13272: Analytic function of collections can lead to crash
The following query leads to DCHECK in debug builds and may cause more
subtle issues in RELEASE builds:
select
row_no
from (
select
arr.small,
row_number() over (order by arr.inner_struct1.str) as row_no
from functional_parquet.collection_struct_mix t,
t.arr_contains_nested_struct arr
) res;
The problem is in AnalyticPlanner.createSortInfo(). Because it is an
array unnesting operation, there are two tuples from which we try to add
slot descriptors to the sorting tuple: the array item tuple (which we'll
need) and the main tuple (which we don't actually need). The main tuple
contains the slot desc for the array. It is marked as materialised, so
we add it to the sorting tuple, but its child 'small' is not
materialised, which leads to the error.
This change solves the problem by only adding slot descs to the sorting
tuple if they are fully materialised, i.e. they and all their children
recursively are also materialised.
Testing:
- added test queries in sort-complex.test.
Change-Id: I71d1fa28ad4ff2e1a8fc5b91d3fc271c33765190
Reviewed-on: http://gerrit.cloudera.org:8080/21643
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
This commit is contained in:
committed by
Impala Public Jenkins
parent
5b7ed40d52
commit
c4230cff2c
@@ -1792,7 +1792,7 @@ public class Analyzer {
|
||||
|
||||
Preconditions.checkState(collTblRef.getCollectionExpr() instanceof SlotRef);
|
||||
SlotDescriptor desc = ((SlotRef) collTblRef.getCollectionExpr()).getDesc();
|
||||
desc.setIsMaterializedRecursively(true);
|
||||
desc.setShouldMaterializeRecursively(true);
|
||||
|
||||
try (TupleStackGuard guard = new TupleStackGuard(desc.getItemTupleDesc())) {
|
||||
if (slotPath.destType().isArrayType()) {
|
||||
|
||||
@@ -66,7 +66,7 @@ public class SlotDescriptor {
|
||||
private boolean isMaterialized_ = false;
|
||||
|
||||
// If true, then computeMemLayout() should be recursively called for itemTupleDesc_.
|
||||
private boolean materializeRecursively_ = false;
|
||||
private boolean shouldMaterializeRecursively_ = false;
|
||||
|
||||
// if false, this slot cannot be NULL
|
||||
// Note: it is still possible that a SlotRef pointing to this descriptor could have a
|
||||
@@ -139,15 +139,39 @@ public class SlotDescriptor {
|
||||
isMaterialized_ = value;
|
||||
LOG.trace("Mark slot(sid={}) of tuple(tid={}) as {}materialized",
|
||||
id_, parent_.getId(), isMaterialized_ ? "" : "non-");
|
||||
if (materializeRecursively_) {
|
||||
if (shouldMaterializeRecursively_) {
|
||||
Preconditions.checkState(itemTupleDesc_ != null);
|
||||
itemTupleDesc_.materializeSlots();
|
||||
}
|
||||
}
|
||||
public boolean isMaterializedRecursively() { return materializeRecursively_; }
|
||||
public void setIsMaterializedRecursively(boolean value) {
|
||||
materializeRecursively_ = value;
|
||||
public boolean shouldMaterializeRecursively() {
|
||||
return shouldMaterializeRecursively_;
|
||||
}
|
||||
|
||||
public void setShouldMaterializeRecursively(boolean value) {
|
||||
shouldMaterializeRecursively_ = value;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true iff this SlotDescriptor and all its children (if any) are materialized,
|
||||
* recursively.
|
||||
* This is relevant in case of complex types, for example when only a subset of the
|
||||
* fields of a struct is queried - in this case the queried fields are materialized, the
|
||||
* other fields are not materialized, but the struct itself also needs to be marked as
|
||||
* materialized because otherwise no memory layout can be calculated for the fields.
|
||||
*/
|
||||
public boolean isFullyMaterialized() {
|
||||
if (!isMaterialized()) return false;
|
||||
if (itemTupleDesc_ != null) {
|
||||
Preconditions.checkState(type_.isComplexType());
|
||||
for (SlotDescriptor childSlotDesc : itemTupleDesc_.getSlots()) {
|
||||
if (!childSlotDesc.isFullyMaterialized()) return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
public boolean getIsNullable() { return isNullable_; }
|
||||
public void setIsNullable(boolean value) { isNullable_ = value; }
|
||||
public int getByteSize() { return byteSize_; }
|
||||
@@ -487,7 +511,7 @@ public class SlotDescriptor {
|
||||
.add("label", label_)
|
||||
.add("type", typeStr)
|
||||
.add("materialized", isMaterialized_)
|
||||
.add("materializeRecursively", materializeRecursively_)
|
||||
.add("shouldMaterializeRecursively", shouldMaterializeRecursively_)
|
||||
.add("byteSize", byteSize_)
|
||||
.add("byteOffset", byteOffset_)
|
||||
.add("nullable", isNullable_)
|
||||
|
||||
@@ -276,7 +276,7 @@ public class SortInfo {
|
||||
Preconditions.checkNotNull(null);
|
||||
}
|
||||
} else if (dstType.isCollectionType()) {
|
||||
dstSlotDesc.setIsMaterializedRecursively(true);
|
||||
dstSlotDesc.setShouldMaterializeRecursively(true);
|
||||
}
|
||||
outputSmap_.put(srcExpr.clone(), dstExpr);
|
||||
materializedExprs_.add(srcExpr);
|
||||
|
||||
@@ -441,7 +441,7 @@ public class TupleDescriptor {
|
||||
nullIndicatorByte = nullIndicators.first;
|
||||
nullIndicatorBit = nullIndicators.second;
|
||||
}
|
||||
if (d.getType().isCollectionType() && d.isMaterializedRecursively()) {
|
||||
if (d.getType().isCollectionType() && d.shouldMaterializeRecursively()) {
|
||||
d.getItemTupleDesc().computeMemLayout();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -320,7 +320,7 @@ public class AnalyticPlanner {
|
||||
return createSortInfo(input, sortExprs, isAsc, nullsFirst, TSortingOrder.LEXICAL);
|
||||
}
|
||||
|
||||
/**
|
||||
/**
|
||||
* Same as above, but with extra parameter, sorting order.
|
||||
*/
|
||||
private SortInfo createSortInfo(PlanNode input, List<Expr> sortExprs,
|
||||
@@ -329,7 +329,14 @@ public class AnalyticPlanner {
|
||||
for (TupleId tid: input.getTupleIds()) {
|
||||
TupleDescriptor tupleDesc = analyzer_.getTupleDesc(tid);
|
||||
for (SlotDescriptor inputSlotDesc: tupleDesc.getSlots()) {
|
||||
if (inputSlotDesc.isMaterialized()) {
|
||||
// Only add fully materialized slot descriptors. It is possible that a slot desc
|
||||
// shows up that is materialized but not all of its children are, see
|
||||
// IMPALA-13272. This can happen for example if only a subset of the fields of a
|
||||
// struct is queried - the overall struct needs to be marked materialized to allow
|
||||
// the required children to be materialized where they are needed, but the struct
|
||||
// as a whole does not need to be added to the sorting tuple (or any other tuple)
|
||||
// - the required fields are added separately in their own right.
|
||||
if (inputSlotDesc.isFullyMaterialized()) {
|
||||
// Project out collection slots that are not supported in the sorting tuple
|
||||
// (collections within structs).
|
||||
if (SortInfo.isValidInSortingTuple(inputSlotDesc.getType())) {
|
||||
|
||||
@@ -151,6 +151,66 @@ select id, arr_contains_nested_struct from collection_struct_mix order by id;
|
||||
INT,STRING
|
||||
====
|
||||
---- QUERY
|
||||
# Regression test for # IMPALA-13272
|
||||
select
|
||||
row_no
|
||||
from (
|
||||
select
|
||||
arr.small,
|
||||
row_number() over (order by arr.inner_struct1.str) as row_no
|
||||
from collection_struct_mix t, t.arr_contains_nested_struct arr
|
||||
) res;
|
||||
---- RESULTS
|
||||
1
|
||||
2
|
||||
3
|
||||
4
|
||||
5
|
||||
6
|
||||
---- TYPES
|
||||
BIGINT
|
||||
====
|
||||
---- QUERY
|
||||
# Regression test for # IMPALA-13272
|
||||
select
|
||||
row_no, str
|
||||
from (
|
||||
select
|
||||
arr.small,
|
||||
arr.inner_struct1.str str,
|
||||
row_number() over (order by arr.inner_struct1.str) as row_no
|
||||
from collection_struct_mix t, t.arr_contains_nested_struct arr
|
||||
) res;
|
||||
---- RESULTS
|
||||
1,''
|
||||
2,'a few soju distilleries'
|
||||
3,'NULL'
|
||||
4,'NULL'
|
||||
5,'NULL'
|
||||
6,'NULL'
|
||||
---- TYPES
|
||||
BIGINT,STRING
|
||||
====
|
||||
---- QUERY
|
||||
# Regression test for # IMPALA-13272
|
||||
select
|
||||
row_no, str, small
|
||||
from (
|
||||
select
|
||||
arr.small,
|
||||
arr.inner_struct2.str str,
|
||||
row_number() over (order by arr.inner_struct2.str) as row_no
|
||||
from collection_struct_mix t, t.arr_contains_nested_struct arr
|
||||
) res limit 4;
|
||||
---- RESULTS
|
||||
1,'four spaceship captains',2
|
||||
2,'lots of soju distilleries',105
|
||||
3,'more spaceship captains',20
|
||||
4,'very few distilleries',104
|
||||
---- TYPES
|
||||
BIGINT,STRING, SMALLINT
|
||||
====
|
||||
---- QUERY
|
||||
# Sorting is not supported yet for collections within structs: IMPALA-12160. Test with a
|
||||
# struct that contains an array.
|
||||
select id, struct_contains_arr from collection_struct_mix order by id
|
||||
|
||||
Reference in New Issue
Block a user