diff --git a/be/src/runtime/sorter.cc b/be/src/runtime/sorter.cc index b8182e2d0..67603731f 100644 --- a/be/src/runtime/sorter.cc +++ b/be/src/runtime/sorter.cc @@ -1006,9 +1006,11 @@ bool Sorter::Run::ConvertOffsetsToPtrs(Tuple* tuple) { var_len_blocks_[var_len_blocks_index_]->buffer(); const vector& string_slots = sort_tuple_desc_->string_slots(); + int num_non_null_string_slots = 0; for (int i = 0; i < string_slots.size(); ++i) { SlotDescriptor* slot_desc = string_slots[i]; if (tuple->IsNull(slot_desc->null_indicator_offset())) continue; + ++num_non_null_string_slots; DCHECK(slot_desc->type().IsVarLenStringType()); StringValue* value = reinterpret_cast( @@ -1026,7 +1028,9 @@ bool Sorter::Run::ConvertOffsetsToPtrs(Tuple* tuple) { DCHECK_LE(block_index, var_len_blocks_.size()); DCHECK_EQ(block_index, var_len_blocks_index_ + 1); DCHECK_EQ(block_offset, 0); // The data is the first thing in the next block. - DCHECK_EQ(i, 0); // Var len data for tuple shouldn't be split across blocks. + // This must be the first slot with var len data for the tuple. Var len data + // for tuple shouldn't be split across blocks. + DCHECK_EQ(num_non_null_string_slots, 1); return false; } diff --git a/testdata/workloads/functional-query/queries/QueryTest/single-node-large-sorts.test b/testdata/workloads/functional-query/queries/QueryTest/single-node-large-sorts.test index 8000ad3c9..74b7eee6e 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/single-node-large-sorts.test +++ b/testdata/workloads/functional-query/queries/QueryTest/single-node-large-sorts.test @@ -29,6 +29,38 @@ STRING,STRING,STRING 'Faaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa','','' 'Faaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa','','' ---- RUNTIME_PROFILE -row_regex: .* TotalMergesPerformed: [^0] .* -row_regex: .* SpilledRuns: [^0] .* +row_regex: .* TotalMergesPerformed: [^0].* +row_regex: .* SpilledRuns: [^0].* +==== +---- QUERY +# Regression test for IMPALA-5554: first string column in sort tuple is null +# on boundary of spilled block. Test does two sorts with a NULL and non-NULL +# string column in both potential orders. +set max_block_mgr_memory=50m; +select * +from ( + select *, first_value(col) over (order by sort_col) fv + from ( + select concat(l_linestatus, repeat('a', 63)) sort_col, if(l_returnflag = 'foo', l_returnflag, NULL) col + from tpch_parquet.lineitem limit 100000 + union all + select if(l_returnflag = 'foo', l_returnflag, NULL) sort_col, concat(l_linestatus, repeat('a', 63)) col + from tpch_parquet.lineitem) q limit 100000 + ) q2 +limit 10 +---- TYPES +STRING,STRING,STRING +---- RESULTS +'Faaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa','NULL','NULL' +'Faaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa','NULL','NULL' +'Faaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa','NULL','NULL' +'Faaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa','NULL','NULL' +'Faaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa','NULL','NULL' +'Faaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa','NULL','NULL' +'Faaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa','NULL','NULL' +'Faaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa','NULL','NULL' +'Faaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa','NULL','NULL' +'Faaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa','NULL','NULL' +---- RUNTIME_PROFILE +row_regex: .* SpilledRuns: [^0].* ====