Files
impala/be/src/runtime/tuple.cc
Alex Behm 057b0b7dba IMPALA-2322: Set new pointer for ArrayValue in Tuple::DeepCopyVarlenData().
The bug was a simple oversight where we copied the array data, but forgot
to update the pointer of the corresponding ArrayValue.

Change-Id: Ib6ec0380f66194efc7ea3eb989535652eb8b526f
Reviewed-on: http://gerrit.cloudera.org:8080/855
Reviewed-by: Alex Behm <alex.behm@cloudera.com>
Tested-by: Internal Jenkins
2015-09-22 10:58:32 -07:00

238 lines
9.5 KiB
C++

// Copyright 2012 Cloudera Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "runtime/tuple.h"
#include <vector>
#include "exprs/expr.h"
#include "exprs/expr-context.h"
#include "runtime/array-value.h"
#include "runtime/descriptors.h"
#include "runtime/mem-pool.h"
#include "runtime/raw-value.h"
#include "runtime/tuple-row.h"
#include "runtime/string-value.h"
#include "util/debug-util.h"
#include "common/names.h"
namespace impala {
// Name of the Tuple class as a string constant.
// NOTE(review): presumably the name under which this class appears in LLVM IR, used
// for codegen type lookups - confirm against the declaration in tuple.h.
const char* Tuple::LLVM_CLASS_NAME = "class.impala::Tuple";
// Returns the size in bytes of this tuple: the fixed-length part given by
// desc.byte_size() plus, if 'desc' has var-len slots, the var-len data reported by
// VarlenByteSize().
int64_t Tuple::TotalByteSize(const TupleDescriptor& desc) const {
  const int64_t fixed_len_size = desc.byte_size();
  if (desc.HasVarlenSlots()) return fixed_len_size + VarlenByteSize(desc);
  return fixed_len_size;
}
int64_t Tuple::VarlenByteSize(const TupleDescriptor& desc) const {
int64_t result = 0;
vector<SlotDescriptor*>::const_iterator slot = desc.string_slots().begin();
for (; slot != desc.string_slots().end(); ++slot) {
DCHECK((*slot)->type().IsVarLenStringType());
if (IsNull((*slot)->null_indicator_offset())) continue;
const StringValue* string_val = GetStringSlot((*slot)->tuple_offset());
result += string_val->len;
}
slot = desc.collection_slots().begin();
for (; slot != desc.collection_slots().end(); ++slot) {
DCHECK((*slot)->type().IsCollectionType());
if (IsNull((*slot)->null_indicator_offset())) continue;
const ArrayValue* array_val = GetCollectionSlot((*slot)->tuple_offset());
uint8_t* array_data = array_val->ptr;
const TupleDescriptor& item_desc = *(*slot)->collection_item_descriptor();
for (int i = 0; i < array_val->num_tuples; ++i) {
result += reinterpret_cast<Tuple*>(array_data)->TotalByteSize(item_desc);
array_data += item_desc.byte_size();
}
}
return result;
}
// Allocates desc.byte_size() bytes from 'pool' for a new tuple, deep-copies this
// tuple into it (var-len data is also allocated from 'pool') and returns the copy.
Tuple* Tuple::DeepCopy(const TupleDescriptor& desc, MemPool* pool) {
  Tuple* copy = reinterpret_cast<Tuple*>(pool->Allocate(desc.byte_size()));
  this->DeepCopy(copy, desc, pool);
  return copy;
}
// Deep-copies this tuple into the caller-provided 'dst'. The fixed-length part is
// copied verbatim; if 'desc' has var-len slots, the var-len data is then duplicated
// into memory allocated from 'pool' and the slots of 'dst' are pointed at the copies.
// TODO: the logic is very similar to the other DeepCopy implementation aside from how
// memory is allocated - can we templatise it somehow to avoid redundancy without runtime
// overhead.
void Tuple::DeepCopy(Tuple* dst, const TupleDescriptor& desc, MemPool* pool) {
  memcpy(dst, this, desc.byte_size());
  // Nothing more to do when the tuple is entirely fixed-length.
  if (!desc.HasVarlenSlots()) return;
  dst->DeepCopyVarlenData(desc, pool);
}
// Duplicates all var-len data referenced by this tuple into memory allocated from
// 'pool' and updates the slots of this tuple in place to point at the copies. The
// caller is expected to have already copied the fixed-length part, i.e. 'this' is the
// destination of the deep copy.
// For collection slots the item tuples are copied as one contiguous block and, if the
// item descriptor has var-len slots, each copied item is processed recursively.
void Tuple::DeepCopyVarlenData(const TupleDescriptor& desc, MemPool* pool) {
  // allocate then copy all non-null string and collection slots
  for (vector<SlotDescriptor*>::const_iterator slot = desc.string_slots().begin();
      slot != desc.string_slots().end(); ++slot) {
    DCHECK((*slot)->type().IsVarLenStringType());
    if (IsNull((*slot)->null_indicator_offset())) continue;
    StringValue* string_v = GetStringSlot((*slot)->tuple_offset());
    char* string_copy = reinterpret_cast<char*>(pool->Allocate(string_v->len));
    // Passing a NULL pointer to memcpy() is undefined behaviour even when the length
    // is zero, and an empty string may have a NULL source and/or destination pointer.
    if (string_v->len > 0) memcpy(string_copy, string_v->ptr, string_v->len);
    string_v->ptr = string_copy;
  }
  for (vector<SlotDescriptor*>::const_iterator slot = desc.collection_slots().begin();
      slot != desc.collection_slots().end(); ++slot) {
    DCHECK((*slot)->type().IsCollectionType());
    if (IsNull((*slot)->null_indicator_offset())) continue;
    ArrayValue* av = GetCollectionSlot((*slot)->tuple_offset());
    const TupleDescriptor* item_desc = (*slot)->collection_item_descriptor();
    int array_byte_size = av->num_tuples * item_desc->byte_size();
    uint8_t* array_data = reinterpret_cast<uint8_t*>(pool->Allocate(array_byte_size));
    // Same NULL-pointer concern as for strings: skip the copy for empty arrays.
    if (array_byte_size > 0) memcpy(array_data, av->ptr, array_byte_size);
    // The ArrayValue must reference the copied items, not the original ones.
    av->ptr = array_data;
    if (!item_desc->HasVarlenSlots()) continue;
    // Copy the var-len data of each (already shallow-copied) item tuple.
    for (int i = 0; i < av->num_tuples; ++i) {
      int item_offset = i * item_desc->byte_size();
      Tuple* dst_item = reinterpret_cast<Tuple*>(array_data + item_offset);
      dst_item->DeepCopyVarlenData(*item_desc, pool);
    }
  }
}
void Tuple::DeepCopy(const TupleDescriptor& desc, char** data, int* offset,
bool convert_ptrs) {
Tuple* dst = reinterpret_cast<Tuple*>(*data);
memcpy(dst, this, desc.byte_size());
*data += desc.byte_size();
*offset += desc.byte_size();
if (desc.HasVarlenSlots()) dst->DeepCopyVarlenData(desc, data, offset, convert_ptrs);
}
// Appends all var-len data referenced by this tuple to the buffer at '*data' and
// updates the slots of this tuple in place to reference the copies. '*data' and
// '*offset' are advanced by the number of bytes written.
// If 'convert_ptrs' is true, each updated slot stores the value '*offset' had when
// its data was written (an offset encoded in the pointer field) instead of a real
// pointer into the buffer; see ConvertOffsetsToPointers() for the reverse mapping.
// For collection slots the item tuples are written as one contiguous block, followed
// by the var-len data of each item if the item descriptor has var-len slots.
void Tuple::DeepCopyVarlenData(const TupleDescriptor& desc, char** data, int* offset,
    bool convert_ptrs) {
  vector<SlotDescriptor*>::const_iterator slot = desc.string_slots().begin();
  for (; slot != desc.string_slots().end(); ++slot) {
    DCHECK((*slot)->type().IsVarLenStringType());
    if (IsNull((*slot)->null_indicator_offset())) continue;
    StringValue* string_v = GetStringSlot((*slot)->tuple_offset());
    // Passing a NULL pointer to memcpy() is undefined behaviour even when the length
    // is zero, and an empty string may have a NULL source pointer.
    if (string_v->len > 0) memcpy(*data, string_v->ptr, string_v->len);
    string_v->ptr = convert_ptrs ? reinterpret_cast<char*>(*offset) : *data;
    *data += string_v->len;
    *offset += string_v->len;
  }
  slot = desc.collection_slots().begin();
  for (; slot != desc.collection_slots().end(); ++slot) {
    DCHECK((*slot)->type().IsCollectionType());
    if (IsNull((*slot)->null_indicator_offset())) continue;
    ArrayValue* array_val = GetCollectionSlot((*slot)->tuple_offset());
    const TupleDescriptor& item_desc = *(*slot)->collection_item_descriptor();
    int array_byte_size = array_val->num_tuples * item_desc.byte_size();
    // Same NULL-pointer concern as for strings: skip the copy for empty arrays.
    if (array_byte_size > 0) memcpy(*data, array_val->ptr, array_byte_size);
    // Remember where the copied items start before the cursors are advanced; the
    // real address is needed below even when the slot stores an offset.
    uint8_t* array_data = reinterpret_cast<uint8_t*>(*data);
    array_val->ptr = convert_ptrs ? reinterpret_cast<uint8_t*>(*offset) : array_data;
    *data += array_byte_size;
    *offset += array_byte_size;
    // Copy per-tuple varlen data if necessary.
    if (!item_desc.HasVarlenSlots()) continue;
    for (int i = 0; i < array_val->num_tuples; ++i) {
      reinterpret_cast<Tuple*>(array_data)->DeepCopyVarlenData(
          item_desc, data, offset, convert_ptrs);
      array_data += item_desc.byte_size();
    }
  }
}
void Tuple::ConvertOffsetsToPointers(const TupleDescriptor& desc, uint8_t* tuple_data) {
vector<SlotDescriptor*>::const_iterator slot = desc.string_slots().begin();
for (; slot != desc.string_slots().end(); ++slot) {
DCHECK((*slot)->type().IsVarLenStringType());
if (IsNull((*slot)->null_indicator_offset())) continue;
StringValue* string_val = GetStringSlot((*slot)->tuple_offset());
int offset = reinterpret_cast<intptr_t>(string_val->ptr);
string_val->ptr = reinterpret_cast<char*>(tuple_data + offset);
}
slot = desc.collection_slots().begin();
for (; slot != desc.collection_slots().end(); ++slot) {
DCHECK((*slot)->type().IsCollectionType());
if (IsNull((*slot)->null_indicator_offset())) continue;
ArrayValue* array_val = GetCollectionSlot((*slot)->tuple_offset());
int offset = reinterpret_cast<intptr_t>(array_val->ptr);
array_val->ptr = tuple_data + offset;
uint8_t* array_data = array_val->ptr;
const TupleDescriptor& item_desc = *(*slot)->collection_item_descriptor();
for (int i = 0; i < array_val->num_tuples; ++i) {
reinterpret_cast<Tuple*>(array_data)->ConvertOffsetsToPointers(
item_desc, tuple_data);
array_data += item_desc.byte_size();
}
}
}
// Evaluates 'materialize_expr_ctxs' over 'row' and writes the results into the
// materialized slots of this tuple, in slot order: the n-th materialized slot of
// 'desc' receives the value of the n-th expr context. A NULL expr result sets the
// slot's null indicator; otherwise the value is written with RawValue::Write() using
// 'pool'.
// If 'collect_string_vals' is true, 'non_null_string_values' and '*total_string' are
// reset and then filled with pointers to the written var-len string slots and the
// sum of their lengths. Otherwise neither output parameter is touched.
template <bool collect_string_vals>
void Tuple::MaterializeExprs(
    TupleRow* row, const TupleDescriptor& desc,
    const vector<ExprContext*>& materialize_expr_ctxs, MemPool* pool,
    vector<StringValue*>* non_null_string_values, int* total_string) {
  if (collect_string_vals) {
    non_null_string_values->clear();
    *total_string = 0;
  }
  // Zero the first num_null_bytes() bytes of the tuple.
  // NOTE(review): presumably these are the null-indicator bytes - confirm layout.
  memset(this, 0, desc.num_null_bytes());

  // Index of the expr context that feeds the next materialized slot.
  int next_ctx_idx = 0;
  const vector<SlotDescriptor*>& slots = desc.slots();
  for (int slot_idx = 0; slot_idx < slots.size(); ++slot_idx) {
    SlotDescriptor* slot_desc = slots[slot_idx];
    if (!slot_desc->is_materialized()) continue;
    ExprContext* expr_ctx = materialize_expr_ctxs[next_ctx_idx++];
    // The FE ensures we don't get any TYPE_NULL expressions by picking an arbitrary type
    // when necessary, but does not do this for slot descs.
    // TODO: revisit this logic in the FE
    DCHECK(slot_desc->type().type == TYPE_NULL ||
        slot_desc->type() == expr_ctx->root()->type());
    void* value = expr_ctx->GetValue(row);
    if (value == NULL) {
      SetNull(slot_desc->null_indicator_offset());
      continue;
    }
    void* slot = GetSlot(slot_desc->tuple_offset());
    RawValue::Write(value, slot, slot_desc->type(), pool);
    if (collect_string_vals && slot_desc->type().IsVarLenStringType()) {
      StringValue* written_str = reinterpret_cast<StringValue*>(slot);
      non_null_string_values->push_back(written_str);
      *total_string += written_str->len;
    }
  }
  // Every expr context must have been consumed by exactly one materialized slot.
  DCHECK_EQ(next_ctx_idx, materialize_expr_ctxs.size());
}
// Explicit instantiations of MaterializeExprs() for both values of
// 'collect_string_vals'. The template is defined in this file, so these emit the two
// variants from this translation unit.
template void Tuple::MaterializeExprs<false>(TupleRow* row, const TupleDescriptor& desc,
const vector<ExprContext*>& materialize_expr_ctxs, MemPool* pool,
vector<StringValue*>* non_null_var_values, int* total_var_len);
template void Tuple::MaterializeExprs<true>(TupleRow* row, const TupleDescriptor& desc,
const vector<ExprContext*>& materialize_expr_ctxs, MemPool* pool,
vector<StringValue*>* non_null_var_values, int* total_var_len);
}