IMPALA-5252: Fix crash in HiveUdfCall::GetStringVal() when mem_limit exceeded

We need to check for AllocateLocal() returning NULL. CopyFrom() takes
care of that for us.  Also adjust a few other places in the code base
that didn't have the check.

The new test reproduces the crash, but in order to get this test file to
execute, I had to move the xfail to be a function decorator. Apparently
xfail as a statement causes the test to not run at all. We should run
all of these queries even if they are non-determistic to at least verify
that impalad does not crash.

Change-Id: Iafefef24479164cc4d2b99191d2de28eb8b311b6
Reviewed-on: http://gerrit.cloudera.org:8080/6761
Reviewed-by: Dan Hecht <dhecht@cloudera.com>
Tested-by: Impala Public Jenkins
This commit is contained in:
Dan Hecht
2017-04-27 16:03:31 -07:00
committed by Impala Public Jenkins
parent 2e63752858
commit 741421de09
8 changed files with 27 additions and 25 deletions

View File

@@ -1291,10 +1291,8 @@ StringVal AggregateFunctions::ReservoirSampleFinalize(FunctionContext* ctx,
if (i < (src_state->num_samples() - 1)) out << ", ";
}
const string& out_str = out.str();
StringVal result_str(ctx, out_str.size());
if (LIKELY(!result_str.is_null)) {
memcpy(result_str.ptr, out_str.c_str(), result_str.len);
}
StringVal result_str = StringVal::CopyFrom(ctx,
reinterpret_cast<const uint8_t*>(out_str.c_str()), out_str.size());
src_state->Delete(ctx);
return result_str;
}

View File

@@ -323,15 +323,11 @@ DoubleVal HiveUdfCall::GetDoubleVal(ExprContext* ctx, const TupleRow* row) {
StringVal HiveUdfCall::GetStringVal(ExprContext* ctx, const TupleRow* row) {
DCHECK_EQ(type_.type, TYPE_STRING);
StringVal result = *reinterpret_cast<StringVal*>(Evaluate(ctx, row));
// Copy the string into a local allocation with the usual lifetime for expr results.
// Needed because the UDF output buffer is owned by the Java UDF executor and may be
// freed or reused by the next call into the Java UDF executor.
FunctionContext* fn_ctx = ctx->fn_context(fn_context_index_);
uint8_t* local_alloc = fn_ctx->impl()->AllocateLocal(result.len);
memcpy(local_alloc, result.ptr, result.len);
result.ptr = local_alloc;
return result;
return StringVal::CopyFrom(fn_ctx, result.ptr, result.len);
}
TimestampVal HiveUdfCall::GetTimestampVal(ExprContext* ctx, const TupleRow* row) {

View File

@@ -509,10 +509,8 @@ StringVal UdfBuiltins::PrintVector(FunctionContext* context, const StringVal& ar
}
ss << ">";
const string& str = ss.str();
StringVal result(context, str.size());
if (UNLIKELY(result.is_null)) return StringVal::null();
memcpy(result.ptr, str.c_str(), str.size());
return result;
return StringVal::CopyFrom(context, reinterpret_cast<const uint8_t*>(str.c_str()),
str.size());
}
DoubleVal UdfBuiltins::VectorGet(FunctionContext* context, const BigIntVal& index,

View File

@@ -139,8 +139,7 @@ void MinMerge(FunctionContext* context, const BufferVal& src, BufferVal* dst) {
StringVal MinFinalize(FunctionContext* context, const BufferVal& val) {
const MinState* state = reinterpret_cast<const MinState*>(val);
if (state->value == NULL) return StringVal::null();
StringVal result = StringVal(context, state->len);
memcpy(result.ptr, state->value, state->len);
StringVal result = StringVal::CopyFrom(context, state->value, state->len);
context->Free(state->value);
return result;
}

View File

@@ -47,6 +47,7 @@ StringVal UpperUdf(FunctionContext* context, const StringVal& input) {
if (input.is_null) return StringVal::null();
// Create a new StringVal object that's the same length as the input
StringVal result = StringVal(context, input.len);
if (result.is_null) return StringVal::null();
for (int i = 0; i < input.len; ++i) {
result.ptr[i] = toupper(input.ptr[i]);
}

View File

@@ -87,14 +87,15 @@ void StringConcatUpdate(FunctionContext* context, const StringVal& arg1,
const StringVal& arg2, StringVal* val) {
if (val->is_null) {
val->is_null = false;
*val = StringVal(context, arg1.len);
memcpy(val->ptr, arg1.ptr, arg1.len);
*val = StringVal::CopyFrom(context, arg1.ptr, arg1.len);
} else {
int new_len = val->len + arg1.len + arg2.len;
StringVal new_val(context, new_len);
memcpy(new_val.ptr, val->ptr, val->len);
memcpy(new_val.ptr + val->len, arg2.ptr, arg2.len);
memcpy(new_val.ptr + val->len + arg2.len, arg1.ptr, arg1.len);
if (!new_val.is_null) {
memcpy(new_val.ptr, val->ptr, val->len);
memcpy(new_val.ptr + val->len, arg2.ptr, arg2.len);
memcpy(new_val.ptr + val->len + arg2.len, arg1.ptr, arg1.len);
}
*val = new_val;
}
}

View File

@@ -50,3 +50,12 @@ select ndv(l_partkey), distinctpc(l_suppkey) from tpch.lineitem
---- CATCH
failed to allocate
====
---- QUERY
# IMPALA-5252: Verify HiveUdfCall allocations are checked.
create function replace_string(string) returns string
location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar'
symbol='org.apache.impala.ReplaceStringUdf';
select replace_string(l_comment) from tpch.lineitem limit 10;
---- CATCH
failed to allocate
====

View File

@@ -33,14 +33,14 @@ class TestAllocFail(CustomClusterTestSuite):
def test_alloc_fail_init(self, vector):
self.run_test_case('QueryTest/alloc-fail-init', vector)
# TODO: Rewrite or remove the non-deterministic test.
@pytest.mark.xfail(run=True, reason="IMPALA-2925: the execution is not deterministic "
"so some tests sometimes don't fail as expected")
@pytest.mark.execute_serially
@CustomClusterTestSuite.with_args("--stress_free_pool_alloc=3")
def test_alloc_fail_update(self, vector):
# TODO: Rewrite or remove the non-deterministic test.
pytest.xfail("IMPALA-2925: the execution is not deterministic so some "
"tests sometimes don't fail as expected")
def test_alloc_fail_update(self, vector, unique_database):
# Note that this test relies on pre-aggregation to exercise the Serialize() path so
# query option 'num_nodes' must not be 1. CustomClusterTestSuite.add_test_dimensions()
# already filters out vectors with 'num_nodes' != 0 so just assert it here.
assert vector.get_value('exec_option')['num_nodes'] == 0
self.run_test_case('QueryTest/alloc-fail-update', vector)
self.run_test_case('QueryTest/alloc-fail-update', vector, use_db=unique_database)