From fa774bfb85bd7560c24d6efaae46da502c910f9b Mon Sep 17 00:00:00 2001 From: Nong Li Date: Thu, 13 Nov 2014 11:06:09 -0800 Subject: [PATCH] IMPALA-1392: Fix crash from UDFs that throw exceptions. Change-Id: Ic8775d6344aba9655511f99c0a1760e8e148d0cf Reviewed-on: http://gerrit.sjc.cloudera.com:8080/5243 Reviewed-by: Nong Li Tested-by: jenkins --- be/src/exprs/hive-udf-call.cc | 116 ++++++++++-------- be/src/exprs/hive-udf-call.h | 3 + .../queries/QueryTest/hive-udf.test | 22 ++++ .../queries/QueryTest/load-hive-udfs.test | 4 + .../com/cloudera/impala/TestUdfException.java | 29 +++++ 5 files changed, 121 insertions(+), 53 deletions(-) create mode 100644 tests/test-hive-udfs/src/main/java/com/cloudera/impala/TestUdfException.java diff --git a/be/src/exprs/hive-udf-call.cc b/be/src/exprs/hive-udf-call.cc index 391b139e1..731a2a8f2 100644 --- a/be/src/exprs/hive-udf-call.cc +++ b/be/src/exprs/hive-udf-call.cc @@ -40,23 +40,25 @@ const char* EXECUTOR_CLOSE_SIGNATURE = "()V"; namespace impala { struct JniContext { - jclass class_; - jobject executor_; - jmethodID evaluate_id_; - jmethodID close_id_; + jclass cl; + jobject executor; + jmethodID evalute_id; + jmethodID close_id; - uint8_t* input_values_buffer_; - uint8_t* input_nulls_buffer_; - uint8_t* output_value_buffer_; - uint8_t output_null_value_; + uint8_t* input_values_buffer; + uint8_t* input_nulls_buffer; + uint8_t* output_value_buffer; + uint8_t output_null_value; + bool warning_logged; - AnyVal* output_anyval_; + AnyVal* output_anyval; JniContext() { - executor_ = NULL; - input_values_buffer_ = NULL; - input_nulls_buffer_ = NULL; - output_value_buffer_ = NULL; + executor = NULL; + input_values_buffer = NULL; + input_nulls_buffer = NULL; + output_value_buffer = NULL; + warning_logged = false; } }; @@ -75,19 +77,23 @@ AnyVal* HiveUdfCall::Evaluate(ExprContext* ctx, TupleRow* row) { JNIEnv* env = getJNIEnv(); if (env == NULL) { - fn_ctx->AddWarning("Could not get JNIEnv."); - return NULL; + stringstream ss; + ss << "Hive UDF path=" << fn_.hdfs_location << " class=" << fn_.scalar_fn.symbol + << " failed due to JNI issue getting the JNIEnv object"; + fn_ctx->SetError(ss.str().c_str()); + jni_ctx->output_anyval->is_null = true; + return jni_ctx->output_anyval; } - // Evaluate all the children values and put the results in input_values_buffer_ + // Evaluate all the children values and put the results in input_values_buffer for (int i = 0; i < GetNumChildren(); ++i) { void* v = ctx->GetValue(GetChild(i), row); if (v == NULL) { - jni_ctx->input_nulls_buffer_[i] = 1; + jni_ctx->input_nulls_buffer[i] = 1; } else { - uint8_t* input_ptr = jni_ctx->input_values_buffer_ + input_byte_offsets_[i]; - jni_ctx->input_nulls_buffer_[i] = 0; + uint8_t* input_ptr = jni_ctx->input_values_buffer + input_byte_offsets_[i]; + jni_ctx->input_nulls_buffer[i] = 0; switch (GetChild(i)->type().type) { case TYPE_BOOLEAN: case TYPE_TINYINT: @@ -119,23 +125,27 @@ AnyVal* HiveUdfCall::Evaluate(ExprContext* ctx, TupleRow* row) { // Using this version of Call has the lowest overhead. This eliminates the // vtable lookup and setting up return stacks. env->CallNonvirtualVoidMethodA( - jni_ctx->executor_, jni_ctx->class_, jni_ctx->evaluate_id_, NULL); + jni_ctx->executor, jni_ctx->cl, jni_ctx->evalute_id, NULL); Status status = JniUtil::GetJniExceptionMsg(env); if (!status.ok()) { - stringstream ss; - ss << "Hive UDF path=" << fn_.hdfs_location << " class=" << fn_.scalar_fn.symbol - << " failed due to: " << status.GetErrorMsg(); - fn_ctx->AddWarning(ss.str().c_str()); - return NULL; + if (!jni_ctx->warning_logged) { + stringstream ss; + ss << "Hive UDF path=" << fn_.hdfs_location << " class=" << fn_.scalar_fn.symbol + << " failed due to: " << status.GetErrorMsg(); + fn_ctx->AddWarning(ss.str().c_str()); + jni_ctx->warning_logged = true; + } + jni_ctx->output_anyval->is_null = true; + return jni_ctx->output_anyval; } - // Write output_value_buffer_ to output_anyval_ - if (jni_ctx->output_null_value_) { - jni_ctx->output_anyval_->is_null = true; + // Write output_value_buffer to output_anyval + if (jni_ctx->output_null_value) { + jni_ctx->output_anyval->is_null = true; } else { - AnyValUtil::SetAnyVal(jni_ctx->output_value_buffer_, type(), jni_ctx->output_anyval_); + AnyValUtil::SetAnyVal(jni_ctx->output_value_buffer, type(), jni_ctx->output_anyval); } - return jni_ctx->output_anyval_; + return jni_ctx->output_anyval; } Status HiveUdfCall::Prepare(RuntimeState* state, const RowDescriptor& row_desc, @@ -173,15 +183,15 @@ Status HiveUdfCall::Open(RuntimeState* state, ExprContext* ctx, JNIEnv* env = getJNIEnv(); if (env == NULL) return Status("Failed to get/create JVM"); - RETURN_IF_ERROR(JniUtil::GetGlobalClassRef(env, EXECUTOR_CLASS, &jni_ctx->class_)); + RETURN_IF_ERROR(JniUtil::GetGlobalClassRef(env, EXECUTOR_CLASS, &jni_ctx->cl)); jmethodID executor_ctor = env->GetMethodID( - jni_ctx->class_, "", EXECUTOR_CTOR_SIGNATURE); + jni_ctx->cl, "", EXECUTOR_CTOR_SIGNATURE); RETURN_ERROR_IF_EXC(env); - jni_ctx->evaluate_id_ = env->GetMethodID( - jni_ctx->class_, "evaluate", EXECUTOR_EVALUATE_SIGNATURE); + jni_ctx->evalute_id = env->GetMethodID( + jni_ctx->cl, "evaluate", EXECUTOR_EVALUATE_SIGNATURE); RETURN_ERROR_IF_EXC(env); - jni_ctx->close_id_ = env->GetMethodID( - jni_ctx->class_, "close", EXECUTOR_CLOSE_SIGNATURE); + jni_ctx->close_id = env->GetMethodID( + jni_ctx->cl, "close", EXECUTOR_CLOSE_SIGNATURE); RETURN_ERROR_IF_EXC(env); THiveUdfExecutorCtorParams ctor_params; @@ -189,14 +199,14 @@ Status HiveUdfCall::Open(RuntimeState* state, ExprContext* ctx, ctor_params.local_location = local_location_; ctor_params.input_byte_offsets = input_byte_offsets_; - jni_ctx->input_values_buffer_ = new uint8_t[input_buffer_size_]; - jni_ctx->input_nulls_buffer_ = new uint8_t[GetNumChildren()]; - jni_ctx->output_value_buffer_ = new uint8_t[type().GetSlotSize()]; + jni_ctx->input_values_buffer = new uint8_t[input_buffer_size_]; + jni_ctx->input_nulls_buffer = new uint8_t[GetNumChildren()]; + jni_ctx->output_value_buffer = new uint8_t[type().GetSlotSize()]; - ctor_params.input_buffer_ptr = (int64_t)jni_ctx->input_values_buffer_; - ctor_params.input_nulls_ptr = (int64_t)jni_ctx->input_nulls_buffer_; - ctor_params.output_buffer_ptr = (int64_t)jni_ctx->output_value_buffer_; - ctor_params.output_null_ptr = (int64_t)&jni_ctx->output_null_value_; + ctor_params.input_buffer_ptr = (int64_t)jni_ctx->input_values_buffer; + ctor_params.input_nulls_ptr = (int64_t)jni_ctx->input_nulls_buffer; + ctor_params.output_buffer_ptr = (int64_t)jni_ctx->output_value_buffer; + ctor_params.output_null_ptr = (int64_t)&jni_ctx->output_null_value; jbyteArray ctor_params_bytes; @@ -207,12 +217,12 @@ Status HiveUdfCall::Open(RuntimeState* state, ExprContext* ctx, RETURN_IF_ERROR(SerializeThriftMsg(env, &ctor_params, &ctor_params_bytes)); // Create the java executor object - jni_ctx->executor_ = env->NewObject(jni_ctx->class_, + jni_ctx->executor = env->NewObject(jni_ctx->cl, executor_ctor, ctor_params_bytes); RETURN_ERROR_IF_EXC(env); - jni_ctx->executor_ = env->NewGlobalRef(jni_ctx->executor_); + jni_ctx->executor = env->NewGlobalRef(jni_ctx->executor); - jni_ctx->output_anyval_ = CreateAnyVal(type_); + jni_ctx->output_anyval = CreateAnyVal(type_); return Status::OK; } @@ -225,19 +235,19 @@ void HiveUdfCall::Close(RuntimeState* state, ExprContext* ctx, if (jni_ctx != NULL) { JNIEnv* env = getJNIEnv(); - if (jni_ctx->executor_ != NULL) { + if (jni_ctx->executor != NULL) { env->CallNonvirtualVoidMethodA( - jni_ctx->executor_, jni_ctx->class_, jni_ctx->close_id_, NULL); - env->DeleteGlobalRef(jni_ctx->executor_); + jni_ctx->executor, jni_ctx->cl, jni_ctx->close_id, NULL); + env->DeleteGlobalRef(jni_ctx->executor); // Clear any exceptions. Not much we can do about them here. Status status = JniUtil::GetJniExceptionMsg(env); if (!status.ok()) VLOG_QUERY << status.GetErrorMsg(); } - delete[] jni_ctx->input_values_buffer_; - delete[] jni_ctx->input_nulls_buffer_; - delete[] jni_ctx->output_value_buffer_; + delete[] jni_ctx->input_values_buffer; + delete[] jni_ctx->input_nulls_buffer; + delete[] jni_ctx->output_value_buffer; - delete jni_ctx->output_anyval_; + delete jni_ctx->output_anyval; } else { DCHECK(!ctx->opened_); } diff --git a/be/src/exprs/hive-udf-call.h b/be/src/exprs/hive-udf-call.h index 2d2de012b..ba4fb048e 100644 --- a/be/src/exprs/hive-udf-call.h +++ b/be/src/exprs/hive-udf-call.h @@ -84,6 +84,9 @@ class HiveUdfCall : public Expr { virtual std::string DebugString() const; private: + // Evalutes the UDF over row. Returns the result as an AnyVal. This function + // never returns NULL but rather an AnyVal object with is_null set to true on + // error. AnyVal* Evaluate(ExprContext* ctx, TupleRow* row); // The path on the local FS to the UDF's jar diff --git a/testdata/workloads/functional-query/queries/QueryTest/hive-udf.test b/testdata/workloads/functional-query/queries/QueryTest/hive-udf.test index 83d959276..af4f7c03c 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/hive-udf.test +++ b/testdata/workloads/functional-query/queries/QueryTest/hive-udf.test @@ -99,3 +99,25 @@ int, int, int ---- RESULTS 10,20,30 ==== +---- QUERY +# IMPALA-1392: Hive UDFs that throw exceptions should return NULL +select udf_test.throws_exception(); +---- TYPES +boolean +---- RESULTS +NULL +==== +---- QUERY +select udf_test.throws_exception() from functional.alltypestiny; +---- TYPES +boolean +---- RESULTS +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +==== diff --git a/testdata/workloads/functional-query/queries/QueryTest/load-hive-udfs.test b/testdata/workloads/functional-query/queries/QueryTest/load-hive-udfs.test index fdc6269fe..4ea2b4c4e 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/load-hive-udfs.test +++ b/testdata/workloads/functional-query/queries/QueryTest/load-hive-udfs.test @@ -84,4 +84,8 @@ symbol='com.cloudera.impala.TestUdf'; create function udf_test.identity(string, string, string) returns string location '/test-warehouse/impala-hive-udfs.jar' symbol='com.cloudera.impala.TestUdf'; + +create function udf_test.throws_exception() returns boolean +location '/test-warehouse/impala-hive-udfs.jar' +symbol='com.cloudera.impala.TestUdfException'; ==== diff --git a/tests/test-hive-udfs/src/main/java/com/cloudera/impala/TestUdfException.java b/tests/test-hive-udfs/src/main/java/com/cloudera/impala/TestUdfException.java new file mode 100644 index 000000000..f09c71c8f --- /dev/null +++ b/tests/test-hive-udfs/src/main/java/com/cloudera/impala/TestUdfException.java @@ -0,0 +1,29 @@ +// Copyright 2012 Cloudera Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package com.cloudera.impala; + +import org.apache.hadoop.hive.ql.exec.UDF; +import org.apache.hadoop.io.BooleanWritable; + +/** + * Simple UDFs that always throws an exception. + */ +public class TestUdfException extends UDF { + + // Identity UDFs for all the supported types + public BooleanWritable evaluate() { + throw new NullPointerException("Test exception"); + } +}