Cross-compiled UDF builtins.

When codegen is enabled, UDF builtins are loaded from the IR module
rather than from the native functions. Since we cannot yet run UDFs
without codegen, UDF builtins can currently only be run this way.
Once we add support for running UDFs without codegen, this will
allow us to switch back to the native functions for
development/debugging.

Change-Id: I948b113c61603801b84f80982384bbc07596f119
Reviewed-on: http://gerrit.ent.cloudera.com:8080/605
Tested-by: jenkins
Reviewed-by: Nong Li <nong@cloudera.com>
This commit is contained in:
Skye Wanderman-Milne
2013-10-04 12:08:49 -07:00
committed by Henry Robinson
parent bf139d1eba
commit 656ae8b1c8
7 changed files with 39 additions and 10 deletions

View File

@@ -51,6 +51,7 @@ set(IR_DEPENDENT_FILES
../exec/hash-join-node-ir.cc
../exec/hdfs-scanner-ir.cc
../exprs/expr-ir.cc
../exprs/udf-builtins.cc
../runtime/string-value-ir.cc
../util/hash-util-ir.cc
)

View File

@@ -23,6 +23,7 @@
#include "exec/hdfs-avro-scanner-ir.cc"
#include "exec/hdfs-scanner-ir.cc"
#include "exprs/expr-ir.cc"
#include "exprs/udf-builtins.cc"
#include "runtime/string-value-ir.cc"
#include "util/hash-util-ir.cc"
#else

View File

@@ -248,9 +248,10 @@ Status NativeUdfExpr::GetIrComputeFn(RuntimeState* state, llvm::Function** fn) {
Status NativeUdfExpr::GetUdf(RuntimeState* state, llvm::Function** udf) {
LlvmCodeGen* codegen = state->llvm_codegen();
bool codegen_disabled = (codegen == NULL);
if (udf_type_ == TFunctionBinaryType::NATIVE ||
udf_type_ == TFunctionBinaryType::BUILTIN) {
(udf_type_ == TFunctionBinaryType::BUILTIN && codegen_disabled)) {
void* udf_ptr;
if (udf_type_ == TFunctionBinaryType::NATIVE) {
RETURN_IF_ERROR(state->lib_cache()->GetFunctionPtr(
@@ -290,6 +291,14 @@ Status NativeUdfExpr::GetUdf(RuntimeState* state, llvm::Function** udf) {
// defined. This tells LLVM where the compiled function definition is located in
// memory.
codegen->execution_engine()->addGlobalMapping(*udf, udf_ptr);
} else if (udf_type_ == TFunctionBinaryType::BUILTIN && !codegen_disabled) {
const string& symbol = OpcodeRegistry::Instance()->GetFunctionSymbol(opcode_);
*udf = codegen->module()->getFunction(symbol);
if (*udf == NULL) {
stringstream ss;
ss << "Could not load builtin " << opcode_ << " with symbol: " << symbol;
return Status(ss.str());
}
} else {
DCHECK_EQ(udf_type_, TFunctionBinaryType::IR);

View File

@@ -21,7 +21,7 @@
#include "udf/udf.h"
namespace impala_udf {
class AnyVal;
struct AnyVal;
};
namespace impala {

View File

@@ -38,6 +38,14 @@ class OpcodeRegistry {
return functions_[index];
}
// Returns the function symbol registered for this opcode (used for loading IR
// functions). The opcode's integer value is used directly as the index into
// symbols_, and the DCHECKs assert that this index is within range. The result is
// a reference to the element stored in symbols_, not a copy.
const std::string& GetFunctionSymbol(TExprOpcode::type opcode) {
int index = static_cast<int>(opcode);
DCHECK_GE(index, 0);
DCHECK_LT(index, symbols_.size());
return symbols_[index];
}
// Registry is a singleton
static OpcodeRegistry* Instance() {
if (instance_ == NULL) {
@@ -56,6 +64,7 @@ class OpcodeRegistry {
// Sizes both per-opcode tables (functions_ and symbols_) to
// TExprOpcode::LAST_OPCODE entries, so that each can be indexed by an opcode's
// integer value, and then calls Init().
OpcodeRegistry() {
int num_opcodes = static_cast<int>(TExprOpcode::LAST_OPCODE);
functions_.resize(num_opcodes);
symbols_.resize(num_opcodes);
Init();
}
@@ -63,16 +72,18 @@ class OpcodeRegistry {
// opcode-registry-init.cc which is an auto-generated file
void Init();
void Add(TExprOpcode::type opcode, void* fn) {
void Add(TExprOpcode::type opcode, void* fn, const char* symbol) {
int index = static_cast<int>(opcode);
DCHECK_LT(index, functions_.size());
DCHECK_GE(index, 0);
functions_[index] = fn;
symbols_[index] = symbol;
}
static OpcodeRegistry* instance_;
static boost::mutex instance_lock_;
std::vector<void*> functions_;
std::vector<std::string> symbols_;
};
}

View File

@@ -227,6 +227,7 @@ def add_function(fn_meta_data, udf_interface):
entry["args"] = fn_meta_data[2]
entry["be_fn"] = fn_meta_data[3]
entry["sql_names"] = fn_meta_data[4]
entry["symbol"] = fn_meta_data[5] if udf_interface else "<no symbol specified>"
entry["udf_interface"] = udf_interface
if fn_name in meta_data_entries:
@@ -272,8 +273,10 @@ def generate_be_registry_init(filename):
for entry in entries:
opcode = entry["opcode"]
be_fn = entry["be_fn"]
symbol = entry["symbol"]
# We generate two casts to work around GCC Bug 11407
cc_output = "TExprOpcode::%s, (void*)(Expr::ComputeFn)%s" % (opcode, be_fn)
cc_output = 'TExprOpcode::%s, (void*)(Expr::ComputeFn)%s, "%s"' \
% (opcode, be_fn, symbol)
cc_registry_file.write(" this->Add(%s);\n" % (cc_output))
cc_registry_file.write(cc_registry_epilogue)
@@ -334,11 +337,12 @@ for function in impala_functions.functions:
print "Invalid function entry in impala_functions.py:\n\t" + repr(function)
sys.exit(1)
add_function(function, False)
for function in impala_functions.udf_functions:
if len(function) != 5:
print "Invalid function entry in impala_functions.py:\n\t" + repr(function)
sys.exit(1)
assert len(function) == 6, \
"Invalid function entry in impala_functions.py:\n\t" + repr(function)
add_function(function, True)
for function in generated_functions.functions:
if len(function) != 5:
print "Invalid function entry in generated_functions.py:\n\t" + repr(function)

View File

@@ -366,7 +366,10 @@ functions = [
# These functions are implemented against the UDF interface.
# TODO: this list should subsume the one above when all builtins are migrated.
udf_functions = [
['Udf_Math_Pi', 'DOUBLE', [], 'UdfBuiltins::Pi', ['udf_pi']],
['Udf_Math_Abs', 'DOUBLE', ['DOUBLE'], 'UdfBuiltins::Abs', ['udf_abs']],
['Udf_String_Lower', 'STRING', ['STRING'], 'UdfBuiltins::Lower', ['udf_lower']],
['Udf_Math_Pi', 'DOUBLE', [], 'UdfBuiltins::Pi', ['udf_pi'],
'_ZN6impala11UdfBuiltins2PiEPN10impala_udf15FunctionContextE'],
['Udf_Math_Abs', 'DOUBLE', ['DOUBLE'], 'UdfBuiltins::Abs', ['udf_abs'],
'_ZN6impala11UdfBuiltins3AbsEPN10impala_udf15FunctionContextERKNS1_9DoubleValE'],
['Udf_String_Lower', 'STRING', ['STRING'], 'UdfBuiltins::Lower', ['udf_lower'],
'_ZN6impala11UdfBuiltins5LowerEPN10impala_udf15FunctionContextERKNS1_9StringValE'],
]