IMPALA-5031: Allow UBSAN to be set on codegen

This adds a new buildall options, -full_ubsan, that is stronger than
just -ubsan, in that it forces code generated by cross compilation to
LLVM IR to use the undefined behavior sanitizer as well. Because this
slows down testing significantly, it is not made the default when
using -ubsan.

Change-Id: I054e78dd172ee140f2095a523595ff030494e560
Reviewed-on: http://gerrit.cloudera.org:8080/11380
Reviewed-by: Jim Apple <jbapple-impala@apache.org>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
This commit is contained in:
Jim Apple
2017-03-11 19:47:48 -08:00
committed by Impala Public Jenkins
parent b8d68cad00
commit 2a274a1597
13 changed files with 76 additions and 42 deletions

View File

@@ -233,6 +233,13 @@ if ("${CMAKE_BUILD_TYPE}" STREQUAL "ADDRESS_SANITIZER")
SET(CLANG_IR_CXX_FLAGS "${CLANG_IR_CXX_FLAGS}" "-DADDRESS_SANITIZER")
endif()
if (UBSAN_CODEGEN)
set(CLANG_IR_CXX_FLAGS "${CLANG_IR_CXX_FLAGS}" "-DUNDEFINED_SANITIZER"
"-fno-omit-frame-pointer" "-fsanitize=undefined" "-fno-wrapv" "-ggdb3"
"-fno-sanitize=alignment,function,vptr,float-divide-by-zero,float-cast-overflow"
"-DUNDEFINED_SANITIZER_SUPPRESSIONS=\\\"$ENV{IMPALA_HOME}/bin/ubsan-suppressions.txt\\\"")
endif()
IF($ENV{ENABLE_IMPALA_IR_DEBUG_INFO} STREQUAL "true")
# -g: emit debug symbols in IR. These increase IR size and memory overhead of LLVM, but
# are useful for debugging codegened code and interpreting codegen disassembly

View File

@@ -70,6 +70,17 @@ namespace llvm {
class IRBuilderDefaultInserter;
}
// The number of function calls replaced is not knowable when UBSAN is enabled, since it
// can double the number of references to a function. To fix, we replaced
// "DCHECK_EQ(replaced" with "DCHECK_REPLACE_COUNT(replaced":
//
// find be/src -type f -execdir sed -i s/DCHECK_EQ\(replaced,\ /DCHECK_REPLACE_COUNT\(replaced,\ /g {} \;
#if defined(UNDEFINED_SANITIZER)
#define DCHECK_REPLACE_COUNT(p, q) DCHECK_GE(p, q); DCHECK_LE(p, 2*(q))
#else
#define DCHECK_REPLACE_COUNT(p, q) DCHECK_EQ(p, q)
#endif
namespace impala {
class CodegenCallGraph;

View File

@@ -1000,13 +1000,13 @@ Status GroupingAggregator::CodegenAddBatchImpl(
// Replace call sites
replaced =
codegen->ReplaceCallSites(add_batch_impl_fn, eval_grouping_expr_fn, "EvalProbeRow");
DCHECK_EQ(replaced, 1);
DCHECK_REPLACE_COUNT(replaced, 1);
replaced = codegen->ReplaceCallSites(add_batch_impl_fn, hash_fn, "HashRow");
DCHECK_EQ(replaced, 1);
DCHECK_REPLACE_COUNT(replaced, 1);
replaced = codegen->ReplaceCallSites(add_batch_impl_fn, build_equals_fn, "Equals");
DCHECK_EQ(replaced, 1);
DCHECK_REPLACE_COUNT(replaced, 1);
HashTableCtx::HashTableReplacedConstants replaced_constants;
const bool stores_duplicates = false;
@@ -1065,17 +1065,17 @@ Status GroupingAggregator::CodegenAddBatchStreamingImpl(
// Replace call sites
int replaced = codegen->ReplaceCallSites(
add_batch_streaming_impl_fn, update_tuple_fn, "UpdateTuple");
DCHECK_EQ(replaced, 2);
DCHECK_REPLACE_COUNT(replaced, 2);
replaced = codegen->ReplaceCallSites(
add_batch_streaming_impl_fn, eval_grouping_expr_fn, "EvalProbeRow");
DCHECK_EQ(replaced, 1);
DCHECK_REPLACE_COUNT(replaced, 1);
replaced = codegen->ReplaceCallSites(add_batch_streaming_impl_fn, hash_fn, "HashRow");
DCHECK_EQ(replaced, 1);
DCHECK_REPLACE_COUNT(replaced, 1);
replaced = codegen->ReplaceCallSites(add_batch_streaming_impl_fn, equals_fn, "Equals");
DCHECK_EQ(replaced, 1);
DCHECK_REPLACE_COUNT(replaced, 1);
HashTableCtx::HashTableReplacedConstants replaced_constants;
const bool stores_duplicates = false;

View File

@@ -1090,27 +1090,27 @@ Status HdfsAvroScanner::CodegenDecodeAvroData(const HdfsScanNodeBase* node,
llvm::Function* init_tuple_fn;
RETURN_IF_ERROR(CodegenInitTuple(node, codegen, &init_tuple_fn));
int replaced = codegen->ReplaceCallSites(fn, init_tuple_fn, "InitTuple");
DCHECK_EQ(replaced, 1);
DCHECK_REPLACE_COUNT(replaced, 1);
replaced = codegen->ReplaceCallSites(fn, materialize_tuple_fn, "MaterializeTuple");
DCHECK_EQ(replaced, 1);
DCHECK_REPLACE_COUNT(replaced, 1);
llvm::Function* eval_conjuncts_fn;
RETURN_IF_ERROR(ExecNode::CodegenEvalConjuncts(codegen, conjuncts, &eval_conjuncts_fn));
replaced = codegen->ReplaceCallSites(fn, eval_conjuncts_fn, "EvalConjuncts");
DCHECK_EQ(replaced, 1);
DCHECK_REPLACE_COUNT(replaced, 1);
llvm::Function* copy_strings_fn;
RETURN_IF_ERROR(Tuple::CodegenCopyStrings(
codegen, *node->tuple_desc(), &copy_strings_fn));
replaced = codegen->ReplaceCallSites(fn, copy_strings_fn, "CopyStrings");
DCHECK_EQ(replaced, 1);
DCHECK_REPLACE_COUNT(replaced, 1);
int tuple_byte_size = node->tuple_desc()->byte_size();
replaced = codegen->ReplaceCallSitesWithValue(fn,
codegen->GetI32Constant(tuple_byte_size), "tuple_byte_size");
DCHECK_EQ(replaced, 1);
DCHECK_REPLACE_COUNT(replaced, 1);
fn->setName("DecodeAvroData");
*decode_avro_data_fn = codegen->FinalizeFunction(fn);

View File

@@ -1033,7 +1033,7 @@ Status HdfsParquetScanner::Codegen(HdfsScanNodeBase* node,
DCHECK(eval_conjuncts_fn != nullptr);
int replaced = codegen->ReplaceCallSites(fn, eval_conjuncts_fn, "EvalConjuncts");
DCHECK_EQ(replaced, 1);
DCHECK_REPLACE_COUNT(replaced, 1);
llvm::Function* eval_runtime_filters_fn;
RETURN_IF_ERROR(CodegenEvalRuntimeFilters(
@@ -1041,7 +1041,7 @@ Status HdfsParquetScanner::Codegen(HdfsScanNodeBase* node,
DCHECK(eval_runtime_filters_fn != nullptr);
replaced = codegen->ReplaceCallSites(fn, eval_runtime_filters_fn, "EvalRuntimeFilters");
DCHECK_EQ(replaced, 1);
DCHECK_REPLACE_COUNT(replaced, 1);
fn->setName("ProcessScratchBatch");
*process_scratch_batch_fn = codegen->FinalizeFunction(fn);

View File

@@ -539,19 +539,19 @@ Status HdfsScanner::CodegenWriteAlignedTuples(const HdfsScanNodeBase* node,
int replaced = codegen->ReplaceCallSites(write_tuples_fn, write_complete_tuple_fn,
"WriteCompleteTuple");
DCHECK_EQ(replaced, 1);
DCHECK_REPLACE_COUNT(replaced, 1);
llvm::Function* copy_strings_fn;
RETURN_IF_ERROR(Tuple::CodegenCopyStrings(
codegen, *node->tuple_desc(), &copy_strings_fn));
replaced = codegen->ReplaceCallSites(
write_tuples_fn, copy_strings_fn, "CopyStrings");
DCHECK_EQ(replaced, 1);
DCHECK_REPLACE_COUNT(replaced, 1);
int tuple_byte_size = node->tuple_desc()->byte_size();
replaced = codegen->ReplaceCallSitesWithValue(write_tuples_fn,
codegen->GetI32Constant(tuple_byte_size), "tuple_byte_size");
DCHECK_EQ(replaced, 1);
DCHECK_REPLACE_COUNT(replaced, 1);
*write_aligned_tuples_fn = codegen->FinalizeFunction(write_tuples_fn);
if (*write_aligned_tuples_fn == NULL) {
@@ -568,21 +568,21 @@ Status HdfsScanner::CodegenInitTuple(
// Replace all of the constants in InitTuple() to specialize the code.
int replaced = codegen->ReplaceCallSitesWithBoolConst(
*init_tuple_fn, node->num_materialized_partition_keys() > 0, "has_template_tuple");
DCHECK_EQ(replaced, 1);
DCHECK_REPLACE_COUNT(replaced, 1);
const TupleDescriptor* tuple_desc = node->tuple_desc();
replaced = codegen->ReplaceCallSitesWithValue(*init_tuple_fn,
codegen->GetI32Constant(tuple_desc->byte_size()), "tuple_byte_size");
DCHECK_EQ(replaced, 1);
DCHECK_REPLACE_COUNT(replaced, 1);
replaced = codegen->ReplaceCallSitesWithValue(*init_tuple_fn,
codegen->GetI32Constant(tuple_desc->null_bytes_offset()),
"null_bytes_offset");
DCHECK_EQ(replaced, 1);
DCHECK_REPLACE_COUNT(replaced, 1);
replaced = codegen->ReplaceCallSitesWithValue(*init_tuple_fn,
codegen->GetI32Constant(tuple_desc->num_null_bytes()), "num_null_bytes");
DCHECK_EQ(replaced, 1);
DCHECK_REPLACE_COUNT(replaced, 1);
*init_tuple_fn = codegen->FinalizeFunction(*init_tuple_fn);
if (*init_tuple_fn == nullptr) {
@@ -651,7 +651,7 @@ Status HdfsScanner::CodegenEvalRuntimeFilters(
int replaced = codegen->ReplaceCallSites(eval_runtime_filter_fn, eval_one_filter_fn,
"FilterContext4Eval");
DCHECK_EQ(replaced, 1);
DCHECK_REPLACE_COUNT(replaced, 1);
llvm::Value* idx = codegen->GetI32Constant(i);
llvm::Value* passed_filter = builder.CreateCall(

View File

@@ -796,11 +796,11 @@ Status PhjBuilder::CodegenProcessBuildBatch(LlvmCodeGen* codegen, llvm::Function
// Replace call sites
int replaced =
codegen->ReplaceCallSites(process_build_batch_fn, eval_row_fn, "EvalBuildRow");
DCHECK_EQ(replaced, 1);
DCHECK_REPLACE_COUNT(replaced, 1);
replaced = codegen->ReplaceCallSites(
process_build_batch_fn, insert_filters_fn, "InsertRuntimeFilters");
DCHECK_EQ(replaced, 1);
DCHECK_REPLACE_COUNT(replaced, 1);
// Replace some hash table parameters with constants.
HashTableCtx::HashTableReplacedConstants replaced_constants;
@@ -831,12 +831,12 @@ Status PhjBuilder::CodegenProcessBuildBatch(LlvmCodeGen* codegen, llvm::Function
// process_build_batch_fn_level0 uses CRC hash if available,
replaced =
codegen->ReplaceCallSites(process_build_batch_fn_level0, hash_fn, "HashRow");
DCHECK_EQ(replaced, 1);
DCHECK_REPLACE_COUNT(replaced, 1);
// process_build_batch_fn uses murmur
replaced =
codegen->ReplaceCallSites(process_build_batch_fn, murmur_hash_fn, "HashRow");
DCHECK_EQ(replaced, 1);
DCHECK_REPLACE_COUNT(replaced, 1);
// Never build filters after repartitioning, as all rows have already been added to the
// filters during the level0 build. Note that the first argument of this function is the
@@ -883,11 +883,11 @@ Status PhjBuilder::CodegenInsertBatch(LlvmCodeGen* codegen, llvm::Function* hash
// Use codegen'd EvalBuildRow() function
int replaced = codegen->ReplaceCallSites(insert_batch_fn, eval_row_fn, "EvalBuildRow");
DCHECK_EQ(replaced, 1);
DCHECK_REPLACE_COUNT(replaced, 1);
// Use codegen'd Equals() function
replaced = codegen->ReplaceCallSites(insert_batch_fn, build_equals_fn, "Equals");
DCHECK_EQ(replaced, 1);
DCHECK_REPLACE_COUNT(replaced, 1);
// Replace hash-table parameters with constants.
HashTableCtx::HashTableReplacedConstants replaced_constants;
@@ -905,9 +905,9 @@ Status PhjBuilder::CodegenInsertBatch(LlvmCodeGen* codegen, llvm::Function* hash
// Use codegen'd hash functions
replaced = codegen->ReplaceCallSites(insert_batch_fn_level0, hash_fn, "HashRow");
DCHECK_EQ(replaced, 1);
DCHECK_REPLACE_COUNT(replaced, 1);
replaced = codegen->ReplaceCallSites(insert_batch_fn, murmur_hash_fn, "HashRow");
DCHECK_EQ(replaced, 1);
DCHECK_REPLACE_COUNT(replaced, 1);
insert_batch_fn = codegen->FinalizeFunction(insert_batch_fn);
if (insert_batch_fn == NULL) {

View File

@@ -1451,7 +1451,7 @@ Status PartitionedHashJoinNode::CodegenProcessProbeBatch(
// Replace all call sites with codegen version
int replaced = codegen->ReplaceCallSites(process_probe_batch_fn, eval_row_fn,
"EvalProbeRow");
DCHECK_EQ(replaced, 1);
DCHECK_REPLACE_COUNT(replaced, 1);
replaced = codegen->ReplaceCallSites(process_probe_batch_fn, create_output_row_fn,
"CreateOutputRow");
@@ -1466,16 +1466,16 @@ Status PartitionedHashJoinNode::CodegenProcessProbeBatch(
case TJoinOp::LEFT_SEMI_JOIN:
case TJoinOp::RIGHT_OUTER_JOIN:
case TJoinOp::RIGHT_SEMI_JOIN:
DCHECK_EQ(replaced, 1);
DCHECK_REPLACE_COUNT(replaced, 1);
break;
case TJoinOp::LEFT_OUTER_JOIN:
case TJoinOp::FULL_OUTER_JOIN:
DCHECK_EQ(replaced, 2);
DCHECK_REPLACE_COUNT(replaced, 2);
break;
case TJoinOp::LEFT_ANTI_JOIN:
case TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN:
case TJoinOp::RIGHT_ANTI_JOIN:
DCHECK_EQ(replaced, 0);
DCHECK_REPLACE_COUNT(replaced, 0);
break;
default:
DCHECK(false);
@@ -1483,7 +1483,7 @@ Status PartitionedHashJoinNode::CodegenProcessProbeBatch(
replaced = codegen->ReplaceCallSites(process_probe_batch_fn, eval_other_conjuncts_fn,
"EvalOtherJoinConjuncts");
DCHECK_EQ(replaced, 1);
DCHECK_REPLACE_COUNT(replaced, 1);
replaced = codegen->ReplaceCallSites(process_probe_batch_fn, probe_equals_fn, "Equals");
// Depends on join_op_
@@ -1508,10 +1508,10 @@ Status PartitionedHashJoinNode::CodegenProcessProbeBatch(
// process_probe_batch_fn_level0 uses CRC hash if available,
// process_probe_batch_fn uses murmur
replaced = codegen->ReplaceCallSites(process_probe_batch_fn_level0, hash_fn, "HashRow");
DCHECK_EQ(replaced, 1);
DCHECK_REPLACE_COUNT(replaced, 1);
replaced = codegen->ReplaceCallSites(process_probe_batch_fn, murmur_hash_fn, "HashRow");
DCHECK_EQ(replaced, 1);
DCHECK_REPLACE_COUNT(replaced, 1);
// Finalize ProcessProbeBatch functions
process_probe_batch_fn = codegen->FinalizeFunction(process_probe_batch_fn);

View File

@@ -65,7 +65,7 @@ Status SelectNode::CodegenCopyRows(RuntimeState* state) {
int replaced = codegen->ReplaceCallSites(copy_rows_fn, eval_conjuncts_fn,
"EvalConjuncts");
DCHECK_EQ(replaced, 1);
DCHECK_REPLACE_COUNT(replaced, 1);
copy_rows_fn = codegen->FinalizeFunction(copy_rows_fn);
if (copy_rows_fn == nullptr) return Status("Failed to finalize CopyRows().");
codegen->AddFunctionToJit(copy_rows_fn,

View File

@@ -118,11 +118,11 @@ void TopNNode::Codegen(RuntimeState* state) {
if (codegen_status.ok()) {
int replaced = codegen->ReplaceCallSites(insert_batch_fn,
materialize_exprs_tuple_pool_fn, Tuple::MATERIALIZE_EXPRS_SYMBOL);
DCHECK_EQ(replaced, 1) << LlvmCodeGen::Print(insert_batch_fn);
DCHECK_REPLACE_COUNT(replaced, 1) << LlvmCodeGen::Print(insert_batch_fn);
replaced = codegen->ReplaceCallSites(insert_batch_fn,
materialize_exprs_no_pool_fn, Tuple::MATERIALIZE_EXPRS_NULL_POOL_SYMBOL);
DCHECK_EQ(replaced, 1) << LlvmCodeGen::Print(insert_batch_fn);
DCHECK_REPLACE_COUNT(replaced, 1) << LlvmCodeGen::Print(insert_batch_fn);
insert_batch_fn = codegen->FinalizeFunction(insert_batch_fn);
DCHECK(insert_batch_fn != NULL);

View File

@@ -123,7 +123,7 @@ void UnionNode::Codegen(RuntimeState* state) {
int replaced = codegen->ReplaceCallSites(union_materialize_batch_fn,
tuple_materialize_exprs_fn, Tuple::MATERIALIZE_EXPRS_SYMBOL);
DCHECK_EQ(replaced, 1) << LlvmCodeGen::Print(union_materialize_batch_fn);
DCHECK_REPLACE_COUNT(replaced, 1) << LlvmCodeGen::Print(union_materialize_batch_fn);
union_materialize_batch_fn = codegen->FinalizeFunction(
union_materialize_batch_fn);

View File

@@ -32,6 +32,7 @@ CLEAN=0
TARGET_BUILD_TYPE=${TARGET_BUILD_TYPE:-""}
BUILD_SHARED_LIBS=${BUILD_SHARED_LIBS:-""}
CMAKE_ONLY=0
UBSAN_CODEGEN=0
MAKE_CMD=make
MAKE_ARGS="-j${IMPALA_BUILD_THREADS:-4} ${IMPALA_MAKE_FLAGS}"
@@ -80,6 +81,9 @@ do
-tarballs)
MAKE_TARGETS+=" tarballs"
;;
-ubsan_codegen)
UBSAN_CODEGEN=1
;;
-help|*)
echo "make_impala.sh [-build_type=<build type> -notests -clean]"
echo "[-build_type] : Target build type. Examples: Debug, Release, Address_sanitizer."
@@ -95,6 +99,8 @@ do
echo "[-cscope] : Builds cscope metadata."
echo "[-impala-lzo] : Builds Impala LZO."
echo "[-tarballs] : Builds additional tarballs like the shell tarball."
echo "[-ubsan_codegen] : apply undefined behavior sanitizer to code generated by "\
echo " cross-compilation to LLVM IT."
echo ""
echo "If either -build_type or -build_*_libs is set, cmake will be re-run for the "
echo "project. Otherwise the last cmake configuration will continue to take effect."
@@ -138,6 +144,9 @@ then
CMAKE_ARGS+=(-DCMAKE_BUILD_TYPE=${TARGET_BUILD_TYPE})
fi
if [ $UBSAN_CODEGEN -eq 1 ]; then
CMAKE_ARGS+=(-DUBSAN_CODEGEN=1)
fi
if [ "x${BUILD_SHARED_LIBS}" != "x" ]; then
CMAKE_ARGS+=(-DBUILD_SHARED_LIBS=${BUILD_SHARED_LIBS})
fi

View File

@@ -125,6 +125,10 @@ do
-ubsan)
BUILD_UBSAN=1
;;
-full_ubsan)
BUILD_UBSAN=1
MAKE_IMPALA_ARGS="${MAKE_IMPALA_ARGS} -ubsan_codegen"
;;
-tsan)
BUILD_TSAN=1
;;
@@ -199,7 +203,10 @@ do
echo "[-codecoverage] : Build with code coverage [Default: False]"
echo "[-asan] : Address sanitizer build [Default: False]"
echo "[-tidy] : clang-tidy build [Default: False]"
echo "[-ubsan] : Undefined behavior build [Default: False]"
echo "[-ubsan] : Undefined behavior sanitizer build [Default: False]"
echo "[-full_ubsan] : Undefined behavior sanitizer build, including code generated"\
" by cross-compilation to LLVM IR. Much slower queries than plain -ubsan "\
" [Default: False]"
echo "[-skiptests] : Skips execution of all tests"
echo "[-notests] : Skips building and execution of all tests"
echo "[-start_minicluster] : Start test cluster including Impala and all"\