IMPALA-10691: Fix multithreading related crash in CAST FORMAT

The issue occurs when a CastFormatExpr is shared among threads and
multiple threads call its OpenEvaluator(). Later calls delete the
DateTimeFormatContext created by older calls which makes
fn_ctx->GetFunctionState() a dangling pointer.

This only happens when CastFormatExpr is shared among
FragmentInstances - in case of scanner threads OpenEvaluator() is
called with THREAD_LOCAL and returns early without modifying anything.

Testing:
- added a regression test

Change-Id: I501c8a184591b1c836b2ca4cada1f2117f9f5c99
Reviewed-on: http://gerrit.cloudera.org:8080/17374
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
This commit is contained in:
Csaba Ringhofer
2021-04-30 16:07:37 +02:00
committed by Impala Public Jenkins
parent 87384e3377
commit 676f79aa81
3 changed files with 23 additions and 9 deletions

View File

@@ -34,18 +34,19 @@ Status CastFormatExpr::OpenEvaluator(FunctionContext::FunctionStateScope scope,
DCHECK_GE(fn_ctx_idx_, 0);
FunctionContext* fn_ctx = eval->fn_context(fn_ctx_idx_);
dt_ctx_ = std::make_unique<DateTimeFormatContext>(format_.c_str(), format_.length());
std::unique_ptr<DateTimeFormatContext> dt_ctx =
std::make_unique<DateTimeFormatContext>(format_.c_str(), format_.length());
bool accept_time_toks = (fn_ctx->GetReturnType().type != FunctionContext::TYPE_DATE &&
fn_ctx->GetArgType(0)->type != FunctionContext::TYPE_DATE);
IsoSqlFormatTokenizer format_tokenizer(dt_ctx_.get(), GetCastMode(fn_ctx),
IsoSqlFormatTokenizer format_tokenizer(dt_ctx.get(), GetCastMode(fn_ctx),
accept_time_toks);
FormatTokenizationResult parse_result = format_tokenizer.Tokenize();
if (parse_result != FormatTokenizationResult::SUCCESS) {
ReportBadFormat(fn_ctx, parse_result, StringVal(format_.c_str()), true);
return Status(TErrorCode::INTERNAL_ERROR, fn_ctx->error_msg());
}
dt_ctx_->SetCenturyBreakAndCurrentTime(*fn_ctx->impl()->state()->now());
fn_ctx->SetFunctionState(scope, dt_ctx_.get());
dt_ctx->SetCenturyBreakAndCurrentTime(*fn_ctx->impl()->state()->now());
fn_ctx->SetFunctionState(scope, dt_ctx.release());
return Status::OK();
}
@@ -56,7 +57,12 @@ void CastFormatExpr::CloseEvaluator(FunctionContext::FunctionStateScope scope,
if (scope == FunctionContext::FRAGMENT_LOCAL) {
DCHECK_GE(fn_ctx_idx_, 0);
FunctionContext* fn_ctx = eval->fn_context(fn_ctx_idx_);
fn_ctx->SetFunctionState(scope, nullptr);
DateTimeFormatContext* dt_ctx =
(DateTimeFormatContext*) fn_ctx->GetFunctionState(scope);
if (dt_ctx != nullptr) {
delete dt_ctx;
fn_ctx->SetFunctionState(scope, nullptr);
}
}
ScalarFnCall::CloseEvaluator(scope, state, eval);
}

View File

@@ -38,10 +38,6 @@ public:
ScalarExprEvaluator* eval) const override;
private:
std::string format_;
/// Keeps track of the DateTimeFormatContext put into FunctionContext in
/// OpenEvaluator().
/// This is empty in case of FunctionContext::THREAD_LOCAL.
mutable std::unique_ptr<datetime_parse_util::DateTimeFormatContext> dt_ctx_;
datetime_parse_util::CastDirection GetCastMode(const FunctionContext* ctx) const;
};

View File

@@ -107,3 +107,15 @@ select cast("9999-12-31" as timestamp FORMAT "YYYY-MM-DD");
---- TYPES
timestamp
====
---- QUERY
# Regression test for IMPALA-10691.
set mt_dop=2;
set max_scan_range_length=16384;
select count(*) from alltypes a1, alltypes a2
where cast(cast(a1.date_string_col as date format 'MM/dd/yy') as string format 'MM/dd/yy') =
cast(cast(a2.date_string_col as date format 'MM/dd/yy') as string format 'MM/dd/yy');
---- RESULTS
73000
---- TYPES
bigint
====