diff --git a/be/src/exprs/expr-test.cc b/be/src/exprs/expr-test.cc index c0360a710..50bbf7f13 100644 --- a/be/src/exprs/expr-test.cc +++ b/be/src/exprs/expr-test.cc @@ -2794,11 +2794,31 @@ TEST_F(ExprTest, TimestampFunctions) { TestStringValue( "cast(trunc(cast('2012-09-10 07:59:59' as timestamp), 'MI') as string)", "2012-09-10 07:59:00"); + TestNonOkStatus("cast(trunc(cast('2012-09-10 07:59:59' as timestamp), 'MIN') as string)"); + TestNonOkStatus("cast(trunc(cast('2012-09-10 07:59:59' as timestamp), 'XXYYZZ') as string)"); - TestIsNull("cast(trunc(cast('2012-09-10 07:59:59' as timestamp), 'MIN') as string)", - TYPE_STRING); - TestIsNull("cast(trunc(cast('2012-09-10 07:59:59' as timestamp), 'XXYYZZ') as string)", - TYPE_STRING); + TestValue("extract(cast('2006-05-12 18:27:28.12345' as timestamp), 'YEAR')", + TYPE_INT, 2006); + TestValue("extract(cast('2006-05-12 18:27:28.12345' as timestamp), 'MoNTH')", + TYPE_INT, 5); + TestValue("extract(cast('2006-05-12 18:27:28.12345' as timestamp), 'DaY')", + TYPE_INT, 12); + TestValue("extract(cast('2006-05-12 06:27:28.12345' as timestamp), 'hour')", + TYPE_INT, 6); + TestValue("extract(cast('2006-05-12 18:27:28.12345' as timestamp), 'MINUTE')", + TYPE_INT, 27); + TestValue("extract(cast('2006-05-12 18:27:28.12345' as timestamp), 'SECOND')", + TYPE_INT, 28); + TestValue("extract(cast('2006-05-12 18:27:28.12345' as timestamp), 'MILLISECOND')", + TYPE_INT, 123); + TestValue("extract(cast('2006-05-13 01:27:28.12345' as timestamp), 'EPOCH')", + TYPE_INT, 1147483648); + TestValue("extract(cast('2006-05-13 01:27:28.12345' as timestamp), 'EPOCH')", + TYPE_INT, 1147483648); + TestNonOkStatus("extract(cast('2006-05-13 01:27:28.12345' as timestamp), 'foo')"); + TestNonOkStatus("extract(cast('2006-05-13 01:27:28.12345' as timestamp), NULL)"); + TestIsNull("extract(NULL, 'EPOCH')", TYPE_INT); + TestNonOkStatus("extract(NULL, NULL)"); } TEST_F(ExprTest, ConditionalFunctions) { diff --git a/be/src/exprs/udf-builtins.cc b/be/src/exprs/udf-builtins.cc index aafa78437..c798a3167 100644 --- a/be/src/exprs/udf-builtins.cc +++ b/be/src/exprs/udf-builtins.cc @@ -48,6 +48,7 @@ StringVal UdfBuiltins::Lower(FunctionContext* context, const StringVal& v) { // The units which can be used when Truncating a Timestamp struct TruncUnit { enum Type { + UNIT_INVALID, YEAR, QUARTER, MONTH, @@ -56,33 +57,32 @@ struct TruncUnit { DAY, DAY_OF_WEEK, HOUR, - MINUTE, - UNIT_INVALID + MINUTE }; }; // Maps the user facing name of a unit to a TruncUnit -// Returns the TruncUnit via parameter trunc_unit -// Returns true if unit is a known unit, else false -TruncUnit::Type StrToTruncUnit(const StringVal& unit) { - if ((unit == "SYYYY") || (unit == "YYYY") || (unit == "YEAR") || (unit == "SYEAR") || - (unit == "YYY") || (unit == "YY") || (unit == "Y")) { +// Returns the TruncUnit for the given string +TruncUnit::Type StrToTruncUnit(FunctionContext* ctx, const StringVal& unit_str) { + StringVal unit = UdfBuiltins::Lower(ctx, unit_str); + if ((unit == "syyyy") || (unit == "yyyy") || (unit == "year") || (unit == "syear") || + (unit == "yyy") || (unit == "yy") || (unit == "y")) { return TruncUnit::YEAR; - } else if (unit == "Q") { + } else if (unit == "q") { return TruncUnit::QUARTER; - } else if ((unit == "MONTH") || (unit == "MON") || (unit == "MM") || (unit == "RM")) { + } else if ((unit == "month") || (unit == "mon") || (unit == "mm") || (unit == "rm")) { return TruncUnit::MONTH; - } else if (unit == "WW") { + } else if (unit == "ww") { return TruncUnit::WW; - } else if (unit == "W") { + } else if (unit == "w") { return TruncUnit::W; - } else if ((unit == "DDD") || (unit == "DD") || (unit == "J")) { + } else if ((unit == "ddd") || (unit == "dd") || (unit == "j")) { return TruncUnit::DAY; - } else if ((unit == "DAY") || (unit == "DY") || (unit == "D")) { + } else if ((unit == "day") || (unit == "dy") || (unit == "d")) { return TruncUnit::DAY_OF_WEEK; - } else if ((unit == "HH") || (unit == "HH12") || (unit == "HH24")) { + } else if ((unit == "hh") || (unit == "hh12") || (unit == "hh24")) { return TruncUnit::HOUR; - } else if (unit == "MI") { + } else if (unit == "mi") { return TruncUnit::MINUTE; } else { return TruncUnit::UNIT_INVALID; @@ -181,7 +181,7 @@ TimestampVal UdfBuiltins::Trunc( if (state != NULL) { trunc_unit = *reinterpret_cast(state); } else { - trunc_unit = StrToTruncUnit(unit_str); + trunc_unit = StrToTruncUnit(context, unit_str); if (trunc_unit == TruncUnit::UNIT_INVALID) { string string_unit(reinterpret_cast(unit_str.ptr), unit_str.len); context->SetError(Substitute("Invalid Truncate Unit: $0", string_unit).c_str()); @@ -233,7 +233,7 @@ void UdfBuiltins::TruncPrepare(FunctionContext* ctx, // Parse the unit up front if we can, otherwise do it on the fly in Trunc() if (ctx->IsArgConstant(1)) { StringVal* unit_str = reinterpret_cast(ctx->GetConstantArg(1)); - TruncUnit::Type trunc_unit = StrToTruncUnit(*unit_str); + TruncUnit::Type trunc_unit = StrToTruncUnit(ctx, *unit_str); if (trunc_unit == TruncUnit::UNIT_INVALID) { string string_unit(reinterpret_cast(unit_str->ptr), unit_str->len); ctx->SetError(Substitute("Invalid Truncate Unit: $0", string_unit).c_str()); @@ -254,4 +254,118 @@ void UdfBuiltins::TruncClose(FunctionContext* ctx, ctx->SetFunctionState(scope, NULL); } } + +// The units which can be used when extracting a Timestamp +struct ExtractField { + enum Type { + INVALID_FIELD, + YEAR, + MONTH, + DAY, + HOUR, + MINUTE, + SECOND, + MILLISECOND, + EPOCH + }; +}; + +// Maps the user facing name of a unit to a ExtractField +// Returns the ExtractField for the given unit +ExtractField::Type StrToExtractField(FunctionContext* ctx, const StringVal& unit_str) { + StringVal unit = UdfBuiltins::Lower(ctx, unit_str); + if (unit == "year") return ExtractField::YEAR; + if (unit == "month") return ExtractField::MONTH; + if (unit == "day") return ExtractField::DAY; + if (unit == "hour") return ExtractField::HOUR; + if (unit == "minute") return ExtractField::MINUTE; + if (unit == "second") return ExtractField::SECOND; + if (unit == "millisecond") return ExtractField::MILLISECOND; + if (unit == "epoch") return ExtractField::EPOCH; + return ExtractField::INVALID_FIELD; +} + +IntVal UdfBuiltins::Extract( + FunctionContext* context, const TimestampVal& tv, const StringVal &unit_str) { + // resolve extract_field using the prepared state if possible, o.w. parse now + // ExtractPrepare() can only parse extract_field if user passes it as a string literal + ExtractField::Type field; + void* state = context->GetFunctionState(FunctionContext::THREAD_LOCAL); + if (state != NULL) { + field = *reinterpret_cast(state); + } else { + field = StrToExtractField(context, unit_str); + if (field == ExtractField::INVALID_FIELD) { + string string_unit(reinterpret_cast(unit_str.ptr), unit_str.len); + context->SetError(Substitute("invalid extract field: $0", string_unit).c_str()); + return IntVal::null(); + } + } + + const date& orig_date = *reinterpret_cast(&tv.date); + const time_duration& time = *reinterpret_cast(&tv.time_of_day); + if (orig_date.is_special()) return IntVal::null(); + + switch (field) { + case ExtractField::YEAR: { + return IntVal(orig_date.year()); + } + case ExtractField::MONTH: { + return IntVal(orig_date.month()); + } + case ExtractField::DAY: { + return IntVal(orig_date.day()); + } + case ExtractField::HOUR: { + return IntVal(time.hours()); + } + case ExtractField::MINUTE: { + return IntVal(time.minutes()); + } + case ExtractField::SECOND: { + return IntVal(time.seconds()); + } + case ExtractField::MILLISECOND: { + return IntVal(time.total_milliseconds() - time.total_seconds() * 1000); + } + case ExtractField::EPOCH: { + ptime epoch_date(date(1970, 1, 1), time_duration(0, 0, 0)); + ptime cur_date(orig_date, time); + time_duration diff = cur_date - epoch_date; + return IntVal(diff.total_seconds()); + } + default: { + DCHECK(false) << field; + return IntVal::null(); + } + } +} + +void UdfBuiltins::ExtractPrepare(FunctionContext* ctx, + FunctionContext::FunctionStateScope scope) { + // Parse the unit up front if we can, otherwise do it on the fly in Extract() + if (ctx->IsArgConstant(1)) { + StringVal* unit_str = reinterpret_cast(ctx->GetConstantArg(1)); + ExtractField::Type field = StrToExtractField(ctx, *unit_str); + if (field == ExtractField::INVALID_FIELD) { + string string_field(reinterpret_cast(unit_str->ptr), unit_str->len); + ctx->SetError(Substitute("invalid extract field: $0", string_field).c_str()); + } else { + ExtractField::Type* state = reinterpret_cast( + ctx->Allocate(sizeof(ExtractField::Type))); + *state = field; + ctx->SetFunctionState(scope, state); + } + } +} + +void UdfBuiltins::ExtractClose(FunctionContext* ctx, + FunctionContext::FunctionStateScope scope) { + void* state = ctx->GetFunctionState(scope); + if (state != NULL) { + ctx->Free(reinterpret_cast(state)); + ctx->SetFunctionState(scope, NULL); + } +} + } // namespace impala diff --git a/be/src/exprs/udf-builtins.h b/be/src/exprs/udf-builtins.h index 221826642..4e930fbe8 100644 --- a/be/src/exprs/udf-builtins.h +++ b/be/src/exprs/udf-builtins.h @@ -53,6 +53,18 @@ class UdfBuiltins { FunctionContext::FunctionStateScope scope); static void TruncClose(FunctionContext* context, FunctionContext::FunctionStateScope scope); + + // Returns a single field from a timestamp + // Fields: + // YEAR, MONTH, DAY, HOUR, MINUTE, SECOND, MILLISECOND, EPOCH + // Reference: + // http://docs.oracle.com/cd/B19306_01/server.102/b14200/functions050.htm + static IntVal Extract(FunctionContext* context, const TimestampVal& date, + const StringVal& field_str); + static void ExtractPrepare(FunctionContext* context, + FunctionContext::FunctionStateScope scope); + static void ExtractClose(FunctionContext* context, + FunctionContext::FunctionStateScope scope); }; } // namespace impala diff --git a/be/src/testutil/test-udfs.cc b/be/src/testutil/test-udfs.cc index 919c7b091..7aaf475ff 100644 --- a/be/src/testutil/test-udfs.cc +++ b/be/src/testutil/test-udfs.cc @@ -177,9 +177,12 @@ void CountClose(FunctionContext* context, FunctionContext::FunctionStateScope sc void ConstantArgPrepare( FunctionContext* context, FunctionContext::FunctionStateScope scope) { if (scope == FunctionContext::THREAD_LOCAL) { - IntVal* arg = reinterpret_cast(context->GetConstantArg(0)); IntVal* state = reinterpret_cast(context->Allocate(sizeof(IntVal))); - *state = (arg != NULL) ? *arg : IntVal::null(); + if (context->IsArgConstant(0)) { + *state = *reinterpret_cast(context->GetConstantArg(0)); + } else { + *state = IntVal::null(); + } context->SetFunctionState(scope, state); } } diff --git a/be/src/udf/udf.cc b/be/src/udf/udf.cc index 500e1986e..493c7076d 100644 --- a/be/src/udf/udf.cc +++ b/be/src/udf/udf.cc @@ -311,7 +311,7 @@ void FunctionContextImpl::SetConstantArgs(const vector& constant_args) bool FunctionContext::IsArgConstant(int i) const { if (i < 0 || i >= impl_->constant_args_.size()) return false; - return impl_->constant_args_[i] == NULL; + return impl_->constant_args_[i] != NULL; } AnyVal* FunctionContext::GetConstantArg(int i) const { diff --git a/common/function-registry/impala_functions.py b/common/function-registry/impala_functions.py index 39c2a7eb3..f398ae30d 100755 --- a/common/function-registry/impala_functions.py +++ b/common/function-registry/impala_functions.py @@ -489,4 +489,8 @@ udf_functions = [ '_ZN6impala11UdfBuiltins5TruncEPN10impala_udf15FunctionContextERKNS1_12TimestampValERKNS1_9StringValE', '_ZN6impala11UdfBuiltins12TruncPrepareEPN10impala_udf15FunctionContextENS2_18FunctionStateScopeE', '_ZN6impala11UdfBuiltins10TruncCloseEPN10impala_udf15FunctionContextENS2_18FunctionStateScopeE'], + [['extract'], 'INT', ['TIMESTAMP', 'STRING'], + '_ZN6impala11UdfBuiltins7ExtractEPN10impala_udf15FunctionContextERKNS1_12TimestampValERKNS1_9StringValE', + '_ZN6impala11UdfBuiltins14ExtractPrepareEPN10impala_udf15FunctionContextENS2_18FunctionStateScopeE', + '_ZN6impala11UdfBuiltins12ExtractCloseEPN10impala_udf15FunctionContextENS2_18FunctionStateScopeE'], ] diff --git a/testdata/workloads/functional-query/queries/QueryTest/exprs.test b/testdata/workloads/functional-query/queries/QueryTest/exprs.test index ee24718a5..aebce34a5 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/exprs.test +++ b/testdata/workloads/functional-query/queries/QueryTest/exprs.test @@ -1,5 +1,37 @@ ==== ---- QUERY +# test extract with non-constant field name +select b.unit, extract(a.ts, b.unit) from +(values(cast('2013-02-18 16:46:00.01' as timestamp) ts)) a +cross join +(values('year' unit), ('month'), ('day'), ('hour'), ('minute'), ('second'), +('millisecond'), ('epoch' )) b +---- TYPES +string, int +---- RESULTS +'year',2013 +'month',2 +'day',18 +'hour',16 +'minute',46 +'second',0 +'millisecond',10 +'epoch',1361205960 +==== +---- QUERY +# EXTRACT fields from timestamp +select EXTRACT(timestamp_col, 'yEar'), EXTRACT(timestamp_col, 'MilliSECond') +from alltypesagg order by id limit 5 +---- TYPES +int, int +---- RESULTS +2010,0 +2010,0 +2010,100 +2010,300 +2010,600 +==== +---- QUERY # IS NULL predicate select count(*) from alltypesagg ---- TYPES