From e151ebaa7198f4ce1076b3aca049d0846acd591a Mon Sep 17 00:00:00 2001 From: Tim Armstrong Date: Thu, 9 Jul 2015 14:40:27 -0700 Subject: [PATCH] IMPALA-1001: Bit and byte manipulation functions Bit and byte functions for compatibility with Teradata: bitand, bitor, bitxor, bitnot, countset, getbit, setbit, shiftleft, shiftright, rotateleft, rotateright. Interfaces and behavior follow Teradata documentation. All bit* functions are compatible with DB2. bitand only is compatible with Oracle. Change-Id: Idba3fb7beb029de493b602e6279aa68e32688df3 --- be/src/exprs/CMakeLists.txt | 1 + be/src/exprs/bit-byte-functions.cc | 200 ++++++++++++++++++ be/src/exprs/bit-byte-functions.h | 78 +++++++ be/src/exprs/expr-test.cc | 173 +++++++++++++++ be/src/exprs/expr.cc | 2 + be/src/util/bit-util.h | 41 ++++ common/function-registry/impala_functions.py | 55 +++++ .../impala/analysis/ArithmeticExpr.java | 13 +- .../queries/QueryTest/exprs.test | 71 +++++++ 9 files changed, 625 insertions(+), 9 deletions(-) create mode 100644 be/src/exprs/bit-byte-functions.cc create mode 100644 be/src/exprs/bit-byte-functions.h diff --git a/be/src/exprs/CMakeLists.txt b/be/src/exprs/CMakeLists.txt index cf0ef142f..cf119bbe3 100644 --- a/be/src/exprs/CMakeLists.txt +++ b/be/src/exprs/CMakeLists.txt @@ -23,6 +23,7 @@ add_library(Exprs agg-fn-evaluator.cc aggregate-functions.cc anyval-util.cc + bit-byte-functions.cc case-expr.cc cast-functions.cc compound-predicates.cc diff --git a/be/src/exprs/bit-byte-functions.cc b/be/src/exprs/bit-byte-functions.cc new file mode 100644 index 000000000..8bc088171 --- /dev/null +++ b/be/src/exprs/bit-byte-functions.cc @@ -0,0 +1,200 @@ +// Copyright 2015 Cloudera Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "exprs/bit-byte-functions.h" + +#include + +#include "gutil/strings/substitute.h" + +#include "util/bit-util.h" + +using namespace impala_udf; + +using boost::make_unsigned; + +using impala::BitUtil; +using strings::Substitute; + +namespace impala { + +// Generic algorithm for shifting and rotating signed integers +// Declare here to resolve mutual recursion +template +static T RotateLeftImpl(T v, int32_t shift); +template +static T RotateRightImpl(T v, int32_t shift); +template +static T ShiftLeftImpl(T v, int32_t shift); +template +static T ShiftRightLogicalImpl(T v, int32_t shift); + +template +IntVal BitByteFunctions::CountSet(FunctionContext* ctx, const T& v) { + if (v.is_null) return IntVal::null(); + return IntVal(BitUtil::PopcountSigned(v.val)); +} + +template IntVal BitByteFunctions::CountSet(FunctionContext*, const TinyIntVal&); +template IntVal BitByteFunctions::CountSet(FunctionContext*, const SmallIntVal&); +template IntVal BitByteFunctions::CountSet(FunctionContext*, const IntVal&); +template IntVal BitByteFunctions::CountSet(FunctionContext*, const BigIntVal&); + +template +IntVal BitByteFunctions::CountSet(FunctionContext* ctx, const T& v, const IntVal &bitval) { + if (v.is_null || bitval.is_null) return IntVal::null(); + if (bitval.val == 0) { + return IntVal(sizeof(v.val) * 8 - BitUtil::PopcountSigned(v.val)); + } else if (bitval.val == 1) { + return IntVal(BitUtil::PopcountSigned(v.val)); + } + + ctx->SetError(Substitute("Invalid bit val: $0", bitval.val).c_str()); + return IntVal::null(); +} + +template IntVal BitByteFunctions::CountSet(FunctionContext*, const TinyIntVal&, + const IntVal&); +template IntVal BitByteFunctions::CountSet(FunctionContext*, const SmallIntVal&, + const IntVal&); +template IntVal BitByteFunctions::CountSet(FunctionContext*, const IntVal&, + const IntVal&); +template IntVal BitByteFunctions::CountSet(FunctionContext*, const BigIntVal&, + const IntVal&); + +template +TinyIntVal BitByteFunctions::GetBit(FunctionContext* ctx, const T& v, + const IntVal& bitpos) { + if (v.is_null || bitpos.is_null) return TinyIntVal::null(); + if (bitpos.val < 0 || bitpos.val >= sizeof(v.val) * 8) { + ctx->SetError(Substitute("Invalid bit position: $0", bitpos.val).c_str()); + return TinyIntVal::null(); + } + return TinyIntVal(BitUtil::GetBit(v.val, bitpos.val)); +} + +template TinyIntVal BitByteFunctions::GetBit(FunctionContext*, const TinyIntVal&, + const IntVal&); +template TinyIntVal BitByteFunctions::GetBit(FunctionContext*, const SmallIntVal&, + const IntVal&); +template TinyIntVal BitByteFunctions::GetBit(FunctionContext*, const IntVal&, + const IntVal&); +template TinyIntVal BitByteFunctions::GetBit(FunctionContext*, const BigIntVal&, + const IntVal&); + +template +T BitByteFunctions::SetBit(FunctionContext* ctx, const T& v, + const IntVal& bitpos) { + if (v.is_null || bitpos.is_null) return T::null(); + if (bitpos.val < 0 || bitpos.val >= sizeof(v.val) * 8) { + ctx->SetError(Substitute("Invalid bit position: $0", bitpos.val).c_str()); + return T::null(); + } + return T(BitUtil::SetBit(v.val, bitpos.val)); +} + +template +T BitByteFunctions::SetBit(FunctionContext* ctx, const T& v, + const IntVal& bitpos, const IntVal& bitval) { + if (v.is_null || bitpos.is_null || bitval.is_null) return T::null(); + if (bitpos.val < 0 || bitpos.val >= sizeof(v.val) * 8) { + ctx->SetError(Substitute("Invalid bit position: $0", bitpos.val).c_str()); + return T::null(); + } + if (bitval.val == 0) { + return T(BitUtil::UnsetBit(v.val, bitpos.val)); + } else if (bitval.val == 1) { + return T(BitUtil::SetBit(v.val, bitpos.val)); + } + ctx->SetError(Substitute("Invalid bit val: $0", bitval.val).c_str()); + return T::null(); +} + +template TinyIntVal BitByteFunctions::SetBit(FunctionContext*, const TinyIntVal&, + const IntVal&); +template SmallIntVal BitByteFunctions::SetBit(FunctionContext*, const SmallIntVal&, + const IntVal&); +template IntVal BitByteFunctions::SetBit(FunctionContext*, const IntVal&, const IntVal&); +template BigIntVal BitByteFunctions::SetBit(FunctionContext*, const BigIntVal&, + const IntVal&); +template TinyIntVal BitByteFunctions::SetBit(FunctionContext*, const TinyIntVal&, + const IntVal&, const IntVal&); +template SmallIntVal BitByteFunctions::SetBit(FunctionContext*, const SmallIntVal&, + const IntVal&, const IntVal&); +template IntVal BitByteFunctions::SetBit(FunctionContext*, const IntVal&, const IntVal&, + const IntVal&); +template BigIntVal BitByteFunctions::SetBit(FunctionContext*, const BigIntVal&, + const IntVal&, const IntVal&); + +template +static T RotateLeftImpl(T v, int32_t shift) { + // Handle negative shifts + if (shift < 0) return RotateRightImpl(v, -shift); + + // Handle wrapping around multiple times + shift = shift % (sizeof(T) * 8); + return (v << shift) | BitUtil::ShiftRightLogical(v, sizeof(T) * 8 - shift); +} + +template +static T RotateRightImpl(T v, int32_t shift) { + // Handle negative shifts + if (shift < 0) return RotateLeftImpl(v, -shift); + + // Handle wrapping around multiple times + shift = shift % (sizeof(T) * 8); + return BitUtil::ShiftRightLogical(v, shift) | (v << (sizeof(T) * 8 - shift)); +} + +template +static T ShiftLeftImpl(T v, int32_t shift) { + if (shift < 0) return ShiftRightLogicalImpl(v, -shift); + return v << shift; +} + +// Logical right shift rather than arithmetic right shift +template +static T ShiftRightLogicalImpl(T v, int32_t shift) { + if (shift < 0) return ShiftLeftImpl(v, -shift); + // Conversion to unsigned ensures most significant bits always filled with 0's + return BitUtil::ShiftRightLogical(v, shift); +} + +// Generates a shift/rotate function for Impala integer value type based on the shift +// algorithm implemented by ALGO +#define SHIFT_FN(NAME, INPUT_TYPE, ALGO) \ + INPUT_TYPE BitByteFunctions::NAME(FunctionContext* ctx, const INPUT_TYPE& v, \ + const IntVal& shift) { \ + if (v.is_null || shift.is_null) return INPUT_TYPE::null(); \ + return INPUT_TYPE(ALGO(v.val, shift.val)); \ + } + +SHIFT_FN(RotateLeft, TinyIntVal, RotateLeftImpl); +SHIFT_FN(RotateLeft, SmallIntVal, RotateLeftImpl); +SHIFT_FN(RotateLeft, IntVal, RotateLeftImpl); +SHIFT_FN(RotateLeft, BigIntVal, RotateLeftImpl); +SHIFT_FN(RotateRight, TinyIntVal, RotateRightImpl); +SHIFT_FN(RotateRight, SmallIntVal, RotateRightImpl); +SHIFT_FN(RotateRight, IntVal, RotateRightImpl); +SHIFT_FN(RotateRight, BigIntVal, RotateRightImpl); +SHIFT_FN(ShiftLeft, TinyIntVal, ShiftLeftImpl); +SHIFT_FN(ShiftLeft, SmallIntVal, ShiftLeftImpl); +SHIFT_FN(ShiftLeft, IntVal, ShiftLeftImpl); +SHIFT_FN(ShiftLeft, BigIntVal, ShiftLeftImpl); +SHIFT_FN(ShiftRight, TinyIntVal, ShiftRightLogicalImpl); +SHIFT_FN(ShiftRight, SmallIntVal, ShiftRightLogicalImpl); +SHIFT_FN(ShiftRight, IntVal, ShiftRightLogicalImpl); +SHIFT_FN(ShiftRight, BigIntVal, ShiftRightLogicalImpl); + +} diff --git a/be/src/exprs/bit-byte-functions.h b/be/src/exprs/bit-byte-functions.h new file mode 100644 index 000000000..c0757e495 --- /dev/null +++ b/be/src/exprs/bit-byte-functions.h @@ -0,0 +1,78 @@ +// Copyright 2015 Cloudera Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef IMPALA_EXPRS_BIT_BYTE_FUNCTIONS_H +#define IMPALA_EXPRS_BIT_BYTE_FUNCTIONS_H + +#include "udf/udf.h" + +using namespace impala_udf; + +namespace impala { + +class BitByteFunctions { + public: + // Count number of bits set in binary representation of integer types (aka popcount) + template static IntVal CountSet(FunctionContext* ctx, const T& v); + template static IntVal CountSet(FunctionContext* ctx, const T& v, + const IntVal& bitval); + + // Get and set individual bits in binary representation of integer types + template static TinyIntVal GetBit(FunctionContext* ctx, const T& v, + const IntVal& bitpos); + template static T SetBit(FunctionContext* ctx, const T& v, + const IntVal& bitpos); + template static T SetBit(FunctionContext* ctx, const T& v, + const IntVal& bitpos, const IntVal&); + + // Bitwise rotation of integer types + static TinyIntVal RotateLeft(FunctionContext* ctx, const TinyIntVal& v, + const IntVal& shift); + static SmallIntVal RotateLeft(FunctionContext* ctx, const SmallIntVal& v, + const IntVal& shift); + static IntVal RotateLeft(FunctionContext* ctx, const IntVal& v, + const IntVal& shift); + static BigIntVal RotateLeft(FunctionContext* ctx, const BigIntVal& v, + const IntVal& shift); + static TinyIntVal RotateRight(FunctionContext* ctx, const TinyIntVal& v, + const IntVal& shift); + static SmallIntVal RotateRight(FunctionContext* ctx, const SmallIntVal& v, + const IntVal& shift); + static IntVal RotateRight(FunctionContext* ctx, const IntVal& v, + const IntVal& shift); + static BigIntVal RotateRight(FunctionContext* ctx, const BigIntVal& v, + const IntVal& shift); + + // Bitwise logical shifts of integer types + static TinyIntVal ShiftLeft(FunctionContext* ctx, const TinyIntVal& v, + const IntVal& shift); + static SmallIntVal ShiftLeft(FunctionContext* ctx, const SmallIntVal& v, + const IntVal& shift); + static IntVal ShiftLeft(FunctionContext* ctx, const IntVal& v, + const IntVal& shift); + static BigIntVal ShiftLeft(FunctionContext* ctx, const BigIntVal& v, + const IntVal& shift); + static TinyIntVal ShiftRight(FunctionContext* ctx, const TinyIntVal& v, + const IntVal& shift); + static SmallIntVal ShiftRight(FunctionContext* ctx, const SmallIntVal& v, + const IntVal& shift); + static IntVal ShiftRight(FunctionContext* ctx, const IntVal& v, + const IntVal& shift); + static BigIntVal ShiftRight(FunctionContext* ctx, const BigIntVal& v, + const IntVal& shift); +}; + +} + +#endif diff --git a/be/src/exprs/expr-test.cc b/be/src/exprs/expr-test.cc index 4c50f8470..e4caaa84b 100644 --- a/be/src/exprs/expr-test.cc +++ b/be/src/exprs/expr-test.cc @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include #include #include #include @@ -63,6 +64,7 @@ using boost::bad_lexical_cast; using boost::date_time::c_local_adjustor; using boost::posix_time::from_time_t; using boost::posix_time::ptime; +using std::numeric_limits; using namespace Apache::Hadoop::Hive; using namespace impala; using namespace llvm; @@ -5003,6 +5005,177 @@ TEST_F(ExprTest, MADlib) { TYPE_DOUBLE, 3.0); } +TEST_F(ExprTest, BitByteBuiltins) { + TestIsNull("bitand(1,NULL)", TYPE_TINYINT); + TestIsNull("bitand(NULL,3)", TYPE_TINYINT); + // And of numbers differing in 2nd bit position gives min of two numbers + TestValue("bitand(1,3)", TYPE_TINYINT, 1); + TestValue("bitand(129,131)", TYPE_SMALLINT, 129); + TestValue("bitand(32769,32771)", TYPE_INT, 32769); + TestValue("bitand(2147483649,2147483651)", TYPE_BIGINT, 2147483649); + + TestIsNull("bitor(1,NULL)", TYPE_TINYINT); + TestIsNull("bitor(NULL,3)", TYPE_TINYINT); + // Or of numbers differing in 2nd bit position gives max of two numbers + TestValue("bitor(1,3)", TYPE_TINYINT, 3); + TestValue("bitor(129,131)", TYPE_SMALLINT, 131); + TestValue("bitor(32769,32771)", TYPE_INT, 32771); + TestValue("bitor(2147483649,2147483651)", TYPE_BIGINT, 2147483651); + + TestIsNull("bitxor(1,NULL)", TYPE_TINYINT); + TestIsNull("bitxor(NULL,3)", TYPE_TINYINT); + // Xor of numbers differing in 2nd bit position gives 2 + TestValue("bitxor(1,3)", TYPE_TINYINT, 2); + TestValue("bitxor(129,131)", TYPE_SMALLINT, 2); + TestValue("bitxor(32769,32771)", TYPE_INT, 2); + TestValue("bitxor(2147483649,2147483651)", TYPE_BIGINT, 2); + + TestIsNull("bitnot(NULL)", TYPE_TINYINT); + TestValue("bitnot(1)", TYPE_TINYINT, -2); + TestValue("bitnot(129)", TYPE_SMALLINT, -130); + TestValue("bitnot(32769)", TYPE_INT, -32770); + TestValue("bitnot(2147483649)", TYPE_BIGINT, -2147483650); + + // basic bit patterns + TestValue("countset(0)", TYPE_INT, 0); + TestValue("countset(1)", TYPE_INT, 1); + TestValue("countset(2)", TYPE_INT, 1); + TestValue("countset(3)", TYPE_INT, 2); + // 0101... bit pattern + TestValue("countset(" + lexical_cast(0x55) + ")", TYPE_INT, 4); + TestValue("countset(" + lexical_cast(0x5555) + ")", TYPE_INT, 8); + TestValue("countset(" + lexical_cast(0x55555555) + ")", TYPE_INT, 16); + TestValue("countset(" + lexical_cast(0x5555555555555555) + ")", TYPE_INT, 32); + // 1111... bit pattern to test signed/unsigned conversion + TestValue("countset(cast(-1 as TINYINT))", TYPE_INT, 8); + TestValue("countset(cast(-1 as SMALLINT))", TYPE_INT, 16); + TestValue("countset(cast(-1 as INT))", TYPE_INT, 32); + TestValue("countset(cast(-1 as BIGINT))", TYPE_INT, 64); + // NULL arguments + TestIsNull("countset(cast(NULL as TINYINT))", TYPE_INT); + TestIsNull("countset(cast(NULL as SMALLINT))", TYPE_INT); + TestIsNull("countset(cast(NULL as INT))", TYPE_INT); + TestIsNull("countset(cast(NULL as BIGINT))", TYPE_INT); + + // Check with optional argument + TestIsNull("countset(0, NULL)", TYPE_INT); + TestValue("countset(0, 0)", TYPE_INT, 8); + TestValue("countset(0, 1)", TYPE_INT, 0); + // TestError("countset(0, 2)"); TODO - disabled because of IMPALA-1746 + + // getbit for all integer types + TestIsNull("getbit(NULL, 1)", TYPE_TINYINT); + TestIsNull("getbit(1, NULL)", TYPE_TINYINT); + TestValue("getbit(1, 0)", TYPE_TINYINT, 1); + TestValue("getbit(1, 1)", TYPE_TINYINT, 0); + string int8_min = lexical_cast((int16_t)numeric_limits::min()); + TestValue("getbit(" + int8_min + ", 7)", TYPE_TINYINT, 1); + TestValue("getbit(" + int8_min + ", 6)", TYPE_TINYINT, 0); + string int16_min = lexical_cast(numeric_limits::min()); + TestValue("getbit(" + int16_min + ", 15)", TYPE_TINYINT, 1); + TestValue("getbit(" + int16_min + ", 14)", TYPE_TINYINT, 0); + string int32_min = lexical_cast(numeric_limits::min()); + TestValue("getbit(" + int32_min + ", 31)", TYPE_TINYINT, 1); + TestValue("getbit(" + int32_min + ", 30)", TYPE_TINYINT, 0); + string int64_min = lexical_cast(numeric_limits::min()); + TestValue("getbit(" + int64_min + ", 63)", TYPE_TINYINT, 1); + TestValue("getbit(" + int64_min + ", 62)", TYPE_TINYINT, 0); + // Out of range bitpos causes errors + // TODO - disabled because of IMPALA-1746 + // TestError("getbit(0, -1)", TYPE_TINYINT); + // TestError("getbit(0, 8)", TYPE_TINYINT); + // TestError("getbit(" + int16_min + ", 16)", TYPE_TINYINT); + // TestError("getbit(" + int32_min + ", 32)", TYPE_TINYINT); + // TestError("getbit(" + int64_min + ", 64)", TYPE_TINYINT); + + // Set bits for all integer types + TestIsNull("setbit(cast(NULL as INT), 1)", TYPE_INT); + TestIsNull("setbit(1, NULL)", TYPE_TINYINT); + TestIsNull("setbit(cast(NULL as INT), 1, 1)", TYPE_INT); + TestIsNull("setbit(1, NULL, 1)", TYPE_TINYINT); + TestIsNull("setbit(1, 1, NULL)", TYPE_TINYINT); + // TestError("setbit(1, 1, -1)"); TODO - disabled because of IMPALA-1746 + // TestError("setbit(1, 1, 2)"); TODO - disabled because of IMPALA-1746 + TestValue("setbit(0, 0)", TYPE_TINYINT, 1); + TestValue("setbit(0, 0, 1)", TYPE_TINYINT, 1); + TestValue("setbit(1, 0, 0)", TYPE_TINYINT, 0); + TestValue("setbit(cast(1 as INT), 8)", TYPE_INT, 257); + TestValue("setbit(cast(1 as INT), 8, 1)", TYPE_INT, 257); + TestValue("setbit(cast(257 as INT), 8, 0)", TYPE_INT, 1); + TestValue("setbit(cast(-1 as BIGINT), 63, 0)", TYPE_BIGINT, + numeric_limits::max()); + // Out of range bitpos causes errors + // TODO - disabled because of IMPALA-1746 + // TestError("setbit(0, -1)", TYPE_TINYINT); + // TestError("setbit(0, 8)", TYPE_TINYINT); + // TestError("setbit(0, -1, 1)", TYPE_TINYINT); + // TestError("setbit(0, 8, 1)", TYPE_TINYINT); + + // Shift and rotate null checks + TestIsNull("shiftleft(1, NULL)", TYPE_TINYINT); + TestIsNull("shiftleft(cast(NULL as INT), 2)", TYPE_INT); + TestIsNull("rotateleft(cast(NULL as INT), 2)", TYPE_INT); + TestIsNull("shiftright(1, NULL)", TYPE_TINYINT); + TestIsNull("shiftright(cast(NULL as INT), 2)", TYPE_INT); + TestIsNull("rotateright(cast(NULL as INT), 2)", TYPE_INT); + + // Basic left shift/rotate tests for all integer types + TestValue("shiftleft(1, 2)", TYPE_TINYINT, 4); + TestValue("rotateleft(1, 2)", TYPE_TINYINT, 4); + string pow2_6 = lexical_cast(1 << 6); + TestValue("shiftleft(" + pow2_6 + ", 2)", TYPE_TINYINT, 0); + TestValue("rotateleft(" + pow2_6 + ", 2)", TYPE_TINYINT, 1); + TestValue("shiftleft(" + pow2_6 + ", 1)", TYPE_TINYINT, -(1 << 7)); + TestValue("rotateleft(" + pow2_6 + ", 1)", TYPE_TINYINT, -(1 << 7)); + TestValue("shiftleft(cast(1 as SMALLINT), 2)", TYPE_SMALLINT, 4); + string pow2_14 = lexical_cast(1 << 14); + TestValue("shiftleft(" + pow2_14 + ", 2)", TYPE_SMALLINT, 0); + TestValue("rotateleft(" + pow2_14 + ", 2)", TYPE_SMALLINT, 1); + TestValue("rotateleft(" + pow2_14 + ", 34)", TYPE_SMALLINT, 1); // Wraparound + TestValue("shiftleft(" + pow2_14 + ", 1)", TYPE_SMALLINT, -(1 << 15)); + TestValue("shiftleft(cast(1 as INT), 2)", TYPE_INT, 4); + string pow2_30 = lexical_cast(1 << 30); + TestValue("shiftleft(" + pow2_30 + ", 2)", TYPE_INT, 0); + TestValue("shiftleft(" + pow2_30 + ", 1)", TYPE_INT, 1 << 31); + TestValue("shiftleft(cast(1 as BIGINT), 2)", TYPE_BIGINT, 4); + string pow2_62 = lexical_cast(((int64_t)1) << 62); + TestValue("shiftleft(" + pow2_62 + ", 2)", TYPE_BIGINT, 0); + TestValue("rotateleft(" + pow2_62 + ", 2)", TYPE_BIGINT, 1); + TestValue("shiftleft(" + pow2_62 + ", 1)", TYPE_BIGINT, + ((int64_t)1) << 63); + + // Basic right shift/rotate tests for all integer types + TestValue("shiftright(4, 2)", TYPE_TINYINT, 1); + TestValue("shiftright(4, 3)", TYPE_TINYINT, 0); + TestValue("rotateright(4, 3)", TYPE_TINYINT, -128); + TestValue("shiftright(4, 4)", TYPE_TINYINT, 0); + TestValue("rotateright(4, 132)", TYPE_TINYINT, 64); + string pow2_8 = lexical_cast(1 << 8); + TestValue("shiftright(" + pow2_8 + ", 1)", TYPE_SMALLINT, 1 << 7); + TestValue("shiftright(" + pow2_8 + ", 9)", TYPE_SMALLINT, 0); + string pow2_16 = lexical_cast(1 << 16); + TestValue("shiftright(" + pow2_16 + ", 1)", TYPE_INT, 1 << 15); + TestValue("rotateright(" + pow2_16 + ", 1)", TYPE_INT, 1 << 15); + TestValue("shiftright(" + pow2_16 + ", 17)", TYPE_INT, 0); + string pow2_32 = lexical_cast(((int64_t)1) << 32); + TestValue("shiftright(" + pow2_32 + ", 1)", TYPE_BIGINT, ((int64_t)1) << 31); + TestValue("rotateright(" + pow2_32 + ", 1)", TYPE_BIGINT, ((int64_t)1) << 31); + TestValue("shiftright(" + pow2_32 + ", 33)", TYPE_BIGINT, 0); + TestValue("rotateright(" + pow2_32 + ", 33)", TYPE_BIGINT, + numeric_limits::min()); + + // Check that no sign extension happens for negative numbers + TestValue("shiftright(cast(-1 as INT), 1)", TYPE_INT, 0x7FFFFFFF); + TestValue("rotateright(-128, 1)", TYPE_TINYINT, 1 << 6); + + // Test shifting/rotating negative amount - should reverse direction + TestValue("shiftleft(4, -2)", TYPE_TINYINT, 1); + TestValue("shiftright(cast(1 as BIGINT), -2)", TYPE_BIGINT, 4); + TestValue("rotateleft(4, -3)", TYPE_TINYINT, -128); + TestValue("rotateright(256, -2)", TYPE_SMALLINT, 1024); + +} + } // namespace impala int main(int argc, char **argv) { diff --git a/be/src/exprs/expr.cc b/be/src/exprs/expr.cc index 7dcd336fb..3542a956f 100644 --- a/be/src/exprs/expr.cc +++ b/be/src/exprs/expr.cc @@ -30,6 +30,7 @@ #include "exprs/expr.h" #include "exprs/expr-context.h" #include "exprs/aggregate-functions.h" +#include "exprs/bit-byte-functions.h" #include "exprs/case-expr.h" #include "exprs/cast-functions.h" #include "exprs/compound-predicates.h" @@ -475,6 +476,7 @@ void Expr::InitBuiltinsDummy() { // from that class in. // TODO: is there a better way to do this? AggregateFunctions::InitNull(NULL, NULL); + BitByteFunctions::CountSet(NULL, TinyIntVal::null()); CastFunctions::CastToBooleanVal(NULL, TinyIntVal::null()); CompoundPredicate::Not(NULL, BooleanVal::null()); ConditionalFunctions::NullIfZero(NULL, TinyIntVal::null()); diff --git a/be/src/util/bit-util.h b/be/src/util/bit-util.h index cddf10dc7..a20761ae8 100644 --- a/be/src/util/bit-util.h +++ b/be/src/util/bit-util.h @@ -18,12 +18,16 @@ #include +#include + #include "common/compiler-util.h" #include "util/cpu-info.h" #include "util/sse-util.h" namespace impala { +using boost::make_unsigned; + /// Utility class to do standard bit tricks /// TODO: is this in boost or something else like that? class BitUtil { @@ -121,6 +125,13 @@ class BitUtil { } } + // Compute correct population count for various-width signed integers + template + static inline int PopcountSigned(T v) { + // Converting to same-width unsigned then extending preserves the bit pattern. + return BitUtil::Popcount(static_cast::type>(v)); + } + /// Returns the 'num_bits' least-significant bits of 'v'. static inline uint64_t TrailingBits(uint64_t v, int num_bits) { if (UNLIKELY(num_bits == 0)) return 0; @@ -191,6 +202,7 @@ class BitUtil { for (int i = 0; i < len; ++i) { d[i] = s[len - i - 1]; } + } /// Converts to big endian format (if not already in big endian) from the @@ -228,6 +240,35 @@ class BitUtil { static inline uint16_t FromBigEndian(uint16_t val) { return val; } #endif + // Logical right shift for signed integer types + // This is needed because the C >> operator does arithmetic right shift + // Negative shift amounts lead to undefined behavior + template + static T ShiftRightLogical(T v, int shift) { + // Conversion to unsigned ensures most significant bits always filled with 0's + return static_cast::type>(v) >> shift; + } + + // Get an specific bit of a numeric type + template + static inline int8_t GetBit(T v, int bitpos) { + T masked = v & (static_cast(0x1) << bitpos); + return static_cast(ShiftRightLogical(masked, bitpos)); + } + + // Set a specific bit to 1 + // Behavior when bitpos is negative is undefined + template + static T SetBit(T v, int bitpos) { + return v | (static_cast(0x1) << bitpos); + } + + // Set a specific bit to 0 + // Behavior when bitpos is negative is undefined + template + static T UnsetBit(T v, int bitpos) { + return v & ~(static_cast(0x1) << bitpos); + } }; } diff --git a/common/function-registry/impala_functions.py b/common/function-registry/impala_functions.py index 8e6bde887..7857910ea 100755 --- a/common/function-registry/impala_functions.py +++ b/common/function-registry/impala_functions.py @@ -554,6 +554,61 @@ visible_functions = [ [['nonnullvalue'], 'BOOLEAN', ['STRING'], '_ZN6impala15IsNullPredicate9IsNotNullIN10impala_udf9StringValEEENS2_10BooleanValEPNS2_15FunctionContextERKT_'], [['nonnullvalue'], 'BOOLEAN', ['TIMESTAMP'], '_ZN6impala15IsNullPredicate9IsNotNullIN10impala_udf12TimestampValEEENS2_10BooleanValEPNS2_15FunctionContextERKT_'], [['nonnullvalue'], 'BOOLEAN', ['DECIMAL'], '_ZN6impala15IsNullPredicate9IsNotNullIN10impala_udf10DecimalValEEENS2_10BooleanValEPNS2_15FunctionContextERKT_'], + + # Bit and Byte functions + # For functions corresponding to builtin operators, we can reuse the implementations + [['bitand'], 'TINYINT', ['TINYINT', 'TINYINT'], 'impala::Operators::Bitand_TinyIntVal_TinyIntVal'], + [['bitand'], 'SMALLINT', ['SMALLINT', 'SMALLINT'], 'impala::Operators::Bitand_SmallIntVal_SmallIntVal'], + [['bitand'], 'INT', ['INT', 'INT'], 'impala::Operators::Bitand_IntVal_IntVal'], + [['bitand'], 'BIGINT', ['BIGINT', 'BIGINT'], 'impala::Operators::Bitand_BigIntVal_BigIntVal'], + [['bitor'], 'TINYINT', ['TINYINT', 'TINYINT'], 'impala::Operators::Bitor_TinyIntVal_TinyIntVal'], + [['bitor'], 'SMALLINT', ['SMALLINT', 'SMALLINT'], 'impala::Operators::Bitor_SmallIntVal_SmallIntVal'], + [['bitor'], 'INT', ['INT', 'INT'], 'impala::Operators::Bitor_IntVal_IntVal'], + [['bitor'], 'BIGINT', ['BIGINT', 'BIGINT'], 'impala::Operators::Bitor_BigIntVal_BigIntVal'], + [['bitxor'], 'TINYINT', ['TINYINT', 'TINYINT'], 'impala::Operators::Bitxor_TinyIntVal_TinyIntVal'], + [['bitxor'], 'SMALLINT', ['SMALLINT', 'SMALLINT'], 'impala::Operators::Bitxor_SmallIntVal_SmallIntVal'], + [['bitxor'], 'INT', ['INT', 'INT'], 'impala::Operators::Bitxor_IntVal_IntVal'], + [['bitxor'], 'BIGINT', ['BIGINT', 'BIGINT'], 'impala::Operators::Bitxor_BigIntVal_BigIntVal'], + [['bitnot'], 'TINYINT', ['TINYINT'], 'impala::Operators::Bitnot_TinyIntVal'], + [['bitnot'], 'SMALLINT', ['SMALLINT'], 'impala::Operators::Bitnot_SmallIntVal'], + [['bitnot'], 'INT', ['INT'], 'impala::Operators::Bitnot_IntVal'], + [['bitnot'], 'BIGINT', ['BIGINT'], 'impala::Operators::Bitnot_BigIntVal'], + [['countset'], 'INT', ['TINYINT'], '_ZN6impala16BitByteFunctions8CountSetIN10impala_udf10TinyIntValEEENS2_6IntValEPNS2_15FunctionContextERKT_'], + [['countset'], 'INT', ['SMALLINT'], '_ZN6impala16BitByteFunctions8CountSetIN10impala_udf11SmallIntValEEENS2_6IntValEPNS2_15FunctionContextERKT_'], + [['countset'], 'INT', ['INT'], '_ZN6impala16BitByteFunctions8CountSetIN10impala_udf6IntValEEES3_PNS2_15FunctionContextERKT_'], + [['countset'], 'INT', ['BIGINT'], '_ZN6impala16BitByteFunctions8CountSetIN10impala_udf9BigIntValEEENS2_6IntValEPNS2_15FunctionContextERKT_'], + [['countset'], 'INT', ['TINYINT', 'INT'], '_ZN6impala16BitByteFunctions8CountSetIN10impala_udf10TinyIntValEEENS2_6IntValEPNS2_15FunctionContextERKT_RKS4_'], + [['countset'], 'INT', ['SMALLINT', 'INT'], '_ZN6impala16BitByteFunctions8CountSetIN10impala_udf11SmallIntValEEENS2_6IntValEPNS2_15FunctionContextERKT_RKS4_'], + [['countset'], 'INT', ['INT', 'INT'], '_ZN6impala16BitByteFunctions8CountSetIN10impala_udf6IntValEEES3_PNS2_15FunctionContextERKT_RKS3_'], + [['countset'], 'INT', ['BIGINT', 'INT'], '_ZN6impala16BitByteFunctions8CountSetIN10impala_udf9BigIntValEEENS2_6IntValEPNS2_15FunctionContextERKT_RKS4_'], + [['getbit'], 'TINYINT', ['TINYINT', 'INT'], '_ZN6impala16BitByteFunctions6GetBitIN10impala_udf10TinyIntValEEES3_PNS2_15FunctionContextERKT_RKNS2_6IntValE'], + [['getbit'], 'TINYINT', ['SMALLINT', 'INT'], '_ZN6impala16BitByteFunctions6GetBitIN10impala_udf11SmallIntValEEENS2_10TinyIntValEPNS2_15FunctionContextERKT_RKNS2_6IntValE'], + [['getbit'], 'TINYINT', ['INT', 'INT'], '_ZN6impala16BitByteFunctions6GetBitIN10impala_udf6IntValEEENS2_10TinyIntValEPNS2_15FunctionContextERKT_RKS3_'], + [['getbit'], 'TINYINT', ['BIGINT', 'INT'], '_ZN6impala16BitByteFunctions6GetBitIN10impala_udf9BigIntValEEENS2_10TinyIntValEPNS2_15FunctionContextERKT_RKNS2_6IntValE'], + [['rotateleft'], 'TINYINT', ['TINYINT', 'INT'], 'impala::BitByteFunctions::RotateLeft'], + [['rotateleft'], 'SMALLINT', ['SMALLINT', 'INT'], 'impala::BitByteFunctions::RotateLeft'], + [['rotateleft'], 'INT', ['INT', 'INT'], 'impala::BitByteFunctions::RotateLeft'], + [['rotateleft'], 'BIGINT', ['BIGINT', 'INT'], 'impala::BitByteFunctions::RotateLeft'], + [['rotateright'], 'TINYINT', ['TINYINT', 'INT'], 'impala::BitByteFunctions::RotateRight'], + [['rotateright'], 'SMALLINT', ['SMALLINT', 'INT'], 'impala::BitByteFunctions::RotateRight'], + [['rotateright'], 'INT', ['INT', 'INT'], 'impala::BitByteFunctions::RotateRight'], + [['rotateright'], 'BIGINT', ['BIGINT', 'INT'], 'impala::BitByteFunctions::RotateRight'], + [['setbit'], 'TINYINT', ['TINYINT', 'INT'], '_ZN6impala16BitByteFunctions6SetBitIN10impala_udf10TinyIntValEEET_PNS2_15FunctionContextERKS4_RKNS2_6IntValE'], + [['setbit'], 'SMALLINT', ['SMALLINT', 'INT'], '_ZN6impala16BitByteFunctions6SetBitIN10impala_udf11SmallIntValEEET_PNS2_15FunctionContextERKS4_RKNS2_6IntValE'], + [['setbit'], 'INT', ['INT', 'INT'], '_ZN6impala16BitByteFunctions6SetBitIN10impala_udf6IntValEEET_PNS2_15FunctionContextERKS4_RKS3_'], + [['setbit'], 'BIGINT', ['BIGINT', 'INT'], '_ZN6impala16BitByteFunctions6SetBitIN10impala_udf9BigIntValEEET_PNS2_15FunctionContextERKS4_RKNS2_6IntValE'], + [['setbit'], 'TINYINT', ['TINYINT', 'INT', 'INT'], '_ZN6impala16BitByteFunctions6SetBitIN10impala_udf10TinyIntValEEET_PNS2_15FunctionContextERKS4_RKNS2_6IntValESB_'], + [['setbit'], 'SMALLINT', ['SMALLINT', 'INT', 'INT'], '_ZN6impala16BitByteFunctions6SetBitIN10impala_udf11SmallIntValEEET_PNS2_15FunctionContextERKS4_RKNS2_6IntValESB_'], + [['setbit'], 'INT', ['INT', 'INT', 'INT'], '_ZN6impala16BitByteFunctions6SetBitIN10impala_udf6IntValEEET_PNS2_15FunctionContextERKS4_RKS3_SA_'], + [['setbit'], 'BIGINT', ['BIGINT', 'INT', 'INT'], '_ZN6impala16BitByteFunctions6SetBitIN10impala_udf9BigIntValEEET_PNS2_15FunctionContextERKS4_RKNS2_6IntValESB_'], + [['shiftleft'], 'TINYINT', ['TINYINT', 'INT'], 'impala::BitByteFunctions::ShiftLeft'], + [['shiftleft'], 'SMALLINT', ['SMALLINT', 'INT'], 'impala::BitByteFunctions::ShiftLeft'], + [['shiftleft'], 'INT', ['INT', 'INT'], 'impala::BitByteFunctions::ShiftLeft'], + [['shiftleft'], 'BIGINT', ['BIGINT', 'INT'], 'impala::BitByteFunctions::ShiftLeft'], + [['shiftright'], 'TINYINT', ['TINYINT', 'INT'], 'impala::BitByteFunctions::ShiftRight'], + [['shiftright'], 'SMALLINT', ['SMALLINT', 'INT'], 'impala::BitByteFunctions::ShiftRight'], + [['shiftright'], 'INT', ['INT', 'INT'], 'impala::BitByteFunctions::ShiftRight'], + [['shiftright'], 'BIGINT', ['BIGINT', 'INT'], 'impala::BitByteFunctions::ShiftRight'], ] invisible_functions = [ diff --git a/fe/src/main/java/com/cloudera/impala/analysis/ArithmeticExpr.java b/fe/src/main/java/com/cloudera/impala/analysis/ArithmeticExpr.java index 3a7b17a69..f8ca9c11a 100644 --- a/fe/src/main/java/com/cloudera/impala/analysis/ArithmeticExpr.java +++ b/fe/src/main/java/com/cloudera/impala/analysis/ArithmeticExpr.java @@ -111,18 +111,13 @@ public class ArithmeticExpr extends Expr { Lists.newArrayList(Type.DECIMAL, Type.DECIMAL), Type.DECIMAL)); - // MOD() and FACTORIAL() are registered as builtins, see impala_functions.py + /* + * MOD(), FACTORIAL(), BITAND(), BITOR(), BITXOR(), and BITNOT() are registered as + * builtins, see impala_functions.py + */ for (Type t: Type.getIntegerTypes()) { db.addBuiltin(ScalarFunction.createBuiltinOperator( Operator.INT_DIVIDE.getName(), Lists.newArrayList(t, t), t)); - db.addBuiltin(ScalarFunction.createBuiltinOperator( - Operator.BITAND.getName(), Lists.newArrayList(t, t), t)); - db.addBuiltin(ScalarFunction.createBuiltinOperator( - Operator.BITOR.getName(), Lists.newArrayList(t, t), t)); - db.addBuiltin(ScalarFunction.createBuiltinOperator( - Operator.BITXOR.getName(), Lists.newArrayList(t, t), t)); - db.addBuiltin(ScalarFunction.createBuiltinOperator( - Operator.BITNOT.getName(), Lists.newArrayList(t), t)); } } diff --git a/testdata/workloads/functional-query/queries/QueryTest/exprs.test b/testdata/workloads/functional-query/queries/QueryTest/exprs.test index f9209b199..475964062 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/exprs.test +++ b/testdata/workloads/functional-query/queries/QueryTest/exprs.test @@ -1889,3 +1889,74 @@ select madlib_vector(1.0, 2.0, NULL); ---- RESULTS ---- CATCH madlib vector entry 2 is NULL +==== +---- QUERY +# Test countset +select tinyint_col, countset(tinyint_col), countset(tinyint_col, 0), + smallint_col, countset(smallint_col), countset(smallint_col, 0), + int_col, countset(int_col), countset(int_col, 0), + bigint_col, countset(bigint_col), countset(bigint_col, 0) +from alltypestiny +where id <= 1 +order by id +---- RESULTS +0,0,8,0,0,16,0,0,32,0,0,64 +1,1,7,1,1,15,1,1,31,10,2,62 +---- TYPES +TINYINT,INT,INT,SMALLINT,INT,INT,INT,INT,INT,BIGINT,INT,INT +==== +---- QUERY +# Test basic bitwise ops +select bitand(tinyint_col, int_col), + bitor(smallint_col, bigint_col), + bitxor(tinyint_col, smallint_col), + bitxor(int_col, bigint_col), + bitxor(int_col, bitnot(int_col)), + bitnot(tinyint_col) +from alltypes +where id <= 3 +order by id +---- RESULTS +0,0,0,0,-1,-1 +1,11,0,11,-1,-2 +2,22,0,22,-1,-3 +3,31,0,29,-1,-4 +---- TYPES +INT,BIGINT,SMALLINT,BIGINT,INT,TINYINT +==== +---- QUERY +# Test getbit and setbit +select bigint_col, + getbit(bigint_col,0), + getbit(bigint_col,1), + getbit(bigint_col,int_col), + setbit(bigint_col,0), + setbit(bigint_col,1,0) +from alltypes +where id <= 3 +order by id +---- RESULTS +0,0,0,0,1,0 +10,0,1,1,11,8 +20,0,0,1,21,20 +30,0,1,1,31,28 +---- TYPES +BIGINT,TINYINT,TINYINT,TINYINT,BIGINT,BIGINT +==== +---- QUERY +# Test shifts and rotates +select int_col, + shiftright(int_col,1), + shiftleft(int_col,2), + rotateleft(int_col,30), + rotateright(int_col,2) +from alltypes +where id <= 3 +order by id +---- RESULTS +0,0,0,0,0 +1,0,4,1073741824,1073741824 +2,1,8,-2147483648,-2147483648 +3,1,12,-1073741824,-1073741824 +---- TYPES +INT,INT,INT,INT,INT