Implemented opcode registry. Added substr() and pi() functions. Added backend testing to buildall.sh

This commit is contained in:
Nong Li
2011-11-16 15:49:42 -08:00
parent a8acd52281
commit b1833d4de8
70 changed files with 1493 additions and 1124 deletions

3
be/.gitignore vendored
View File

@@ -7,9 +7,8 @@ CMakeFiles
cmake_install.cmake
CTestTestfile.cmake
!CMakeLists.txt
Testing/
build
generated-sources
src/exprs/functions.cc
src/exprs/functions.h

View File

@@ -114,6 +114,7 @@ add_subdirectory(src/testutil)
add_subdirectory(src/util)
add_subdirectory(generated-sources/gen-cpp)
add_subdirectory(generated-sources/opcode)
link_directories(
${CMAKE_SOURCE_DIR}/build/common

View File

@@ -20,12 +20,14 @@ add_library(ImpalaThrift
ImpalaService_types.cpp
Descriptors_constants.cpp
Descriptors_types.cpp
Exprs_constants.cpp
Exprs_types.cpp
Opcodes_constants.cpp
Opcodes_types.cpp
PlanNodes_constants.cpp
PlanNodes_types.cpp
Types_constants.cpp
Types_types.cpp
Exprs_constants.cpp
Exprs_types.cpp
)
add_library(thrift STATIC IMPORTED)

View File

@@ -240,7 +240,7 @@ AggregationTuple* AggregationNode::ConstructAggTuple(TupleRow* row) {
// (so that SUM(<col>) stays NULL if <col> only contains NULL values).
for (int i = 0; i < aggregate_exprs_.size(); ++i, ++slot_d) {
AggregateExpr* agg_expr = static_cast<AggregateExpr*>(aggregate_exprs_[i]);
if (agg_expr->op() == TExprOperator::AGG_COUNT) {
if (agg_expr->agg_op() == TAggregationOp::COUNT) {
// we're only aggregating into bigint slots and never return NULL
*reinterpret_cast<int64_t*>(agg_tuple->GetSlot((*slot_d)->tuple_offset())) = 0;
} else {
@@ -360,7 +360,7 @@ void AggregationNode::UpdateAggTuple(AggregationTuple* agg_out_tuple, TupleRow*
}
// deal with COUNT(*) separately (no need to check the actual child expr value)
if (agg_expr->op() == TExprOperator::AGG_COUNT && agg_expr->is_star()) {
if (agg_expr->agg_op() == TAggregationOp::COUNT && agg_expr->is_star()) {
// we're only aggregating into bigint slots
DCHECK_EQ((*slot_d)->type(), TYPE_BIGINT);
++*reinterpret_cast<int64_t*>(slot);
@@ -374,12 +374,12 @@ void AggregationNode::UpdateAggTuple(AggregationTuple* agg_out_tuple, TupleRow*
continue;
}
switch (agg_expr->op()) {
case TExprOperator::AGG_COUNT:
switch (agg_expr->agg_op()) {
case TAggregationOp::COUNT:
++*reinterpret_cast<int64_t*>(slot);
break;
case TExprOperator::AGG_MIN:
case TAggregationOp::MIN:
switch (agg_expr->type()) {
case TYPE_BOOLEAN:
UpdateMinSlot<bool>(tuple, (*slot_d)->null_indicator_offset(), slot, value);
@@ -411,7 +411,7 @@ void AggregationNode::UpdateAggTuple(AggregationTuple* agg_out_tuple, TupleRow*
};
break;
case TExprOperator::AGG_MAX:
case TAggregationOp::MAX:
switch (agg_expr->type()) {
case TYPE_BOOLEAN:
UpdateMaxSlot<bool>(tuple, (*slot_d)->null_indicator_offset(), slot, value);
@@ -443,7 +443,7 @@ void AggregationNode::UpdateAggTuple(AggregationTuple* agg_out_tuple, TupleRow*
};
break;
case TExprOperator::AGG_SUM:
case TAggregationOp::SUM:
switch (agg_expr->type()) {
case TYPE_BOOLEAN:
UpdateSumSlot<bool>(tuple, (*slot_d)->null_indicator_offset(), slot, value);
@@ -472,7 +472,7 @@ void AggregationNode::UpdateAggTuple(AggregationTuple* agg_out_tuple, TupleRow*
break;
default:
DCHECK(false) << "bad aggregate operator: " << agg_expr->op();
DCHECK(false) << "bad aggregate operator: " << agg_expr->agg_op();
}
}
}

View File

@@ -18,14 +18,16 @@ add_library(Exprs
expr.cc
float-literal.cc
function-call.cc
functions.cc
int-literal.cc
is-null-predicate.cc
like-predicate.cc
literal-predicate.cc
math-functions.cc
null-literal.cc
opcode-registry.cc
slot-ref.cc
string-literal.cc
string-functions.cc
)
target_link_libraries(Exprs
@@ -33,22 +35,23 @@ target_link_libraries(Exprs
boost_regex-mt
)
add_custom_command(
OUTPUT functions.cc
COMMAND "./gen-functions.py"
)
add_executable(expr-test
expr-test.cc
)
# The order of link libraries matters. Specifically:
# - Util must come before ImpalaThrift
# - Exprs & Opcode have a circular dependency (Exprs is included twice)
target_link_libraries(expr-test
TestUtil
Service
Exec
Exprs
Opcode
Exprs
Runtime
Common
Util
thrift
ImpalaThrift
MockHdfs

View File

@@ -11,14 +11,24 @@ namespace impala {
AggregateExpr::AggregateExpr(const TExprNode& node)
: Expr(node),
op_(node.op),
agg_op_(node.agg_expr.op),
is_star_(node.agg_expr.is_star),
is_distinct_(node.agg_expr.is_distinct) {
}
// Prepares the child expressions and then validates the aggregation operator.
// Returns the first child failure, or an error Status naming the operator when
// it is TAggregationOp::INVALID; otherwise Status::OK.
Status AggregateExpr::Prepare(RuntimeState* state, const RowDescriptor& desc) {
  RETURN_IF_ERROR(Expr::PrepareChildren(state, desc));
  // Common case first: a valid aggregation op needs no further work here.
  if (agg_op_ != TAggregationOp::INVALID) return Status::OK;

  stringstream msg;
  msg << "AggregateExpr::Prepare: Invalid aggregation op: " << agg_op_;
  return Status(msg.str());
}
string AggregateExpr::DebugString() const {
stringstream out;
out << "AggExpr(op=" << op_ << " star=" << is_star_ << " distinct=" << is_distinct_
out << "AggExpr(star=" << is_star_ << " distinct=" << is_distinct_
<< " " << Expr::DebugString() << ")";
return out.str();
}

View File

@@ -13,7 +13,7 @@ class TExprNode;
class AggregateExpr: public Expr {
public:
TExprOperator::type op() const { return op_; }
TAggregationOp::type agg_op() const { return agg_op_; }
bool is_star() const { return is_star_; }
bool is_distinct() const { return is_distinct_; }
virtual std::string DebugString() const;
@@ -21,10 +21,11 @@ class AggregateExpr: public Expr {
protected:
friend class Expr;
virtual Status Prepare(RuntimeState* state, const RowDescriptor& desc);
AggregateExpr(const TExprNode& node);
private:
const TExprOperator::type op_;
const TAggregationOp::type agg_op_;
const bool is_star_;
const bool is_distinct_;
};

View File

@@ -4,7 +4,6 @@
#include <glog/logging.h>
#include "exprs/arithmetic-expr.h"
#include "exprs/functions.h"
#include "util/debug-util.h"
#include "gen-cpp/Exprs_types.h"
@@ -13,222 +12,17 @@ using namespace std;
namespace impala {
ArithmeticExpr::ArithmeticExpr(const TExprNode& node)
: Expr(node), op_(node.op) {
: Expr(node) {
}
// TODO: replace this with a generic function registry
// (registered by opcode and parameter types)
Status ArithmeticExpr::Prepare(RuntimeState* state, const RowDescriptor& row_desc) {
Expr::Prepare(state, row_desc);
DCHECK(type_ != INVALID_TYPE);
Status ArithmeticExpr::Prepare(RuntimeState* state, const RowDescriptor& desc) {
DCHECK_LE(children_.size(), 2);
DCHECK(children_.size() == 1 || children_[0]->type() == children_[1]->type());
switch (op_) {
case TExprOperator::MULTIPLY:
switch (type()) {
case TYPE_TINYINT:
compute_function_ = GetValueFunctions::ArithmeticExpr_multiply_char;
return Status::OK;
case TYPE_SMALLINT:
compute_function_ = GetValueFunctions::ArithmeticExpr_multiply_short;
return Status::OK;
case TYPE_INT:
compute_function_ = GetValueFunctions::ArithmeticExpr_multiply_int;
return Status::OK;
case TYPE_BIGINT:
compute_function_ = GetValueFunctions::ArithmeticExpr_multiply_long;
return Status::OK;
case TYPE_FLOAT:
compute_function_ = GetValueFunctions::ArithmeticExpr_multiply_float;
return Status::OK;
case TYPE_DOUBLE:
compute_function_ = GetValueFunctions::ArithmeticExpr_multiply_double;
return Status::OK;
default:
DCHECK(false) << "bad MULTIPLY type: " << type();
}
return Status::OK;
case TExprOperator::DIVIDE:
// in "<expr> / <expr>", operands are always cast to double
assert(type_ == TYPE_DOUBLE
&& children_[0]->type() == TYPE_DOUBLE
&& children_[1]->type() == TYPE_DOUBLE);
compute_function_ = GetValueFunctions::ArithmeticExpr_divide_double;
return Status::OK;
case TExprOperator::MOD:
switch (type()) {
case TYPE_TINYINT:
compute_function_ = GetValueFunctions::ArithmeticExpr_mod_char;
return Status::OK;
case TYPE_SMALLINT:
compute_function_ = GetValueFunctions::ArithmeticExpr_mod_short;
return Status::OK;
case TYPE_INT:
compute_function_ = GetValueFunctions::ArithmeticExpr_mod_int;
return Status::OK;
case TYPE_BIGINT:
compute_function_ = GetValueFunctions::ArithmeticExpr_mod_long;
return Status::OK;
default:
DCHECK(false) << "bad MOD type: " << type();
}
return Status::OK;
case TExprOperator::INT_DIVIDE:
switch (type()) {
case TYPE_TINYINT:
compute_function_ = GetValueFunctions::ArithmeticExpr_divide_char;
return Status::OK;
case TYPE_SMALLINT:
compute_function_ = GetValueFunctions::ArithmeticExpr_divide_short;
return Status::OK;
case TYPE_INT:
compute_function_ = GetValueFunctions::ArithmeticExpr_divide_int;
return Status::OK;
case TYPE_BIGINT:
compute_function_ = GetValueFunctions::ArithmeticExpr_divide_long;
return Status::OK;
default:
DCHECK(false) << "bad INT_DIVIDE type: " << type();
}
return Status::OK;
case TExprOperator::PLUS:
switch (type()) {
case TYPE_TINYINT:
compute_function_ = GetValueFunctions::ArithmeticExpr_add_char;
return Status::OK;
case TYPE_SMALLINT:
compute_function_ = GetValueFunctions::ArithmeticExpr_add_short;
return Status::OK;
case TYPE_INT:
compute_function_ = GetValueFunctions::ArithmeticExpr_add_int;
return Status::OK;
case TYPE_BIGINT:
compute_function_ = GetValueFunctions::ArithmeticExpr_add_long;
return Status::OK;
case TYPE_FLOAT:
compute_function_ = GetValueFunctions::ArithmeticExpr_add_float;
return Status::OK;
case TYPE_DOUBLE:
compute_function_ = GetValueFunctions::ArithmeticExpr_add_double;
return Status::OK;
default:
DCHECK(false) << "bad PLUS type: " << type();
}
return Status::OK;
case TExprOperator::MINUS:
switch (type()) {
case TYPE_TINYINT:
compute_function_ = GetValueFunctions::ArithmeticExpr_subtract_char;
return Status::OK;
case TYPE_SMALLINT:
compute_function_ = GetValueFunctions::ArithmeticExpr_subtract_short;
return Status::OK;
case TYPE_INT:
compute_function_ = GetValueFunctions::ArithmeticExpr_subtract_int;
return Status::OK;
case TYPE_BIGINT:
compute_function_ = GetValueFunctions::ArithmeticExpr_subtract_long;
return Status::OK;
case TYPE_FLOAT:
compute_function_ = GetValueFunctions::ArithmeticExpr_subtract_float;
return Status::OK;
case TYPE_DOUBLE:
compute_function_ = GetValueFunctions::ArithmeticExpr_subtract_double;
return Status::OK;
default:
DCHECK(false) << "bad MINUS type: " << type();
}
return Status::OK;
case TExprOperator::BITAND:
switch (type()) {
case TYPE_TINYINT:
compute_function_ = GetValueFunctions::ArithmeticExpr_bitand_char;
return Status::OK;
case TYPE_SMALLINT:
compute_function_ = GetValueFunctions::ArithmeticExpr_bitand_short;
return Status::OK;
case TYPE_INT:
compute_function_ = GetValueFunctions::ArithmeticExpr_bitand_int;
return Status::OK;
case TYPE_BIGINT:
compute_function_ = GetValueFunctions::ArithmeticExpr_bitand_long;
return Status::OK;
default:
DCHECK(false) << "bad BITAND type: " << type();
}
return Status::OK;
case TExprOperator::BITOR:
switch (type()) {
case TYPE_TINYINT:
compute_function_ = GetValueFunctions::ArithmeticExpr_bitor_char;
return Status::OK;
case TYPE_SMALLINT:
compute_function_ = GetValueFunctions::ArithmeticExpr_bitor_short;
return Status::OK;
case TYPE_INT:
compute_function_ = GetValueFunctions::ArithmeticExpr_bitor_int;
return Status::OK;
case TYPE_BIGINT:
compute_function_ = GetValueFunctions::ArithmeticExpr_bitor_long;
return Status::OK;
default:
DCHECK(false) << "bad BITOR type: " << type();
}
return Status::OK;
case TExprOperator::BITXOR:
switch (type()) {
case TYPE_TINYINT:
compute_function_ = GetValueFunctions::ArithmeticExpr_bitxor_char;
return Status::OK;
case TYPE_SMALLINT:
compute_function_ = GetValueFunctions::ArithmeticExpr_bitxor_short;
return Status::OK;
case TYPE_INT:
compute_function_ = GetValueFunctions::ArithmeticExpr_bitxor_int;
return Status::OK;
case TYPE_BIGINT:
compute_function_ = GetValueFunctions::ArithmeticExpr_bitxor_long;
return Status::OK;
default:
DCHECK(false) << "bad BITXOR type: " << type();
}
return Status::OK;
case TExprOperator::BITNOT:
switch (type()) {
case TYPE_TINYINT:
compute_function_ = GetValueFunctions::ArithmeticExpr_bitnot_char;
return Status::OK;
case TYPE_SMALLINT:
compute_function_ = GetValueFunctions::ArithmeticExpr_bitnot_short;
return Status::OK;
case TYPE_INT:
compute_function_ = GetValueFunctions::ArithmeticExpr_bitnot_int;
return Status::OK;
case TYPE_BIGINT:
compute_function_ = GetValueFunctions::ArithmeticExpr_bitnot_long;
return Status::OK;
default:
DCHECK(false) << "bad BITNOT type: " << type();
}
return Status::OK;
default:
DCHECK(false) << "bad arithmetic op: " << op_;
}
return Status::OK;
return Expr::Prepare(state, desc);
}
string ArithmeticExpr::DebugString() const {
stringstream out;
out << "ArithmeticExpr(op=" << op_ << " " << Expr::DebugString() << ")";
out << "ArithmeticExpr(" << Expr::DebugString() << ")";
return out.str();
}

View File

@@ -15,13 +15,10 @@ class ArithmeticExpr: public Expr {
protected:
friend class Expr;
virtual Status Prepare(RuntimeState* state, const RowDescriptor& desc);
ArithmeticExpr(const TExprNode& node);
virtual Status Prepare(RuntimeState* state, const RowDescriptor& row_desc);
virtual std::string DebugString() const;
private:
const TExprOperator::type op_;
};
}

View File

@@ -4,7 +4,6 @@
#include <glog/logging.h>
#include "exprs/binary-predicate.h"
#include "exprs/functions.h"
#include "util/debug-util.h"
#include "gen-cpp/Exprs_types.h"
@@ -13,210 +12,17 @@ using namespace std;
namespace impala {
BinaryPredicate::BinaryPredicate(const TExprNode& node)
: Predicate(node), op_(node.op) {
: Predicate(node) {
}
Status BinaryPredicate::Prepare(RuntimeState* state, const RowDescriptor& row_desc) {
Expr::Prepare(state, row_desc);
PrimitiveType op_type = children_[0]->type();
DCHECK(type_ != INVALID_TYPE);
Status BinaryPredicate::Prepare(RuntimeState* state, const RowDescriptor& desc) {
DCHECK_EQ(children_.size(), 2);
switch (op_) {
case TExprOperator::EQ:
switch (op_type) {
case TYPE_BOOLEAN:
compute_function_ = GetValueFunctions::BinaryPredicate_eq_bool;
return Status::OK;
case TYPE_TINYINT:
compute_function_ = GetValueFunctions::BinaryPredicate_eq_char;
return Status::OK;
case TYPE_SMALLINT:
compute_function_ = GetValueFunctions::BinaryPredicate_eq_short;
return Status::OK;
case TYPE_INT:
compute_function_ = GetValueFunctions::BinaryPredicate_eq_int;
return Status::OK;
case TYPE_BIGINT:
compute_function_ = GetValueFunctions::BinaryPredicate_eq_long;
return Status::OK;
case TYPE_FLOAT:
compute_function_ = GetValueFunctions::BinaryPredicate_eq_float;
return Status::OK;
case TYPE_DOUBLE:
compute_function_ = GetValueFunctions::BinaryPredicate_eq_double;
return Status::OK;
case TYPE_STRING:
compute_function_ = GetValueFunctions::BinaryPredicate_eq_fn_StringValue;
return Status::OK;
default:
DCHECK(false) << "bad EQ type: " << TypeToString(op_type);
}
return Status::OK;
case TExprOperator::NE:
switch (op_type) {
case TYPE_BOOLEAN:
compute_function_ = GetValueFunctions::BinaryPredicate_ne_bool;
return Status::OK;
case TYPE_TINYINT:
compute_function_ = GetValueFunctions::BinaryPredicate_ne_char;
return Status::OK;
case TYPE_SMALLINT:
compute_function_ = GetValueFunctions::BinaryPredicate_ne_short;
return Status::OK;
case TYPE_INT:
compute_function_ = GetValueFunctions::BinaryPredicate_ne_int;
return Status::OK;
case TYPE_BIGINT:
compute_function_ = GetValueFunctions::BinaryPredicate_ne_long;
return Status::OK;
case TYPE_FLOAT:
compute_function_ = GetValueFunctions::BinaryPredicate_ne_float;
return Status::OK;
case TYPE_DOUBLE:
compute_function_ = GetValueFunctions::BinaryPredicate_ne_double;
return Status::OK;
case TYPE_STRING:
compute_function_ = GetValueFunctions::BinaryPredicate_ne_fn_StringValue;
return Status::OK;
default:
DCHECK(false) << "bad NE type: " << TypeToString(op_type);
}
return Status::OK;
case TExprOperator::LE:
switch (op_type) {
case TYPE_BOOLEAN:
compute_function_ = GetValueFunctions::BinaryPredicate_le_bool;
return Status::OK;
case TYPE_TINYINT:
compute_function_ = GetValueFunctions::BinaryPredicate_le_char;
return Status::OK;
case TYPE_SMALLINT:
compute_function_ = GetValueFunctions::BinaryPredicate_le_short;
return Status::OK;
case TYPE_INT:
compute_function_ = GetValueFunctions::BinaryPredicate_le_int;
return Status::OK;
case TYPE_BIGINT:
compute_function_ = GetValueFunctions::BinaryPredicate_le_long;
return Status::OK;
case TYPE_FLOAT:
compute_function_ = GetValueFunctions::BinaryPredicate_le_float;
return Status::OK;
case TYPE_DOUBLE:
compute_function_ = GetValueFunctions::BinaryPredicate_le_double;
return Status::OK;
case TYPE_STRING:
compute_function_ = GetValueFunctions::BinaryPredicate_le_fn_StringValue;
return Status::OK;
default:
DCHECK(false) << "bad LE type: " << TypeToString(op_type);
}
return Status::OK;
case TExprOperator::GE:
switch (op_type) {
case TYPE_BOOLEAN:
compute_function_ = GetValueFunctions::BinaryPredicate_ge_bool;
return Status::OK;
case TYPE_TINYINT:
compute_function_ = GetValueFunctions::BinaryPredicate_ge_char;
return Status::OK;
case TYPE_SMALLINT:
compute_function_ = GetValueFunctions::BinaryPredicate_ge_short;
return Status::OK;
case TYPE_INT:
compute_function_ = GetValueFunctions::BinaryPredicate_ge_int;
return Status::OK;
case TYPE_BIGINT:
compute_function_ = GetValueFunctions::BinaryPredicate_ge_long;
return Status::OK;
case TYPE_FLOAT:
compute_function_ = GetValueFunctions::BinaryPredicate_ge_float;
return Status::OK;
case TYPE_DOUBLE:
compute_function_ = GetValueFunctions::BinaryPredicate_ge_double;
return Status::OK;
case TYPE_STRING:
compute_function_ = GetValueFunctions::BinaryPredicate_ge_fn_StringValue;
return Status::OK;
default:
DCHECK(false) << "bad GE type: " << TypeToString(op_type);
}
return Status::OK;
case TExprOperator::LT:
switch (op_type) {
case TYPE_BOOLEAN:
compute_function_ = GetValueFunctions::BinaryPredicate_lt_bool;
return Status::OK;
case TYPE_TINYINT:
compute_function_ = GetValueFunctions::BinaryPredicate_lt_char;
return Status::OK;
case TYPE_SMALLINT:
compute_function_ = GetValueFunctions::BinaryPredicate_lt_short;
return Status::OK;
case TYPE_INT:
compute_function_ = GetValueFunctions::BinaryPredicate_lt_int;
return Status::OK;
case TYPE_BIGINT:
compute_function_ = GetValueFunctions::BinaryPredicate_lt_long;
return Status::OK;
case TYPE_FLOAT:
compute_function_ = GetValueFunctions::BinaryPredicate_lt_float;
return Status::OK;
case TYPE_DOUBLE:
compute_function_ = GetValueFunctions::BinaryPredicate_lt_double;
return Status::OK;
case TYPE_STRING:
compute_function_ = GetValueFunctions::BinaryPredicate_lt_fn_StringValue;
return Status::OK;
default:
DCHECK(false) << "bad LT type: " << TypeToString(op_type);
}
return Status::OK;
case TExprOperator::GT:
switch (op_type) {
case TYPE_BOOLEAN:
compute_function_ = GetValueFunctions::BinaryPredicate_gt_bool;
return Status::OK;
case TYPE_TINYINT:
compute_function_ = GetValueFunctions::BinaryPredicate_gt_char;
return Status::OK;
case TYPE_SMALLINT:
compute_function_ = GetValueFunctions::BinaryPredicate_gt_short;
return Status::OK;
case TYPE_INT:
compute_function_ = GetValueFunctions::BinaryPredicate_gt_int;
return Status::OK;
case TYPE_BIGINT:
compute_function_ = GetValueFunctions::BinaryPredicate_gt_long;
return Status::OK;
case TYPE_FLOAT:
compute_function_ = GetValueFunctions::BinaryPredicate_gt_float;
return Status::OK;
case TYPE_DOUBLE:
compute_function_ = GetValueFunctions::BinaryPredicate_gt_double;
return Status::OK;
case TYPE_STRING:
compute_function_ = GetValueFunctions::BinaryPredicate_gt_fn_StringValue;
return Status::OK;
default:
DCHECK(false) << "bad GT type: " << TypeToString(op_type);
}
return Status::OK;
default:
DCHECK(false) << "bad binary predicate op: " << op_;
}
return Status::OK;
return Expr::Prepare(state, desc);
}
string BinaryPredicate::DebugString() const {
stringstream out;
out << "BinaryPredicate(op=" << op_ << " " << Expr::DebugString() << ")";
out << "BinaryPredicate(" << Expr::DebugString() << ")";
return out.str();
}

View File

@@ -15,11 +15,8 @@ class BinaryPredicate : public Predicate {
BinaryPredicate(const TExprNode& node);
virtual Status Prepare(RuntimeState* state, const RowDescriptor& row_desc);
virtual Status Prepare(RuntimeState* state, const RowDescriptor& desc);
virtual std::string DebugString() const;
private:
const TExprOperator::type op_;
};
}

View File

@@ -26,7 +26,7 @@ void* BoolLiteral::ReturnValue(Expr* e, TupleRow* row) {
}
Status BoolLiteral::Prepare(RuntimeState* state, const RowDescriptor& row_desc) {
Expr::Prepare(state, row_desc);
DCHECK_EQ(children_.size(), 0);
compute_function_ = ReturnValue;
return Status::OK;
}

View File

@@ -20,7 +20,6 @@ CaseExpr::CaseExpr(const TExprNode& node)
}
// Prepares the child expressions and binds this node's evaluation routine.
// CASE is wired directly to ComputeFunction instead of going through
// Expr::Prepare(), so the children have to be prepared explicitly here.
// Returns the first error reported by a child, or Status::OK.
Status CaseExpr::Prepare(RuntimeState* state, const RowDescriptor& row_desc) {
  // Propagate child failures; previously the result of preparing the children
  // was discarded (or the children were not prepared at all).
  RETURN_IF_ERROR(Expr::PrepareChildren(state, row_desc));
  compute_function_ = ComputeFunction;
  return Status::OK;
}

View File

@@ -23,6 +23,7 @@ class CaseExpr: public Expr {
const bool has_case_expr_;
const bool has_else_expr_;
static void* ComputeFunction(Expr* e, TupleRow* row);
};

View File

@@ -4,205 +4,19 @@
#include <glog/logging.h>
#include "exprs/cast-expr.h"
#include "exprs/functions.h"
#include "gen-cpp/Exprs_types.h"
using namespace std;
namespace impala {
// TODO: generate cast eval functions between all legal combinations of source
// and target type
CastExpr::CastExpr(const TExprNode& node)
: Expr(node) {
}
Status CastExpr::Prepare(RuntimeState* state, const RowDescriptor& row_desc) {
Expr::Prepare(state, row_desc);
DCHECK(type_ != INVALID_TYPE);
DCHECK_LE(children_.size(), 1);
switch (children_[0]->type()) {
case TYPE_TINYINT:
switch (type_) {
case TYPE_SMALLINT:
compute_function_ = GetValueFunctions::Cast_char_short;
return Status::OK;
case TYPE_INT:
compute_function_ = GetValueFunctions::Cast_char_int;
return Status::OK;
case TYPE_BIGINT:
compute_function_ = GetValueFunctions::Cast_char_long;
return Status::OK;
case TYPE_FLOAT:
compute_function_ = GetValueFunctions::Cast_char_float;
return Status::OK;
case TYPE_DOUBLE:
compute_function_ = GetValueFunctions::Cast_char_double;
return Status::OK;
case TYPE_STRING:
compute_function_ = GetValueFunctions::Cast_char_StringValue;
return Status::OK;
default:
DCHECK(false) << "bad cast type: " << TypeToString(type_);
}
return Status::OK;
case TYPE_SMALLINT:
switch (type_) {
case TYPE_TINYINT:
compute_function_ = GetValueFunctions::Cast_short_char;
return Status::OK;
case TYPE_INT:
compute_function_ = GetValueFunctions::Cast_short_int;
return Status::OK;
case TYPE_BIGINT:
compute_function_ = GetValueFunctions::Cast_short_long;
return Status::OK;
case TYPE_FLOAT:
compute_function_ = GetValueFunctions::Cast_short_float;
return Status::OK;
case TYPE_DOUBLE:
compute_function_ = GetValueFunctions::Cast_short_double;
return Status::OK;
case TYPE_STRING:
compute_function_ = GetValueFunctions::Cast_short_StringValue;
return Status::OK;
default:
DCHECK(false) << "bad cast type: " << TypeToString(type_);
}
return Status::OK;
case TYPE_INT:
switch (type_) {
case TYPE_TINYINT:
compute_function_ = GetValueFunctions::Cast_int_char;
return Status::OK;
case TYPE_SMALLINT:
compute_function_ = GetValueFunctions::Cast_int_short;
return Status::OK;
case TYPE_BIGINT:
compute_function_ = GetValueFunctions::Cast_int_long;
return Status::OK;
case TYPE_FLOAT:
compute_function_ = GetValueFunctions::Cast_int_float;
return Status::OK;
case TYPE_DOUBLE:
compute_function_ = GetValueFunctions::Cast_int_double;
return Status::OK;
case TYPE_STRING:
compute_function_ = GetValueFunctions::Cast_int_StringValue;
return Status::OK;
default:
DCHECK(false) << "bad cast type: " << TypeToString(type_);
}
return Status::OK;
case TYPE_BIGINT:
switch (type_) {
case TYPE_TINYINT:
compute_function_ = GetValueFunctions::Cast_long_char;
return Status::OK;
case TYPE_SMALLINT:
compute_function_ = GetValueFunctions::Cast_long_short;
return Status::OK;
case TYPE_INT:
compute_function_ = GetValueFunctions::Cast_long_int;
return Status::OK;
case TYPE_FLOAT:
compute_function_ = GetValueFunctions::Cast_long_float;
return Status::OK;
case TYPE_DOUBLE:
compute_function_ = GetValueFunctions::Cast_long_double;
return Status::OK;
case TYPE_STRING:
compute_function_ = GetValueFunctions::Cast_long_StringValue;
return Status::OK;
default:
DCHECK(false) << "bad cast type: " << TypeToString(type_);
}
return Status::OK;
case TYPE_FLOAT:
switch (type_) {
case TYPE_TINYINT:
compute_function_ = GetValueFunctions::Cast_float_char;
return Status::OK;
case TYPE_SMALLINT:
compute_function_ = GetValueFunctions::Cast_float_short;
return Status::OK;
case TYPE_INT:
compute_function_ = GetValueFunctions::Cast_float_int;
return Status::OK;
case TYPE_BIGINT:
compute_function_ = GetValueFunctions::Cast_float_long;
return Status::OK;
case TYPE_DOUBLE:
compute_function_ = GetValueFunctions::Cast_float_double;
return Status::OK;
case TYPE_STRING:
compute_function_ = GetValueFunctions::Cast_float_StringValue;
return Status::OK;
default:
DCHECK(false) << "bad cast type: " << TypeToString(type_);
}
return Status::OK;
case TYPE_DOUBLE:
switch (type_) {
case TYPE_TINYINT:
compute_function_ = GetValueFunctions::Cast_double_char;
return Status::OK;
case TYPE_SMALLINT:
compute_function_ = GetValueFunctions::Cast_double_short;
return Status::OK;
case TYPE_INT:
compute_function_ = GetValueFunctions::Cast_double_int;
return Status::OK;
case TYPE_BIGINT:
compute_function_ = GetValueFunctions::Cast_double_long;
return Status::OK;
case TYPE_FLOAT:
compute_function_ = GetValueFunctions::Cast_double_float;
return Status::OK;
case TYPE_STRING:
compute_function_ = GetValueFunctions::Cast_double_StringValue;
return Status::OK;
default:
DCHECK(false) << "bad cast type: " << TypeToString(type_);
}
return Status::OK;
case TYPE_STRING:
switch (type_) {
case TYPE_TINYINT:
compute_function_ = GetValueFunctions::Cast_StringValue_char;
return Status::OK;
case TYPE_SMALLINT:
compute_function_ = GetValueFunctions::Cast_StringValue_short;
return Status::OK;
case TYPE_INT:
compute_function_ = GetValueFunctions::Cast_StringValue_int;
return Status::OK;
case TYPE_BIGINT:
compute_function_ = GetValueFunctions::Cast_StringValue_long;
return Status::OK;
case TYPE_FLOAT:
compute_function_ = GetValueFunctions::Cast_StringValue_float;
return Status::OK;
case TYPE_DOUBLE:
compute_function_ = GetValueFunctions::Cast_StringValue_double;
return Status::OK;
default:
DCHECK(false) << "bad cast type: " << TypeToString(type_);
}
return Status::OK;
default:
DCHECK(false) << "bad cast child type: " << TypeToString(children_[0]->type());
}
return Status::OK;
// Checks (debug builds only) that the cast has exactly one operand, then hands
// the remaining preparation to Expr::Prepare() and returns its result.
Status CastExpr::Prepare(RuntimeState* state, const RowDescriptor& desc) {
  DCHECK_EQ(children_.size(), 1);
  Status base_status = Expr::Prepare(state, desc);
  return base_status;
}
string CastExpr::DebugString() const {

View File

@@ -12,7 +12,7 @@ class TExprNode;
class CastExpr: public Expr {
public:
virtual Status Prepare(RuntimeState* state, const RowDescriptor& row_desc);
virtual Status Prepare(RuntimeState* state, const RowDescriptor& desc);
virtual std::string DebugString() const;
protected:

View File

@@ -11,13 +11,18 @@ using namespace std;
namespace impala {
CompoundPredicate::CompoundPredicate(const TExprNode& node)
: Predicate(node), op_(node.op) {
: Predicate(node) {
}
// Checks (debug builds only) that the predicate has at most two operands, then
// hands the remaining preparation to Expr::Prepare() and returns its result.
Status CompoundPredicate::Prepare(RuntimeState* state, const RowDescriptor& desc) {
  DCHECK_LE(children_.size(), 2);
  Status base_status = Expr::Prepare(state, desc);
  return base_status;
}
void* CompoundPredicate::AndComputeFunction(Expr* e, TupleRow* row) {
CompoundPredicate* p = static_cast<CompoundPredicate*>(e);
// assert(p->children_.size() == 2);
// assert(p->op_ == TExprOperator::AND);
DCHECK_EQ(p->children_.size(), 2);
DCHECK_EQ(p->opcode_, TExprOpcode::COMPOUND_AND);
Expr* op1 = e->children()[0];
bool* val1 = reinterpret_cast<bool*>(op1->GetValue(row));
Expr* op2 = e->children()[1];
@@ -37,8 +42,8 @@ void* CompoundPredicate::AndComputeFunction(Expr* e, TupleRow* row) {
void* CompoundPredicate::OrComputeFunction(Expr* e, TupleRow* row) {
CompoundPredicate* p = static_cast<CompoundPredicate*>(e);
// assert(p->children_.size() == 2);
// assert(p->op_ == TExprOperator::OR);
DCHECK_EQ(p->children_.size(), 2);
DCHECK_EQ(p->opcode_, TExprOpcode::COMPOUND_OR);
Expr* op1 = e->children()[0];
bool* val1 = reinterpret_cast<bool*>(op1->GetValue(row));
Expr* op2 = e->children()[1];
@@ -58,8 +63,8 @@ void* CompoundPredicate::OrComputeFunction(Expr* e, TupleRow* row) {
void* CompoundPredicate::NotComputeFunction(Expr* e, TupleRow* row) {
CompoundPredicate* p = static_cast<CompoundPredicate*>(e);
// assert(p->children_.size() == 1);
// assert(p->op_ == TExprOperator::NOT);
DCHECK_EQ(p->children_.size(), 1);
DCHECK_EQ(p->opcode_, TExprOpcode::COMPOUND_NOT);
Expr* op = e->children()[0];
bool* val = reinterpret_cast<bool*>(op->GetValue(row));
if (val == NULL) return NULL;
@@ -67,29 +72,9 @@ void* CompoundPredicate::NotComputeFunction(Expr* e, TupleRow* row) {
return &p->result_.bool_val;
}
Status CompoundPredicate::Prepare(RuntimeState* state, const RowDescriptor& row_desc) {
Expr::Prepare(state, row_desc);
DCHECK(type_ != INVALID_TYPE);
DCHECK_LE(children_.size(), 2);
switch (op_) {
case TExprOperator::AND:
compute_function_ = AndComputeFunction;
return Status::OK;
case TExprOperator::OR:
compute_function_ = OrComputeFunction;
return Status::OK;
case TExprOperator::NOT:
compute_function_ = NotComputeFunction;
return Status::OK;
default:
DCHECK(false) << "Invalid compound predicate op: " << op_;
}
return Status::OK;
}
string CompoundPredicate::DebugString() const {
stringstream out;
out << "CompoundPredicate(op=" << op_ << " " << Expr::DebugString() << ")";
out << "CompoundPredicate(" << Expr::DebugString() << ")";
return out.str();
}

View File

@@ -15,11 +15,11 @@ class CompoundPredicate: public Predicate {
CompoundPredicate(const TExprNode& node);
virtual Status Prepare(RuntimeState* state, const RowDescriptor& row_desc);
virtual Status Prepare(RuntimeState* state, const RowDescriptor& desc);
virtual std::string DebugString() const;
private:
const TExprOperator::type op_;
friend class OpcodeRegistry;
static void* AndComputeFunction(Expr* e, TupleRow* row);
static void* OrComputeFunction(Expr* e, TupleRow* row);

View File

@@ -558,6 +558,22 @@ TEST_F(ExprTest, LikePredicate) {
TestValue("'\\\\a' LIKE '\\\\\\_'", TYPE_BOOLEAN, false);
}
// Exercises substring(str, pos[, len]) through the expression test fixture.
// The expectations below pin these semantics: positions are 1-based, a negative
// position counts back from the end of the string, and position 0 or a position
// outside the string yields the empty string.
TEST_F(ExprTest, StringFunctions) {
// Two-argument form: everything from the position to the end of the string.
TestStringValue("substring('Hello', 1)", "Hello");
TestStringValue("substring('Hello', -2)", "lo");
TestStringValue("substring('Hello', 0)", "");
TestStringValue("substring('Hello', -5)", "Hello");
TestStringValue("substring('Hello', -6)", "");
TestStringValue("substring('Hello', 100)", "");
// Three-argument form: a length that runs past the end is cut off at the end.
TestStringValue("substring('Hello', 1, 1)", "H");
TestStringValue("substring('Hello', 2, 100)", "ello");
TestStringValue("substring('Hello', -3, 2)", "ll");
// TODO: test NULL arguments; currently we can't parse them inside function calls
}
// Checks that pi() evaluates to a TYPE_DOUBLE value equal to the M_PI constant.
TEST_F(ExprTest, MathFunctions) {
TestValue("pi()", TYPE_DOUBLE, M_PI);
}
}
int main(int argc, char **argv) {

View File

@@ -21,6 +21,7 @@
#include "exprs/like-predicate.h"
#include "exprs/literal-predicate.h"
#include "exprs/null-literal.h"
#include "exprs/opcode-registry.h"
#include "exprs/string-literal.h"
#include "gen-cpp/Exprs_types.h"
#include "gen-cpp/ImpalaService_types.h"
@@ -37,17 +38,20 @@ bool ParseString(const string& str, T* val) {
}
Expr::Expr(PrimitiveType type)
: is_slotref_(false),
: opcode_(TExprOpcode::INVALID_OPCODE),
is_slotref_(false),
type_(type) {
}
Expr::Expr(const TExprNode& node)
: is_slotref_(false),
: opcode_(node.__isset.opcode ? node.opcode : TExprOpcode::INVALID_OPCODE),
is_slotref_(false),
type_(ThriftToType(node.type)) {
}
Expr::Expr(const TExprNode& node, bool is_slotref)
: is_slotref_(is_slotref),
: opcode_(node.__isset.opcode ? node.opcode : TExprOpcode::INVALID_OPCODE),
is_slotref_(is_slotref),
type_(ThriftToType(node.type)) {
}
@@ -190,16 +194,10 @@ Status Expr::CreateExpr(ObjectPool* pool, const TExprNode& texpr_node, Expr** ex
return Status::OK;
}
case TExprNodeType::ARITHMETIC_EXPR: {
if (!texpr_node.__isset.op) {
return Status("Arithmetic expression not set in thrift node");
}
*expr = pool->Add(new ArithmeticExpr(texpr_node));
return Status::OK;
}
case TExprNodeType::BINARY_PRED: {
if (!texpr_node.__isset.op) {
return Status("Binary predicate not set in thrift node");
}
*expr = pool->Add(new BinaryPredicate(texpr_node));
return Status::OK;
}
@@ -258,9 +256,6 @@ Status Expr::CreateExpr(ObjectPool* pool, const TExprNode& texpr_node, Expr** ex
return Status::OK;
}
case TExprNodeType::LIKE_PRED: {
if (!texpr_node.__isset.op) {
return Status("Like predicate not set in thrift node");
}
*expr = pool->Add(new LikePredicate(texpr_node));
return Status::OK;
}
@@ -356,13 +351,27 @@ void Expr::PrintValue(void* value, string* str) {
RawValue::PrintValue(value, type_, str);
}
Status Expr::Prepare(RuntimeState* state, const RowDescriptor& row_desc) {
// Calls Prepare() on each child of this expr, in order, and returns the first
// non-OK status encountered. Nothing is done for this expr itself beyond the
// sanity check that its type has been set.
Status Expr::PrepareChildren(RuntimeState* state, const RowDescriptor& row_desc) {
  DCHECK(type_ != INVALID_TYPE);
  typedef std::vector<Expr*>::const_iterator ChildIterator;
  for (ChildIterator child = children_.begin(); child != children_.end(); ++child) {
    RETURN_IF_ERROR((*child)->Prepare(state, row_desc));
  }
  return Status::OK;
}
// Default Prepare(): prepares all children, then resolves this expr's
// compute_function_ by looking up opcode_ in the OpcodeRegistry.
// Returns an error status if a child fails to prepare or if the opcode has no
// registered function.
Status Expr::Prepare(RuntimeState* state, const RowDescriptor& row_desc) {
  // Propagate child failures; dropping the returned Status would hide them.
  RETURN_IF_ERROR(PrepareChildren(state, row_desc));
  // Not all exprs have opcodes (e.g. literals, agg-exprs); an expr without one
  // must not reach this default implementation.
  DCHECK(opcode_ != TExprOpcode::INVALID_OPCODE);
  compute_function_ = OpcodeRegistry::Instance()->GetFunction(opcode_);
  if (compute_function_ == NULL) {
    stringstream out;
    out << "Expr::Prepare(): Opcode: " << opcode_ << " does not have a registry entry. ";
    return Status(out.str());
  }
  return Status::OK;
}
Status Expr::Prepare(const std::vector<Expr*>& exprs, RuntimeState* state,
const RowDescriptor& row_desc) {
for (int i = 0; i < exprs.size(); ++i) {
@@ -375,6 +384,9 @@ string Expr::DebugString() const {
// TODO: implement partial debug string for member vars
stringstream out;
out << "type=" << TypeToString(type_);
if (opcode_ != TExprOpcode::INVALID_OPCODE) {
out << " opcode=" << opcode_;
}
if (!children_.empty()) {
out << " children=" << DebugString(children_);
}

View File

@@ -7,6 +7,7 @@
#include <vector>
#include "common/status.h"
#include "gen-cpp/Opcodes_types.h"
#include "runtime/descriptors.h"
#include "runtime/tuple.h"
#include "runtime/tuple-row.h"
@@ -14,6 +15,7 @@
namespace impala {
class Expr;
class ObjectPool;
class RowDescriptor;
class RuntimeState;
@@ -75,8 +77,11 @@ struct ExprValue {
// This is the superclass of all expr evaluation nodes.
class Expr {
public:
// typedef for compute functions.
typedef void* (*ComputeFunction)(Expr*, TupleRow*);
// Prepare expr tree for evaluation. In particular, set compute_function_.
// This implementation simply invokes it recursively for the entire tree.
// Prepare should be invoked recursively on the expr tree.
// Return OK if successful, otherwise return error status.
virtual Status Prepare(RuntimeState* state, const RowDescriptor& row_desc);
@@ -109,6 +114,8 @@ class Expr {
PrimitiveType type() const { return type_; }
const std::vector<Expr*>& children() const { return children_; }
TExprOpcode::type op() const { return opcode_; }
// Returns true if expr doesn't contain slotrefs, ie, can be evaluated
// with GetValue(NULL). The default implementation returns true if all of
// the children are constant.
@@ -143,16 +150,25 @@ class Expr {
static std::string DebugString(const std::vector<Expr*>& exprs);
protected:
friend class GetValueFunctions;
friend class ComputeFunctions;
friend class MathFunctions;
friend class StringFunctions;
Expr(PrimitiveType type);
Expr(const TExprNode& node);
Expr(const TExprNode& node, bool is_slotref);
// Helper function that just calls prepare on all the children
// Does not do anything to this expr itself.
// Return OK if successful, otherwise return error status.
Status PrepareChildren(RuntimeState* state, const RowDescriptor& row_desc);
// function to evaluate expr; typically set in Prepare()
typedef void* (*ComputeFunction)(Expr*, TupleRow*);
ComputeFunction compute_function_;
// function opcode
TExprOpcode::type opcode_;
// recognize if this node is a slotref in order to speed up GetValue()
const bool is_slotref_;
// analysis is done, types are fixed at this point

View File

@@ -51,7 +51,7 @@ void* FloatLiteral::ReturnDoubleValue(Expr* e, TupleRow* row) {
}
Status FloatLiteral::Prepare(RuntimeState* state, const RowDescriptor& row_desc) {
Expr::Prepare(state, row_desc);
DCHECK_EQ(children_.size(), 0);
switch (type_) {
case TYPE_FLOAT:
compute_function_ = ReturnFloatValue;

View File

@@ -1,10 +1,9 @@
// Copyright (c) 2011 Cloudera, Inc. All rights reserved.
#include "function-call.h"
#include <sstream>
#include <glog/logging.h>
#include "gen-cpp/Exprs_types.h"
#include "exprs/function-call.h"
using namespace std;

View File

@@ -1,256 +0,0 @@
#!/usr/bin/env python
from string import Template
# operators/functions and their names
operators = {
'add': '+',
'subtract': '-',
'multiply': '*',
'divide': '/',
'mod': '%',
'bitand': '&',
'bitor': '|',
'bitxor': '^',
'bitnot': '~',
'eq': '==',
'ne': '!=',
'le': '<=',
'ge': '>=',
'lt': '<',
'gt': '>',
'eq_fn': 'Eq',
'ne_fn': 'Ne',
'le_fn': 'Le',
'ge_fn': 'Ge',
'lt_fn': 'Lt',
'gt_fn': 'Gt',
}
# map of signatures (operand types and return type/result field)
op_signatures = {
'tinyint_op': ('char', 'tinyint_val'),
'smallint_op': ('short', 'smallint_val'),
'int_op': ('int', 'int_val'),
'bigint_op': ('long', 'bigint_val'),
'float_op': ('float', 'float_val'),
'double_op': ('double', 'double_val'),
'string_op': ('string', 'string_val'),
'bool_pred': ('bool', 'bool_val'),
'tinyint_pred': ('char', 'bool_val'),
'smallint_pred': ('short', 'bool_val'),
'int_pred': ('int', 'bool_val'),
'bigint_pred': ('long', 'bool_val'),
'float_pred': ('float', 'bool_val'),
'double_pred': ('double', 'bool_val'),
'string_pred': ('StringValue', 'bool_val'),
}
# map from native type to corresponding result field
result_fields = {
'bool': 'bool_val',
'char': 'tinyint_val',
'short': 'smallint_val',
'int': 'int_val',
'long': 'bigint_val',
'float': 'float_val',
'double': 'double_val',
'StringValue': 'string_val'
}
binary_op_invocations = [
('ArithmeticExpr',
['add', 'subtract', 'multiply'],
['tinyint_op', 'smallint_op', 'int_op', 'bigint_op', 'float_op', 'double_op']),
('ArithmeticExpr',
['divide'],
['double_op']),
('ArithmeticExpr',
['mod', 'divide', 'bitand', 'bitor', 'bitxor'],
['tinyint_op', 'smallint_op', 'int_op', 'bigint_op']),
('BinaryPredicate',
['eq', 'ne', 'le', 'ge', 'lt', 'gt'],
['bool_pred', 'tinyint_pred', 'smallint_pred', 'int_pred', 'bigint_pred', 'float_pred', 'double_pred']),
]
member_fn_invocations = [
('BinaryPredicate',
['eq_fn', 'ne_fn', 'le_fn', 'ge_fn', 'lt_fn', 'gt_fn'],
['string_pred']),
]
unary_op_invocations = [
('ArithmeticExpr',
['bitnot'],
['tinyint_op', 'smallint_op', 'int_op', 'bigint_op']),
]
binary_op_template = Template("\
void* GetValueFunctions::${function_name}(Expr* e, TupleRow* row) {\n\
${expr_class}* expr = static_cast<${expr_class}*>(e);\n\
// assert(p->children_.size() == 2);\n\
Expr* op1 = e->children()[0];\n\
${native_type}* val1 = reinterpret_cast<${native_type}*>(op1->GetValue(row));\n\
Expr* op2 = e->children()[1];\n\
${native_type}* val2 = reinterpret_cast<${native_type}*>(op2->GetValue(row));\n\
if (val1 == NULL || val2 == NULL) return NULL;\n\
expr->result_.${result_field} = *val1 ${op} *val2;\n\
return &expr->result_.${result_field};\n\
}\n")
member_fn_template = Template("\
void* GetValueFunctions::${function_name}(Expr* e, TupleRow* row) {\n\
${expr_class}* expr = static_cast<${expr_class}*>(e);\n\
// assert(p->children_.size() == 2);\n\
Expr* op1 = e->children()[0];\n\
${native_type}* val1 = reinterpret_cast<${native_type}*>(op1->GetValue(row));\n\
Expr* op2 = e->children()[1];\n\
${native_type}* val2 = reinterpret_cast<${native_type}*>(op2->GetValue(row));\n\
if (val1 == NULL || val2 == NULL) return NULL;\n\
expr->result_.${result_field} = val1->${op}(*val2);\n\
return &expr->result_.${result_field};\n\
}\n")
unary_op_template = Template("\
void* GetValueFunctions::${function_name}(Expr* e, TupleRow* row) {\n\
${expr_class}* expr = static_cast<${expr_class}*>(e);\n\
// assert(p->children_.size() == 1);\n\
Expr* op = e->children()[0];\n\
${native_type}* val = reinterpret_cast<${native_type}*>(op->GetValue(row));\n\
if (val == NULL) return NULL;\n\
expr->result_.${result_field} = ${op} *val;\n\
return &expr->result_.${result_field};\n\
}\n")
cast_template = Template("\
void* GetValueFunctions::${function_name}(Expr* e, TupleRow* row) {\n\
// assert(p->children_.size() == 1);\n\
Expr* op = e->children()[0];\n\
${native_type}* val = reinterpret_cast<${native_type}*>(op->GetValue(row));\n\
if (val == NULL) return NULL;\n\
e->result_.${result_field} = *val;\n\
return &e->result_.${result_field};\n\
}\n")
string_to_numeric_cast_template = Template("\
void* GetValueFunctions::${function_name}(Expr* e, TupleRow* row) {\n\
// assert(p->children_.size() == 1);\n\
Expr* op = e->children()[0];\n\
StringValue* val = reinterpret_cast<StringValue*>(op->GetValue(row));\n\
if (val == NULL) return NULL;\n\
std::string tmp(val->ptr, val->len);\n\
try {\n\
e->result_.${result_field} = boost::lexical_cast<${result_type}>(tmp);\n\
} catch (boost::bad_lexical_cast &) {\n\
return NULL;\n\
}\n\
return &e->result_.${result_field};\n\
}\n")
numeric_to_string_cast_template = Template("\
void* GetValueFunctions::${function_name}(Expr* e, TupleRow* row) {\n\
// assert(p->children_.size() == 1);\n\
Expr* op = e->children()[0];\n\
${native_type}* val = reinterpret_cast<${native_type}*>(op->GetValue(row));\n\
if (val == NULL) return NULL;\n\
e->result_.SetStringVal(boost::lexical_cast<std::string>(*val));\n\
return &e->result_.string_val;\n\
}\n")
op_invocations = [
(unary_op_invocations, unary_op_template),
(binary_op_invocations, binary_op_template),
(member_fn_invocations, member_fn_template),
]
# entry: src-type, dest-type, template
cast_invocations = [
(['char', 'short', 'int', 'long', 'float', 'double'],
['char', 'short', 'int', 'long', 'float', 'double'],
cast_template),
(['StringValue'],
['char', 'short', 'int', 'long', 'float', 'double'],
string_to_numeric_cast_template),
(['char', 'short', 'int', 'long', 'float', 'double'],
['StringValue'],
numeric_to_string_cast_template)
]
cc_preamble = '\
// Copyright (c) 2011 Cloudera, Inc. All rights reserved.\n\
// This is a generated file, DO NOT EDIT IT.\n\
\n\
#include "exprs/functions.h"\n\
\n\
#include <boost/lexical_cast.hpp>\n\
#include <string>\n\
\n\
#include "exprs/arithmetic-expr.h"\n\
#include "exprs/binary-predicate.h"\n\
#include "runtime/tuple.h"\n\
\n\
namespace impala {\n\
\n'
cc_epilogue = '}\n'
h_preamble = '\
// Copyright (c) 2011 Cloudera, Inc. All rights reserved.\n\
// This is a generated file, DO NOT EDIT IT.\n\
\n\
#ifndef IMPALA_EXPRS_FUNCTIONS_H\n\
#define IMPALA_EXPRS_FUNCTIONS_H\n\
\n\
namespace impala {\n\
class Expr;\n\
class TupleRow;\n\
\n\
class GetValueFunctions {\n\
public:\n'
h_epilogue = '\
};\n\
\n\
}\n\
\n\
#endif\n'
# Emit functions.cc and functions.h into the current working directory.
# Both files are opened with 'with' so they are closed even if a template
# substitution or table lookup raises part-way through generation.
with open('functions.cc', 'w') as cc_file:
    with open('functions.h', 'w') as h_file:
        cc_file.write(cc_preamble)
        h_file.write(h_preamble)

        # Operator functions: one per (expr class, operator, operand signature).
        for invocations, template in op_invocations:
            for expr_class, ops, signatures in invocations:
                for op in ops:
                    for operand_type in signatures:
                        native_type, result_field = op_signatures[operand_type]
                        fn_name = expr_class + "_" + op + "_" + native_type
                        h_file.write("  static void* " + fn_name + "(Expr* e, TupleRow* row);\n")
                        d = {
                            "function_name": fn_name,
                            "expr_class": expr_class,
                            "native_type": native_type,
                            "result_field": result_field,
                            "op": operators[op],
                        }
                        cc_file.write(template.substitute(d))
                        cc_file.write('\n')

        # Cast functions: one per (source type, destination type) pair,
        # skipping identity casts.
        for src_types, dest_types, template in cast_invocations:
            for src_type in src_types:
                for dest_type in dest_types:
                    if src_type == dest_type:
                        continue
                    fn_name = "Cast_" + src_type + "_" + dest_type
                    h_file.write("  static void* " + fn_name + "(Expr* e, TupleRow* row);\n")
                    d = {
                        "function_name": fn_name,
                        "native_type": src_type,
                        "result_type": dest_type,
                        "result_field": result_fields[dest_type],
                    }
                    cc_file.write(template.substitute(d))
                    cc_file.write('\n')

        cc_file.write(cc_epilogue)
        h_file.write(h_epilogue)

View File

@@ -73,7 +73,7 @@ void* IntLiteral::ReturnBigintValue(Expr* e, TupleRow* row) {
}
Status IntLiteral::Prepare(RuntimeState* state, const RowDescriptor& row_desc) {
Expr::Prepare(state, row_desc);
DCHECK_EQ(children_.size(), 0);
switch (type_) {
case TYPE_TINYINT:
compute_function_ = ReturnTinyintValue;

View File

@@ -25,7 +25,7 @@ IsNullPredicate::IsNullPredicate(const TExprNode& node)
}
Status IsNullPredicate::Prepare(RuntimeState* state, const RowDescriptor& row_desc) {
Expr::Prepare(state, row_desc);
RETURN_IF_ERROR(Expr::PrepareChildren(state, row_desc));
compute_function_ = ComputeFunction;
return Status::OK;
}

View File

@@ -15,7 +15,6 @@ namespace impala {
LikePredicate::LikePredicate(const TExprNode& node)
: Predicate(node),
op_(node.op),
escape_char_(node.like_pred.escape_char[0]) {
DCHECK_EQ(node.like_pred.escape_char.size(), 1);
}
@@ -72,7 +71,7 @@ void* LikePredicate::RegexFn(Expr* e, TupleRow* row) {
}
Status LikePredicate::Prepare(RuntimeState* state, const RowDescriptor& row_desc) {
Expr::Prepare(state, row_desc);
RETURN_IF_ERROR(Expr::PrepareChildren(state, row_desc));
DCHECK_EQ(children_.size(), 2);
if (GetChild(1)->IsConstant()) {
// determine pattern and decide on eval fn
@@ -80,14 +79,14 @@ Status LikePredicate::Prepare(RuntimeState* state, const RowDescriptor& row_desc
string pattern_str(pattern->ptr, pattern->len);
regex substring_re("(%*)([^%_]*)(%*)", regex::extended);
smatch match_res;
if (op_ == TExprOperator::LIKE
if (opcode_ == TExprOpcode::LIKE
&& regex_match(pattern_str, match_res, substring_re)) {
// match_res.str(0) is the whole string, match_res.str(1) the first group, etc.
substring_ = match_res.str(2);
compute_function_ = ConstantSubstringFn;
} else {
string re_pattern;
if (op_ == TExprOperator::LIKE) {
if (opcode_ == TExprOpcode::LIKE) {
ConvertLikePattern(pattern, &re_pattern);
} else {
re_pattern = pattern_str;
@@ -100,16 +99,16 @@ Status LikePredicate::Prepare(RuntimeState* state, const RowDescriptor& row_desc
compute_function_ = ConstantRegexFn;
}
} else {
switch (op_) {
case TExprOperator::LIKE:
switch (opcode_) {
case TExprOpcode::LIKE:
compute_function_ = LikeFn;
break;
case TExprOperator::REGEXP:
case TExprOpcode::REGEX:
compute_function_ = RegexFn;
break;
default:
stringstream error;
error << "Invalid LIKE operator: " << op_;
error << "Invalid LIKE operator: " << opcode_;
return Status(error.str());
}
}

View File

@@ -15,13 +15,12 @@ namespace impala {
class LikePredicate: public Predicate {
protected:
friend class Expr;
virtual Status Prepare(RuntimeState* state, const RowDescriptor& row_desc);
LikePredicate(const TExprNode& node);
virtual Status Prepare(RuntimeState* state, const RowDescriptor& row_desc);
private:
const TExprOperator::type op_;
friend class OpcodeRegistry;
char escape_char_;
std::string substring_;
boost::scoped_ptr<boost::regex> regex_;

View File

@@ -21,7 +21,7 @@ LiteralPredicate::LiteralPredicate(const TExprNode& node)
}
Status LiteralPredicate::Prepare(RuntimeState* state, const RowDescriptor& row_desc) {
Expr::Prepare(state, row_desc);
RETURN_IF_ERROR(Expr::PrepareChildren(state, row_desc));
compute_function_ = ComputeFunction;
return Status::OK;
}

View File

@@ -0,0 +1,18 @@
// Copyright (c) 2011 Cloudera, Inc. All rights reserved.
#include "exprs/math-functions.h"
#include <math.h>
#include "exprs/expr.h"
#include "runtime/tuple-row.h"
namespace impala {
// Compute function for pi(): writes M_PI into the expr's double result slot and
// returns a pointer to that slot. 'row' is not read.
void* MathFunctions::Pi(Expr* e, TupleRow* row) {
  double* result = &e->result_.double_val;
  *result = M_PI;
  return result;
}
}

View File

@@ -0,0 +1,21 @@
// Copyright (c) 2011 Cloudera, Inc. All rights reserved.
#ifndef IMPALA_EXPRS_MATH_FUNCTIONS_H
#define IMPALA_EXPRS_MATH_FUNCTIONS_H
namespace impala {
class Expr;
class OpcodeRegistry;
class TupleRow;
// Holder for the static compute functions that implement built-in math
// functions. All members are static; the class is never instantiated here.
class MathFunctions {
public:
// NOTE(review): no definition of Init() appears in math-functions.cc; confirm
// whether it is defined elsewhere or is a leftover declaration.
static void Init(OpcodeRegistry*);
// Compute function for pi(): stores M_PI in e's double result and returns a
// pointer to it. 'row' is unused.
static void* Pi(Expr* e, TupleRow* row);
};
}
#endif

View File

@@ -15,6 +15,7 @@ void* NullLiteral::ReturnValue(Expr* e, TupleRow* row) {
}
// Prepare for a NULL literal: only verifies (in debug builds) that the node has
// no children, then reports success. Neither 'state' nor 'row_desc' is used.
Status NullLiteral::Prepare(RuntimeState* state, const RowDescriptor& row_desc) {
DCHECK_EQ(children_.size(), 0);
return Status::OK;
}

View File

@@ -0,0 +1,8 @@
#include "exprs/opcode-registry.h"
namespace impala {
OpcodeRegistry* OpcodeRegistry::instance_ = NULL;
}

View File

@@ -0,0 +1,63 @@
// Copyright (c) 2011 Cloudera, Inc. All rights reserved.
#ifndef IMPALA_EXPRS_OPCODE_REGISTRY_H
#define IMPALA_EXPRS_OPCODE_REGISTRY_H
#include <string>
#include <vector>
#include <glog/logging.h>
#include "exprs/expr.h" // For ComputeFunction typedef
#include "gen-cpp/Opcodes_types.h"
namespace impala {
class Expr;
class TupleRow;
// Table mapping each TExprOpcode value to the compute function implementing it.
// The table has TExprOpcode::LAST_OPCODE slots, all initially NULL, and is
// filled in by Init().
class OpcodeRegistry {
 public:
  // Returns the function registered for 'opcode'. Returns NULL if the opcode is
  // outside the table's range or no function was registered for it, so callers
  // can report an error instead of reading out of bounds.
  Expr::ComputeFunction GetFunction(TExprOpcode::type opcode) {
    int index = static_cast<int>(opcode);
    if (index < 0 || index >= static_cast<int>(functions_.size())) return NULL;
    return functions_[index];
  }

  // Registry is a singleton, created and initialized on first use.
  // No locking is performed here.
  static OpcodeRegistry* Instance() {
    if (instance_ == NULL) {
      instance_ = new OpcodeRegistry();
      instance_->Init();
    }
    return instance_;
  }

 private:
  // Private constructor, singleton interface.
  // resize() value-initializes the slots, i.e. every entry starts out NULL.
  OpcodeRegistry() {
    int num_opcodes = static_cast<int>(TExprOpcode::LAST_OPCODE);
    functions_.resize(num_opcodes);
  }

  // Populates all of the registered functions. Implemented in
  // opcode-registry-init.cc which is an auto-generated file
  void Init();

  // Add a function to the registry. An out-of-range opcode here is a
  // programming error in the registration code, hence the DCHECKs.
  void Add(TExprOpcode::type opcode, const Expr::ComputeFunction& function) {
    int index = static_cast<int>(opcode);
    DCHECK_GE(index, 0);
    DCHECK_LT(index, static_cast<int>(functions_.size()));
    functions_[index] = function;
  }

  static OpcodeRegistry* instance_;
  std::vector<Expr::ComputeFunction> functions_;
};
}
#endif

View File

@@ -20,7 +20,7 @@ SlotRef::SlotRef(const TExprNode& node)
}
Status SlotRef::Prepare(RuntimeState* state, const RowDescriptor& row_desc) {
Expr::Prepare(state, row_desc);
DCHECK_EQ(children_.size(), 0);
const SlotDescriptor* slot_desc = state->descs().GetSlotDescriptor(slot_id_);
if (slot_desc == NULL) {
// TODO: create macro MAKE_ERROR() that returns a stream

View File

@@ -0,0 +1,41 @@
// Copyright (c) 2011 Cloudera, Inc. All rights reserved.
#include "exprs/string-functions.h"
#include "exprs/expr.h"
#include "runtime/tuple-row.h"
using namespace boost;
using namespace std;
namespace impala {
// Compute function for substring. The signature is
//    string substr(string input, int pos, int len)
// Semantics follow the MySQL implementation:
//  - positions are 1-indexed
//  - a negative position counts back from the end of the string
//  - len is optional; when absent the substring extends to the end of the input
// Returns NULL if any supplied argument evaluates to NULL, and the empty string
// when the position falls outside the input or len is not positive.
void* StringFunctions::Substring(Expr* e, TupleRow* row) {
  DCHECK_GE(e->GetNumChildren(), 2);
  const bool has_len_arg = (e->GetNumChildren() == 3);

  // Evaluate every argument before checking any of them for NULL.
  StringValue* input =
      reinterpret_cast<StringValue*>(e->children()[0]->GetValue(row));
  int* pos_arg = reinterpret_cast<int*>(e->children()[1]->GetValue(row));
  int* len_arg = NULL;
  if (has_len_arg) {
    len_arg = reinterpret_cast<int*>(e->children()[2]->GetValue(row));
  }
  if (input == NULL || pos_arg == NULL) return NULL;
  if (has_len_arg && len_arg == NULL) return NULL;

  // Translate a negative position into the equivalent 1-based position.
  int start = *pos_arg;
  if (start < 0) start = input->len + start + 1;
  int count = (has_len_arg ? *len_arg : input->len);

  string result;
  const bool start_in_range = (start > 0 && start <= input->len);
  if (start_in_range && count > 0) {
    // substr() clamps 'count' to the characters remaining after 'start'.
    result = string(input->ptr, input->len).substr(start - 1, count);
  }
  e->result_.SetStringVal(result);
  return &e->result_.string_val;
}
}

View File

@@ -0,0 +1,19 @@
// Copyright (c) 2011 Cloudera, Inc. All rights reserved.
#ifndef IMPALA_EXPRS_STRING_FUNCTIONS_H
#define IMPALA_EXPRS_STRING_FUNCTIONS_H
namespace impala {
class Expr;
class OpcodeRegistry;
class TupleRow;
// Holder for the static compute functions that implement built-in string
// functions.
class StringFunctions {
public:
// Compute function for substring(str, pos[, len]). Evaluates the children of
// 'e' against 'row'; returns a pointer to e's string result, or NULL if any
// supplied argument is NULL.
static void* Substring(Expr* e, TupleRow* row);
};
}
#endif

View File

@@ -31,7 +31,7 @@ void* StringLiteral::ComputeFunction(Expr* e, TupleRow* row) {
}
Status StringLiteral::Prepare(RuntimeState* state, const RowDescriptor& row_desc) {
Expr::Prepare(state, row_desc);
DCHECK_EQ(children_.size(), 0);
compute_function_ = ComputeFunction;
return Status::OK;
}

View File

@@ -17,11 +17,17 @@ add_library(backend SHARED
plan-executor-adaptor.cc
)
# The order of link libraries matters.
# - Util must come before ImpalaThrift
# - Exprs & Opcode have a circular dependency (Exprs is included twice)
target_link_libraries(backend
Common
Exec
Exprs
Opcode
Exprs
Runtime
Util
thrift
ImpalaThrift
${HDFS_LIBS}
@@ -40,13 +46,20 @@ add_executable(runquery
# when linking statically the linker won't look for the needed symbols.
# TODO: is there a better way to specify link dependencies without having to
# list all included libs for each specific binary? any gcc flags to help with this?
#
# The order of link libraries matters.
# - Util must come before ImpalaThrift
# - Exprs & Opcode have a circular dependency (Exprs is included twice)
target_link_libraries(runquery
TestUtil
Service
Exec
Exprs
Opcode
Exprs
Runtime
Common
Util
thrift
ImpalaThrift
MockHdfs

View File

@@ -7,14 +7,25 @@
#include "runtime/descriptors.h"
#include "runtime/raw-value.h"
#include "runtime/tuple-row.h"
#include "gen-cpp/Opcodes_types.h"
using namespace std;
namespace impala {
ostream& operator<<(ostream& os, const TExprOperator::type& op) {
map<int, const char*>::const_iterator i = _TExprOperator_VALUES_TO_NAMES.find(op);
if (i != _TExprOperator_VALUES_TO_NAMES.end()) {
// Streams the symbolic name of 'op' as recorded in the Thrift-generated
// _TExprOpcode_VALUES_TO_NAMES map. Writes nothing if 'op' has no entry.
ostream& operator<<(ostream& os, const TExprOpcode::type& op) {
  // Look up the value being printed; a constant key would print the same name
  // for every opcode.
  map<int, const char*>::const_iterator i = _TExprOpcode_VALUES_TO_NAMES.find(op);
  if (i != _TExprOpcode_VALUES_TO_NAMES.end()) {
    os << i->second;
  }
  return os;
}
ostream& operator<<(ostream& os, const TAggregationOp::type& op) {
map<int, const char*>::const_iterator i;
i = _TAggregationOp_VALUES_TO_NAMES.find(0);
if (i != _TAggregationOp_VALUES_TO_NAMES.end()) {
os << i->second;
}
return os;

View File

@@ -6,6 +6,7 @@
#include <ostream>
#include <string>
#include "gen-cpp/Opcodes_types.h"
#include "gen-cpp/Exprs_types.h"
namespace impala {
@@ -15,7 +16,8 @@ class TupleDescriptor;
class Tuple;
class TupleRow;
std::ostream& operator<<(std::ostream& os, const TExprOperator::type& op);
std::ostream& operator<<(std::ostream& os, const TExprOpcode::type& op);
std::ostream& operator<<(std::ostream& os, const TAggregationOp::type& op);
std::string PrintTuple(const Tuple* t, const TupleDescriptor& d);
std::string PrintRow(TupleRow* row, const RowDescriptor& d);

View File

@@ -35,6 +35,7 @@ fi
export IMPALA_FE_DIR=$IMPALA_HOME/fe
export IMPALA_BE_DIR=$IMPALA_HOME/be
export IMPALA_COMMON_DIR=$IMPALA_HOME/common
export PATH=$IMPALA_HOME/bin:$PATH
export HADOOP_HOME=$IMPALA_HOME/thirdparty/hadoop-0.20.2-cdh3u1

View File

@@ -74,6 +74,11 @@ then
fi
# build common
cd $IMPALA_COMMON_DIR
./gen_functions.py
./gen_opcodes.py
# Generate hive-site.xml from template via env var substitution
# TODO: Throw an error if the template references an undefined environment variable
cd ${IMPALA_FE_DIR}/src/test/resources
@@ -138,5 +143,20 @@ then
mvn test
fi
# run backend tests
if [ $tests_action -eq 1 ]
then
  # The backend tests talk to the frontend PlanService, so start it in the
  # background first.
  cd "$IMPALA_FE_DIR"
  mvn exec:java -Dexec.mainClass=com.cloudera.impala.testutil.PlanService \
      -Dexec.classpathScope=test &
  PID=$!
  # Wait for the planner to start up. TODO: can we do something better than waiting
  # an arbitrary 3 seconds? Not a huge deal if it's not long enough, BE tests will
  # just wait a bit.
  sleep 3
  cd "$IMPALA_BE_DIR"
  # Record the test result instead of letting a failure end the script here, so
  # the background PlanService is always shut down.
  be_test_status=0
  make test || be_test_status=$?
  kill $PID
  # Propagate a backend test failure once cleanup is done.
  if [ $be_test_status -ne 0 ]
  then
    exit $be_test_status
  fi
fi
# Generate list of files for Cscope to index
$IMPALA_HOME/bin/gen-cscope.sh

2
common/.gitignore vendored Normal file
View File

@@ -0,0 +1,2 @@
*.pyc
generated_functions.py

375
common/gen_functions.py Executable file
View File

@@ -0,0 +1,375 @@
#!/usr/bin/env python
from string import Template
import os
# This script will generate the implementation of the simple functions for the BE.
# These include:
# - Arithmetic functions
# - Binary functions
# - Cast functions
#
# The script outputs (run: 'impala/common/gen_functions.py')
# - header and implementation for above functions:
# - impala/be/generated-sources/opcode/functions.[h/cc]
# - python file that contains the metadata for those functions:
# - impala/common/generated_functions.py
# Some aggregate types that are useful for defining functions
types = {
'BOOLEAN' : ['BOOLEAN'],
'TINYINT' : ['TINYINT'],
'SMALLINT' : ['SMALLINT'],
'INT' : ['INT'],
'BIGINT' : ['BIGINT'],
'FLOAT' : ['FLOAT'],
'DOUBLE' : ['DOUBLE'],
'STRING' : ['STRING'],
'INT_TYPES' : ['TINYINT', 'SMALLINT', 'INT', 'BIGINT'],
'NUMERIC_TYPES' : ['TINYINT', 'SMALLINT', 'INT', 'BIGINT', 'FLOAT', 'DOUBLE'],
'NATIVE_TYPES' : ['BOOLEAN', 'TINYINT', 'SMALLINT', 'INT', 'BIGINT', 'FLOAT', 'DOUBLE'],
'ALL_TYPES' : ['BOOLEAN', 'TINYINT', 'SMALLINT', 'INT', 'BIGINT', 'FLOAT', 'DOUBLE', 'STRING'],
'MAX_TYPES' : ['BIGINT', 'DOUBLE'],
}
# Operation, [ReturnType], [[Args1], [Args2], ... [ArgsN]]
functions = [
# Arithmetic Expr
['Add', ['MAX_TYPES'], [['MAX_TYPES'], ['MAX_TYPES']] ],
['Subtract', ['MAX_TYPES'], [['MAX_TYPES'], ['MAX_TYPES']] ],
['Multiply', ['MAX_TYPES'], [['MAX_TYPES'], ['MAX_TYPES']] ],
['Divide', ['DOUBLE'], [['DOUBLE'], ['DOUBLE']] ],
['Int_Divide', ['INT_TYPES'], [['INT_TYPES'], ['INT_TYPES']] ],
['Mod', ['INT_TYPES'], [['INT_TYPES'], ['INT_TYPES']] ],
['BitAnd', ['INT_TYPES'], [['INT_TYPES'], ['INT_TYPES']] ],
['BitXor', ['INT_TYPES'], [['INT_TYPES'], ['INT_TYPES']] ],
['BitOr', ['INT_TYPES'], [['INT_TYPES'], ['INT_TYPES']] ],
['BitNot', ['INT_TYPES'], [['INT_TYPES']] ],
# BinaryPredicates
['Eq', ['BOOLEAN'], [['NATIVE_TYPES'], ['NATIVE_TYPES']] ],
['Ne', ['BOOLEAN'], [['NATIVE_TYPES'], ['NATIVE_TYPES']] ],
['Gt', ['BOOLEAN'], [['NATIVE_TYPES'], ['NATIVE_TYPES']] ],
['Lt', ['BOOLEAN'], [['NATIVE_TYPES'], ['NATIVE_TYPES']] ],
['Ge', ['BOOLEAN'], [['NATIVE_TYPES'], ['NATIVE_TYPES']] ],
['Le', ['BOOLEAN'], [['NATIVE_TYPES'], ['NATIVE_TYPES']] ],
['Eq', ['BOOLEAN'], [['STRING'], ['STRING']] ],
['Ne', ['BOOLEAN'], [['STRING'], ['STRING']] ],
['Gt', ['BOOLEAN'], [['STRING'], ['STRING']] ],
['Lt', ['BOOLEAN'], [['STRING'], ['STRING']] ],
['Ge', ['BOOLEAN'], [['STRING'], ['STRING']] ],
['Le', ['BOOLEAN'], [['STRING'], ['STRING']] ],
# Casts
['Cast', ['BOOLEAN'], [['NATIVE_TYPES'], ['BOOLEAN']] ],
['Cast', ['TINYINT'], [['NATIVE_TYPES'], ['TINYINT']] ],
['Cast', ['SMALLINT'], [['NATIVE_TYPES'], ['SMALLINT']] ],
['Cast', ['INT'], [['NATIVE_TYPES'], ['INT']] ],
['Cast', ['BIGINT'], [['NATIVE_TYPES'], ['BIGINT']] ],
['Cast', ['FLOAT'], [['NATIVE_TYPES'], ['FLOAT']] ],
['Cast', ['DOUBLE'], [['NATIVE_TYPES'], ['DOUBLE']] ],
['Cast', ['NATIVE_TYPES'], [['STRING'], ['NATIVE_TYPES']] ],
['Cast', ['STRING'], [['NATIVE_TYPES'], ['STRING']] ],
]
native_types = {
'BOOLEAN' : 'bool',
'TINYINT' : 'char',
'SMALLINT' : 'short',
'INT' : 'int',
'BIGINT' : 'long',
'FLOAT' : 'float',
'DOUBLE' : 'double',
'STRING' : 'StringValue',
}
result_fields = {
'BOOLEAN' : 'bool_val',
'TINYINT' : 'tinyint_val',
'SMALLINT' : 'smallint_val',
'INT' : 'int_val',
'BIGINT' : 'bigint_val',
'FLOAT' : 'float_val',
'DOUBLE' : 'double_val',
'STRING' : 'string_val',
}
native_ops = {
'BITAND' : '&',
'BITNOT' : '~',
'BITOR' : '|',
'BITXOR' : '^',
'DIVIDE' : '/',
'EQ' : '==',
'GT' : '>',
'GE' : '>=',
'INT_DIVIDE' : '/',
'SUBTRACT' : '-',
'MOD' : '%',
'MULTIPLY' : '*',
'LT' : '<',
'LE' : '<=',
'NE' : '!=',
'ADD' : '+',
}
native_funcs = {
'EQ' : 'Eq',
'LE' : 'Le',
'LT' : 'Lt',
'NE' : 'Ne',
'GE' : 'Ge',
'GT' : 'Gt',
}
cc_preamble = '\
// Copyright (c) 2011 Cloudera, Inc. All rights reserved.\n\
// This is a generated file, DO NOT EDIT.\n\
// To add new functions, see impala/common/gen_opcodes.py\n\
\n\
#include "opcode/functions.h"\n\
#include "exprs/expr.h"\n\
#include "runtime/tuple-row.h"\n\
#include <boost/lexical_cast.hpp>\n\
\n\
using namespace boost;\n\
using namespace std;\n\
\n\
namespace impala { \n\
\n'
cc_epilogue = '\
}\n'
h_preamble = '\
// Copyright (c) 2011 Cloudera, Inc. All rights reserved.\n\
// This is a generated file, DO NOT EDIT IT.\n\
// To add new functions, see impala/common/gen_opcodes.py\n\
\n\
#ifndef IMPALA_OPCODE_FUNCTIONS_H\n\
#define IMPALA_OPCODE_FUNCTIONS_H\n\
\n\
namespace impala {\n\
class Expr;\n\
class OpcodeRegistry;\n\
class TupleRow;\n\
\n\
class ComputeFunctions {\n\
public:\n'
h_epilogue = '\
};\n\
\n\
}\n\
\n\
#endif\n'
python_preamble = '\
#!/usr/bin/env python\n\
# Copyright (c) 2011 Cloudera, Inc. All rights reserved.\n\
# This is a generated file, DO NOT EDIT IT.\n\
# To add new functions, see impala/common/gen_opcodes.py\n\
\n\
functions = [\n'
python_epilogue = ']'
header_template = Template("\
static void* ${fn_signature}(Expr* e, TupleRow* row);\n")
unary_op = Template("\
void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
Expr* op = e->children()[0];\n\
${native_type1}* val = reinterpret_cast<${native_type1}*>(op->GetValue(row));\n\
if (val == NULL) return NULL;\n\
e->result_.${result_field} = ${native_op} *val;\n\
return &e->result_.${result_field};\n\
}\n\n")
binary_op = Template("\
void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
Expr* op1 = e->children()[0];\n\
${native_type1}* val1 = reinterpret_cast<${native_type1}*>(op1->GetValue(row));\n\
Expr* op2 = e->children()[1];\n\
${native_type2}* val2 = reinterpret_cast<${native_type2}*>(op2->GetValue(row));\n\
if (val1 == NULL || val2 == NULL) return NULL;\n\
e->result_.${result_field} = (*val1 ${native_op} *val2);\n\
return &e->result_.${result_field};\n\
}\n\n")
binary_func = Template("\
void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
Expr* op1 = e->children()[0];\n\
${native_type1}* val1 = reinterpret_cast<${native_type1}*>(op1->GetValue(row));\n\
Expr* op2 = e->children()[1];\n\
${native_type2}* val2 = reinterpret_cast<${native_type2}*>(op2->GetValue(row));\n\
if (val1 == NULL || val2 == NULL) return NULL;\n\
e->result_.${result_field} = val1->${native_func}(*val2);\n\
return &e->result_.${result_field};\n\
}\n\n")
cast = Template("\
void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
Expr* op = e->children()[0];\n\
${native_type1}* val = reinterpret_cast<${native_type1}*>(op->GetValue(row));\n\
if (val == NULL) return NULL;\n\
e->result_.${result_field} = *val;\n\
return &e->result_.${result_field};\n\
}\n\n")
string_to_numeric = Template("\
void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
Expr* op = e->children()[0];\n\
${native_type1}* val = reinterpret_cast<${native_type1}*>(op->GetValue(row));\n\
if (val == NULL) return NULL;\n\
string tmp(val->ptr, val->len);\n\
try {\n\
e->result_.${result_field} = lexical_cast<${native_type2}>(tmp);\n\
} catch (bad_lexical_cast &) {\n\
return NULL;\n\
}\n\
return &e->result_.${result_field};\n\
}\n\n")
numeric_to_string = Template("\
void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
Expr* op = e->children()[0];\n\
${native_type1}* val = reinterpret_cast<${native_type1}*>(op->GetValue(row));\n\
if (val == NULL) return NULL;\n\
e->result_.SetStringVal(lexical_cast<string>(*val));\n\
return &e->result_.${result_field};\n\
}\n\n")
python_template = Template("\
['${fn_name}', '${return_type}', [${args}], 'ComputeFunctions::${fn_signature}', []], \n")
# Mapping of function to template
templates = {
'Add' : binary_op,
'Subtract' : binary_op,
'Multiply' : binary_op,
'Divide' : binary_op,
'Int_Divide' : binary_op,
'Mod' : binary_op,
'BitAnd' : binary_op,
'BitXor' : binary_op,
'BitOr' : binary_op,
'BitNot' : unary_op,
'Eq' : binary_op,
'Ne' : binary_op,
'Ge' : binary_op,
'Gt' : binary_op,
'Lt' : binary_op,
'Le' : binary_op,
'Cast' : cast,
}
BE_PATH = "../be/generated-sources/opcode/"
if not os.path.exists(BE_PATH):
os.makedirs(BE_PATH)
# Build the substitution dictionary for one concrete function signature.
# 'op' is the function name, 'return_type' a single type name and 'arg_types' a
# list of single type names. The result holds every key used by the templates in
# this script: fn_name, fn_signature (op followed by "_<native type>" per
# argument), return_type, result_field, args (each type quoted and followed by
# ", "), native_type1..N, and native_op when 'op' has a native operator.
def initialize_sub(op, return_type, arg_types):
  result_field = result_fields[return_type]
  op_key = op.upper()
  native_op = native_ops[op_key] if op_key in native_ops else None
  native_args = [native_types[arg] for arg in arg_types]

  sub = {
    "fn_name": op,
    "fn_signature": "_".join([op] + native_args),
    "return_type": return_type,
    "result_field": result_field,
    "args": "".join(["'" + arg + "', " for arg in arg_types]),
  }
  if native_op is not None:
    sub["native_op"] = native_op
  # Template placeholders for argument types are numbered from 1.
  for position, native_arg in enumerate(native_args):
    sub["native_type" + repr(position + 1)] = native_arg
  return sub
# Script entry point. For every entry of 'functions' that has a template, expands
# the declared type groups into concrete signatures and writes, per signature:
#   - a declaration to functions.h
#   - a definition to functions.cc
#   - a metadata row to generated_functions.py
# Each entry is read as [name, [return type groups], [[arg type groups], ...]].
# A type group may expand to one type (shared by every generated signature) or to
# exactly as many types as there are signatures; anything else is rejected.
if __name__ == "__main__":
  h_file = open(BE_PATH + 'functions.h', 'w')
  cc_file = open(BE_PATH + 'functions.cc', 'w')
  python_file = open('generated_functions.py', 'w')
  # try/finally so the outputs are closed even when validation calls sys.exit().
  try:
    h_file.write(h_preamble)
    cc_file.write(cc_preamble)
    python_file.write(python_preamble)

    # Generate functions and headers
    for func_data in functions:
      op = func_data[0]
      # Skip functions with no template (shouldn't be auto-generated)
      if op not in templates:
        continue

      # Expand every type group into its list of concrete types
      return_types = []
      for ret in func_data[1]:
        return_types.extend(types[ret])
      signatures = []
      for args in func_data[2]:
        expanded_arg = []
        for arg in args:
          expanded_arg.extend(types[arg])
        signatures.append(expanded_arg)

      # The widest expansion decides how many functions are generated
      num_functions = max([len(return_types)] + [len(args) for args in signatures])
      num_args = len(signatures)

      # Validate the input is correct. func_data is a list, so it has to be
      # converted explicitly; concatenating it to a str raises TypeError.
      for expanded in [return_types] + signatures:
        if len(expanded) != 1 and len(expanded) != num_functions:
          print("Invalid Declaration: " + repr(func_data))
          sys.exit(1)

      # Iterate over every function signature to generate
      for i in range(0, num_functions):
        if len(return_types) == 1:
          return_type = return_types[0]
        else:
          return_type = return_types[i]

        arg_types = []
        for j in range(0, num_args):
          if len(signatures[j]) == 1:
            arg_types.append(signatures[j][0])
          else:
            arg_types.append(signatures[j][i])

        # At this point, 'return_type' is a single type and 'arg_types' is a
        # list of single types
        sub = initialize_sub(op, return_type, arg_types)
        template = templates[op]

        # String comparisons call a member function instead of using an operator
        if op in ('Eq', 'Ne', 'Gt', 'Lt', 'Ge', 'Le') and arg_types[0] == 'STRING':
          template = binary_func
          sub["native_func"] = native_funcs[op.upper()]

        # Casts involving strings need dedicated conversion code. The
        # from-string check runs last and therefore wins when both apply.
        if op == 'Cast' and return_type == 'STRING':
          template = numeric_to_string
        if op == 'Cast' and arg_types[0] == 'STRING':
          template = string_to_numeric

        h_file.write(header_template.substitute(sub))
        cc_file.write(template.substitute(sub))
        python_file.write(python_template.substitute(sub))

    h_file.write(h_epilogue)
    cc_file.write(cc_epilogue)
    python_file.write(python_epilogue)
  finally:
    h_file.close()
    cc_file.close()
    python_file.close()

285
common/gen_opcodes.py Executable file
View File

@@ -0,0 +1,285 @@
#!/usr/bin/env python
# This script generates the necessary files to coordinate function calls between the FE
# and BE. In the FE, this creates a mapping between function signature (Operation &
# Arguments) to an opcode. The opcode is a thrift enum which is passed to the backend.
# The backend has all the information from just the opcode and does not need to worry
# about type checking.
#
# This script pulls function metadata input from
# - impala/common/impala_functions.py (manually maintained)
# - impala/common/generated_functions.py (auto-generated metadata)
#
# This script will generate 4 outputs
# 1. Thrift enum for all the opcodes
# - impala/fe/src/main/thrift/Opcodes.thrift
# 2. FE java operators (one per function, ignoring overloading)
# - impala/fe/target/generated-sources/gen-java/com/cloudera/impala/opcode/FunctionOperator.java
# 3. Java registry setup (registering all the functions with signatures)
# - impala/fe/target/generated-sources/gen-java/com/cloudera/impala/opcode/FunctionRegistry.java
# 4. BE registry setup (mapping opcodes to ComputeFunctions)
# - impala/be/generated-sources/opcode/opcode-registry-init.cc
#
# TODO: version the registry on the FE and BE so we can identify if they are out of sync
import sys
import os
from string import Template
import impala_functions
import generated_functions
# C++ type name for each SQL primitive type name. The names are used to mangle
# function signatures and opcode names.
native_types = dict(
  BOOLEAN='bool',
  TINYINT='char',
  SMALLINT='short',
  INT='int',
  BIGINT='long',
  FLOAT='float',
  DOUBLE='double',
  STRING='StringValue',
)
# Header of the generated Opcodes.thrift: declares the cpp/java namespaces and
# opens the TExprOpcode enum. The enum members are written between this and
# thrift_epilogue.
thrift_preamble = '\
// Copyright (c) 2011 Cloudera, Inc. All rights reserved.\n\
// This is a generated file, DO NOT EDIT.\n\
// To add new functions, see impala/common/gen_opcodes.py\n\
\n\
namespace cpp impala\n\
namespace java com.cloudera.impala.thrift\n\
\n\
enum TExprOpcode {\n'
# Closes the TExprOpcode enum.
thrift_epilogue = '\
}\n\
\n'
# Header of the generated backend registry source: includes the headers that
# declare the compute functions and opens OpcodeRegistry::Init() inside
# namespace impala. One Add() call per opcode is written after it.
cc_registry_preamble = '\
// Copyright (c) 2011 Cloudera, Inc. All rights reserved.\n\
// This is a generated file, DO NOT EDIT.\n\
// To add new functions, see impala/common/gen_opcodes.py\n\
\n\
#include "exprs/opcode-registry.h"\n\
#include "exprs/expr.h"\n\
#include "exprs/compound-predicate.h"\n\
#include "exprs/like-predicate.h"\n\
#include "exprs/math-functions.h"\n\
#include "exprs/string-functions.h"\n\
#include "opcode/functions.h"\n\
\n\
namespace impala { \n\
\n\
void OpcodeRegistry::Init() {\n'
# Closes OpcodeRegistry::Init() and namespace impala.
cc_registry_epilogue = '\
}\n\
\n\
}\n'
# Header of the generated FunctionOperator.java: opens the FunctionOperator enum
# in package com.cloudera.impala.opcode. One constant per operator follows.
operator_file_preamble = '\
// Copyright (c) 2011 Cloudera, Inc. All rights reserved.\n\
// This is a generated file, DO NOT EDIT.\n\
// To add new functions, see impala/common/gen_opcodes.py\n\
\n\
package com.cloudera.impala.opcode;\n\
\n\
public enum FunctionOperator {\n'
# Closes the FunctionOperator enum.
operator_file_epilogue = '\
}\n'
# Header of the generated FunctionRegistry.java: opens
# FunctionRegistry.InitFunctions(OpcodeRegistry) and declares the 'result' flag
# that every generated registration statement ANDs its return value into.
java_registry_preamble = '\
// Copyright (c) 2011 Cloudera, Inc. All rights reserved.\n\
// This is a generated file, DO NOT EDIT.\n\
// To add new functions, see impala/common/gen_opcodes.py\n\
\n\
package com.cloudera.impala.opcode;\n\
\n\
import com.cloudera.impala.analysis.OpcodeRegistry;\n\
import com.cloudera.impala.catalog.PrimitiveType;\n\
import com.cloudera.impala.thrift.TExprOpcode;\n\
import com.google.common.base.Preconditions;\n\
\n\
public class FunctionRegistry { \n\
public static void InitFunctions(OpcodeRegistry registry) { \n\
boolean result = true;\n\
\n'
# Asserts that every registration succeeded, then closes the method and class.
java_registry_epilogue = '\
Preconditions.checkState(result); \n\
}\n\
}\n'
# Builds a substitution dictionary for one concrete signature.
#   op          - function name
#   return_type - PrimitiveType name of the result
#   arg_types   - list of PrimitiveType names, one per argument
# Returns a dict with fn_class, fn_signature (op mangled with each argument's
# native type), num_args, native_type<N> for the N-th argument (1-based),
# thrift_enum (upper-cased fn_signature) and java_output (the comma-separated
# operator, opcode, return type and argument types).
# NOTE(review): nothing in the visible part of this script calls this helper.
def initialize_sub(op, return_type, arg_types):
  native_names = [native_types[arg] for arg in arg_types]

  sub = {"fn_class": "GetValueFunctions", "num_args": len(arg_types)}
  for position, native in enumerate(native_names):
    sub["native_type" + repr(position + 1)] = native

  sub["fn_signature"] = op + "".join(["_" + native for native in native_names])
  sub["thrift_enum"] = sub["fn_signature"].upper()

  java_args = "PrimitiveType." + return_type
  java_args += "".join([", PrimitiveType." + arg for arg in arg_types])
  sub["java_output"] = ("FunctionOperator." + op.upper() +
                        ", TExprOpcode." + sub["thrift_enum"] +
                        ", " + java_args)
  return sub
# Output directories. The paths are relative, so they resolve against the
# directory the script is run from.
FE_PATH = "../fe/target/generated-sources/gen-java/com/cloudera/impala/opcode/"
BE_PATH = "../be/generated-sources/opcode/"
THRIFT_PATH = "../fe/src/main/thrift/"
# This contains a list of all the opcodes that are built based on the
# function name from the input. Inputs can have multiple signatures
# with the same function name and the opcode is mangled using the
# arg types.
opcodes = []
# This contains a list of all the function names (no overloading/mangling)
operators = []
# This is a mapping of operators to a list of function meta data entries
# Each meta data entry is itself a map to store all the meta data
# - fn_name, ret_type, args, be_fn, sql_names
meta_data_entries = {}
# Records one function metadata row in meta_data_entries.
#   fn_meta_data - five-field list:
#                  [fn_name, ret_type, [args], be_fn, [sql_names]]
# Rows that share a function name (overloads) are collected in one list under
# that name. The upper-cased name is added to 'operators' only the first time it
# is seen, so an overloaded function yields a single operator.
def add_function(fn_meta_data):
  fn_name = fn_meta_data[0]
  entry = {
    "fn_name": fn_name,
    "ret_type": fn_meta_data[1],
    "args": fn_meta_data[2],
    "be_fn": fn_meta_data[3],
    "sql_names": fn_meta_data[4],
  }
  if fn_name not in meta_data_entries:
    meta_data_entries[fn_name] = []
    operators.append(fn_name.upper())
  meta_data_entries[fn_name].append(entry)
# Assigns an opcode to every entry in meta_data_entries and collects the names
# in 'opcodes'. A function with a single signature uses its upper-cased name.
# Overloaded functions qualify the name with the upper-cased native type of each
# argument so that every enum value is unique.
# The resulting opcode list is sorted, with INVALID_OPCODE placed first and
# LAST_OPCODE placed last.
def generate_opcodes():
  for fn, entries in meta_data_entries.items():
    base_name = fn.upper()
    if len(entries) <= 1:
      opcodes.append(base_name)
      entries[0]["opcode"] = base_name
      continue
    for entry in entries:
      suffix = "".join(["_" + native_types[arg].upper() for arg in entry["args"]])
      mangled = base_name + suffix
      opcodes.append(mangled)
      entry["opcode"] = mangled

  opcodes.sort()
  opcodes.insert(0, 'INVALID_OPCODE')
  opcodes.append('LAST_OPCODE')
# Writes the backend registry initialisation source to 'filename': the C++
# preamble, one Add(opcode, compute function) statement per metadata entry, and
# the epilogue. Every entry must already carry its "opcode".
def generate_be_registry_init(filename):
  out = open(filename, "w")
  out.write(cc_registry_preamble)
  for entries in meta_data_entries.values():
    for entry in entries:
      registration = (entry["opcode"], entry["be_fn"])
      out.write(" this->Add(TExprOpcode::%s, %s);\n" % registration)
  out.write(cc_registry_epilogue)
  out.close()
# Writes the frontend registry initialisation source (Java) to 'filename'.
# The file contains, between the Java preamble and epilogue:
#   1. one registry.add(...) statement per metadata entry, giving the operator,
#      opcode, return type and argument types
#   2. one registry.addFunctionMapping(...) statement per distinct SQL alias
# An alias shared by overloads of the same function is emitted once. An alias
# claimed by two different functions is an input error: both mappings are
# reported and the script exits with status 1.
def generate_fe_registry_init(filename):
  java_registry_file = open(filename, "w")
  # try/finally so the file is closed even when a conflict calls sys.exit().
  try:
    java_registry_file.write(java_registry_preamble)

    for fn in meta_data_entries:
      for entry in meta_data_entries[fn]:
        java_output = "FunctionOperator." + fn.upper()
        java_output += ", TExprOpcode." + entry["opcode"]
        java_output += ", PrimitiveType." + entry["ret_type"]
        for arg in entry["args"]:
          java_output += ", PrimitiveType." + arg
        java_registry_file.write(" result &= registry.add(%s);\n" % java_output)
    java_registry_file.write("\n")

    # alias -> operator that first claimed it
    mappings = {}
    for fn in meta_data_entries:
      operator = fn.upper()
      for entry in meta_data_entries[fn]:
        for name in entry["sql_names"]:
          if name in mappings:
            if mappings[name] != operator:
              # 'operator' is the rejected newcomer; mappings[name] holds the
              # mapping that is already registered.
              print("Invalid mapping \"%s\" -> FunctionOperator.%s." % (name, operator))
              print("There is already a mapping \"%s\" -> FunctionOperator.%s.\n" % (name, mappings[name]))
              sys.exit(1)
            continue
          mappings[name] = operator
          java_output = "\"%s\", FunctionOperator.%s" % (name, operator)
          java_registry_file.write(" result &= registry.addFunctionMapping(%s);\n" % java_output)
    java_registry_file.write("\n")

    java_registry_file.write(java_registry_epilogue)
  finally:
    java_registry_file.close()
# Load the hand-maintained function table first, then the auto-generated one.
# Every row must have exactly five fields; a malformed row aborts the script.
for function in impala_functions.functions:
  if len(function) == 5:
    add_function(function)
  else:
    print("Invalid function entry in impala_functions.py:\n\t" + repr(function))
    sys.exit(1)

for function in generated_functions.functions:
  if len(function) == 5:
    add_function(function)
  else:
    print("Invalid function entry in generated_functions.py:\n\t" + repr(function))
    sys.exit(1)

# Opcodes must be assigned before either registry file is written.
generate_opcodes()

# Make sure every output directory exists.
for output_dir in (BE_PATH, FE_PATH, THRIFT_PATH):
  if not os.path.exists(output_dir):
    os.makedirs(output_dir)

generate_be_registry_init(BE_PATH + "opcode-registry-init.cc")
generate_fe_registry_init(FE_PATH + "FunctionRegistry.java")
# Write the thrift enum: one line per opcode between the preamble and epilogue.
thrift_file = open(THRIFT_PATH + "Opcodes.thrift", "w")
thrift_file.write(thrift_preamble)
thrift_file.writelines([" %s,\n" % opcode for opcode in opcodes])
thrift_file.write(thrift_epilogue)
thrift_file.close()

# Write the Java operator enum: sorted operator names with INVALID_OPERATOR
# placed in front.
operators.sort()
operators.insert(0, "INVALID_OPERATOR")
operator_java_file = open(FE_PATH + "FunctionOperator.java", "w")
operator_java_file.write(operator_file_preamble)
operator_java_file.writelines([" %s,\n" % op for op in operators])
operator_java_file.write(operator_file_epilogue)
operator_java_file.close()

View File

@@ -0,0 +1,30 @@
#!/usr/bin/env python
# This is a list of all the functions that are not auto-generated.
# It contains all the meta data that describes the function. The format is:
# <function name>, <return_type>, [<args>], <backend function name>, [<sql function aliases>]
#
# 'function name' is the base of what the opcode enum will be generated from. It does not
# have to be unique, the script will mangle the name with the signature if necessary.
#
# 'sql function aliases' are the function names that can be used from sql. They are optional
# and there can be multiple aliases for a function.
#
# This is combined with the list in generated_functions to code-gen the opcode
# registry in the FE and BE.
# Each row: [function name, return type, [argument types], backend function,
# [sql aliases]]. Rows sharing a function name are overloads of one operator.
functions = [
['Compound_And', 'BOOLEAN', ['BOOLEAN', 'BOOLEAN'], 'CompoundPredicate::AndComputeFunction', []],
['Compound_Or', 'BOOLEAN', ['BOOLEAN', 'BOOLEAN'], 'CompoundPredicate::OrComputeFunction', []],
# NOTE(review): declared with two BOOLEAN arguments; NOT is presumably unary -
# confirm whether anything looks this signature up.
['Compound_Not', 'BOOLEAN', ['BOOLEAN', 'BOOLEAN'], 'CompoundPredicate::NotComputeFunction', []],
# NOTE(review): the two Constant_* rows take BOOLEAN arguments while Like and
# Regex take STRING - these look like placeholder signatures; confirm.
['Constant_Regex', 'BOOLEAN', ['BOOLEAN', 'BOOLEAN'], 'LikePredicate::ConstantRegexFn', []],
['Constant_Substring', 'BOOLEAN', ['BOOLEAN', 'BOOLEAN'], 'LikePredicate::ConstantSubstringFn', []],
['Like', 'BOOLEAN', ['STRING', 'STRING'], 'LikePredicate::LikeFn', []],
['Regex', 'BOOLEAN', ['STRING', 'STRING'], 'LikePredicate::RegexFn', []],
['Math_Pi', 'DOUBLE', [], 'MathFunctions::Pi', ['pi']],
# Two overloads of the same operator, both reachable as substr() / substring().
['String_Substring', 'STRING', ['STRING', 'INT'], 'StringFunctions::Substring', ['substr', 'substring']],
['String_Substring', 'STRING', ['STRING', 'INT', 'INT'], 'StringFunctions::Substring', ['substr', 'substring']],
]

3
fe/.gitignore vendored
View File

@@ -22,6 +22,9 @@ src/test/resources/hive-site.xml
# Generated hbase-site.xml file
src/test/resources/hbase-site.xml
# Generated thrift files
src/main/thrift/Opcodes.thrift
derby.log
TempStatsStore

View File

@@ -0,0 +1,5 @@
#Mon Nov 07 10:58:37 PST 2011
activeProfiles=
eclipse.preferences.version=1
resolveWorkspaceProjects=true
version=1

View File

@@ -138,7 +138,7 @@ terminal KW_AND, KW_AS, KW_ASC, KW_AVG, KW_BIGINT, KW_BOOLEAN, KW_BY,
KW_RLIKE, KW_RIGHT, KW_SELECT, KW_SEMI, KW_SMALLINT, KW_STRING, KW_SUM,
KW_TINYINT, KW_TRUE, KW_USING, KW_WHEN, KW_WHERE, KW_THEN, KW_TIMESTAMP,
KW_INSERT, KW_INTO, KW_OVERWRITE, KW_TABLE, KW_PARTITION;
terminal COMMA, DOT, STAR, LPAREN, RPAREN, DIVIDE, MOD, PLUS, MINUS;
terminal COMMA, DOT, STAR, LPAREN, RPAREN, DIVIDE, MOD, ADD, SUBTRACT;
terminal BITAND, BITOR, BITXOR, BITNOT;
terminal EQUAL, NOT, LESSTHAN, GREATERTHAN;
terminal String IDENT;
@@ -180,7 +180,7 @@ nonterminal TableRef table_ref;
nonterminal JoinOperator join_operator;
nonterminal opt_inner, opt_outer;
nonterminal PrimitiveType primitive_type;
nonterminal Expr minus_chain_expr;
nonterminal Expr subtract_chain_expr;
nonterminal BinaryPredicate.Operator binary_comparison_operator;
nonterminal InsertStmt insert_stmt;
nonterminal ArrayList<PartitionKeyValue> partition_clause;
@@ -192,7 +192,7 @@ precedence left KW_AND;
precedence left KW_NOT;
precedence left KW_LIKE, KW_RLIKE, KW_REGEXP;
precedence left EQUAL, LESSTHAN, GREATERTHAN;
precedence left PLUS, MINUS;
precedence left ADD, SUBTRACT;
precedence left STAR, DIVIDE, MOD, KW_DIV;
precedence left BITAND, BITOR, BITXOR, BITNOT;
precedence left RPAREN;
@@ -517,8 +517,8 @@ case_else_clause ::=
{: RESULT = null; :}
;
minus_chain_expr ::=
MINUS expr:e
subtract_chain_expr ::=
SUBTRACT expr:e
{:
// integrate signs into literals
if (e.isLiteral() && e.getType().isNumericType()) {
@@ -531,10 +531,12 @@ minus_chain_expr ::=
;
expr ::=
minus_chain_expr:e
subtract_chain_expr:e
{: RESULT = e; :}
| literal:l
{: RESULT = l; :}
| IDENT:functionName LPAREN RPAREN
{: RESULT = new FunctionCallExpr(functionName, new ArrayList<Expr>()); :}
| IDENT:functionName LPAREN expr_list:exprs RPAREN
{: RESULT = new FunctionCallExpr(functionName, exprs); :}
| cast_expr:c
@@ -560,10 +562,10 @@ arithmetic_expr ::=
{: RESULT = new ArithmeticExpr(ArithmeticExpr.Operator.MOD, e1, e2); :}
| expr:e1 KW_DIV expr:e2
{: RESULT = new ArithmeticExpr(ArithmeticExpr.Operator.INT_DIVIDE, e1, e2); :}
| expr:e1 PLUS expr:e2
{: RESULT = new ArithmeticExpr(ArithmeticExpr.Operator.PLUS, e1, e2); :}
| expr:e1 MINUS expr:e2
{: RESULT = new ArithmeticExpr(ArithmeticExpr.Operator.MINUS, e1, e2); :}
| expr:e1 ADD expr:e2
{: RESULT = new ArithmeticExpr(ArithmeticExpr.Operator.ADD, e1, e2); :}
| expr:e1 SUBTRACT expr:e2
{: RESULT = new ArithmeticExpr(ArithmeticExpr.Operator.SUBTRACT, e1, e2); :}
| expr:e1 BITAND expr:e2
{: RESULT = new ArithmeticExpr(ArithmeticExpr.Operator.BITAND, e1, e2); :}
| expr:e1 BITOR expr:e2

View File

@@ -7,24 +7,24 @@ import java.util.List;
import com.cloudera.impala.catalog.PrimitiveType;
import com.cloudera.impala.common.AnalysisException;
import com.cloudera.impala.thrift.TAggregateExpr;
import com.cloudera.impala.thrift.TAggregationOp;
import com.cloudera.impala.thrift.TExprNode;
import com.cloudera.impala.thrift.TExprNodeType;
import com.cloudera.impala.thrift.TExprOperator;
import com.google.common.base.Objects;
import com.google.common.base.Preconditions;
public class AggregateExpr extends Expr {
public enum Operator {
COUNT("COUNT", TExprOperator.AGG_COUNT),
MIN("MIN", TExprOperator.AGG_MIN),
MAX("MAX", TExprOperator.AGG_MAX),
SUM("SUM", TExprOperator.AGG_SUM),
AVG("AVG", TExprOperator.INVALID_OP);
COUNT("COUNT", TAggregationOp.COUNT),
MIN("MIN", TAggregationOp.MIN),
MAX("MAX", TAggregationOp.MAX),
SUM("SUM", TAggregationOp.SUM),
AVG("AVG", TAggregationOp.INVALID);
private final String description;
private final TExprOperator thriftOp;
private final TAggregationOp thriftOp;
private Operator(String description, TExprOperator thriftOp) {
private Operator(String description, TAggregationOp thriftOp) {
this.description = description;
this.thriftOp = thriftOp;
}
@@ -34,7 +34,7 @@ public class AggregateExpr extends Expr {
return description;
}
public TExprOperator toThrift() {
public TAggregationOp toThrift() {
return thriftOp;
}
}
@@ -111,8 +111,7 @@ public class AggregateExpr extends Expr {
@Override
protected void toThrift(TExprNode msg) {
msg.node_type = TExprNodeType.AGG_EXPR;
msg.op = op.toThrift();
msg.agg_expr = new TAggregateExpr(isStar, isDistinct);
msg.agg_expr = new TAggregateExpr(isStar, isDistinct, op.toThrift());
}
@Override

View File

@@ -12,8 +12,6 @@ import com.cloudera.impala.catalog.Column;
import com.cloudera.impala.catalog.Db;
import com.cloudera.impala.catalog.Table;
import com.cloudera.impala.common.AnalysisException;
import com.cloudera.impala.common.Pair;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;

View File

@@ -4,34 +4,30 @@ package com.cloudera.impala.analysis;
import com.cloudera.impala.catalog.PrimitiveType;
import com.cloudera.impala.common.AnalysisException;
import com.cloudera.impala.opcode.FunctionOperator;
import com.cloudera.impala.thrift.TExprNode;
import com.cloudera.impala.thrift.TExprNodeType;
import com.cloudera.impala.thrift.TExprOperator;
import com.google.common.base.Preconditions;
public class ArithmeticExpr extends Expr {
enum Operator {
MULTIPLY("*", TExprOperator.MULTIPLY),
DIVIDE("/", TExprOperator.DIVIDE),
MOD("%", TExprOperator.MOD),
INT_DIVIDE("DIV", TExprOperator.INT_DIVIDE),
PLUS("+", TExprOperator.PLUS),
MINUS("-", TExprOperator.MINUS),
BITAND("&", TExprOperator.BITAND),
BITOR("|", TExprOperator.BITOR),
BITXOR("^", TExprOperator.BITXOR),
BITNOT("~", TExprOperator.BITNOT);
MULTIPLY("*", FunctionOperator.MULTIPLY),
DIVIDE("/", FunctionOperator.DIVIDE),
MOD("%", FunctionOperator.MOD),
INT_DIVIDE("DIV", FunctionOperator.INT_DIVIDE),
ADD("+", FunctionOperator.ADD),
SUBTRACT("-", FunctionOperator.SUBTRACT),
BITAND("&", FunctionOperator.BITAND),
BITOR("|", FunctionOperator.BITOR),
BITXOR("^", FunctionOperator.BITXOR),
BITNOT("~", FunctionOperator.BITNOT);
private final String description;
private final TExprOperator thriftOp;
private final FunctionOperator functionOp;
private Operator(String description, TExprOperator thriftOp) {
private Operator(String description, FunctionOperator thriftOp) {
this.description = description;
this.thriftOp = thriftOp;
}
public boolean isBitwiseOperation() {
return this == BITAND || this == BITOR || this == BITXOR || this == BITNOT;
this.functionOp = thriftOp;
}
@Override
@@ -39,10 +35,11 @@ public class ArithmeticExpr extends Expr {
return description;
}
public TExprOperator toThrift() {
return thriftOp;
public FunctionOperator toFunctionOp() {
return functionOp;
}
}
private final Operator op;
public Operator getOp() {
@@ -74,7 +71,7 @@ public class ArithmeticExpr extends Expr {
@Override
protected void toThrift(TExprNode msg) {
msg.node_type = TExprNodeType.ARITHMETIC_EXPR;
msg.op = op.toThrift();
msg.setOpcode(opcode);
}
@Override
@@ -82,7 +79,7 @@ public class ArithmeticExpr extends Expr {
if (!super.equals(obj)) {
return false;
}
return ((ArithmeticExpr) obj).op == op;
return ((ArithmeticExpr) obj).opcode == opcode;
}
@Override
@@ -98,12 +95,15 @@ public class ArithmeticExpr extends Expr {
// bitnot is the only unary op, deal with it here
if (op == Operator.BITNOT) {
PrimitiveType childType = getChild(0).getType();
if (!childType.isFixedPointType()) {
type = getChild(0).getType();
OpcodeRegistry.Signature match =
OpcodeRegistry.instance().getFunctionInfo(op.functionOp, type);
if (match == null) {
throw new AnalysisException("Bitwise operations only allowed on fixed-point types: "
+ toSql());
}
type = childType;
Preconditions.checkState(type == match.returnType);
opcode = match.opcode;
return;
}
@@ -112,8 +112,8 @@ public class ArithmeticExpr extends Expr {
switch (op) {
case MULTIPLY:
case PLUS:
case MINUS:
case ADD:
case SUBTRACT:
// numeric ops must be promoted to highest-resolution type
// (otherwise we can't guarantee that a <op> b won't result in an overflow/underflow)
type = PrimitiveType.getAssignmentCompatibleType(t1, t2).getMaxResolutionType();
@@ -134,8 +134,7 @@ public class ArithmeticExpr extends Expr {
"Invalid floating point argument to operation " +
op.toString() + ": " + this.toSql());
}
type =
PrimitiveType.getAssignmentCompatibleType(t1, t2);
type = PrimitiveType.getAssignmentCompatibleType(t1, t2);
// the result is always an integer
Preconditions.checkState(type.isFixedPointType());
break;
@@ -147,5 +146,8 @@ public class ArithmeticExpr extends Expr {
}
type = castBinaryOp(type);
OpcodeRegistry.Signature match =
OpcodeRegistry.instance().getFunctionInfo(op.toFunctionOp(), type, type);
this.opcode = match.opcode;
}
}

View File

@@ -4,9 +4,9 @@ package com.cloudera.impala.analysis;
import com.cloudera.impala.catalog.PrimitiveType;
import com.cloudera.impala.common.AnalysisException;
import com.cloudera.impala.opcode.FunctionOperator;
import com.cloudera.impala.thrift.TExprNode;
import com.cloudera.impala.thrift.TExprNodeType;
import com.cloudera.impala.thrift.TExprOperator;
import com.google.common.base.Preconditions;
/**
@@ -15,19 +15,19 @@ import com.google.common.base.Preconditions;
*/
public class BinaryPredicate extends Predicate {
public enum Operator {
EQ("=", TExprOperator.EQ),
NE("!=", TExprOperator.NE),
LE("<=", TExprOperator.LE),
GE(">=", TExprOperator.GE),
LT("<", TExprOperator.LT),
GT(">", TExprOperator.GT);
EQ("=", FunctionOperator.EQ),
NE("!=", FunctionOperator.NE),
LE("<=", FunctionOperator.LE),
GE(">=", FunctionOperator.GE),
LT("<", FunctionOperator.LT),
GT(">", FunctionOperator.GT);
private final String description;
private final TExprOperator thriftOp;
private final FunctionOperator functionOp;
private Operator(String description, TExprOperator thriftOp) {
private Operator(String description, FunctionOperator functionOp) {
this.description = description;
this.thriftOp = thriftOp;
this.functionOp = functionOp;
}
@Override
@@ -35,10 +35,11 @@ public class BinaryPredicate extends Predicate {
return description;
}
public TExprOperator toThrift() {
return thriftOp;
public FunctionOperator toFunctionOp() {
return functionOp;
}
};
}
private final Operator op;
public Operator getOp() {
@@ -59,7 +60,7 @@ public class BinaryPredicate extends Predicate {
if (!super.equals(obj)) {
return false;
}
return ((BinaryPredicate) obj).op == op;
return ((BinaryPredicate) obj).opcode == this.opcode;
}
@Override
@@ -70,7 +71,7 @@ public class BinaryPredicate extends Predicate {
@Override
protected void toThrift(TExprNode msg) {
msg.node_type = TExprNodeType.BINARY_PRED;
msg.op = op.toThrift();
msg.setOpcode(opcode);
}
@Override
@@ -80,7 +81,7 @@ public class BinaryPredicate extends Predicate {
PrimitiveType t1 = getChild(0).getType();
PrimitiveType t2 = getChild(1).getType();
PrimitiveType compatibleType = PrimitiveType.getAssignmentCompatibleType(t1, t2);
if (!compatibleType.isValid()) {
// there is no type to which both are assignment-compatible -> we can't compare them
throw new AnalysisException("operands are not comparable: " + this.toSql());
@@ -88,6 +89,12 @@ public class BinaryPredicate extends Predicate {
// Ignore return value because type is always bool for predicates.
castBinaryOp(compatibleType);
OpcodeRegistry.Signature match = OpcodeRegistry.instance().getFunctionInfo(
op.toFunctionOp(), compatibleType, compatibleType);
Preconditions.checkState(match != null);
Preconditions.checkState(match.returnType == PrimitiveType.BOOLEAN);
this.opcode = match.opcode;
}
/**

View File

@@ -4,11 +4,13 @@ package com.cloudera.impala.analysis;
import com.cloudera.impala.catalog.PrimitiveType;
import com.cloudera.impala.common.AnalysisException;
import com.cloudera.impala.opcode.FunctionOperator;
import com.cloudera.impala.thrift.TExprNode;
import com.cloudera.impala.thrift.TExprNodeType;
import com.google.common.base.Preconditions;
public class CastExpr extends Expr {
private final PrimitiveType targetType;
/** true if this is a "pre-analyzed" implicit cast */
private final boolean isImplicit;
@@ -22,6 +24,11 @@ public class CastExpr extends Expr {
children.add(e);
if (isImplicit) {
type = targetType;
OpcodeRegistry.Signature match = OpcodeRegistry.instance().getFunctionInfo(
FunctionOperator.CAST, getChild(0).getType(), type);
Preconditions.checkState(match != null);
Preconditions.checkState(match.returnType == type);
this.opcode = match.opcode;
}
}
@@ -36,26 +43,34 @@ public class CastExpr extends Expr {
@Override
protected void toThrift(TExprNode msg) {
msg.node_type = TExprNodeType.CAST_EXPR;
msg.setOpcode(opcode);
}
@Override
public void analyze(Analyzer analyzer) throws AnalysisException {
super.analyze(analyzer);
if (!isImplicit) {
// cast was asked for in the query, check for validity of cast
PrimitiveType childType = getChild(0).getType();
PrimitiveType resultType =
PrimitiveType.getAssignmentCompatibleType(childType, targetType);
if (!resultType.isValid()) {
throw new AnalysisException("Invalid type cast from: " + childType.toString() +
" to " + targetType);
}
// this cast may result in loss of precision, but the user requested it
this.type = targetType;
if (isImplicit) {
return;
}
// cast was asked for in the query, check for validity of cast
PrimitiveType childType = getChild(0).getType();
PrimitiveType resultType =
PrimitiveType.getAssignmentCompatibleType(childType, targetType);
if (!resultType.isValid()) {
throw new AnalysisException("Invalid type cast of " + getChild(0).toSql() +
" from " + childType + " to " + targetType);
}
// this cast may result in loss of precision, but the user requested it
this.type = targetType;
OpcodeRegistry.Signature match = OpcodeRegistry.instance().getFunctionInfo(
FunctionOperator.CAST, getChild(0).getType(), type);
Preconditions.checkState(match != null);
Preconditions.checkState(match.returnType == type);
this.opcode = match.opcode;
}
@Override
@@ -64,6 +79,6 @@ public class CastExpr extends Expr {
return false;
}
CastExpr expr = (CastExpr) obj;
return targetType == expr.targetType;
return this.opcode == expr.opcode;
}
}

View File

@@ -4,7 +4,7 @@ package com.cloudera.impala.analysis;
import com.cloudera.impala.thrift.TExprNode;
import com.cloudera.impala.thrift.TExprNodeType;
import com.cloudera.impala.thrift.TExprOperator;
import com.cloudera.impala.thrift.TExprOpcode;
import com.google.common.base.Preconditions;
/**
@@ -13,14 +13,14 @@ import com.google.common.base.Preconditions;
*/
public class CompoundPredicate extends Predicate {
public enum Operator {
AND("AND", TExprOperator.AND),
OR("OR", TExprOperator.OR),
NOT("NOT", TExprOperator.NOT);
AND("AND", TExprOpcode.COMPOUND_AND),
OR("OR", TExprOpcode.COMPOUND_OR),
NOT("NOT", TExprOpcode.COMPOUND_NOT);
private final String description;
private final TExprOperator thriftOp;
private final TExprOpcode thriftOp;
private Operator(String description, TExprOperator thriftOp) {
private Operator(String description, TExprOpcode thriftOp) {
this.description = description;
this.thriftOp = thriftOp;
}
@@ -30,7 +30,7 @@ public class CompoundPredicate extends Predicate {
return description;
}
public TExprOperator toThrift() {
public TExprOpcode toThrift() {
return thriftOp;
}
}
@@ -73,6 +73,6 @@ public class CompoundPredicate extends Predicate {
@Override
protected void toThrift(TExprNode msg) {
msg.node_type = TExprNodeType.COMPOUND_PRED;
msg.op = op.toThrift();
msg.setOpcode(op.toThrift());
}
}

View File

@@ -14,6 +14,7 @@ import com.cloudera.impala.common.AnalysisException;
import com.cloudera.impala.common.TreeNode;
import com.cloudera.impala.thrift.TExpr;
import com.cloudera.impala.thrift.TExprNode;
import com.cloudera.impala.thrift.TExprOpcode;
import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
@@ -25,16 +26,22 @@ import com.google.common.collect.Lists;
abstract public class Expr extends TreeNode<Expr> implements ParseNode, Cloneable {
protected PrimitiveType type; // result of analysis
protected boolean isAnalyzed; // true after analyze() has been called
protected TExprOpcode opcode; // opcode for this expr
protected Expr() {
super();
type = PrimitiveType.INVALID_TYPE;
opcode = TExprOpcode.INVALID_OPCODE;
}
public PrimitiveType getType() {
return type;
}
public TExprOpcode getOpcode() {
return opcode;
}
/* Perform semantic analysis of node and all of its children.
* Throws exception if any errors found.
* @see com.cloudera.impala.parser.ParseNode#analyze(com.cloudera.impala.parser.Analyzer)

View File

@@ -4,7 +4,9 @@ package com.cloudera.impala.analysis;
import java.util.List;
import com.cloudera.impala.catalog.PrimitiveType;
import com.cloudera.impala.common.AnalysisException;
import com.cloudera.impala.opcode.FunctionOperator;
import com.cloudera.impala.thrift.TExprNode;
import com.cloudera.impala.thrift.TExprNodeType;
import com.google.common.base.Joiner;
@@ -14,7 +16,7 @@ public class FunctionCallExpr extends Expr {
public FunctionCallExpr(String functionName, List<Expr> params) {
super();
this.functionName = functionName;
this.functionName = functionName.toLowerCase();
children.addAll(params);
}
@@ -23,7 +25,7 @@ public class FunctionCallExpr extends Expr {
if (!super.equals(obj)) {
return false;
}
return ((FunctionCallExpr) obj).functionName.equals(functionName);
return ((FunctionCallExpr) obj).opcode == this.opcode;
}
@Override
@@ -31,16 +33,38 @@ public class FunctionCallExpr extends Expr {
return functionName + "(" + Joiner.on(", ").join(childrenToSql()) + ")";
}
// TODO: we need to encode the actual function opcodes;
// this ties in with replacing TExpr.op with an opcode
// that resolves to a single compute function for the backend
@Override
protected void toThrift(TExprNode msg) {
msg.node_type = TExprNodeType.FUNCTION_CALL;
msg.setOpcode(opcode);
}
/**
 * Resolves this call against the opcode registry.
 *
 * Looks up the operator registered for the function name, then the signature
 * matching the children's types. On success, stores the matched opcode and
 * return type and casts every child whose type differs from the matched
 * argument type.
 *
 * @throws AnalysisException if the function name is unknown or no registered
 *     signature matches the argument types
 */
@Override
public void analyze(Analyzer analyzer) throws AnalysisException {
  FunctionOperator op = OpcodeRegistry.instance().getFunctionOperator(functionName);
  if (op == FunctionOperator.INVALID_OPERATOR) {
    throw new AnalysisException(functionName + " unknown");
  }

  // NOTE(review): the children's types are read here without analyzing them in
  // this method; confirm they are already analyzed when this is called.
  PrimitiveType[] argTypes = new PrimitiveType[this.children.size()];
  for (int i = 0; i < this.children.size(); ++i) {
    argTypes[i] = this.children.get(i).getType();
  }

  OpcodeRegistry.Signature match =
      OpcodeRegistry.instance().getFunctionInfo(op, argTypes);
  if (match == null) {
    // Render the attempted call as name(type1, type2, ...).
    String error = "No matching function with those arguments: " + functionName
        + "(" + Joiner.on(", ").join(argTypes) + ")";
    throw new AnalysisException(error);
  }
  this.opcode = match.opcode;
  this.type = match.returnType;

  // Implicitly cast all the children to match the function if necessary
  for (int i = 0; i < argTypes.length; ++i) {
    if (argTypes[i] != match.argTypes[i]) {
      castChild(match.argTypes[i], i);
    }
  }
}
}

View File

@@ -9,20 +9,20 @@ import com.cloudera.impala.catalog.PrimitiveType;
import com.cloudera.impala.common.AnalysisException;
import com.cloudera.impala.thrift.TExprNode;
import com.cloudera.impala.thrift.TExprNodeType;
import com.cloudera.impala.thrift.TExprOperator;
import com.cloudera.impala.thrift.TExprOpcode;
import com.cloudera.impala.thrift.TLikePredicate;
import com.google.common.base.Preconditions;
public class LikePredicate extends Predicate {
enum Operator {
LIKE("LIKE", TExprOperator.LIKE),
RLIKE("RLIKE", TExprOperator.REGEXP),
REGEXP("REGEXP", TExprOperator.REGEXP);
LIKE("LIKE", TExprOpcode.LIKE),
RLIKE("RLIKE", TExprOpcode.REGEX),
REGEXP("REGEXP", TExprOpcode.REGEX);
private final String description;
private final TExprOperator thriftOp;
private final TExprOpcode thriftOp;
private Operator(String description, TExprOperator thriftOp) {
private Operator(String description, TExprOpcode thriftOp) {
this.description = description;
this.thriftOp = thriftOp;
}
@@ -32,7 +32,7 @@ public class LikePredicate extends Predicate {
return description;
}
public TExprOperator toThrift() {
public TExprOpcode toThrift() {
return thriftOp;
}
}
@@ -63,7 +63,7 @@ public class LikePredicate extends Predicate {
@Override
protected void toThrift(TExprNode msg) {
msg.node_type = TExprNodeType.LIKE_PRED;
msg.op = op.toThrift();
msg.setOpcode(op.toThrift());
msg.like_pred = new TLikePredicate("\\");
}

View File

@@ -0,0 +1,211 @@
package com.cloudera.impala.analysis;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.cloudera.impala.catalog.PrimitiveType;
import com.cloudera.impala.common.Pair;
import com.cloudera.impala.opcode.FunctionOperator;
import com.cloudera.impala.opcode.FunctionRegistry;
import com.cloudera.impala.thrift.TExprOpcode;
import com.google.common.collect.Maps;
/**
 * The OpcodeRegistry provides a mapping between function signatures and opcodes. The
 * supported functions are code-gen'ed and added to the registry with an assigned opcode.
 * The opcode is shared with the backend. The frontend can use the registry to look up
 * a function's opcode.
 *
 * The OpcodeRegistry also contains a mapping from function names (as strings) to
 * operators.
 *
 * The OpcodeRegistry is a singleton.
 *
 * TODO: The opcode registry should be versioned in the FE/BE.
 */
public class OpcodeRegistry {
  // NOTE: LOG must be declared before 'instance'. Static initializers run in textual
  // order and the constructor (via add()/addFunctionMapping()) may log.
  private final static Logger LOG = LoggerFactory.getLogger(OpcodeRegistry.class);
  private static final OpcodeRegistry instance = new OpcodeRegistry();

  /**
   * This is a mapping of (operator, #args) to signatures. The signature is defined by
   * the operator enum and the arguments and is a one to one mapping to opcodes.
   * The map is structured this way to more efficiently look for signature matches.
   * Signatures that have the same number of arguments have a potential to be matches
   * by allowing types to be implicitly cast.
   */
  private final Map<Pair<FunctionOperator, Integer>, List<Signature>> operations;

  /**
   * This contains a mapping of function names to a FunctionOperator enum. This is used
   * by FunctionCallExpr to go from the parser input to function opcodes.
   * This is a many to one mapping (i.e. substr and substring both map to the same
   * operation). Keys are always stored in lower case.
   * The mappings are filled in by FunctionRegistry.java, which is auto-generated.
   */
  private final HashMap<String, FunctionOperator> functionNameMap;

  // Singleton interface
  public static OpcodeRegistry instance() {
    return instance;
  }

  /**
   * Static utility functions
   */
  public static boolean isBitwiseOperation(FunctionOperator operator) {
    return operator == FunctionOperator.BITAND || operator == FunctionOperator.BITNOT ||
        operator == FunctionOperator.BITOR || operator == FunctionOperator.BITXOR;
  }

  /**
   * Contains all the information about a function signature.
   */
  public static class Signature {
    public TExprOpcode opcode;
    public FunctionOperator operator;
    public PrimitiveType returnType;
    public PrimitiveType argTypes[];

    // Constructor for searching, specifying the op and arguments
    public Signature(FunctionOperator operator, PrimitiveType[] args) {
      this.operator = operator;
      this.argTypes = args;
    }

    private Signature(TExprOpcode opcode, FunctionOperator operator,
        PrimitiveType ret, PrimitiveType[] args) {
      this.operator = operator;
      this.opcode = opcode;
      this.returnType = ret;
      this.argTypes = args;
    }

    /**
     * Returns true if the 'this' signature is compatible with the 'other' signature.
     * The op and number of arguments must match and it must be allowed to implicitly
     * cast each argument of this signature to the matching argument in 'other'.
     */
    public boolean isCompatible(Signature other) {
      if (other.operator != this.operator) {
        return false;
      }
      if (other.argTypes.length != this.argTypes.length) {
        return false;
      }
      for (int i = 0; i < this.argTypes.length; ++i) {
        if (!PrimitiveType.isImplicitlyCastable(this.argTypes[i], other.argTypes[i])) {
          return false;
        }
      }
      return true;
    }

    /**
     * Signatures are equal with C++/Java function signature semantics. They are
     * equal if the operation and all the arguments are the same; the opcode and the
     * return type do not take part in the comparison.
     */
    @Override
    public boolean equals(Object o) {
      if (this == o) {
        return true;
      }
      if (!(o instanceof Signature)) {
        return false;
      }
      Signature s = (Signature) o;
      // Arrays.equals() is null-safe and compares length and each element.
      return s.operator == this.operator
          && java.util.Arrays.equals(s.argTypes, this.argTypes);
    }

    /**
     * Hashes exactly the fields compared by equals() so that equal signatures always
     * produce equal hash codes.
     */
    @Override
    public int hashCode() {
      int result = (operator == null) ? 0 : operator.hashCode();
      return 31 * result + java.util.Arrays.hashCode(argTypes);
    }
  }

  /**
   * Returns the function operator enum for 'fnName'. The lookup is case insensitive
   * (e.g. "Substring" and "substring" resolve to the same operator).
   * Returns FunctionOperator.INVALID_OPERATOR if the function name is unknown.
   */
  public FunctionOperator getFunctionOperator(String fnName) {
    FunctionOperator op = functionNameMap.get(fnName.toLowerCase());
    return (op != null) ? op : FunctionOperator.INVALID_OPERATOR;
  }

  /**
   * Query for a function in the registry, specifying the operation, 'op', and the
   * arguments. If there is no matching signature, null will be returned.
   * If there is a match, the matching signature will be returned. The matching
   * signature does not have to match the input identically, implicit type promotion
   * is allowed. An exact match is always preferred; otherwise the first compatible
   * signature in registration order is returned.
   */
  public Signature getFunctionInfo(FunctionOperator op, PrimitiveType ... argTypes) {
    List<Signature> signatures = operations.get(Pair.create(op, argTypes.length));
    if (signatures == null) {
      return null;
    }
    Signature search = new Signature(op, argTypes);
    Signature compatibleMatch = null;
    for (Signature signature : signatures) {
      if (search.equals(signature)) {
        return signature;
      }
      if (compatibleMatch == null && search.isCompatible(signature)) {
        compatibleMatch = signature;
      }
    }
    return compatibleMatch;
  }

  /**
   * Add a function with the specified opcode/signature to the registry.
   * Returns false (and logs an error) if an equal signature is already registered.
   */
  public boolean add(FunctionOperator op, TExprOpcode opcode, PrimitiveType retType,
      PrimitiveType ... args) {
    Pair<FunctionOperator, Integer> lookup = Pair.create(op, args.length);
    List<Signature> signatures = operations.get(lookup);
    if (signatures == null) {
      signatures = new ArrayList<Signature>();
      operations.put(lookup, signatures);
    }
    Signature signature = new Signature(opcode, op, retType, args);
    if (signatures.contains(signature)) {
      LOG.error("OpcodeRegistry: Function already exists: " + opcode);
      return false;
    }
    signatures.add(signature);
    return true;
  }

  /**
   * Maps 'functionName' to 'op'. The name is stored in lower case so that it can be
   * found by the case-insensitive lookup in getFunctionOperator().
   * Returns false (and logs an error) if the name is already mapped.
   */
  public boolean addFunctionMapping(String functionName, FunctionOperator op) {
    String key = functionName.toLowerCase();
    if (functionNameMap.containsKey(key)) {
      LOG.error("OpcodeRegistry: Function mapping already exists: " + functionName);
      return false;
    }
    functionNameMap.put(key, op);
    return true;
  }

  // Singleton interface, don't call the constructor
  private OpcodeRegistry() {
    operations = Maps.newHashMap();
    functionNameMap = Maps.newHashMap();

    // Add all the function signatures to the registry and the function name(string)
    // to FunctionOperator mapping
    FunctionRegistry.InitFunctions(this);
  }
}

View File

@@ -78,7 +78,7 @@ public class StringLiteral extends LiteralExpr {
// Currently we can't handle string literals containing full fledged expressions
// which are implicitly cast to a numeric literal. This would require invoking the parser.
sym = scanner.next_token();
while (sym.sym == SqlParserSymbols.MINUS) {
while (sym.sym == SqlParserSymbols.SUBTRACT) {
multiplier *= -1;
sym = scanner.next_token();
}

View File

@@ -4,7 +4,6 @@ package com.cloudera.impala.analysis;
import java.util.List;
import com.cloudera.impala.catalog.Catalog;
import com.cloudera.impala.catalog.Table;
import com.cloudera.impala.common.AnalysisException;
import com.google.common.base.Joiner;

View File

@@ -235,6 +235,14 @@ public enum PrimitiveType {
return result;
}
/**
 * Returns true if t1 can be implicitly cast to t2, i.e. if the assignment-compatible
 * type of t1 and t2 is t2 itself (casting from t1 to t2 results in no loss of
 * precision).
 */
public static boolean isImplicitlyCastable(PrimitiveType t1, PrimitiveType t2) {
return getAssignmentCompatibleType(t1, t2) == t2;
}
// Returns the highest resolution type
// corresponding to the lexer symbol of numeric literals.
// Currently used to determine whether the literal is fixed or floating point.

View File

@@ -14,6 +14,26 @@ public class Pair<F, S> {
this.second = second;
}
/**
 * A pair is equal to another pair if both parts are equal(). A null part is only
 * equal to another null part, so pairs with null parts can be compared safely,
 * in line with hashCode(), which also tolerates null parts.
 */
@Override
public boolean equals(Object o) {
  if (this == o) {
    return true;
  }
  if (!(o instanceof Pair)) {
    return false;
  }
  // Wildcard cast: the type arguments cannot be checked at runtime anyway.
  Pair<?, ?> other = (Pair<?, ?>) o;
  return (first == null ? other.first == null : first.equals(other.first))
      && (second == null ? other.second == null : second.equals(other.second));
}
/**
 * Combines the hash codes of both parts; a null part contributes 0.
 */
@Override
public int hashCode() {
  final int h1 = (first == null) ? 0 : first.hashCode();
  final int h2 = (second == null) ? 0 : second.hashCode();
  return h1 + h2 * (h1 + h2);
}
static public <F, S> Pair<F, S> create(F first, S second) {
return new Pair<F, S>(first, second);
}

View File

@@ -17,7 +17,6 @@ import com.cloudera.impala.analysis.Predicate;
import com.cloudera.impala.analysis.SlotDescriptor;
import com.cloudera.impala.analysis.StringLiteral;
import com.cloudera.impala.analysis.TupleDescriptor;
import com.cloudera.impala.analysis.BinaryPredicate.Operator;
import com.cloudera.impala.catalog.HBaseColumn;
import com.cloudera.impala.catalog.HBaseTable;
import com.cloudera.impala.catalog.PrimitiveType;
@@ -214,7 +213,7 @@ public class HBaseScanNode extends ScanNode {
return result.toString();
}
private static CompareFilter.CompareOp impalaOpToHBaseOp(Operator impalaOp) {
private static CompareFilter.CompareOp impalaOpToHBaseOp(BinaryPredicate.Operator impalaOp) {
switch(impalaOp) {
case EQ: return CompareFilter.CompareOp.EQUAL;
case NE: return CompareFilter.CompareOp.NOT_EQUAL;

View File

@@ -36,7 +36,7 @@ import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
public class Executor {
private final static Logger LOG = LoggerFactory.getLogger(ValueRange.class);
private final static Logger LOG = LoggerFactory.getLogger(Executor.class);
public static final boolean DEFAULT_ABORT_ON_ERROR = false;
public static final int DEFAULT_MAX_ERRORS = 100;

View File

@@ -116,7 +116,7 @@ import com.cloudera.impala.analysis.SqlParserSymbols;
tokenIdMap.put(new Integer(SqlParserSymbols.UNMATCHED_STRING_LITERAL),
"UNMATCHED STRING LITERAL");
tokenIdMap.put(new Integer(SqlParserSymbols.MOD), "%");
tokenIdMap.put(new Integer(SqlParserSymbols.PLUS), "+");
tokenIdMap.put(new Integer(SqlParserSymbols.ADD), "+");
tokenIdMap.put(new Integer(SqlParserSymbols.DIVIDE), "/");
tokenIdMap.put(new Integer(SqlParserSymbols.EQUAL), "=");
tokenIdMap.put(new Integer(SqlParserSymbols.STAR), "*");
@@ -124,7 +124,7 @@ import com.cloudera.impala.analysis.SqlParserSymbols;
tokenIdMap.put(new Integer(SqlParserSymbols.DOT), ".");
tokenIdMap.put(new Integer(SqlParserSymbols.STRING_LITERAL), "STRING LITERAL");
tokenIdMap.put(new Integer(SqlParserSymbols.EOF), "EOF");
tokenIdMap.put(new Integer(SqlParserSymbols.MINUS), "-");
tokenIdMap.put(new Integer(SqlParserSymbols.SUBTRACT), "-");
tokenIdMap.put(new Integer(SqlParserSymbols.BITAND), "&");
tokenIdMap.put(new Integer(SqlParserSymbols.error), "ERROR");
tokenIdMap.put(new Integer(SqlParserSymbols.BITXOR), "^");
@@ -171,8 +171,8 @@ EndOfLineComment = "--" {NonTerminator}* {LineTerminator}?
")" { return newToken(SqlParserSymbols.RPAREN, null); }
"/" { return newToken(SqlParserSymbols.DIVIDE, null); }
"%" { return newToken(SqlParserSymbols.MOD, null); }
"+" { return newToken(SqlParserSymbols.PLUS, null); }
"-" { return newToken(SqlParserSymbols.MINUS, null); }
"+" { return newToken(SqlParserSymbols.ADD, null); }
"-" { return newToken(SqlParserSymbols.SUBTRACT, null); }
"&" { return newToken(SqlParserSymbols.BITAND, null); }
"|" { return newToken(SqlParserSymbols.BITOR, null); }
"^" { return newToken(SqlParserSymbols.BITXOR, null); }

View File

@@ -4,6 +4,7 @@ namespace cpp impala
namespace java com.cloudera.impala.thrift
include "Types.thrift"
include "Opcodes.thrift"
enum TExprNodeType {
AGG_EXPR,
@@ -25,53 +26,18 @@ enum TExprNodeType {
STRING_LITERAL,
}
// op-codes for all expr operators
enum TExprOperator {
INVALID_OP,
// AggregateExpr
AGG_COUNT,
AGG_MIN,
AGG_MAX,
AGG_SUM,
// AGG_AVG is not executable
// ArithmeticExpr
MULTIPLY,
DIVIDE,
MOD,
INT_DIVIDE,
PLUS,
MINUS,
BITAND,
BITOR,
BITXOR,
BITNOT,
// BinaryPredicate
EQ,
NE,
LE,
GE,
LT,
GT,
// CompoundPredicate
AND,
OR,
NOT,
// LIKE predicate
LIKE,
REGEXP,
// function opcodes
// Aggregation operators; carried in TAggregateExpr.op.
enum TAggregationOp {
INVALID,
COUNT,
MAX,
MIN,
SUM,
}
// Describes an aggregate expression.
struct TAggregateExpr {
// Set for the '*' form; the backend treats COUNT with is_star as COUNT(*).
1: required bool is_star
// NOTE(review): presumably set for aggregates over DISTINCT values -- confirm.
2: required bool is_distinct
// The aggregation operator to apply.
3: required TAggregationOp op
}
struct TBoolLiteral {
@@ -120,7 +86,7 @@ struct TStringLiteral {
struct TExprNode {
1: required TExprNodeType node_type
2: required Types.TPrimitiveType type
3: optional TExprOperator op
3: optional Opcodes.TExprOpcode opcode
4: required i32 num_children
5: optional TAggregateExpr agg_expr

View File

@@ -664,13 +664,13 @@ public class AnalyzerTest {
PrimitiveType promotedType = compatibleType.getMaxResolutionType();
// +, -, *
typeCastTest(type1, type2, false, ArithmeticExpr.Operator.PLUS, null,
typeCastTest(type1, type2, false, ArithmeticExpr.Operator.ADD, null,
promotedType);
typeCastTest(type1, type2, true, ArithmeticExpr.Operator.PLUS, null,
typeCastTest(type1, type2, true, ArithmeticExpr.Operator.ADD, null,
promotedType);
typeCastTest(type1, type2, false, ArithmeticExpr.Operator.MINUS, null,
typeCastTest(type1, type2, false, ArithmeticExpr.Operator.SUBTRACT, null,
promotedType);
typeCastTest(type1, type2, true, ArithmeticExpr.Operator.MINUS, null,
typeCastTest(type1, type2, true, ArithmeticExpr.Operator.SUBTRACT, null,
promotedType);
typeCastTest(type1, type2, false, ArithmeticExpr.Operator.MULTIPLY, null,
promotedType);
@@ -683,16 +683,14 @@ public class AnalyzerTest {
typeCastTest(type1, type2, true, ArithmeticExpr.Operator.DIVIDE, null,
PrimitiveType.DOUBLE);
// %
typeCastTest(type1, type2, false, ArithmeticExpr.Operator.MOD, null,
compatibleType);
typeCastTest(type1, type2, true, ArithmeticExpr.Operator.MOD, null,
compatibleType);
// div, &, |, ^ only for fixed-point types
// % div, &, |, ^ only for fixed-point types
if (!type1.isFixedPointType() || !type2.isFixedPointType()) {
continue;
}
typeCastTest(type1, type2, false, ArithmeticExpr.Operator.MOD, null,
compatibleType);
typeCastTest(type1, type2, true, ArithmeticExpr.Operator.MOD, null,
compatibleType);
typeCastTest(type1, type2, false, ArithmeticExpr.Operator.INT_DIVIDE, null,
compatibleType);
typeCastTest(type1, type2, true, ArithmeticExpr.Operator.INT_DIVIDE, null,

View File

@@ -54,6 +54,8 @@ public class ExecutorTest {
@Test
public void runTest() throws ImpalaException {
runTestSuccess("select substring(\"Hello World\", 0)", 1);
runTestSuccess("select int_col+bigint_col from alltypessmall limit 1", 1);
runTestSuccess("select year, tinyint_col, int_col, id from alltypessmall", 100);
runTestSuccess("select sum(double_col), count(double_col), avg(double_col) " +
"from alltypessmall", 1);