Implemented opcode registry. Added substr() and pi() functions. Added backend testing to buildall.sh

2025-12-19 18:12:08 -05:00 · 2011-11-16 15:49:42 -08:00
parent a8acd52281
commit b1833d4de8
70 changed files with 1493 additions and 1124 deletions
--- a/be/.gitignore
+++ b/be/.gitignore
@@ -7,9 +7,8 @@ CMakeFiles
 cmake_install.cmake
 CTestTestfile.cmake
 !CMakeLists.txt
+Testing/

 build
 generated-sources
-src/exprs/functions.cc
-src/exprs/functions.h

--- a/be/CMakeLists.txt
+++ b/be/CMakeLists.txt
@@ -114,6 +114,7 @@ add_subdirectory(src/testutil)
 add_subdirectory(src/util)

 add_subdirectory(generated-sources/gen-cpp)
+add_subdirectory(generated-sources/opcode)

 link_directories(
  ${CMAKE_SOURCE_DIR}/build/common
--- a/be/generated-sources/gen-cpp/CMakeLists.txt
+++ b/be/generated-sources/gen-cpp/CMakeLists.txt
@@ -20,12 +20,14 @@ add_library(ImpalaThrift
  ImpalaService_types.cpp
  Descriptors_constants.cpp
  Descriptors_types.cpp
+  Exprs_constants.cpp
+  Exprs_types.cpp
+  Opcodes_constants.cpp
+  Opcodes_types.cpp
  PlanNodes_constants.cpp
  PlanNodes_types.cpp
  Types_constants.cpp
  Types_types.cpp
-  Exprs_constants.cpp
-  Exprs_types.cpp
 )

 add_library(thrift STATIC IMPORTED)
--- a/be/src/exec/aggregation-node.cc
+++ b/be/src/exec/aggregation-node.cc
@@ -240,7 +240,7 @@ AggregationTuple* AggregationNode::ConstructAggTuple(TupleRow* row) {
  // (so that SUM(<col>) stays NULL if <col> only contains NULL values).
  for (int i = 0; i < aggregate_exprs_.size(); ++i, ++slot_d) {
    AggregateExpr* agg_expr = static_cast<AggregateExpr*>(aggregate_exprs_[i]);
-    if (agg_expr->op() == TExprOperator::AGG_COUNT) {
+    if (agg_expr->agg_op() == TAggregationOp::COUNT) {
      // we're only aggregating into bigint slots and never return NULL
      *reinterpret_cast<int64_t*>(agg_tuple->GetSlot((*slot_d)->tuple_offset())) = 0;
    } else {
@@ -360,7 +360,7 @@ void AggregationNode::UpdateAggTuple(AggregationTuple* agg_out_tuple, TupleRow*
    }

    // deal with COUNT(*) separately (no need to check the actual child expr value)
-    if (agg_expr->op() == TExprOperator::AGG_COUNT && agg_expr->is_star()) {
+    if (agg_expr->agg_op() == TAggregationOp::COUNT && agg_expr->is_star()) {
      // we're only aggregating into bigint slots
      DCHECK_EQ((*slot_d)->type(), TYPE_BIGINT);
      ++*reinterpret_cast<int64_t*>(slot);
@@ -374,12 +374,12 @@ void AggregationNode::UpdateAggTuple(AggregationTuple* agg_out_tuple, TupleRow*
      continue;
    }

-    switch (agg_expr->op()) {
-      case TExprOperator::AGG_COUNT:
+    switch (agg_expr->agg_op()) {
+      case TAggregationOp::COUNT:
        ++*reinterpret_cast<int64_t*>(slot);
        break;

-      case TExprOperator::AGG_MIN:
+      case TAggregationOp::MIN:
        switch (agg_expr->type()) {
          case TYPE_BOOLEAN:
            UpdateMinSlot<bool>(tuple, (*slot_d)->null_indicator_offset(), slot, value);
@@ -411,7 +411,7 @@ void AggregationNode::UpdateAggTuple(AggregationTuple* agg_out_tuple, TupleRow*
        };
        break;

-      case TExprOperator::AGG_MAX:
+      case TAggregationOp::MAX:
        switch (agg_expr->type()) {
          case TYPE_BOOLEAN:
            UpdateMaxSlot<bool>(tuple, (*slot_d)->null_indicator_offset(), slot, value);
@@ -443,7 +443,7 @@ void AggregationNode::UpdateAggTuple(AggregationTuple* agg_out_tuple, TupleRow*
        };
        break;

-      case TExprOperator::AGG_SUM:
+      case TAggregationOp::SUM:
        switch (agg_expr->type()) {
          case TYPE_BOOLEAN:
            UpdateSumSlot<bool>(tuple, (*slot_d)->null_indicator_offset(), slot, value);
@@ -472,7 +472,7 @@ void AggregationNode::UpdateAggTuple(AggregationTuple* agg_out_tuple, TupleRow*
        break;

      default:
-        DCHECK(false) << "bad aggregate operator: " << agg_expr->op();
+        DCHECK(false) << "bad aggregate operator: " << agg_expr->agg_op();
    }
  }
 }
--- a/be/src/exprs/CMakeLists.txt
+++ b/be/src/exprs/CMakeLists.txt
@@ -18,14 +18,16 @@ add_library(Exprs
  expr.cc
  float-literal.cc
  function-call.cc
-  functions.cc
  int-literal.cc
  is-null-predicate.cc
  like-predicate.cc
  literal-predicate.cc
+  math-functions.cc
  null-literal.cc
+  opcode-registry.cc
  slot-ref.cc
  string-literal.cc
+  string-functions.cc
 )

 target_link_libraries(Exprs
@@ -33,22 +35,23 @@ target_link_libraries(Exprs
  boost_regex-mt
 )

-add_custom_command(
-  OUTPUT functions.cc
-  COMMAND "./gen-functions.py"
-)
-
 add_executable(expr-test
  expr-test.cc
 )

+# The order of link libraries matters.  Specifically:
+#  - Util must come before ImpalaThrift
+#  - Exprs & Opcode have a circular dependency (Exprs is included twice)
 target_link_libraries(expr-test
  TestUtil
  Service
  Exec
  Exprs
+  Opcode
+  Exprs
  Runtime
  Common
+  Util
  thrift
  ImpalaThrift
  MockHdfs
--- a/be/src/exprs/agg-expr.cc
+++ b/be/src/exprs/agg-expr.cc
@@ -11,14 +11,24 @@ namespace impala {

 AggregateExpr::AggregateExpr(const TExprNode& node)
  : Expr(node),
-    op_(node.op),
+    agg_op_(node.agg_expr.op),
    is_star_(node.agg_expr.is_star),
    is_distinct_(node.agg_expr.is_distinct) {
 }

+Status AggregateExpr::Prepare(RuntimeState* state, const RowDescriptor& desc) {
+  RETURN_IF_ERROR(Expr::PrepareChildren(state, desc));
+  if (agg_op_ == TAggregationOp::INVALID) {
+    stringstream out;
+    out << "AggregateExpr::Prepare: Invalid aggregation op: " << agg_op_;
+    return Status(out.str());
+  }
+  return Status::OK;
+}
+
 string AggregateExpr::DebugString() const {
  stringstream out;
-  out << "AggExpr(op=" << op_ << " star=" << is_star_ << " distinct=" << is_distinct_
+  out << "AggExpr(star=" << is_star_ << " distinct=" << is_distinct_
      << " " << Expr::DebugString() << ")";
  return out.str();
 }
--- a/be/src/exprs/agg-expr.h
+++ b/be/src/exprs/agg-expr.h
@@ -13,7 +13,7 @@ class TExprNode;

 class AggregateExpr: public Expr {
 public:
-  TExprOperator::type op() const { return op_; }
+  TAggregationOp::type agg_op() const { return agg_op_; }
  bool is_star() const { return is_star_; }
  bool is_distinct() const { return is_distinct_; }
  virtual std::string DebugString() const;
@@ -21,10 +21,11 @@ class AggregateExpr: public Expr {
 protected:
  friend class Expr;

+  virtual Status Prepare(RuntimeState* state, const RowDescriptor& desc);
  AggregateExpr(const TExprNode& node);

 private:
-  const TExprOperator::type  op_;
+  const TAggregationOp::type agg_op_;
  const bool is_star_;
  const bool is_distinct_;
 };
--- a/be/src/exprs/arithmetic-expr.cc
+++ b/be/src/exprs/arithmetic-expr.cc
@@ -4,7 +4,6 @@
 #include <glog/logging.h>

 #include "exprs/arithmetic-expr.h"
-#include "exprs/functions.h"
 #include "util/debug-util.h"
 #include "gen-cpp/Exprs_types.h"

@@ -13,222 +12,17 @@ using namespace std;
 namespace impala {

 ArithmeticExpr::ArithmeticExpr(const TExprNode& node)
-  : Expr(node), op_(node.op) {
+  : Expr(node) {
 }

-// TODO: replace this with a generic function registry
-// (registered by opcode and parameter types)
-Status ArithmeticExpr::Prepare(RuntimeState* state, const RowDescriptor& row_desc) {
-  Expr::Prepare(state, row_desc);
-  DCHECK(type_ != INVALID_TYPE);
+Status ArithmeticExpr::Prepare(RuntimeState* state, const RowDescriptor& desc) {
  DCHECK_LE(children_.size(), 2);
-  DCHECK(children_.size() == 1 || children_[0]->type() == children_[1]->type());
-  switch (op_) {
-    case TExprOperator::MULTIPLY:
-      switch (type()) {
-        case TYPE_TINYINT:
-          compute_function_ = GetValueFunctions::ArithmeticExpr_multiply_char;
-          return Status::OK;
-        case TYPE_SMALLINT:
-          compute_function_ = GetValueFunctions::ArithmeticExpr_multiply_short;
-          return Status::OK;
-        case TYPE_INT:
-          compute_function_ = GetValueFunctions::ArithmeticExpr_multiply_int;
-          return Status::OK;
-        case TYPE_BIGINT:
-          compute_function_ = GetValueFunctions::ArithmeticExpr_multiply_long;
-          return Status::OK;
-        case TYPE_FLOAT:
-          compute_function_ = GetValueFunctions::ArithmeticExpr_multiply_float;
-          return Status::OK;
-        case TYPE_DOUBLE:
-          compute_function_ = GetValueFunctions::ArithmeticExpr_multiply_double;
-          return Status::OK;
-        default:
-          DCHECK(false) << "bad MULTIPLY type: " << type();
-      }
-      return Status::OK;
-
-    case TExprOperator::DIVIDE:
-      // in "<expr> / <expr>", operands are always cast to double
-      assert(type_ == TYPE_DOUBLE
-          && children_[0]->type() == TYPE_DOUBLE 
-          && children_[1]->type() == TYPE_DOUBLE);
-      compute_function_ = GetValueFunctions::ArithmeticExpr_divide_double;
-      return Status::OK;
-
-    case TExprOperator::MOD:
-      switch (type()) {
-        case TYPE_TINYINT:
-          compute_function_ = GetValueFunctions::ArithmeticExpr_mod_char;
-          return Status::OK;
-        case TYPE_SMALLINT:
-          compute_function_ = GetValueFunctions::ArithmeticExpr_mod_short;
-          return Status::OK;
-        case TYPE_INT:
-          compute_function_ = GetValueFunctions::ArithmeticExpr_mod_int;
-          return Status::OK;
-        case TYPE_BIGINT:
-          compute_function_ = GetValueFunctions::ArithmeticExpr_mod_long;
-          return Status::OK;
-        default:
-          DCHECK(false) << "bad MOD type: " << type();
-      }
-      return Status::OK;
-
-    case TExprOperator::INT_DIVIDE:
-      switch (type()) {
-        case TYPE_TINYINT:
-          compute_function_ = GetValueFunctions::ArithmeticExpr_divide_char;
-          return Status::OK;
-        case TYPE_SMALLINT:
-          compute_function_ = GetValueFunctions::ArithmeticExpr_divide_short;
-          return Status::OK;
-        case TYPE_INT:
-          compute_function_ = GetValueFunctions::ArithmeticExpr_divide_int;
-          return Status::OK;
-        case TYPE_BIGINT:
-          compute_function_ = GetValueFunctions::ArithmeticExpr_divide_long;
-          return Status::OK;
-        default:
-          DCHECK(false) << "bad INT_DIVIDE type: " << type();
-      }
-      return Status::OK;
-
-    case TExprOperator::PLUS:
-      switch (type()) {
-        case TYPE_TINYINT:
-          compute_function_ = GetValueFunctions::ArithmeticExpr_add_char;
-          return Status::OK;
-        case TYPE_SMALLINT:
-          compute_function_ = GetValueFunctions::ArithmeticExpr_add_short;
-          return Status::OK;
-        case TYPE_INT:
-          compute_function_ = GetValueFunctions::ArithmeticExpr_add_int;
-          return Status::OK;
-        case TYPE_BIGINT:
-          compute_function_ = GetValueFunctions::ArithmeticExpr_add_long;
-          return Status::OK;
-        case TYPE_FLOAT:
-          compute_function_ = GetValueFunctions::ArithmeticExpr_add_float;
-          return Status::OK;
-        case TYPE_DOUBLE:
-          compute_function_ = GetValueFunctions::ArithmeticExpr_add_double;
-          return Status::OK;
-        default:
-          DCHECK(false) << "bad PLUS type: " << type();
-      }
-      return Status::OK;
-
-    case TExprOperator::MINUS:
-      switch (type()) {
-        case TYPE_TINYINT:
-          compute_function_ = GetValueFunctions::ArithmeticExpr_subtract_char;
-          return Status::OK;
-        case TYPE_SMALLINT:
-          compute_function_ = GetValueFunctions::ArithmeticExpr_subtract_short;
-          return Status::OK;
-        case TYPE_INT:
-          compute_function_ = GetValueFunctions::ArithmeticExpr_subtract_int;
-          return Status::OK;
-        case TYPE_BIGINT:
-          compute_function_ = GetValueFunctions::ArithmeticExpr_subtract_long;
-          return Status::OK;
-        case TYPE_FLOAT:
-          compute_function_ = GetValueFunctions::ArithmeticExpr_subtract_float;
-          return Status::OK;
-        case TYPE_DOUBLE:
-          compute_function_ = GetValueFunctions::ArithmeticExpr_subtract_double;
-          return Status::OK;
-        default:
-          DCHECK(false) << "bad MINUS type: " << type();
-      }
-      return Status::OK;
-
-    case TExprOperator::BITAND:
-      switch (type()) {
-        case TYPE_TINYINT:
-          compute_function_ = GetValueFunctions::ArithmeticExpr_bitand_char;
-          return Status::OK;
-        case TYPE_SMALLINT:
-          compute_function_ = GetValueFunctions::ArithmeticExpr_bitand_short;
-          return Status::OK;
-        case TYPE_INT:
-          compute_function_ = GetValueFunctions::ArithmeticExpr_bitand_int;
-          return Status::OK;
-        case TYPE_BIGINT:
-          compute_function_ = GetValueFunctions::ArithmeticExpr_bitand_long;
-          return Status::OK;
-        default:
-          DCHECK(false) << "bad BITAND type: " << type();
-      }
-      return Status::OK;
-
-    case TExprOperator::BITOR:
-      switch (type()) {
-        case TYPE_TINYINT:
-          compute_function_ = GetValueFunctions::ArithmeticExpr_bitor_char;
-          return Status::OK;
-        case TYPE_SMALLINT:
-          compute_function_ = GetValueFunctions::ArithmeticExpr_bitor_short;
-          return Status::OK;
-        case TYPE_INT:
-          compute_function_ = GetValueFunctions::ArithmeticExpr_bitor_int;
-          return Status::OK;
-        case TYPE_BIGINT:
-          compute_function_ = GetValueFunctions::ArithmeticExpr_bitor_long;
-          return Status::OK;
-        default:
-          DCHECK(false) << "bad BITOR type: " << type();
-      }
-      return Status::OK;
-
-    case TExprOperator::BITXOR:
-      switch (type()) {
-        case TYPE_TINYINT:
-          compute_function_ = GetValueFunctions::ArithmeticExpr_bitxor_char;
-          return Status::OK;
-        case TYPE_SMALLINT:
-          compute_function_ = GetValueFunctions::ArithmeticExpr_bitxor_short;
-          return Status::OK;
-        case TYPE_INT:
-          compute_function_ = GetValueFunctions::ArithmeticExpr_bitxor_int;
-          return Status::OK;
-        case TYPE_BIGINT:
-          compute_function_ = GetValueFunctions::ArithmeticExpr_bitxor_long;
-          return Status::OK;
-        default:
-          DCHECK(false) << "bad BITXOR type: " << type();
-      }
-      return Status::OK;
-
-    case TExprOperator::BITNOT:
-      switch (type()) {
-        case TYPE_TINYINT:
-          compute_function_ = GetValueFunctions::ArithmeticExpr_bitnot_char;
-          return Status::OK;
-        case TYPE_SMALLINT:
-          compute_function_ = GetValueFunctions::ArithmeticExpr_bitnot_short;
-          return Status::OK;
-        case TYPE_INT:
-          compute_function_ = GetValueFunctions::ArithmeticExpr_bitnot_int;
-          return Status::OK;
-        case TYPE_BIGINT:
-          compute_function_ = GetValueFunctions::ArithmeticExpr_bitnot_long;
-          return Status::OK;
-        default:
-          DCHECK(false) << "bad BITNOT type: " << type();
-      }
-      return Status::OK;
-    default:
-      DCHECK(false) << "bad arithmetic op: " << op_;
-  }
-  return Status::OK;
+  return Expr::Prepare(state, desc);
 }

 string ArithmeticExpr::DebugString() const {
  stringstream out;
-  out << "ArithmeticExpr(op=" << op_ << " " << Expr::DebugString() << ")";
+  out << "ArithmeticExpr(" << Expr::DebugString() << ")";
  return out.str();
 }

--- a/be/src/exprs/arithmetic-expr.h
+++ b/be/src/exprs/arithmetic-expr.h
@@ -15,13 +15,10 @@ class ArithmeticExpr: public Expr {
 protected:
  friend class Expr;

+  virtual Status Prepare(RuntimeState* state, const RowDescriptor& desc);
  ArithmeticExpr(const TExprNode& node);

-  virtual Status Prepare(RuntimeState* state, const RowDescriptor& row_desc);
  virtual std::string DebugString() const;
-
- private:
-  const TExprOperator::type op_;
 };

 }
--- a/be/src/exprs/binary-predicate.cc
+++ b/be/src/exprs/binary-predicate.cc
@@ -4,7 +4,6 @@
 #include <glog/logging.h>

 #include "exprs/binary-predicate.h"
-#include "exprs/functions.h"
 #include "util/debug-util.h"
 #include "gen-cpp/Exprs_types.h"

@@ -13,210 +12,17 @@ using namespace std;
 namespace impala {

 BinaryPredicate::BinaryPredicate(const TExprNode& node)
-  : Predicate(node), op_(node.op) {
+  : Predicate(node) {
 }

-Status BinaryPredicate::Prepare(RuntimeState* state, const RowDescriptor& row_desc) {
-  Expr::Prepare(state, row_desc);
-  PrimitiveType op_type = children_[0]->type();
-  DCHECK(type_ != INVALID_TYPE);
+Status BinaryPredicate::Prepare(RuntimeState* state, const RowDescriptor& desc) {
  DCHECK_EQ(children_.size(), 2);
-  switch (op_) {
-    case TExprOperator::EQ:
-      switch (op_type) {
-        case TYPE_BOOLEAN:
-          compute_function_ = GetValueFunctions::BinaryPredicate_eq_bool;
-          return Status::OK;
-        case TYPE_TINYINT:
-          compute_function_ = GetValueFunctions::BinaryPredicate_eq_char;
-          return Status::OK;
-        case TYPE_SMALLINT:
-          compute_function_ = GetValueFunctions::BinaryPredicate_eq_short;
-          return Status::OK;
-        case TYPE_INT:
-          compute_function_ = GetValueFunctions::BinaryPredicate_eq_int;
-          return Status::OK;
-        case TYPE_BIGINT:
-          compute_function_ = GetValueFunctions::BinaryPredicate_eq_long;
-          return Status::OK;
-        case TYPE_FLOAT:
-          compute_function_ = GetValueFunctions::BinaryPredicate_eq_float;
-          return Status::OK;
-        case TYPE_DOUBLE:
-          compute_function_ = GetValueFunctions::BinaryPredicate_eq_double;
-          return Status::OK;
-        case TYPE_STRING:
-          compute_function_ = GetValueFunctions::BinaryPredicate_eq_fn_StringValue;
-          return Status::OK;
-        default:
-          DCHECK(false) << "bad EQ type: " << TypeToString(op_type);
-      }
-      return Status::OK;
-
-    case TExprOperator::NE:
-      switch (op_type) {
-        case TYPE_BOOLEAN:
-          compute_function_ = GetValueFunctions::BinaryPredicate_ne_bool;
-          return Status::OK;
-        case TYPE_TINYINT:
-          compute_function_ = GetValueFunctions::BinaryPredicate_ne_char;
-          return Status::OK;
-        case TYPE_SMALLINT:
-          compute_function_ = GetValueFunctions::BinaryPredicate_ne_short;
-          return Status::OK;
-        case TYPE_INT:
-          compute_function_ = GetValueFunctions::BinaryPredicate_ne_int;
-          return Status::OK;
-        case TYPE_BIGINT:
-          compute_function_ = GetValueFunctions::BinaryPredicate_ne_long;
-          return Status::OK;
-        case TYPE_FLOAT:
-          compute_function_ = GetValueFunctions::BinaryPredicate_ne_float;
-          return Status::OK;
-        case TYPE_DOUBLE:
-          compute_function_ = GetValueFunctions::BinaryPredicate_ne_double;
-          return Status::OK;
-        case TYPE_STRING:
-          compute_function_ = GetValueFunctions::BinaryPredicate_ne_fn_StringValue;
-          return Status::OK;
-        default:
-          DCHECK(false) << "bad NE type: " << TypeToString(op_type);
-      }
-      return Status::OK;
-
-    case TExprOperator::LE:
-      switch (op_type) {
-        case TYPE_BOOLEAN:
-          compute_function_ = GetValueFunctions::BinaryPredicate_le_bool;
-          return Status::OK;
-        case TYPE_TINYINT:
-          compute_function_ = GetValueFunctions::BinaryPredicate_le_char;
-          return Status::OK;
-        case TYPE_SMALLINT:
-          compute_function_ = GetValueFunctions::BinaryPredicate_le_short;
-          return Status::OK;
-        case TYPE_INT:
-          compute_function_ = GetValueFunctions::BinaryPredicate_le_int;
-          return Status::OK;
-        case TYPE_BIGINT:
-          compute_function_ = GetValueFunctions::BinaryPredicate_le_long;
-          return Status::OK;
-        case TYPE_FLOAT:
-          compute_function_ = GetValueFunctions::BinaryPredicate_le_float;
-          return Status::OK;
-        case TYPE_DOUBLE:
-          compute_function_ = GetValueFunctions::BinaryPredicate_le_double;
-          return Status::OK;
-        case TYPE_STRING:
-          compute_function_ = GetValueFunctions::BinaryPredicate_le_fn_StringValue;
-          return Status::OK;
-        default:
-          DCHECK(false) << "bad LE type: " << TypeToString(op_type);
-      }
-      return Status::OK;
-
-    case TExprOperator::GE:
-      switch (op_type) {
-        case TYPE_BOOLEAN:
-          compute_function_ = GetValueFunctions::BinaryPredicate_ge_bool;
-          return Status::OK;
-        case TYPE_TINYINT:
-          compute_function_ = GetValueFunctions::BinaryPredicate_ge_char;
-          return Status::OK;
-        case TYPE_SMALLINT:
-          compute_function_ = GetValueFunctions::BinaryPredicate_ge_short;
-          return Status::OK;
-        case TYPE_INT:
-          compute_function_ = GetValueFunctions::BinaryPredicate_ge_int;
-          return Status::OK;
-        case TYPE_BIGINT:
-          compute_function_ = GetValueFunctions::BinaryPredicate_ge_long;
-          return Status::OK;
-        case TYPE_FLOAT:
-          compute_function_ = GetValueFunctions::BinaryPredicate_ge_float;
-          return Status::OK;
-        case TYPE_DOUBLE:
-          compute_function_ = GetValueFunctions::BinaryPredicate_ge_double;
-          return Status::OK;
-        case TYPE_STRING:
-          compute_function_ = GetValueFunctions::BinaryPredicate_ge_fn_StringValue;
-          return Status::OK;
-        default:
-          DCHECK(false) << "bad GE type: " << TypeToString(op_type);
-      }
-      return Status::OK;
-
-    case TExprOperator::LT:
-      switch (op_type) {
-        case TYPE_BOOLEAN:
-          compute_function_ = GetValueFunctions::BinaryPredicate_lt_bool;
-          return Status::OK;
-        case TYPE_TINYINT:
-          compute_function_ = GetValueFunctions::BinaryPredicate_lt_char;
-          return Status::OK;
-        case TYPE_SMALLINT:
-          compute_function_ = GetValueFunctions::BinaryPredicate_lt_short;
-          return Status::OK;
-        case TYPE_INT:
-          compute_function_ = GetValueFunctions::BinaryPredicate_lt_int;
-          return Status::OK;
-        case TYPE_BIGINT:
-          compute_function_ = GetValueFunctions::BinaryPredicate_lt_long;
-          return Status::OK;
-        case TYPE_FLOAT:
-          compute_function_ = GetValueFunctions::BinaryPredicate_lt_float;
-          return Status::OK;
-        case TYPE_DOUBLE:
-          compute_function_ = GetValueFunctions::BinaryPredicate_lt_double;
-          return Status::OK;
-        case TYPE_STRING:
-          compute_function_ = GetValueFunctions::BinaryPredicate_lt_fn_StringValue;
-          return Status::OK;
-        default:
-          DCHECK(false) << "bad LT type: " << TypeToString(op_type);
-      }
-      return Status::OK;
-
-    case TExprOperator::GT:
-      switch (op_type) {
-        case TYPE_BOOLEAN:
-          compute_function_ = GetValueFunctions::BinaryPredicate_gt_bool;
-          return Status::OK;
-        case TYPE_TINYINT:
-          compute_function_ = GetValueFunctions::BinaryPredicate_gt_char;
-          return Status::OK;
-        case TYPE_SMALLINT:
-          compute_function_ = GetValueFunctions::BinaryPredicate_gt_short;
-          return Status::OK;
-        case TYPE_INT:
-          compute_function_ = GetValueFunctions::BinaryPredicate_gt_int;
-          return Status::OK;
-        case TYPE_BIGINT:
-          compute_function_ = GetValueFunctions::BinaryPredicate_gt_long;
-          return Status::OK;
-        case TYPE_FLOAT:
-          compute_function_ = GetValueFunctions::BinaryPredicate_gt_float;
-          return Status::OK;
-        case TYPE_DOUBLE:
-          compute_function_ = GetValueFunctions::BinaryPredicate_gt_double;
-          return Status::OK;
-        case TYPE_STRING:
-          compute_function_ = GetValueFunctions::BinaryPredicate_gt_fn_StringValue;
-          return Status::OK;
-        default:
-          DCHECK(false) << "bad GT type: " << TypeToString(op_type);
-      }
-      return Status::OK;
-
-    default:
-      DCHECK(false) << "bad binary predicate op: " << op_;
-  }
-  return Status::OK;
+  return Expr::Prepare(state, desc);
 }

 string BinaryPredicate::DebugString() const {
  stringstream out;
-  out << "BinaryPredicate(op=" << op_ << " " << Expr::DebugString() << ")";
+  out << "BinaryPredicate(" << Expr::DebugString() << ")";
  return out.str();
 }

--- a/be/src/exprs/binary-predicate.h
+++ b/be/src/exprs/binary-predicate.h
@@ -15,11 +15,8 @@ class BinaryPredicate : public Predicate {

  BinaryPredicate(const TExprNode& node);

-  virtual Status Prepare(RuntimeState* state, const RowDescriptor& row_desc);
+  virtual Status Prepare(RuntimeState* state, const RowDescriptor& desc);
  virtual std::string DebugString() const;
-
- private:
-  const TExprOperator::type  op_;
 };

 }
--- a/be/src/exprs/bool-literal.cc
+++ b/be/src/exprs/bool-literal.cc
@@ -26,7 +26,7 @@ void* BoolLiteral::ReturnValue(Expr* e, TupleRow* row) {
 }

 Status BoolLiteral::Prepare(RuntimeState* state, const RowDescriptor& row_desc) {
-  Expr::Prepare(state, row_desc);
+  DCHECK_EQ(children_.size(), 0);
  compute_function_ = ReturnValue;
  return Status::OK;
 }
--- a/be/src/exprs/case-expr.cc
+++ b/be/src/exprs/case-expr.cc
@@ -20,7 +20,6 @@ CaseExpr::CaseExpr(const TExprNode& node)
 }

 Status CaseExpr::Prepare(RuntimeState* state, const RowDescriptor& row_desc) {
-  Expr::Prepare(state, row_desc);
  compute_function_ = ComputeFunction;
  return Status::OK;
 }
--- a/be/src/exprs/case-expr.h
+++ b/be/src/exprs/case-expr.h
@@ -23,6 +23,7 @@ class CaseExpr: public Expr {
  const bool has_case_expr_;
  const bool has_else_expr_;

+
  static void* ComputeFunction(Expr* e, TupleRow* row);
 };

--- a/be/src/exprs/cast-expr.cc
+++ b/be/src/exprs/cast-expr.cc
@@ -4,205 +4,19 @@
 #include <glog/logging.h>

 #include "exprs/cast-expr.h"
-#include "exprs/functions.h"
-
 #include "gen-cpp/Exprs_types.h"

 using namespace std;

 namespace impala {

-// TODO: generate cast eval functions between all legal combinations of source
-// and target type
-
 CastExpr::CastExpr(const TExprNode& node)
  : Expr(node) {
 }

-Status CastExpr::Prepare(RuntimeState* state, const RowDescriptor& row_desc) {
-  Expr::Prepare(state, row_desc);
-  DCHECK(type_ != INVALID_TYPE);
-  DCHECK_LE(children_.size(), 1);
-  switch (children_[0]->type()) {
-    case TYPE_TINYINT:
-      switch (type_) {
-        case TYPE_SMALLINT:
-          compute_function_ = GetValueFunctions::Cast_char_short;
-          return Status::OK;
-        case TYPE_INT:
-          compute_function_ = GetValueFunctions::Cast_char_int;
-          return Status::OK;
-        case TYPE_BIGINT:
-          compute_function_ = GetValueFunctions::Cast_char_long;
-          return Status::OK;
-        case TYPE_FLOAT:
-          compute_function_ = GetValueFunctions::Cast_char_float;
-          return Status::OK;
-        case TYPE_DOUBLE:
-          compute_function_ = GetValueFunctions::Cast_char_double;
-          return Status::OK;
-        case TYPE_STRING:
-          compute_function_ = GetValueFunctions::Cast_char_StringValue;
-          return Status::OK;
-        default:
-          DCHECK(false) << "bad cast type: " << TypeToString(type_);
-      }
-      return Status::OK;
-
-    case TYPE_SMALLINT:
-      switch (type_) {
-        case TYPE_TINYINT:
-          compute_function_ = GetValueFunctions::Cast_short_char;
-          return Status::OK;
-        case TYPE_INT:
-          compute_function_ = GetValueFunctions::Cast_short_int;
-          return Status::OK;
-        case TYPE_BIGINT:
-          compute_function_ = GetValueFunctions::Cast_short_long;
-          return Status::OK;
-        case TYPE_FLOAT:
-          compute_function_ = GetValueFunctions::Cast_short_float;
-          return Status::OK;
-        case TYPE_DOUBLE:
-          compute_function_ = GetValueFunctions::Cast_short_double;
-          return Status::OK;
-        case TYPE_STRING:
-          compute_function_ = GetValueFunctions::Cast_short_StringValue;
-          return Status::OK;
-        default:
-          DCHECK(false) << "bad cast type: " << TypeToString(type_);
-      }
-      return Status::OK;
-
-    case TYPE_INT:
-      switch (type_) {
-        case TYPE_TINYINT:
-          compute_function_ = GetValueFunctions::Cast_int_char;
-          return Status::OK;
-        case TYPE_SMALLINT:
-          compute_function_ = GetValueFunctions::Cast_int_short;
-          return Status::OK;
-        case TYPE_BIGINT:
-          compute_function_ = GetValueFunctions::Cast_int_long;
-          return Status::OK;
-        case TYPE_FLOAT:
-          compute_function_ = GetValueFunctions::Cast_int_float;
-          return Status::OK;
-        case TYPE_DOUBLE:
-          compute_function_ = GetValueFunctions::Cast_int_double;
-          return Status::OK;
-        case TYPE_STRING:
-          compute_function_ = GetValueFunctions::Cast_int_StringValue;
-          return Status::OK;
-        default:
-          DCHECK(false) << "bad cast type: " << TypeToString(type_);
-      }
-      return Status::OK;
-
-    case TYPE_BIGINT:
-      switch (type_) {
-        case TYPE_TINYINT:
-          compute_function_ = GetValueFunctions::Cast_long_char;
-          return Status::OK;
-        case TYPE_SMALLINT:
-          compute_function_ = GetValueFunctions::Cast_long_short;
-          return Status::OK;
-        case TYPE_INT:
-          compute_function_ = GetValueFunctions::Cast_long_int;
-          return Status::OK;
-        case TYPE_FLOAT:
-          compute_function_ = GetValueFunctions::Cast_long_float;
-          return Status::OK;
-        case TYPE_DOUBLE:
-          compute_function_ = GetValueFunctions::Cast_long_double;
-          return Status::OK;
-        case TYPE_STRING:
-          compute_function_ = GetValueFunctions::Cast_long_StringValue;
-          return Status::OK;
-        default:
-          DCHECK(false) << "bad cast type: " << TypeToString(type_);
-      }
-      return Status::OK;
-
-    case TYPE_FLOAT:
-      switch (type_) {
-        case TYPE_TINYINT:
-          compute_function_ = GetValueFunctions::Cast_float_char;
-          return Status::OK;
-        case TYPE_SMALLINT:
-          compute_function_ = GetValueFunctions::Cast_float_short;
-          return Status::OK;
-        case TYPE_INT:
-          compute_function_ = GetValueFunctions::Cast_float_int;
-          return Status::OK;
-        case TYPE_BIGINT:
-          compute_function_ = GetValueFunctions::Cast_float_long;
-          return Status::OK;
-        case TYPE_DOUBLE:
-          compute_function_ = GetValueFunctions::Cast_float_double;
-          return Status::OK;
-        case TYPE_STRING:
-          compute_function_ = GetValueFunctions::Cast_float_StringValue;
-          return Status::OK;
-        default:
-          DCHECK(false) << "bad cast type: " << TypeToString(type_);
-      }
-      return Status::OK;
-
-    case TYPE_DOUBLE:
-      switch (type_) {
-        case TYPE_TINYINT:
-          compute_function_ = GetValueFunctions::Cast_double_char;
-          return Status::OK;
-        case TYPE_SMALLINT:
-          compute_function_ = GetValueFunctions::Cast_double_short;
-          return Status::OK;
-        case TYPE_INT:
-          compute_function_ = GetValueFunctions::Cast_double_int;
-          return Status::OK;
-        case TYPE_BIGINT:
-          compute_function_ = GetValueFunctions::Cast_double_long;
-          return Status::OK;
-        case TYPE_FLOAT:
-          compute_function_ = GetValueFunctions::Cast_double_float;
-          return Status::OK;
-        case TYPE_STRING:
-          compute_function_ = GetValueFunctions::Cast_double_StringValue;
-          return Status::OK;
-        default:
-          DCHECK(false) << "bad cast type: " << TypeToString(type_);
-      }
-      return Status::OK;
-
-    case TYPE_STRING:
-      switch (type_) {
-        case TYPE_TINYINT:
-          compute_function_ = GetValueFunctions::Cast_StringValue_char;
-          return Status::OK;
-        case TYPE_SMALLINT:
-          compute_function_ = GetValueFunctions::Cast_StringValue_short;
-          return Status::OK;
-        case TYPE_INT:
-          compute_function_ = GetValueFunctions::Cast_StringValue_int;
-          return Status::OK;
-        case TYPE_BIGINT:
-          compute_function_ = GetValueFunctions::Cast_StringValue_long;
-          return Status::OK;
-        case TYPE_FLOAT:
-          compute_function_ = GetValueFunctions::Cast_StringValue_float;
-          return Status::OK;
-        case TYPE_DOUBLE:
-          compute_function_ = GetValueFunctions::Cast_StringValue_double;
-          return Status::OK;
-        default:
-          DCHECK(false) << "bad cast type: " << TypeToString(type_);
-      }
-      return Status::OK;
-
-    default:
-      DCHECK(false) << "bad cast child type: " << TypeToString(children_[0]->type());
-  }
-  return Status::OK;
+Status CastExpr::Prepare(RuntimeState* state, const RowDescriptor& desc) {
+  DCHECK_EQ(children_.size(), 1);
+  return Expr::Prepare(state, desc);
 }

 string CastExpr::DebugString() const {
--- a/be/src/exprs/cast-expr.h
+++ b/be/src/exprs/cast-expr.h
@@ -12,7 +12,7 @@ class TExprNode;

 class CastExpr: public Expr {
 public:
-  virtual Status Prepare(RuntimeState* state, const RowDescriptor& row_desc);
+  virtual Status Prepare(RuntimeState* state, const RowDescriptor& desc);
  virtual std::string DebugString() const;

 protected:
--- a/be/src/exprs/compound-predicate.cc
+++ b/be/src/exprs/compound-predicate.cc
@@ -11,13 +11,18 @@ using namespace std;
 namespace impala {

 CompoundPredicate::CompoundPredicate(const TExprNode& node)
-  : Predicate(node), op_(node.op) {
+  : Predicate(node) {
+}
+
+Status CompoundPredicate::Prepare(RuntimeState* state, const RowDescriptor& desc) {
+  DCHECK_LE(children_.size(), 2);
+  return Expr::Prepare(state, desc);
 }

 void* CompoundPredicate::AndComputeFunction(Expr* e, TupleRow* row) {
  CompoundPredicate* p = static_cast<CompoundPredicate*>(e);
-  // assert(p->children_.size() == 2);
-  // assert(p->op_ == TExprOperator::AND);
+  DCHECK_EQ(p->children_.size(), 2);
+  DCHECK_EQ(p->opcode_, TExprOpcode::COMPOUND_AND);
  Expr* op1 = e->children()[0];
  bool* val1 = reinterpret_cast<bool*>(op1->GetValue(row));
  Expr* op2 = e->children()[1];
@@ -37,8 +42,8 @@ void* CompoundPredicate::AndComputeFunction(Expr* e, TupleRow* row) {

 void* CompoundPredicate::OrComputeFunction(Expr* e, TupleRow* row) {
  CompoundPredicate* p = static_cast<CompoundPredicate*>(e);
-  // assert(p->children_.size() == 2);
-  // assert(p->op_ == TExprOperator::OR);
+  DCHECK_EQ(p->children_.size(), 2);
+  DCHECK_EQ(p->opcode_, TExprOpcode::COMPOUND_OR);
  Expr* op1 = e->children()[0];
  bool* val1 = reinterpret_cast<bool*>(op1->GetValue(row));
  Expr* op2 = e->children()[1];
@@ -58,8 +63,8 @@ void* CompoundPredicate::OrComputeFunction(Expr* e, TupleRow* row) {

 void* CompoundPredicate::NotComputeFunction(Expr* e, TupleRow* row) {
  CompoundPredicate* p = static_cast<CompoundPredicate*>(e);
-  // assert(p->children_.size() == 1);
-  // assert(p->op_ == TExprOperator::NOT);
+  DCHECK_EQ(p->children_.size(), 1);
+  DCHECK_EQ(p->opcode_, TExprOpcode::COMPOUND_NOT);
  Expr* op = e->children()[0];
  bool* val = reinterpret_cast<bool*>(op->GetValue(row));
  if (val == NULL) return NULL;
@@ -67,29 +72,9 @@ void* CompoundPredicate::NotComputeFunction(Expr* e, TupleRow* row) {
  return &p->result_.bool_val;
 }

-Status CompoundPredicate::Prepare(RuntimeState* state, const RowDescriptor& row_desc) {
-  Expr::Prepare(state, row_desc);
-  DCHECK(type_ != INVALID_TYPE);
-  DCHECK_LE(children_.size(), 2);
-  switch (op_) {
-    case TExprOperator::AND:
-      compute_function_ = AndComputeFunction;
-      return Status::OK;
-    case TExprOperator::OR:
-      compute_function_ = OrComputeFunction;
-      return Status::OK;
-    case TExprOperator::NOT:
-      compute_function_ = NotComputeFunction;
-      return Status::OK;
-    default:
-      DCHECK(false) << "Invalid compound predicate op: " << op_;
-  }
-  return Status::OK;
-}
-
 string CompoundPredicate::DebugString() const {
  stringstream out;
-  out << "CompoundPredicate(op=" << op_ << " " << Expr::DebugString() << ")";
+  out << "CompoundPredicate(" << Expr::DebugString() << ")";
  return out.str();
 }

--- a/be/src/exprs/compound-predicate.h
+++ b/be/src/exprs/compound-predicate.h
@@ -15,11 +15,11 @@ class CompoundPredicate: public Predicate {

  CompoundPredicate(const TExprNode& node);

-  virtual Status Prepare(RuntimeState* state, const RowDescriptor& row_desc);
+  virtual Status Prepare(RuntimeState* state, const RowDescriptor& desc);
  virtual std::string DebugString() const;

 private:
-  const TExprOperator::type op_;
+  friend class OpcodeRegistry;

  static void* AndComputeFunction(Expr* e, TupleRow* row);
  static void* OrComputeFunction(Expr* e, TupleRow* row);
--- a/be/src/exprs/expr-test.cc
+++ b/be/src/exprs/expr-test.cc
@@ -558,6 +558,22 @@ TEST_F(ExprTest, LikePredicate) {
  TestValue("'\\\\a' LIKE '\\\\\\_'", TYPE_BOOLEAN, false);
 }

+TEST_F(ExprTest, StringFunctions) {
+  TestStringValue("substring('Hello', 1)", "Hello");
+  TestStringValue("substring('Hello', -2)", "lo");
+  TestStringValue("substring('Hello', 0)", "");
+  TestStringValue("substring('Hello', -5)", "Hello");
+  TestStringValue("substring('Hello', -6)", "");
+  TestStringValue("substring('Hello', 100)", "");
+  TestStringValue("substring('Hello', 1, 1)", "H");
+  TestStringValue("substring('Hello', 2, 100)", "ello");
+  TestStringValue("substring('Hello', -3, 2)", "ll");
+  // TODO: test NULLs; currently we can't parse them inside function calls
+}
+
+TEST_F(ExprTest, MathFunctions) {
+  TestValue("pi()", TYPE_DOUBLE, M_PI);
+}
 }

 int main(int argc, char **argv) {
--- a/be/src/exprs/expr.cc
+++ b/be/src/exprs/expr.cc
@@ -21,6 +21,7 @@
 #include "exprs/like-predicate.h"
 #include "exprs/literal-predicate.h"
 #include "exprs/null-literal.h"
+#include "exprs/opcode-registry.h"
 #include "exprs/string-literal.h"
 #include "gen-cpp/Exprs_types.h"
 #include "gen-cpp/ImpalaService_types.h"
@@ -37,17 +38,20 @@ bool ParseString(const string& str, T* val) {
 }

 Expr::Expr(PrimitiveType type)
-    : is_slotref_(false),
+    : opcode_(TExprOpcode::INVALID_OPCODE),
+      is_slotref_(false),
      type_(type) {
 }

 Expr::Expr(const TExprNode& node)
-    : is_slotref_(false),
+    : opcode_(node.__isset.opcode ? node.opcode : TExprOpcode::INVALID_OPCODE),
+      is_slotref_(false),
      type_(ThriftToType(node.type)) {
 }

 Expr::Expr(const TExprNode& node, bool is_slotref)
-    : is_slotref_(is_slotref),
+    : opcode_(node.__isset.opcode ? node.opcode : TExprOpcode::INVALID_OPCODE),
+      is_slotref_(is_slotref),
      type_(ThriftToType(node.type)) {
 }

@@ -190,16 +194,10 @@ Status Expr::CreateExpr(ObjectPool* pool, const TExprNode& texpr_node, Expr** ex
      return Status::OK;
    }
    case TExprNodeType::ARITHMETIC_EXPR: {
-      if (!texpr_node.__isset.op) {
-        return Status("Arithmetic expression not set in thrift node");
-      }
      *expr = pool->Add(new ArithmeticExpr(texpr_node));
      return Status::OK;
    }
    case TExprNodeType::BINARY_PRED: {
-      if (!texpr_node.__isset.op) {
-        return Status("Binary predicate not set in thrift node");
-      }
      *expr = pool->Add(new BinaryPredicate(texpr_node));
      return Status::OK;
    }
@@ -258,9 +256,6 @@ Status Expr::CreateExpr(ObjectPool* pool, const TExprNode& texpr_node, Expr** ex
      return Status::OK;
    }
    case TExprNodeType::LIKE_PRED: {
-      if (!texpr_node.__isset.op) {
-        return Status("Like predicate not set in thrift node");
-      }
      *expr = pool->Add(new LikePredicate(texpr_node));
      return Status::OK;
    }
@@ -356,13 +351,27 @@ void Expr::PrintValue(void* value, string* str) {
  RawValue::PrintValue(value, type_, str);
 }

-Status Expr::Prepare(RuntimeState* state, const RowDescriptor& row_desc) {
+Status Expr::PrepareChildren(RuntimeState* state, const RowDescriptor& row_desc) {
+  DCHECK(type_ != INVALID_TYPE);
  for (int i = 0; i < children_.size(); ++i) {
    RETURN_IF_ERROR(children_[i]->Prepare(state, row_desc));
  }
  return Status::OK;
 }

+Status Expr::Prepare(RuntimeState* state, const RowDescriptor& row_desc) {
+  RETURN_IF_ERROR(PrepareChildren(state, row_desc));  // don't drop child errors
+  // Exprs without an opcode (e.g. literals, agg-exprs) must not reach this lookup.
+  DCHECK(opcode_ != TExprOpcode::INVALID_OPCODE);
+  compute_function_ = OpcodeRegistry::Instance()->GetFunction(opcode_);
+  if (compute_function_ == NULL) {
+    stringstream out;
+    out << "Expr::Prepare(): Opcode: " << opcode_ << " does not have a registry entry. ";
+    return Status(out.str());
+  }
+  return Status::OK;
+}
+
 Status Expr::Prepare(const std::vector<Expr*>& exprs, RuntimeState* state,
                     const RowDescriptor& row_desc) {
  for (int i = 0; i < exprs.size(); ++i) {
@@ -375,6 +384,9 @@ string Expr::DebugString() const {
  // TODO: implement partial debug string for member vars
  stringstream out;
  out << "type=" << TypeToString(type_);
+  if (opcode_ != TExprOpcode::INVALID_OPCODE) {
+    out << " opcode=" << opcode_;
+  }
  if (!children_.empty()) {
    out << " children=" << DebugString(children_);
  }
--- a/be/src/exprs/expr.h
+++ b/be/src/exprs/expr.h
@@ -7,6 +7,7 @@
 #include <vector>

 #include "common/status.h"
+#include "gen-cpp/Opcodes_types.h"
 #include "runtime/descriptors.h"
 #include "runtime/tuple.h"
 #include "runtime/tuple-row.h"
@@ -14,6 +15,7 @@

 namespace impala {

+class Expr;
 class ObjectPool;
 class RowDescriptor;
 class RuntimeState;
@@ -75,8 +77,11 @@ struct ExprValue {
 // This is the superclass of all expr evaluation nodes.
 class Expr {
 public:
+  // typedef for compute functions.  
+  typedef void* (*ComputeFunction)(Expr*, TupleRow*);
+  
  // Prepare expr tree for evaluation. In particular, set compute_function_.
-  // This implementation simply invokes it recursively for the entire tree.
+  // Prepare should be invoked recursively on the expr tree.
  // Return OK if successful, otherwise return error status.
  virtual Status Prepare(RuntimeState* state, const RowDescriptor& row_desc);

@@ -109,6 +114,8 @@ class Expr {
  PrimitiveType type() const { return type_; }
  const std::vector<Expr*>& children() const { return children_; }

+  TExprOpcode::type op() const { return opcode_; }
+  
  // Returns true if expr doesn't contain slotrefs, ie, can be evaluated
  // with GetValue(NULL). The default implementation returns true if all of
  // the children are constant.
@@ -143,16 +150,25 @@ class Expr {
  static std::string DebugString(const std::vector<Expr*>& exprs);

 protected:
-  friend class GetValueFunctions;
+  friend class ComputeFunctions;
+  friend class MathFunctions;
+  friend class StringFunctions;

  Expr(PrimitiveType type);
  Expr(const TExprNode& node);
  Expr(const TExprNode& node, bool is_slotref);

+  // Helper function that just calls Prepare() on all the children.
+  // Does not do anything on this expr itself.
+  // Return OK if successful, otherwise return error status.
+  Status PrepareChildren(RuntimeState* state, const RowDescriptor& row_desc);
+
  // function to evaluate expr; typically set in Prepare()
-  typedef void* (*ComputeFunction)(Expr*, TupleRow*);
  ComputeFunction compute_function_;

+  // function opcode
+  TExprOpcode::type opcode_;
+
  // recognize if this node is a slotref in order to speed up GetValue()
  const bool is_slotref_;
  // analysis is done, types are fixed at this point
--- a/be/src/exprs/float-literal.cc
+++ b/be/src/exprs/float-literal.cc
@@ -51,7 +51,7 @@ void* FloatLiteral::ReturnDoubleValue(Expr* e, TupleRow* row) {
 }

 Status FloatLiteral::Prepare(RuntimeState* state, const RowDescriptor& row_desc) {
-  Expr::Prepare(state, row_desc);
+  DCHECK_EQ(children_.size(), 0);
  switch (type_) {
    case TYPE_FLOAT:
      compute_function_ = ReturnFloatValue;
--- a/be/src/exprs/function-call.cc
+++ b/be/src/exprs/function-call.cc
@@ -1,10 +1,9 @@
 // Copyright (c) 2011 Cloudera, Inc. All rights reserved.

-#include "function-call.h"
-
 #include <sstream>
+#include <glog/logging.h>

-#include "gen-cpp/Exprs_types.h"
+#include "exprs/function-call.h"

 using namespace std;

--- a/be/src/exprs/gen-functions.py
+++ b/be/src/exprs/gen-functions.py
@@ -1,256 +0,0 @@
-#!/usr/bin/env python
-
-from string import Template
-
-# operators/functions and their names
-operators = {
-    'add': '+',
-    'subtract': '-',
-    'multiply': '*',
-    'divide': '/',
-    'mod': '%',
-    'bitand': '&',
-    'bitor': '|',
-    'bitxor': '^',
-    'bitnot': '~',
-    'eq': '==',
-    'ne': '!=',
-    'le': '<=',
-    'ge': '>=',
-    'lt': '<',
-    'gt': '>',
-    'eq_fn': 'Eq',
-    'ne_fn': 'Ne',
-    'le_fn': 'Le',
-    'ge_fn': 'Ge',
-    'lt_fn': 'Lt',
-    'gt_fn': 'Gt',
-}
-
-# map of signatures (operand types and return type/result field)
-op_signatures = {    
-    'tinyint_op': ('char', 'tinyint_val'),
-    'smallint_op': ('short', 'smallint_val'),
-    'int_op': ('int', 'int_val'),
-    'bigint_op': ('long', 'bigint_val'),
-    'float_op': ('float', 'float_val'),
-    'double_op': ('double', 'double_val'),
-    'string_op': ('string', 'string_val'),
-    'bool_pred': ('bool', 'bool_val'),
-    'tinyint_pred': ('char', 'bool_val'),
-    'smallint_pred': ('short', 'bool_val'),
-    'int_pred': ('int', 'bool_val'),
-    'bigint_pred': ('long', 'bool_val'),
-    'float_pred': ('float', 'bool_val'),
-    'double_pred': ('double', 'bool_val'),
-    'string_pred': ('StringValue', 'bool_val'),
-}
-
-# map from native type to corresponding result field
-result_fields = {
-    'bool': 'bool_val',
-    'char': 'tinyint_val',
-    'short': 'smallint_val',
-    'int': 'int_val',
-    'long': 'bigint_val',
-    'float': 'float_val',
-    'double': 'double_val',
-    'StringValue': 'string_val'
-}
-
-binary_op_invocations = [
-    ('ArithmeticExpr',
-      ['add', 'subtract', 'multiply'],
-      ['tinyint_op', 'smallint_op', 'int_op', 'bigint_op', 'float_op', 'double_op']),
-    ('ArithmeticExpr',
-      ['divide'],
-      ['double_op']),
-    ('ArithmeticExpr',
-      ['mod', 'divide', 'bitand', 'bitor', 'bitxor'],
-      ['tinyint_op', 'smallint_op', 'int_op', 'bigint_op']),
-    ('BinaryPredicate',
-      ['eq', 'ne', 'le', 'ge', 'lt', 'gt'],
-      ['bool_pred', 'tinyint_pred', 'smallint_pred', 'int_pred', 'bigint_pred', 'float_pred', 'double_pred']),
-]
-
-member_fn_invocations = [
-    ('BinaryPredicate',
-      ['eq_fn', 'ne_fn', 'le_fn', 'ge_fn', 'lt_fn', 'gt_fn'],
-      ['string_pred']),
-]
-
-unary_op_invocations = [
-    ('ArithmeticExpr',
-      ['bitnot'],
-      ['tinyint_op', 'smallint_op', 'int_op', 'bigint_op']),
-]
-
-binary_op_template = Template("\
-void* GetValueFunctions::${function_name}(Expr* e, TupleRow* row) {\n\
-  ${expr_class}* expr = static_cast<${expr_class}*>(e);\n\
-  // assert(p->children_.size() == 2);\n\
-  Expr* op1 = e->children()[0];\n\
-  ${native_type}* val1 = reinterpret_cast<${native_type}*>(op1->GetValue(row));\n\
-  Expr* op2 = e->children()[1];\n\
-  ${native_type}* val2 = reinterpret_cast<${native_type}*>(op2->GetValue(row));\n\
-  if (val1 == NULL || val2 == NULL) return NULL;\n\
-  expr->result_.${result_field} = *val1 ${op} *val2;\n\
-  return &expr->result_.${result_field};\n\
-}\n")
-
-member_fn_template = Template("\
-void* GetValueFunctions::${function_name}(Expr* e, TupleRow* row) {\n\
-  ${expr_class}* expr = static_cast<${expr_class}*>(e);\n\
-  // assert(p->children_.size() == 2);\n\
-  Expr* op1 = e->children()[0];\n\
-  ${native_type}* val1 = reinterpret_cast<${native_type}*>(op1->GetValue(row));\n\
-  Expr* op2 = e->children()[1];\n\
-  ${native_type}* val2 = reinterpret_cast<${native_type}*>(op2->GetValue(row));\n\
-  if (val1 == NULL || val2 == NULL) return NULL;\n\
-  expr->result_.${result_field} = val1->${op}(*val2);\n\
-  return &expr->result_.${result_field};\n\
-}\n")
-
-unary_op_template = Template("\
-void* GetValueFunctions::${function_name}(Expr* e, TupleRow* row) {\n\
-  ${expr_class}* expr = static_cast<${expr_class}*>(e);\n\
-  // assert(p->children_.size() == 1);\n\
-  Expr* op = e->children()[0];\n\
-  ${native_type}* val = reinterpret_cast<${native_type}*>(op->GetValue(row));\n\
-  if (val == NULL) return NULL;\n\
-  expr->result_.${result_field} = ${op} *val;\n\
-  return &expr->result_.${result_field};\n\
-}\n")
-
-cast_template = Template("\
-void* GetValueFunctions::${function_name}(Expr* e, TupleRow* row) {\n\
-  // assert(p->children_.size() == 1);\n\
-  Expr* op = e->children()[0];\n\
-  ${native_type}* val = reinterpret_cast<${native_type}*>(op->GetValue(row));\n\
-  if (val == NULL) return NULL;\n\
-  e->result_.${result_field} = *val;\n\
-  return &e->result_.${result_field};\n\
-}\n")
-
-string_to_numeric_cast_template = Template("\
-void* GetValueFunctions::${function_name}(Expr* e, TupleRow* row) {\n\
-  // assert(p->children_.size() == 1);\n\
-  Expr* op = e->children()[0];\n\
-  StringValue* val = reinterpret_cast<StringValue*>(op->GetValue(row));\n\
-  if (val == NULL) return NULL;\n\
-  std::string tmp(val->ptr, val->len);\n\
-  try {\n\
-    e->result_.${result_field} = boost::lexical_cast<${result_type}>(tmp);\n\
-  } catch (boost::bad_lexical_cast &) {\n\
-    return NULL;\n\
-  }\n\
-  return &e->result_.${result_field};\n\
-}\n")
-
-numeric_to_string_cast_template = Template("\
-void* GetValueFunctions::${function_name}(Expr* e, TupleRow* row) {\n\
-  // assert(p->children_.size() == 1);\n\
-  Expr* op = e->children()[0];\n\
-  ${native_type}* val = reinterpret_cast<${native_type}*>(op->GetValue(row));\n\
-  if (val == NULL) return NULL;\n\
-  e->result_.SetStringVal(boost::lexical_cast<std::string>(*val));\n\
-  return &e->result_.string_val;\n\
-}\n")
-
-op_invocations = [
-    (unary_op_invocations, unary_op_template),
-    (binary_op_invocations, binary_op_template),
-    (member_fn_invocations, member_fn_template),
-]
-
-# entry: src-type, dest-type, template
-cast_invocations = [
-    (['char', 'short', 'int', 'long', 'float', 'double'],
-     ['char', 'short', 'int', 'long', 'float', 'double'],
-     cast_template),
-    (['StringValue'],
-     ['char', 'short', 'int', 'long', 'float', 'double'],
-     string_to_numeric_cast_template),
-    (['char', 'short', 'int', 'long', 'float', 'double'],
-     ['StringValue'],
-     numeric_to_string_cast_template)
-]
-
-cc_preamble = '\
-// Copyright (c) 2011 Cloudera, Inc. All rights reserved.\n\
-// This is a generated file, DO NOT EDIT IT.\n\
-\n\
-#include "exprs/functions.h"\n\
-\n\
-#include <boost/lexical_cast.hpp>\n\
-#include <string>\n\
-\n\
-#include "exprs/arithmetic-expr.h"\n\
-#include "exprs/binary-predicate.h"\n\
-#include "runtime/tuple.h"\n\
-\n\
-namespace impala {\n\
-\n'
-
-cc_epilogue = '}\n'
-
-h_preamble = '\
-// Copyright (c) 2011 Cloudera, Inc. All rights reserved.\n\
-// This is a generated file, DO NOT EDIT IT.\n\
-\n\
-#ifndef IMPALA_EXPRS_FUNCTIONS_H\n\
-#define IMPALA_EXPRS_FUNCTIONS_H\n\
-\n\
-namespace impala {\n\
-class Expr;\n\
-class TupleRow;\n\
-\n\
-class GetValueFunctions {\n\
- public:\n'
-
-h_epilogue = '\
-};\n\
-\n\
-}\n\
-\n\
-#endif\n'
-
-cc_file = open('functions.cc', 'w')
-cc_file.write(cc_preamble)
-h_file = open('functions.h', 'w')
-h_file.write(h_preamble)
-
-for i in op_invocations:
-  for entry in i[0]:
-      for op in entry[1]:
-          for operand_type in entry[2]:
-              d = {}
-              fn_name= entry[0] + "_" + op + "_" + op_signatures[operand_type][0]
-              h_file.write("  static void* " + fn_name + "(Expr* e, TupleRow* row);\n")
-              d["function_name"] = fn_name
-              d["expr_class"] = entry[0]
-              d["native_type"] = op_signatures[operand_type][0]
-              d["result_field"] = op_signatures[operand_type][1]
-              d["op"] = operators[op]
-              cc_file.write(i[1].substitute(d))
-              cc_file.write('\n')
-
-for i in cast_invocations:
-  for src_type in i[0]:
-      for dest_type in i[1]:
-          if src_type == dest_type:
-            continue
-          d = {}
-          fn_name= "Cast_" + src_type + "_" + dest_type
-          h_file.write("  static void* " + fn_name + "(Expr* e, TupleRow* row);\n")
-          d["function_name"] = fn_name
-          d["native_type"] = src_type
-          d["result_type"] = dest_type
-          d["result_field"] = result_fields[dest_type]
-          cc_file.write(i[2].substitute(d))
-          cc_file.write('\n')
-
-cc_file.write(cc_epilogue)
-cc_file.close()
-h_file.write(h_epilogue)
-h_file.close()
--- a/be/src/exprs/int-literal.cc
+++ b/be/src/exprs/int-literal.cc
@@ -73,7 +73,7 @@ void* IntLiteral::ReturnBigintValue(Expr* e, TupleRow* row) {
 }

 Status IntLiteral::Prepare(RuntimeState* state, const RowDescriptor& row_desc) {
-  Expr::Prepare(state, row_desc);
+  DCHECK_EQ(children_.size(), 0);
  switch (type_) {
    case TYPE_TINYINT:
      compute_function_ = ReturnTinyintValue;
--- a/be/src/exprs/is-null-predicate.cc
+++ b/be/src/exprs/is-null-predicate.cc
@@ -25,7 +25,7 @@ IsNullPredicate::IsNullPredicate(const TExprNode& node)
 }

 Status IsNullPredicate::Prepare(RuntimeState* state, const RowDescriptor& row_desc) {
-  Expr::Prepare(state, row_desc);
+  RETURN_IF_ERROR(Expr::PrepareChildren(state, row_desc));
  compute_function_ = ComputeFunction;
  return Status::OK;
 }
--- a/be/src/exprs/like-predicate.cc
+++ b/be/src/exprs/like-predicate.cc
@@ -15,7 +15,6 @@ namespace impala {

 LikePredicate::LikePredicate(const TExprNode& node)
  : Predicate(node),
-    op_(node.op),
    escape_char_(node.like_pred.escape_char[0]) {
  DCHECK_EQ(node.like_pred.escape_char.size(), 1);
 }
@@ -72,7 +71,7 @@ void* LikePredicate::RegexFn(Expr* e, TupleRow* row) {
 }

 Status LikePredicate::Prepare(RuntimeState* state, const RowDescriptor& row_desc) {
-  Expr::Prepare(state, row_desc);
+  RETURN_IF_ERROR(Expr::PrepareChildren(state, row_desc));
  DCHECK_EQ(children_.size(), 2);
  if (GetChild(1)->IsConstant()) {
    // determine pattern and decide on eval fn
@@ -80,14 +79,14 @@ Status LikePredicate::Prepare(RuntimeState* state, const RowDescriptor& row_desc
    string pattern_str(pattern->ptr, pattern->len);
    regex substring_re("(%*)([^%_]*)(%*)", regex::extended);
    smatch match_res;
-    if (op_ == TExprOperator::LIKE
+    if (opcode_ == TExprOpcode::LIKE
        && regex_match(pattern_str, match_res, substring_re)) {
      // match_res.str(0) is the whole string, match_res.str(1) the first group, etc.
      substring_ = match_res.str(2);
      compute_function_ = ConstantSubstringFn;
    } else {
      string re_pattern;
-      if (op_ == TExprOperator::LIKE) {
+      if (opcode_ == TExprOpcode::LIKE) {
        ConvertLikePattern(pattern, &re_pattern);
      } else {
        re_pattern = pattern_str;
@@ -100,16 +99,16 @@ Status LikePredicate::Prepare(RuntimeState* state, const RowDescriptor& row_desc
      compute_function_ = ConstantRegexFn;
    }
  } else {
-    switch (op_) {
-      case TExprOperator::LIKE:
+    switch (opcode_) {
+      case TExprOpcode::LIKE:
        compute_function_ = LikeFn;
        break;
-      case TExprOperator::REGEXP:
+      case TExprOpcode::REGEX:
        compute_function_ = RegexFn;
        break;
      default:
        stringstream error;
-        error << "Invalid LIKE operator: " << op_;
+        error << "Invalid LIKE operator: " << opcode_;
        return Status(error.str());
    }
  }
--- a/be/src/exprs/like-predicate.h
+++ b/be/src/exprs/like-predicate.h
@@ -15,13 +15,12 @@ namespace impala {
 class LikePredicate: public Predicate {
 protected:
  friend class Expr;
-
+  virtual Status Prepare(RuntimeState* state, const RowDescriptor& row_desc);
  LikePredicate(const TExprNode& node);

-  virtual Status Prepare(RuntimeState* state, const RowDescriptor& row_desc);
-
 private:
-  const TExprOperator::type op_;
+  friend class OpcodeRegistry;
+
  char escape_char_;
  std::string substring_;
  boost::scoped_ptr<boost::regex> regex_;
--- a/be/src/exprs/literal-predicate.cc
+++ b/be/src/exprs/literal-predicate.cc
@@ -21,7 +21,7 @@ LiteralPredicate::LiteralPredicate(const TExprNode& node)
 }

 Status LiteralPredicate::Prepare(RuntimeState* state, const RowDescriptor& row_desc) {
-  Expr::Prepare(state, row_desc);
+  RETURN_IF_ERROR(Expr::PrepareChildren(state, row_desc));
  compute_function_ = ComputeFunction;
  return Status::OK;
 }
--- a/be/src/exprs/math-functions.cc
+++ b/be/src/exprs/math-functions.cc
@@ -0,0 +1,18 @@
+// Copyright (c) 2011 Cloudera, Inc. All rights reserved.
+
+#include "exprs/math-functions.h"
+
+#include <math.h>
+
+#include "exprs/expr.h"
+#include "runtime/tuple-row.h"
+
+namespace impala { 
+
+void* MathFunctions::Pi(Expr* e, TupleRow* row) {
+  e->result_.double_val = M_PI;
+  return &e->result_.double_val;
+}
+
+}
+
--- a/be/src/exprs/math-functions.h
+++ b/be/src/exprs/math-functions.h
@@ -0,0 +1,21 @@
+// Copyright (c) 2011 Cloudera, Inc. All rights reserved.
+
+#ifndef IMPALA_EXPRS_MATH_FUNCTIONS_H
+#define IMPALA_EXPRS_MATH_FUNCTIONS_H
+
+namespace impala {
+
+class Expr;
+class OpcodeRegistry;
+class TupleRow;
+
+class MathFunctions {
+ public:
+  static void Init(OpcodeRegistry*);
+
+  static void* Pi(Expr* e, TupleRow* row);
+};
+
+}
+
+#endif
--- a/be/src/exprs/null-literal.cc
+++ b/be/src/exprs/null-literal.cc
@@ -15,6 +15,7 @@ void* NullLiteral::ReturnValue(Expr* e, TupleRow* row) {
 }

 Status NullLiteral::Prepare(RuntimeState* state, const RowDescriptor& row_desc) {
+  DCHECK_EQ(children_.size(), 0);
  return Status::OK;
 }

--- a/be/src/exprs/opcode-registry.cc
+++ b/be/src/exprs/opcode-registry.cc
@@ -0,0 +1,8 @@
+#include "exprs/opcode-registry.h"
+
+namespace impala {
+
+OpcodeRegistry* OpcodeRegistry::instance_ = NULL;
+
+}
+
--- a/be/src/exprs/opcode-registry.h
+++ b/be/src/exprs/opcode-registry.h
@@ -0,0 +1,63 @@
+// Copyright (c) 2011 Cloudera, Inc. All rights reserved.
+
+#ifndef IMPALA_EXPRS_OPCODE_REGISTRY_H
+#define IMPALA_EXPRS_OPCODE_REGISTRY_H
+
+#include <string>
+#include <vector>
+#include <glog/logging.h>
+#include "exprs/expr.h"   // For ComputeFunction typedef
+#include "gen-cpp/Opcodes_types.h"
+
+namespace impala {
+
+class Expr;
+class TupleRow;
+
+class OpcodeRegistry {
+ public:
+  // Returns the function registered for this opcode. If the opcode is out of
+  // range or has no registered function, this function returns NULL.
+  Expr::ComputeFunction GetFunction(TExprOpcode::type opcode) {
+    int index = static_cast<int>(opcode);
+    // Runtime check (DCHECKs vanish in release builds) so we never index out of bounds.
+    if (index < 0 || index >= static_cast<int>(functions_.size())) return NULL;
+    return functions_[index];
+  }
+
+  // Returns the singleton registry, creating and populating it on first use.
+  static OpcodeRegistry* Instance() {
+    if (instance_ == NULL) {
+      instance_ = new OpcodeRegistry();
+      instance_->Init();
+    }
+    return instance_;
+  }
+
+ private:
+  // Private constructor, singleton interface. Sizes the table to LAST_OPCODE slots.
+  OpcodeRegistry() {
+    int num_opcodes = static_cast<int>(TExprOpcode::LAST_OPCODE);
+    functions_.resize(num_opcodes);
+  }
+
+  // Populates all of the registered functions. Implemented in
+  // opcode-registry-init.cc, which is an auto-generated file.
+  void Init();
+
+  // Add a function to the registry.
+  void Add(TExprOpcode::type opcode, const Expr::ComputeFunction& function) {
+    int index = static_cast<int>(opcode);
+    DCHECK_LT(index, functions_.size());
+    DCHECK_GE(index, 0);
+    functions_[index] = function;
+  }
+
+  static OpcodeRegistry* instance_;
+  std::vector<Expr::ComputeFunction> functions_;  // indexed by opcode value
+};
+
+}
+
+#endif
+
--- a/be/src/exprs/slot-ref.cc
+++ b/be/src/exprs/slot-ref.cc
@@ -20,7 +20,7 @@ SlotRef::SlotRef(const TExprNode& node)
 }

 Status SlotRef::Prepare(RuntimeState* state, const RowDescriptor& row_desc) {
-  Expr::Prepare(state, row_desc);
+  DCHECK_EQ(children_.size(), 0);
  const SlotDescriptor* slot_desc  = state->descs().GetSlotDescriptor(slot_id_);
  if (slot_desc == NULL) {
    // TODO: create macro MAKE_ERROR() that returns a stream
--- a/be/src/exprs/string-functions.cc
+++ b/be/src/exprs/string-functions.cc
@@ -0,0 +1,41 @@
+// Copyright (c) 2011 Cloudera, Inc. All rights reserved.
+
+#include "exprs/string-functions.h"
+#include "exprs/expr.h"
+#include "runtime/tuple-row.h"
+
+using namespace boost;
+using namespace std;
+
+namespace impala { 
+
+// Implementation of Substr.  The signature is
+//    string substr(string input, int pos [, int len])
+// This behaves identically to the mysql implementation, namely:
+//  - 1-indexed positions; negative positions count from the end of the string
+//  - len is optional; no len indicates the longest substr possible
+// Returns NULL if any supplied argument is NULL; an out-of-range pos yields "".
+void* StringFunctions::Substring(Expr* e, TupleRow* row) {
+  DCHECK_GE(e->GetNumChildren(), 2);
+  Expr* op1 = e->children()[0];
+  Expr* op2 = e->children()[1];
+  Expr* op3 = (e->GetNumChildren() == 3) ? e->children()[2] : NULL;
+  StringValue* str = reinterpret_cast<StringValue*>(op1->GetValue(row));
+  int* pos = reinterpret_cast<int*>(op2->GetValue(row));
+  int* len = op3 != NULL ? reinterpret_cast<int*>(op3->GetValue(row)) : NULL;
+  if (str == NULL || pos == NULL || (op3 != NULL && len == NULL)) return NULL;
+  int fixed_pos = *pos;
+  int fixed_len = (len == NULL ? str->len : *len);
+  string result;
+  if (fixed_pos < 0) fixed_pos = str->len + fixed_pos + 1;  // count from the end
+  if (fixed_pos > 0 && fixed_pos <= str->len && fixed_len > 0) {
+    // Copy only the requested slice (clamped to what is left) instead of the whole input.
+    int avail = str->len - (fixed_pos - 1);
+    result.assign(str->ptr + (fixed_pos - 1), fixed_len < avail ? fixed_len : avail);
+  }
+  e->result_.SetStringVal(result);
+  return &e->result_.string_val;
+}
+
+}
+
--- a/be/src/exprs/string-functions.h
+++ b/be/src/exprs/string-functions.h
@@ -0,0 +1,19 @@
+// Copyright (c) 2011 Cloudera, Inc. All rights reserved.
+
+#ifndef IMPALA_EXPRS_STRING_FUNCTIONS_H
+#define IMPALA_EXPRS_STRING_FUNCTIONS_H
+
+namespace impala {
+
+class Expr;
+class OpcodeRegistry;
+class TupleRow;
+
+class StringFunctions {
+ public:
+  static void* Substring(Expr* e, TupleRow* row);
+};
+
+}
+
+#endif
--- a/be/src/exprs/string-literal.cc
+++ b/be/src/exprs/string-literal.cc
@@ -31,7 +31,7 @@ void* StringLiteral::ComputeFunction(Expr* e, TupleRow* row) {
 }

 Status StringLiteral::Prepare(RuntimeState* state, const RowDescriptor& row_desc) {
-  Expr::Prepare(state, row_desc);
+  DCHECK_EQ(children_.size(), 0);
  compute_function_ = ComputeFunction;
  return Status::OK;
 }
--- a/be/src/service/CMakeLists.txt
+++ b/be/src/service/CMakeLists.txt
@@ -17,11 +17,17 @@ add_library(backend SHARED
  plan-executor-adaptor.cc
 )

+# The order of link libraries matters.
+#  - Util must come before ImpalaThrift
+#  - Exprs & Opcode have a circular dependency (Exprs is included twice)
 target_link_libraries(backend
  Common
  Exec
  Exprs
+  Opcode
+  Exprs
  Runtime
+  Util
  thrift
  ImpalaThrift
  ${HDFS_LIBS}
@@ -40,13 +46,20 @@ add_executable(runquery
 # when linking statically the linker won't look for the needed symbols.
 # TODO: is there a better way to specify link dependencies without having to
 # list all included libs for each specific binary? any gcc flags to help with this?
+#
+# The order of link libraries matters.
+#  - Util must come before ImpalaThrift
+#  - Exprs & Opcode have a circular dependency (Exprs is included twice)
 target_link_libraries(runquery
  TestUtil
  Service
  Exec
  Exprs
+  Opcode
+  Exprs
  Runtime
  Common
+  Util
  thrift
  ImpalaThrift
  MockHdfs
--- a/be/src/util/debug-util.cc
+++ b/be/src/util/debug-util.cc
@@ -7,14 +7,25 @@
 #include "runtime/descriptors.h"
 #include "runtime/raw-value.h"
 #include "runtime/tuple-row.h"
+#include "gen-cpp/Opcodes_types.h"

 using namespace std;

 namespace impala {

-ostream& operator<<(ostream& os, const TExprOperator::type& op) {
-  map<int, const char*>::const_iterator i = _TExprOperator_VALUES_TO_NAMES.find(op);
-  if (i != _TExprOperator_VALUES_TO_NAMES.end()) {
+// Prints the thrift enum name for 'op'; prints nothing if 'op' has no name entry.
+ostream& operator<<(ostream& os, const TExprOpcode::type& op) {
+  map<int, const char*>::const_iterator i = _TExprOpcode_VALUES_TO_NAMES.find(op);
+  if (i != _TExprOpcode_VALUES_TO_NAMES.end()) {
+    os << i->second;
+  }
+  return os;
+}
+
+// Prints the thrift enum name for 'op'; prints nothing if 'op' has no name entry.
+ostream& operator<<(ostream& os, const TAggregationOp::type& op) {
+  map<int, const char*>::const_iterator i = _TAggregationOp_VALUES_TO_NAMES.find(op);
+  if (i != _TAggregationOp_VALUES_TO_NAMES.end()) {
    os << i->second;
  }
  return os;
--- a/be/src/util/debug-util.h
+++ b/be/src/util/debug-util.h
@@ -6,6 +6,7 @@
 #include <ostream>
 #include <string>

+#include "gen-cpp/Opcodes_types.h"
 #include "gen-cpp/Exprs_types.h"

 namespace impala {
@@ -15,7 +16,8 @@ class TupleDescriptor;
 class Tuple;
 class TupleRow;

-std::ostream& operator<<(std::ostream& os, const TExprOperator::type& op);
+std::ostream& operator<<(std::ostream& os, const TExprOpcode::type& op);
+std::ostream& operator<<(std::ostream& os, const TAggregationOp::type& op);

 std::string PrintTuple(const Tuple* t, const TupleDescriptor& d);
 std::string PrintRow(TupleRow* row, const RowDescriptor& d);
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -35,6 +35,7 @@ fi

 export IMPALA_FE_DIR=$IMPALA_HOME/fe
 export IMPALA_BE_DIR=$IMPALA_HOME/be
+export IMPALA_COMMON_DIR=$IMPALA_HOME/common
 export PATH=$IMPALA_HOME/bin:$PATH

 export HADOOP_HOME=$IMPALA_HOME/thirdparty/hadoop-0.20.2-cdh3u1
--- a/buildall.sh
+++ b/buildall.sh
@@ -74,6 +74,11 @@ then

 fi

+# build common
+cd $IMPALA_COMMON_DIR
+./gen_functions.py
+./gen_opcodes.py
+
 # Generate hive-site.xml from template via env var substitution
 # TODO: Throw an error if the template references an undefined environment variable
 cd ${IMPALA_FE_DIR}/src/test/resources
@@ -138,5 +143,20 @@ then
    mvn test
 fi

+# run backend tests
+if [ $tests_action -eq 1 ] 
+then
+  cd $IMPALA_FE_DIR
+  mvn exec:java -Dexec.mainClass=com.cloudera.impala.testutil.PlanService \
+              -Dexec.classpathScope=test & 
+  PID=$!
+  # Wait for planner to startup TODO: can we do something better than wait arbitrarily for
+  # 3 seconds.  Not a huge deal if it's not long enough, BE tests will just wait a bit
+  sleep 3
+  cd $IMPALA_BE_DIR
+  make test
+  kill $PID
+fi
+
 # Generate list of files for Cscope to index
 $IMPALA_HOME/bin/gen-cscope.sh
--- a/common/.gitignore
+++ b/common/.gitignore
@@ -0,0 +1,2 @@
+*.pyc
+generated_functions.py
--- a/common/gen_functions.py
+++ b/common/gen_functions.py
@@ -0,0 +1,375 @@
+#!/usr/bin/env python
+
+from string import Template
+import os
+
+# This script will generate the implementation of the simple functions for the BE.
+# These include:
+#   - Arithmetic functions
+#   - Binary functions
+#   - Cast functions
+#
+# The script outputs (run: 'impala/common/gen_functions.py')
+#   - header and implementation for above functions:
+#     - impala/be/generated-sources/opcode/functions.[h/cc]
+#   - python file that contains the metadata for those functions:
+#     - impala/common/generated_functions.py
+
+# Some aggregate types that are useful for defining functions
+types = {
+  'BOOLEAN'       : ['BOOLEAN'],
+  'TINYINT'       : ['TINYINT'],
+  'SMALLINT'      : ['SMALLINT'],
+  'INT'           : ['INT'],
+  'BIGINT'        : ['BIGINT'],
+  'FLOAT'         : ['FLOAT'],
+  'DOUBLE'        : ['DOUBLE'],
+  'STRING'        : ['STRING'],
+  'INT_TYPES'     : ['TINYINT', 'SMALLINT', 'INT', 'BIGINT'],
+  'NUMERIC_TYPES' : ['TINYINT', 'SMALLINT', 'INT', 'BIGINT', 'FLOAT', 'DOUBLE'],
+  'NATIVE_TYPES'  : ['BOOLEAN', 'TINYINT', 'SMALLINT', 'INT', 'BIGINT', 'FLOAT', 'DOUBLE'],
+  'ALL_TYPES'     : ['BOOLEAN', 'TINYINT', 'SMALLINT', 'INT', 'BIGINT', 'FLOAT', 'DOUBLE', 'STRING'],
+  'MAX_TYPES'     : ['BIGINT', 'DOUBLE'],
+}
+
+# Operation, [ReturnType], [[Args1], [Args2], ... [ArgsN]]
+functions = [
+  # Arithmetic Expr
+  ['Add', ['MAX_TYPES'], [['MAX_TYPES'], ['MAX_TYPES']] ],
+  ['Subtract', ['MAX_TYPES'], [['MAX_TYPES'], ['MAX_TYPES']] ],
+  ['Multiply', ['MAX_TYPES'], [['MAX_TYPES'], ['MAX_TYPES']] ],
+  ['Divide', ['DOUBLE'], [['DOUBLE'], ['DOUBLE']] ],
+  ['Int_Divide', ['INT_TYPES'], [['INT_TYPES'], ['INT_TYPES']] ],
+  ['Mod', ['INT_TYPES'], [['INT_TYPES'], ['INT_TYPES']] ],
+  ['BitAnd', ['INT_TYPES'], [['INT_TYPES'], ['INT_TYPES']] ],
+  ['BitXor', ['INT_TYPES'], [['INT_TYPES'], ['INT_TYPES']] ],
+  ['BitOr', ['INT_TYPES'], [['INT_TYPES'], ['INT_TYPES']] ],
+  ['BitNot', ['INT_TYPES'], [['INT_TYPES']] ], 
+  
+  # BinaryPredicates
+  ['Eq', ['BOOLEAN'], [['NATIVE_TYPES'], ['NATIVE_TYPES']] ], 
+  ['Ne', ['BOOLEAN'], [['NATIVE_TYPES'], ['NATIVE_TYPES']] ], 
+  ['Gt', ['BOOLEAN'], [['NATIVE_TYPES'], ['NATIVE_TYPES']] ], 
+  ['Lt', ['BOOLEAN'], [['NATIVE_TYPES'], ['NATIVE_TYPES']] ], 
+  ['Ge', ['BOOLEAN'], [['NATIVE_TYPES'], ['NATIVE_TYPES']] ], 
+  ['Le', ['BOOLEAN'], [['NATIVE_TYPES'], ['NATIVE_TYPES']] ], 
+  ['Eq', ['BOOLEAN'], [['STRING'], ['STRING']] ], 
+  ['Ne', ['BOOLEAN'], [['STRING'], ['STRING']] ], 
+  ['Gt', ['BOOLEAN'], [['STRING'], ['STRING']] ], 
+  ['Lt', ['BOOLEAN'], [['STRING'], ['STRING']] ], 
+  ['Ge', ['BOOLEAN'], [['STRING'], ['STRING']] ], 
+  ['Le', ['BOOLEAN'], [['STRING'], ['STRING']] ], 
+
+  # Casts
+  ['Cast', ['BOOLEAN'], [['NATIVE_TYPES'], ['BOOLEAN']] ],
+  ['Cast', ['TINYINT'], [['NATIVE_TYPES'], ['TINYINT']] ],
+  ['Cast', ['SMALLINT'], [['NATIVE_TYPES'], ['SMALLINT']] ],
+  ['Cast', ['INT'], [['NATIVE_TYPES'], ['INT']] ],
+  ['Cast', ['BIGINT'], [['NATIVE_TYPES'], ['BIGINT']] ],
+  ['Cast', ['FLOAT'], [['NATIVE_TYPES'], ['FLOAT']] ],
+  ['Cast', ['DOUBLE'], [['NATIVE_TYPES'], ['DOUBLE']] ],
+  ['Cast', ['NATIVE_TYPES'], [['STRING'], ['NATIVE_TYPES']] ],
+  ['Cast', ['STRING'], [['NATIVE_TYPES'], ['STRING']] ],
+]
+
+native_types = {
+  'BOOLEAN'       : 'bool',
+  'TINYINT'       : 'char',
+  'SMALLINT'      : 'short',
+  'INT'           : 'int',
+  'BIGINT'        : 'long',
+  'FLOAT'         : 'float',
+  'DOUBLE'        : 'double',
+  'STRING'        : 'StringValue',
+}
+result_fields = {
+  'BOOLEAN'       : 'bool_val',
+  'TINYINT'       : 'tinyint_val',
+  'SMALLINT'      : 'smallint_val',
+  'INT'           : 'int_val',
+  'BIGINT'        : 'bigint_val',
+  'FLOAT'         : 'float_val',
+  'DOUBLE'        : 'double_val',
+  'STRING'        : 'string_val',
+}
+
+native_ops = {
+  'BITAND'     : '&',
+  'BITNOT'     : '~',
+  'BITOR'      : '|',
+  'BITXOR'     : '^',
+  'DIVIDE'     : '/', 
+  'EQ'         : '==',
+  'GT'         : '>',
+  'GE'         : '>=',
+  'INT_DIVIDE' : '/',
+  'SUBTRACT'   : '-', 
+  'MOD'        : '%',
+  'MULTIPLY'   : '*', 
+  'LT'         : '<',
+  'LE'         : '<=',
+  'NE'         : '!=',
+  'ADD'        : '+',
+}
+
+native_funcs = {
+  'EQ' : 'Eq',
+  'LE' : 'Le',
+  'LT' : 'Lt',
+  'NE' : 'Ne',
+  'GE' : 'Ge',
+  'GT' : 'Gt',
+}
+
+cc_preamble = '\
+// Copyright (c) 2011 Cloudera, Inc. All rights reserved.\n\
+// This is a generated file, DO NOT EDIT.\n\
+// To add new functions, see impala/common/gen_opcodes.py\n\
+\n\
+#include "opcode/functions.h"\n\
+#include "exprs/expr.h"\n\
+#include "runtime/tuple-row.h"\n\
+#include <boost/lexical_cast.hpp>\n\
+\n\
+using namespace boost;\n\
+using namespace std;\n\
+\n\
+namespace impala { \n\
+\n'
+
+cc_epilogue = '\
+}\n'
+
+h_preamble = '\
+// Copyright (c) 2011 Cloudera, Inc. All rights reserved.\n\
+// This is a generated file, DO NOT EDIT IT.\n\
+// To add new functions, see impala/common/gen_opcodes.py\n\
+\n\
+#ifndef IMPALA_OPCODE_FUNCTIONS_H\n\
+#define IMPALA_OPCODE_FUNCTIONS_H\n\
+\n\
+namespace impala {\n\
+class Expr;\n\
+class OpcodeRegistry;\n\
+class TupleRow;\n\
+\n\
+class ComputeFunctions {\n\
+ public:\n'
+
+h_epilogue = '\
+};\n\
+\n\
+}\n\
+\n\
+#endif\n'
+
+python_preamble = '\
+#!/usr/bin/env python\n\
+# Copyright (c) 2011 Cloudera, Inc. All rights reserved.\n\
+# This is a generated file, DO NOT EDIT IT.\n\
+# To add new functions, see impala/common/gen_opcodes.py\n\
+\n\
+functions = [\n'
+
+python_epilogue = ']'
+
+header_template = Template("\
+  static void* ${fn_signature}(Expr* e, TupleRow* row);\n")
+
+unary_op = Template("\
+void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
+  Expr* op = e->children()[0];\n\
+  ${native_type1}* val = reinterpret_cast<${native_type1}*>(op->GetValue(row));\n\
+  if (val == NULL) return NULL;\n\
+  e->result_.${result_field} = ${native_op} *val;\n\
+  return &e->result_.${result_field};\n\
+}\n\n")
+
+binary_op = Template("\
+void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
+  Expr* op1 = e->children()[0];\n\
+  ${native_type1}* val1 = reinterpret_cast<${native_type1}*>(op1->GetValue(row));\n\
+  Expr* op2 = e->children()[1];\n\
+  ${native_type2}* val2 = reinterpret_cast<${native_type2}*>(op2->GetValue(row));\n\
+  if (val1 == NULL || val2 == NULL) return NULL;\n\
+  e->result_.${result_field} = (*val1 ${native_op} *val2);\n\
+  return &e->result_.${result_field};\n\
+}\n\n")
+
+binary_func = Template("\
+void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
+  Expr* op1 = e->children()[0];\n\
+  ${native_type1}* val1 = reinterpret_cast<${native_type1}*>(op1->GetValue(row));\n\
+  Expr* op2 = e->children()[1];\n\
+  ${native_type2}* val2 = reinterpret_cast<${native_type2}*>(op2->GetValue(row));\n\
+  if (val1 == NULL || val2 == NULL) return NULL;\n\
+  e->result_.${result_field} = val1->${native_func}(*val2);\n\
+  return &e->result_.${result_field};\n\
+}\n\n")
+
+cast = Template("\
+void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
+  Expr* op = e->children()[0];\n\
+  ${native_type1}* val = reinterpret_cast<${native_type1}*>(op->GetValue(row));\n\
+  if (val == NULL) return NULL;\n\
+  e->result_.${result_field} = *val;\n\
+  return &e->result_.${result_field};\n\
+}\n\n")
+
+string_to_numeric = Template("\
+void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
+  Expr* op = e->children()[0];\n\
+  ${native_type1}* val = reinterpret_cast<${native_type1}*>(op->GetValue(row));\n\
+  if (val == NULL) return NULL;\n\
+  string tmp(val->ptr, val->len);\n\
+  try {\n\
+    e->result_.${result_field} = lexical_cast<${native_type2}>(tmp);\n\
+  } catch (bad_lexical_cast &) {\n\
+    return NULL;\n\
+  }\n\
+  return &e->result_.${result_field};\n\
+}\n\n")
+
+numeric_to_string = Template("\
+void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
+  Expr* op = e->children()[0];\n\
+  ${native_type1}* val = reinterpret_cast<${native_type1}*>(op->GetValue(row));\n\
+  if (val == NULL) return NULL;\n\
+  e->result_.SetStringVal(lexical_cast<string>(*val));\n\
+  return &e->result_.${result_field};\n\
+}\n\n")
+
+python_template = Template("\
+  ['${fn_name}', '${return_type}', [${args}], 'ComputeFunctions::${fn_signature}', []], \n")
+
+# Mapping of function to template
+templates = {
+  'Add'         : binary_op,
+  'Subtract'    : binary_op,
+  'Multiply'    : binary_op,
+  'Divide'      : binary_op,
+  'Int_Divide'  : binary_op,
+  'Mod'         : binary_op,
+  'BitAnd'      : binary_op,
+  'BitXor'      : binary_op,
+  'BitOr'       : binary_op,
+  'BitNot'      : unary_op,
+  'Eq'          : binary_op,
+  'Ne'          : binary_op,
+  'Ge'          : binary_op,
+  'Gt'          : binary_op,
+  'Lt'          : binary_op,
+  'Le'          : binary_op,
+  'Cast'        : cast,
+}
+
+BE_PATH = "../be/generated-sources/opcode/"
+if not os.path.exists(BE_PATH):
+  os.makedirs(BE_PATH)
+
+# Expand the signature data for template substitution.  Returns 
+# a dictionary with all the entries for all the templates used in this script
+def initialize_sub(op, return_type, arg_types):
+  sub = {}
+  sub["fn_name"] = op
+  sub["fn_signature"] = op
+  sub["return_type"] = return_type
+  sub["result_field"] = result_fields[return_type]
+  sub["args"] = ""
+  if op.upper() in native_ops:
+    sub["native_op"] = native_ops[op.upper()]
+  for idx in range(0, len(arg_types)):
+    arg = arg_types[idx]
+    sub["fn_signature"] += "_" + native_types[arg]
+    sub["native_type" + repr(idx + 1)] = native_types[arg]
+    sub["args"] += "'" + arg + "', "
+  return sub
+
+if __name__ == "__main__":
+  h_file = open(BE_PATH + 'functions.h', 'w')
+  cc_file = open(BE_PATH + 'functions.cc', 'w')
+  python_file = open('generated_functions.py', 'w')
+  h_file.write(h_preamble)
+  cc_file.write(cc_preamble)
+  python_file.write(python_preamble)
+
+  # Generate functions and headers
+  for func_data in functions:
+    
+    # Skip functions with no template (shouldn't be auto-generated)
+    if not func_data[0] in templates:
+      continue
+
+    # Expand all arguments
+    op = func_data[0]
+    return_types = []
+    for ret in func_data[1]:
+      for t in types[ret]:
+        return_types.append(t)
+    signatures = []
+    for args in func_data[2]:
+      expanded_arg = []
+      for arg in args:
+        for t in types[arg]:
+          expanded_arg.append(t)
+      signatures.append(expanded_arg)
+
+    # Put arguments into substitution structure
+    num_functions = 0
+    for args in signatures:
+      num_functions = max(num_functions, len(args))
+    num_functions = max(num_functions, len(return_types))
+    num_args = len(signatures)
+
+    # Validate the input is correct
+    if len(return_types) != 1 and len(return_types) != num_functions:
+      print "Invalid Declaration: " + func_data
+      sys.exit(1)
+    
+    for args in signatures:
+      if len(args) != 1 and len(args) != num_functions:
+        print "Invalid Declaration: " + func_data
+        sys.exit(1)
+    
+    # Iterate over every function signature to generate
+    for i in range(0, num_functions):
+      if len(return_types) == 1:
+        return_type = return_types[0]
+      else:
+        return_type = return_types[i]
+
+      arg_types = []
+      for j in range(0, num_args):
+        if len(signatures[j]) == 1:
+          arg_types.append(signatures[j][0])
+        else:
+          arg_types.append(signatures[j][i])
+      
+      # At this point, 'return_type' is a single type and 'arg_types' is a list of single types
+      sub = initialize_sub(op, return_type, arg_types)
+      template = templates[op]
+
+      # Code-gen for the bodies requires a bit more information
+      if (op == 'Eq' or op == 'Ne' or 
+          op == 'Gt' or op == 'Lt' or 
+          op == 'Ge' or op == 'Le') and arg_types[0] == 'STRING':
+          template = binary_func
+          sub["native_func"] = native_funcs[op.upper()]
+
+      if op == 'Cast' and return_type == 'STRING':
+        template = numeric_to_string
+
+      if op == 'Cast' and arg_types[0] == 'STRING':
+        template = string_to_numeric
+
+      h_file.write(header_template.substitute(sub))
+      cc_file.write(template.substitute(sub))
+      python_file.write(python_template.substitute(sub))
+
+  h_file.write(h_epilogue)
+  cc_file.write(cc_epilogue)
+  python_file.write(python_epilogue)
+  h_file.close()
+  cc_file.close()
+  python_file.close()
+
--- a/common/gen_opcodes.py
+++ b/common/gen_opcodes.py
@@ -0,0 +1,285 @@
+#!/usr/bin/env python
+
+# This script generates the necessary files to coordinate function calls between the FE 
+# and BE. In the FE, this creates a mapping between function signature (Operation & 
+# Arguments) to an opcode. The opcode is a thrift enum which is passed to the backend. 
+# The backend has all the information from just the opcode and does not need to worry 
+# about type checking.
+#
+# This script pulls function metadata input from
+#   - impala/common/impala_functions.py (manually maintained)
+#   - impala/common/generated_functions.py (auto-generated metadata)
+#
+# This script will generate 4 outputs
+#  1. Thrift enum for all the opcodes
+#  - impala/fe/src/main/thrift/Opcodes.thrift
+#  2. FE java operators (one per function, ignoring overloading)
+#  - impala/fe/target/generated-sources/gen-java/com/cloudera/impala/opcode/FunctionOperator.java
+#  3  Java registry setup (registering all the functions with signatures)
+#  - impala/fe/target/generated-sources/gen-java/com/cloudera/impala/opcode/FunctionRegistry.java
+#  4. BE registry setup (mapping opcodes to ComputeFunctions)
+#  - impala/be/generated-sources/opcode/opcode-registry-init.cc
+#
+# TODO: version the registry on the FE and BE so we can identify if they are out of sync
+
+import sys
+import os
+from string import Template
+import impala_functions
+import generated_functions
+
+native_types = {
+  'BOOLEAN'       : 'bool',
+  'TINYINT'       : 'char',
+  'SMALLINT'      : 'short',
+  'INT'           : 'int',
+  'BIGINT'        : 'long',
+  'FLOAT'         : 'float',
+  'DOUBLE'        : 'double',
+  'STRING'        : 'StringValue',
+}
+
+thrift_preamble = '\
+// Copyright (c) 2011 Cloudera, Inc. All rights reserved.\n\
+// This is a generated file, DO NOT EDIT.\n\
+// To add new functions, see impala/common/gen_opcodes.py\n\
+\n\
+namespace cpp impala\n\
+namespace java com.cloudera.impala.thrift\n\
+\n\
+enum TExprOpcode {\n'
+
+thrift_epilogue = '\
+}\n\
+\n'
+
+cc_registry_preamble = '\
+// Copyright (c) 2011 Cloudera, Inc. All rights reserved.\n\
+// This is a generated file, DO NOT EDIT.\n\
+// To add new functions, see impala/common/gen_opcodes.py\n\
+\n\
+#include "exprs/opcode-registry.h"\n\
+#include "exprs/expr.h"\n\
+#include "exprs/compound-predicate.h"\n\
+#include "exprs/like-predicate.h"\n\
+#include "exprs/math-functions.h"\n\
+#include "exprs/string-functions.h"\n\
+#include "opcode/functions.h"\n\
+\n\
+namespace impala { \n\
+\n\
+void OpcodeRegistry::Init() {\n'
+
+cc_registry_epilogue = '\
+}\n\
+\n\
+}\n'
+
+operator_file_preamble = '\
+// Copyright (c) 2011 Cloudera, Inc. All rights reserved.\n\
+// This is a generated file, DO NOT EDIT.\n\
+// To add new functions, see impala/common/gen_opcodes.py\n\
+\n\
+package com.cloudera.impala.opcode;\n\
+\n\
+public enum FunctionOperator {\n'
+
+operator_file_epilogue = '\
+}\n'
+
+java_registry_preamble = '\
+// Copyright (c) 2011 Cloudera, Inc. All rights reserved.\n\
+// This is a generated file, DO NOT EDIT.\n\
+// To add new functions, see impala/common/gen_opcodes.py\n\
+\n\
+package com.cloudera.impala.opcode;\n\
+\n\
+import com.cloudera.impala.analysis.OpcodeRegistry;\n\
+import com.cloudera.impala.catalog.PrimitiveType;\n\
+import com.cloudera.impala.thrift.TExprOpcode;\n\
+import com.google.common.base.Preconditions;\n\
+\n\
+public class FunctionRegistry { \n\
+  public static void InitFunctions(OpcodeRegistry registry) { \n\
+    boolean result = true;\n\
+\n'
+
+java_registry_epilogue = '\
+    Preconditions.checkState(result); \n\
+  }\n\
+}\n'
+
+
+def initialize_sub(op, return_type, arg_types):
+  sub = {}
+  java_args = "PrimitiveType." + return_type 
+  sub["fn_class"] = "GetValueFunctions"
+  sub["fn_signature"] = op
+  sub["num_args"] = len(arg_types)
+  for idx in range(0, len(arg_types)):
+    arg = arg_types[idx]
+    sub["fn_signature"] += "_" + native_types[arg]
+    sub["native_type" + repr(idx + 1)] = native_types[arg]
+    java_args += ", PrimitiveType." + arg
+  sub["thrift_enum"] = sub["fn_signature"].upper()
+  sub["java_output"] = "FunctionOperator." + op.upper() + ", TExprOpcode." + sub["thrift_enum"] 
+  sub["java_output"] += ", " + java_args
+  return sub
+
+FE_PATH = "../fe/target/generated-sources/gen-java/com/cloudera/impala/opcode/"
+BE_PATH = "../be/generated-sources/opcode/"
+THRIFT_PATH = "../fe/src/main/thrift/"
+
+# This contains a list of all the opcodes that are built based on the
+# function name from the input.  Inputs can have multiple signatures
+# with the same function name and the opcode is mangled using the
+# arg types.
+opcodes = []
+
+# This contains a list of all the function names (no overloading/mangling)
+operators = []
+
+# This is a mapping of operators to a list of function meta data entries
+# Each meta data entry is itself a map to store all the meta data
+#   - fn_name, ret_type, args, be_fn, sql_names
+meta_data_entries = {}
+
+# Read in the function and add it to the meta_data_entries map
+def add_function(fn_meta_data):
+  fn_name = fn_meta_data[0]
+  ret_type = fn_meta_data[1]
+  args = fn_meta_data[2]
+  be_fn = fn_meta_data[3]
+
+  entry = {}
+  entry["fn_name"] = fn_meta_data[0]
+  entry["ret_type"] = fn_meta_data[1]
+  entry["args"] = fn_meta_data[2]
+  entry["be_fn"] = fn_meta_data[3]
+  entry["sql_names"] = fn_meta_data[4]
+
+  if fn_name in meta_data_entries:
+    meta_data_entries[fn_name].append(entry)
+  else:
+    fn_list = [entry]
+    meta_data_entries[fn_name] = fn_list
+    operators.append(fn_name.upper())
+  
+# Iterate over entries in the meta_data_entries map and generate opcodes.  Some
+# entries will have the same name at this stage, qualify the name with the
+# signature to generate unique enums.
+# Resulting opcode list is sorted with INVALID_OPCODE at beginning and LAST_OPCODE 
+# at end.
+def generate_opcodes():
+  for fn in meta_data_entries:
+    entries = meta_data_entries[fn]
+    if len(entries) > 1:
+      for entry in entries:
+        opcode = fn.upper()
+        for arg in entry["args"]:
+          opcode += "_" + native_types[arg].upper()
+        opcodes.append(opcode)
+        entry["opcode"] = opcode
+    else:
+      opcodes.append(fn.upper())
+      entries[0]["opcode"] = fn.upper()
+  opcodes.sort()
+  opcodes.insert(0, 'INVALID_OPCODE')
+  opcodes.append('LAST_OPCODE')
+
+# Generates the BE registry init file that will add all the compute functions
+# to the registry.  Outputs the generated-file to 'filename'
+def generate_be_registry_init(filename):
+  cc_registry_file = open(filename, "w")
+  cc_registry_file.write(cc_registry_preamble)
+  
+  for fn in meta_data_entries:
+    entries = meta_data_entries[fn]
+    for entry in entries:
+      opcode = entry["opcode"]
+      be_fn = entry["be_fn"]
+      cc_registry_file.write("  this->Add(TExprOpcode::%s, %s);\n" % (opcode, be_fn))
+
+  cc_registry_file.write(cc_registry_epilogue)
+  cc_registry_file.close()
+
+# Generates the FE registry init file that registers all the functions.  This file
+# contains all the opcode->function signature mappings and all of the string->operator
+# mappings for sql functions
+def generate_fe_registry_init(filename):
+  java_registry_file = open(filename, "w")
+  java_registry_file.write(java_registry_preamble)
+
+  for fn in meta_data_entries:
+    entries = meta_data_entries[fn]
+    for entry in entries:
+      java_output = "FunctionOperator." + fn.upper()
+      java_output += ", TExprOpcode." + entry["opcode"]
+      java_output += ", PrimitiveType." + entry["ret_type"]
+      for arg in entry["args"]:
+        java_output += ", PrimitiveType." + arg
+      java_registry_file.write("    result &= registry.add(%s);\n" % java_output)
+  java_registry_file.write("\n")
+  
+  mappings = {}
+
+  for fn in meta_data_entries:
+    entries = meta_data_entries[fn]
+    for entry in entries:
+      for name in entry["sql_names"]:
+        if name in mappings:
+          if mappings[name] != fn.upper():
+            print "Invalid mapping \"%s\" -> FunctionOperator.%s." % (name, mappings[name])
+            print "There is already a mapping \"%s\" -> FunctionOperator.%s.\n" % (name, fn.upper())
+            sys.exit(1)
+          continue
+        mappings[name] = fn.upper()
+        java_output = "\"%s\", FunctionOperator.%s" % (name, fn.upper())
+        java_registry_file.write("    result &= registry.addFunctionMapping(%s);\n" % java_output)
+  java_registry_file.write("\n")
+
+  java_registry_file.write(java_registry_epilogue)
+  java_registry_file.close()
+
+# Read the function metadata inputs
+for function in impala_functions.functions:
+  if len(function) != 5:
+    print "Invalid function entry in impala_functions.py:\n\t" + repr(function)
+    sys.exit(1)
+  add_function(function)
+for function in generated_functions.functions:
+  if len(function) != 5:
+    print "Invalid function entry in generated_functions.py:\n\t" + repr(function)
+    sys.exit(1)
+  add_function(function)
+
+generate_opcodes()
+
+if not os.path.exists(BE_PATH):
+  os.makedirs(BE_PATH)
+if not os.path.exists(FE_PATH):
+  os.makedirs(FE_PATH)
+if not os.path.exists(THRIFT_PATH):
+  os.makedirs(THRIFT_PATH)
+
+generate_be_registry_init(BE_PATH + "opcode-registry-init.cc")
+generate_fe_registry_init(FE_PATH + "FunctionRegistry.java")
+
+# Output the opcodes to thrift
+thrift_file = open(THRIFT_PATH + "Opcodes.thrift", "w")
+thrift_file.write(thrift_preamble)
+for opcode in opcodes:
+  thrift_file.write("  %s,\n" % opcode)
+thrift_file.write(thrift_epilogue)
+thrift_file.close()
+
+# Output the operators to java
+operators.sort()
+operators.insert(0, "INVALID_OPERATOR")
+operator_java_file = open(FE_PATH + "FunctionOperator.java", "w")
+operator_java_file.write(operator_file_preamble)
+for op in operators:
+  operator_java_file.write("  %s,\n" % op)
+operator_java_file.write(operator_file_epilogue)
+operator_java_file.close()
+
--- a/common/impala_functions.py
+++ b/common/impala_functions.py
@@ -0,0 +1,30 @@
+#!/usr/bin/env python
+
+# This is a list of all the functions that are not auto-generated.
+# It contains all the meta data that describes the function.  The format is:
+# <function name>, <return_type>, [<args>], <backend function name>, [<sql function aliases>]
+# 
+# 'function name' is the base of what the opcode enum will be generated from.  It does not
+# have to be unique, the script will mangle the name with the signature if necessary.
+#
+# 'sql function aliases' are the function names that can be used from sql.  They are optional
+# and there can be multiple aliases for a function.
+#
+# This is combined with the list in generated_functions to code-gen the opcode
+# registry in the FE and BE.
+
+functions = [
+  ['Compound_And', 'BOOLEAN', ['BOOLEAN', 'BOOLEAN'], 'CompoundPredicate::AndComputeFunction', []],
+  ['Compound_Or', 'BOOLEAN', ['BOOLEAN', 'BOOLEAN'], 'CompoundPredicate::OrComputeFunction', []],
+  ['Compound_Not', 'BOOLEAN', ['BOOLEAN', 'BOOLEAN'], 'CompoundPredicate::NotComputeFunction', []],
+  
+  ['Constant_Regex', 'BOOLEAN', ['BOOLEAN', 'BOOLEAN'], 'LikePredicate::ConstantRegexFn', []],
+  ['Constant_Substring', 'BOOLEAN', ['BOOLEAN', 'BOOLEAN'], 'LikePredicate::ConstantSubstringFn', []],
+  ['Like', 'BOOLEAN', ['STRING', 'STRING'], 'LikePredicate::LikeFn', []],
+  ['Regex', 'BOOLEAN', ['STRING', 'STRING'], 'LikePredicate::RegexFn', []],
+
+  ['Math_Pi', 'DOUBLE', [], 'MathFunctions::Pi', ['pi']],
+
+  ['String_Substring', 'STRING', ['STRING', 'INT'], 'StringFunctions::Substring', ['substr', 'substring']],
+  ['String_Substring', 'STRING', ['STRING', 'INT', 'INT'], 'StringFunctions::Substring', ['substr', 'substring']],
+]
--- a/fe/.gitignore
+++ b/fe/.gitignore
@@ -22,6 +22,9 @@ src/test/resources/hive-site.xml
 # Generated hbase-site.xml file
 src/test/resources/hbase-site.xml

+# Generated thrift files
+src/main/thrift/Opcodes.thrift
+
 derby.log

 TempStatsStore
--- a/fe/.settings/org.eclipse.m2e.core.prefs
+++ b/fe/.settings/org.eclipse.m2e.core.prefs
@@ -0,0 +1,5 @@
+#Mon Nov 07 10:58:37 PST 2011
+activeProfiles=
+eclipse.preferences.version=1
+resolveWorkspaceProjects=true
+version=1
--- a/fe/src/main/cup/sql-parser.y
+++ b/fe/src/main/cup/sql-parser.y
@@ -138,7 +138,7 @@ terminal KW_AND, KW_AS, KW_ASC, KW_AVG, KW_BIGINT, KW_BOOLEAN, KW_BY,
  KW_RLIKE, KW_RIGHT, KW_SELECT, KW_SEMI, KW_SMALLINT, KW_STRING, KW_SUM,
  KW_TINYINT, KW_TRUE, KW_USING, KW_WHEN, KW_WHERE, KW_THEN, KW_TIMESTAMP,
  KW_INSERT, KW_INTO, KW_OVERWRITE, KW_TABLE, KW_PARTITION;
-terminal COMMA, DOT, STAR, LPAREN, RPAREN, DIVIDE, MOD, PLUS, MINUS;
+terminal COMMA, DOT, STAR, LPAREN, RPAREN, DIVIDE, MOD, ADD, SUBTRACT;
 terminal BITAND, BITOR, BITXOR, BITNOT;
 terminal EQUAL, NOT, LESSTHAN, GREATERTHAN;
 terminal String IDENT;
@@ -180,7 +180,7 @@ nonterminal TableRef table_ref;
 nonterminal JoinOperator join_operator;
 nonterminal opt_inner, opt_outer;
 nonterminal PrimitiveType primitive_type;
-nonterminal Expr minus_chain_expr;
+nonterminal Expr subtract_chain_expr;
 nonterminal BinaryPredicate.Operator binary_comparison_operator;
 nonterminal InsertStmt insert_stmt;
 nonterminal ArrayList<PartitionKeyValue> partition_clause;
@@ -192,7 +192,7 @@ precedence left KW_AND;
 precedence left KW_NOT;
 precedence left KW_LIKE, KW_RLIKE, KW_REGEXP;
 precedence left EQUAL, LESSTHAN, GREATERTHAN;
-precedence left PLUS, MINUS;
+precedence left ADD, SUBTRACT;
 precedence left STAR, DIVIDE, MOD, KW_DIV;
 precedence left BITAND, BITOR, BITXOR, BITNOT;
 precedence left RPAREN;
@@ -517,8 +517,8 @@ case_else_clause ::=
  {: RESULT = null; :}
  ;

-minus_chain_expr ::=    
-  MINUS expr:e
+subtract_chain_expr ::=    
+  SUBTRACT expr:e
  {:      
    // integrate signs into literals 
    if (e.isLiteral() && e.getType().isNumericType()) {
@@ -531,10 +531,12 @@ minus_chain_expr ::=
  ;

 expr ::=
-  minus_chain_expr:e
+  subtract_chain_expr:e
  {: RESULT = e; :}  
  | literal:l
  {: RESULT = l; :}
+  | IDENT:functionName LPAREN RPAREN
+  {: RESULT = new FunctionCallExpr(functionName, new ArrayList<Expr>()); :}
  | IDENT:functionName LPAREN expr_list:exprs RPAREN
  {: RESULT = new FunctionCallExpr(functionName, exprs); :}
  | cast_expr:c
@@ -560,10 +562,10 @@ arithmetic_expr ::=
  {: RESULT = new ArithmeticExpr(ArithmeticExpr.Operator.MOD, e1, e2); :}
  | expr:e1 KW_DIV expr:e2
  {: RESULT = new ArithmeticExpr(ArithmeticExpr.Operator.INT_DIVIDE, e1, e2); :}
-  | expr:e1 PLUS expr:e2
-  {: RESULT = new ArithmeticExpr(ArithmeticExpr.Operator.PLUS, e1, e2); :}
-  | expr:e1 MINUS expr:e2
-  {: RESULT = new ArithmeticExpr(ArithmeticExpr.Operator.MINUS, e1, e2); :}     
+  | expr:e1 ADD expr:e2
+  {: RESULT = new ArithmeticExpr(ArithmeticExpr.Operator.ADD, e1, e2); :}
+  | expr:e1 SUBTRACT expr:e2
+  {: RESULT = new ArithmeticExpr(ArithmeticExpr.Operator.SUBTRACT, e1, e2); :}     
  | expr:e1 BITAND expr:e2
  {: RESULT = new ArithmeticExpr(ArithmeticExpr.Operator.BITAND, e1, e2); :}
  | expr:e1 BITOR expr:e2
--- a/fe/src/main/java/com/cloudera/impala/analysis/AggregateExpr.java
+++ b/fe/src/main/java/com/cloudera/impala/analysis/AggregateExpr.java
@@ -7,24 +7,24 @@ import java.util.List;
 import com.cloudera.impala.catalog.PrimitiveType;
 import com.cloudera.impala.common.AnalysisException;
 import com.cloudera.impala.thrift.TAggregateExpr;
+import com.cloudera.impala.thrift.TAggregationOp;
 import com.cloudera.impala.thrift.TExprNode;
 import com.cloudera.impala.thrift.TExprNodeType;
-import com.cloudera.impala.thrift.TExprOperator;
 import com.google.common.base.Objects;
 import com.google.common.base.Preconditions;

 public class AggregateExpr extends Expr {
  public enum Operator {
-    COUNT("COUNT", TExprOperator.AGG_COUNT),
-    MIN("MIN", TExprOperator.AGG_MIN),
-    MAX("MAX", TExprOperator.AGG_MAX),
-    SUM("SUM", TExprOperator.AGG_SUM),
-    AVG("AVG", TExprOperator.INVALID_OP);
+    COUNT("COUNT", TAggregationOp.COUNT),
+    MIN("MIN", TAggregationOp.MIN),
+    MAX("MAX", TAggregationOp.MAX),
+    SUM("SUM", TAggregationOp.SUM),
+    AVG("AVG", TAggregationOp.INVALID);

    private final String description;
-    private final TExprOperator thriftOp;
+    private final TAggregationOp thriftOp;

-    private Operator(String description, TExprOperator thriftOp) {
+    private Operator(String description, TAggregationOp thriftOp) {
      this.description = description;
      this.thriftOp = thriftOp;
    }
@@ -34,7 +34,7 @@ public class AggregateExpr extends Expr {
      return description;
    }

-    public TExprOperator toThrift() {
+    public TAggregationOp toThrift() {
      return thriftOp;
    }
  }
@@ -111,8 +111,7 @@ public class AggregateExpr extends Expr {
  @Override
  protected void toThrift(TExprNode msg) {
    msg.node_type = TExprNodeType.AGG_EXPR;
-    msg.op = op.toThrift();
-    msg.agg_expr = new TAggregateExpr(isStar, isDistinct);
+    msg.agg_expr = new TAggregateExpr(isStar, isDistinct, op.toThrift());
  }

  @Override
--- a/fe/src/main/java/com/cloudera/impala/analysis/Analyzer.java
+++ b/fe/src/main/java/com/cloudera/impala/analysis/Analyzer.java
@@ -12,8 +12,6 @@ import com.cloudera.impala.catalog.Column;
 import com.cloudera.impala.catalog.Db;
 import com.cloudera.impala.catalog.Table;
 import com.cloudera.impala.common.AnalysisException;
-import com.cloudera.impala.common.Pair;
-import com.google.common.base.Preconditions;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;

--- a/fe/src/main/java/com/cloudera/impala/analysis/ArithmeticExpr.java
+++ b/fe/src/main/java/com/cloudera/impala/analysis/ArithmeticExpr.java
@@ -4,34 +4,30 @@ package com.cloudera.impala.analysis;

 import com.cloudera.impala.catalog.PrimitiveType;
 import com.cloudera.impala.common.AnalysisException;
+import com.cloudera.impala.opcode.FunctionOperator;
 import com.cloudera.impala.thrift.TExprNode;
 import com.cloudera.impala.thrift.TExprNodeType;
-import com.cloudera.impala.thrift.TExprOperator;
 import com.google.common.base.Preconditions;

 public class ArithmeticExpr extends Expr {
  enum Operator {
-    MULTIPLY("*", TExprOperator.MULTIPLY),
-    DIVIDE("/", TExprOperator.DIVIDE),
-    MOD("%", TExprOperator.MOD),
-    INT_DIVIDE("DIV", TExprOperator.INT_DIVIDE),
-    PLUS("+", TExprOperator.PLUS),
-    MINUS("-", TExprOperator.MINUS),
-    BITAND("&", TExprOperator.BITAND),
-    BITOR("|", TExprOperator.BITOR),
-    BITXOR("^", TExprOperator.BITXOR),
-    BITNOT("~", TExprOperator.BITNOT);
+    MULTIPLY("*", FunctionOperator.MULTIPLY),
+    DIVIDE("/", FunctionOperator.DIVIDE),
+    MOD("%", FunctionOperator.MOD),
+    INT_DIVIDE("DIV", FunctionOperator.INT_DIVIDE),
+    ADD("+", FunctionOperator.ADD),
+    SUBTRACT("-", FunctionOperator.SUBTRACT),
+    BITAND("&", FunctionOperator.BITAND),
+    BITOR("|", FunctionOperator.BITOR),
+    BITXOR("^", FunctionOperator.BITXOR),
+    BITNOT("~", FunctionOperator.BITNOT);

    private final String description;
-    private final TExprOperator thriftOp;
+    private final FunctionOperator functionOp;

-    private Operator(String description, TExprOperator thriftOp) {
+    private Operator(String description, FunctionOperator thriftOp) {
      this.description = description;
-      this.thriftOp = thriftOp;
-    }
-
-    public boolean isBitwiseOperation() {
-      return this == BITAND || this == BITOR || this == BITXOR || this == BITNOT;
+      this.functionOp = thriftOp;
    }

    @Override
@@ -39,10 +35,11 @@ public class ArithmeticExpr extends Expr {
      return description;
    }

-    public TExprOperator toThrift() {
-      return thriftOp;
+    public FunctionOperator toFunctionOp() {
+      return functionOp;
    }
  }
+
  private final Operator op;

  public Operator getOp() {
@@ -74,7 +71,7 @@ public class ArithmeticExpr extends Expr {
  @Override
  protected void toThrift(TExprNode msg) {
    msg.node_type = TExprNodeType.ARITHMETIC_EXPR;
-    msg.op = op.toThrift();
+    msg.setOpcode(opcode);
  }

  @Override
@@ -82,7 +79,7 @@ public class ArithmeticExpr extends Expr {
    if (!super.equals(obj)) {
      return false;
    }
-    return ((ArithmeticExpr) obj).op == op;
+    return ((ArithmeticExpr) obj).opcode == opcode;
  }

  @Override
@@ -98,12 +95,15 @@ public class ArithmeticExpr extends Expr {

    // bitnot is the only unary op, deal with it here
    if (op == Operator.BITNOT) {
-      PrimitiveType childType = getChild(0).getType();
-      if (!childType.isFixedPointType()) {
+      type = getChild(0).getType();
+      OpcodeRegistry.Signature match =
+        OpcodeRegistry.instance().getFunctionInfo(op.functionOp, type);
+      if (match == null) {
        throw new AnalysisException("Bitwise operations only allowed on fixed-point types: "
            + toSql());
      }
-      type = childType;
+      Preconditions.checkState(type == match.returnType);
+      opcode = match.opcode;
      return;
    }

@@ -112,8 +112,8 @@ public class ArithmeticExpr extends Expr {

    switch (op) {
      case MULTIPLY:
-      case PLUS:
-      case MINUS:
+      case ADD:
+      case SUBTRACT:
        // numeric ops must be promoted to highest-resolution type
        // (otherwise we can't guarantee that a <op> b won't result in an overflow/underflow)
        type = PrimitiveType.getAssignmentCompatibleType(t1, t2).getMaxResolutionType();
@@ -134,8 +134,7 @@ public class ArithmeticExpr extends Expr {
              "Invalid floating point argument to operation " +
              op.toString() + ": " + this.toSql());
        }
-        type =
-          PrimitiveType.getAssignmentCompatibleType(t1, t2);
+        type = PrimitiveType.getAssignmentCompatibleType(t1, t2);
        // the result is always an integer
        Preconditions.checkState(type.isFixedPointType());
        break;
@@ -147,5 +146,8 @@ public class ArithmeticExpr extends Expr {
    }

    type = castBinaryOp(type);
+    OpcodeRegistry.Signature match =
+      OpcodeRegistry.instance().getFunctionInfo(op.toFunctionOp(), type, type);
+    this.opcode = match.opcode;
  }
 }
--- a/fe/src/main/java/com/cloudera/impala/analysis/BinaryPredicate.java
+++ b/fe/src/main/java/com/cloudera/impala/analysis/BinaryPredicate.java
@@ -4,9 +4,9 @@ package com.cloudera.impala.analysis;

 import com.cloudera.impala.catalog.PrimitiveType;
 import com.cloudera.impala.common.AnalysisException;
+import com.cloudera.impala.opcode.FunctionOperator;
 import com.cloudera.impala.thrift.TExprNode;
 import com.cloudera.impala.thrift.TExprNodeType;
-import com.cloudera.impala.thrift.TExprOperator;
 import com.google.common.base.Preconditions;

 /**
@@ -15,19 +15,19 @@ import com.google.common.base.Preconditions;
 */
 public class BinaryPredicate extends Predicate {
  public enum Operator {
-    EQ("=", TExprOperator.EQ),
-    NE("!=", TExprOperator.NE),
-    LE("<=", TExprOperator.LE),
-    GE(">=", TExprOperator.GE),
-    LT("<", TExprOperator.LT),
-    GT(">", TExprOperator.GT);
+    EQ("=", FunctionOperator.EQ),
+    NE("!=", FunctionOperator.NE),
+    LE("<=", FunctionOperator.LE),
+    GE(">=", FunctionOperator.GE),
+    LT("<", FunctionOperator.LT),
+    GT(">", FunctionOperator.GT);

    private final String description;
-    private final TExprOperator thriftOp;
+    private final FunctionOperator functionOp;

-    private Operator(String description, TExprOperator thriftOp) {
+    private Operator(String description, FunctionOperator functionOp) {
      this.description = description;
-      this.thriftOp = thriftOp;
+      this.functionOp = functionOp;
    }

    @Override
@@ -35,10 +35,11 @@ public class BinaryPredicate extends Predicate {
      return description;
    }

-    public TExprOperator toThrift() {
-      return thriftOp;
+    public FunctionOperator toFunctionOp() {
+      return functionOp;
    }
-  };
+  }
+
  private final Operator op;

  public Operator getOp() {
@@ -59,7 +60,7 @@ public class BinaryPredicate extends Predicate {
    if (!super.equals(obj)) {
      return false;
    }
-    return ((BinaryPredicate) obj).op == op;
+    return ((BinaryPredicate) obj).opcode == this.opcode;
  }

  @Override
@@ -70,7 +71,7 @@ public class BinaryPredicate extends Predicate {
  @Override
  protected void toThrift(TExprNode msg) {
    msg.node_type = TExprNodeType.BINARY_PRED;
-    msg.op = op.toThrift();
+    msg.setOpcode(opcode);
  }

  @Override
@@ -80,7 +81,7 @@ public class BinaryPredicate extends Predicate {
    PrimitiveType t1 = getChild(0).getType();
    PrimitiveType t2 = getChild(1).getType();
    PrimitiveType compatibleType = PrimitiveType.getAssignmentCompatibleType(t1, t2);
-  
+
    if (!compatibleType.isValid()) {
      // there is no type to which both are assignment-compatible -> we can't compare them
      throw new AnalysisException("operands are not comparable: " + this.toSql());
@@ -88,6 +89,12 @@ public class BinaryPredicate extends Predicate {

    // Ignore return value because type is always bool for predicates.
    castBinaryOp(compatibleType);
+
+    OpcodeRegistry.Signature match = OpcodeRegistry.instance().getFunctionInfo(
+        op.toFunctionOp(), compatibleType, compatibleType);
+    Preconditions.checkState(match != null);
+    Preconditions.checkState(match.returnType == PrimitiveType.BOOLEAN);
+    this.opcode = match.opcode;
  }

  /**
--- a/fe/src/main/java/com/cloudera/impala/analysis/CastExpr.java
+++ b/fe/src/main/java/com/cloudera/impala/analysis/CastExpr.java
@@ -4,11 +4,13 @@ package com.cloudera.impala.analysis;

 import com.cloudera.impala.catalog.PrimitiveType;
 import com.cloudera.impala.common.AnalysisException;
+import com.cloudera.impala.opcode.FunctionOperator;
 import com.cloudera.impala.thrift.TExprNode;
 import com.cloudera.impala.thrift.TExprNodeType;
 import com.google.common.base.Preconditions;

 public class CastExpr extends Expr {
+
  private final PrimitiveType targetType;
  /** true if this is a "pre-analyzed" implicit cast */
  private final boolean isImplicit;
@@ -22,6 +24,11 @@ public class CastExpr extends Expr {
    children.add(e);
    if (isImplicit) {
      type = targetType;
+      OpcodeRegistry.Signature match = OpcodeRegistry.instance().getFunctionInfo(
+          FunctionOperator.CAST, getChild(0).getType(), type);
+      Preconditions.checkState(match != null);
+      Preconditions.checkState(match.returnType == type);
+      this.opcode = match.opcode;
    }
  }

@@ -36,26 +43,34 @@ public class CastExpr extends Expr {
  @Override
  protected void toThrift(TExprNode msg) {
    msg.node_type = TExprNodeType.CAST_EXPR;
+    msg.setOpcode(opcode);
  }

  @Override
  public void analyze(Analyzer analyzer) throws AnalysisException {
    super.analyze(analyzer);

-    if (!isImplicit) {
-      // cast was asked for in the query, check for validity of cast
-      PrimitiveType childType = getChild(0).getType();
-      PrimitiveType resultType =
-        PrimitiveType.getAssignmentCompatibleType(childType, targetType);
-
-      if (!resultType.isValid()) {
-        throw new AnalysisException("Invalid type cast from: " + childType.toString() +
-            " to " + targetType);
-      }
-
-      // this cast may result in loss of precision, but the user requested it
-      this.type = targetType;
+    if (isImplicit) {
+      return;
    }
+
+    // cast was asked for in the query, check for validity of cast
+    PrimitiveType childType = getChild(0).getType();
+    PrimitiveType resultType =
+      PrimitiveType.getAssignmentCompatibleType(childType, targetType);
+
+    if (!resultType.isValid()) {
+      throw new AnalysisException("Invalid type cast of " + getChild(0).toSql() +
+          " from " + childType + " to " + targetType);
+    }
+
+    // this cast may result in loss of precision, but the user requested it
+    this.type = targetType;
+    OpcodeRegistry.Signature match = OpcodeRegistry.instance().getFunctionInfo(
+        FunctionOperator.CAST, getChild(0).getType(), type);
+    Preconditions.checkState(match != null);
+    Preconditions.checkState(match.returnType == type);
+    this.opcode = match.opcode;
  }

  @Override
@@ -64,6 +79,6 @@ public class CastExpr extends Expr {
      return false;
    }
    CastExpr expr = (CastExpr) obj;
-    return targetType == expr.targetType;
+    return this.opcode == expr.opcode;
  }
 }
--- a/fe/src/main/java/com/cloudera/impala/analysis/CompoundPredicate.java
+++ b/fe/src/main/java/com/cloudera/impala/analysis/CompoundPredicate.java
@@ -4,7 +4,7 @@ package com.cloudera.impala.analysis;

 import com.cloudera.impala.thrift.TExprNode;
 import com.cloudera.impala.thrift.TExprNodeType;
-import com.cloudera.impala.thrift.TExprOperator;
+import com.cloudera.impala.thrift.TExprOpcode;
 import com.google.common.base.Preconditions;

 /**
@@ -13,14 +13,14 @@ import com.google.common.base.Preconditions;
 */
 public class CompoundPredicate extends Predicate {
  public enum Operator {
-    AND("AND", TExprOperator.AND),
-    OR("OR", TExprOperator.OR),
-    NOT("NOT", TExprOperator.NOT);
+    AND("AND", TExprOpcode.COMPOUND_AND),
+    OR("OR", TExprOpcode.COMPOUND_OR),
+    NOT("NOT", TExprOpcode.COMPOUND_NOT);

    private final String description;
-    private final TExprOperator thriftOp;
+    private final TExprOpcode thriftOp;

-    private Operator(String description, TExprOperator thriftOp) {
+    private Operator(String description, TExprOpcode thriftOp) {
      this.description = description;
      this.thriftOp = thriftOp;
    }
@@ -30,7 +30,7 @@ public class CompoundPredicate extends Predicate {
      return description;
    }

-    public TExprOperator toThrift() {
+    public TExprOpcode toThrift() {
      return thriftOp;
    }
  }
@@ -73,6 +73,6 @@ public class CompoundPredicate extends Predicate {
  @Override
  protected void toThrift(TExprNode msg) {
    msg.node_type = TExprNodeType.COMPOUND_PRED;
-    msg.op = op.toThrift();
+    msg.setOpcode(op.toThrift());
  }
 }
--- a/fe/src/main/java/com/cloudera/impala/analysis/Expr.java
+++ b/fe/src/main/java/com/cloudera/impala/analysis/Expr.java
@@ -14,6 +14,7 @@ import com.cloudera.impala.common.AnalysisException;
 import com.cloudera.impala.common.TreeNode;
 import com.cloudera.impala.thrift.TExpr;
 import com.cloudera.impala.thrift.TExprNode;
+import com.cloudera.impala.thrift.TExprOpcode;
 import com.google.common.base.Joiner;
 import com.google.common.base.Preconditions;
 import com.google.common.collect.Lists;
@@ -25,16 +26,22 @@ import com.google.common.collect.Lists;
 abstract public class Expr extends TreeNode<Expr> implements ParseNode, Cloneable {
  protected PrimitiveType type;  // result of analysis
  protected boolean isAnalyzed;  // true after analyze() has been called
+  protected TExprOpcode opcode;  // opcode for this expr

  protected Expr() {
    super();
    type = PrimitiveType.INVALID_TYPE;
+    opcode = TExprOpcode.INVALID_OPCODE;
  }

  public PrimitiveType getType() {
    return type;
  }

+  public TExprOpcode getOpcode() {
+    return opcode;
+  }
+
  /* Perform semantic analysis of node and all of its children.
   * Throws exception if any errors found.
   * @see com.cloudera.impala.parser.ParseNode#analyze(com.cloudera.impala.parser.Analyzer)
--- a/fe/src/main/java/com/cloudera/impala/analysis/FunctionCallExpr.java
+++ b/fe/src/main/java/com/cloudera/impala/analysis/FunctionCallExpr.java
@@ -4,7 +4,9 @@ package com.cloudera.impala.analysis;

 import java.util.List;

+import com.cloudera.impala.catalog.PrimitiveType;
 import com.cloudera.impala.common.AnalysisException;
+import com.cloudera.impala.opcode.FunctionOperator;
 import com.cloudera.impala.thrift.TExprNode;
 import com.cloudera.impala.thrift.TExprNodeType;
 import com.google.common.base.Joiner;
@@ -14,7 +16,7 @@ public class FunctionCallExpr extends Expr {

  public FunctionCallExpr(String functionName, List<Expr> params) {
    super();
-    this.functionName = functionName;
+    this.functionName = functionName.toLowerCase();
    children.addAll(params);
  }

@@ -23,7 +25,7 @@ public class FunctionCallExpr extends Expr {
    if (!super.equals(obj)) {
      return false;
    }
-    return ((FunctionCallExpr) obj).functionName.equals(functionName);
+    return ((FunctionCallExpr) obj).opcode == this.opcode;
  }

  @Override
@@ -31,16 +33,38 @@ public class FunctionCallExpr extends Expr {
    return functionName + "(" + Joiner.on(", ").join(childrenToSql()) + ")";
  }

-  // TODO: we need to encode the actual function opcodes;
-  // this ties in with replacing TExpr.op with an opcode
-  // that resolves to a single compute function for the backend
  @Override
  protected void toThrift(TExprNode msg) {
    msg.node_type = TExprNodeType.FUNCTION_CALL;
+    msg.setOpcode(opcode);
  }

  @Override
  public void analyze(Analyzer analyzer) throws AnalysisException {
-    throw new AnalysisException("CAST not supported");
+    // Resolves functionName to a FunctionOperator, picks the registered signature
+    // that accepts the children's types, records its opcode/return type on this expr
+    // and casts any child whose type differs from the chosen signature.
+    // Throws AnalysisException if the name is unknown or no signature matches.
+
+    // Run the base-class analysis first, as CastExpr.analyze() does.
+    // NOTE(review): presumably this is what analyzes the children so that their
+    // types are set before the signature lookup below; confirm against Expr.analyze().
+    super.analyze(analyzer);
+
+    FunctionOperator op = OpcodeRegistry.instance().getFunctionOperator(functionName);
+    if (op == FunctionOperator.INVALID_OPERATOR) {
+      throw new AnalysisException(functionName + " unknown");
+    }
+
+    PrimitiveType[] argTypes = new PrimitiveType[this.children.size()];
+    for (int i = 0; i < this.children.size(); ++i) {
+      argTypes[i] = this.children.get(i).getType();
+    }
+    OpcodeRegistry.Signature match =
+      OpcodeRegistry.instance().getFunctionInfo(op, argTypes);
+    if (match == null) {
+      // Render the attempted call as name(type1, type2, ...).
+      String error = "No matching function with those arguments: " + functionName
+        + "(" + Joiner.on(", ").join(argTypes) + ")";
+      throw new AnalysisException(error);
+    }
+    this.opcode = match.opcode;
+    this.type = match.returnType;
+
+    // Implicitly cast all the children to match the function if necessary
+    for (int i = 0; i < argTypes.length; ++i) {
+      if (argTypes[i] != match.argTypes[i]) {
+        castChild(match.argTypes[i], i);
+      }
+    }
  }
 }
--- a/fe/src/main/java/com/cloudera/impala/analysis/LikePredicate.java
+++ b/fe/src/main/java/com/cloudera/impala/analysis/LikePredicate.java
@@ -9,20 +9,20 @@ import com.cloudera.impala.catalog.PrimitiveType;
 import com.cloudera.impala.common.AnalysisException;
 import com.cloudera.impala.thrift.TExprNode;
 import com.cloudera.impala.thrift.TExprNodeType;
-import com.cloudera.impala.thrift.TExprOperator;
+import com.cloudera.impala.thrift.TExprOpcode;
 import com.cloudera.impala.thrift.TLikePredicate;
 import com.google.common.base.Preconditions;

 public class LikePredicate extends Predicate {
  enum Operator {
-    LIKE("LIKE", TExprOperator.LIKE),
-    RLIKE("RLIKE", TExprOperator.REGEXP),
-    REGEXP("REGEXP", TExprOperator.REGEXP);
+    LIKE("LIKE", TExprOpcode.LIKE),
+    RLIKE("RLIKE", TExprOpcode.REGEX),
+    REGEXP("REGEXP", TExprOpcode.REGEX);

    private final String description;
-    private final TExprOperator thriftOp;
+    private final TExprOpcode thriftOp;

-    private Operator(String description, TExprOperator thriftOp) {
+    private Operator(String description, TExprOpcode thriftOp) {
      this.description = description;
      this.thriftOp = thriftOp;
    }
@@ -32,7 +32,7 @@ public class LikePredicate extends Predicate {
      return description;
    }

-    public TExprOperator toThrift() {
+    public TExprOpcode toThrift() {
      return thriftOp;
    }
  }
@@ -63,7 +63,7 @@ public class LikePredicate extends Predicate {
  @Override
  protected void toThrift(TExprNode msg) {
    msg.node_type = TExprNodeType.LIKE_PRED;
-    msg.op = op.toThrift();
+    msg.setOpcode(op.toThrift());
    msg.like_pred = new TLikePredicate("\\");
  }

--- a/fe/src/main/java/com/cloudera/impala/analysis/OpcodeRegistry.java
+++ b/fe/src/main/java/com/cloudera/impala/analysis/OpcodeRegistry.java
@@ -0,0 +1,211 @@
+package com.cloudera.impala.analysis;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.cloudera.impala.catalog.PrimitiveType;
+import com.cloudera.impala.common.Pair;
+import com.cloudera.impala.opcode.FunctionOperator;
+import com.cloudera.impala.opcode.FunctionRegistry;
+import com.cloudera.impala.thrift.TExprOpcode;
+import com.google.common.collect.Maps;
+
+/**
+ * The OpcodeRegistry provides a mapping between function signatures and opcodes. The
+ * supported functions are code-gen'ed and added to the registry with an assigned opcode.
+ * The opcode is shared with the backend.  The frontend can use the registry to look up
+ * a function's opcode.
+ *
+ * The OpcodeRegistry also contains a mapping between function names (as strings) to
+ * operators.
+ *
+ * The OpcodeRegistry is a singleton.
+ *
+ * TODO: The opcode registry should be versioned in the FE/BE.
+ */
+public class OpcodeRegistry {
+
+  private final static Logger LOG = LoggerFactory.getLogger(OpcodeRegistry.class);
+  private static OpcodeRegistry instance = new OpcodeRegistry();
+
+  /**
+   * This is a mapping of Operator,#args to signatures.  The signature is defined by
+   * the operator enum and the arguments and is a one to one mapping to opcodes.
+   * The map is structured this way to more efficiently look for signature matches.
+   * Signatures that have the same number of arguments have a potential to be matches
+   * by allowing types to be implicitly cast.
+   */
+  private final Map<Pair<FunctionOperator, Integer>, List<Signature>> operations;
+
+  /**
+   * This contains a mapping of function names to a FunctionOperator enum.  This is used
+   * by FunctionCallExpr to go from the parser input to function opcodes.
+   * This is a many to one mapping (i.e. substr and substring both map to the same
+   * operation).
+   * The mappings are filled in in FunctionRegistry.java which is auto-generated.
+   */
+  private final HashMap<String, FunctionOperator> functionNameMap;
+
+  // Singleton interface: returns the one OpcodeRegistry held in the static
+  // 'instance' field.
+  public static OpcodeRegistry instance() {
+    return instance;
+  }
+
+  /**
+   * Static utility functions
+   */
+  public static boolean isBitwiseOperation(FunctionOperator operator) {
+    return operator == FunctionOperator.BITAND || operator == FunctionOperator.BITNOT ||
+           operator == FunctionOperator.BITOR || operator == FunctionOperator.BITXOR;
+  }
+
+  /**
+   * Contains all the information about a function signature.
+   */
+  public static class Signature {
+    public TExprOpcode opcode;
+    public FunctionOperator operator;
+    public PrimitiveType returnType;
+    public PrimitiveType argTypes[];
+
+    // Constructor for searching, specifying the op and arguments
+    public Signature(FunctionOperator operator, PrimitiveType[] args) {
+      this.operator = operator;
+      this.argTypes = args;
+    }
+
+    private Signature(TExprOpcode opcode, FunctionOperator operator,
+        PrimitiveType ret, PrimitiveType[] args) {
+      this.operator = operator;
+      this.opcode = opcode;
+      this.returnType = ret;
+      this.argTypes = args;
+    }
+
+    /**
+     * Returns if the 'this' signature is compatible with the 'other' signature. The op
+     * and number of arguments must match and it must be allowed to implicitly cast
+     * each argument of this signature to the matching argument in 'other'
+     */
+    public boolean isCompatible(Signature other) {
+      if (other.argTypes.length != this.argTypes.length) {
+        return false;
+      }
+      for (int i = 0; i < this.argTypes.length; ++i) {
+        if (!PrimitiveType.isImplicitlyCastable(this.argTypes[i], other.argTypes[i])) {
+          return false;
+        }
+      }
+      return true;
+    }
+
+    @Override
+    /**
+     * Signature are equal with C++/Java function signature semantics.  They are
+     * equal if the operation and all the arguments are the same.
+     */
+    public boolean equals(Object o) {
+      if (o == null || !(o instanceof Signature)) {
+        return false;
+      }
+      Signature s = (Signature) o;
+      if (s.argTypes.length != this.argTypes.length) {
+        return false;
+      }
+      if (s.argTypes == null && this.argTypes == null) {
+        return true;
+      }
+
+      for (int i = 0; i < this.argTypes.length; ++i) {
+        if (s.argTypes[i] != this.argTypes[i]) {
+          return false;
+        }
+      }
+      return true;
+    }
+  }
+
+  /**
+   * Returns the function operator enum for a function name.  The name is lower-cased
+   * before it is looked up (i.e. "Substring" is looked up as "substring").
+   * Returns FunctionOperator.INVALID_OPERATOR if that function name is unknown.
+   */
+  public FunctionOperator getFunctionOperator(String fnName) {
+    String key = fnName.toLowerCase();
+    return functionNameMap.containsKey(key)
+        ? functionNameMap.get(key)
+        : FunctionOperator.INVALID_OPERATOR;
+  }
+
+  /**
+   * Query for a function in the registry, specifying the operation, 'op', and the
+   * argument types.
+   * A registered signature whose arguments are identical to 'argTypes' is preferred.
+   * Failing that, the first registered signature that the arguments can be implicitly
+   * cast to is returned.  If there is no matching signature, null is returned.
+   */
+  public Signature getFunctionInfo(FunctionOperator op, PrimitiveType ... argTypes) {
+    // Only signatures registered for the same operator and argument count can match.
+    List<Signature> candidates = operations.get(Pair.create(op, argTypes.length));
+    if (candidates == null) {
+      return null;
+    }
+
+    Signature wanted = new Signature(op, argTypes);
+    Signature firstCompatible = null;
+    for (Signature candidate : candidates) {
+      if (wanted.equals(candidate)) {
+        // Exact match always wins, even over an earlier compatible one.
+        return candidate;
+      }
+      if (firstCompatible == null && wanted.isCompatible(candidate)) {
+        firstCompatible = candidate;
+      }
+    }
+    return firstCompatible;
+  }
+
+  /**
+   * Add a function with the specified opcode/signature to the registry.
+   */
+  public boolean add(FunctionOperator op, TExprOpcode opcode, PrimitiveType retType, PrimitiveType ... args) {
+    List<Signature> signatures;
+    Pair<FunctionOperator, Integer> lookup = Pair.create(op, args.length);
+    if (operations.containsKey(lookup)) {
+      signatures = operations.get(lookup);
+    } else {
+      signatures = new ArrayList<Signature>();
+      operations.put(lookup, signatures);
+    }
+
+    Signature signature = new Signature(opcode, op, retType, args);
+    if (signatures.contains(signature)) {
+      LOG.error("OpcodeRegistry: Function already exists: " + opcode);
+      return false;
+    }
+    signatures.add(signature);
+
+    return true;
+  }
+
+  public boolean addFunctionMapping(String functionName, FunctionOperator op) {
+    if (functionNameMap.containsKey(functionName)) {
+      LOG.error("OpcodeRegistry: Function mapping already exists: " + functionName);
+      return false;
+    }
+    functionNameMap.put(functionName, op);
+    return true;
+  }
+
+  // Singleton interface, don't call the constructor; use instance() instead.
+  private OpcodeRegistry() {
+    // Both maps are created before InitFunctions() is handed this registry.
+    operations = Maps.newHashMap();
+    functionNameMap = Maps.newHashMap();
+
+    // Add all the function signatures to the registry and the function name(string)
+    // to FunctionOperator mapping
+    FunctionRegistry.InitFunctions(this);
+  }
+}
--- a/fe/src/main/java/com/cloudera/impala/analysis/StringLiteral.java
+++ b/fe/src/main/java/com/cloudera/impala/analysis/StringLiteral.java
@@ -78,7 +78,7 @@ public class StringLiteral extends LiteralExpr {
      // Currently we can't handle string literals containing full fledged expressions
      // which are implicitly cast to a numeric literal. This would require invoking the parser.
      sym = scanner.next_token();
-      while (sym.sym == SqlParserSymbols.MINUS) {
+      while (sym.sym == SqlParserSymbols.SUBTRACT) {
        multiplier *= -1;
        sym = scanner.next_token();
      }
--- a/fe/src/main/java/com/cloudera/impala/analysis/TableRef.java
+++ b/fe/src/main/java/com/cloudera/impala/analysis/TableRef.java
@@ -4,7 +4,6 @@ package com.cloudera.impala.analysis;

 import java.util.List;

-import com.cloudera.impala.catalog.Catalog;
 import com.cloudera.impala.catalog.Table;
 import com.cloudera.impala.common.AnalysisException;
 import com.google.common.base.Joiner;
--- a/fe/src/main/java/com/cloudera/impala/catalog/PrimitiveType.java
+++ b/fe/src/main/java/com/cloudera/impala/catalog/PrimitiveType.java
@@ -235,6 +235,14 @@ public enum PrimitiveType {
    return result;
  }

+  /**
+   * Returns if it is compatible to implicitly cast from t1 to t2 (casting from
+   * t1 to t2 results in no loss of precision.
+   */
+  public static boolean isImplicitlyCastable(PrimitiveType t1, PrimitiveType t2) {
+      return getAssignmentCompatibleType(t1, t2) == t2;
+  }
+
  // Returns the highest resolution type
  // corresponding to the lexer symbol of numeric literals.
  // Currently used to determine whether the literal is fixed or floating point.
--- a/fe/src/main/java/com/cloudera/impala/common/Pair.java
+++ b/fe/src/main/java/com/cloudera/impala/common/Pair.java
@@ -14,6 +14,26 @@ public class Pair<F, S> {
    this.second = second;
  }

+  @Override
+  /**
+   * A pair is equal if both parts are equal().
+   */
+  public boolean equals(Object o) {
+    if (o instanceof Pair) {
+      Pair<F,S> other = (Pair<F,S>) o;
+      return this.first.equals(other.first) && this.second.equals(other.second);
+    }
+    return false;
+  }
+
+  /**
+   * Combines the hash codes of both parts; a null part contributes 0.
+   */
+  @Override
+  public int hashCode() {
+    int h1 = (first == null) ? 0 : first.hashCode();
+    int h2 = (second == null) ? 0 : second.hashCode();
+    int sum = h1 + h2;
+    return sum * h2 + h1;
+  }
+
  static public <F, S> Pair<F, S> create(F first, S second) {
    return new Pair<F, S>(first, second);
  }
--- a/fe/src/main/java/com/cloudera/impala/planner/HBaseScanNode.java
+++ b/fe/src/main/java/com/cloudera/impala/planner/HBaseScanNode.java
@@ -17,7 +17,6 @@ import com.cloudera.impala.analysis.Predicate;
 import com.cloudera.impala.analysis.SlotDescriptor;
 import com.cloudera.impala.analysis.StringLiteral;
 import com.cloudera.impala.analysis.TupleDescriptor;
-import com.cloudera.impala.analysis.BinaryPredicate.Operator;
 import com.cloudera.impala.catalog.HBaseColumn;
 import com.cloudera.impala.catalog.HBaseTable;
 import com.cloudera.impala.catalog.PrimitiveType;
@@ -214,7 +213,7 @@ public class HBaseScanNode extends ScanNode {
    return result.toString();
  }

-  private static CompareFilter.CompareOp impalaOpToHBaseOp(Operator impalaOp) {
+  private static CompareFilter.CompareOp impalaOpToHBaseOp(BinaryPredicate.Operator impalaOp) {
    switch(impalaOp) {
      case EQ: return CompareFilter.CompareOp.EQUAL;
      case NE: return CompareFilter.CompareOp.NOT_EQUAL;
--- a/fe/src/main/java/com/cloudera/impala/service/Executor.java
+++ b/fe/src/main/java/com/cloudera/impala/service/Executor.java
@@ -36,7 +36,7 @@ import com.google.common.base.Preconditions;
 import com.google.common.collect.Lists;

 public class Executor {
-  private final static Logger LOG = LoggerFactory.getLogger(ValueRange.class);
+  private final static Logger LOG = LoggerFactory.getLogger(Executor.class);

  public static final boolean DEFAULT_ABORT_ON_ERROR = false;
  public static final int DEFAULT_MAX_ERRORS = 100;
--- a/fe/src/main/jflex/sql-scanner.flex
+++ b/fe/src/main/jflex/sql-scanner.flex
@@ -116,7 +116,7 @@ import com.cloudera.impala.analysis.SqlParserSymbols;
    tokenIdMap.put(new Integer(SqlParserSymbols.UNMATCHED_STRING_LITERAL),
        "UNMATCHED STRING LITERAL");
    tokenIdMap.put(new Integer(SqlParserSymbols.MOD), "%");
-    tokenIdMap.put(new Integer(SqlParserSymbols.PLUS), "+");
+    tokenIdMap.put(new Integer(SqlParserSymbols.ADD), "+");
    tokenIdMap.put(new Integer(SqlParserSymbols.DIVIDE), "/");
    tokenIdMap.put(new Integer(SqlParserSymbols.EQUAL), "=");
    tokenIdMap.put(new Integer(SqlParserSymbols.STAR), "*");
@@ -124,7 +124,7 @@ import com.cloudera.impala.analysis.SqlParserSymbols;
    tokenIdMap.put(new Integer(SqlParserSymbols.DOT), ".");
    tokenIdMap.put(new Integer(SqlParserSymbols.STRING_LITERAL), "STRING LITERAL");
    tokenIdMap.put(new Integer(SqlParserSymbols.EOF), "EOF");
-    tokenIdMap.put(new Integer(SqlParserSymbols.MINUS), "-");
+    tokenIdMap.put(new Integer(SqlParserSymbols.SUBTRACT), "-");
    tokenIdMap.put(new Integer(SqlParserSymbols.BITAND), "&");
    tokenIdMap.put(new Integer(SqlParserSymbols.error), "ERROR");
    tokenIdMap.put(new Integer(SqlParserSymbols.BITXOR), "^");
@@ -171,8 +171,8 @@ EndOfLineComment = "--" {NonTerminator}* {LineTerminator}?
 ")" { return newToken(SqlParserSymbols.RPAREN, null); }
 "/" { return newToken(SqlParserSymbols.DIVIDE, null); }
 "%" { return newToken(SqlParserSymbols.MOD, null); }
-"+" { return newToken(SqlParserSymbols.PLUS, null); }
-"-" { return newToken(SqlParserSymbols.MINUS, null); }
+"+" { return newToken(SqlParserSymbols.ADD, null); }
+"-" { return newToken(SqlParserSymbols.SUBTRACT, null); }
 "&" { return newToken(SqlParserSymbols.BITAND, null); }
 "|" { return newToken(SqlParserSymbols.BITOR, null); }
 "^" { return newToken(SqlParserSymbols.BITXOR, null); }
--- a/fe/src/main/thrift/Exprs.thrift
+++ b/fe/src/main/thrift/Exprs.thrift
@@ -4,6 +4,7 @@ namespace cpp impala
 namespace java com.cloudera.impala.thrift

 include "Types.thrift"
+include "Opcodes.thrift"

 enum TExprNodeType {
  AGG_EXPR,
@@ -25,53 +26,18 @@ enum TExprNodeType {
  STRING_LITERAL,
 }

-// op-codes for all expr operators
-enum TExprOperator {
-  INVALID_OP,
-
-  // AggregateExpr
-  AGG_COUNT,
-  AGG_MIN,
-  AGG_MAX,
-  AGG_SUM,
-  // AGG_AVG is not executable
-
-  // ArithmeticExpr
-  MULTIPLY,
-  DIVIDE,
-  MOD,
-  INT_DIVIDE,
-  PLUS,
-  MINUS,
-  BITAND,
-  BITOR,
-  BITXOR,
-  BITNOT,
-
-  // BinaryPredicate
-  EQ,
-  NE,
-  LE,
-  GE,
-  LT,
-  GT,
-
-  // CompoundPredicate
-  AND,
-  OR,
-  NOT,
-
-  // LIKE predicate
-  LIKE,
-  REGEXP,
-
-  // function opcodes
-
+enum TAggregationOp {
+  INVALID,
+  COUNT,
+  MAX,
+  MIN,
+  SUM,
 }

 struct TAggregateExpr {
  1: required bool is_star
  2: required bool is_distinct
+  3: required TAggregationOp op
 }

 struct TBoolLiteral {
@@ -120,7 +86,7 @@ struct TStringLiteral {
 struct TExprNode {
  1: required TExprNodeType node_type
  2: required Types.TPrimitiveType type
-  3: optional TExprOperator op
+  3: optional Opcodes.TExprOpcode opcode
  4: required i32 num_children

  5: optional TAggregateExpr agg_expr
--- a/fe/src/test/java/com/cloudera/impala/analysis/AnalyzerTest.java
+++ b/fe/src/test/java/com/cloudera/impala/analysis/AnalyzerTest.java
@@ -664,13 +664,13 @@ public class AnalyzerTest {
        PrimitiveType promotedType = compatibleType.getMaxResolutionType();

        // +, -, *
-        typeCastTest(type1, type2, false, ArithmeticExpr.Operator.PLUS, null,
+        typeCastTest(type1, type2, false, ArithmeticExpr.Operator.ADD, null,
                      promotedType);
-        typeCastTest(type1, type2, true, ArithmeticExpr.Operator.PLUS, null,
+        typeCastTest(type1, type2, true, ArithmeticExpr.Operator.ADD, null,
                      promotedType);
-        typeCastTest(type1, type2, false, ArithmeticExpr.Operator.MINUS, null,
+        typeCastTest(type1, type2, false, ArithmeticExpr.Operator.SUBTRACT, null,
                      promotedType);
-        typeCastTest(type1, type2, true, ArithmeticExpr.Operator.MINUS, null,
+        typeCastTest(type1, type2, true, ArithmeticExpr.Operator.SUBTRACT, null,
                      promotedType);
        typeCastTest(type1, type2, false, ArithmeticExpr.Operator.MULTIPLY, null,
                      promotedType);
@@ -683,16 +683,14 @@ public class AnalyzerTest {
        typeCastTest(type1, type2, true, ArithmeticExpr.Operator.DIVIDE, null,
                      PrimitiveType.DOUBLE);

-        // %
-        typeCastTest(type1, type2, false, ArithmeticExpr.Operator.MOD, null,
-                      compatibleType);
-        typeCastTest(type1, type2, true, ArithmeticExpr.Operator.MOD, null,
-                      compatibleType);
-
-        // div, &, |, ^ only for fixed-point types
+        // %, div, &, |, ^ only for fixed-point types
        if (!type1.isFixedPointType() || !type2.isFixedPointType()) {
          continue;
        }
+        typeCastTest(type1, type2, false, ArithmeticExpr.Operator.MOD, null,
+            compatibleType);
+        typeCastTest(type1, type2, true, ArithmeticExpr.Operator.MOD, null,
+            compatibleType);
        typeCastTest(type1, type2, false, ArithmeticExpr.Operator.INT_DIVIDE, null,
                      compatibleType);
        typeCastTest(type1, type2, true, ArithmeticExpr.Operator.INT_DIVIDE, null,
--- a/fe/src/test/java/com/cloudera/impala/service/ExecutorTest.java
+++ b/fe/src/test/java/com/cloudera/impala/service/ExecutorTest.java
@@ -54,6 +54,8 @@ public class ExecutorTest {

  @Test
  public void runTest() throws ImpalaException {
+    runTestSuccess("select substring(\"Hello World\", 0)", 1);
+    runTestSuccess("select int_col+bigint_col from alltypessmall limit 1", 1);
    runTestSuccess("select year, tinyint_col, int_col, id from alltypessmall", 100);
    runTestSuccess("select sum(double_col), count(double_col), avg(double_col) " +
                   "from alltypessmall", 1);