IMPALA-10851: Codegen for structs

IMPALA-9495 added support for struct types in SELECT lists but only with codegen turned off. This commit implements codegen for struct types. To facilitate this, code generation for reading and writing 'AnyVal's has been refactored. A new class, 'CodegenAnyValReadWriteInfo', is introduced. This class is an interface between sources and destinations, one of which is an 'AnyVal' object: sources generate an instance of this class and destinations take that instance and use it to write the value. The other side can for example be tuples from which we read (in the case of 'SlotRef') or tuples we write into (in case of materialisation, see Tuple::CodegenMaterializeExprs()). The main advantage is that sources do not have to know how to write their destinations, only how to read the values (and vice versa). Before this change, many tests that involve structs ran only with codegen turned off. Now that codegen is supported in these cases, these tests are also run with codegen on. Testing: - enabled tests for structs in the select list with codegen on in tests/query_test/test_nested_types.py - enabled codegen in other tests where it used to be disabled because it was not supported. Change-Id: I5272c3f095fd9f07877104ee03c8e43d0c4ec0b6 Reviewed-on: http://gerrit.cloudera.org:8080/18526 Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
2025-12-25 02:03:09 -05:00 · 2022-04-27 16:23:29 +02:00
parent 175dae33d9
commit b73847f178
41 changed files with 2234 additions and 823 deletions
--- a/be/src/codegen/CMakeLists.txt
+++ b/be/src/codegen/CMakeLists.txt
@@ -28,6 +28,7 @@ set(LEGACY_AVX_IR_C_FILE $ENV{IMPALA_HOME}/be/generated-sources/impala-ir/impala

 add_library(CodeGen
  codegen-anyval.cc
+  codegen-anyval-read-write-info.cc
  codegen-callgraph.cc
  codegen-symbol-emitter.cc
  codegen-util.cc
--- a/be/src/codegen/codegen-anyval-read-write-info.cc
+++ b/be/src/codegen/codegen-anyval-read-write-info.cc
@@ -0,0 +1,170 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "codegen/codegen-anyval-read-write-info.h"
+
+#include "codegen/codegen-anyval.h"
+#include "codegen/llvm-codegen.h"
+
+namespace impala {
+
+void NonWritableBasicBlock::BranchTo(LlvmBuilder* builder) const {
+  DCHECK(builder != nullptr);
+  builder->CreateBr(basic_block_);
+}
+
+void NonWritableBasicBlock::BranchToIf(LlvmBuilder* builder,
+    llvm::Value* condition, const NonWritableBasicBlock& else_block) const {
+  DCHECK(builder != nullptr);
+  builder->CreateCondBr(condition, basic_block_, else_block.basic_block_);
+}
+
+void NonWritableBasicBlock::BranchToIfNot(LlvmBuilder* builder,
+    llvm::Value* condition, const NonWritableBasicBlock& then_block) const {
+  DCHECK(builder != nullptr);
+  builder->CreateCondBr(condition, then_block.basic_block_, basic_block_);
+}
+
+llvm::Value* CodegenAnyValReadWriteInfo::GetSimpleVal() const {
+  // Only meaningful while 'data_' holds the simple value alternative.
+  DCHECK(holds_simple_val());
+  return *std::get_if<llvm::Value*>(&data_);
+}
+
+const CodegenAnyValReadWriteInfo::PtrLenStruct& CodegenAnyValReadWriteInfo::GetPtrAndLen()
+    const {
+  // Only meaningful while 'data_' holds the pointer-and-length alternative.
+  DCHECK(holds_ptr_and_len());
+  return *std::get_if<PtrLenStruct>(&data_);
+}
+
+const CodegenAnyValReadWriteInfo::TimestampStruct&
+    CodegenAnyValReadWriteInfo::GetTimeAndDate() const {
+  // Only meaningful while 'data_' holds the timestamp alternative.
+  DCHECK(holds_timestamp());
+  return *std::get_if<TimestampStruct>(&data_);
+}
+
+void CodegenAnyValReadWriteInfo::SetSimpleVal(llvm::Value* val) {
+  DCHECK(val != nullptr);
+  // Changing the kind of the stored value after it has been set is not supported.
+  DCHECK(holds_simple_val() || !is_data_initialized());
+  data_ = val;
+}
+
+void CodegenAnyValReadWriteInfo::SetPtrAndLen(llvm::Value* ptr, llvm::Value* len) {
+  DCHECK(ptr != nullptr);
+  DCHECK(len != nullptr);
+  // Changing the kind of the stored value after it has been set is not supported.
+  DCHECK(holds_ptr_and_len() || !is_data_initialized());
+
+  // Aggregate members are given in declaration order: {ptr, len}.
+  PtrLenStruct ptr_and_len{ptr, len};
+  data_ = ptr_and_len;
+}
+
+void CodegenAnyValReadWriteInfo::SetTimeAndDate(llvm::Value* time_of_day,
+    llvm::Value* date) {
+  DCHECK(time_of_day != nullptr);
+  DCHECK(date != nullptr);
+  // Changing the kind of the stored value after it has been set is not supported.
+  DCHECK(holds_timestamp() || !is_data_initialized());
+
+  // Aggregate members are given in declaration order: {time_of_day, date}.
+  TimestampStruct timestamp{time_of_day, date};
+  data_ = timestamp;
+}
+
+void CodegenAnyValReadWriteInfo::SetEval(llvm::Value* eval) {
+  DCHECK(eval != nullptr);
+  eval_ = eval;
+}
+
+void CodegenAnyValReadWriteInfo::SetFnCtxIdx(int fn_ctx_idx) {
+  DCHECK_GE(fn_ctx_idx, -1);
+  fn_ctx_idx_ = fn_ctx_idx;
+}
+
+void CodegenAnyValReadWriteInfo::SetBlocks(llvm::BasicBlock* entry_block,
+    llvm::BasicBlock* null_block, llvm::BasicBlock* non_null_block) {
+  DCHECK(entry_block != nullptr);
+  DCHECK(null_block != nullptr);
+  DCHECK(non_null_block != nullptr);
+
+  entry_block_ = entry_block;
+  null_block_ = null_block;
+  non_null_block_ = non_null_block;
+}
+
+llvm::ConstantStruct* CodegenAnyValReadWriteInfo::GetIrType() const {
+  // Delete the vectors in 'type_copy' because they are not used here and because they
+  // cannot be converted to IR.
+  // TODO IMPALA-11643: Revisit this.
+  ColumnType type_copy = type_;
+  type_copy.children.clear();
+  type_copy.field_names.clear();
+  type_copy.field_ids.clear();
+  return type_copy.ToIR(codegen_);
+}
+
+void CodegenAnyValReadWriteInfo::CodegenConvertToCanonicalForm() {
+  // Only floating point values are rewritten; every other type is left untouched.
+  switch(type_.type) {
+    case TYPE_FLOAT:
+    case TYPE_DOUBLE:
+      SetSimpleVal(CodegenAnyVal::ConvertToCanonicalForm(
+          codegen_, builder_, type_, GetSimpleVal()));
+      break;
+    default:
+      break;
+  }
+}
+
+llvm::Value* CodegenAnyValReadWriteInfo::CodegenGetFnCtx() const {
+  llvm::Function* const get_func_ctx_fn =
+      codegen_->GetFunction(IRFunction::GET_FUNCTION_CTX, false);
+  return builder_->CreateCall(get_func_ctx_fn,
+      {eval_, codegen_->GetI32Constant(fn_ctx_idx_)}, "fn_ctx");
+}
+
+llvm::PHINode* CodegenAnyValReadWriteInfo::CodegenNullPhiNode(llvm::Value* non_null_value,
+    llvm::Value* null_value, std::string name) {
+  return LlvmCodeGen::CreateBinaryPhiNode(builder_, non_null_value, null_value,
+      non_null_block_, null_block_);
+}
+
+llvm::PHINode* CodegenAnyValReadWriteInfo::CodegenIsNullPhiNode(std::string name) {
+  return CodegenNullPhiNode(codegen_->false_value(), codegen_->true_value(), name);
+}
+
+bool CodegenAnyValReadWriteInfo::is_data_initialized() const {
+  return !std::holds_alternative<std::monostate>(data_);
+}
+
+bool CodegenAnyValReadWriteInfo::holds_simple_val() const {
+  return std::holds_alternative<llvm::Value*>(data_);
+}
+
+bool CodegenAnyValReadWriteInfo::holds_ptr_and_len() const {
+  return std::holds_alternative<PtrLenStruct>(data_);
+}
+
+bool CodegenAnyValReadWriteInfo::holds_timestamp() const {
+  return std::holds_alternative<TimestampStruct>(data_);
+}
+
+} // namespace impala
--- a/be/src/codegen/codegen-anyval-read-write-info.h
+++ b/be/src/codegen/codegen-anyval-read-write-info.h
@@ -0,0 +1,210 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <vector>
+#include <variant>
+
+#include "common/logging.h"
+
+namespace llvm {
+class ConstantStruct;
+class BasicBlock;
+class PHINode;
+class Value;
+}
+
+namespace impala {
+
+struct ColumnType;
+class LlvmBuilder;
+class LlvmCodeGen;
+
+/// This class wraps an 'llvm::BasicBlock*' and provides a const interface to it extended
+/// with the possibility of branching to it (either conditionally or not).
+/// This is useful for the entry blocks of 'CodegenAnyValReadWriteInfo' objects as we do
+/// not want these blocks to be writable but we want to be able to branch to them.
+///
+/// We cannot use a simple const pointer because the branching functions
+/// 'LlvmBuilder::Create[Cond]Br()' take a non-const pointer.
+class NonWritableBasicBlock {
+ public:
+  explicit NonWritableBasicBlock(llvm::BasicBlock* basic_block)
+    : basic_block_(basic_block) {}
+
+  /// Read-only view of the wrapped block. Const so it is usable via const references.
+  const llvm::BasicBlock* get() const { return basic_block_; }
+
+  /// Unconditionally branch to this basic block.
+  void BranchTo(LlvmBuilder* builder) const;
+
+  /// Branch to this basic block if 'condition' is true, otherwise branch to 'else_block'.
+  void BranchToIf(LlvmBuilder* builder, llvm::Value* condition,
+      const NonWritableBasicBlock& else_block) const;
+
+  /// Branch to this basic block if 'condition' is false, otherwise to 'then_block'.
+  void BranchToIfNot(LlvmBuilder* builder, llvm::Value* condition,
+      const NonWritableBasicBlock& then_block) const;
+ private:
+  llvm::BasicBlock* basic_block_;
+};
+
+
+/// This class is used in conversions to and from 'CodegenAnyVal', i.e. 'AnyVal' objects
+/// in codegen code. This class is an interface between sources and destinations: sources
+/// generate an instance of this class and destinations take that instance and use it to
+/// write the value.
+///
+/// The other side can for example be tuples from which we read (in the
+/// case of 'SlotRef'), tuples we write into (in case of materialisation, see
+/// Tuple::CodegenMaterializeExprs()) but other cases exist, too. The main advantage is
+/// that sources do not have to know how to write their destinations, only how to read the
+/// values (and vice versa). This also makes it possible, should there be need for it, to
+/// leave out 'CodegenAnyVal' and convert directly between a source and a destination that
+/// know how to read and write 'CodegenAnyValReadWriteInfo's.
+///
+/// An instance of 'CodegenAnyValReadWriteInfo' represents a value but also contains
+/// information about how it is read and written in LLVM IR.
+///
+/// A source (for example 'SlotRef') should generate IR that starts in 'entry_block' (so
+/// that other IR code can branch to it), perform NULL checking and branch to 'null_block'
+/// and 'non_null_block' accordingly. The source is responsible for creating these blocks.
+/// It is allowed to create more blocks, but these blocks should not be missing.
+///
+/// A destination should be able to rely on this structure, i.e. it should be able to
+/// branch to 'entry_block' and to generate code in 'null_block' and 'non_null_block' to
+/// write the value. It is also allowed to generate additional blocks but it should not
+/// write into 'entry_block' or assume that the source only used the above mentioned
+/// blocks.
+///
+/// Structs are represented recursively. The fields 'codegen', 'builder' and 'type' should
+/// be filled by the source so that the destination can use them to generate IR code.
+/// Other fields, such as 'fn_ctx_idx' and 'eval' may be needed in some cases but not in
+/// others.
+class CodegenAnyValReadWriteInfo {
+ public:
+  // Used for String and collection types.
+  struct PtrLenStruct {
+    llvm::Value* ptr = nullptr;
+    llvm::Value* len = nullptr;
+  };
+
+  // Used for Timestamp.
+  struct TimestampStruct {
+    llvm::Value* time_of_day = nullptr;
+    llvm::Value* date = nullptr;
+  };
+
+  CodegenAnyValReadWriteInfo(LlvmCodeGen* codegen, LlvmBuilder* builder,
+      const ColumnType& type)
+    : codegen_(codegen),
+      builder_(builder),
+      type_(type)
+  {
+    DCHECK(codegen != nullptr);
+    DCHECK(builder != nullptr);
+  }
+
+  LlvmCodeGen* codegen() const { return codegen_; }
+  LlvmBuilder* builder() const { return builder_; }
+  const ColumnType& type() const {return type_; }
+
+  llvm::Value* GetSimpleVal() const;
+  const PtrLenStruct& GetPtrAndLen() const;
+  const TimestampStruct& GetTimeAndDate() const;
+
+  llvm::Value* GetEval() const { return eval_; }
+  int GetFnCtxIdx() const { return fn_ctx_idx_; }
+
+  NonWritableBasicBlock entry_block() const {
+    return NonWritableBasicBlock(entry_block_);
+  }
+
+  llvm::BasicBlock* null_block() const { return null_block_; }
+  llvm::BasicBlock* non_null_block() const { return non_null_block_; }
+
+  // Only one of these setters should be called in the lifetime of this object as
+  // changing the type is not supported. The same setter can be called multiple times.
+  void SetSimpleVal(llvm::Value* val);
+  void SetPtrAndLen(llvm::Value* ptr, llvm::Value* len);
+  void SetTimeAndDate(llvm::Value* time_of_day, llvm::Value* date);
+
+  void SetEval(llvm::Value* eval);
+  void SetFnCtxIdx(int fn_ctx_idx);
+
+  void SetBlocks(llvm::BasicBlock* entry_block, llvm::BasicBlock* null_block,
+      llvm::BasicBlock* non_null_block);
+
+  const std::vector<CodegenAnyValReadWriteInfo>& children() const { return children_; }
+  std::vector<CodegenAnyValReadWriteInfo>& children() { return children_; }
+
+  llvm::ConstantStruct* GetIrType() const;
+
+  llvm::Value* CodegenGetFnCtx() const;
+
+  // See CodegenAnyVal::ConvertToCanonicalForm.
+  void CodegenConvertToCanonicalForm();
+
+  // Creates a PHI node that will have the value 'non_null_value' if the incoming block is
+  // 'non_null_block_' and the value 'null_value' if it is 'null_block_'.
+  llvm::PHINode* CodegenNullPhiNode(llvm::Value* non_null_value, llvm::Value* null_value,
+      std::string name = "");
+
+  // Creates a PHI node the value of which tells whether it was reached from the non-null
+  // or the null path, i.e. whether this CodegenAnyValReadWriteInfo is null.
+  llvm::PHINode* CodegenIsNullPhiNode(std::string name = "");
+
+ private:
+  LlvmCodeGen* const codegen_;
+  LlvmBuilder* const builder_;
+  const ColumnType& type_;
+
+  // The stored data is one of the variants below.
+  std::variant<
+      std::monostate, // Initial state - no value has been set
+      llvm::Value*,   // Simple native types
+      PtrLenStruct,   // String and collection types
+      TimestampStruct // Timestamp
+      > data_;
+
+  // Pointer to the ScalarExprEvaluator in LLVM code.
+  llvm::Value* eval_ = nullptr;
+
+  // Index of the FunctionContext belonging to this value, in the ScalarExprEvaluator.
+  int fn_ctx_idx_ = -1;
+
+  // The block where codegen'd code for this object begins.
+  llvm::BasicBlock* entry_block_ = nullptr;
+
+  // The block we branch to if the read value is null.
+  llvm::BasicBlock* null_block_ = nullptr;
+
+  // The block we branch to if the read value is not null.
+  llvm::BasicBlock* non_null_block_ = nullptr;
+
+  // Vector of 'CodegenAnyValReadWriteInfo's for children in case this one refers to a
+  // struct.
+  std::vector<CodegenAnyValReadWriteInfo> children_;
+
+  bool is_data_initialized() const;
+  bool holds_simple_val() const;
+  bool holds_ptr_and_len() const;
+  bool holds_timestamp() const;
+};
+
+} // namespace impala
--- a/be/src/codegen/codegen-anyval.cc
+++ b/be/src/codegen/codegen-anyval.cc
@@ -18,6 +18,8 @@
 #include "codegen/codegen-anyval.h"

 #include "codegen/codegen-util.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
 #include "runtime/multi-precision.h"
 #include "runtime/raw-value.h"
 #include "common/names.h"
@@ -25,6 +27,7 @@
 using namespace impala;
 using namespace impala_udf;

+const char* CodegenAnyVal::LLVM_ANYVAL_NAME       = "struct.impala_udf::AnyVal";
 const char* CodegenAnyVal::LLVM_BOOLEANVAL_NAME   = "struct.impala_udf::BooleanVal";
 const char* CodegenAnyVal::LLVM_TINYINTVAL_NAME   = "struct.impala_udf::TinyIntVal";
 const char* CodegenAnyVal::LLVM_SMALLINTVAL_NAME  = "struct.impala_udf::SmallIntVal";
@@ -80,6 +83,7 @@ llvm::Type* CodegenAnyVal::GetLoweredType(LlvmCodeGen* cg, const ColumnType& typ
    case TYPE_FIXED_UDA_INTERMEDIATE: // { i64, i8* }
    case TYPE_ARRAY: // CollectionVal has same memory layout as StringVal.
    case TYPE_MAP: // CollectionVal has same memory layout as StringVal.
+    case TYPE_STRUCT: // StructVal has same memory layout as StringVal.
 #ifndef __aarch64__
      return llvm::StructType::get(cg->i64_type(), cg->ptr_type());
 #else
@@ -163,6 +167,10 @@ llvm::PointerType* CodegenAnyVal::GetUnloweredPtrType(
  return GetUnloweredType(cg, type)->getPointerTo();
 }

+llvm::PointerType* CodegenAnyVal::GetAnyValPtrType(LlvmCodeGen* cg) {
+  return cg->GetNamedType(LLVM_ANYVAL_NAME)->getPointerTo();
+}
+
 llvm::Value* CodegenAnyVal::CreateCall(LlvmCodeGen* cg, LlvmBuilder* builder,
    llvm::Function* fn, llvm::ArrayRef<llvm::Value*> args, const char* name,
    llvm::Value* result_ptr) {
@@ -248,7 +256,8 @@ llvm::Value* CodegenAnyVal::GetIsNull(const char* name) const {
    case TYPE_FIXED_UDA_INTERMEDIATE:
    case TYPE_TIMESTAMP:
    case TYPE_ARRAY:
-    case TYPE_MAP: {
+    case TYPE_MAP:
+    case TYPE_STRUCT: {
      // Lowered type is of form { i64, *}. Get the first byte of the i64 value.
      llvm::Value* v = builder_->CreateExtractValue(value_, 0);
      DCHECK(v->getType() == codegen_->i64_type());
@@ -300,7 +309,8 @@ void CodegenAnyVal::SetIsNull(llvm::Value* is_null) {
    case TYPE_FIXED_UDA_INTERMEDIATE:
    case TYPE_TIMESTAMP:
    case TYPE_ARRAY:
-    case TYPE_MAP: {
+    case TYPE_MAP:
+    case TYPE_STRUCT: {
      // Lowered type is of the form { i64, * }. Set the first byte of the i64 value to
      // 'is_null'
      llvm::Value* v = builder_->CreateExtractValue(value_, 0);
@@ -340,6 +350,10 @@ llvm::Value* CodegenAnyVal::GetVal(const char* name) {
      << "Use GetPtr and GetLen for FixedUdaIntermediate";
  DCHECK(type_.type != TYPE_TIMESTAMP)
      << "Use GetDate and GetTimeOfDay for TimestampVals";
+  DCHECK(!type_.IsCollectionType())
+      << "Use GetPtr and GetLen for CollectionVal";
+  DCHECK(!type_.IsStructType())
+      << "Use GetPtr and GetLen for StructVal";
  switch(type_.type) {
    case TYPE_BOOLEAN:
    case TYPE_TINYINT:
@@ -399,6 +413,7 @@ void CodegenAnyVal::SetVal(llvm::Value* val) {
  DCHECK(type_.type != TYPE_TIMESTAMP)
      << "Use SetDate and SetTimeOfDay for TimestampVals";
  DCHECK(!type_.IsCollectionType()) << "Use SetPtr and SetLen for CollectionVal";
+  DCHECK(!type_.IsStructType()) << "Use SetPtr and SetLen for StructVal";
  switch(type_.type) {
    case TYPE_BOOLEAN:
    case TYPE_TINYINT:
@@ -490,7 +505,7 @@ void CodegenAnyVal::SetVal(double val) {

 llvm::Value* CodegenAnyVal::GetPtr() {
  // Set the second pointer value to 'ptr'.
-  DCHECK(type_.IsStringType() || type_.IsCollectionType());
+  DCHECK(type_.IsStringType() || type_.IsCollectionType() || type_.IsStructType());
  llvm::Value* val = builder_->CreateExtractValue(value_, 1, name_);
 #ifdef __aarch64__
  val = builder_->CreateIntToPtr(val, codegen_->ptr_type());
@@ -500,7 +515,7 @@ llvm::Value* CodegenAnyVal::GetPtr() {

 llvm::Value* CodegenAnyVal::GetLen() {
  // Get the high bytes of the first value.
-  DCHECK(type_.IsStringType() || type_.IsCollectionType());
+  DCHECK(type_.IsStringType() || type_.IsCollectionType() || type_.IsStructType());
  llvm::Value* v = builder_->CreateExtractValue(value_, 0);
  return GetHighBits(32, v);
 }
@@ -508,7 +523,7 @@ llvm::Value* CodegenAnyVal::GetLen() {
 void CodegenAnyVal::SetPtr(llvm::Value* ptr) {
  // Set the second pointer value to 'ptr'.
  DCHECK(type_.IsStringType() || type_.type == TYPE_FIXED_UDA_INTERMEDIATE
-      || type_.IsCollectionType());
+      || type_.IsCollectionType() || type_.IsStructType());
 #ifdef __aarch64__
  ptr = builder_->CreatePtrToInt(ptr, codegen_->i64_type());
 #endif
@@ -518,7 +533,7 @@ void CodegenAnyVal::SetPtr(llvm::Value* ptr) {
 void CodegenAnyVal::SetLen(llvm::Value* len) {
  // Set the high bytes of the first value to 'len'.
  DCHECK(type_.IsStringType() || type_.type == TYPE_FIXED_UDA_INTERMEDIATE
-      || type_.IsCollectionType());
+      || type_.IsCollectionType() || type_.IsStructType());
  llvm::Value* v = builder_->CreateExtractValue(value_, 0);
  v = SetHighBits(32, len, v);
  value_ = builder_->CreateInsertValue(value_, v, 0, name_);
@@ -551,38 +566,53 @@ void CodegenAnyVal::SetDate(llvm::Value* date) {
  value_ = builder_->CreateInsertValue(value_, v, 0, name_);
 }

-llvm::Value* CodegenAnyVal::ConvertToPositiveZero(llvm::Value* val) {
-  // Replaces negative zero with positive, leaves everything else unchanged.
-  llvm::Value* is_negative_zero = builder_->CreateFCmpOEQ(
-      val, llvm::ConstantFP::getNegativeZero(val->getType()), "cmp_zero");
-  return builder_->CreateSelect(is_negative_zero,
-                llvm::ConstantFP::get(val->getType(), 0.0), val);
-}
-
-void CodegenAnyVal::ConvertToCanonicalForm() {
+llvm::Value* CodegenAnyVal::ConvertToCanonicalForm(LlvmCodeGen* codegen,
+      LlvmBuilder* builder, const ColumnType& type, llvm::Value* val) {
  // Convert the value to a bit pattern that is unambiguous.
  // Specifically, for floating point type values, NaN values are converted to
  // the same bit pattern, and -0 is converted to +0.
+  switch(type.type) {
+    case TYPE_FLOAT:
+    case TYPE_DOUBLE: {
+      llvm::Value* canonical_val;
+      if (type.type == TYPE_FLOAT) {
+        canonical_val = llvm::ConstantFP::getNaN(codegen->float_type());
+      } else {
+        canonical_val = llvm::ConstantFP::getNaN(codegen->double_type());
+      }
+      DCHECK(val != nullptr);
+      llvm::Value* is_nan = builder->CreateFCmpUNO(val, val, "cmp_nan");
+
+      return builder->CreateSelect(is_nan, canonical_val,
+          ConvertToPositiveZero(builder, val));
+    }
+    default:
+      return val;
+  }
+}
+
+void CodegenAnyVal::ConvertToCanonicalForm() {
  switch(type_.type) {
    case TYPE_FLOAT:
    case TYPE_DOUBLE: {
-      llvm::Value* raw = GetVal();
-      llvm::Value* canonical_val;
-      if (type_.type == TYPE_FLOAT) {
-        canonical_val = llvm::ConstantFP::getNaN(codegen_->float_type());
-      } else {
-        canonical_val = llvm::ConstantFP::getNaN(codegen_->double_type());
-      }
-      llvm::Value* is_nan = builder_->CreateFCmpUNO(raw, raw, "cmp_nan");
-
-      SetVal(builder_->CreateSelect(is_nan, canonical_val, ConvertToPositiveZero(raw)));
-      break;
+      llvm::Value* new_val = ConvertToCanonicalForm(codegen_,
+          builder_, type_, GetVal());
+      SetVal(new_val);
    }
    default:
      ;
  }
 }

+llvm::Value* CodegenAnyVal::ConvertToPositiveZero(LlvmBuilder* builder,
+    llvm::Value* val) {
+  // Replaces negative zero with positive zero; every other value is returned unchanged.
+  llvm::Type* fp_type = val->getType();
+  llvm::Value* neg_zero = llvm::ConstantFP::getNegativeZero(fp_type);
+  llvm::Value* is_zero = builder->CreateFCmpOEQ(val, neg_zero, "cmp_zero");
+  return builder->CreateSelect(is_zero, llvm::ConstantFP::get(fp_type, 0.0), val);
+}
+
 llvm::Value* CodegenAnyVal::GetLoweredPtr(const string& name) const {
  llvm::Value* lowered_ptr =
      codegen_->CreateEntryBlockAlloca(*builder_, value_->getType(), name.c_str());
@@ -600,192 +630,9 @@ llvm::Value* CodegenAnyVal::GetUnloweredPtr(const string& name) const {
      GetLoweredPtr(), GetUnloweredPtrType(codegen_, type_), name);
 }

-void CodegenAnyVal::LoadFromNativePtr(llvm::Value* raw_val_ptr) {
-  DCHECK(raw_val_ptr->getType()->isPointerTy());
-  llvm::Type* raw_val_type = raw_val_ptr->getType()->getPointerElementType();
-  DCHECK_EQ(raw_val_type, codegen_->GetSlotType(type_))
-      << endl
-      << LlvmCodeGen::Print(raw_val_ptr) << endl
-      << type_ << " => " << LlvmCodeGen::Print(
-          codegen_->GetSlotType(type_));
-  switch (type_.type) {
-    case TYPE_STRING:
-    case TYPE_VARCHAR: {
-      // Convert StringValue to StringVal
-      llvm::Value* string_value = builder_->CreateLoad(raw_val_ptr, "string_value");
-      SetPtr(builder_->CreateExtractValue(string_value, 0, "ptr"));
-      SetLen(builder_->CreateExtractValue(string_value, 1, "len"));
-      break;
-    }
-    case TYPE_CHAR:
-    case TYPE_FIXED_UDA_INTERMEDIATE: {
-      // Convert fixed-size slot to StringVal.
-      SetPtr(builder_->CreateBitCast(raw_val_ptr, codegen_->ptr_type()));
-      SetLen(codegen_->GetI32Constant(type_.len));
-      break;
-    }
-    case TYPE_TIMESTAMP: {
-      // Convert TimestampValue to TimestampVal
-      // TimestampValue has type
-      //   { boost::posix_time::time_duration, boost::gregorian::date }
-      // = { {{{i64}}}, {{i32}} }
-
-      llvm::Value* ts_value = builder_->CreateLoad(raw_val_ptr, "ts_value");
-      // Extract time_of_day i64 from boost::posix_time::time_duration.
-      uint32_t time_of_day_idxs[] = {0, 0, 0, 0};
-      llvm::Value* time_of_day =
-          builder_->CreateExtractValue(ts_value, time_of_day_idxs, "time_of_day");
-      DCHECK(time_of_day->getType()->isIntegerTy(64));
-      SetTimeOfDay(time_of_day);
-      // Extract i32 from boost::gregorian::date
-      uint32_t date_idxs[] = {1, 0, 0};
-      llvm::Value* date = builder_->CreateExtractValue(ts_value, date_idxs, "date");
-      DCHECK(date->getType()->isIntegerTy(32));
-      SetDate(date);
-      break;
-    }
-    case TYPE_BOOLEAN:
-    case TYPE_TINYINT:
-    case TYPE_SMALLINT:
-    case TYPE_INT:
-    case TYPE_BIGINT:
-    case TYPE_FLOAT:
-    case TYPE_DOUBLE:
-    case TYPE_DECIMAL:
-    case TYPE_DATE:
-      SetVal(builder_->CreateLoad(raw_val_ptr, "raw_val"));
-      break;
-    default:
-      DCHECK(false) << "NYI: " << type_.DebugString();
-      break;
-  }
-}
-
-void CodegenAnyVal::StoreToNativePtr(llvm::Value* raw_val_ptr, llvm::Value* pool_val) {
-  llvm::Type* raw_type = codegen_->GetSlotType(type_);
-  switch (type_.type) {
-    case TYPE_STRING:
-    case TYPE_VARCHAR:
-    case TYPE_ARRAY: // CollectionVal has same memory layout as StringVal.
-    case TYPE_MAP: { // CollectionVal has same memory layout as StringVal.
-      // Convert StringVal to StringValue
-      llvm::Value* string_value = llvm::Constant::getNullValue(raw_type);
-      llvm::Value* len = GetLen();
-      string_value = builder_->CreateInsertValue(string_value, len, 1);
-      if (pool_val == nullptr) {
-        // Set string_value.ptr from this->ptr
-        string_value = builder_->CreateInsertValue(string_value, GetPtr(), 0);
-      } else {
-        // Allocate string_value.ptr from 'pool_val' and copy this->ptr
-        llvm::Value* new_ptr =
-            codegen_->CodegenMemPoolAllocate(builder_, pool_val, len, "new_ptr");
-        codegen_->CodegenMemcpy(builder_, new_ptr, GetPtr(), len);
-        string_value = builder_->CreateInsertValue(string_value, new_ptr, 0);
-      }
-      builder_->CreateStore(string_value, raw_val_ptr);
-      break;
-    }
-    case TYPE_CHAR:
-      codegen_->CodegenMemcpy(builder_, raw_val_ptr, GetPtr(), type_.len);
-      break;
-    case TYPE_FIXED_UDA_INTERMEDIATE:
-      DCHECK(false) << "FIXED_UDA_INTERMEDIATE does not need to be copied: the "
-                    << "StringVal must be set up to point to the output slot";
-      break;
-    case TYPE_TIMESTAMP: {
-      // Convert TimestampVal to TimestampValue
-      // TimestampValue has type
-      //   { boost::posix_time::time_duration, boost::gregorian::date }
-      // = { {{{i64}}}, {{i32}} }
-      llvm::Value* timestamp_value = llvm::Constant::getNullValue(raw_type);
-      uint32_t time_of_day_idxs[] = {0, 0, 0, 0};
-      timestamp_value =
-          builder_->CreateInsertValue(timestamp_value, GetTimeOfDay(), time_of_day_idxs);
-      uint32_t date_idxs[] = {1, 0, 0};
-      timestamp_value =
-          builder_->CreateInsertValue(timestamp_value, GetDate(), date_idxs);
-      builder_->CreateStore(timestamp_value, raw_val_ptr);
-      break;
-    }
-    case TYPE_BOOLEAN:
-    case TYPE_TINYINT:
-    case TYPE_SMALLINT:
-    case TYPE_INT:
-    case TYPE_BIGINT:
-    case TYPE_FLOAT:
-    case TYPE_DOUBLE:
-    case TYPE_DECIMAL:
-    case TYPE_DATE:
-      // The representations of the types match - just store the value.
-      builder_->CreateStore(GetVal(), raw_val_ptr);
-      break;
-    default:
-      DCHECK(false) << "NYI: " << type_.DebugString();
-      break;
-  }
-}
-
-llvm::Value* CodegenAnyVal::ToNativePtr(llvm::Value* pool_val) {
-  llvm::Value* native_ptr = codegen_->CreateEntryBlockAlloca(*builder_,
-      codegen_->GetSlotType(type_));
-  StoreToNativePtr(native_ptr, pool_val);
-  return native_ptr;
-}
-
-// Example output for materializing an int slot:
-//
-//   ; [insert point starts here]
-//   %is_null = trunc i64 %src to i1
-//   br i1 %is_null, label %null, label %non_null ;
-//
-// non_null:                                         ; preds = %entry
-//   %slot = getelementptr inbounds { i8, i32, %"struct.impala::StringValue" }* %tuple,
-//       i32 0, i32 1
-//   %2 = ashr i64 %src, 32
-//   %3 = trunc i64 %2 to i32
-//   store i32 %3, i32* %slot
-//   br label %end_write
-//
-// null:                                             ; preds = %entry
-//   call void @SetNull6({ i8, i32, %"struct.impala::StringValue" }* %tuple)
-//   br label %end_write
-//
-// end_write:                                        ; preds = %null, %non_null
-//   ; [insert point ends here]
-void CodegenAnyVal::WriteToSlot(const SlotDescriptor& slot_desc, llvm::Value* tuple_val,
-    llvm::Value* pool_val, llvm::BasicBlock* insert_before) {
-  DCHECK(tuple_val->getType()->isPointerTy());
-  DCHECK(tuple_val->getType()->getPointerElementType()->isStructTy());
-  llvm::LLVMContext& context = codegen_->context();
-  llvm::Function* fn = builder_->GetInsertBlock()->getParent();
-
-  // Create new block that will come after conditional blocks if necessary
-  if (insert_before == nullptr) {
-    insert_before = llvm::BasicBlock::Create(context, "end_write", fn);
-  }
-
-  // Create new basic blocks and br instruction
-  llvm::BasicBlock* non_null_block =
-      llvm::BasicBlock::Create(context, "non_null", fn, insert_before);
-  llvm::BasicBlock* null_block =
-      llvm::BasicBlock::Create(context, "null", fn, insert_before);
-  builder_->CreateCondBr(GetIsNull(), null_block, non_null_block);
-
-  // Non-null block: write slot
-  builder_->SetInsertPoint(non_null_block);
-  llvm::Value* slot =
-      builder_->CreateStructGEP(nullptr, tuple_val, slot_desc.llvm_field_idx(), "slot");
-  StoreToNativePtr(slot, pool_val);
-  builder_->CreateBr(insert_before);
-
-  // Null block: set null bit
-  builder_->SetInsertPoint(null_block);
-  slot_desc.CodegenSetNullIndicator(
-      codegen_, builder_, tuple_val, codegen_->true_value());
-  builder_->CreateBr(insert_before);
-
-  // Leave builder_ after conditional blocks
-  builder_->SetInsertPoint(insert_before);
+llvm::Value* CodegenAnyVal::GetAnyValPtr(const std::string& name) const {
+  return builder_->CreateBitCast(
+      GetLoweredPtr(), GetAnyValPtrType(codegen_), name);
 }

 llvm::Value* CodegenAnyVal::Eq(CodegenAnyVal* other) {
@@ -880,9 +727,9 @@ llvm::Value* CodegenAnyVal::EqToNativePtr(llvm::Value* native_ptr,

 llvm::Value* CodegenAnyVal::Compare(CodegenAnyVal* other, const char* name) {
  DCHECK_EQ(type_, other->type_);
-  llvm::Value* v1 = ToNativePtr();
+  llvm::Value* v1 = SlotDescriptor::CodegenStoreNonNullAnyValToNewAlloca(*this);
  llvm::Value* void_v1 = builder_->CreateBitCast(v1, codegen_->ptr_type());
-  llvm::Value* v2 = other->ToNativePtr();
+  llvm::Value* v2 = SlotDescriptor::CodegenStoreNonNullAnyValToNewAlloca(*other);
  llvm::Value* void_v2 = builder_->CreateBitCast(v2, codegen_->ptr_type());
  // Create a global constant of the values' ColumnType. It needs to be a constant
  // for constant propagation and dead code elimination in 'compare_fn'.
@@ -1001,3 +848,337 @@ CodegenAnyVal CodegenAnyVal::GetNonNullVal(LlvmCodeGen* codegen, LlvmBuilder* bu
  llvm::Value* value = llvm::Constant::getNullValue(val_type);
  return CodegenAnyVal(codegen, builder, type, value, name);
 }
+
+// Generates code that builds the child pointer list of a StructVal from
+// 'read_write_info', which must be of struct type with at least one child.
+//
+// Code generation starts in 'read_write_info.non_null_block()'. A buffer holding one
+// pointer per child is requested through FN_CTX_ALLOCATE_FOR_RESULTS. If the returned
+// buffer is null, control goes to 'read_write_info.null_block()'; otherwise the children
+// are processed one after the other, control being chained from the code of one child to
+// the entry block of the next. Each child is turned into an AnyVal, passed to
+// STORE_RESULT_IN_EVALUATOR, and the pointer returned by that call is written into the
+// buffer. Finally a branch to 'struct_produce_value_block' is emitted and the builder is
+// positioned in that block.
+//
+// On return '*ptr' is the buffer cast to 'ptr_type()' and '*len' is the number of
+// children as an i32 constant.
+//
+// Returns the last block generated so we can set it as a predecessor in PHI nodes.
+llvm::BasicBlock* CodegenAnyVal::CreateStructValFromReadWriteInfo(
+    const CodegenAnyValReadWriteInfo& read_write_info,
+    llvm::Value** ptr, llvm::Value** len, llvm::BasicBlock* struct_produce_value_block) {
+  LlvmCodeGen* codegen = read_write_info.codegen();
+  LlvmBuilder* builder = read_write_info.builder();
+
+  DCHECK(read_write_info.type().IsStructType() && read_write_info.children().size() > 0);
+  DCHECK(read_write_info.GetEval() != nullptr);
+  DCHECK_GT(read_write_info.GetFnCtxIdx(), -1);
+
+  // Cross-compiled functions this hand-crafted function will call.
+  llvm::Function* const allocate_for_results_fn =
+         codegen->GetFunction(IRFunction::FN_CTX_ALLOCATE_FOR_RESULTS, false);
+  llvm::Function* const store_result_in_eval_fn =
+         codegen->GetFunction(IRFunction::STORE_RESULT_IN_EVALUATOR, false);
+
+  builder->SetInsertPoint(read_write_info.non_null_block());
+  std::size_t num_children = read_write_info.children().size();
+  DCHECK_GT(num_children, 0);
+  llvm::Value* fn_ctx = read_write_info.CodegenGetFnCtx();
+
+  // Allocate a buffer for the child pointers. If allocation fails, the struct will be
+  // null.
+  llvm::Value* children_ptrs_buffer = builder->CreateCall(allocate_for_results_fn,
+      {fn_ctx, codegen->GetI64Constant(num_children * sizeof(uint8_t*))},
+      "children_ptrs_buffer");
+  llvm::Value* cast_children_ptrs_buffer = builder->CreateBitCast(
+      children_ptrs_buffer, codegen->ptr_ptr_type(), "cast_children_ptrs_buffer");
+  llvm::Value* buffer_is_null = builder->CreateIsNull(
+      cast_children_ptrs_buffer, "buffer_is_null");
+
+  // Branch based on 'buffer_is_null'.
+  read_write_info.children()[0].entry_block().BranchToIfNot(builder, buffer_is_null,
+      NonWritableBasicBlock(read_write_info.null_block()));
+  for (std::size_t i = 0; i < num_children; ++i) {
+    const CodegenAnyValReadWriteInfo& child_codegen_value_read_write_info =
+        read_write_info.children()[i];
+    // Merge the null and non-null paths of the child into a single AnyVal.
+    CodegenAnyVal child_any_val =
+        CreateFromReadWriteInfo(child_codegen_value_read_write_info);
+
+    // The store function takes the child's type by pointer, so the IR constant describing
+    // the type is spilled into an entry-block alloca first.
+    llvm::ConstantStruct* child_type_ir =
+        child_codegen_value_read_write_info.GetIrType();
+    llvm::Value* child_type_ir_ptr = codegen->CreateEntryBlockAlloca(
+        *builder, child_type_ir->getType(), "child_type_ptr");
+    builder->CreateStore(child_type_ir, child_type_ir_ptr);
+
+    llvm::Value* child_any_val_ptr = child_any_val.GetAnyValPtr("child_ptr");
+
+    // Convert and store the child in the corresponding ScalarExprEvaluator - this takes
+    // care of the lifetime of the object.
+    llvm::Value* stored_child_ptr = builder->CreateCall(store_result_in_eval_fn,
+        {child_codegen_value_read_write_info.GetEval(), child_any_val_ptr,
+        child_type_ir_ptr},
+        "stored_value");
+
+    // The address where the child pointer should be written. This is in the pointer list
+    // of the StructVal.
+    llvm::Value* dst_child_ptr_addr = builder->CreateInBoundsGEP(
+        cast_children_ptrs_buffer, codegen->GetI32Constant(i), "child_ptr_addr");
+    builder->CreateStore(stored_child_ptr, dst_child_ptr_addr);
+
+    if (i < num_children - 1) {
+      // Do not add a new block after the last child.
+      read_write_info.children()[i+1].entry_block().BranchTo(builder);
+    }
+  }
+
+  // The block the builder is in after the last child is the one that branches to
+  // 'struct_produce_value_block', so this is the predecessor callers need for PHI nodes.
+  llvm::BasicBlock* last_block = builder->GetInsertBlock();
+  builder->CreateBr(struct_produce_value_block);
+  builder->SetInsertPoint(struct_produce_value_block);
+  *ptr = builder->CreateBitCast(children_ptrs_buffer, codegen->ptr_type());
+  *len = codegen->GetI32Constant(num_children);
+  return last_block;
+}
+
+// Generates code that assembles a CodegenAnyVal from 'read_write_info'. A new
+// 'produce_value' block is created to which both the null path and the non-null path
+// branch. In that block every component of the value, as well as the null indicator, is
+// a PHI node that takes the component provided by 'read_write_info' when control comes
+// from the non-null path and a zero/null constant when it comes from the null path. The
+// builder's insert point is left in 'produce_value'.
+CodegenAnyVal CodegenAnyVal::CreateFromReadWriteInfo(
+    const CodegenAnyValReadWriteInfo& read_write_info) {
+  LlvmCodeGen* codegen = read_write_info.codegen();
+  LlvmBuilder* builder = read_write_info.builder();
+  const ColumnType& type = read_write_info.type();
+
+  llvm::LLVMContext& context = codegen->context();
+  llvm::Function* fn = read_write_info.non_null_block()->getParent();
+
+  llvm::BasicBlock* produce_value_block =
+      llvm::BasicBlock::Create(context, "produce_value", fn);
+
+  // The null path only contributes constants, so it jumps straight to the merge block.
+  builder->SetInsertPoint(read_write_info.null_block());
+  builder->CreateBr(produce_value_block);
+
+  // For structs the branch from the non-null path is emitted by
+  // CreateStructValFromReadWriteInfo() after the code for the children.
+  if (!type.IsStructType()) {
+    builder->SetInsertPoint(read_write_info.non_null_block());
+    builder->CreateBr(produce_value_block);
+  }
+
+  builder->SetInsertPoint(produce_value_block);
+  CodegenAnyVal result = CodegenAnyVal::GetNonNullVal(codegen, builder, type, "result");
+
+  // For structs the code that reads the value consists of multiple basic blocks, so the
+  // block that should branch to 'produce_value_block' is not
+  // 'read_write_info.non_null_block'. This variable will be set to the appropriate block.
+  llvm::BasicBlock* non_null_incoming_block = read_write_info.non_null_block();
+  if (type.IsStringType() || type.type == TYPE_FIXED_UDA_INTERMEDIATE
+      || type.IsCollectionType() || type.IsStructType()) {
+    // Types represented by a pointer and a length.
+    llvm::Value* ptr = nullptr;
+    llvm::Value* len = nullptr;
+
+    if (type.IsStructType()) {
+      non_null_incoming_block = CreateStructValFromReadWriteInfo(
+          read_write_info, &ptr, &len, produce_value_block);
+    } else {
+      ptr = read_write_info.GetPtrAndLen().ptr;
+      len = read_write_info.GetPtrAndLen().len;
+    }
+
+    DCHECK(ptr != nullptr);
+    DCHECK(len != nullptr);
+
+    llvm::Value* ptr_null = llvm::Constant::getNullValue(ptr->getType());
+    llvm::PHINode* ptr_phi = LlvmCodeGen::CreateBinaryPhiNode(builder, ptr, ptr_null,
+        non_null_incoming_block, read_write_info.null_block());
+
+    llvm::Value* len_null = llvm::ConstantInt::get(len->getType(), 0);
+    llvm::PHINode* len_phi = LlvmCodeGen::CreateBinaryPhiNode(builder, len, len_null,
+        non_null_incoming_block, read_write_info.null_block());
+
+    result.SetPtr(ptr_phi);
+    result.SetLen(len_phi);
+  } else if (type.type == TYPE_TIMESTAMP) {
+    // Timestamps consist of two components: the time of day and the date.
+    llvm::Value* time_of_day_null = llvm::ConstantInt::get(
+        read_write_info.GetTimeAndDate().time_of_day->getType(), 0);
+    llvm::PHINode* time_of_day_phi = LlvmCodeGen::CreateBinaryPhiNode(builder,
+        read_write_info.GetTimeAndDate().time_of_day, time_of_day_null,
+        non_null_incoming_block, read_write_info.null_block());
+
+    llvm::Value* date_null = llvm::ConstantInt::get(
+        read_write_info.GetTimeAndDate().date->getType(), 0);
+    llvm::PHINode* date_phi = LlvmCodeGen::CreateBinaryPhiNode(builder,
+        read_write_info.GetTimeAndDate().date, date_null, non_null_incoming_block,
+        read_write_info.null_block());
+
+    result.SetTimeOfDay(time_of_day_phi);
+    result.SetDate(date_phi);
+  } else {
+    // All remaining types are represented by a single value.
+    llvm::Value* null = llvm::Constant::getNullValue(
+        read_write_info.GetSimpleVal()->getType());
+    llvm::PHINode* val_phi = LlvmCodeGen::CreateBinaryPhiNode(builder,
+        read_write_info.GetSimpleVal(), null, non_null_incoming_block,
+        read_write_info.null_block());
+
+    result.SetVal(val_phi);
+  }
+
+  // The null indicator is 0 when coming from the non-null path and 1 when coming from
+  // the null path.
+  llvm::Value* zero = codegen->GetI8Constant(0);
+  llvm::Value* one = codegen->GetI8Constant(1);
+
+  // PHI nodes must be inserted at the beginning of basic blocks.
+  llvm::IRBuilderBase::InsertPoint ip = builder->saveIP();
+  builder->SetInsertPoint(produce_value_block, produce_value_block->begin());
+  llvm::PHINode* is_null_phi = LlvmCodeGen::CreateBinaryPhiNode(builder, zero, one,
+      non_null_incoming_block, read_write_info.null_block(), "is_null_phi");
+  builder->restoreIP(ip);
+  result.SetIsNull(is_null_phi);
+  return result;
+}
+
+CodegenAnyValReadWriteInfo CodegenAnyVal::ToReadWriteInfo() {
+  // Remember where the caller was emitting code; this is restored before returning.
+  const llvm::IRBuilderBase::InsertPoint saved_ip = builder_->saveIP();
+
+  llvm::LLVMContext& ctx = codegen_->context();
+  llvm::Function* const parent_fn = builder_->GetInsertBlock()->getParent();
+
+  CodegenAnyValReadWriteInfo rwi(codegen_, builder_, type_);
+
+  // 'entry' tests the null indicator of this value and jumps to 'null' or 'non_null'.
+  llvm::BasicBlock* const entry_bb = llvm::BasicBlock::Create(ctx, "entry", parent_fn);
+  builder_->SetInsertPoint(entry_bb);
+  llvm::BasicBlock* const non_null_bb =
+      llvm::BasicBlock::Create(ctx, "non_null", parent_fn);
+  llvm::BasicBlock* const null_bb = llvm::BasicBlock::Create(ctx, "null", parent_fn);
+  builder_->CreateCondBr(GetIsNull(), null_bb, non_null_bb);
+
+  // Extract the value in the non-null block, in the representation matching the type.
+  builder_->SetInsertPoint(non_null_bb);
+  if (type_.IsStructType()) {
+    StructToReadWriteInfo(&rwi, GetPtr());
+  } else if (type_.IsStringType() || type_.IsCollectionType()) {
+    rwi.SetPtrAndLen(GetPtr(), GetLen());
+  } else if (type_.type == TYPE_TIMESTAMP) {
+    rwi.SetTimeAndDate(GetTimeOfDay(), GetDate());
+  } else {
+    rwi.SetSimpleVal(GetVal());
+  }
+
+  rwi.SetBlocks(entry_bb, null_bb, non_null_bb);
+
+  builder_->restoreIP(saved_ip);
+  return rwi;
+}
+
+// Reads the value of a non-struct child of a struct-typed 'AnyVal' and records it in
+// 'read_write_info'. 'child_ptr' points to the stored value of the child and 'type' is
+// the type of the child. The code is emitted at the current insert point of the builder
+// of 'read_write_info'.
+void CodegenAnyVal::StructChildToReadWriteInfo(
+    CodegenAnyValReadWriteInfo* read_write_info,
+    const ColumnType& type, llvm::Value* child_ptr) {
+  LlvmCodeGen* codegen = read_write_info->codegen();
+  LlvmBuilder* builder = read_write_info->builder();
+  llvm::Type* slot_type = codegen->GetSlotType(type);
+
+  // Children of struct-typed 'AnyVal's are stored in one of the members of an 'ExprValue'
+  // belonging to the appropriate 'ScalarExprEvaluator'. These have the same type and
+  // layout as the values stored in the slots, except for BOOLEANs, which are stored as i1
+  // in the slots but are stored as 'bool' variables in 'ExprValue' (as children of struct
+  // 'AnyVal's). As this code deals with values in 'AnyVal's, for BOOLEANS we use i8,
+  // which is the LLVM type corresponding to 'bool', and for other types we use the slot
+  // type.
+  llvm::Type* child_type = type.type == TYPE_BOOLEAN ?
+    codegen->i8_type() : slot_type;
+  llvm::Value* cast_child_ptr = builder->CreateBitCast(child_ptr,
+      child_type->getPointerTo(), "cast_child_ptr");
+
+  switch (type.type) {
+    case TYPE_STRING:
+    case TYPE_VARCHAR:
+    case TYPE_CHAR:
+    case TYPE_ARRAY: // CollectionVal has the same memory layout as StringVal.
+    case TYPE_MAP: { // CollectionVal has the same memory layout as StringVal.
+      llvm::Value* ptr_addr = builder->CreateStructGEP(
+          nullptr, cast_child_ptr, 0, "ptr_addr");
+      llvm::Value* ptr = builder->CreateLoad(ptr_addr, "ptr");
+
+      // For CHAR the length comes from the type, for the other types it is loaded from
+      // the stored value.
+      llvm::Value* len;
+      if (type.type == TYPE_CHAR) {
+        len = codegen->GetI32Constant(type.len);
+      } else {
+        llvm::Value* len_addr = builder->CreateStructGEP(
+            nullptr, cast_child_ptr, 1, "len_addr");
+        len = builder->CreateLoad(len_addr, "len");
+      }
+      read_write_info->SetPtrAndLen(ptr, len);
+      break;
+    }
+    case TYPE_FIXED_UDA_INTERMEDIATE:
+      DCHECK(false) << "FIXED_UDA_INTERMEDIATE does not need to be copied: the "
+                    << "StringVal must be set up to point to the output slot";
+      break;
+    case TYPE_TIMESTAMP: {
+      // Both fields are loaded through pointers cast to plain integer pointer types.
+      // The cast values get their own '_lowered' names so that they do not clash with the
+      // names of the GEP results in the generated IR.
+      llvm::Value* time_of_day_addr = builder->CreateStructGEP(
+          nullptr, cast_child_ptr, 0, "time_of_day_addr");
+      llvm::Value* time_of_day_addr_lowered = builder->CreateBitCast(
+          time_of_day_addr, codegen->i64_ptr_type(), "time_of_day_addr_lowered");
+      llvm::Value* time_of_day = builder->CreateLoad(
+          time_of_day_addr_lowered, "time_of_day");
+
+      llvm::Value* date_addr = builder->CreateStructGEP(
+          nullptr, cast_child_ptr, 1, "date_addr");
+      llvm::Value* date_addr_lowered = builder->CreateBitCast(
+          date_addr, codegen->i32_ptr_type(), "date_addr_lowered");
+      llvm::Value* date = builder->CreateLoad(date_addr_lowered, "date");
+      read_write_info->SetTimeAndDate(time_of_day, date);
+      break;
+    }
+    case TYPE_BOOLEAN:
+    case TYPE_TINYINT:
+    case TYPE_SMALLINT:
+    case TYPE_INT:
+    case TYPE_BIGINT:
+    case TYPE_FLOAT:
+    case TYPE_DOUBLE:
+    case TYPE_DECIMAL:
+    case TYPE_DATE: {
+      // The representations of the types match - just take the value.
+      llvm::Value* child = builder->CreateLoad(child_type, cast_child_ptr, "child");
+      read_write_info->SetSimpleVal(child);
+      break;
+    }
+    default:
+      DCHECK(false) << type.DebugString();
+      break;
+  }
+}
+
+// Populates the children of 'read_write_info', which must be of struct type.
+// 'children_ptr' points to the list of child pointers of the struct value. For each child
+// new 'entry', 'non_null' and 'null' blocks are created: the entry block loads the child
+// pointer and branches on whether it is null, and the code reading the child's value is
+// emitted in the non-null block. Nested structs are handled recursively. No branches
+// into the entry blocks or out of the null/non-null blocks are emitted here, and the
+// builder's insert point is left wherever code generation for the last child ended.
+void CodegenAnyVal::StructToReadWriteInfo(
+    CodegenAnyValReadWriteInfo* read_write_info,
+    llvm::Value* children_ptr) {
+  const ColumnType& type = read_write_info->type();
+  DCHECK(type.IsStructType());
+
+  LlvmCodeGen* codegen = read_write_info->codegen();
+  llvm::LLVMContext& context = codegen->context();
+  LlvmBuilder* builder = read_write_info->builder();
+  llvm::Function* fn = builder->GetInsertBlock()->getParent();
+
+  llvm::Value* cast_children_ptr = builder->CreateBitCast(
+      children_ptr, codegen->ptr_ptr_type(), "cast_children_ptr");
+
+  // Index with the container's unsigned size type to avoid a signed/unsigned comparison
+  // in the loop condition.
+  const std::size_t num_children = type.children.size();
+  for (std::size_t i = 0; i < num_children; ++i) {
+    const ColumnType& child_type = type.children[i];
+    CodegenAnyValReadWriteInfo child_read_write_info(codegen, builder, child_type);
+
+    llvm::BasicBlock* child_entry_block = llvm::BasicBlock::Create(context, "entry", fn);
+
+    // Load the i-th pointer from the list of child pointers.
+    builder->SetInsertPoint(child_entry_block);
+    llvm::Value* child_ptr_addr = builder->CreateInBoundsGEP(cast_children_ptr,
+        codegen->GetI32Constant(i), "child_ptr_addr");
+    llvm::Value* child_ptr = builder->CreateLoad(codegen->ptr_type(), child_ptr_addr,
+        "child_ptr");
+
+    // Check whether child_ptr is NULL.
+    llvm::Value* child_is_null = builder->CreateIsNull(child_ptr, "child_is_null");
+
+    llvm::BasicBlock* non_null_block =
+        llvm::BasicBlock::Create(context, "non_null", fn);
+    llvm::BasicBlock* null_block =
+        llvm::BasicBlock::Create(context, "null", fn);
+    builder->CreateCondBr(child_is_null, null_block, non_null_block);
+    builder->SetInsertPoint(non_null_block);
+
+    if (child_type.IsStructType()) {
+      // A nested struct is stored as a lowered struct value: load it and recurse on its
+      // own list of child pointers.
+      llvm::Value* child_struct_ptr = builder->CreateBitCast(
+          child_ptr, GetLoweredPtrType(codegen, child_type), "child_struct_ptr");
+      llvm::Value* child_struct = builder->CreateLoad(child_struct_ptr, "child_struct");
+      CodegenAnyVal child_anyval = CodegenAnyVal(
+          codegen, builder, child_type, child_struct);
+      llvm::Value* child_children_ptr = child_anyval.GetPtr();
+      StructToReadWriteInfo(&child_read_write_info, child_children_ptr);
+    } else {
+      StructChildToReadWriteInfo(&child_read_write_info, child_type, child_ptr);
+    }
+
+    child_read_write_info.SetBlocks(child_entry_block, null_block, non_null_block);
+    read_write_info->children().emplace_back(std::move(child_read_write_info));
+  }
+}
--- a/be/src/codegen/codegen-anyval.h
+++ b/be/src/codegen/codegen-anyval.h
@@ -28,6 +28,8 @@ class Value;

 namespace impala {

+class CodegenAnyValReadWriteInfo;
+
 /// Class for handling AnyVal subclasses during codegen. Codegen functions should use this
 /// wrapper instead of creating or manipulating *Val values directly in most cases. This is
 /// because the struct types must be lowered to integer types in many cases in order to
@@ -50,6 +52,7 @@ namespace impala {
 /// TYPE_DOUBLE/DoubleVal: { i8, double }
 /// TYPE_STRING,TYPE_VARCHAR,TYPE_CHAR,TYPE_FIXED_UDA_INTERMEDIATE/StringVal: { i64, i8* }
 /// TYPE_ARRAY/TYPE_MAP/CollectionVal: { i64, i8* }
+/// TYPE_STRUCT/StructVal: { i64, i8* }
 /// TYPE_TIMESTAMP/TimestampVal: { i64, i64 }
 /// TYPE_DECIMAL/DecimalVal (isn't lowered):
 /// %"struct.impala_udf::DecimalVal" { {i8}, [15 x i8], {i128} }
@@ -59,6 +62,7 @@ namespace impala {
 /// - unit tests
 class CodegenAnyVal {
 public:
+  static const char* LLVM_ANYVAL_NAME;
  static const char* LLVM_BOOLEANVAL_NAME;
  static const char* LLVM_TINYINTVAL_NAME;
  static const char* LLVM_SMALLINTVAL_NAME;
@@ -108,6 +112,9 @@ class CodegenAnyVal {
  /// E.g.: TYPE_BOOLEAN => %"struct.impala_udf::BooleanVal"*
  static llvm::PointerType* GetUnloweredPtrType(LlvmCodeGen* cg, const ColumnType& type);

+  /// Returns the pointer type to the AnyVal base class (AnyVal*).
+  static llvm::PointerType* GetAnyValPtrType(LlvmCodeGen* cg);
+
  /// Return the constant type-lowered value corresponding to a null *Val.
  /// E.g.: passing TYPE_DOUBLE (corresponding to the lowered DoubleVal { i8, double })
  /// returns the constant struct { 1, 0.0 }
@@ -193,60 +200,25 @@ class CodegenAnyVal {
  /// unlowered type. This *Val should be non-null. The output variable is called 'name'.
  llvm::Value* GetUnloweredPtr(const std::string& name = "") const;

-  /// Load this *Val's value from 'raw_val_ptr', which must be a pointer to the matching
-  /// native type, e.g. a StringValue or TimestampValue slot in a tuple.
-  void LoadFromNativePtr(llvm::Value* raw_val_ptr);
+  /// Stores this value in an alloca allocation, and returns the pointer, which has the
+  /// type 'AnyVal*'. This *Val should be non-null. The output variable is called 'name'.
+  llvm::Value* GetAnyValPtr(const std::string& name = "") const;

-  /// Stores this *Val's value into a native slot, e.g. a StringValue or TimestampValue.
-  /// This should only be used if this *Val is not null.
+  /// Rewrites the bit values of a value in a canonical form. Floating point values may be
+  /// "NaN". Nominally, NaN != NaN, but for grouping purposes we want that to not be the
+  /// case. Therefore all NaN values need to be converted into a consistent form where all
+  /// bits are the same. This method will do that - ensure that all NaN values have the
+  /// same bit pattern. Similarly, -0 == +0 is handled here.
  ///
-  /// Not valid to call for FIXED_UDA_INTERMEDIATE: in that case the StringVal must be
-  /// set up to point directly to the underlying slot, e.g. by LoadFromNativePtr().
-  ///
-  /// If 'pool_val' is non-NULL, var-len data will be copied into 'pool_val'.
-  /// 'pool_val' has to be of type MemPool*.
-  void StoreToNativePtr(llvm::Value* raw_val_ptr, llvm::Value* pool_val = nullptr);
-
-  /// Creates a pointer, e.g. StringValue* to an alloca() allocation with the
-  /// equivalent of this value. This should only be used if this Val is not null.
-  ///
-  /// If 'pool_val' is non-NULL, var-len data will be copied into 'pool_val'.
-  /// 'pool_val' has to be of type MemPool*.
-  llvm::Value* ToNativePtr(llvm::Value* pool_val = nullptr);
-
-  /// Writes this *Val's value to the appropriate slot in 'tuple' if non-null, or sets the
-  /// appropriate null bit if null. This assumes null bits are initialized to 0. Analogous
-  /// to RawValue::Write(void* value, Tuple*, SlotDescriptor*, MemPool*). 'tuple' should
-  /// be a pointer to the generated LLVM struct type, not an opaque Tuple*.
-  ///
-  /// Creates new basic blocks in order to branch on the 'is_null' fields, and leaves
-  /// builder_'s insert point at the block after these new blocks. This block will be
-  /// 'insert_before' if specified, or a new basic block created at the end of the
-  /// function if 'insert_before' is NULL.
-  ///
-  /// If 'pool_val' is non-NULL, var-len data will be copied into 'pool_val'.
-  /// 'pool_val' has to be of type MemPool*.
-  void WriteToSlot(const SlotDescriptor& slot_desc, llvm::Value* tuple,
-      llvm::Value* pool_val, llvm::BasicBlock* insert_before = nullptr);
-
-  /// Rewrites the bit values of a value in a canonical form.
-  /// Floating point values may be "NaN".  Nominally, NaN != NaN, but
-  /// for grouping purposes we want that to not be the case.
-  /// Therefore all NaN values need to be converted into a consistent
-  /// form where all bits are the same.  This method will do that -
-  /// ensure that all NaN values have the same bit pattern.
-  /// Similarly, -0 == +0 is handled here.
-  ///
-  /// Generically speaking, a canonical form of a value ensures that
-  /// all ambiguity is removed from a value's bit settings -- if there
-  /// are bits that can be freely changed without changing the logical
-  /// value of the value. (Currently this only has an impact for NaN
-  /// float and double values.)
+  /// Generically speaking, a canonical form of a value ensures that all ambiguity is
+  /// removed from a value's bit settings -- if there are bits that can be freely changed
+  /// without changing the logical value of the value. (Currently this only has an impact
+  /// for NaN float and double values.)
  void ConvertToCanonicalForm();

-  /// Replaces negative floating point zero with positive zero,
-  /// leaves everything else unchanged.
-  llvm::Value* ConvertToPositiveZero(llvm::Value* val);
+  /// Same as the above but works on a raw llvm::Value*.
+  static llvm::Value* ConvertToCanonicalForm(LlvmCodeGen* codegen, LlvmBuilder* builder,
+      const ColumnType& type, llvm::Value* val);

  /// Returns the i1 result of this == other. this and other must be non-null.
  llvm::Value* Eq(CodegenAnyVal* other);
@@ -285,6 +257,18 @@ class CodegenAnyVal {
      codegen_(nullptr), builder_(nullptr) {}

  LlvmCodeGen* codegen() const { return codegen_; }
+  LlvmBuilder* builder() const { return builder_; }
+  /// Returns the ColumnType stored in this object. Const-qualified, like codegen() and
+  /// builder(), so that it can also be called on const CodegenAnyVal objects.
+  const ColumnType& type() const { return type_; }
+
+  /// Generates code that builds a CodegenAnyVal from 'read_write_info'. A new
+  /// 'produce_value' basic block is created to which the null and the non-null paths of
+  /// 'read_write_info' branch and in which the components of the value and the null
+  /// indicator are merged with PHI nodes. The insert point of the LlvmBuilder is left in
+  /// that block.
+  static CodegenAnyVal CreateFromReadWriteInfo(
+      const CodegenAnyValReadWriteInfo& read_write_info);
+
+  /// Generates a 'CodegenAnyValReadWriteInfo' so that a destination can use it to write
+  /// the value. New 'entry', 'non_null' and 'null' basic blocks are added to the current
+  /// function: the entry block branches on the null indicator of this value and the value
+  /// is extracted in the non-null block.
+  ///
+  /// After the function returns, the insert point of the LlvmBuilder will be reset to
+  /// where it was before the call.
+  CodegenAnyValReadWriteInfo ToReadWriteInfo();

 private:
  ColumnType type_;
@@ -303,6 +287,20 @@ class CodegenAnyVal {
  /// Both 'dst' and 'src' should be integer types.
  llvm::Value* SetHighBits(int num_bits, llvm::Value* src, llvm::Value* dst,
                           const char* name = "");
+
+  /// Replaces negative floating point zero with positive zero, leaves everything else
+  /// unchanged.
+  static llvm::Value* ConvertToPositiveZero(LlvmBuilder* builder, llvm::Value* val);
+
+  /// Generates code that fills in the list of child pointers of a StructVal from
+  /// 'read_write_info'. On return '*ptr' and '*len' hold the values for the pointer and
+  /// the length of the struct value and the builder is positioned in
+  /// 'struct_produce_value_block'.
+  /// Returns the last block generated so we can set it as a predecessor in PHI nodes.
+  static llvm::BasicBlock* CreateStructValFromReadWriteInfo(
+      const CodegenAnyValReadWriteInfo& read_write_info, llvm::Value** ptr,
+      llvm::Value** len, llvm::BasicBlock* struct_produce_value_block);
+
+  /// Populates the children of the struct-typed 'read_write_info'. 'children_ptr' points
+  /// to the list of child pointers of the struct value. Entry, null and non-null blocks
+  /// are created for every child; nested structs are handled recursively.
+  static void StructToReadWriteInfo(CodegenAnyValReadWriteInfo* read_write_info,
+      llvm::Value* children_ptr);
+  /// Generates code that reads the non-struct child of type 'type' pointed to by
+  /// 'child_ptr' and records the value that was read in 'read_write_info'.
+  static void StructChildToReadWriteInfo(CodegenAnyValReadWriteInfo* read_write_info,
+      const ColumnType& type, llvm::Value* child_ptr);
 };

 }
--- a/be/src/codegen/gen_ir_descriptions.py
+++ b/be/src/codegen/gen_ir_descriptions.py
@@ -231,7 +231,12 @@ ir_functions = [
  ["KRPC_DSS_HASH_AND_ADD_ROWS",
  "_ZN6impala20KrpcDataStreamSender14HashAndAddRowsEPNS_8RowBatchE"],
  ["GET_FUNCTION_CTX",
-  "_ZN6impala11HiveUdfCall18GetFunctionContextEPNS_19ScalarExprEvaluatorEi"],
+  "_ZN6impala19ScalarExprEvaluator18GetFunctionContextEPS0_i"],
+  ["GET_CHILD_EVALUATOR", "_ZN6impala19ScalarExprEvaluator17GetChildEvaluatorEPS0_i"],
+  ["STORE_RESULT_IN_EVALUATOR",
+  "_ZN6impala19ScalarExprEvaluator11StoreResultERKN10impala_udf6AnyValERKNS_10ColumnTypeE"],
+  ["FN_CTX_ALLOCATE_FOR_RESULTS",
+  "_Z23FnCtxAllocateForResultsPN10impala_udf15FunctionContextEl"],
  ["GET_JNI_CONTEXT",
  "_ZN6impala11HiveUdfCall13GetJniContextEPN10impala_udf15FunctionContextE"],
  ["JNI_CTX_SET_INPUT_NULL_BUFF_ELEM",
--- a/be/src/codegen/impala-ir.cc
+++ b/be/src/codegen/impala-ir.cc
@@ -58,6 +58,7 @@
 #include "exprs/math-functions-ir.cc"
 #include "exprs/operators-ir.cc"
 #include "exprs/scalar-expr-ir.cc"
+#include "exprs/scalar-expr-evaluator-ir.cc"
 #include "exprs/string-functions-ir.cc"
 #include "exprs/timestamp-functions-ir.cc"
 #include "exprs/tuple-is-null-predicate-ir.cc"
--- a/be/src/codegen/llvm-codegen.cc
+++ b/be/src/codegen/llvm-codegen.cc
@@ -684,6 +684,19 @@ void LlvmCodeGen::CreateIfElseBlocks(llvm::Function* fn, const string& if_name,
  *else_block = llvm::BasicBlock::Create(context(), else_name, fn, insert_before);
 }

+llvm::PHINode* LlvmCodeGen::CreateBinaryPhiNode(LlvmBuilder* builder, llvm::Value* value1,
+    llvm::Value* value2, llvm::BasicBlock* incoming_block1,
+    llvm::BasicBlock* incoming_block2, std::string name) {
+  // Without an explicit name, derive one from the first incoming value.
+  if (name.empty()) name = value1->getName().str() + "_phi";
+
+  // The node takes the type of 'value1' and has room reserved for both incoming edges.
+  llvm::PHINode* phi = builder->CreatePHI(value1->getType(), 2, name);
+  phi->addIncoming(value1, incoming_block1);
+  phi->addIncoming(value2, incoming_block2);
+  return phi;
+}
+
 Status LlvmCodeGen::MaterializeFunction(llvm::Function* fn) {
  DCHECK(!is_compiled_);
  if (fn->isIntrinsic() || !fn->isMaterializable()) return Status::OK();
--- a/be/src/codegen/llvm-codegen.h
+++ b/be/src/codegen/llvm-codegen.h
@@ -527,6 +527,13 @@ class LlvmCodeGen {
      llvm::BasicBlock** if_block, llvm::BasicBlock** else_block,
      llvm::BasicBlock* insert_before = NULL);

+  /// Creates a PHI node with two incoming blocks that will have the value 'value1' for the
+  /// incoming block 'incoming_block1' and the value 'value2' for incoming block
+  /// 'incoming_block2'. The node has the type of 'value1' and is created at the current
+  /// insert point of 'builder'. If 'name' is empty, the node is named after 'value1' with
+  /// a "_phi" suffix.
+  static llvm::PHINode* CreateBinaryPhiNode(LlvmBuilder* builder, llvm::Value* value1,
+      llvm::Value* value2, llvm::BasicBlock* incoming_block1,
+      llvm::BasicBlock* incoming_block2, std::string name = "");
+
  /// Returns a constant int of 'byte_size' bytes based on 'low_bits' and 'high_bits'
  /// which stand for the lower and upper 64-bits of the constant respectively. For
  /// values less than or equal to 64-bits, 'high_bits' is not used. This function
--- a/be/src/exec/aggregator.cc
+++ b/be/src/exec/aggregator.cc
@@ -431,7 +431,7 @@ Status AggregatorConfig::CodegenUpdateSlot(LlvmCodeGen* codegen, int agg_fn_idx,
        dst.SetIsNull(slot_desc->CodegenIsNull(codegen, &builder, agg_tuple_arg));
      }
    }
-    dst.LoadFromNativePtr(dst_slot_ptr);
+    SlotDescriptor::CodegenLoadAnyVal(&dst, dst_slot_ptr);

    // Get the FunctionContext object for the AggFnEvaluator.
    llvm::Function* get_agg_fn_ctx_fn =
@@ -448,7 +448,7 @@ Status AggregatorConfig::CodegenUpdateSlot(LlvmCodeGen* codegen, int agg_fn_idx,
    // Copy the value back to the slot. In the FIXED_UDA_INTERMEDIATE case, the
    // UDA function writes directly to the slot so there is nothing to copy.
    if (dst_type.type != TYPE_FIXED_UDA_INTERMEDIATE) {
-      updated_dst_val.StoreToNativePtr(dst_slot_ptr);
+      SlotDescriptor::CodegenStoreNonNullAnyVal(updated_dst_val, dst_slot_ptr);
    }

    if (slot_desc->is_nullable() && !special_null_handling) {
--- a/be/src/exec/filter-context.cc
+++ b/be/src/exec/filter-context.cc
@@ -132,44 +132,48 @@ void FilterContext::MaterializeValues() const {
  }
 }

-// An example of the generated code for TPCH-Q2: RF002 -> n_regionkey
+// An example of the generated code for the following query:
 //
-// @expr_type_arg = constant %"struct.impala::ColumnType" { i32 4, i32 -1, i32 -1,
-//     i32 -1, %"class.std::vector.422" zeroinitializer,
-//     %"class.std::vector.101" zeroinitializer }
+// select a.outer_struct, b.small_struct
+// from functional_orc_def.complextypes_nested_structs a
+//     inner join functional_orc_def.complextypes_structs b
+//     on b.small_struct.i = a.outer_struct.inner_struct2.i + 19091;
 //
-// ; Function Attrs: alwaysinline
 // define i1 @FilterContextEval(%"struct.impala::FilterContext"* %this,
-//                              %"class.impala::TupleRow"* %row) #41 {
+//     %"class.impala::TupleRow"* %row) #50 {
 // entry:
-//   %0 = alloca i16
+//   %0 = alloca i64
 //   %expr_eval_ptr = getelementptr inbounds %"struct.impala::FilterContext",
 //       %"struct.impala::FilterContext"* %this, i32 0, i32 0
 //   %expr_eval_arg = load %"class.impala::ScalarExprEvaluator"*,
 //       %"class.impala::ScalarExprEvaluator"** %expr_eval_ptr
-//   %result = call i32 @GetSlotRef(%"class.impala::ScalarExprEvaluator"* %expr_eval_arg,
+//   %result = call { i8, i64 } @"impala::Operators::Add_BigIntVal_BigIntValWrapper"(
+//       %"class.impala::ScalarExprEvaluator"* %expr_eval_arg,
 //       %"class.impala::TupleRow"* %row)
-//   %is_null1 = trunc i32 %result to i1
-//   br i1 %is_null1, label %is_null, label %not_null
+//   br label %entry1
 //
-// not_null:                                         ; preds = %entry
-//   %1 = ashr i32 %result, 16
-//   %2 = trunc i32 %1 to i16
-//   store i16 %2, i16* %0
-//   %native_ptr = bitcast i16* %0 to i8*
+// entry1:                                           ; preds = %entry
+//   %1 = extractvalue { i8, i64 } %result, 0
+//   %is_null = trunc i8 %1 to i1
+//   br i1 %is_null, label %null, label %non_null
+//
+// non_null:                                         ; preds = %entry1
+//   %val = extractvalue { i8, i64 } %result, 1
+//   store i64 %val, i64* %0
+//   %native_ptr = bitcast i64* %0 to i8*
 //   br label %eval_filter
 //
-// is_null:                                          ; preds = %entry
+// null:                                             ; preds = %entry1
 //   br label %eval_filter
 //
-// eval_filter:                                      ; preds = %not_null, %is_null
-//   %val_ptr_phi = phi i8* [ %native_ptr, %not_null ], [ null, %is_null ]
+// eval_filter:                                      ; preds = %non_null, %null
+//   %native_ptr_phi = phi i8* [ %native_ptr, %non_null ], [ null, %null ]
 //   %filter_ptr = getelementptr inbounds %"struct.impala::FilterContext",
 //       %"struct.impala::FilterContext"* %this, i32 0, i32 1
 //   %filter_arg = load %"class.impala::RuntimeFilter"*,
 //       %"class.impala::RuntimeFilter"** %filter_ptr
 //   %passed_filter = call i1 @_ZNK6impala13RuntimeFilter4EvalEPvRKNS_10ColumnTypeE(
-//       %"class.impala::RuntimeFilter"* %filter_arg, i8* %val_ptr_phi,
+//       %"class.impala::RuntimeFilter"* %filter_arg, i8* %native_ptr_phi,
 //       %"struct.impala::ColumnType"* @expr_type_arg)
 //   ret i1 %passed_filter
 // }
@@ -191,13 +195,6 @@ Status FilterContext::CodegenEval(
  llvm::Value* this_arg = args[0];
  llvm::Value* row_arg = args[1];

-  llvm::BasicBlock* not_null_block =
-      llvm::BasicBlock::Create(context, "not_null", eval_filter_fn);
-  llvm::BasicBlock* is_null_block =
-      llvm::BasicBlock::Create(context, "is_null", eval_filter_fn);
-  llvm::BasicBlock* eval_filter_block =
-      llvm::BasicBlock::Create(context, "eval_filter", eval_filter_fn);
-
  llvm::Function* compute_fn;
  RETURN_IF_ERROR(filter_expr->GetCodegendComputeFn(codegen, false, &compute_fn));
  DCHECK(compute_fn != nullptr);
@@ -217,26 +214,27 @@ Status FilterContext::CodegenEval(
  CodegenAnyVal result = CodegenAnyVal::CreateCallWrapped(codegen, &builder,
      filter_expr->type(), compute_fn, compute_fn_args, "result");

-  // Check if the result is NULL
-  llvm::Value* is_null = result.GetIsNull();
-  builder.CreateCondBr(is_null, is_null_block, not_null_block);
+  CodegenAnyValReadWriteInfo rwi = result.ToReadWriteInfo();
+  rwi.entry_block().BranchTo(&builder);
+
+  llvm::BasicBlock* eval_filter_block =
+      llvm::BasicBlock::Create(context, "eval_filter", eval_filter_fn);

  // Set the pointer to NULL in case it evaluates to NULL.
-  builder.SetInsertPoint(is_null_block);
+  builder.SetInsertPoint(rwi.null_block());
  llvm::Value* null_ptr = codegen->null_ptr_value();
  builder.CreateBr(eval_filter_block);

  // Saves 'result' on the stack and passes a pointer to it to 'runtime_filter_fn'.
-  builder.SetInsertPoint(not_null_block);
-  llvm::Value* native_ptr = result.ToNativePtr();
+  builder.SetInsertPoint(rwi.non_null_block());
+  llvm::Value* native_ptr = SlotDescriptor::CodegenStoreNonNullAnyValToNewAlloca(rwi);
  native_ptr = builder.CreatePointerCast(native_ptr, codegen->ptr_type(), "native_ptr");
  builder.CreateBr(eval_filter_block);

  // Get the arguments in place to call 'runtime_filter_fn' to see if the row passes.
  builder.SetInsertPoint(eval_filter_block);
-  llvm::PHINode* val_ptr_phi = builder.CreatePHI(codegen->ptr_type(), 2, "val_ptr_phi");
-  val_ptr_phi->addIncoming(native_ptr, not_null_block);
-  val_ptr_phi->addIncoming(null_ptr, is_null_block);
+  llvm::PHINode* val_ptr_phi = rwi.CodegenNullPhiNode(native_ptr, null_ptr,
+      "val_ptr_phi");

  // Create a global constant of the filter expression's ColumnType. It needs to be a
  // constant for constant propagation and dead code elimination in 'runtime_filter_fn'.
@@ -504,13 +502,6 @@ Status FilterContext::CodegenInsert(LlvmCodeGen* codegen, ScalarExpr* filter_exp
  }
  builder.SetInsertPoint(check_val_block);

-  // Check null on the input value 'val' to be computed first
-  llvm::BasicBlock* val_not_null_block =
-      llvm::BasicBlock::Create(context, "val_not_null", insert_filter_fn);
-  llvm::BasicBlock* val_is_null_block =
-      llvm::BasicBlock::Create(context, "val_is_null", insert_filter_fn);
-  llvm::BasicBlock* insert_filter_block =
-      llvm::BasicBlock::Create(context, "insert_filter", insert_filter_fn);

  llvm::Function* compute_fn;
  RETURN_IF_ERROR(filter_expr->GetCodegendComputeFn(codegen, false, &compute_fn));
@@ -526,26 +517,27 @@ Status FilterContext::CodegenInsert(LlvmCodeGen* codegen, ScalarExpr* filter_exp
  CodegenAnyVal result = CodegenAnyVal::CreateCallWrapped(
      codegen, &builder, filter_expr->type(), compute_fn, compute_fn_args, "result");

-  // Check if the result is NULL
-  llvm::Value* val_is_null = result.GetIsNull();
-  builder.CreateCondBr(val_is_null, val_is_null_block, val_not_null_block);
+  CodegenAnyValReadWriteInfo rwi = result.ToReadWriteInfo();
+  rwi.entry_block().BranchTo(&builder);
+
+  llvm::BasicBlock* insert_filter_block =
+      llvm::BasicBlock::Create(context, "insert_filter", insert_filter_fn);

  // Set the pointer to NULL in case it evaluates to NULL.
-  builder.SetInsertPoint(val_is_null_block);
+  builder.SetInsertPoint(rwi.null_block());
  llvm::Value* null_ptr = codegen->null_ptr_value();
  builder.CreateBr(insert_filter_block);

  // Saves 'result' on the stack and passes a pointer to it to Insert().
-  builder.SetInsertPoint(val_not_null_block);
-  llvm::Value* native_ptr = result.ToNativePtr();
+  builder.SetInsertPoint(rwi.non_null_block());
+  llvm::Value* native_ptr = SlotDescriptor::CodegenStoreNonNullAnyValToNewAlloca(rwi);
  native_ptr = builder.CreatePointerCast(native_ptr, codegen->ptr_type(), "native_ptr");
  builder.CreateBr(insert_filter_block);

  // Get the arguments in place to call Insert().
  builder.SetInsertPoint(insert_filter_block);
-  llvm::PHINode* val_ptr_phi = builder.CreatePHI(codegen->ptr_type(), 2, "val_ptr_phi");
-  val_ptr_phi->addIncoming(native_ptr, val_not_null_block);
-  val_ptr_phi->addIncoming(null_ptr, val_is_null_block);
+  llvm::PHINode* val_ptr_phi = rwi.CodegenNullPhiNode(native_ptr, null_ptr,
+      "val_ptr_phi");

  // Insert into the bloom filter.
  if (filter_desc.type == TRuntimeFilterType::BLOOM) {
--- a/be/src/exec/hash-table.cc
+++ b/be/src/exec/hash-table.cc
@@ -765,76 +765,98 @@ static void CodegenAssignNullValue(LlvmCodeGen* codegen, LlvmBuilder* builder,
 }

 // Codegen for evaluating a tuple row over either build_expr_evals_ or
-// probe_expr_evals_. For a group by with (big int, string) the IR looks like:
+// probe_expr_evals_. Below is the IR generated for the following query:
+//
+// select bigint_col, string_col from functional.alltypestiny
+// group by bigint_col, string_col;
 //
 // define i1 @EvalProbeRow(%"class.impala::HashTableCtx"* %this_ptr,
-//    %"class.impala::TupleRow"* %row, i8* %expr_values, i8* %expr_values_null) #34 {
+//                         %"class.impala::TupleRow"* %row,
+//                         i8* %expr_values,
+//                         i8* %expr_values_null) #53 {
 // entry:
-//   %loc_addr = getelementptr i8, i8* %expr_values, i32 0
+//   %eval_vector = call %"class.impala::ScalarExprEvaluator"**
+//       @_ZNK6impala12HashTableCtx16probe_expr_evalsEv(
+//           %"class.impala::HashTableCtx"* %this_ptr)
+//   %loc_addr = getelementptr inbounds i8, i8* %expr_values, i32 0
 //   %loc = bitcast i8* %loc_addr to i64*
-//   %result = call { i8, i64 } @GetSlotRef.2(%"class.impala::ExprContext"*
-//        inttoptr (i64 197737664 to %"class.impala::ExprContext"*),
-//        %"class.impala::TupleRow"* %row)
-//   %0 = extractvalue { i8, i64 } %result, 0
-//   %is_null = trunc i8 %0 to i1
-//   %1 = zext i1 %is_null to i8
-//   %null_byte_loc = getelementptr i8, i8* %expr_values_null, i32 0
-//   store i8 %1, i8* %null_byte_loc
-//   br i1 %is_null, label %null, label %not_null
+//   %0 = getelementptr %"class.impala::ScalarExprEvaluator"*,
+//                      %"class.impala::ScalarExprEvaluator"** %eval_vector,
+//                      i32 0
+//   %eval = load %"class.impala::ScalarExprEvaluator"*,
+//                %"class.impala::ScalarExprEvaluator"** %0
+//   %result = call { i8, i64 } @GetSlotRef.6(%"class.impala::ScalarExprEvaluator"* %eval,
+//                                            %"class.impala::TupleRow"* %row)
+//   %null_byte_loc = getelementptr inbounds i8, i8* %expr_values_null, i32 0
+//   br label %entry1
 //
-// null:                                             ; preds = %entry
-//   store i64 2166136261, i64* %loc
-//   br label %continue
+// entry1:                                           ; preds = %entry
+//   %1 = extractvalue { i8, i64 } %result, 0
+//   %is_null = trunc i8 %1 to i1
+//   br i1 %is_null, label %null, label %non_null
 //
-// not_null:                                         ; preds = %entry
+// non_null:                                         ; preds = %entry1
 //   %val = extractvalue { i8, i64 } %result, 1
+//   store i8 0, i8* %null_byte_loc
 //   store i64 %val, i64* %loc
 //   br label %continue
 //
-// continue:                                         ; preds = %not_null, %null
-//   %is_null_phi = phi i1 [ true, %null ], [ false, %not_null ]
-//   %has_null = or i1 false, %is_null_phi
-//   %loc_addr1 = getelementptr i8, i8* %expr_values, i32 8
-//   %loc2 = bitcast i8* %loc_addr1 to %"struct.impala::StringValue"*
-//   %result6 = call { i64, i8* } @GetSlotRef.3(%"class.impala::ExprContext"*
-//      inttoptr (i64 197738048 to %"class.impala::ExprContext"*),
-//      %"class.impala::TupleRow"* %row)
-//   %2 = extractvalue { i64, i8* } %result6, 0
-//   %is_null7 = trunc i64 %2 to i1
-//   %3 = zext i1 %is_null7 to i8
-//   %null_byte_loc8 = getelementptr i8, i8* %expr_values_null, i32 1
-//   store i8 %3, i8* %null_byte_loc8
-//   br i1 %is_null7, label %null3, label %not_null4
+// null:                                             ; preds = %entry1
+//   store i8 1, i8* %null_byte_loc
+//   store i64 2166136261, i64* %loc
+//   br label %continue
 //
-// null3:                                            ; preds = %continue
-//   %string_ptr = getelementptr inbounds %"struct.impala::StringValue",
-//        %"struct.impala::StringValue"* %loc2, i32 0, i32 0
-//   %string_len = getelementptr inbounds %"struct.impala::StringValue",
-//        %"struct.impala::StringValue"* %loc2, i32 0, i32 1
-//   store i8* inttoptr (i32 -2128831035 to i8*), i8** %string_ptr
-//   store i32 -2128831035, i32* %string_len
-//   br label %continue5
+// continue:                                         ; preds = %non_null, %null
+//   %_phi = phi i1 [ false, %non_null ], [ true, %null ]
+//   %has_null = or i1 false, %_phi
+//   %loc_addr2 = getelementptr inbounds i8, i8* %expr_values, i32 8
+//   %loc3 = bitcast i8* %loc_addr2 to %"struct.impala::StringValue"*
+//   %2 = getelementptr %"class.impala::ScalarExprEvaluator"*,
+//                      %"class.impala::ScalarExprEvaluator"** %eval_vector,
+//                      i32 1
+//   %eval4 = load %"class.impala::ScalarExprEvaluator"*,
+//                 %"class.impala::ScalarExprEvaluator"** %2
+//   %result5 = call { i64, i8* } @GetSlotRef.7(
+//       %"class.impala::ScalarExprEvaluator"* %eval4,
+//       %"class.impala::TupleRow"* %row)
+//   %null_byte_loc6 = getelementptr inbounds i8, i8* %expr_values_null, i32 1
+//   br label %entry7
 //
-// not_null4:                                        ; preds = %continue
-//   %4 = extractvalue { i64, i8* } %result6, 0
+// entry7:                                           ; preds = %continue
+//   %3 = extractvalue { i64, i8* } %result5, 0
+//   %is_null10 = trunc i64 %3 to i1
+//   br i1 %is_null10, label %null9, label %non_null8
+//
+// non_null8:                                        ; preds = %entry7
+//   %4 = extractvalue { i64, i8* } %result5, 0
 //   %5 = ashr i64 %4, 32
 //   %6 = trunc i64 %5 to i32
+//   %result11 = extractvalue { i64, i8* } %result5, 1
+//   store i8 0, i8* %null_byte_loc6
 //   %7 = insertvalue %"struct.impala::StringValue" zeroinitializer, i32 %6, 1
-//   %result9 = extractvalue { i64, i8* } %result6, 1
-//   %8 = insertvalue %"struct.impala::StringValue" %7, i8* %result9, 0
-//   store %"struct.impala::StringValue" %8, %"struct.impala::StringValue"* %loc2
-//   br label %continue5
+//   %8 = insertvalue %"struct.impala::StringValue" %7, i8* %result11, 0
+//   store %"struct.impala::StringValue" %8, %"struct.impala::StringValue"* %loc3
+//   br label %continue12
 //
-// continue5:                                        ; preds = %not_null4, %null3
-//   %is_null_phi10 = phi i1 [ true, %null3 ], [ false, %not_null4 ]
-//   %has_null11 = or i1 %has_null, %is_null_phi10
-//   ret i1 %has_null11
+// null9:                                            ; preds = %entry7
+//   store i8 1, i8* %null_byte_loc6
+//   %string_ptr = getelementptr inbounds %"struct.impala::StringValue",
+//                                        %"struct.impala::StringValue"* %loc3,
+//                                        i32 0,
+//                                        i32 0
+//   %string_len = getelementptr inbounds %"struct.impala::StringValue",
+//                                        %"struct.impala::StringValue"* %loc3,
+//                                        i32 0,
+//                                        i32 1
+//   store i8* inttoptr (i32 -2128831035 to i8*), i8** %string_ptr
+//   store i32 -2128831035, i32* %string_len
+//   br label %continue12
+//
+// continue12:                                       ; preds = %non_null8, %null9
+//   %_phi13 = phi i1 [ false, %non_null8 ], [ true, %null9 ]
+//   %has_null14 = or i1 %has_null, %_phi13
+//   ret i1 %has_null14
 // }
-//
-// For each expr, we create 3 code blocks.  The null, not null and continue blocks.
-// Both the null and not null branch into the continue block.  The continue block
-// becomes the start of the next block for codegen (either the next expr or just the
-// end of the function).
 Status HashTableCtx::CodegenEvalRow(LlvmCodeGen* codegen, bool build_row,
    const HashTableConfig& config, llvm::Function** fn) {
  const std::vector<ScalarExpr*>& exprs =
@@ -887,10 +909,6 @@ Status HashTableCtx::CodegenEvalRow(LlvmCodeGen* codegen, bool build_row,
    llvm::Value* llvm_loc =
        builder.CreatePointerCast(loc, codegen->GetSlotPtrType(exprs[i]->type()), "loc");

-    llvm::BasicBlock* null_block = llvm::BasicBlock::Create(context, "null", *fn);
-    llvm::BasicBlock* not_null_block = llvm::BasicBlock::Create(context, "not_null", *fn);
-    llvm::BasicBlock* continue_block = llvm::BasicBlock::Create(context, "continue", *fn);
-
    // Call expr
    llvm::Function* expr_fn;
    Status status = exprs[i]->GetCodegendComputeFn(codegen, false, &expr_fn);
@@ -908,17 +926,17 @@ Status HashTableCtx::CodegenEvalRow(LlvmCodeGen* codegen, bool build_row,
    llvm::Value* eval_arg = codegen->CodegenArrayAt(&builder, eval_vector, i, "eval");
    CodegenAnyVal result = CodegenAnyVal::CreateCallWrapped(
        codegen, &builder, exprs[i]->type(), expr_fn, {eval_arg, row}, "result");
-    llvm::Value* is_null = result.GetIsNull();
-
-    // Set null-byte result
-    llvm::Value* null_byte = builder.CreateZExt(is_null, codegen->i8_type());
    llvm::Value* llvm_null_byte_loc = builder.CreateInBoundsGEP(
        NULL, expr_values_null, codegen->GetI32Constant(i), "null_byte_loc");
-    builder.CreateStore(null_byte, llvm_null_byte_loc);
-    builder.CreateCondBr(is_null, null_block, not_null_block);
+
+    CodegenAnyValReadWriteInfo rwi = result.ToReadWriteInfo();
+    rwi.entry_block().BranchTo(&builder);
+
+    llvm::BasicBlock* continue_block = llvm::BasicBlock::Create(context, "continue", *fn);

    // Null block
-    builder.SetInsertPoint(null_block);
+    builder.SetInsertPoint(rwi.null_block());
+    builder.CreateStore(codegen->GetI8Constant(1), llvm_null_byte_loc);
    if (!config.stores_nulls) {
      // hash table doesn't store nulls, no reason to keep evaluating exprs
      builder.CreateRet(codegen->true_value());
@@ -928,21 +946,20 @@ Status HashTableCtx::CodegenEvalRow(LlvmCodeGen* codegen, bool build_row,
    }

    // Not null block
-    builder.SetInsertPoint(not_null_block);
+    builder.SetInsertPoint(rwi.non_null_block());
+    builder.CreateStore(codegen->GetI8Constant(0), llvm_null_byte_loc);

-    result.ConvertToCanonicalForm();
+    // Convert to canonical value.
+    rwi.CodegenConvertToCanonicalForm();

-    result.StoreToNativePtr(llvm_loc);
+    SlotDescriptor::CodegenStoreNonNullAnyVal(rwi, llvm_loc);
    builder.CreateBr(continue_block);

    // Continue block
    builder.SetInsertPoint(continue_block);
    if (config.stores_nulls) {
      // Update has_null
-      llvm::PHINode* is_null_phi =
-          builder.CreatePHI(codegen->bool_type(), 2, "is_null_phi");
-      is_null_phi->addIncoming(codegen->true_value(), null_block);
-      is_null_phi->addIncoming(codegen->false_value(), not_null_block);
+      llvm::PHINode* is_null_phi = rwi.CodegenIsNullPhiNode("is_null_phi");
      has_null = builder.CreateOr(has_null, is_null_phi, "has_null");
    }
  }
@@ -1116,10 +1133,9 @@ Status HashTableCtx::CodegenHashRow(LlvmCodeGen* codegen, bool use_murmur,
        builder.SetInsertPoint(continue_block);
        // Use phi node to reconcile that we could have come from the string-null
        // path and string not null paths.
-        llvm::PHINode* phi_node =
-            builder.CreatePHI(codegen->i32_type(), 2, "hash_phi");
-        phi_node->addIncoming(string_hash_result, not_null_block);
-        phi_node->addIncoming(str_null_result, null_block);
+        llvm::PHINode* phi_node = LlvmCodeGen::CreateBinaryPhiNode(&builder,
+            string_hash_result, str_null_result, not_null_block, null_block);
+
        hash_result = phi_node;
      } else {
        hash_result = string_hash_result;
--- a/be/src/exec/hash-table.h
+++ b/be/src/exec/hash-table.h
@@ -43,6 +43,7 @@ namespace llvm {

 namespace impala {

+class CodegenAnyVal;
 class LlvmCodeGen;
 class MemTracker;
 class RowDescriptor;
--- a/be/src/exprs/CMakeLists.txt
+++ b/be/src/exprs/CMakeLists.txt
@@ -45,6 +45,7 @@ add_library(ExprsIr
  math-functions-ir.cc
  operators-ir.cc
  scalar-expr-ir.cc
+  scalar-expr-evaluator-ir.cc
  string-functions-ir.cc
  timestamp-functions-ir.cc
  tuple-is-null-predicate-ir.cc
@@ -70,8 +71,8 @@ add_library(Exprs
  literal.cc
  ${MURMURHASH_SRC_DIR}/MurmurHash3.cpp
  null-literal.cc
-  scalar-expr.cc
  scalar-expr-evaluator.cc
+  scalar-expr.cc
  slot-ref.cc
  string-functions.cc
  timestamp-functions.cc
--- a/be/src/exprs/hive-udf-call-ir.cc
+++ b/be/src/exprs/hive-udf-call-ir.cc
@@ -26,11 +26,6 @@

 namespace impala {

-FunctionContext* HiveUdfCall::GetFunctionContext(
-    ScalarExprEvaluator* eval, int fn_ctx_idx) {
-  return eval->fn_context(fn_ctx_idx);
-}
-
 HiveUdfCall::JniContext* HiveUdfCall::GetJniContext(FunctionContext* fn_ctx) {
  JniContext* jni_ctx = reinterpret_cast<JniContext*>(
      fn_ctx->GetFunctionState(FunctionContext::THREAD_LOCAL));
--- a/be/src/exprs/hive-udf-call.cc
+++ b/be/src/exprs/hive-udf-call.cc
@@ -286,29 +286,32 @@ Status HiveUdfCall::CodegenEvalChildren(LlvmCodeGen* codegen, LlvmBuilder* build
     RETURN_IF_ERROR(child_expr->GetCodegendComputeFn(codegen, false, &child_fn));

     builder->SetInsertPoint(current_eval_child_block);
-     llvm::BasicBlock* next_eval_child_block = llvm::BasicBlock::Create(
-         context, "eval_child", function);

     const ColumnType& child_type = child_expr->type();
     CodegenAnyVal child_wrapped = CodegenAnyVal::CreateCallWrapped(
         codegen, builder, child_type, child_fn, *args, "child");

-     llvm::BasicBlock* const child_not_null_block = llvm::BasicBlock::Create(
-         context, "child_not_null", function, next_eval_child_block);
+     CodegenAnyValReadWriteInfo rwi = child_wrapped.ToReadWriteInfo();
+     rwi.entry_block().BranchTo(builder);

-     llvm::Value* const child_is_null = child_wrapped.GetIsNull("child_is_null");
-     llvm::Value* const child_is_null_i8 = builder->CreateZExtOrTrunc(
-         child_is_null, codegen->i8_type(), "child_is_null_i8");
+     llvm::BasicBlock* next_eval_child_block = llvm::BasicBlock::Create(
+         context, "eval_child", function);
+
+     // Child is null
+     builder->SetInsertPoint(rwi.null_block());
     builder->CreateCall(set_input_null_buff_elem_fn,
-         {jni_ctx, codegen->GetI32Constant(i), child_is_null_i8});
-     builder->CreateCondBr(child_is_null, next_eval_child_block, child_not_null_block);
+         {jni_ctx, codegen->GetI32Constant(i), codegen->GetI8Constant(1)});
+     builder->CreateBr(next_eval_child_block);

     // Child is not null.
-     builder->SetInsertPoint(child_not_null_block);
+     builder->SetInsertPoint(rwi.non_null_block());
+     builder->CreateCall(set_input_null_buff_elem_fn,
+         {jni_ctx, codegen->GetI32Constant(i), codegen->GetI8Constant(0)});
     llvm::Value* const input_ptr = builder->CreateCall(get_input_val_buff_at_offset_fn,
         {jni_ctx, codegen->GetI32Constant(input_byte_offsets_[i])}, "input_ptr");

-     llvm::Value* const child_val_ptr = child_wrapped.ToNativePtr();
+     llvm::Value* const child_val_ptr =
+         SlotDescriptor::CodegenStoreNonNullAnyValToNewAlloca(rwi);
     const std::size_t size = CodeGenUtil::GetTypeSize(child_type.type);
     codegen->CodegenMemcpy(builder, input_ptr, child_val_ptr, size);
     builder->CreateBr(next_eval_child_block);
@@ -334,116 +337,172 @@ llvm::Value* CastPtrAndLoad(LlvmCodeGen* codegen, LlvmBuilder* builder,
 ///   return a + b + c;
 /// }
 ///
+/// To reproduce, create the following function:
+///
+/// create function concatenate(string, string, string) returns string
+/// location '/test-warehouse/impala-hive-udfs.jar'
+/// symbol='org.apache.impala.TestUdf';
+///
+/// then run the following query:
+///
+/// select default.concatenate(date_string_col, string_col, cast(double_col as string))
+/// from functional.alltypes;
+///
 /// define { i64, i8* } @HiveUdfCall(%"class.impala::ScalarExprEvaluator"* %eval,
-///                                  %"class.impala::TupleRow"* %row) #49 {
+///                                  %"class.impala::TupleRow"* %row) #47 {
 /// entry:
 ///   %0 = alloca %"struct.impala::ColumnType"
 ///   %1 = alloca %"struct.impala::StringValue"
 ///   %2 = alloca %"struct.impala::StringValue"
 ///   %3 = alloca %"struct.impala::StringValue"
 ///   %fn_ctx = call %"class.impala_udf::FunctionContext"*
-///       @_ZN6impala11HiveUdfCall18GetFunctionContextEPNS_19ScalarExprEvaluatorEi(
-///       %"class.impala::ScalarExprEvaluator"* %eval, i32 0)
+///       @_ZN6impala19ScalarExprEvaluator18GetFunctionContextEPS0_i(
+///           %"class.impala::ScalarExprEvaluator"* %eval,
+///           i32 0)
 ///   %jni_ctx = call %"struct.impala::HiveUdfCall::JniContext"*
 ///       @_ZN6impala11HiveUdfCall13GetJniContextEPN10impala_udf15FunctionContextE(
-///       %"class.impala_udf::FunctionContext"* %fn_ctx)
+///           %"class.impala_udf::FunctionContext"* %fn_ctx)
 ///   br label %eval_child
 ///
-/// eval_child:                                  ; preds = %entry
+/// eval_child:                                       ; preds = %entry
 ///   %child = call { i64, i8* } @GetSlotRef(%"class.impala::ScalarExprEvaluator"* %eval,
 ///                                          %"class.impala::TupleRow"* %row)
-///   %4 = extractvalue { i64, i8* } %child, 0
-///   %child_is_null = trunc i64 %4 to i1
-///   %child_is_null_i8 = zext i1 %child_is_null to i8
-///   call void @_ZN6impala11HiveUdfCall10JniContext26SetInputNullsBufferElementEPS1_ih(
-///       %"struct.impala::HiveUdfCall::JniContext"* %jni_ctx,
-///       i32 0, i8 %child_is_null_i8)
-///   br i1 %child_is_null, label %eval_child1, label %child_not_null
+///   br label %entry1
 ///
-/// child_not_null:                              ; preds = %eval_child
-///   %input_ptr = call i8*
-///       @_ZN6impala11HiveUdfCall10JniContext28GetInputValuesBufferAtOffsetEPS1_i(
-///       %"struct.impala::HiveUdfCall::JniContext"* %jni_ctx, i32 0)
+/// entry1:                                           ; preds = %eval_child
+///   %4 = extractvalue { i64, i8* } %child, 0
+///   %is_null = trunc i64 %4 to i1
+///   br i1 %is_null, label %null, label %non_null
+///
+/// non_null:                                         ; preds = %entry1
+///   %child2 = extractvalue { i64, i8* } %child, 1
 ///   %5 = extractvalue { i64, i8* } %child, 0
 ///   %6 = ashr i64 %5, 32
 ///   %7 = trunc i64 %6 to i32
+///   call void @_ZN6impala11HiveUdfCall10JniContext26SetInputNullsBufferElementEPS1_ih(
+///       %"struct.impala::HiveUdfCall::JniContext"* %jni_ctx,
+///       i32 0,
+///       i8 0)
+///   %input_ptr = call i8*
+///       @_ZN6impala11HiveUdfCall10JniContext28GetInputValuesBufferAtOffsetEPS1_i(
+///           %"struct.impala::HiveUdfCall::JniContext"* %jni_ctx,
+///           i32 0)
 ///   %8 = insertvalue %"struct.impala::StringValue" zeroinitializer, i32 %7, 1
-///   %child2 = extractvalue { i64, i8* } %child, 1
 ///   %9 = insertvalue %"struct.impala::StringValue" %8, i8* %child2, 0
 ///   store %"struct.impala::StringValue" %9, %"struct.impala::StringValue"* %3
 ///   %10 = bitcast %"struct.impala::StringValue"* %3 to i8*
-///   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %input_ptr, i8* %10,
-///                                        i64 12, i32 0, i1 false)
-///   br label %eval_child1
+///   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %input_ptr,
+///                                        i8* %10,
+///                                        i64 12,
+///                                        i32 0,
+///                                        i1 false)
+///   br label %eval_child3
 ///
-/// eval_child1:                                 ; preds = %child_not_null, %eval_child
-///   %child4 = call { i64, i8* } @GetSlotRef.5(
-///       %"class.impala::ScalarExprEvaluator"* %eval, %"class.impala::TupleRow"* %row)
-///   %11 = extractvalue { i64, i8* } %child4, 0
-///   %child_is_null6 = trunc i64 %11 to i1
-///   %child_is_null_i87 = zext i1 %child_is_null6 to i8
+/// null:                                             ; preds = %entry1
 ///   call void @_ZN6impala11HiveUdfCall10JniContext26SetInputNullsBufferElementEPS1_ih(
 ///       %"struct.impala::HiveUdfCall::JniContext"* %jni_ctx,
-///       i32 1, i8 %child_is_null_i87)
-///   br i1 %child_is_null6, label %eval_child3, label %child_not_null5
+///       i32 0,
+///       i8 1)
+///   br label %eval_child3
 ///
-/// child_not_null5:                             ; preds = %eval_child1
-///   %input_ptr8 = call i8*
-///       @_ZN6impala11HiveUdfCall10JniContext28GetInputValuesBufferAtOffsetEPS1_i(
-///       %"struct.impala::HiveUdfCall::JniContext"* %jni_ctx, i32 16)
+/// eval_child3:                                      ; preds = %non_null, %null
+///   %child4 = call { i64, i8* } @GetSlotRef.1(
+///       %"class.impala::ScalarExprEvaluator"* %eval,
+///       %"class.impala::TupleRow"* %row)
+///   br label %entry5
+///
+/// entry5:                                           ; preds = %eval_child3
+///   %11 = extractvalue { i64, i8* } %child4, 0
+///   %is_null8 = trunc i64 %11 to i1
+///   br i1 %is_null8, label %null7, label %non_null6
+///
+/// non_null6:                                        ; preds = %entry5
+///   %child9 = extractvalue { i64, i8* } %child4, 1
 ///   %12 = extractvalue { i64, i8* } %child4, 0
 ///   %13 = ashr i64 %12, 32
 ///   %14 = trunc i64 %13 to i32
+///   call void @_ZN6impala11HiveUdfCall10JniContext26SetInputNullsBufferElementEPS1_ih(
+///       %"struct.impala::HiveUdfCall::JniContext"* %jni_ctx,
+///       i32 1,
+///       i8 0)
+///   %input_ptr11 = call i8*
+///       @_ZN6impala11HiveUdfCall10JniContext28GetInputValuesBufferAtOffsetEPS1_i(
+///           %"struct.impala::HiveUdfCall::JniContext"* %jni_ctx,
+///           i32 16)
 ///   %15 = insertvalue %"struct.impala::StringValue" zeroinitializer, i32 %14, 1
-///   %child9 = extractvalue { i64, i8* } %child4, 1
 ///   %16 = insertvalue %"struct.impala::StringValue" %15, i8* %child9, 0
 ///   store %"struct.impala::StringValue" %16, %"struct.impala::StringValue"* %2
 ///   %17 = bitcast %"struct.impala::StringValue"* %2 to i8*
-///   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %input_ptr8, i8* %17,
-///                                        i64 12, i32 0, i1 false)
-///   br label %eval_child3
+///   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %input_ptr11,
+///                                        i8* %17,
+///                                        i64 12,
+///                                        i32 0,
+///                                        i1 false)
+///   br label %eval_child10
 ///
-/// eval_child3:                                 ; preds = %child_not_null5, %eval_child1
-///   %child11 = call { i64, i8* } @GetSlotRef.6(
-///       %"class.impala::ScalarExprEvaluator"* %eval, %"class.impala::TupleRow"* %row)
-///   %18 = extractvalue { i64, i8* } %child11, 0
-///   %child_is_null13 = trunc i64 %18 to i1
-///   %child_is_null_i814 = zext i1 %child_is_null13 to i8
+/// null7:                                            ; preds = %entry5
 ///   call void @_ZN6impala11HiveUdfCall10JniContext26SetInputNullsBufferElementEPS1_ih(
 ///       %"struct.impala::HiveUdfCall::JniContext"* %jni_ctx,
-///       i32 2, i8 %child_is_null_i814)
-///   br i1 %child_is_null13, label %call_java, label %child_not_null12
+///       i32 1,
+///       i8 1)
+///   br label %eval_child10
 ///
-/// child_not_null12:                            ; preds = %eval_child3
-///   %input_ptr15 = call i8*
-///       @_ZN6impala11HiveUdfCall10JniContext28GetInputValuesBufferAtOffsetEPS1_i(
-///       %"struct.impala::HiveUdfCall::JniContext"* %jni_ctx, i32 32)
-///   %19 = extractvalue { i64, i8* } %child11, 0
+/// eval_child10:                                     ; preds = %non_null6, %null7
+///   %child12 = call { i64, i8* } @"impala::CastFunctions::CastToStringValWrapper"(
+///       %"class.impala::ScalarExprEvaluator"* %eval,
+///       %"class.impala::TupleRow"* %row)
+///   br label %entry13
+///
+/// entry13:                                          ; preds = %eval_child10
+///   %18 = extractvalue { i64, i8* } %child12, 0
+///   %is_null16 = trunc i64 %18 to i1
+///   br i1 %is_null16, label %null15, label %non_null14
+///
+/// non_null14:                                       ; preds = %entry13
+///   %child17 = extractvalue { i64, i8* } %child12, 1
+///   %19 = extractvalue { i64, i8* } %child12, 0
 ///   %20 = ashr i64 %19, 32
 ///   %21 = trunc i64 %20 to i32
+///   call void @_ZN6impala11HiveUdfCall10JniContext26SetInputNullsBufferElementEPS1_ih(
+///       %"struct.impala::HiveUdfCall::JniContext"* %jni_ctx,
+///       i32 2,
+///       i8 0)
+///   %input_ptr19 = call i8*
+///       @_ZN6impala11HiveUdfCall10JniContext28GetInputValuesBufferAtOffsetEPS1_i(
+///           %"struct.impala::HiveUdfCall::JniContext"* %jni_ctx,
+///           i32 32)
 ///   %22 = insertvalue %"struct.impala::StringValue" zeroinitializer, i32 %21, 1
-///   %child16 = extractvalue { i64, i8* } %child11, 1
-///   %23 = insertvalue %"struct.impala::StringValue" %22, i8* %child16, 0
+///   %23 = insertvalue %"struct.impala::StringValue" %22, i8* %child17, 0
 ///   store %"struct.impala::StringValue" %23, %"struct.impala::StringValue"* %1
 ///   %24 = bitcast %"struct.impala::StringValue"* %1 to i8*
-///   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %input_ptr15, i8* %24,
-///                                        i64 12, i32 0, i1 false)
+///   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %input_ptr19,
+///                                        i8* %24,
+///                                        i64 12,
+///                                        i32 0,
+///                                        i1 false)
 ///   br label %call_java
 ///
-/// call_java:                                   ; preds = %child_not_null12, %eval_child3
+/// null15:                                           ; preds = %entry13
+///   call void @_ZN6impala11HiveUdfCall10JniContext26SetInputNullsBufferElementEPS1_ih(
+///       %"struct.impala::HiveUdfCall::JniContext"* %jni_ctx,
+///       i32 2,
+///       i8 1)
+///   br label %call_java
+///
+/// call_java:                                        ; preds = %non_null14, %null15
 ///   store %"struct.impala::ColumnType" {
-///       i32 10, i32 -1, i32 -1, i32 -1,
-///       %"class.std::vector.13" zeroinitializer,
-///       %"class.std::vector.18" zeroinitializer,
-///       %"class.std::vector.23" zeroinitializer },
+///           i32 10, i32 -1, i32 -1, i32 -1,
+///           %"class.std::vector.13" zeroinitializer,
+///           %"class.std::vector.18" zeroinitializer,
+///           %"class.std::vector.23" zeroinitializer },
 ///       %"struct.impala::ColumnType"* %0
 ///   %ret_ptr = call %"struct.impala_udf::AnyVal"*
 ///   ; The next two lines should be one line but the name of the identifier is too long.
 ///       @_ZN6impala11HiveUdfCall22CallJavaAndStoreResultEPKNS_10ColumnTypeEPN10
 ///impala_udf15FunctionContextEPNS0_10JniContextE(
-///       %"struct.impala::ColumnType"* %0,
-///       %"class.impala_udf::FunctionContext"* %fn_ctx,
-///       %"struct.impala::HiveUdfCall::JniContext"* %jni_ctx)
+///           %"struct.impala::ColumnType"* %0,
+///           %"class.impala_udf::FunctionContext"* %fn_ctx,
+///           %"struct.impala::HiveUdfCall::JniContext"* %jni_ctx)
 ///   %ret_ptr_cast = bitcast %"struct.impala_udf::AnyVal"* %ret_ptr to { i64, i8* }*
 ///   %ret = load { i64, i8* }, { i64, i8* }* %ret_ptr_cast
 ///   ret { i64, i8* } %ret
--- a/be/src/exprs/hive-udf-call.h
+++ b/be/src/exprs/hive-udf-call.h
@@ -154,7 +154,6 @@ class HiveUdfCall : public ScalarExpr {
  /// Static helper functions for codegen.
  static jclass* GetExecutorClass();
  static jmethodID* GetExecutorEvaluateId();
-  static FunctionContext* GetFunctionContext(ScalarExprEvaluator* eval, int fn_ctx_idx);
  static JniContext* GetJniContext(FunctionContext* fn_ctx);
  static JNIEnv* GetJniEnv(JniContext* jni_ctx);
  static AnyVal* CallJavaAndStoreResult(const  ColumnType* type, FunctionContext* fn_ctx,
--- a/be/src/exprs/kudu-partition-expr.cc
+++ b/be/src/exprs/kudu-partition-expr.cc
@@ -167,8 +167,14 @@ void CodegenCallWriteKuduValue(LlvmCodeGen* codegen, LlvmBuilder* builder, int c
 }

 /// Sample IR:
+///
+/// To reproduce, run
+///
+/// bin/impala-py.test tests/query_test/test_kudu.py::
+/// TestKuduPartitioning::test_partitions_evenly_distributed
+///
 /// define i64 @KuduPartitionExpr(%"class.impala::ScalarExprEvaluator"* %eval,
-///                               %"class.impala::TupleRow"* %row) #46 {
+///                               %"class.impala::TupleRow"* %row) #47 {
 /// entry:
 ///   %0 = alloca %"struct.impala::ColumnType"
 ///   %status_ptr = alloca %"class.impala::Status"
@@ -186,39 +192,43 @@ void CodegenCallWriteKuduValue(LlvmCodeGen* codegen, LlvmBuilder* builder, int c
 ///                        %"class.kudu::KuduPartialRow"** %kudu_row_ptr_ptr
 ///   %kudu_partitioner_ptr = load %"class.kudu::client::KuduPartitioner"*,
 ///                                %"class.kudu::client::KuduPartitioner"**
-///                                %kudu_partitioner_ptr_ptr
+///                                    %kudu_partitioner_ptr_ptr
 ///   br label %eval_child
 ///
 /// eval_child:                                       ; preds = %entry
-///   %child = call i64 @GetSlotRef.3(%"class.impala::ScalarExprEvaluator"* %eval,
+///   %child = call i64 @GetSlotRef.4(%"class.impala::ScalarExprEvaluator"* %eval,
 ///                                   %"class.impala::TupleRow"* %row)
+///   br label %entry1
+///
+/// entry1:                                           ; preds = %eval_child
 ///   %is_null = trunc i64 %child to i1
-///   br i1 %is_null, label %child_null, label %child_not_null
+///   br i1 %is_null, label %null, label %non_null
 ///
-/// child_null:                                       ; preds = %eval_child
-///   ret i64 -4294967296
-///
-/// child_not_null:                                   ; preds = %eval_child
+/// non_null:                                         ; preds = %entry1
 ///   %2 = ashr i64 %child, 32
 ///   %3 = trunc i64 %2 to i32
 ///   store i32 %3, i32* %1
 ///   store %"struct.impala::ColumnType" {
 ///       i32 5, i32 -1, i32 -1, i32 -1,
 ///       %"class.std::vector.13" zeroinitializer,
-///       %"class.std::vector.18" zeroinitializer },
+///       %"class.std::vector.18" zeroinitializer,
+///       %"class.std::vector.23" zeroinitializer },
 ///       %"struct.impala::ColumnType"* %0
 ///   %4 = bitcast i32* %1 to i8*
 ///   call void
 ///       @_ZN6impala14WriteKuduValueEiRKNS_10ColumnTypeEPKvbPN4kudu14KuduPartialRowE(
-///       %"class.impala::Status"* %status_ptr,
-///       i32 0,
-///       %"struct.impala::ColumnType"* %0,
-///       i8* %4,
-///       i1 false,
-///       %"class.kudu::KuduPartialRow"* %kudu_row_ptr)
+///           %"class.impala::Status"* %status_ptr,
+///           i32 0,
+///           %"struct.impala::ColumnType"* %0,
+///           i8* %4,
+///           i1 false,
+///           %"class.kudu::KuduPartialRow"* %kudu_row_ptr)
 ///   br label %partition_block
 ///
-/// partition_block:                                  ; preds = %child_not_null
+/// null:                                             ; preds = %entry1
+///   ret i64 -4294967296
+///
+/// partition_block:                                  ; preds = %non_null
 ///   ; The next two lines should be one line but the name of the identifier is too long.
 ///   %ret_val = call i64 @_ZN6impala19GetKuduPartitionRowEPN4kudu6client15
 ///KuduPartitionerEPNS0_14KuduPartialRowE(
@@ -265,27 +275,25 @@ Status KuduPartitionExpr::GetCodegendComputeFnImpl(
    CodegenAnyVal child_wrapped = CodegenAnyVal::CreateCallWrapped(
        codegen, &builder, child_type, child_fn, {args[0], args[1]}, "child");

-    llvm::BasicBlock* null_block =
-        llvm::BasicBlock::Create(context, "child_null", function);
-    llvm::BasicBlock* not_null_block =
-        llvm::BasicBlock::Create(context, "child_not_null", function);
-    builder.CreateCondBr(child_wrapped.GetIsNull(), null_block, not_null_block);
+    CodegenAnyValReadWriteInfo rwi = child_wrapped.ToReadWriteInfo();
+    rwi.entry_block().BranchTo(&builder);

    // Child is null.
-    builder.SetInsertPoint(null_block);
+    builder.SetInsertPoint(rwi.null_block());
    CodegenAnyVal error_ret_val =
        CodegenAnyVal::GetNonNullVal(codegen, &builder, type(), "error_ret_val");
    error_ret_val.SetVal(-1);
    builder.CreateRet(error_ret_val.GetLoweredValue());

    // Child is not null.
-    builder.SetInsertPoint(not_null_block);
+    builder.SetInsertPoint(rwi.non_null_block());
    const int col = tkudu_partition_expr_.referenced_columns[i];
    const ColumnDescriptor& col_desc = table_desc_->col_descs()[col];
    const ColumnType& type = col_desc.type();
    DCHECK_EQ(child_expr->type().type, type.type);

-    llvm::Value* const child_native_val = child_wrapped.ToNativePtr();
+    llvm::Value* const child_native_val =
+        SlotDescriptor::CodegenStoreNonNullAnyValToNewAlloca(rwi);

    CodegenCallWriteKuduValue(codegen, &builder, col, type,
        kudu_row_ptr, child_native_val);
--- a/be/src/exprs/scalar-expr-evaluator-ir.cc
+++ b/be/src/exprs/scalar-expr-evaluator-ir.cc
@@ -0,0 +1,140 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "exprs/scalar-expr-evaluator.h"
+
+namespace impala {
+
+IR_ALWAYS_INLINE FunctionContext* ScalarExprEvaluator::GetFunctionContext(
+    ScalarExprEvaluator* eval, int fn_ctx_idx) {
+  return eval->fn_context(fn_ctx_idx);  // Codegen helper wrapping 'fn_context()'.
+}
+
+IR_ALWAYS_INLINE ScalarExprEvaluator* ScalarExprEvaluator::GetChildEvaluator(
+    ScalarExprEvaluator* eval, int idx) {
+  DCHECK_LT(idx, eval->childEvaluators_.size());  // 'idx' must be a valid child index.
+  return eval->childEvaluators_[idx];  // Codegen helper: evaluator of child 'idx'.
+}
+
+// Copies the payload of 'val' into the member of 'result_' that matches 'type' and
+// returns a pointer to the value; returns nullptr if 'val' is NULL or an invalid date.
+IR_ALWAYS_INLINE void* ScalarExprEvaluator::StoreResult(const AnyVal& val,
+    const ColumnType& type) {
+  using namespace impala_udf;
+  if (val.is_null) return nullptr;
+
+  switch (type.type) {
+    case TYPE_BOOLEAN: {
+      const BooleanVal& v = reinterpret_cast<const BooleanVal&>(val);
+      result_.bool_val = v.val;
+      return &result_.bool_val;
+    }
+    case TYPE_TINYINT: {
+      const TinyIntVal& v = reinterpret_cast<const TinyIntVal&>(val);
+      result_.tinyint_val = v.val;
+      return &result_.tinyint_val;
+    }
+    case TYPE_SMALLINT: {
+      const SmallIntVal& v = reinterpret_cast<const SmallIntVal&>(val);
+      result_.smallint_val = v.val;
+      return &result_.smallint_val;
+    }
+    case TYPE_INT: {
+      const IntVal& v = reinterpret_cast<const IntVal&>(val);
+      result_.int_val = v.val;
+      return &result_.int_val;
+    }
+    case TYPE_BIGINT: {
+      const BigIntVal& v = reinterpret_cast<const BigIntVal&>(val);
+      result_.bigint_val = v.val;
+      return &result_.bigint_val;
+    }
+    case TYPE_FLOAT: {
+      const FloatVal& v = reinterpret_cast<const FloatVal&>(val);
+      result_.float_val = v.val;
+      return &result_.float_val;
+    }
+    case TYPE_DOUBLE: {
+      const DoubleVal& v = reinterpret_cast<const DoubleVal&>(val);
+      result_.double_val = v.val;
+      return &result_.double_val;
+    }
+    case TYPE_STRING:
+    case TYPE_VARCHAR: {
+      const StringVal& v = reinterpret_cast<const StringVal&>(val);
+      result_.string_val.ptr = reinterpret_cast<char*>(v.ptr);
+      result_.string_val.len = v.len;
+      return &result_.string_val;
+    }
+    case TYPE_CHAR:
+    case TYPE_FIXED_UDA_INTERMEDIATE: {
+      const StringVal& v = reinterpret_cast<const StringVal&>(val);
+      result_.string_val.ptr = reinterpret_cast<char*>(v.ptr);
+      result_.string_val.len = v.len;
+      // Unlike STRING/VARCHAR, return the character data itself, not 'string_val'.
+      return result_.string_val.ptr;
+    }
+    case TYPE_TIMESTAMP: {
+      const TimestampVal& v = reinterpret_cast<const TimestampVal&>(val);
+      result_.timestamp_val = TimestampValue::FromTimestampVal(v);
+      return &result_.timestamp_val;
+    }
+    case TYPE_DECIMAL: {
+      const DecimalVal& v = reinterpret_cast<const DecimalVal&>(val);
+      int byte_size = type.GetByteSize();
+      switch (byte_size) {
+        case 4:
+          result_.decimal4_val = v.val4;
+          return &result_.decimal4_val;
+        case 8:
+          result_.decimal8_val = v.val8;
+          return &result_.decimal8_val;
+        case 16:
+          result_.decimal16_val = v.val16;
+          return &result_.decimal16_val;
+      }
+      // Only reached for an unexpected decimal size.
+      DCHECK(false) << byte_size;
+      return nullptr;
+    }
+    case TYPE_DATE: {
+      const DateVal& v = reinterpret_cast<const DateVal&>(val);
+      const DateValue dv = DateValue::FromDateVal(v);
+      if (UNLIKELY(!dv.IsValid())) return nullptr;  // Invalid dates become NULL.
+      result_.date_val = dv;
+      return &result_.date_val;
+    }
+    case TYPE_ARRAY:
+    case TYPE_MAP: {
+      const CollectionVal& v = reinterpret_cast<const CollectionVal&>(val);
+      result_.collection_val.ptr = v.ptr;
+      result_.collection_val.num_tuples = v.num_tuples;
+      return &result_.collection_val;
+    }
+    case TYPE_STRUCT: {
+      const StructVal& v = reinterpret_cast<const StructVal&>(val);
+      result_.struct_val = v;
+      return &result_.struct_val;
+    }
+    default:
+      DCHECK(false) << "Type not implemented: " << type;
+      return nullptr;
+  }
+}
+
+
+} // namespace impala
--- a/be/src/exprs/scalar-expr-evaluator.h
+++ b/be/src/exprs/scalar-expr-evaluator.h
@@ -255,7 +255,8 @@ class ScalarExprEvaluator {
  const ScalarExpr& root_;

  /// Stores the result of evaluation for this expr tree (or any sub-expression).
-  /// This is used in interpreted path when we need to return a void*.
+  /// This is used in interpreted path when we need to return a void* and to store the
+  /// children of a struct expression in both interpreted and codegen mode.
  ExprValue result_;

  /// For a struct scalar expression there is one evaluator created for each child of
@@ -284,6 +285,14 @@ class ScalarExprEvaluator {
  /// which need FunctionContext.
  void CreateFnCtxs(RuntimeState* state, const ScalarExpr& expr, MemPool* expr_perm_pool,
      MemPool* expr_results_pool);
+
+  /// Helper functions for codegen. The static ones take the evaluator as a parameter.
+
+  /// Converts 'val' according to 'type', stores it in 'result_' and returns a pointer to
+  /// the value, or nullptr if 'val' is NULL or an invalid date. Called from codegen.
+  void* StoreResult(const AnyVal& val, const ColumnType& type);
+  static FunctionContext* GetFunctionContext(ScalarExprEvaluator* eval, int fn_ctx_idx);
+  static ScalarExprEvaluator* GetChildEvaluator(ScalarExprEvaluator* eval, int idx);
 };
 }

--- a/be/src/exprs/slot-ref.cc
+++ b/be/src/exprs/slot-ref.cc
@@ -24,6 +24,7 @@
 #include "codegen/llvm-codegen.h"
 #include "exprs/scalar-expr-evaluator.h"
 #include "gen-cpp/Exprs_types.h"
+#include "llvm/IR/BasicBlock.h"
 #include "runtime/collection-value.h"
 #include "runtime/decimal-value.h"
 #include "runtime/fragment-state.h"
@@ -154,38 +155,51 @@ void SlotRef::AssignFnCtxIdx(int* next_fn_ctx_idx) {
 // (Note: some of the GEPs that look like no-ops are because certain offsets are 0
 // in this slot descriptor.)
 //
-// define { i8, i64 } @GetSlotRef(i8** %context, %"class.impala::TupleRow"* %row) {
+// define { i8, i64 } @GetSlotRef.22(
+//     %"class.impala::ScalarExprEvaluator"* %eval, %"class.impala::TupleRow"* %row) #51 {
 // entry:
 //   %cast_row_ptr = bitcast %"class.impala::TupleRow"* %row to i8**
-//   %tuple_addr = getelementptr i8** %cast_row_ptr, i32 0
-//   %tuple_ptr = load i8** %tuple_addr
-//   br label %check_slot_null
+//   %tuple_ptr_addr = getelementptr inbounds i8*, i8** %cast_row_ptr, i32 0
+//   %tuple_ptr = load i8*, i8** %tuple_ptr_addr
+//   br label %check_tuple_null
 //
-// check_slot_null:                                  ; preds = %entry
-//   %null_byte_ptr = getelementptr i8* %tuple_ptr, i32 0
-//   %null_byte = load i8* %null_ptr
-//   %null_byte_set = and i8 %null_byte, 2
-//   %is_null = icmp ne i8 %null_byte_set, 0
-//   br i1 %is_null, label %ret, label %get_slot
+// check_tuple_null:                         ; preds = %entry
+//   %tuple_is_null = icmp eq i8* %tuple_ptr, null
+//   br i1 %tuple_is_null, label %null_block, label %check_slot_null
 //
-// get_slot:                                         ; preds = %check_slot_null
-//   %slot_addr = getelementptr i8* %tuple_ptr, i32 8
+// check_slot_null:                          ; preds = %check_tuple_null
+//   %null_byte_ptr = getelementptr inbounds i8, i8* %tuple_ptr, i32 8
+//   %null_byte = load i8, i8* %null_byte_ptr
+//   %null_mask = and i8 %null_byte, 1
+//   %is_null = icmp ne i8 %null_mask, 0
+//   br i1 %is_null, label %null_block, label %read_slot
+//
+// read_slot:                                ; preds = %check_slot_null
+//   %slot_addr = getelementptr inbounds i8, i8* %tuple_ptr, i32 0
 //   %val_ptr = bitcast i8* %slot_addr to i64*
-//   %val = load i64* %val_ptr
-//   br label %ret
+//   %val = load i64, i64* %val_ptr
+//   br label %produce_value
 //
-// ret:                                              ; preds = %get_slot, %check_slot_null
-//   %is_null_phi = phi i8 [ 1, %check_slot_null ], [ 0, %get_slot ]
-//   %val_phi = phi i64 [ 0, %check_slot_null ], [ %val, %get_slot ]
-//   %result = insertvalue { i8, i64 } zeroinitializer, i8 %is_null_phi, 0
-//   %result1 = insertvalue { i8, i64 } %result, i64 %val_phi, 1
+// null_block:                               ; preds = %check_slot_null, %check_tuple_null
+//   br label %produce_value
+//
+// produce_value:                            ; preds = %read_slot, %null_block
+//   %is_null_phi = phi i8 [ 1, %null_block ], [ 0, %read_slot ]
+//   %val_phi = phi i64 [ 0, %null_block ], [ %val, %read_slot ]
+//   %result = insertvalue { i8, i64 } zeroinitializer, i64 %val_phi, 1
+//   %result1 = insertvalue { i8, i64 } %result, i8 %is_null_phi, 0
 //   ret { i8, i64 } %result1
 // }
 //
+// Produced for the following query:
+//   select t1.int_col, t2.bigint_col
+//   from functional.alltypestiny t1 left outer join functional.alltypestiny t2
+//   on t1.int_col = t2.bigint_col
+//   order by bigint_col;
+//
 // TODO: We could generate a typed struct (and not a char*) for Tuple for llvm.  We know
 // the types from the TupleDesc.  It will likely make this code simpler to reason about.
 Status SlotRef::GetCodegendComputeFnImpl(LlvmCodeGen* codegen, llvm::Function** fn) {
-  DCHECK_EQ(GetNumChildren(), 0);
  // SlotRefs are based on the slot_id and tuple_idx.  Combine them to make a
  // query-wide unique id. We also need to combine whether the tuple is nullable. For
  // example, in an outer join the scan node could have the same tuple id and slot id
@@ -202,190 +216,14 @@ Status SlotRef::GetCodegendComputeFnImpl(LlvmCodeGen* codegen, llvm::Function**
    return Status::OK();
  }

-  llvm::LLVMContext& context = codegen->context();
  llvm::Value* args[2];
  *fn = CreateIrFunctionPrototype("GetSlotRef", codegen, &args);
+  llvm::Value* eval_ptr = args[0];
  llvm::Value* row_ptr = args[1];

-  llvm::Value* tuple_offset = codegen->GetI32Constant(tuple_idx_);
-  llvm::Value* slot_offset = codegen->GetI32Constant(slot_offset_);
-  llvm::Value* zero = codegen->GetI8Constant(0);
-  llvm::Value* one = codegen->GetI8Constant(1);
-
-  llvm::BasicBlock* entry_block = llvm::BasicBlock::Create(context, "entry", *fn);
-  bool slot_is_nullable = null_indicator_offset_.bit_mask != 0;
-  llvm::BasicBlock* check_slot_null_indicator_block = NULL;
-  if (slot_is_nullable) {
-    check_slot_null_indicator_block =
-        llvm::BasicBlock::Create(context, "check_slot_null", *fn);
-  }
-  llvm::BasicBlock* get_slot_block = llvm::BasicBlock::Create(context, "get_slot", *fn);
-  llvm::BasicBlock* ret_block = llvm::BasicBlock::Create(context, "ret", *fn);
-
-  LlvmBuilder builder(entry_block);
-  // Get the tuple offset addr from the row
-  llvm::Value* cast_row_ptr = builder.CreateBitCast(
-      row_ptr, codegen->ptr_ptr_type(), "cast_row_ptr");
-  llvm::Value* tuple_addr =
-      builder.CreateInBoundsGEP(cast_row_ptr, tuple_offset, "tuple_addr");
-  // Load the tuple*
-  llvm::Value* tuple_ptr = builder.CreateLoad(tuple_addr, "tuple_ptr");
-
-  // Check if tuple* is null only if the tuple is nullable
-  if (tuple_is_nullable_) {
-    llvm::Value* tuple_is_null = builder.CreateIsNull(tuple_ptr, "tuple_is_null");
-    // Check slot is null only if the null indicator bit is set
-    if (slot_is_nullable) {
-      builder.CreateCondBr(tuple_is_null, ret_block, check_slot_null_indicator_block);
-    } else {
-      builder.CreateCondBr(tuple_is_null, ret_block, get_slot_block);
-    }
-  } else {
-    if (slot_is_nullable) {
-      builder.CreateBr(check_slot_null_indicator_block);
-    } else {
-      builder.CreateBr(get_slot_block);
-    }
-  }
-
-  // Branch for tuple* != NULL.  Need to check if null-indicator is set
-  if (slot_is_nullable) {
-    builder.SetInsertPoint(check_slot_null_indicator_block);
-    llvm::Value* is_slot_null = SlotDescriptor::CodegenIsNull(
-        codegen, &builder, null_indicator_offset_, tuple_ptr);
-    builder.CreateCondBr(is_slot_null, ret_block, get_slot_block);
-  }
-
-  // Branch for slot != NULL
-  builder.SetInsertPoint(get_slot_block);
-  llvm::Value* slot_ptr = builder.CreateInBoundsGEP(tuple_ptr, slot_offset, "slot_addr");
-  llvm::Value* val_ptr = builder.CreateBitCast(slot_ptr,
-      codegen->GetSlotPtrType(type_), "val_ptr");
-  // Depending on the type, load the values we need
-  llvm::Value* val = NULL;
-  llvm::Value* ptr = NULL;
-  llvm::Value* len = NULL;
-  llvm::Value* time_of_day = NULL;
-  llvm::Value* date = NULL;
-  if (type_.IsVarLenStringType() || type_.IsCollectionType()) {
-    llvm::Value* ptr_ptr = builder.CreateStructGEP(NULL, val_ptr, 0, "ptr_ptr");
-    ptr = builder.CreateLoad(ptr_ptr, "ptr");
-    llvm::Value* len_ptr = builder.CreateStructGEP(NULL, val_ptr, 1, "len_ptr");
-    len = builder.CreateLoad(len_ptr, "len");
-  } else if (type_.type == TYPE_CHAR || type_.type == TYPE_FIXED_UDA_INTERMEDIATE) {
-    // ptr and len are the slot and its fixed length.
-    ptr = builder.CreateBitCast(val_ptr, codegen->ptr_type());
-    len = codegen->GetI32Constant(type_.len);
-  } else if (type_.type == TYPE_TIMESTAMP) {
-    llvm::Value* time_of_day_ptr =
-        builder.CreateStructGEP(NULL, val_ptr, 0, "time_of_day_ptr");
-    // Cast boost::posix_time::time_duration to i64
-    llvm::Value* time_of_day_cast =
-        builder.CreateBitCast(time_of_day_ptr, codegen->i64_ptr_type());
-    time_of_day = builder.CreateLoad(time_of_day_cast, "time_of_day");
-    llvm::Value* date_ptr = builder.CreateStructGEP(NULL, val_ptr, 1, "date_ptr");
-    // Cast boost::gregorian::date to i32
-    llvm::Value* date_cast =
-        builder.CreateBitCast(date_ptr, codegen->i32_ptr_type());
-    date = builder.CreateLoad(date_cast, "date");
-  } else {
-    // val_ptr is a native type
-    val = builder.CreateLoad(val_ptr, "val");
-  }
-  builder.CreateBr(ret_block);
-
-  // Return block
-  builder.SetInsertPoint(ret_block);
-  llvm::PHINode* is_null_phi =
-      builder.CreatePHI(codegen->i8_type(), 2, "is_null_phi");
-  if (tuple_is_nullable_) is_null_phi->addIncoming(one, entry_block);
-  if (check_slot_null_indicator_block != NULL) {
-    is_null_phi->addIncoming(one, check_slot_null_indicator_block);
-  }
-  is_null_phi->addIncoming(zero, get_slot_block);
-
-  // Depending on the type, create phi nodes for each value needed to populate the return
-  // *Val. The optimizer does a better job when there is a phi node for each value, rather
-  // than having get_slot_block generate an AnyVal and having a single phi node over that.
-  // TODO: revisit this code, can possibly be simplified
-  if (type_.IsStringType() || type_.type == TYPE_FIXED_UDA_INTERMEDIATE
-      || type_.IsCollectionType()) {
-    DCHECK(ptr != NULL);
-    DCHECK(len != NULL);
-    llvm::PHINode* ptr_phi = builder.CreatePHI(ptr->getType(), 2, "ptr_phi");
-    llvm::Value* null = llvm::Constant::getNullValue(ptr->getType());
-    if (tuple_is_nullable_) {
-      ptr_phi->addIncoming(null, entry_block);
-    }
-    if (check_slot_null_indicator_block != NULL) {
-      ptr_phi->addIncoming(null, check_slot_null_indicator_block);
-    }
-    ptr_phi->addIncoming(ptr, get_slot_block);
-
-    llvm::PHINode* len_phi = builder.CreatePHI(len->getType(), 2, "len_phi");
-    null = llvm::ConstantInt::get(len->getType(), 0);
-    if (tuple_is_nullable_) {
-      len_phi->addIncoming(null, entry_block);
-    }
-    if (check_slot_null_indicator_block != NULL) {
-      len_phi->addIncoming(null, check_slot_null_indicator_block);
-    }
-    len_phi->addIncoming(len, get_slot_block);
-
-    CodegenAnyVal result =
-        CodegenAnyVal::GetNonNullVal(codegen, &builder, type(), "result");
-    result.SetIsNull(is_null_phi);
-    result.SetPtr(ptr_phi);
-    result.SetLen(len_phi);
-    builder.CreateRet(result.GetLoweredValue());
-  } else if (type_.type == TYPE_TIMESTAMP) {
-    DCHECK(time_of_day != NULL);
-    DCHECK(date != NULL);
-    llvm::PHINode* time_of_day_phi =
-        builder.CreatePHI(time_of_day->getType(), 2, "time_of_day_phi");
-    llvm::Value* null = llvm::ConstantInt::get(time_of_day->getType(), 0);
-    if (tuple_is_nullable_) {
-      time_of_day_phi->addIncoming(null, entry_block);
-    }
-    if (check_slot_null_indicator_block != NULL) {
-      time_of_day_phi->addIncoming(null, check_slot_null_indicator_block);
-    }
-    time_of_day_phi->addIncoming(time_of_day, get_slot_block);
-
-    llvm::PHINode* date_phi = builder.CreatePHI(date->getType(), 2, "date_phi");
-    null = llvm::ConstantInt::get(date->getType(), 0);
-    if (tuple_is_nullable_) {
-      date_phi->addIncoming(null, entry_block);
-    }
-    if (check_slot_null_indicator_block != NULL) {
-      date_phi->addIncoming(null, check_slot_null_indicator_block);
-    }
-    date_phi->addIncoming(date, get_slot_block);
-
-    CodegenAnyVal result =
-        CodegenAnyVal::GetNonNullVal(codegen, &builder, type(), "result");
-    result.SetIsNull(is_null_phi);
-    result.SetTimeOfDay(time_of_day_phi);
-    result.SetDate(date_phi);
-    builder.CreateRet(result.GetLoweredValue());
-  } else {
-    DCHECK(val != NULL);
-    llvm::PHINode* val_phi = builder.CreatePHI(val->getType(), 2, "val_phi");
-    llvm::Value* null = llvm::Constant::getNullValue(val->getType());
-    if (tuple_is_nullable_) {
-      val_phi->addIncoming(null, entry_block);
-    }
-    if (check_slot_null_indicator_block != NULL) {
-      val_phi->addIncoming(null, check_slot_null_indicator_block);
-    }
-    val_phi->addIncoming(val, get_slot_block);
-
-    CodegenAnyVal result =
-        CodegenAnyVal::GetNonNullVal(codegen, &builder, type(), "result");
-    result.SetIsNull(is_null_phi);
-    result.SetVal(val_phi);
-    builder.CreateRet(result.GetLoweredValue());
-  }
+  LlvmBuilder builder(codegen->context());
+  CodegenAnyVal result_value = CodegenValue(codegen, &builder, *fn, eval_ptr, row_ptr);
+  builder.CreateRet(result_value.GetLoweredValue());

  *fn = codegen->FinalizeFunction(*fn);
  if (UNLIKELY(*fn == NULL)) return Status(TErrorCode::IR_VERIFY_FAILED, "SlotRef");
@@ -393,6 +231,238 @@ Status SlotRef::GetCodegendComputeFnImpl(LlvmCodeGen* codegen, llvm::Function**
  return Status::OK();
 }

+// Emits the null checks for this slot. Depending on 'tuple_is_nullable_' and on whether
+// the slot has a null indicator, a check is generated for the tuple pointer, for the
+// slot's null indicator, for both or for neither. Control is transferred to
+// 'next_block_if_null' as soon as one check finds NULL and to 'next_block_if_not_null'
+// if none does. New blocks are inserted into 'fn' before 'next_block_if_not_null'.
+void SlotRef::CodegenNullChecking(LlvmCodeGen* codegen, LlvmBuilder* builder,
+    llvm::Function* fn, llvm::BasicBlock* next_block_if_null,
+    llvm::BasicBlock* next_block_if_not_null, llvm::Value* tuple_ptr) {
+  const bool check_tuple = tuple_is_nullable_;
+  const bool check_slot = null_indicator_offset_.bit_mask != 0;
+
+  // A block is only created for a check that is actually needed.
+  llvm::BasicBlock* tuple_check_block = nullptr;
+  if (check_tuple) {
+    tuple_check_block = llvm::BasicBlock::Create(codegen->context(), "check_tuple_null",
+        fn, /* insert before */ next_block_if_not_null);
+  }
+  llvm::BasicBlock* slot_check_block = nullptr;
+  if (check_slot) {
+    slot_check_block = llvm::BasicBlock::Create(codegen->context(), "check_slot_null",
+        fn, /* insert before */ next_block_if_not_null);
+  }
+
+  // Where control continues once the tuple pointer is not (or cannot be) null.
+  llvm::BasicBlock* const after_tuple_check =
+      check_slot ? slot_check_block : next_block_if_not_null;
+
+  if (check_tuple) {
+    builder->CreateBr(tuple_check_block);
+    builder->SetInsertPoint(tuple_check_block);
+    llvm::Value* tuple_is_null = builder->CreateIsNull(tuple_ptr, "tuple_is_null");
+    builder->CreateCondBr(tuple_is_null, next_block_if_null, after_tuple_check);
+  } else {
+    builder->CreateBr(after_tuple_check);
+  }
+
+  // Reached only with a non-null tuple pointer: test the null indicator of the slot.
+  if (check_slot) {
+    builder->SetInsertPoint(slot_check_block);
+    llvm::Value* is_slot_null = SlotDescriptor::CodegenIsNull(
+        codegen, builder, null_indicator_offset_, tuple_ptr);
+    builder->CreateCondBr(is_slot_null, next_block_if_null, next_block_if_not_null);
+  }
+}
+
+// Codegens loading the pointer and the length of a string-like or collection value
+// whose slot is pointed to by 'val_ptr'. The results are stored in '*ptr' and '*len'.
+void CodegenReadingStringOrCollectionVal(LlvmCodeGen* codegen, LlvmBuilder* builder,
+    const ColumnType& type, llvm::Value* val_ptr, llvm::Value** ptr, llvm::Value** len) {
+  if (!type.IsVarLenStringType() && !type.IsCollectionType()) {
+    DCHECK(type.type == TYPE_CHAR || type.type == TYPE_FIXED_UDA_INTERMEDIATE);
+    // Fixed length: the pointer is the slot itself and the length comes from the type.
+    *ptr = builder->CreateBitCast(val_ptr, codegen->ptr_type());
+    *len = codegen->GetI32Constant(type.len);
+    return;
+  }
+  llvm::Value* ptr_addr = builder->CreateStructGEP(nullptr, val_ptr, 0, "ptr_ptr");
+  *ptr = builder->CreateLoad(ptr_addr, "ptr");
+  llvm::Value* len_addr = builder->CreateStructGEP(nullptr, val_ptr, 1, "len_ptr");
+  *len = builder->CreateLoad(len_addr, "len");
+}
+
+// Codegens loading the two members of the timestamp slot pointed to by 'val_ptr'. The
+// loaded values are returned in '*time_of_day' (an i64) and in '*date' (an i32).
+void CodegenReadingTimestamp(LlvmCodeGen* codegen, LlvmBuilder* builder,
+    llvm::Value* val_ptr, llvm::Value** time_of_day, llvm::Value** date) {
+  // Member 0 is a boost::posix_time::time_duration, which is read as an i64.
+  llvm::Value* tod_slot =
+      builder->CreateStructGEP(nullptr, val_ptr, 0, "time_of_day_ptr");
+  *time_of_day = builder->CreateLoad(
+      builder->CreateBitCast(tod_slot, codegen->i64_ptr_type()), "time_of_day");
+  // Member 1 is a boost::gregorian::date, which is read as an i32.
+  llvm::Value* date_slot = builder->CreateStructGEP(nullptr, val_ptr, 1, "date_ptr");
+  *date = builder->CreateLoad(
+      builder->CreateBitCast(date_slot, codegen->i32_ptr_type()), "date");
+}
+
+// Codegens reading the value of this slot, assuming it is not null, starting in
+// 'read_slot_block'. The slot is located at 'slot_offset' within 'tuple_ptr'. The parts
+// of the value that were read are recorded in the returned 'CodegenAnyValReadWriteInfo',
+// along with 'fn_ctx_idx_', 'eval_ptr' and the blocks 'entry_block', 'null_block' and
+// 'read_slot_block'. For structs no value is loaded directly: the info of each child
+// 'SlotRef' is generated in its own 'child_entry' block and added to 'children()'.
+CodegenAnyValReadWriteInfo SlotRef::CodegenReadSlot(LlvmCodeGen* codegen,
+    LlvmBuilder* builder,
+    llvm::Value* eval_ptr, llvm::Value* row_ptr, llvm::BasicBlock* entry_block,
+    llvm::BasicBlock* null_block, llvm::BasicBlock* read_slot_block,
+    llvm::Value* tuple_ptr, llvm::Value* slot_offset) {
+  builder->SetInsertPoint(read_slot_block);
+  llvm::Value* slot_ptr = builder->CreateInBoundsGEP(tuple_ptr, slot_offset, "slot_addr");
+
+  // This is not used for structs because the child expressions have their own slot
+  // pointers and we only read through those, not through the struct slot pointer.
+  llvm::Value* val_ptr = type_.IsStructType() ? nullptr : builder->CreateBitCast(slot_ptr,
+      codegen->GetSlotPtrType(type_), "val_ptr");
+
+  CodegenAnyValReadWriteInfo res(codegen, builder, type_);
+
+  // Depending on the type, load each part of the value and record it in 'res'. Keeping
+  // the parts separate allows a PHI node to be created for each of them later, which
+  // the optimizer handles better than a single PHI node over a whole AnyVal.
+  if (type_.IsStructType()) {
+    llvm::Function* fn = builder->GetInsertBlock()->getParent();
+    // The helper function is the same for every child, so it is looked up only once.
+    llvm::Function* const get_child_eval_fn =
+        codegen->GetFunction(IRFunction::GET_CHILD_EVALUATOR, false);
+    const std::size_t num_children = children_.size();
+    for (std::size_t i = 0; i < num_children; ++i) {
+      ScalarExpr* child_expr = children_[i];
+      DCHECK(child_expr != nullptr);
+      SlotRef* child_slot_ref = dynamic_cast<SlotRef*>(child_expr);
+      DCHECK(child_slot_ref != nullptr);
+
+      // Each child is read starting in a block of its own, using its own evaluator.
+      llvm::BasicBlock* child_entry_block = llvm::BasicBlock::Create(codegen->context(),
+          "child_entry", fn);
+      builder->SetInsertPoint(child_entry_block);
+      llvm::Value* child_eval = builder->CreateCall(get_child_eval_fn,
+          {eval_ptr, codegen->GetI32Constant(i)}, "child_eval");
+      CodegenAnyValReadWriteInfo codegen_anyval_info =
+          child_slot_ref->CreateCodegenAnyValReadWriteInfo(codegen, builder, fn,
+              child_eval, row_ptr, child_entry_block);
+      res.children().push_back(codegen_anyval_info);
+    }
+  } else if (type_.IsStringType() || type_.type == TYPE_FIXED_UDA_INTERMEDIATE
+      || type_.IsCollectionType()) {
+    llvm::Value* ptr = nullptr;
+    llvm::Value* len = nullptr;
+    CodegenReadingStringOrCollectionVal(codegen, builder, type_, val_ptr, &ptr, &len);
+    DCHECK(ptr != nullptr);
+    DCHECK(len != nullptr);
+    res.SetPtrAndLen(ptr, len);
+  } else if (type_.type == TYPE_TIMESTAMP) {
+    llvm::Value* time_of_day = nullptr;
+    llvm::Value* date = nullptr;
+    CodegenReadingTimestamp(codegen, builder, val_ptr, &time_of_day, &date);
+    DCHECK(time_of_day != nullptr);
+    DCHECK(date != nullptr);
+    res.SetTimeAndDate(time_of_day, date);
+  } else {
+    res.SetSimpleVal(builder->CreateLoad(val_ptr, "val"));
+  }
+
+  res.SetFnCtxIdx(fn_ctx_idx_);
+  res.SetEval(eval_ptr);
+  res.SetBlocks(entry_block, null_block, read_slot_block);
+  return res;
+}
+
+/// Generates the code that reads the value of this 'SlotRef' into a *Val and returns
+/// the result as a 'CodegenAnyVal'. No return statement is emitted, which allows this
+/// function to be called recursively for structs without putting function calls in the
+/// generated code.
+///
+/// Conceptually, the generated code consists of the following parts:
+/// 1. Finding and loading the tuple address (the entry block).
+/// 2. Null checking (blocks 'check_tuple_null', 'check_slot_null' and 'null_block').
+/// 3. Reading the actual non-null value from its slot (the 'read_slot' block).
+/// 4. Producing the final *Val, whether it is null or not (the 'produce_value' block).
+///
+/// Part 1 is straightforward.
+///
+/// Part 2 may check the tuple pointer, the null indicator of the slot, both or neither,
+/// depending on what is nullable. If any of the checks finds a null, control branches to
+/// 'null_block'. That block serves as the incoming block of the PHI nodes for the case
+/// in which the *Val is null.
+///
+/// If no check finds a null, control is transferred to the 'read_slot' block, where the
+/// value is read from the slot. This may take several loads, each yielding a different
+/// part of the value, for example the pointer and the length of a StringValue. If the
+/// value is a struct, the members of the struct are read recursively, which produces
+/// additional basic blocks.
+///
+/// Part 4, the 'produce_value' block, unites the null and the non-null paths. It has a
+/// PHI node for each part of the value (ptr, len etc.) with incoming values from
+/// 'null_block' and from 'read_slot' (or from one of its descendants in case of
+/// structs). If control arrives from 'null_block', the *Val is set to null, otherwise
+/// to the value read from the slot.
+CodegenAnyVal SlotRef::CodegenValue(LlvmCodeGen* codegen, LlvmBuilder* builder,
+    llvm::Function* fn, llvm::Value* eval_ptr, llvm::Value* row_ptr,
+    llvm::BasicBlock* entry_block) {
+  CodegenAnyValReadWriteInfo rwi = CreateCodegenAnyValReadWriteInfo(
+      codegen, builder, fn, eval_ptr, row_ptr, entry_block);
+  return CodegenAnyVal::CreateFromReadWriteInfo(rwi);
+}
+
+// Emits the code that locates the tuple in the row, null-checks it and reads the slot.
+// Returns the 'CodegenAnyValReadWriteInfo' produced by CodegenReadSlot(). The insert
+// point of 'builder' is saved on entry and restored before returning.
+CodegenAnyValReadWriteInfo SlotRef::CreateCodegenAnyValReadWriteInfo(
+    LlvmCodeGen* codegen, LlvmBuilder* builder, llvm::Function* fn,
+    llvm::Value* eval_ptr, llvm::Value* row_ptr, llvm::BasicBlock* entry_block) {
+  // Remember where the caller was emitting code so that it can be restored on exit.
+  const llvm::IRBuilderBase::InsertPoint saved_ip = builder->saveIP();
+
+  llvm::Value* tuple_idx_val = codegen->GetI32Constant(tuple_idx_);
+  llvm::Value* slot_offset_val = codegen->GetI32Constant(slot_offset_);
+
+  llvm::LLVMContext& ctx = codegen->context();
+
+  // Set up the basic blocks. An entry block is only created when the caller did not
+  // supply one.
+  if (entry_block == nullptr) entry_block = llvm::BasicBlock::Create(ctx, "entry", fn);
+
+  llvm::BasicBlock* const read_slot_block =
+      llvm::BasicBlock::Create(ctx, "read_slot", fn);
+
+  // Every code path whose result is null is funnelled through this block. It does
+  // nothing but branch unconditionally to 'produce_value_block', where the PHI nodes can
+  // add it as the incoming branch for the null case. That is simpler and more readable
+  // than having many predecessor blocks for the null case in 'produce_value_block'.
+  llvm::BasicBlock* const null_block = llvm::BasicBlock::Create(ctx, "null_block", fn);
+
+  // Part 1: find the tuple address.
+  builder->SetInsertPoint(entry_block);
+  // View the row as an array of pointers and address the entry of our tuple.
+  llvm::Value* row_as_ptr_array =
+      builder->CreateBitCast(row_ptr, codegen->ptr_ptr_type(), "cast_row_ptr");
+  llvm::Value* tuple_ptr_addr =
+      builder->CreateInBoundsGEP(row_as_ptr_array, tuple_idx_val, "tuple_ptr_addr");
+  // Load the tuple*.
+  llvm::Value* tuple_ptr = builder->CreateLoad(tuple_ptr_addr, "tuple_ptr");
+
+  // Part 2: null checking.
+  CodegenNullChecking(codegen, builder, fn, null_block, read_slot_block, tuple_ptr);
+
+  // Part 3: read the non-null value.
+  CodegenAnyValReadWriteInfo slot_read_info = CodegenReadSlot(codegen, builder,
+      eval_ptr, row_ptr, entry_block, null_block, read_slot_block, tuple_ptr,
+      slot_offset_val);
+
+  builder->restoreIP(saved_ip);
+  return slot_read_info;
+}
+
 #define SLOT_REF_GET_FUNCTION(type_lit, type_val, type_c) \
    type_val SlotRef::Get##type_val##Interpreted( \
        ScalarExprEvaluator* eval, const TupleRow* row) const { \
--- a/be/src/exprs/slot-ref.h
+++ b/be/src/exprs/slot-ref.h
@@ -18,6 +18,7 @@
 #ifndef IMPALA_EXPRS_SLOTREF_H
 #define IMPALA_EXPRS_SLOTREF_H

+#include "codegen/codegen-anyval.h"
 #include "exprs/scalar-expr.h"
 #include "runtime/descriptors.h"

@@ -83,12 +84,32 @@ class SlotRef : public ScalarExpr {
      ScalarExprEvaluator*, const TupleRow*) const override;

 private:
+  /// Generates code that reads the value of this slot from the row pointed to by
+  /// 'row_ptr' and returns it as a 'CodegenAnyVal'. No return statement is generated.
+  /// If 'entry_block' is null, a new entry block is created in 'fn'. See the comment at
+  /// the definition for the structure of the generated code.
+  CodegenAnyVal CodegenValue(LlvmCodeGen* codegen, LlvmBuilder* builder,
+      llvm::Function* fn, llvm::Value* eval_ptr, llvm::Value* row_ptr,
+      llvm::BasicBlock* entry_block = nullptr);
+  /// Generates the null checking code for this slot.
+  /// NOTE(review): the definition is not shown here; judging by the parameter names and
+  /// the call site, control continues in 'next_block_if_null' when the tuple or the slot
+  /// is null and in 'next_block_if_not_null' otherwise - confirm against the definition.
+  void CodegenNullChecking(LlvmCodeGen* codegen, LlvmBuilder* builder, llvm::Function* fn,
+      llvm::BasicBlock* next_block_if_null, llvm::BasicBlock* next_block_if_not_null,
+      llvm::Value* tuple_ptr);
+
  int tuple_idx_;  // within row
  int slot_offset_;  // within tuple
  NullIndicatorOffset null_indicator_offset_;  // within tuple
  const SlotId slot_id_;
  bool tuple_is_nullable_; // true if the tuple is nullable.
  const SlotDescriptor* slot_desc_ = nullptr;
+
+  // Generates code that loads the tuple pointer from 'row_ptr', performs the null checks
+  // and reads the slot; returns a 'CodegenAnyValReadWriteInfo' describing the result.
+  // If 'entry_block' is null, a new block named "entry" is created in 'fn'.
+  // After the function returns, the insert point of the LlvmBuilder will be reset to
+  // where it was before the call.
+  CodegenAnyValReadWriteInfo CreateCodegenAnyValReadWriteInfo(LlvmCodeGen* codegen,
+      LlvmBuilder* builder,
+      llvm::Function* fn,
+      llvm::Value* eval_ptr,
+      llvm::Value* row_ptr,
+      llvm::BasicBlock* entry_block = nullptr);
+  // Generates the code that reads the non-null value from the slot (part 3 of
+  // CreateCodegenAnyValReadWriteInfo()) and returns the filled-in
+  // 'CodegenAnyValReadWriteInfo'.
+  CodegenAnyValReadWriteInfo CodegenReadSlot(LlvmCodeGen* codegen, LlvmBuilder* builder,
+      llvm::Value* eval_ptr, llvm::Value* row_ptr, llvm::BasicBlock* entry_block,
+      llvm::BasicBlock* null_block, llvm::BasicBlock* read_slot_block,
+      llvm::Value* tuple_ptr, llvm::Value* slot_offset);
 };

 }
--- a/be/src/runtime/descriptors.cc
+++ b/be/src/runtime/descriptors.cc
@@ -25,6 +25,7 @@
 #include <llvm/ExecutionEngine/ExecutionEngine.h>
 #include <llvm/IR/DataLayout.h>

+#include "codegen/codegen-anyval.h"
 #include "codegen/llvm-codegen.h"
 #include "common/object-pool.h"
 #include "common/status.h"
@@ -681,6 +682,70 @@ void DescriptorTbl::GetTupleDescs(vector<TupleDescriptor*>* descs) const {
  }
 }

+// Generates code that loads the native slot value behind 'raw_val_ptr' and sets the
+// corresponding parts of 'any_val'. 'raw_val_ptr' must point to the slot type that
+// matches 'any_val->type()'. Types not handled by the switch hit a DCHECK.
+void SlotDescriptor::CodegenLoadAnyVal(CodegenAnyVal* any_val, llvm::Value* raw_val_ptr) {
+  DCHECK(raw_val_ptr->getType()->isPointerTy());
+  llvm::Type* pointee_type = raw_val_ptr->getType()->getPointerElementType();
+  LlvmCodeGen* const cg = any_val->codegen();
+  LlvmBuilder* const ir_builder = any_val->builder();
+  const ColumnType& col_type = any_val->type();
+  DCHECK_EQ(pointee_type, cg->GetSlotType(col_type))
+      << endl
+      << LlvmCodeGen::Print(raw_val_ptr) << endl
+      << col_type << " => " << LlvmCodeGen::Print(
+          cg->GetSlotType(col_type));
+
+  switch (col_type.type) {
+    case TYPE_BOOLEAN:
+    case TYPE_TINYINT:
+    case TYPE_SMALLINT:
+    case TYPE_INT:
+    case TYPE_BIGINT:
+    case TYPE_FLOAT:
+    case TYPE_DOUBLE:
+    case TYPE_DECIMAL:
+    case TYPE_DATE: {
+      // Single-value types: load the slot and hand the result to SetVal().
+      llvm::Value* loaded = ir_builder->CreateLoad(raw_val_ptr, "raw_val");
+      any_val->SetVal(loaded);
+      break;
+    }
+    case TYPE_STRING:
+    case TYPE_VARCHAR: {
+      // StringValue -> StringVal: field 0 is used as the pointer, field 1 as the length.
+      llvm::Value* str = ir_builder->CreateLoad(raw_val_ptr, "string_value");
+      llvm::Value* str_ptr = ir_builder->CreateExtractValue(str, 0, "ptr");
+      any_val->SetPtr(str_ptr);
+      llvm::Value* str_len = ir_builder->CreateExtractValue(str, 1, "len");
+      any_val->SetLen(str_len);
+      break;
+    }
+    case TYPE_CHAR:
+    case TYPE_FIXED_UDA_INTERMEDIATE: {
+      // Fixed-size slot -> StringVal: the StringVal points directly at the slot and its
+      // length is the length recorded in the column type.
+      llvm::Value* slot_as_bytes =
+          ir_builder->CreateBitCast(raw_val_ptr, cg->ptr_type());
+      any_val->SetPtr(slot_as_bytes);
+      any_val->SetLen(cg->GetI32Constant(col_type.len));
+      break;
+    }
+    case TYPE_TIMESTAMP: {
+      // TimestampValue -> TimestampVal. TimestampValue has type
+      //   { boost::posix_time::time_duration, boost::gregorian::date }
+      // = { {{{i64}}}, {{i32}} }
+      llvm::Value* ts = ir_builder->CreateLoad(raw_val_ptr, "ts_value");
+
+      // The i64 time of day is nested inside boost::posix_time::time_duration.
+      uint32_t time_of_day_path[] = {0, 0, 0, 0};
+      llvm::Value* time_of_day_val =
+          ir_builder->CreateExtractValue(ts, time_of_day_path, "time_of_day");
+      DCHECK(time_of_day_val->getType()->isIntegerTy(64));
+      any_val->SetTimeOfDay(time_of_day_val);
+
+      // The i32 date is nested inside boost::gregorian::date.
+      uint32_t date_path[] = {1, 0, 0};
+      llvm::Value* date_val = ir_builder->CreateExtractValue(ts, date_path, "date");
+      DCHECK(date_val->getType()->isIntegerTy(32));
+      any_val->SetDate(date_val);
+      break;
+    }
+    default:
+      DCHECK(false) << "NYI: " << col_type.DebugString();
+      break;
+  }
+}
+
 llvm::Value* SlotDescriptor::CodegenIsNull(
    LlvmCodeGen* codegen, LlvmBuilder* builder, llvm::Value* tuple) const {
  return CodegenIsNull(codegen, builder, null_indicator_offset_, tuple);
@@ -744,6 +809,78 @@ void SlotDescriptor::CodegenSetNullIndicator(
  builder->CreateStore(result, null_byte_ptr);
 }

+// Example IR for materializing a string column with non-NULL 'pool'. Includes the part
+// that is generated by CodegenAnyVal::ToReadWriteInfo().
+//
+// Produced for the following query:
+//   select string_col from functional_orc_def.alltypes order by string_col limit 2;
+//
+//   ; [insert point starts here]
+//   br label %entry1
+//
+// entry1:                                           ; preds = %entry
+//   %1 = extractvalue { i64, i8* } %src, 0
+//   %is_null = trunc i64 %1 to i1
+//   br i1 %is_null, label %null, label %non_null
+//
+// non_null:                                         ; preds = %entry1
+//   %src2 = extractvalue { i64, i8* } %src, 1
+//   %2 = extractvalue { i64, i8* } %src, 0
+//   %3 = ashr i64 %2, 32
+//   %4 = trunc i64 %3 to i32
+//   %slot = getelementptr inbounds <{ %"struct.impala::StringValue", i8 }>,
+//                                  <{ %"struct.impala::StringValue", i8 }>* %tuple,
+//                                  i32 0,
+//                                  i32 0
+//   %5 = insertvalue %"struct.impala::StringValue" zeroinitializer, i32 %4, 1
+//   %6 = sext i32 %4 to i64
+//   %new_ptr = call i8* @_ZN6impala7MemPool8AllocateILb0EEEPhli(
+//       %"class.impala::MemPool"* %pool,
+//       i64 %6,
+//       i32 8)
+//   call void @llvm.memcpy.p0i8.p0i8.i32(
+//       i8* %new_ptr,
+//       i8* %src2,
+//       i32 %4,
+//       i32 0,
+//       i1 false)
+//   %7 = insertvalue %"struct.impala::StringValue" %5, i8* %new_ptr, 0
+//   store %"struct.impala::StringValue" %7, %"struct.impala::StringValue"* %slot
+//   br label %end_write
+//
+// null:                                             ; preds = %entry1
+//   %8 = bitcast <{ %"struct.impala::StringValue", i8 }>* %tuple to i8*
+//   %null_byte_ptr = getelementptr inbounds i8, i8* %8, i32 12
+//   %null_byte = load i8, i8* %null_byte_ptr
+//   %null_bit_set = or i8 %null_byte, 1
+//   store i8 %null_bit_set, i8* %null_byte_ptr
+//   br label %end_write
+//
+// end_write:                                        ; preds = %null, %non_null
+//   ; [insert point ends here]
+void SlotDescriptor::CodegenWriteToSlot(const CodegenAnyValReadWriteInfo& read_write_info,
+    llvm::Value* tuple_llvm_struct_ptr, llvm::Value* pool_val,
+    llvm::BasicBlock* insert_before) const {
+  DCHECK(tuple_llvm_struct_ptr->getType()->isPointerTy());
+  DCHECK(tuple_llvm_struct_ptr->getType()->getPointerElementType()->isStructTy());
+  LlvmBuilder* const ir_builder = read_write_info.builder();
+  llvm::LLVMContext& ctx = read_write_info.codegen()->context();
+  llvm::Function* const parent_fn = ir_builder->GetInsertBlock()->getParent();
+
+  // If the caller did not name a block to continue in after the write, create one.
+  if (insert_before == nullptr) {
+    insert_before = llvm::BasicBlock::Create(ctx, "end_write", parent_fn);
+  }
+
+  // Connect the current insert position to the entry block of the value to be written.
+  read_write_info.entry_block().BranchTo(ir_builder);
+
+  // At the top level the tuple that contains the slot is also the main tuple.
+  CodegenWriteToSlotHelper(read_write_info, tuple_llvm_struct_ptr,
+      tuple_llvm_struct_ptr, pool_val, NonWritableBasicBlock(insert_before));
+
+  // Subsequent code is emitted after the conditional blocks.
+  ir_builder->SetInsertPoint(insert_before);
+}
+
 llvm::Value* SlotDescriptor::CodegenGetNullByte(
    LlvmCodeGen* codegen, LlvmBuilder* builder,
    const NullIndicatorOffset& null_indicator_offset, llvm::Value* tuple,
@@ -757,6 +894,256 @@ llvm::Value* SlotDescriptor::CodegenGetNullByte(
  return builder->CreateLoad(byte_ptr, "null_byte");
 }

+// TODO: Maybe separate null handling and non-null-handling so that it is easier to insert
+// a different null handling mechanism (for example in hash tables when structs are
+// supported there).
+//
+// Emits the code that writes the value described by 'read_write_info' into this slot.
+// The non-null path is emitted into 'read_write_info.non_null_block()' and the null path
+// into 'read_write_info.null_block()'. 'tuple_llvm_struct_ptr' points to the LLVM struct
+// that has this slot as a field; 'main_tuple_llvm_struct_ptr' is the tuple on which the
+// null indicator is set. 'insert_before' is the block both paths branch to when done.
+void SlotDescriptor::CodegenWriteToSlotHelper(
+    const CodegenAnyValReadWriteInfo& read_write_info,
+    llvm::Value* main_tuple_llvm_struct_ptr, llvm::Value* tuple_llvm_struct_ptr,
+    llvm::Value* pool_val,
+    NonWritableBasicBlock insert_before) const {
+  DCHECK(main_tuple_llvm_struct_ptr->getType()->isPointerTy());
+  DCHECK(main_tuple_llvm_struct_ptr->getType()->getPointerElementType()->isStructTy());
+  DCHECK(tuple_llvm_struct_ptr->getType()->isPointerTy());
+  DCHECK(tuple_llvm_struct_ptr->getType()->getPointerElementType()->isStructTy());
+  LlvmBuilder* builder = read_write_info.builder();
+
+  // Non-null block: write slot
+  builder->SetInsertPoint(read_write_info.non_null_block());
+  // Address this slot as field 'llvm_field_idx()' of the containing LLVM struct.
+  llvm::Value* slot = builder->CreateStructGEP(nullptr, tuple_llvm_struct_ptr,
+      llvm_field_idx(), "slot");
+  if (read_write_info.type().IsStructType()) {
+    // The struct's children are written one by one; 'slot' becomes their containing
+    // struct while the main tuple pointer is passed through unchanged.
+    CodegenStoreStructToNativePtr(read_write_info, main_tuple_llvm_struct_ptr,
+        slot, pool_val, insert_before);
+  } else {
+    CodegenStoreNonNullAnyVal(read_write_info, slot, pool_val);
+
+    // We only need this branch if we are not a struct, because for structs, the last leaf
+    // (non-struct) field will add this branch.
+    insert_before.BranchTo(builder);
+  }
+
+  // Null block: set null bit
+  builder->SetInsertPoint(read_write_info.null_block());
+  CodegenSetToNull(read_write_info, main_tuple_llvm_struct_ptr);
+  insert_before.BranchTo(builder);
+}
+
+// Emits the code that writes the children of a non-null struct value. The children are
+// chained: each child continues in the entry block of the next one and the last child
+// continues in 'insert_before'.
+void SlotDescriptor::CodegenStoreStructToNativePtr(
+    const CodegenAnyValReadWriteInfo& read_write_info, llvm::Value* main_tuple_ptr,
+    llvm::Value* struct_slot_ptr, llvm::Value* pool_val,
+    NonWritableBasicBlock insert_before) const {
+  DCHECK(type_.IsStructType());
+  DCHECK(children_tuple_descriptor_ != nullptr);
+  DCHECK(read_write_info.type().IsStructType());
+  DCHECK(main_tuple_ptr->getType()->isPointerTy());
+  DCHECK(main_tuple_ptr->getType()->getPointerElementType()->isStructTy());
+  DCHECK(struct_slot_ptr->getType()->isPointerTy());
+  DCHECK(struct_slot_ptr->getType()->getPointerElementType()->isStructTy());
+
+  LlvmBuilder* const ir_builder = read_write_info.builder();
+  const std::vector<SlotDescriptor*>& child_slots = children_tuple_descriptor_->slots();
+  DCHECK_GE(child_slots.size(), 1);
+  DCHECK_EQ(child_slots.size(), read_write_info.children().size());
+
+  // Continue from the current insert position in the entry block of the first child.
+  read_write_info.children()[0].entry_block().BranchTo(ir_builder);
+
+  const int num_children = child_slots.size();
+  for (int idx = 0; idx < num_children; ++idx) {
+    const bool is_last_child = (idx + 1 == num_children);
+    // Where control goes once this child has been written.
+    NonWritableBasicBlock continuation = is_last_child
+        ? insert_before : read_write_info.children()[idx + 1].entry_block();
+    child_slots[idx]->CodegenWriteToSlotHelper(read_write_info.children()[idx],
+        main_tuple_ptr, struct_slot_ptr, pool_val, continuation);
+  }
+}
+
+// Create a 'CodegenAnyValReadWriteInfo' but without creating basic blocks for null
+// handling as this function should only be called if we assume that the value is not
+// null.
+//
+// The value parts of 'any_val' (ptr and len, time of day and date, or the simple value,
+// depending on the type) are copied into the returned object. 'pool_val' is currently
+// not used by this function. Struct and FIXED_UDA_INTERMEDIATE values are not supported
+// and hit a DCHECK.
+CodegenAnyValReadWriteInfo CodegenAnyValToReadWriteInfo(CodegenAnyVal& any_val,
+    llvm::Value* pool_val) {
+  CodegenAnyValReadWriteInfo rwi(any_val.codegen(), any_val.builder(), any_val.type());
+
+  switch (rwi.type().type) {
+    case TYPE_STRING:
+    case TYPE_VARCHAR:
+    case TYPE_ARRAY: // CollectionVal has same memory layout as StringVal.
+    case TYPE_MAP: { // CollectionVal has same memory layout as StringVal.
+      rwi.SetPtrAndLen(any_val.GetPtr(), any_val.GetLen());
+      break;
+    }
+    case TYPE_CHAR:
+      // The length of a CHAR is fixed by the type, so it is taken from there.
+      rwi.SetPtrAndLen(any_val.GetPtr(), rwi.codegen()->GetI32Constant(rwi.type().len));
+      break;
+    case TYPE_FIXED_UDA_INTERMEDIATE:
+      DCHECK(false) << "FIXED_UDA_INTERMEDIATE does not need to be copied: the "
+                    << "StringVal must be set up to point to the output slot";
+      break;
+    case TYPE_TIMESTAMP: {
+      rwi.SetTimeAndDate(any_val.GetTimeOfDay(), any_val.GetDate());
+      break;
+    }
+    case TYPE_BOOLEAN:
+    case TYPE_TINYINT:
+    case TYPE_SMALLINT:
+    case TYPE_INT:
+    case TYPE_BIGINT:
+    case TYPE_FLOAT:
+    case TYPE_DOUBLE:
+    case TYPE_DECIMAL:
+    case TYPE_DATE:
+      // The representations of the types match - just store the value.
+      rwi.SetSimpleVal(any_val.GetVal());
+      break;
+    case TYPE_STRUCT:
+      DCHECK(false) << "Invalid type for this function. "
+                    << "Call 'StoreStructToNativePtr()' instead.";
+      // Explicit break: do not fall through into the "NYI" handling below.
+      break;
+    default:
+      DCHECK(false) << "NYI: " << rwi.type().DebugString();
+      break;
+  }
+
+  return rwi;
+}
+
+// Overload for 'CodegenAnyVal': converts 'any_val' to a 'CodegenAnyValReadWriteInfo' and
+// delegates to the overload that takes that type.
+void SlotDescriptor::CodegenStoreNonNullAnyVal(CodegenAnyVal& any_val,
+      llvm::Value* raw_val_ptr, llvm::Value* pool_val) {
+  CodegenStoreNonNullAnyVal(
+      CodegenAnyValToReadWriteInfo(any_val, pool_val), raw_val_ptr, pool_val);
+}
+
+// Generates code that stores the non-null value described by 'read_write_info' into the
+// native slot pointed to by 'raw_val_ptr'. 'pool_val' is only passed on for strings and
+// collections (see CodegenWriteStringOrCollectionToSlot()). Struct and
+// FIXED_UDA_INTERMEDIATE values are not supported and hit a DCHECK.
+void SlotDescriptor::CodegenStoreNonNullAnyVal(
+    const CodegenAnyValReadWriteInfo& read_write_info, llvm::Value* raw_val_ptr,
+    llvm::Value* pool_val) {
+  LlvmBuilder* builder = read_write_info.builder();
+  const ColumnType& type = read_write_info.type();
+  switch (type.type) {
+    case TYPE_STRING:
+    case TYPE_VARCHAR:
+    case TYPE_ARRAY: // CollectionVal has same memory layout as StringVal.
+    case TYPE_MAP: { // CollectionVal has same memory layout as StringVal.
+      CodegenWriteStringOrCollectionToSlot(read_write_info, raw_val_ptr, pool_val);
+      break;
+    }
+    case TYPE_CHAR:
+      // Fixed-length data: copy 'type.len' bytes from the source pointer into the slot.
+      read_write_info.codegen()->CodegenMemcpy(
+          builder, raw_val_ptr, read_write_info.GetPtrAndLen().ptr, type.len);
+      break;
+    case TYPE_FIXED_UDA_INTERMEDIATE:
+      DCHECK(false) << "FIXED_UDA_INTERMEDIATE does not need to be copied: the "
+                    << "StringVal must be set up to point to the output slot";
+      break;
+    case TYPE_TIMESTAMP: {
+      llvm::Value* timestamp_value = CodegenToTimestampValue(read_write_info);
+      builder->CreateStore(timestamp_value, raw_val_ptr);
+      break;
+    }
+    case TYPE_BOOLEAN: {
+      // The value is truncated to i1 before it is stored.
+      llvm::Value* bool_as_i1 = builder->CreateTrunc(
+          read_write_info.GetSimpleVal(), builder->getInt1Ty(), "bool_as_i1");
+      builder->CreateStore(bool_as_i1, raw_val_ptr);
+      break;
+    }
+    case TYPE_TINYINT:
+    case TYPE_SMALLINT:
+    case TYPE_INT:
+    case TYPE_BIGINT:
+    case TYPE_FLOAT:
+    case TYPE_DOUBLE:
+    case TYPE_DECIMAL:
+    case TYPE_DATE:
+      // The representations of the types match - just store the value.
+      builder->CreateStore(read_write_info.GetSimpleVal(), raw_val_ptr);
+      break;
+    case TYPE_STRUCT:
+      DCHECK(false) << "Invalid type for this function. "
+                    << "Call 'StoreStructToNativePtr()' instead.";
+      // Explicit break: do not fall through into the "NYI" handling below.
+      break;
+    default:
+      DCHECK(false) << "NYI: " << type.DebugString();
+      break;
+  }
+}
+
+// Creates an entry block alloca of the slot type of 'read_write_info', stores the value
+// into it and returns the pointer to the alloca.
+llvm::Value* SlotDescriptor::CodegenStoreNonNullAnyValToNewAlloca(
+      const CodegenAnyValReadWriteInfo& read_write_info, llvm::Value* pool_val) {
+  LlvmCodeGen* const cg = read_write_info.codegen();
+  llvm::Type* const slot_type = cg->GetSlotType(read_write_info.type());
+  llvm::Value* const slot_alloca =
+      cg->CreateEntryBlockAlloca(*read_write_info.builder(), slot_type);
+  SlotDescriptor::CodegenStoreNonNullAnyVal(read_write_info, slot_alloca, pool_val);
+  return slot_alloca;
+}
+
+// Overload for 'CodegenAnyVal': converts 'any_val' to a 'CodegenAnyValReadWriteInfo' and
+// delegates to the overload that takes that type.
+llvm::Value* SlotDescriptor::CodegenStoreNonNullAnyValToNewAlloca(
+      CodegenAnyVal& any_val, llvm::Value* pool_val) {
+  return CodegenStoreNonNullAnyValToNewAlloca(
+      CodegenAnyValToReadWriteInfo(any_val, pool_val), pool_val);
+}
+
+// Generates code that marks this slot as NULL in 'tuple' by setting its null indicator.
+// For struct slots the same is done recursively for every child slot, using the same
+// 'tuple'. Only the codegen object and the builder of 'read_write_info' are used.
+void SlotDescriptor::CodegenSetToNull(const CodegenAnyValReadWriteInfo& read_write_info,
+    llvm::Value* tuple) const {
+  LlvmCodeGen* const cg = read_write_info.codegen();
+  CodegenSetNullIndicator(cg, read_write_info.builder(), tuple, cg->true_value());
+
+  // Nothing more to do for non-struct slots.
+  if (!type_.IsStructType()) return;
+
+  DCHECK(children_tuple_descriptor_ != nullptr);
+  for (SlotDescriptor* child : children_tuple_descriptor_->slots()) {
+    child->CodegenSetToNull(read_write_info, tuple);
+  }
+}
+
+// Generates code that builds a StringValue/CollectionValue from the pointer and length
+// held in 'read_write_info' and stores it at 'slot_ptr'. With a non-null 'pool_val' the
+// data is first copied into memory allocated from that pool.
+void SlotDescriptor::CodegenWriteStringOrCollectionToSlot(
+    const CodegenAnyValReadWriteInfo& read_write_info,
+    llvm::Value* slot_ptr, llvm::Value* pool_val) {
+  LlvmCodeGen* const cg = read_write_info.codegen();
+  LlvmBuilder* const ir_builder = read_write_info.builder();
+  const ColumnType& col_type = read_write_info.type();
+  DCHECK(col_type.IsStringType() || col_type.IsCollectionType());
+
+  const auto& ptr_and_len = read_write_info.GetPtrAndLen();
+
+  // Start from a zero-initialised native value and fill in the length (field 1).
+  llvm::Value* slot_value = llvm::Constant::getNullValue(cg->GetSlotType(col_type));
+  slot_value = ir_builder->CreateInsertValue(slot_value, ptr_and_len.len, 1);
+
+  // The pointer (field 0) is either the source pointer itself or, if a pool was given,
+  // a fresh allocation from 'pool_val' into which the source data is copied.
+  llvm::Value* data_ptr = ptr_and_len.ptr;
+  if (pool_val != nullptr) {
+    data_ptr =
+        cg->CodegenMemPoolAllocate(ir_builder, pool_val, ptr_and_len.len, "new_ptr");
+    cg->CodegenMemcpy(ir_builder, data_ptr, ptr_and_len.ptr, ptr_and_len.len);
+  }
+  slot_value = ir_builder->CreateInsertValue(slot_value, data_ptr, 0);
+
+  ir_builder->CreateStore(slot_value, slot_ptr);
+}
+
+// Generates code that assembles a TimestampValue from the time of day and the date held
+// in 'read_write_info' and returns the resulting value.
+llvm::Value* SlotDescriptor::CodegenToTimestampValue(
+    const CodegenAnyValReadWriteInfo& read_write_info) {
+  const ColumnType& col_type = read_write_info.type();
+  DCHECK_EQ(col_type.type, TYPE_TIMESTAMP);
+  LlvmBuilder* const ir_builder = read_write_info.builder();
+
+  // TimestampVal -> TimestampValue. TimestampValue has type
+  //   { boost::posix_time::time_duration, boost::gregorian::date }
+  // = { {{{i64}}}, {{i32}} }
+  llvm::Type* ts_slot_type = read_write_info.codegen()->GetSlotType(col_type);
+  llvm::Value* result = llvm::Constant::getNullValue(ts_slot_type);
+
+  // Fill in the i64 nested in the time_duration member.
+  uint32_t time_of_day_path[] = {0, 0, 0, 0};
+  result = ir_builder->CreateInsertValue(
+      result, read_write_info.GetTimeAndDate().time_of_day, time_of_day_path);
+
+  // Fill in the i32 nested in the date member.
+  uint32_t date_path[] = {1, 0, 0};
+  result = ir_builder->CreateInsertValue(
+      result, read_write_info.GetTimeAndDate().date, date_path);
+  return result;
+}
+
 vector<SlotDescriptor*> TupleDescriptor::SlotsOrderedByIdx() const {
  vector<SlotDescriptor*> sorted_slots(slots().size());
  for (SlotDescriptor* slot: slots()) sorted_slots[slot->slot_idx_] = slot;
@@ -768,17 +1155,11 @@ vector<SlotDescriptor*> TupleDescriptor::SlotsOrderedByIdx() const {
 }

 llvm::StructType* TupleDescriptor::GetLlvmStruct(LlvmCodeGen* codegen) const {
-  // Get slots in the order they will appear in LLVM struct.
-  vector<SlotDescriptor*> sorted_slots = SlotsOrderedByIdx();
-
-  // Add the slot types to the struct description.
-  vector<llvm::Type*> struct_fields;
  int curr_struct_offset = 0;
-  for (SlotDescriptor* slot: sorted_slots) {
-    DCHECK_EQ(curr_struct_offset, slot->tuple_offset());
-    struct_fields.push_back(codegen->GetSlotType(slot->type()));
-    curr_struct_offset = slot->tuple_offset() + slot->slot_size();
-  }
+  auto struct_fields_and_offset = GetLlvmTypesAndOffset(codegen, curr_struct_offset);
+  vector<llvm::Type*> struct_fields = struct_fields_and_offset.first;
+  curr_struct_offset = struct_fields_and_offset.second;
+
  // For each null byte, add a byte to the struct
  for (int i = 0; i < num_null_bytes_; ++i) {
    struct_fields.push_back(codegen->i8_type());
@@ -791,20 +1172,52 @@ llvm::StructType* TupleDescriptor::GetLlvmStruct(LlvmCodeGen* codegen) const {
        byte_size_ - curr_struct_offset));
  }

+  return CreateLlvmStructTypeFromFieldTypes(codegen, struct_fields, 0);
+}
+
+// Collects the LLVM types of this tuple's slots in the order they appear in the LLVM
+// struct. A struct slot contributes one nested LLVM struct type built from the slots of
+// its children tuple. Returns the types together with the offset right after the last
+// slot; 'curr_struct_offset' is the offset at which the first slot is expected.
+pair<vector<llvm::Type*>, int> TupleDescriptor::GetLlvmTypesAndOffset(
+    LlvmCodeGen* codegen, int curr_struct_offset) const {
+  vector<llvm::Type*> field_types;
+  for (SlotDescriptor* slot_desc : SlotsOrderedByIdx()) {
+    // Every slot has to start exactly where the previous one ended.
+    DCHECK_EQ(curr_struct_offset, slot_desc->tuple_offset());
+
+    llvm::Type* field_type = nullptr;
+    if (!slot_desc->type().IsStructType()) {
+      field_type = codegen->GetSlotType(slot_desc->type());
+    } else {
+      const int struct_slot_offset = slot_desc->tuple_offset();
+      const TupleDescriptor* children = slot_desc->children_tuple_descriptor();
+      DCHECK(children != nullptr);
+      // The offsets of the children continue from the current offset.
+      vector<llvm::Type*> children_types =
+          children->GetLlvmTypesAndOffset(codegen, curr_struct_offset).first;
+      field_type = children->CreateLlvmStructTypeFromFieldTypes(
+          codegen, children_types, struct_slot_offset);
+    }
+    field_types.push_back(field_type);
+
+    curr_struct_offset = slot_desc->tuple_offset() + slot_desc->slot_size();
+  }
+  return make_pair(field_types, curr_struct_offset);
+}
+
+// Creates a packed LLVM struct type from 'field_types' and verifies with DCHECKs that,
+// for every slot of this tuple descriptor, the offset of the slot's field in the LLVM
+// struct plus 'parent_slot_offset' equals the slot's tuple offset.
+llvm::StructType* TupleDescriptor::CreateLlvmStructTypeFromFieldTypes(
+    LlvmCodeGen* codegen, const vector<llvm::Type*>& field_types,
+    int parent_slot_offset) const {
  // Construct the struct type. Use the packed layout although not strictly necessary
  // because the fields are already aligned, so LLVM should not add any padding. The
  // fields are already aligned because we order the slots by descending size and only
  // have powers-of-two slot sizes. Note that STRING and TIMESTAMP slots both occupy
  // 16 bytes although their useful payload is only 12 bytes.
  llvm::StructType* tuple_struct = llvm::StructType::get(codegen->context(),
-      llvm::ArrayRef<llvm::Type*>(struct_fields), true);
+      llvm::ArrayRef<llvm::Type*>(field_types), true);
  const llvm::DataLayout& data_layout = codegen->execution_engine()->getDataLayout();
  const llvm::StructLayout* layout = data_layout.getStructLayout(tuple_struct);
  for (SlotDescriptor* slot: slots()) {
    // Verify that the byte offset in the llvm struct matches the tuple offset
    // computed in the FE.
-    DCHECK_EQ(layout->getElementOffset(slot->llvm_field_idx()), slot->tuple_offset())
-        << id_;
+    DCHECK_EQ(layout->getElementOffset(slot->llvm_field_idx()) + parent_slot_offset,
+        slot->tuple_offset()) << id_;
  }
  return tuple_struct;
 }
--- a/be/src/runtime/descriptors.h
+++ b/be/src/runtime/descriptors.h
@@ -24,6 +24,7 @@
 #include <utility>
 #include <vector>

+#include "codegen/codegen-anyval-read-write-info.h"
 #include "codegen/impala-ir.h"
 #include "common/global-types.h"
 #include "common/status.h"
@@ -35,11 +36,13 @@
 namespace llvm {
  class Constant;
  class StructType;
+  class Type;
  class Value;
 };

 namespace impala {

+class CodegenAnyVal;
 class LlvmBuilder;
 class LlvmCodeGen;
 class ObjectPool;
@@ -154,6 +157,10 @@ class SlotDescriptor {
  /// of other_desc, but not necessarily ids.
  bool LayoutEquals(const SlotDescriptor& other_desc) const;

+  /// Load "any_val"'s value from 'raw_val_ptr', which must be a pointer to the matching
+  /// native type, e.g. a StringValue or TimestampValue slot in a tuple.
+  static void CodegenLoadAnyVal(CodegenAnyVal* any_val, llvm::Value* raw_val_ptr);
+
  /// Generate LLVM code at the insert position of 'builder' that returns a boolean value
  /// represented as a LLVM i1 indicating whether this slot is null in 'tuple'.
  llvm::Value* CodegenIsNull(
@@ -171,6 +178,38 @@ class SlotDescriptor {
  void CodegenSetNullIndicator(LlvmCodeGen* codegen, LlvmBuilder* builder,
      llvm::Value* tuple, llvm::Value* is_null) const;

+  /// Generates code that writes the value described by 'read_write_info' into this slot
+  /// of the tuple pointed to by 'tuple_llvm_struct_ptr' (a pointer to an LLVM struct), or
+  /// sets the slot to null if the value is null. If 'pool_val' is non-NULL, var-len data
+  /// will be copied into 'pool_val'. After the call the insert point of the builder is
+  /// at 'insert_before'; if 'insert_before' is NULL, a new basic block is created for
+  /// this purpose.
+  void CodegenWriteToSlot(const CodegenAnyValReadWriteInfo& read_write_info,
+      llvm::Value* tuple_llvm_struct_ptr,
+      llvm::Value* pool_val, llvm::BasicBlock* insert_before = nullptr) const;
+
+  /// Stores this 'any_val' into a native slot, e.g. a StringValue or TimestampValue.
+  /// This should only be used if 'any_val' is not null.
+  ///
+  /// Not valid to call for FIXED_UDA_INTERMEDIATE: in that case the StringVal must be
+  /// set up to point directly to the underlying slot, e.g. by LoadFromNativePtr().
+  ///
+  /// Not valid to call for structs.
+  ///
+  /// If 'pool_val' is non-NULL, var-len data will be copied into 'pool_val'.
+  /// 'pool_val' has to be of type MemPool*.
+  static void CodegenStoreNonNullAnyVal(CodegenAnyVal& any_val,
+      llvm::Value* raw_val_ptr, llvm::Value* pool_val = nullptr);
+
+  /// Like the above, but takes a 'CodegenAnyValReadWriteInfo' instead of a
+  /// 'CodegenAnyVal'.
+  static void CodegenStoreNonNullAnyVal(const CodegenAnyValReadWriteInfo& read_write_info,
+      llvm::Value* raw_val_ptr, llvm::Value* pool_val = nullptr);
+
+  /// Like 'CodegenStoreNonNullAnyVal' but stores the value into a new alloca()
+  /// allocation. Returns a pointer to the stored value.
+  static llvm::Value* CodegenStoreNonNullAnyValToNewAlloca(
+      CodegenAnyVal& any_val, llvm::Value* pool_val = nullptr);
+
+  /// Like the above, but takes a 'CodegenAnyValReadWriteInfo' instead of a
+  /// 'CodegenAnyVal'.
+  static llvm::Value* CodegenStoreNonNullAnyValToNewAlloca(
+      const CodegenAnyValReadWriteInfo& read_write_info, llvm::Value* pool_val = nullptr);
+
  /// Returns true if this slot is a child of a struct slot.
  inline bool IsChildOfStruct() const;
 private:
@@ -206,6 +245,35 @@ class SlotDescriptor {
  static llvm::Value* CodegenGetNullByte(LlvmCodeGen* codegen, LlvmBuilder* builder,
      const NullIndicatorOffset& null_indicator_offset, llvm::Value* tuple,
      llvm::Value** null_byte_ptr);
+
+  /// Does the actual work of CodegenWriteToSlot(); also used for writing the children
+  /// of structs. 'tuple_llvm_struct_ptr' points to the LLVM struct that contains this
+  /// slot as a field, 'main_tuple_llvm_struct_ptr' to the main tuple on which the null
+  /// indicator is set. Both the null and the non-null path branch to 'insert_before'
+  /// when they are done.
+  void CodegenWriteToSlotHelper(const CodegenAnyValReadWriteInfo& read_write_info,
+      llvm::Value* main_tuple_llvm_struct_ptr, llvm::Value* tuple_llvm_struct_ptr,
+      llvm::Value* pool_val, NonWritableBasicBlock insert_before) const;
+
+  /// Stores a struct value into a native slot. This should only be used if this struct is
+  /// not null.
+  ///
+  /// 'main_tuple_ptr' should be a pointer to the beginning of the whole main tuple, while
+  /// 'struct_slot_ptr' should be the slot where the struct should be stored. The first
+  /// one is needed because the offsets of the struct's children are calculated from the
+  /// beginning of the main tuple.
+  void CodegenStoreStructToNativePtr(const CodegenAnyValReadWriteInfo& read_write_info,
+      llvm::Value* main_tuple_ptr, llvm::Value* struct_slot_ptr, llvm::Value* pool_val,
+      NonWritableBasicBlock insert_before) const;
+
+  // Sets the null indicator bit to 1 (i.e. marks the slot as NULL) - recursively for
+  // structs.
+  void CodegenSetToNull(const CodegenAnyValReadWriteInfo& read_write_info,
+      llvm::Value* tuple) const;
+
+  /// Codegens writing a string or a collection to the address pointed to by 'slot_ptr'.
+  /// If 'pool_val' is non-NULL, the data will be copied into 'pool_val'.  'pool_val' has
+  /// to be of type MemPool*.
+  static void CodegenWriteStringOrCollectionToSlot(
+      const CodegenAnyValReadWriteInfo& read_write_info,
+      llvm::Value* slot_ptr, llvm::Value* pool_val);
+
+  /// Generates code that builds a TimestampValue from the time of day and the date held
+  /// in 'read_write_info' and returns it. The type of 'read_write_info' must be
+  /// TYPE_TIMESTAMP.
+  static llvm::Value* CodegenToTimestampValue(
+      const CodegenAnyValReadWriteInfo& read_write_info);
 };

 class ColumnDescriptor {
@@ -543,6 +611,11 @@ class TupleDescriptor {

  /// Returns slots in their physical order.
  std::vector<SlotDescriptor*> SlotsOrderedByIdx() const;
+
+  std::pair<std::vector<llvm::Type*>, int> GetLlvmTypesAndOffset(LlvmCodeGen* codegen,
+      int curr_struct_offset) const;
+  llvm::StructType* CreateLlvmStructTypeFromFieldTypes(LlvmCodeGen* codegen,
+      const std::vector<llvm::Type*>& field_types, int parent_slot_offset) const;
 };

 class DescriptorTbl {
--- a/be/src/runtime/fragment-state.cc
+++ b/be/src/runtime/fragment-state.cc
@@ -22,6 +22,7 @@
 #include "codegen/llvm-codegen.h"
 #include "exec/exec-node.h"
 #include "exec/data-sink.h"
+#include "exprs/slot-ref.h"
 #include "runtime/exec-env.h"
 #include "runtime/query-state.h"
 #include "gen-cpp/ImpalaInternalService_types.h"
@@ -168,13 +169,17 @@ Status FragmentState::CreateCodegen() {

 Status FragmentState::CodegenScalarExprs() {
  for (auto& item : scalar_exprs_to_codegen_) {
-    llvm::Function* fn;
-    RETURN_IF_ERROR(item.first->GetCodegendComputeFn(codegen_.get(), item.second, &fn));
+    ScalarExpr* expr = item.first;
+    // We don't need to codegen GetSlotRef() for struct children because the struct takes
+    // care of its members.
+    if (!ScalarExprIsWithinStruct(expr)) {
+      llvm::Function* fn;
+      RETURN_IF_ERROR(expr->GetCodegendComputeFn(codegen_.get(), item.second, &fn));
+    }
  }
  return Status::OK();
 }

-
 std::string FragmentState::GenerateCodegenMsg(
    bool codegen_enabled, const Status& codegen_status, const std::string& extra_label) {
  const string& err_msg = codegen_status.ok() ? "" : codegen_status.msg().msg();
@@ -190,4 +195,12 @@ std::string FragmentState::GenerateCodegenMsg(bool codegen_enabled,
  return str.str();
 }

+bool FragmentState::ScalarExprIsWithinStruct(const ScalarExpr* expr) const {
+  if (!expr->IsSlotRef()) return false;
+  const SlotRef* slot_ref_expr = static_cast<const SlotRef*>(expr);
+  const SlotDescriptor* slot_desc = desc_tbl().GetSlotDescriptor(
+      slot_ref_expr->slot_id());
+  return slot_desc->parent()->isTupleOfStructSlot();
+}
+
 }
--- a/be/src/runtime/fragment-state.h
+++ b/be/src/runtime/fragment-state.h
@@ -179,6 +179,8 @@ class FragmentState {
      const std::string& extra_info = "", const std::string& extra_label = "");

 private:
+  bool ScalarExprIsWithinStruct(const ScalarExpr* expr) const;
+
  ObjectPool obj_pool_;

  /// Reference to the query state object that owns this.
--- a/be/src/runtime/krpc-data-stream-sender.cc
+++ b/be/src/runtime/krpc-data-stream-sender.cc
@@ -803,38 +803,45 @@ Status KrpcDataStreamSender::Open(RuntimeState* state) {
  return ScalarExprEvaluator::Open(partition_expr_evals_, state);
 }

-//
-// An example of generated code with int type.
+// An example of generated code. Used the following query to generate it:
+//   use functional_orc_def;
+//   select a.outer_struct, b.small_struct
+//   from complextypes_nested_structs a
+//       full outer join complextypes_structs b
+//           on b.small_struct.i = a.outer_struct.inner_struct2.i + 19091;
 //
 // define i64 @KrpcDataStreamSenderHashRow(%"class.impala::KrpcDataStreamSender"* %this,
 //                                         %"class.impala::TupleRow"* %row,
-//                                         i64 %seed) #52 {
+//                                         i64 %seed) #49 {
 // entry:
-//   %0 = alloca i32
+//   %0 = alloca i64
 //   %1 = call %"class.impala::ScalarExprEvaluator"*
 //       @_ZN6impala20KrpcDataStreamSender25GetPartitionExprEvaluatorEi(
 //           %"class.impala::KrpcDataStreamSender"* %this, i32 0)
-//   %partition_val = call i64 @GetSlotRef(
-//       %"class.impala::ScalarExprEvaluator"* %1, %"class.impala::TupleRow"* %row)
-//   %is_null = trunc i64 %partition_val to i1
-//   br i1 %is_null, label %is_null_block, label %not_null_block
+//   %partition_val = call { i8, i64 }
+//       @"impala::Operators::Add_BigIntVal_BigIntValWrapper"(
+//           %"class.impala::ScalarExprEvaluator"* %1, %"class.impala::TupleRow"* %row)
+//   br label %entry1
 //
-// is_null_block:                                ; preds = %entry
+// entry1:                                           ; preds = %entry
+//   %2 = extractvalue { i8, i64 } %partition_val, 0
+//   %is_null = trunc i8 %2 to i1
+//   br i1 %is_null, label %null, label %non_null
+//
+// non_null:                                         ; preds = %entry1
+//   %val = extractvalue { i8, i64 } %partition_val, 1
+//   store i64 %val, i64* %0
+//   %native_ptr = bitcast i64* %0 to i8*
 //   br label %hash_val_block
 //
-// not_null_block:                               ; preds = %entry
-//   %2 = ashr i64 %partition_val, 32
-//   %3 = trunc i64 %2 to i32
-//   store i32 %3, i32* %0
-//   %native_ptr = bitcast i32* %0 to i8*
+// null:                                             ; preds = %entry1
 //   br label %hash_val_block
 //
-// hash_val_block:                               ; preds = %not_null_block, %is_null_block
-//   %val_ptr_phi = phi i8* [ %native_ptr, %not_null_block ], [ null, %is_null_block ]
+// hash_val_block:                                   ; preds = %non_null, %null
+//   %native_ptr_phi = phi i8* [ %native_ptr, %non_null ], [ null, %null ]
 //   %hash_val = call i64
 //       @_ZN6impala8RawValue20GetHashValueFastHashEPKvRKNS_10ColumnTypeEm(
-//           i8* %val_ptr_phi, %"struct.impala::ColumnType"* @expr_type_arg,
-//               i64 %seed)
+//           i8* %native_ptr_phi, %"struct.impala::ColumnType"* @expr_type_arg, i64 %seed)
 //   ret i64 %hash_val
 // }
 Status KrpcDataStreamSenderConfig::CodegenHashRow(
@@ -876,33 +883,26 @@ Status KrpcDataStreamSenderConfig::CodegenHashRow(
    CodegenAnyVal partition_val = CodegenAnyVal::CreateCallWrapped(codegen, &builder,
        partition_exprs_[i]->type(), compute_fn, compute_fn_args, "partition_val");

-    llvm::BasicBlock* is_null_block =
-        llvm::BasicBlock::Create(context, "is_null_block", hash_row_fn);
-    llvm::BasicBlock* not_null_block =
-        llvm::BasicBlock::Create(context, "not_null_block", hash_row_fn);
+    CodegenAnyValReadWriteInfo rwi = partition_val.ToReadWriteInfo();
+    rwi.entry_block().BranchTo(&builder);
+
    llvm::BasicBlock* hash_val_block =
        llvm::BasicBlock::Create(context, "hash_val_block", hash_row_fn);

-    // Check if 'partition_val' is NULL
-    llvm::Value* val_is_null = partition_val.GetIsNull();
-    builder.CreateCondBr(val_is_null, is_null_block, not_null_block);
-
    // Set the pointer to NULL in case 'partition_val' evaluates to NULL
-    builder.SetInsertPoint(is_null_block);
+    builder.SetInsertPoint(rwi.null_block());
    llvm::Value* null_ptr = codegen->null_ptr_value();
    builder.CreateBr(hash_val_block);

    // Saves 'partition_val' on the stack and passes a pointer to it to the hash function
-    builder.SetInsertPoint(not_null_block);
-    llvm::Value* native_ptr = partition_val.ToNativePtr();
+    builder.SetInsertPoint(rwi.non_null_block());
+    llvm::Value* native_ptr = SlotDescriptor::CodegenStoreNonNullAnyValToNewAlloca(rwi);
    native_ptr = builder.CreatePointerCast(native_ptr, codegen->ptr_type(), "native_ptr");
    builder.CreateBr(hash_val_block);

    // Picks the input value to hash function
    builder.SetInsertPoint(hash_val_block);
-    llvm::PHINode* val_ptr_phi = builder.CreatePHI(codegen->ptr_type(), 2, "val_ptr_phi");
-    val_ptr_phi->addIncoming(native_ptr, not_null_block);
-    val_ptr_phi->addIncoming(null_ptr, is_null_block);
+    llvm::PHINode* val_ptr_phi = rwi.CodegenNullPhiNode(native_ptr, null_ptr);

    // Creates a global constant of the partition expression's ColumnType. It has to be a
    // constant for constant propagation and dead code elimination in 'get_hash_value_fn'
--- a/be/src/runtime/tuple.cc
+++ b/be/src/runtime/tuple.cc
@@ -279,56 +279,75 @@ char* Tuple::AllocateStrings(const char* err_ctx, RuntimeState* state,
 //
 // Example IR for materializing a string column with non-NULL 'pool':
 //
-// ; Function Attrs: alwaysinline
-// define void @MaterializeExprs(%"class.impala::Tuple"* %opaque_tuple,
-//     %"class.impala::TupleRow"* %row, %"class.impala::TupleDescriptor"* %desc,
-//     %"class.impala::ScalarExprEvaluator"** %materialize_expr_evals,
+// Produced for the following query:
+//   select string_col from functional_orc_def.alltypes order by string_col limit 2;
+//
+// define void @MaterializeExprs(
+//     %"class.impala::Tuple"* %opaque_tuple,
+//     %"class.impala::TupleRow"* %row,
+//     %"class.impala::TupleDescriptor"* %desc,
+//     %"class.impala::ScalarExprEvaluator"** %slot_materialize_exprs,
 //     %"class.impala::MemPool"* %pool,
 //     %"struct.impala::StringValue"** %non_null_string_values,
-//     i32* %total_string_lengths, i32* %num_non_null_string_values) #34 {
+//     i32* %total_string_lengths,
+//     i32* %num_non_null_string_values) #48 {
 // entry:
-//   %tuple = bitcast %"class.impala::Tuple"* %opaque_tuple to
-//       <{ %"struct.impala::StringValue", i8 }>*
+//   %tuple = bitcast %"class.impala::Tuple"* %opaque_tuple
+//       to <{ %"struct.impala::StringValue", i8 }>*
 //   %int8_ptr = bitcast <{ %"struct.impala::StringValue", i8 }>* %tuple to i8*
-//   %null_bytes_ptr = getelementptr inbounds i8, i8* %int8_ptr, i32 16
+//   %null_bytes_ptr = getelementptr inbounds i8, i8* %int8_ptr, i32 12
 //   call void @llvm.memset.p0i8.i64(i8* %null_bytes_ptr, i8 0, i64 1, i32 0, i1 false)
-//   %0 = getelementptr %"class.impala::ExprContext"*,
-//       %"class.impala::ExprContext"** %materialize_expr_ctxs, i32 0
-//   %expr_ctx = load %"class.impala::ExprContext"*, %"class.impala::ExprContext"** %0
-//   %src = call { i64, i8* } @"impala::StringFunctions::UpperWrapper"(
-//        %"class.impala::ExprContext"* %expr_ctx, %"class.impala::TupleRow"* %row)
+//   %0 = getelementptr %"class.impala::ScalarExprEvaluator"*,
+//                      %"class.impala::ScalarExprEvaluator"** %slot_materialize_exprs,
+//                      i32 0
+//   %expr_eval = load %"class.impala::ScalarExprEvaluator"*,
+//                     %"class.impala::ScalarExprEvaluator"** %0
+//   %src = call { i64, i8* } @GetSlotRef.3(
+//       %"class.impala::ScalarExprEvaluator"* %expr_eval,
+//       %"class.impala::TupleRow"* %row)
+//   ; -- begin CodegenAnyVal::ToReadWriteInfo() and SlotDescriptor::CodegenWriteToSlot()
+//   br label %entry1
+//
+// entry1:                                           ; preds = %entry
 //   %1 = extractvalue { i64, i8* } %src, 0
-//   ; ----- generated by CodegenAnyVal::WriteToSlot() ----------------------------------
 //   %is_null = trunc i64 %1 to i1
 //   br i1 %is_null, label %null, label %non_null
 //
-// non_null:                                         ; preds = %entry
-//   %slot = getelementptr inbounds <{ %"struct.impala::StringValue", i8 }>,
-//       <{ %"struct.impala::StringValue", i8 }>* %tuple, i32 0, i32 0
+// non_null:                                         ; preds = %entry1
+//   %src2 = extractvalue { i64, i8* } %src, 1
 //   %2 = extractvalue { i64, i8* } %src, 0
 //   %3 = ashr i64 %2, 32
 //   %4 = trunc i64 %3 to i32
+//   %slot = getelementptr inbounds <{ %"struct.impala::StringValue", i8 }>,
+//                                  <{ %"struct.impala::StringValue", i8 }>* %tuple,
+//                                  i32 0,
+//                                  i32 0
 //   %5 = insertvalue %"struct.impala::StringValue" zeroinitializer, i32 %4, 1
 //   %6 = sext i32 %4 to i64
 //   %new_ptr = call i8* @_ZN6impala7MemPool8AllocateILb0EEEPhli(
-//       %"class.impala::MemPool"* %pool, i64 %6, i32 8)
-//   %src1 = extractvalue { i64, i8* } %src, 1
+//       %"class.impala::MemPool"* %pool,
+//       i64 %6,
+//       i32 8)
 //   call void @llvm.memcpy.p0i8.p0i8.i32(
-//       i8* %new_ptr, i8* %src1, i32 %4, i32 0, i1 false)
+//       i8* %new_ptr,
+//       i8* %src2,
+//       i32 %4,
+//       i32 0,
+//       i1 false)
 //   %7 = insertvalue %"struct.impala::StringValue" %5, i8* %new_ptr, 0
 //   store %"struct.impala::StringValue" %7, %"struct.impala::StringValue"* %slot
 //   br label %end_write
 //
-// null:                                             ; preds = %entry
+// null:                                             ; preds = %entry1
 //   %8 = bitcast <{ %"struct.impala::StringValue", i8 }>* %tuple to i8*
-//   %null_byte_ptr = getelementptr inbounds i8, i8* %8, i32 16
+//   %null_byte_ptr = getelementptr inbounds i8, i8* %8, i32 12
 //   %null_byte = load i8, i8* %null_byte_ptr
 //   %null_bit_set = or i8 %null_byte, 1
 //   store i8 %null_bit_set, i8* %null_byte_ptr
 //   br label %end_write
 //
 // end_write:                                        ; preds = %null, %non_null
-//   ; ----- end CodegenAnyVal::WriteToSlot() -------------------------------------------
+//   ; -- end CodegenAnyVal::ToReadWriteInfo() and SlotDescriptor::CodegenWriteToSlot() --
 //   ret void
 // }
 Status Tuple::CodegenMaterializeExprs(LlvmCodeGen* codegen, bool collect_string_vals,
@@ -412,7 +431,9 @@ Status Tuple::CodegenMaterializeExprs(LlvmCodeGen* codegen, bool collect_string_
        slot_materialize_exprs[i]->type(), materialize_expr_fns[i], expr_args, "src");

    // Write expr result 'src' to slot
-    src.WriteToSlot(*slot_desc, tuple, use_mem_pool ? pool_arg : nullptr);
+    CodegenAnyValReadWriteInfo read_write_info = src.ToReadWriteInfo();
+    slot_desc->CodegenWriteToSlot(
+        read_write_info, tuple, use_mem_pool ? pool_arg : nullptr);
  }
  builder.CreateRetVoid();
  // TODO: if pool != NULL, OptimizeFunctionWithExprs() is inlining the Allocate()
--- a/be/src/udf/udf-internal.h
+++ b/be/src/udf/udf-internal.h
@@ -320,9 +320,9 @@ struct CollectionVal : public AnyVal {
 struct StructVal : public AnyVal {
  int num_children;

-  /// Pointer to the start of the vector of children pointers. These children pointers in
-  /// fact are AnyVal pointers where a null pointer means that this child is NULL.
-  /// The buffer is not null-terminated.
+  /// Pointer to the start of the vector of children pointers. Each pointed-to value has
+  /// one of the types of the members of 'ExprValue'. A null pointer means that this
+  /// child is NULL. The buffer is not null-terminated.
  /// Memory allocation to 'ptr' is done using FunctionContext. As a result it's not
  /// needed to take care of memory deallocation in StructVal as it will be done through
  /// FunctionContext automatically.
@@ -353,4 +353,9 @@ static_assert(sizeof(CollectionVal) == sizeof(StringVal), "Wrong size.");
 static_assert(
    offsetof(CollectionVal, num_tuples) == offsetof(StringVal, len), "Wrong offset.");
 static_assert(offsetof(CollectionVal, ptr) == offsetof(StringVal, ptr), "Wrong offset.");
+
+static_assert(sizeof(StructVal) == sizeof(StringVal), "Wrong size.");
+static_assert(
+    offsetof(StructVal, num_children) == offsetof(StringVal, len), "Wrong offset.");
+static_assert(offsetof(StructVal, ptr) == offsetof(StringVal, ptr), "Wrong offset.");
 } // namespace impala_udf
--- a/be/src/udf/udf-ir.cc
+++ b/be/src/udf/udf-ir.cc
@@ -54,3 +54,9 @@ void* FunctionContext::GetFunctionState(FunctionStateScope scope) const {
      return NULL;
  }
 }
+
+uint8_t* FnCtxAllocateForResults(FunctionContext* ctx, int64_t byte_size) {
+  assert(ctx != nullptr);
+  uint8_t* ptr = ctx->impl()->AllocateForResults(byte_size);
+  return ptr;
+}
--- a/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java
@@ -575,11 +575,6 @@ public class SelectStmt extends QueryStmt {
            throw new AnalysisException("Unsupported type '" +
                expr.getType().toSql() + "' in '" + expr.toSql() + "'.");
          }
-        } else if (expr.getType().isStructType()) {
-          if (!analyzer_.getQueryCtx().client_request.query_options.disable_codegen) {
-            throw new AnalysisException("Struct type in select list is not allowed " +
-                "when Codegen is ON. You might want to set DISABLE_CODEGEN=true");
-          }
        }
        if (!expr.getType().isSupported()) {
          throw new AnalysisException("Unsupported type '"
--- a/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java
@@ -3223,9 +3223,6 @@ public class AnalyzeDDLTest extends FrontendTestBase {
        "from functional.allcomplextypes");
    // It's allowed to do the same with struct as it is supported in the select list.
    AnalysisContext ctx = createAnalysisCtx();
-    // TODO: Turning Codegen OFF could be removed once the Codegen support is implemented
-    // for structs given in the select list.
-    ctx.getQueryOptions().setDisable_codegen(true);
    AnalyzesOk("create view functional.foo (a) as " +
        "select tiny_struct from functional_orc_def.complextypes_structs", ctx);

--- a/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java
@@ -432,10 +432,6 @@ public class AnalyzeStmtsTest extends AnalyzerTest {
   */
  private void testSlotRefPath(String sql, List<Integer> expectedAbsPath) {
    AnalysisContext ctx = createAnalysisCtx();
-    // TODO: Turning Codegen OFF could be removed once the Codegen support is implemented
-    // for structs given in the select list.
-    ctx.getQueryOptions().setDisable_codegen(true);
-
    SelectStmt stmt = (SelectStmt) AnalyzesOk(sql, ctx);
    Expr e = stmt.getResultExprs().get(stmt.getResultExprs().size() - 1);
    Preconditions.checkState(e instanceof SlotRef);
@@ -776,7 +772,6 @@ public class AnalyzeStmtsTest extends AnalyzerTest {

    // Check the support of struct in the select list for different file formats.
    AnalysisContext ctx = createAnalysisCtx();
-    ctx.getQueryOptions().setDisable_codegen(true);
    AnalysisError("select int_struct_col from functional.allcomplextypes", ctx,
        "Querying STRUCT is only supported for ORC and Parquet file formats.");
    AnalyzesOk("select alltypes from functional_orc_def.complextypes_structs", ctx);
@@ -804,7 +799,6 @@ public class AnalyzeStmtsTest extends AnalyzerTest {

    // Slot path is not ambiguous and resolves to a struct.
    AnalysisContext ctx = createAnalysisCtx();
-    ctx.getQueryOptions().setDisable_codegen(true);
    AnalyzesOk("select a from a.a", ctx);
    AnalyzesOk("select t.a from a.a t", ctx);
    AnalyzesOk("select t.a.a from a.a t", ctx);
@@ -1025,11 +1019,6 @@ public class AnalyzeStmtsTest extends AnalyzerTest {

-    // Struct in select list works only if codegen is OFF.
+    // Struct in select list works regardless of whether codegen is ON or OFF.
    AnalysisContext ctx = createAnalysisCtx();
-    ctx.getQueryOptions().setDisable_codegen(false);
-    AnalysisError("select alltypes from functional_orc_def.complextypes_structs", ctx,
-        "Struct type in select list is not allowed when Codegen is ON. You might want " +
-        "to set DISABLE_CODEGEN=true");
-    ctx.getQueryOptions().setDisable_codegen(true);
    AnalyzesOk("select alltypes from functional_orc_def.complextypes_structs", ctx);
    AnalyzesOk("select int_array_col from functional.allcomplextypes");
    AnalyzesOk("select int_array_col from functional.allcomplextypes " +
@@ -1070,8 +1059,6 @@ public class AnalyzeStmtsTest extends AnalyzerTest {

    //Make complex types available in star queries
    ctx.getQueryOptions().setExpand_complex_types(true);
-    //TODO: Once IMPALA-10851 is resolved it can be removed
-    ctx.getQueryOptions().setDisable_codegen(true);

    AnalyzesOk("select * from functional_parquet.complextypes_structs",ctx);
    AnalyzesOk("select * from functional_parquet.complextypes_nested_structs",ctx);
--- a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
+++ b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
@@ -802,9 +802,7 @@ public class PlannerTest extends PlannerTestBase {
    // Also tests that selecting the whole struct or fields from an inline view or
    // directly from the table give the same row size.

-    // For comlex types in the select list, we have to turn codegen off.
    TQueryOptions queryOpts = defaultQueryOptions();
-    queryOpts.setDisable_codegen(true);

    String queryWholeStruct =
        "select outer_struct from functional_orc_def.complextypes_nested_structs";
@@ -854,12 +852,7 @@ public class PlannerTest extends PlannerTestBase {
    // the select list, no extra slots are generated in the row for the struct fields but
    // the memory of the struct is reused, i.e. the row size is the same as when only the
    // struct is queried.
-
-    // For complex types in the select list, we have to turn codegen off.
-    // TODO: Remove this when IMPALA-10851 is fixed.
    TQueryOptions queryOpts = defaultQueryOptions();
-    queryOpts.setDisable_codegen(true);
-
    String queryTemplate =
        "select %s from functional_orc_def.complextypes_nested_structs";

@@ -887,9 +880,6 @@ public class PlannerTest extends PlannerTestBase {
    // star expansion.

    TQueryOptions queryOpts = defaultQueryOptions();
-    // For complex types in the select list, we have to turn codegen off.
-    // TODO: Remove this when IMPALA-10851 is fixed.
-    queryOpts.setDisable_codegen(true);
-    // Enable star-expandion of complex types.
+    // Enable star-expansion of complex types.
    queryOpts.setExpand_complex_types(true);

--- a/testdata/datasets/functional/functional_schema_template.sql
+++ b/testdata/datasets/functional/functional_schema_template.sql
@@ -3941,7 +3941,6 @@ functional
 ---- BASE_TABLE_NAME
 collection_struct_mix_view
 ---- CREATE
-SET disable_codegen=1;
 DROP VIEW IF EXISTS {db_name}{db_suffix}.{table_name};
 CREATE VIEW {db_name}{db_suffix}.{table_name}
 AS SELECT id, arr_contains_struct, arr_contains_nested_struct, struct_contains_nested_arr FROM {db_name}{db_suffix}.collection_struct_mix;
--- a/testdata/workloads/functional-query/queries/QueryTest/nested-struct-in-select-list.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/nested-struct-in-select-list.test
@@ -49,6 +49,18 @@ order by id;
 INT,STRING
 ====
 ---- QUERY
+# Select a nested struct with an order by and a limit. The limit triggers materialisation
+# in the TopN node.
+select id, outer_struct
+from functional_orc_def.complextypes_nested_structs
+order by id limit 2;
+---- RESULTS: VERIFY_IS_EQUAL_SORTED
+1,'{"str":"somestr1","inner_struct1":{"str":"somestr2","de":12345.12},"inner_struct2":{"i":333222111,"str":"somestr3"},"inner_struct3":{"s":{"i":112288,"s":null}}}'
+2,'{"str":"str","inner_struct1":null,"inner_struct2":{"i":100,"str":"str3"},"inner_struct3":{"s":{"i":321,"s":"dfgs"}}}'
+---- TYPES
+INT,STRING
+====
+---- QUERY
 # Select a nested struct with an order by.
 select id, outer_struct
 from complextypes_nested_structs
--- a/testdata/workloads/functional-query/queries/QueryTest/ranger_column_masking_complex_types.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/ranger_column_masking_complex_types.test
@@ -78,8 +78,17 @@ INT
 # Test resolving nested columns in expanding star expression.
 set EXPAND_COMPLEX_TYPES=1;
 select nested_struct.* from complextypestbl
---- CATCH
-AnalysisException: Struct type in select list is not allowed when Codegen is ON. You might want to set DISABLE_CODEGEN=true
+---- RESULTS
+-1,'[-1]','{"d":[[{"e":-1,"f":"nonnullable"}]]}','{}'
+1,'[1]','{"d":[[{"e":10,"f":"aaa"},{"e":-10,"f":"bbb"}],[{"e":11,"f":"c"}]]}','{"foo":{"h":{"i":[1.1]}}}'
+NULL,'[null]','{"d":[[{"e":null,"f":null},{"e":10,"f":"aaa"},{"e":null,"f":null},{"e":-10,"f":"bbb"},{"e":null,"f":null}],[{"e":11,"f":"c"},null],[],null]}','{"g1":{"h":{"i":[2.2,null]}},"g2":{"h":{"i":[]}},"g3":null,"g4":{"h":{"i":null}},"g5":{"h":null}}'
+NULL,'NULL','{"d":[]}','{}'
+NULL,'NULL','{"d":null}','NULL'
+NULL,'NULL','NULL','{"foo":{"h":{"i":[2.2,3.3]}}}'
+NULL,'NULL','NULL','NULL'
+7,'[2,3,null]','{"d":[[],[null],null]}','NULL'
+---- TYPES
+INT,STRING,STRING,STRING
 ====
 ---- QUERY
 # Test resolving explicit STAR path on a nested struct column inside array
--- a/testdata/workloads/functional-query/queries/QueryTest/ranger_column_masking_struct_in_select_list.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/ranger_column_masking_struct_in_select_list.test
@@ -4,8 +4,6 @@
 # select list.
 # Note1, functional_orc_def is hard-coded here. Once we implement struct direct read
 # support for Parquet as well then we can remove the DB name here. IMPALA-9496
-# Note2, turning off codegen support could be removed once we implement the support for
-# that. IMPALA-10851
 select id, str, alltypes from functional_orc_def.complextypes_structs
 ---- RESULTS
 1,'NULL','{"ti":100,"si":12348,"i":156789012,"bi":163234345342,"b":true,"f":1234.56005859375,"do":65323423.33,"da":"2021-05-30","ts":"2021-06-01 10:19:04","s1":"some string","s2":"another str","c1":"x","c2":"xyz","vc":"somevarcha","de1":12345,"de2":null}'
--- a/tests/authorization/test_ranger.py
+++ b/tests/authorization/test_ranger.py
@@ -2275,8 +2275,6 @@ class TestRangerColumnMaskingComplexTypesInSelectList(CustomClusterTestSuite):
    cls.ImpalaTestMatrix.add_dimension(create_orc_dimension(cls.get_workload()))
    cls.ImpalaTestMatrix.add_constraint(lambda v:
        v.get_value('protocol') == 'hs2')
-    cls.ImpalaTestMatrix.add_dimension(create_exec_option_dimension(
-        disable_codegen_options=[True]))

  @classmethod
  def add_custom_cluster_constraints(cls):
--- a/tests/hs2/test_fetch_first.py
+++ b/tests/hs2/test_fetch_first.py
@@ -468,7 +468,7 @@ class TestFetchFirst(HS2TestSuite):
    """Regression test for IMPALA-11447. Returning complex types in select list
    was crashing in hs2 if result caching was enabled.
    """
-    options = {self.IMPALA_RESULT_CACHING_OPT: "1024", "disable_codegen": "true"}
+    options = {self.IMPALA_RESULT_CACHING_OPT: "1024"}
    handle = self.run_query_expect_success(
        "select int_array from functional_parquet.complextypestbl", options)
    self.fetch_until(handle, TCLIService.TFetchOrientation.FETCH_NEXT, 10, 8)
--- a/tests/query_test/test_nested_types.py
+++ b/tests/query_test/test_nested_types.py
@@ -128,15 +128,19 @@ class TestNestedStructsInSelectList(ImpalaTestSuite):
        ImpalaTestDimension('mt_dop', 0, 2))
    cls.ImpalaTestMatrix.add_dimension(
        create_exec_option_dimension_from_dict({
-            'disable_codegen': ['False', 'True']}))
+            # Putting 'True' first because this way in non-exhaustive runs there are more
+            # test cases with codegen enabled.
+            'disable_codegen': ['True', 'False'],
+            # The below two options are set to prevent the planner from disabling codegen
+            # because of the small data size even when 'disable_codegen' is False.
+            'disable_codegen_rows_threshold': [0],
+            'exec_single_node_rows_threshold': [0]}))
    cls.ImpalaTestMatrix.add_dimension(create_client_protocol_dimension())
    cls.ImpalaTestMatrix.add_dimension(ImpalaTestDimension('orc_schema_resolution', 0, 1))
    cls.ImpalaTestMatrix.add_constraint(orc_schema_resolution_constraint)

  def test_struct_in_select_list(self, vector):
    """Queries where a struct column is in the select list"""
-    if vector.get_value('exec_option')['disable_codegen'] == 'False':
-      pytest.skip()
    new_vector = deepcopy(vector)
    new_vector.get_value('exec_option')['convert_legacy_hive_parquet_utc_timestamps'] = 1
    new_vector.get_value('exec_option')['TIMEZONE'] = '"Europe/Budapest"'
@@ -144,8 +148,6 @@ class TestNestedStructsInSelectList(ImpalaTestSuite):

  def test_nested_struct_in_select_list(self, vector):
    """Queries where a nested struct column is in the select list"""
-    if vector.get_value('exec_option')['disable_codegen'] == 'False':
-      pytest.skip()
    new_vector = deepcopy(vector)
    new_vector.get_value('exec_option')['convert_legacy_hive_parquet_utc_timestamps'] = 1
    self.run_test_case('QueryTest/nested-struct-in-select-list', new_vector)
@@ -183,9 +185,6 @@ class TestNestedCollectionsInSelectList(ImpalaTestSuite):
    """Queries where a map has null keys. Is only possible in ORC, not Parquet."""
    if vector.get_value('table_format').file_format == 'parquet':
      pytest.skip()
-    # Structs in select list are not supported with codegen enabled: see IMPALA-10851.
-    if vector.get_value('exec_option')['disable_codegen'] == 'False':
-      pytest.skip()
    self.run_test_case('QueryTest/map_null_keys', vector)


@@ -212,9 +211,6 @@ class TestMixedCollectionsAndStructsInSelectList(ImpalaTestSuite):

  def test_mixed_complex_types_in_select_list(self, vector, unique_database):
    """Queries where structs and collections are embedded into one another."""
-    # Structs in select list are not supported with codegen enabled: see IMPALA-10851.
-    if vector.get_value('exec_option')['disable_codegen'] == 'False':
-      pytest.skip()
    self.run_test_case('QueryTest/mixed-collections-and-structs', vector)


@@ -907,7 +903,7 @@ class TestNestedTypesStarExpansion(ImpalaTestSuite):
        v.get_value('protocol') == 'hs2')
    cls.ImpalaTestMatrix.add_dimension(
        create_exec_option_dimension_from_dict({
-            'disable_codegen': ['True']}))
+            'disable_codegen': ['False', 'True']}))
    cls.ImpalaTestMatrix.add_mandatory_exec_option(
            'convert_legacy_hive_parquet_utc_timestamps', 'true')
    cls.ImpalaTestMatrix.add_mandatory_exec_option('TIMEZONE', '"Europe/Budapest"')