Sequence File Scanner

Michael Ubell
2012-04-12 15:31:11 -07:00
parent 8bb82f7e2a
commit 62d29ff1c6
49 changed files with 4959 additions and 1620 deletions

View File

@@ -2,7 +2,6 @@
#include <boost/algorithm/string/join.hpp>
#include <glog/logging.h>
#include "common/status.h"
using namespace std;
@@ -10,27 +9,8 @@ using namespace boost::algorithm;
namespace impala {
struct Status::ErrorDetail {
vector<string> error_msgs;
ErrorDetail(const string& msg): error_msgs(1, msg) {}
ErrorDetail(const vector<string>& msgs): error_msgs(msgs) {}
};
const Status Status::OK;
Status::Status(const string& error_msg)
: error_detail_(new ErrorDetail(error_msg)) {
LOG(ERROR) << "Error Status: " << error_msg;
}
Status::Status(const Status& status)
: error_detail_(
status.error_detail_ != NULL
? new ErrorDetail(*status.error_detail_)
: NULL) {
}
Status& Status::operator=(const Status& status) {
delete error_detail_;
if (status.error_detail_ == NULL) {
@@ -56,10 +36,6 @@ Status& Status::operator=(const TStatus& status) {
return *this;
}
Status::~Status() {
if (error_detail_ != NULL) delete error_detail_;
}
void Status::GetErrorMsgs(vector<string>* msgs) const {
msgs->clear();
if (error_detail_ != NULL) {

View File

@@ -6,6 +6,7 @@
#include <string>
#include <vector>
#include <glog/logging.h>
#include "common/compiler-util.h"
#include "gen-cpp/Types_types.h" // for TStatus
@@ -34,11 +35,24 @@ class Status {
static const Status OK;
// c'tor for error case
Status(const std::string& error_msg);
// copy c'tor makes copy of error detail so Status can be returned by value
Status(const Status& status);
Status(const Status& status)
: error_detail_(
status.error_detail_ != NULL
? new ErrorDetail(*status.error_detail_)
: NULL) {
}
// c'tor for error case
Status(const std::string& error_msg)
: error_detail_(new ErrorDetail(error_msg)) {
LOG(WARNING) << "Error Status: " << error_msg;
}
~Status() {
if (error_detail_ != NULL) delete error_detail_;
}
// same as copy c'tor
Status& operator=(const Status& status);
@@ -53,8 +67,6 @@ class Status {
// assign from stringstream
Status& operator=(const std::stringstream& stream);
~Status();
bool ok() const { return error_detail_ == NULL; }
void AddErrorMsg(const std::string& msg);
@@ -71,7 +83,13 @@ class Status {
std::string GetErrorMsg() const;
private:
struct ErrorDetail;
struct ErrorDetail {
std::vector<std::string> error_msgs;
ErrorDetail(const std::string& msg): error_msgs(1, msg) {}
ErrorDetail(const std::vector<std::string>& msgs): error_msgs(msgs) {}
};
ErrorDetail* error_detail_;
};
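With the copy constructor, destructor, and error constructor now inline in the header, Status is cheap to return by value and to test with ok(). A minimal usage sketch (not part of this commit; DoWork and Caller are hypothetical names):

Status DoWork(int input) {
  if (input < 0) return Status("negative input not supported");
  return Status::OK;
}

Status Caller() {
  Status status = DoWork(42);  // copy c'tor clones the ErrorDetail, if any
  if (!status.ok()) {
    LOG(WARNING) << status.GetErrorMsg();
    return status;             // propagated by value
  }
  return Status::OK;
}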

View File

@@ -10,6 +10,8 @@ set(EXECUTABLE_OUTPUT_PATH "${BUILD_OUTPUT_ROOT_DIRECTORY}/exec")
add_library(Exec STATIC
aggregation-node.cc
buffered-byte-stream.cc
delimited-text-parser.cc
exec-node.cc
exchange-node.cc
hash-join-node.cc
@@ -18,6 +20,7 @@ add_library(Exec STATIC
hdfs-scanner.cc
hash-table.cc
hdfs-rcfile-scanner.cc
hdfs-sequence-scanner.cc
hdfs-text-scanner.cc
hbase-scan-node.cc
hbase-table-scanner.cc
@@ -34,4 +37,5 @@ target_link_libraries(Exec
target_link_libraries(Exec
${JAVA_JVM_LIBRARY}
${HDFS_LIBS}
-lz -lbz2 -lsnappy
)

View File

@@ -0,0 +1,93 @@
// Copyright (c) 2012 Cloudera, Inc. All rights reserved.
#include "exec/buffered-byte-stream.h"
#include "common/status.h"
#include <glog/logging.h>
#include <sstream>
using namespace impala;
using namespace std;
BufferedByteStream::BufferedByteStream(ByteStream* parent,
int64_t buffer_size,
RuntimeProfile::Counter* timer)
: parent_byte_stream_(parent),
mem_pool_(new MemPool()),
byte_buffer_size_(buffer_size),
byte_buffer_(mem_pool_->Allocate(byte_buffer_size_)),
byte_offset_(0),
byte_buffer_start_(0),
byte_buffer_len_(0),
scanner_timer_(timer) {
}
Status BufferedByteStream::GetPosition(int64_t* position) {
*position = byte_buffer_start_ + byte_offset_;
return Status::OK;
}
Status BufferedByteStream::Open(const string& location) {
return Status::OK;
}
Status BufferedByteStream::Read(char* buf, int64_t req_len, int64_t* actual_len) {
DCHECK(buf != NULL);
DCHECK_GE(req_len, 0);
int number_bytes_read = 0;
if (req_len <= byte_buffer_len_ - byte_offset_) {
memcpy(buf, byte_buffer_ + byte_offset_, req_len);
number_bytes_read = req_len;
byte_offset_ += number_bytes_read;
} else {
while (number_bytes_read < req_len) {
int copy_len = min(byte_buffer_len_ - byte_offset_, req_len - number_bytes_read);
memcpy(buf + number_bytes_read, byte_buffer_ + byte_offset_, copy_len);
number_bytes_read += copy_len;
byte_offset_ += copy_len;
if (byte_offset_ == byte_buffer_len_) {
byte_buffer_start_ += byte_buffer_len_;
{
if (scanner_timer_ != NULL) {
COUNTER_SCOPED_TIMER(scanner_timer_);
}
RETURN_IF_ERROR(parent_byte_stream_->Read(
byte_buffer_, byte_buffer_size_, &byte_buffer_len_));
}
byte_offset_ = 0;
if (byte_buffer_len_ == 0) break;
}
}
}
*actual_len = number_bytes_read;
return Status::OK;
}
Status BufferedByteStream::Close() {
return Status::OK;
}
Status BufferedByteStream::Seek(int64_t offset) {
if (offset >= byte_buffer_start_ && offset < byte_buffer_start_ + byte_buffer_len_) {
byte_offset_ = offset - byte_buffer_start_;
} else {
RETURN_IF_ERROR(parent_byte_stream_->Seek(offset));
byte_buffer_start_ = offset;
byte_buffer_len_ = 0;
byte_offset_ = 0;
}
return Status::OK;
}
Status BufferedByteStream::Eof(bool* eof) {
if (byte_offset_ < byte_buffer_len_) {
*eof = false;
return Status::OK;
}
RETURN_IF_ERROR(SyncParent());
RETURN_IF_ERROR(parent_byte_stream_->Eof(eof));
return Status::OK;
}

View File

@@ -0,0 +1,74 @@
// Copyright (c) 2012 Cloudera, Inc. All rights reserved.
#ifndef IMPALA_EXEC_BUFFERED_BYTE_STREAM_H_
#define IMPALA_EXEC_BUFFERED_BYTE_STREAM_H_
#include <string>
#include <hdfs.h>
#include <boost/scoped_ptr.hpp>
#include "util/runtime-profile.h"
#include "exec/byte-stream.h"
#include "runtime/mem-pool.h"
#include "common/status.h"
namespace impala {
// A Buffered ByteStream implementation.
// This class provides buffered reads from the underlying parent byte stream.
// TODO: This is needed because of the way SerDeUtils works; we should revisit this.
class BufferedByteStream : public ByteStream {
public:
BufferedByteStream(ByteStream* parent,
int64_t buffer_size, RuntimeProfile::Counter* timer = NULL);
virtual Status Open(const std::string& location);
virtual Status Close();
virtual Status Read(char *buf, int64_t req_len, int64_t* actual_len);
virtual Status Seek(int64_t offset);
virtual Status GetPosition(int64_t* position);
virtual Status Eof(bool* eof);
// Set the parent offset to our current position.
Status SyncParent() {
RETURN_IF_ERROR(parent_byte_stream_->Seek(byte_buffer_start_ + byte_offset_));
return Status::OK;
}
// Set our position to where the parent is.
Status SeekToParent() {
int64_t position;
RETURN_IF_ERROR(parent_byte_stream_->GetPosition(&position));
RETURN_IF_ERROR(Seek(position));
return Status::OK;
}
private:
// Pointer to the source byte stream.
ByteStream* parent_byte_stream_;
// Memory pool to allocate buffers.
boost::scoped_ptr<MemPool> mem_pool_;
// Size of the buffer.
int64_t byte_buffer_size_;
// Buffer containing bytes.
char* byte_buffer_;
// Current offset within buffer.
int64_t byte_offset_;
// Position of start of buffer in parent byte stream.
int64_t byte_buffer_start_;
// Amount of data in buffer.
int64_t byte_buffer_len_;
RuntimeProfile::Counter* scanner_timer_;
};
}
#endif
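A rough usage sketch of the buffering layer (illustrative only; ReadMagic, the 1 MB buffer size, and the 4-byte header are assumptions, not part of this commit). The pattern is: wrap an existing, already-open ByteStream, read small pieces through the buffer, then call SyncParent() before any other reader touches the parent stream directly.

Status ReadMagic(ByteStream* hdfs_stream) {
  // Buffer 1 MB of the parent stream; no scanner timer in this sketch.
  BufferedByteStream buffered(hdfs_stream, 1024 * 1024);
  char magic[4];
  int64_t actual_len = 0;
  RETURN_IF_ERROR(buffered.Read(magic, sizeof(magic), &actual_len));
  if (actual_len != sizeof(magic)) return Status("Short read on file header");
  // Move the parent stream's offset to match what was consumed via the buffer.
  RETURN_IF_ERROR(buffered.SyncParent());
  return Status::OK;
}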

View File

@@ -13,6 +13,8 @@ class Status;
// A simple wrapper around sources of byte data
class ByteStream {
public:
virtual ~ByteStream() { }
// Opens a resource from supplied location, ready for reading
virtual Status Open(const std::string& location) = 0;
@@ -29,6 +31,9 @@ class ByteStream {
// Returns the position of the stream cursor
virtual Status GetPosition(int64_t* position) = 0;
// Returns if the stream is at EOF
virtual Status Eof(bool* eof) = 0;
// Returns the name of the resource backing this stream
const std::string& GetLocation() { return location_; };
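As an illustration of this interface (including the newly added virtual destructor and Eof()), a hypothetical in-memory implementation might look like the sketch below. It is not part of this commit and assumes <algorithm> and <cstring> for std::min and memcpy.

class MemoryByteStream : public ByteStream {
 public:
  MemoryByteStream(const char* data, int64_t len) : data_(data), len_(len), pos_(0) {}
  virtual Status Open(const std::string& location) { return Status::OK; }
  virtual Status Close() { return Status::OK; }
  virtual Status Read(char* buf, int64_t req_len, int64_t* actual_len) {
    int64_t n = std::min(req_len, len_ - pos_);
    memcpy(buf, data_ + pos_, n);
    pos_ += n;
    *actual_len = n;
    return Status::OK;
  }
  virtual Status Seek(int64_t offset) {
    if (offset < 0 || offset > len_) return Status("Seek out of range");
    pos_ = offset;
    return Status::OK;
  }
  virtual Status GetPosition(int64_t* position) { *position = pos_; return Status::OK; }
  virtual Status Eof(bool* eof) { *eof = (pos_ >= len_); return Status::OK; }
 private:
  const char* data_;   // not owned
  int64_t len_;        // total length of the buffer
  int64_t pos_;        // current read offset
};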

View File

@@ -0,0 +1,337 @@
// Copyright (c) 2012 Cloudera, Inc. All rights reserved.
#include "util/cpu-info.h"
#include "exec/delimited-text-parser.h"
using namespace impala;
using namespace std;
void DelimitedTextParser::ParserReset() {
current_column_has_escape_ = false;
last_char_is_escape_ = false;
column_idx_ = start_column_;
}
DelimitedTextParser::DelimitedTextParser(const vector<int>& map_column_to_slot,
int start_column,
RuntimeProfile::Counter* timer,
char tuple_delim,
char field_delim,
char collection_item_delim,
char escape_char)
: map_column_to_slot_(map_column_to_slot),
start_column_(start_column),
parse_time_counter_(timer),
field_delim_(field_delim),
escape_char_(escape_char),
collection_item_delim_(collection_item_delim),
tuple_delim_(tuple_delim) {
// Initialize the sse search registers.
// TODO: is this safe to do in here? Not sure if the compiler/system
// will manage these registers for us.
char tmp[SSEUtil::CHARS_PER_128_BIT_REGISTER];
memset(tmp, 0, sizeof(tmp));
if (tuple_delim_ != '\0') {
tmp[0] = tuple_delim_;
xmm_tuple_search_ = _mm_loadu_si128(reinterpret_cast<__m128i*>(tmp));
}
if (escape_char_ != '\0') {
tmp[0] = escape_char_;
xmm_escape_search_ = _mm_loadu_si128(reinterpret_cast<__m128i*>(tmp));
}
tmp[0] = field_delim_;
tmp[1] = collection_item_delim_;
xmm_field_search_ = _mm_loadu_si128(reinterpret_cast<__m128i*>(tmp));
column_idx_ = start_column_;
current_column_has_escape_ = false;
last_char_is_escape_ = false;
}
// Updates the values in the field and tuple masks, escaping them if necessary.
// If the character at n is an escape character, then delimiters (tuple/field/escape
// characters) at n+1 don't count.
inline void ProcessEscapeMask(int escape_mask, bool* last_char_is_escape,
int* field_mask, int* tuple_mask) {
// Escape characters can escape escape characters.
bool first_char_is_escape = *last_char_is_escape;
bool escape_next = first_char_is_escape;
for (int i = 0; i < SSEUtil::CHARS_PER_128_BIT_REGISTER; ++i) {
if (escape_next) {
escape_mask &= ~SSEUtil::SSE_BITMASK[i];
}
escape_next = escape_mask & SSEUtil::SSE_BITMASK[i];
}
// Remember last character for the next iteration
*last_char_is_escape = escape_mask &
SSEUtil::SSE_BITMASK[SSEUtil::CHARS_PER_128_BIT_REGISTER - 1];
// Shift escape mask up one so they match at the same bit index as the tuple and
// field mask (instead of being the character before) and set the correct first bit
escape_mask = escape_mask << 1 | first_char_is_escape;
// If escape_mask[n] is true, then tuple/field_mask[n] is escaped
*tuple_mask &= ~escape_mask;
*field_mask &= ~escape_mask;
}
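// A small worked example of ProcessEscapeMask (illustrative only, not used by
// the parser). Take escape_char = '\' and field_delim = ',' and the 16-byte
// chunk (indices 0..15):
//   a \ , b , c d e f g h i j k l m
// Going in, escape_mask has bit 1 set (the backslash) and field_mask has bits
// 2 and 4 set (the two commas); last_char_is_escape is false and no escape is
// itself escaped, so the loop above clears nothing. Shifting escape_mask up by
// one sets bit 2, and field_mask &= ~escape_mask clears that bit: the comma at
// index 2 stays part of the data, while the comma at index 4 still ends the
// field. last_char_is_escape leaves as false because bit 15 of escape_mask is
// not set.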
inline void DelimitedTextParser::AddColumn(int len,
char** next_column_start, int* num_fields,
vector<DelimitedTextParser::FieldLocation>* field_locations) {
if (ReturnCurrentColumn()) {
DCHECK_LT(*num_fields, field_locations->size());
// Found a column that needs to be parsed, write the start/len to 'parsed_data_'
(*field_locations)[*num_fields].start = *next_column_start;
(*field_locations)[*num_fields].len = len;
if (current_column_has_escape_) (*field_locations)[*num_fields].len *= -1;
++(*num_fields);
}
current_column_has_escape_ = false;
*next_column_start += len + 1;
++column_idx_;
}
// SSE optimized raw text file parsing. SSE4_2 added an instruction (with 3 modes) for
// text processing. The modes mimic strchr, strstr and strcmp. For text parsing, we can
// leverage the strchr functionality.
//
// The instruction operates on two sse registers:
// - the needle (what you are searching for)
// - the haystack (where you are searching in)
// Both registers can contain up to 16 characters. The result is a 16-bit mask with a bit
// set for each character in the haystack that matched any character in the needle.
// For example:
// Needle = 'abcd000000000000' (we're searching for any a's, b's, c's d's)
// Haystack = 'asdfghjklhjbdwwc' (the raw string)
// Result = '1010000000011001'
Status DelimitedTextParser::ParseFieldLocations(int max_tuples, int64_t remaining_len,
char** byte_buffer_ptr, std::vector<FieldLocation>* field_locations,
int* num_tuples, int* num_fields, char** next_column_start) {
if (parse_time_counter_ != NULL)
COUNTER_SCOPED_TIMER(parse_time_counter_);
// Start of this batch.
*next_column_start = *byte_buffer_ptr;
// To parse using SSE, we:
// 1. Load into different sse registers the different characters we need to search for
// tuple breaks, field breaks, escape characters
// 2. Load 16 characters at a time into the sse register
// 3. Use the SSE instruction to do strchr on those 16 chars, the result is a bitmask
// 4. Compute the bitmask for tuple breaks, field breaks and escape characters.
// 5. If there are escape characters, fix up the matching masked bits in the
// field/tuple mask
// 6. Go through the mask bit by bit and write the parsed data.
// xmm registers:
// - xmm_buffer: the register holding the current (16 chars) we're working on from the
// file
// - xmm_tuple_search_: the tuple search register. Only contains the tuple_delim char.
// - xmm_field_search_: the field search register. Contains field delim and
// collection_item delim_char
// - xmm_escape_search_: the escape search register. Only contains escape char
// - xmm_tuple_mask: the result of doing strchr for the tuple delim
// - xmm_field_mask: the result of doing strchr for the field delim
// - xmm_escape_mask: the result of doing strchr for the escape char
__m128i xmm_buffer, xmm_tuple_mask, xmm_field_mask, xmm_escape_mask;
if (CpuInfo::Instance()->IsSupported(CpuInfo::SSE4_2)) {
while (remaining_len >= SSEUtil::CHARS_PER_128_BIT_REGISTER) {
// Load the next 16 bytes into the xmm register
xmm_buffer = _mm_loadu_si128(reinterpret_cast<__m128i*>(*byte_buffer_ptr));
// Do the strchr for tuple and field breaks
// TODO: can we parallelize this as well? Are there multiple sse execution units?
// The strchr sse instruction returns the result in the lower bits of the sse
// register. Since we only process 16 characters at a time, only the lower 16 bits
// can contain non-zero values.
// _mm_extract_epi16 will extract 16 bits out of the xmm register. The second
// parameter specifies which 16 bits to extract (0 for the lowest 16 bits).
int tuple_mask = 0;
if (tuple_delim_ != '\0') {
xmm_tuple_mask =
_mm_cmpistrm(xmm_tuple_search_, xmm_buffer, SSEUtil::STRCHR_MODE);
tuple_mask = _mm_extract_epi16(xmm_tuple_mask, 0);
}
int field_mask = 0;
if (field_delim_ != '\0' || collection_item_delim_ != 0) {
xmm_field_mask =
_mm_cmpistrm(xmm_field_search_, xmm_buffer, SSEUtil::STRCHR_MODE);
field_mask = _mm_extract_epi16(xmm_field_mask, 0);
}
int escape_mask = 0;
// If the table does not use escape characters, skip processing for it.
if (escape_char_ != '\0') {
xmm_escape_mask = _mm_cmpistrm(xmm_escape_search_, xmm_buffer,
SSEUtil::STRCHR_MODE);
escape_mask = _mm_extract_epi16(xmm_escape_mask, 0);
ProcessEscapeMask(escape_mask, &last_char_is_escape_, &field_mask, &tuple_mask);
}
// Tuple delims are automatically field delims
field_mask |= tuple_mask;
if (field_mask != 0) {
// Loop through the mask and find the tuple/column offsets
for (int n = 0; n < SSEUtil::CHARS_PER_128_BIT_REGISTER; ++n) {
if (escape_mask != 0) {
current_column_has_escape_ =
current_column_has_escape_ || (escape_mask & SSEUtil::SSE_BITMASK[n]);
}
if (field_mask & SSEUtil::SSE_BITMASK[n]) {
AddColumn((*byte_buffer_ptr + n) - *next_column_start,
next_column_start, num_fields, field_locations);
}
if (tuple_mask & SSEUtil::SSE_BITMASK[n]) {
column_idx_ = start_column_;
++(*num_tuples);
if (*num_tuples == max_tuples) {
(*byte_buffer_ptr) += (n + 1);
last_char_is_escape_ = false;
return Status::OK;
}
}
}
} else {
current_column_has_escape_ = (current_column_has_escape_ || escape_mask);
}
remaining_len -= SSEUtil::CHARS_PER_128_BIT_REGISTER;
*byte_buffer_ptr += SSEUtil::CHARS_PER_128_BIT_REGISTER;
}
}
// Handle the remaining characters
while (remaining_len > 0) {
bool new_tuple = false;
bool new_col = false;
if (!last_char_is_escape_) {
if (tuple_delim_ != '\0' && **byte_buffer_ptr == tuple_delim_) {
new_tuple = true;
new_col = true;
} else if (**byte_buffer_ptr == field_delim_
|| **byte_buffer_ptr == collection_item_delim_) {
new_col = true;
}
}
if (**byte_buffer_ptr == escape_char_) {
current_column_has_escape_ = true;
last_char_is_escape_ = !last_char_is_escape_;
} else {
last_char_is_escape_ = false;
}
if (new_col) {
AddColumn(*byte_buffer_ptr - *next_column_start,
next_column_start, num_fields, field_locations);
}
if (new_tuple) {
column_idx_ = start_column_;
++(*num_tuples);
if (*num_tuples == max_tuples) {
++*byte_buffer_ptr;
return Status::OK;
}
}
--remaining_len;
++*byte_buffer_ptr;
}
// For formats that store the length of the row, the row is not delimited:
// e.g. Sequence files.
if (tuple_delim_ == '\0') {
DCHECK(remaining_len == 0);
AddColumn(*byte_buffer_ptr - *next_column_start,
next_column_start, num_fields, field_locations);
column_idx_ = start_column_;
++(*num_tuples);
}
return Status::OK;
}
// Find the start of the first full tuple in buffer by looking for the end of
// the previous tuple.
// TODO: most of this is not tested. We need some tailored data to exercise the boundary
// cases
int DelimitedTextParser::FindFirstTupleStart(char* buffer, int len) {
int tuple_start = 0;
char* buffer_start = buffer;
restart:
if (CpuInfo::Instance()->IsSupported(CpuInfo::SSE4_2)) {
__m128i xmm_buffer, xmm_tuple_mask;
while (tuple_start < len) {
// TODO: can we parallelize this as well? Are there multiple sse execution units?
// Load the next 16 bytes into the xmm register and do strchr for the
// tuple delimiter.
int chr_count = len - tuple_start;
if (chr_count > SSEUtil::CHARS_PER_128_BIT_REGISTER) {
chr_count = SSEUtil::CHARS_PER_128_BIT_REGISTER;
}
xmm_buffer = _mm_loadu_si128(reinterpret_cast<__m128i*>(buffer));
xmm_tuple_mask =
_mm_cmpestrm(xmm_tuple_search_, 1, xmm_buffer, chr_count, SSEUtil::STRCHR_MODE);
int tuple_mask = _mm_extract_epi16(xmm_tuple_mask, 0);
if (tuple_mask != 0) {
for (int i = 0; i < SSEUtil::CHARS_PER_128_BIT_REGISTER; ++i) {
if ((tuple_mask & SSEUtil::SSE_BITMASK[i]) != 0) {
tuple_start += i + 1;
buffer += i + 1;
break;
}
}
break;
}
tuple_start += chr_count;
buffer += chr_count;
}
} else {
for (int i = tuple_start; i < len; ++i) {
char c = *buffer++;
if (c == tuple_delim_) {
tuple_start = i + 1;
break;
}
}
}
if (escape_char_ != '\0') {
// Scan backwards for escape characters. We do this after
// finding the tuple break rather than during the (above)
// forward scan to make the forward scan faster. This will
// perform worse if there are many characters right before the
// tuple break that are all escape characters, but that is
// unlikely.
int num_escape_chars = 0;
int before_tuple_end = tuple_start - 2;
for (; before_tuple_end >= 0; --before_tuple_end) {
if (buffer_start[before_tuple_end] == escape_char_) {
++num_escape_chars;
} else {
break;
}
}
// TODO: This sucks. All the preceding characters before the tuple delim were
// escape characters. We need to read from the previous block to see what to do.
DCHECK_GT(before_tuple_end, 0);
// An even number of escape characters means they cancel out and this tuple break
// is *not* escaped.
if (num_escape_chars % 2 != 0) {
goto restart;
}
}
return tuple_start;
}
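The strchr-style SSE4.2 usage described above in ParseFieldLocations can be reproduced in isolation. The standalone sketch below is illustrative only (the delimiters, buffer contents, and names such as needle_buf are not from the commit): it loads a needle register with the delimiter set, runs _mm_cmpistrm in equal-any mode over 16 bytes of input, and extracts the 16-bit match mask, which is the same kind of mask the parser walks bit by bit.

#include <nmmintrin.h>  // SSE4.2 string intrinsics; compile with -msse4.2
#include <cstdio>
#include <cstring>

int main() {
  // Needle: the delimiter set, zero-padded so the implicit length is 2.
  char needle_buf[16];
  memset(needle_buf, 0, sizeof(needle_buf));
  needle_buf[0] = ',';   // field delimiter
  needle_buf[1] = '\n';  // tuple delimiter
  __m128i needle = _mm_loadu_si128(reinterpret_cast<__m128i*>(needle_buf));

  // Haystack: 16 bytes of raw text.
  char haystack_buf[16];
  memcpy(haystack_buf, "a,bb,ccc\ndd,e,ff", 16);
  __m128i haystack = _mm_loadu_si128(reinterpret_cast<__m128i*>(haystack_buf));

  // Equal-any comparison: bit i of the result is set if haystack[i] matches
  // any character in the needle.
  __m128i mask_reg = _mm_cmpistrm(
      needle, haystack, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK);
  int mask = _mm_extract_epi16(mask_reg, 0);
  printf("delimiter mask: 0x%04x\n", mask);  // bits 1, 4, 8, 11, 13 => 0x2912
  return 0;
}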

View File

@@ -0,0 +1,142 @@
// Copyright (c) 2012 Cloudera, Inc. All rights reserved.
#ifndef IMPALA_EXEC_DELIMITED_TEXT_PARSER_H
#define IMPALA_EXEC_DELIMITED_TEXT_PARSER_H
#include "exec/hdfs-scanner.h"
#include "exec/hdfs-scan-node.h"
namespace impala {
class DelimitedTextParser {
public:
// Intermediate structure used for two pass parsing approach. In the first pass,
// the FieldLocation structs are filled out and contain where all the fields start and
// their lengths. In the second pass, the FieldLocation is used to write out the
// slots. We want to keep this struct as small as possible.
struct FieldLocation {
// Start of the field.
char* start;
// Encodes the length and whether or not this field needs to be unescaped.
// If len < 0, then the field needs to be unescaped.
int len;
};
// The Delimited Text Parser parses text rows that are delimited by specific
// characters:
// tuple_delim: delimits tuples
// field_delim: delimits fields
// collection_item_delim: delimits collection items
// escape_char: escapes delimiters, making them part of the data.
// Other parameters to the constructor:
// map_column_to_slot: maps a column in the input to the output slot.
// start_column: the index in the above vector where the columns start.
// it will be non-zero if there are partition columns.
// timer: timer to use to time the parsing operation, or NULL.
//
// The main method is ParseFieldLocations, which fills in a vector of
// pointers and lengths to the fields. It can also handle an escape character
// which masks a tuple or field delimiter that occurs in the data.
// FindFirstTupleStart returns the position after the first non-escaped tuple
// delimiter from the starting offset.
DelimitedTextParser(const std::vector<int>& map_column_to_slot, int start_column,
RuntimeProfile::Counter* timer,
char tuple_delim, char field_delim = '\0',
char collection_item_delim = '\0', char escape_char = '\0');
// Called to initialize parser at beginning of scan range.
void ParserReset();
// Check if we are at the start of a tuple.
bool AtTupleStart() { return column_idx_ == start_column_; }
// Parses a byte buffer for the field and tuple breaks.
// This function will write the field start & len to field_locations
// which can then be written out to tuples.
// This function uses SSE (Intel's "Streaming SIMD Extensions") if the
// hardware supports SSE4.2 instructions. SSE4.2 added string processing
// instructions that allow for processing 16 characters at a time. Otherwise,
// this function walks the file_buffer_ character by character.
// Input Parameters:
// max_tuples: The maximum number of tuples that should be parsed.
// This is used to control how the batching works.
// remaining_len: Length of data remaining in byte_buffer_ptr.
// byte_buffer_ptr: Pointer to the buffer containing the data to be parsed.
// Output Parameters:
// field_locations: Vector of pointers to data fields and their lengths
// num_tuples: Number of tuples parsed
// num_fields: Number of materialized fields parsed
// next_column_start: pointer within file_buffer_ where the next field starts
// after the return from the call to ParseFieldLocations
Status ParseFieldLocations(int max_tuples, int64_t remaining_len,
char** byte_buffer_ptr, std::vector<FieldLocation>* field_locations,
int* num_tuples, int* num_fields, char** next_column_start);
// Find the start of a tuple if jumping into the middle of a file.
// Returns the offset in the buffer of the tuple.
int FindFirstTupleStart(char* buffer, int len);
// Will we return the current column to the query?
bool ReturnCurrentColumn() {
return map_column_to_slot_[column_idx_] != HdfsScanNode::SKIP_COLUMN;
}
private:
// Initialize the parser state.
void ParserInit(HdfsScanNode* scan_node);
// Helper routine to add a column to the field_locations vector.
// Input:
// len: length of the current column.
// Input/Output:
// next_column_start: Start of the current column, moved to the start of the next.
// num_fields: current number of fields processed, updated to next field.
// Output:
// field_locations: updated with start and length of current field.
void AddColumn(int len, char** next_column_start, int* num_fields,
std::vector<FieldLocation>* field_locations);
// Map columns in the data to slots in the tuples.
const std::vector<int>& map_column_to_slot_;
// First non-partition column that will be extracted from parsed data.
int start_column_;
// Pointer to scan node parse time counter.
RuntimeProfile::Counter* parse_time_counter_;
// SSE(xmm) register containing the tuple search character.
__m128i xmm_tuple_search_;
// SSE(xmm) register containing the field search character.
__m128i xmm_field_search_;
// SSE(xmm) register containing the escape search character.
__m128i xmm_escape_search_;
// Character delimiting fields (to become slots).
char field_delim_;
// Escape character.
char escape_char_;
// Character delimiting collection items (to become slots).
char collection_item_delim_;
// Character delimiting tuples.
char tuple_delim_;
// Whether or not the current column has an escape character in it
// (and needs to be unescaped)
bool current_column_has_escape_;
// Whether or not the previous character was the escape character
bool last_char_is_escape_;
// Index to keep track of the current column in the current file
int column_idx_;
};
} // namespace impala
#endif // IMPALA_EXEC_DELIMITED_TEXT_PARSER_H
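A rough sketch of how a scanner might drive the parser over one text buffer (illustrative only; the column map, delimiters, buffer contents, and ParseOneBuffer itself are invented for this sketch and are not part of the commit):

Status ParseOneBuffer() {
  // Two columns, both materialized (neither maps to HdfsScanNode::SKIP_COLUMN).
  std::vector<int> column_to_slot;
  column_to_slot.push_back(0);
  column_to_slot.push_back(1);
  DelimitedTextParser parser(column_to_slot, /* start_column */ 0, /* timer */ NULL,
                             /* tuple_delim */ '\n', /* field_delim */ ',');

  char data[] = "1,hello\n2,world\n";
  char* buffer_ptr = data;
  char* next_column_start = NULL;
  int num_tuples = 0;
  int num_fields = 0;
  std::vector<DelimitedTextParser::FieldLocation> field_locations(16);

  RETURN_IF_ERROR(parser.ParseFieldLocations(
      /* max_tuples */ 16, sizeof(data) - 1, &buffer_ptr, &field_locations,
      &num_tuples, &num_fields, &next_column_start));
  // Expect num_tuples == 2 and num_fields == 4; each field_locations[i] holds a
  // start pointer and a length, with a negative length meaning the field still
  // needs to be unescaped before it is written to a slot.
  return Status::OK;
}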

View File

@@ -114,6 +114,7 @@ Status ExecNode::CreateNode(ObjectPool* pool, const TPlanNode& tnode,
switch (tnode.node_type) {
case TPlanNodeType::HDFS_TEXT_SCAN_NODE:
case TPlanNodeType::HDFS_RCFILE_SCAN_NODE:
case TPlanNodeType::HDFS_SEQFILE_SCAN_NODE:
*node = pool->Add(new HdfsScanNode(pool, tnode, descs));
return Status::OK;
case TPlanNodeType::HBASE_SCAN_NODE:

View File

@@ -8,6 +8,7 @@
#include "util/jni-util.h"
#include "util/runtime-profile.h"
#include "gen-cpp/PlanNodes_types.h"
#include "exec/text-converter.inline.h"
using namespace std;
using namespace boost;
@@ -90,12 +91,12 @@ void HBaseScanNode::WriteTextSlot(
void* value, int value_length, SlotDescriptor* slot,
RuntimeState* state, bool* error_in_row) {
COUNTER_SCOPED_TIMER(tuple_write_timer());
bool parsed_ok = text_converter_->ConvertAndWriteSlotBytes(reinterpret_cast<char*>(value),
reinterpret_cast<char*>(value) + value_length, tuple_, slot, true, false);
if (!parsed_ok) {
if (!text_converter_->WriteSlot(state,
slot, tuple_, reinterpret_cast<char*>(value), value_length, true, false).ok()) {
*error_in_row = true;
if (state->LogHasSpace()) {
state->error_stream() << "Error converting column " << family << ":" << qualifier << ": "
state->error_stream() << "Error converting column " << family
<< ":" << qualifier << ": "
<< "'" << reinterpret_cast<char*>(value) << "' TO "
<< TypeToString(slot->type()) << endl;
}

View File

@@ -79,3 +79,14 @@ Status HdfsByteStream::Seek(int64_t offset) {
return Status::OK;
}
Status HdfsByteStream::Eof(bool* eof) {
hdfsFileInfo* hdfsInfo = hdfsGetPathInfo(hdfs_connection_, &location_[0]);
if (hdfsInfo == NULL) {
return Status("Error getting Info for HDFS file: " + location_);
}
*eof = hdfsTell(hdfs_connection_, hdfs_file_) >= hdfsInfo->mSize;
hdfsFreeFileInfo(hdfsInfo, 1);
return Status::OK;
}

View File

@@ -22,6 +22,7 @@ class HdfsByteStream : public ByteStream {
virtual Status Read(char *buf, int64_t req_length, int64_t* actual_length);
virtual Status Seek(int64_t offset);
virtual Status GetPosition(int64_t* position);
virtual Status Eof(bool* eof);
private:
hdfsFS hdfs_connection_;

View File

@@ -17,6 +17,7 @@
#include "gen-cpp/PlanNodes_types.h"
#include "exec/hdfs-rcfile-scanner.h"
#include "exec/hdfs-scan-node.h"
#include "exec/text-converter.inline.h"
using namespace std;
using namespace boost;
@@ -130,30 +131,30 @@ Status HdfsRCFileScanner::GetNext(
const char* col_start = row_group_->GetFieldPtr(rc_column_idx);
int field_len = row_group_->GetFieldLength(rc_column_idx);
bool parse_ok = true;
Status parse_status;
switch (slot_desc->type()) {
case TYPE_STRING:
// TODO: Eliminate the unnecessary copy operation from the RCFileRowGroup
// buffers to the tuple buffers by pushing the tuple buffer down into the
// RowGroup class.
parse_ok = text_converter_->ConvertAndWriteSlotBytes(col_start,
col_start + field_len, tuple_, slot_desc, true, false);
parse_status = text_converter_->WriteSlot(state, slot_desc, tuple_,
col_start, field_len, true, false);
break;
default:
// RCFile stores all fields as strings regardless of type, but these
// strings are not NULL terminated. The strto* functions that TextConverter
// uses require NULL terminated strings, so we have to manually NULL terminate
// the strings before passing them to ConvertAndWriteSlotBytes.
// the strings before passing them to WriteSlot
// TODO: Devise a way to avoid this unecessary copy-and-terminate operation.
string terminated_field(col_start, field_len);
const char* c_str = terminated_field.c_str();
parse_ok = text_converter_->ConvertAndWriteSlotBytes(c_str,
c_str + field_len, tuple_, slot_desc, false, false);
parse_status = text_converter_->WriteSlot(state, slot_desc, tuple_,
c_str, field_len, false, false);
break;
}
if (!parse_ok) {
if (!parse_status.ok()) {
error_in_row = true;
if (state->LogHasSpace()) {
state->error_stream() << "Error converting column: " << rc_column_idx <<

View File

@@ -2,6 +2,7 @@
#include "exec/hdfs-scan-node.h"
#include "exec/hdfs-text-scanner.h"
#include "exec/hdfs-sequence-scanner.h"
#include "exec/hdfs-rcfile-scanner.h"
#include "exec/hdfs-byte-stream.h"
@@ -63,7 +64,7 @@ Status HdfsScanNode::InitRegex(ObjectPool* pool, const TPlanNode& tnode) {
return Status::OK;
}
Status HdfsScanNode::GetNext(RuntimeState* state, RowBatch* row_batch, bool *eos) {
Status HdfsScanNode::GetNext(RuntimeState* state, RowBatch* row_batch, bool* eos) {
COUNTER_SCOPED_TIMER(runtime_profile_->total_time_counter());
// Guard against trying to read an empty set of scan ranges
@@ -134,6 +135,9 @@ Status HdfsScanNode::InitCurrentScanRange(RuntimeState* state) {
current_scanner_.reset(new HdfsTextScanner(this, tuple_desc_, template_tuple_,
tuple_pool_.get()));
break;
case TPlanNodeType::HDFS_SEQFILE_SCAN_NODE:
current_scanner_.reset(new HdfsSequenceScanner(this, tuple_desc_, template_tuple_,
tuple_pool_.get())); break;
case TPlanNodeType::HDFS_RCFILE_SCAN_NODE:
current_scanner_.reset(new HdfsRCFileScanner(this, tuple_desc_, template_tuple_,
tuple_pool_.get()));

View File

@@ -0,0 +1,791 @@
// Copyright (c) 2011 Cloudera, Inc. All rights reserved.
#include "runtime/runtime-state.h"
#include "exec/hdfs-sequence-scanner.h"
#include "runtime/tuple.h"
#include "runtime/row-batch.h"
#include "exec/text-converter.h"
#include "util/cpu-info.h"
#include "exec/hdfs-scan-node.h"
#include "exec/delimited-text-parser.h"
#include "exec/serde-utils.h"
#include "exec/buffered-byte-stream.h"
#include "exec/text-converter.inline.h"
// Compression libraries
#include <zlib.h>
#include <bzlib.h>
#include <snappy.h>
using namespace std;
using namespace boost;
using namespace impala;
const char* const HdfsSequenceScanner::SEQFILE_KEY_CLASS_NAME =
"org.apache.hadoop.io.BytesWritable";
const char* const HdfsSequenceScanner::SEQFILE_VALUE_CLASS_NAME =
"org.apache.hadoop.io.Text";
const char* const HdfsSequenceScanner::SEQFILE_DEFAULT_COMPRESSION =
"org.apache.hadoop.io.compress.DefaultCodec";
const char* const HdfsSequenceScanner::SEQFILE_GZIP_COMPRESSION =
"org.apache.hadoop.io.compress.GzipCodec";
const char* const HdfsSequenceScanner::SEQFILE_BZIP2_COMPRESSION =
"org.apache.hadoop.io.compress.BZip2Codec";
const char* const HdfsSequenceScanner::SEQFILE_SNAPPY_COMPRESSION =
"org.apache.hadoop.io.compress.SnappyCodec";
const uint8_t HdfsSequenceScanner::SEQFILE_VERSION_HEADER[4] = {'S', 'E', 'Q', 6};
const int HdfsSequenceScanner::SEQFILE_KEY_LENGTH = 4;
// These are magic numbers from zlib.h. Not clear why they are not defined there.
// 15 == window size, 32 == figure out if libz or gzip.
#define WINDOW_BITS 15
#define DETECT_CODEC 32
// Decompress a block encoded by gzip or zlib.
// Inputs:
// input_length: length of input buffer.
// in: input buffer, contains compressed data
// output_length: length of output buffer.
// In/Out:
// out: output buffer, place to put decompressed data.
// Output:
// too_small: set to true if the output_length is too small.
static Status DecompressGzipBlock(int input_length, char* in,
int output_length, char* out, bool* too_small) {
z_stream stream;
bzero(&stream, sizeof(stream));
stream.next_in = reinterpret_cast<Bytef*>(in);
stream.avail_in = input_length;
stream.next_out = reinterpret_cast<Bytef*>(out);
stream.avail_out = output_length;
*too_small = false;
int ret;
// Initialize and run either zlib or gzip inflate.
if ((ret = inflateInit2(&stream, WINDOW_BITS | DETECT_CODEC)) != Z_OK) {
stringstream ss;
ss << "zlib inflateInit failed: " << stream.msg;
return Status(ss.str());
}
if ((ret = inflate(&stream, 1)) != Z_STREAM_END) {
(void)inflateEnd(&stream);
if (ret == Z_OK) {
*too_small = true;
return Status::OK;
}
stringstream ss;
ss << "zlib inflate failed: " << stream.msg;
return Status(ss.str());
}
if (inflateEnd(&stream) != Z_OK) {
stringstream ss;
ss << "zlib inflateEnd failed: " << stream.msg;
return Status(ss.str());
}
return Status::OK;
}
// Decompress a block encoded by bzip2.
// Inputs:
// input_length: length of input buffer.
// in: input buffer, contains compressed data
// output_length: length of output buffer.
// In/Out:
// out: output buffer, place to put decompressed data.
// Output:
// too_small: set to true if the output_length is too small.
static Status DecompressBzip2Block(int input_length, char* in,
int output_length, char* out, bool* too_small) {
bz_stream stream;
bzero(&stream, sizeof(stream));
stream.next_in = in;
stream.avail_in = input_length;
stream.next_out = out;
stream.avail_out = output_length;
*too_small = false;
int ret;
if ((ret = BZ2_bzDecompressInit(&stream, 0, 0)) != BZ_OK) {
stringstream ss;
ss << "bzlib BZ2_bzDecompressInit failed: " << ret;
return Status(ss.str());
}
if ((ret = BZ2_bzDecompress(&stream)) != BZ_STREAM_END) {
(void)BZ2_bzDecompressEnd(&stream);
if (ret == BZ_OK) {
*too_small = true;
return Status::OK;
}
stringstream ss;
ss << "bzlib BZ2_bzDecompress failed: " << ret;
return Status(ss.str());
}
if ((ret = BZ2_bzDecompressEnd(&stream)) != BZ_OK) {
stringstream ss;
ss << "bzlib BZ2_bzDecompressEnd failed: " << ret;
return Status(ss.str());
}
return Status::OK;
}
// Decompress a block encoded by Snappy.
// Inputs:
// input_length: length of input buffer.
// in: input buffer, contains compressed data
// output_length: length of output buffer.
// In/Out:
// out: output buffer, place to put decompressed data.
// Output:
// too_small: set to true if the output_length is too small.
static Status DecompressSnappyBlock(int input_length, char* in,
int output_length, char* out, bool* too_small) {
*too_small = false;
// Hadoop uses a block compression scheme on top of snappy. First there is
// an integer which is the size of the decompressed data followed by a
// sequence of compressed blocks each preceded with an integer size.
int32_t len;
RETURN_IF_ERROR(SerDeUtils::ReadInt(in, &len));
// TODO: Snappy knows how big the output is, we should just use that.
if (output_length < len) {
*too_small = true;
return Status::OK;
}
in += sizeof(len);
input_length -= sizeof(len);
do {
// Read the length of the next block.
RETURN_IF_ERROR(SerDeUtils::ReadInt(in, &len));
if (len == 0) break;
in += sizeof(len);
input_length -= sizeof(len);
// Read how big the output will be.
size_t uncompressed_len;
if (!snappy::GetUncompressedLength(static_cast<const char*>(in),
input_length, &uncompressed_len)) {
return Status("Snappy: GetUncompressedLength failed");
}
DCHECK_GT(output_length, 0);
if (!snappy::RawUncompress(static_cast<const char*>(in),
static_cast<size_t>(len), out)) {
return Status("Snappy: RawUncompress failed");
}
in += len;
input_length -= len;
out += uncompressed_len;
output_length -= uncompressed_len;
} while (input_length > 0);
return Status::OK;
}
HdfsSequenceScanner::HdfsSequenceScanner(HdfsScanNode* scan_node,
const TupleDescriptor* tuple_desc,
Tuple* template_tuple, MemPool* tuple_pool)
: HdfsScanner(scan_node, tuple_desc, template_tuple, tuple_pool),
delimited_text_parser_(NULL),
text_converter_(NULL),
unparsed_data_buffer_pool_(new MemPool()),
unparsed_data_buffer_(NULL),
unparsed_data_buffer_size_(0),
num_buffered_records_in_compressed_block_(0) {
const HdfsTableDescriptor* hdfs_table =
static_cast<const HdfsTableDescriptor*>(tuple_desc->table_desc());
text_converter_.reset(new TextConverter(hdfs_table->escape_char(), tuple_pool_));
delimited_text_parser_.reset(new DelimitedTextParser(scan_node->column_to_slot_index(),
scan_node->GetNumPartitionKeys(), NULL, '\0',
hdfs_table->field_delim(), hdfs_table->collection_delim(),
hdfs_table->escape_char()));
// use the parser to find bytes that are -1
find_first_parser_.reset(new DelimitedTextParser(scan_node->column_to_slot_index(),
scan_node->GetNumPartitionKeys(), scan_node->parse_time_counter(),
static_cast<char>(0xff)));
}
Status HdfsSequenceScanner::InitCurrentScanRange(RuntimeState* state,
HdfsScanRange* scan_range,
ByteStream* byte_stream) {
HdfsScanner::InitCurrentScanRange(state, scan_range, byte_stream);
end_of_scan_range_ = scan_range->length + scan_range->offset;
unbuffered_byte_stream_ = byte_stream;
// If the file is block compressed then we don't want to double buffer
// the compressed blocks. In that case we read meta information in
// filesystem block sizes (4KB); otherwise we read large chunks (1MB)
// and pick meta data and data from that buffer.
buffered_byte_stream_.reset(new BufferedByteStream(
unbuffered_byte_stream_,
is_blk_compressed_ ? FILE_BLOCK_SIZE : state->file_buffer_size(),
scan_node_->scanner_timer()));
// Check the Location (file name) to see if we have changed files.
// If this a new file then we need to read and process the header.
if (previous_location_ != unbuffered_byte_stream_->GetLocation()) {
RETURN_IF_ERROR(buffered_byte_stream_->Seek(0));
RETURN_IF_ERROR(ReadFileHeader());
if (is_blk_compressed_) {
unparsed_data_buffer_size_ = state->file_buffer_size();
}
previous_location_ = unbuffered_byte_stream_->GetLocation();
}
delimited_text_parser_->ParserReset();
// Offset may not point to record boundary
if (scan_range->offset != 0) {
RETURN_IF_ERROR(unbuffered_byte_stream_->Seek(scan_range->offset));
RETURN_IF_ERROR(FindFirstRecord(state));
}
return Status::OK;
}
// The start of the sync block is specified by an integer of -1. We search
// bytes till we find a -1 and then look for 3 more -1 bytes which will make up
// the integer. This is followed by the 16 byte sync block which was specified in
// the file header.
Status HdfsSequenceScanner::FindFirstRecord(RuntimeState* state) {
// A sync block is preceded by 4 bytes of -1 (0xff).
int sync_flag_counter = 0;
// Starting offset of the buffer we are scanning
int64_t buf_start = 0;
// Number of bytes read from stream
int64_t num_bytes_read = 0;
// Current offset into buffer.
int64_t off = 0;
// Bytes left to process in buffer.
int64_t bytes_left = 0;
// Size of buffer to read.
int64_t read_size = FILE_BLOCK_SIZE;
// Buffer to scan.
char buf[read_size];
// Loop until we find a Sync block or get to the end of the range.
while (buf_start + off < end_of_scan_range_ || sync_flag_counter != 0) {
// If there are no bytes left to process in the buffer get some more.
// We may make bytes_left < 0 while looping for 0xff bytes below.
if (bytes_left <= 0) {
if (buf_start == 0) {
RETURN_IF_ERROR(unbuffered_byte_stream_->GetPosition(&buf_start));
} else {
// Seek to the next buffer, in case we read the byte stream below.
buf_start += num_bytes_read;
#ifndef NDEBUG
int64_t position;
RETURN_IF_ERROR(unbuffered_byte_stream_->GetPosition(&position));
DCHECK_EQ(buf_start, position);
#endif
}
// Do not read past the end of range, unless we stopped at a -1 byte.
// This could be the start of a sync block and we must process the
// following data.
if (buf_start + read_size >= end_of_scan_range_) {
read_size = (end_of_scan_range_ - buf_start);
if (sync_flag_counter != 0 && read_size < 4 - sync_flag_counter) {
read_size = 4 - sync_flag_counter;
}
}
if (read_size == 0) {
return Status::OK;
}
RETURN_IF_ERROR(unbuffered_byte_stream_->Read(buf, read_size, &num_bytes_read));
off = 0;
if (num_bytes_read == 0) {
RETURN_IF_ERROR(buffered_byte_stream_->SeekToParent());
return Status::OK;
}
bytes_left = num_bytes_read;
}
if (sync_flag_counter == 0) {
off += find_first_parser_->FindFirstTupleStart(buf + off, bytes_left);
bytes_left = num_bytes_read - off;
if (bytes_left == 0) continue;
sync_flag_counter = 1;
}
// We found a -1 see if there are 3 more
while (bytes_left != 0) {
--bytes_left;
if (buf[off++] != static_cast<char>(0xff)) {
sync_flag_counter = 0;
break;
}
if (++sync_flag_counter == 4) {
RETURN_IF_ERROR(buffered_byte_stream_->Seek(buf_start + off));
bool verified;
RETURN_IF_ERROR(CheckSync(false, &verified));
if (verified) {
// Seek back to the beginning of the sync so the protocol readers are right.
RETURN_IF_ERROR(buffered_byte_stream_->Seek(buf_start + off - 4));
return Status::OK;
}
sync_flag_counter = 0;
break;
}
}
}
RETURN_IF_ERROR(buffered_byte_stream_->SeekToParent());
return Status::OK;
}
Status HdfsSequenceScanner::Prepare(RuntimeState* state, ByteStream* byte_stream) {
RETURN_IF_ERROR(HdfsScanner::Prepare(state, byte_stream));
// Allocate the scratch space for two pass parsing. The most fields we can go
// through in one parse pass is the batch size (tuples) * the number of fields per tuple
// TODO: This should probably be based on L2/L3 cache sizes (as should the batch size)
field_locations_.resize(state->batch_size() * scan_node_->materialized_slots().size());
return Status::OK;
}
inline Status HdfsSequenceScanner::GetRecordFromCompressedBlock(RuntimeState *state,
char** record_ptr,
int64_t* record_len,
bool* eosr) {
if (num_buffered_records_in_compressed_block_ == 0) {
int64_t position;
RETURN_IF_ERROR(buffered_byte_stream_->GetPosition(&position));
if (position >= end_of_scan_range_) {
*eosr = true;
return Status::OK;
}
RETURN_IF_ERROR(ReadCompressedBlock(state));
}
// Adjust next_record_ to move past the size of the length indicator.
int size = SerDeUtils::ReadVLong(next_record_in_compressed_block_, record_len);
next_record_in_compressed_block_ += size;
*record_ptr = next_record_in_compressed_block_;
// Point to the length of the next record.
next_record_in_compressed_block_ += *record_len;
--num_buffered_records_in_compressed_block_;
return Status::OK;
}
inline Status HdfsSequenceScanner::GetRecord(char** record_ptr,
int64_t* record_len, bool* eosr) {
int64_t position;
RETURN_IF_ERROR(buffered_byte_stream_->GetPosition(&position));
if (position >= end_of_scan_range_) {
*eosr = true;
}
// If we are past the end of the range we must read to the next sync block.
// TODO: We need better error returns from bytestream functions.
bool sync;
Status stat = ReadBlockHeader(&sync);
if (!stat.ok()) {
// Since we are past the end of the range then we might be at the end of the file.
bool eof;
RETURN_IF_ERROR(buffered_byte_stream_->Eof(&eof));
if (!*eosr || !eof) {
return stat;
} else {
return Status::OK;
}
}
if (sync && *eosr) return Status::OK;
*eosr = false;
// We don't look at the keys, only the values.
RETURN_IF_ERROR(
SerDeUtils::SkipBytes(buffered_byte_stream_.get(), current_key_length_));
// Reading a compressed record, we don't know how big the output is.
// If we are told our output buffer is too small, double it and try again.
if (is_compressed_) {
int in_size = current_block_length_ - current_key_length_;
RETURN_IF_ERROR(
SerDeUtils::ReadBytes(buffered_byte_stream_.get(), in_size, &scratch_buf_));
int out_size = in_size;
bool too_small = false;
do {
out_size *= 2;
if (has_string_slots_ || unparsed_data_buffer_size_ < out_size) {
unparsed_data_buffer_ = unparsed_data_buffer_pool_->Allocate(out_size);
unparsed_data_buffer_size_ = out_size;
}
RETURN_IF_ERROR(decompress_block_function_(in_size, &scratch_buf_[0],
out_size, unparsed_data_buffer_, &too_small));
} while (too_small);
*record_ptr = unparsed_data_buffer_;
// Read the length of the record.
int size = SerDeUtils::ReadVLong(*record_ptr, record_len);
*record_ptr += size;
} else {
// Uncompressed records
RETURN_IF_ERROR(SerDeUtils::ReadVLong(buffered_byte_stream_.get(), record_len));
if (has_string_slots_ || *record_len > unparsed_data_buffer_size_) {
unparsed_data_buffer_ = unparsed_data_buffer_pool_->Allocate(*record_len);
unparsed_data_buffer_size_ = *record_len;
}
RETURN_IF_ERROR(SerDeUtils::ReadBytes(buffered_byte_stream_.get(),
*record_len, unparsed_data_buffer_));
*record_ptr = unparsed_data_buffer_;
}
return Status::OK;
}
// Add rows to the row_batch until it is full or we run off the end of the scan range.
Status HdfsSequenceScanner::GetNext(RuntimeState* state,
RowBatch* row_batch, bool* eosr) {
AllocateTupleBuffer(row_batch);
// Index into current row in row_batch.
int row_idx = RowBatch::INVALID_ROW_INDEX;
runtime_state_ = state;
// We count the time here since there is too much overhead to do
// this on each record.
COUNTER_SCOPED_TIMER(scan_node_->parse_time_counter());
// Read records from the sequence file and parse the data for each record into
// columns. These are added to the row_batch. The loop continues until either
// the row batch is full or we are off the end of the range.
while (true) {
// Current record to process and its length.
char* record = NULL;
int64_t record_len;
// Get the next record and record length.
// There are 3 cases:
// Block compressed -- each block contains several records.
// Record compressed -- like a regular record, but the data is compressed.
// Uncompressed.
if (is_blk_compressed_) {
RETURN_IF_ERROR(GetRecordFromCompressedBlock(state, &record, &record_len, eosr));
} else {
// Get the next compressed or uncompressed record.
RETURN_IF_ERROR(GetRecord(&record, &record_len, eosr));
}
if (*eosr) break;
// Parse the current record.
if (scan_node_->materialized_slots().size() != 0) {
char* col_start;
char* record_start = record;
int num_tuples = 0;
int num_fields = 0;
RETURN_IF_ERROR(delimited_text_parser_->ParseFieldLocations(
row_batch->capacity() - row_batch->num_rows(), record_len, &record,
&field_locations_, &num_tuples, &num_fields, &col_start));
DCHECK(num_tuples == 1);
if (num_fields != 0) {
if (!WriteFields(state, row_batch, num_fields, &row_idx).ok()) {
// Report all the fields that have errors.
++num_errors_in_file_;
if (state->LogHasSpace()) {
state->error_stream() << "file: "
<< buffered_byte_stream_->GetLocation() << endl;
state->error_stream() << "record: ";
state->error_stream() << string(record_start, record_len);
state->LogErrorStream();
}
if (state->abort_on_error()) {
state->ReportFileErrors(buffered_byte_stream_->GetLocation(), 1);
return Status("Aborted HdfsSequenceScanner due to parse errors."
"View error log for details.");
}
}
}
} else {
RETURN_IF_ERROR(WriteTuples(state, row_batch, 1, &row_idx));
}
if (row_batch->IsFull()) {
row_batch->tuple_data_pool()->AcquireData(tuple_pool_, true);
*eosr = false;
break;
}
}
if (has_string_slots_) {
// Pass the buffer data to the row_batch.
// If we are at the end of a scan range then release the ownership
row_batch->tuple_data_pool()->AcquireData(unparsed_data_buffer_pool_.get(), !*eosr);
}
return Status::OK;
}
// TODO: apply conjuncts as slots get materialized and skip to the end of the row
// if we determine it's not a match.
Status HdfsSequenceScanner::WriteFields(RuntimeState* state, RowBatch* row_batch,
int num_fields, int* row_idx) {
// This has too much overhead to do it per-tuple
// COUNTER_SCOPED_TIMER(scan_node_->tuple_write_timer());
DCHECK_EQ(num_fields, scan_node_->materialized_slots().size());
// Keep track of where lines begin as we write out fields for error reporting
int next_line_offset = 0;
// Initialize tuple_ from the partition key template tuple before writing the slots
if (template_tuple_ != NULL) {
memcpy(tuple_, template_tuple_, tuple_byte_size_);
}
// Loop through all the parsed_data and parse out the values to slots
bool error_in_row = false;
for (int n = 0; n < num_fields; ++n) {
int need_escape = false;
int len = field_locations_[n].len;
if (len < 0) {
len = -len;
need_escape = true;
}
next_line_offset += (len + 1);
if (!text_converter_->WriteSlot(state, scan_node_->materialized_slots()[n].second,
tuple_, field_locations_[n].start, len, false, need_escape).ok()) {
error_in_row = true;
}
}
DCHECK_EQ(num_fields, scan_node_->materialized_slots().size());
// TODO: The code from here down is more or less common to all scanners. Move it.
// We now have a complete row, with everything materialized
DCHECK(!row_batch->IsFull());
if (*row_idx == RowBatch::INVALID_ROW_INDEX) {
*row_idx = row_batch->AddRow();
}
TupleRow* current_row = row_batch->GetRow(*row_idx);
current_row->SetTuple(tuple_idx_, tuple_);
// Evaluate the conjuncts and add the row to the batch
bool conjuncts_true = scan_node_->EvalConjunctsForScanner(current_row);
if (conjuncts_true) {
row_batch->CommitLastRow();
*row_idx = RowBatch::INVALID_ROW_INDEX;
scan_node_->IncrNumRowsReturned();
if (scan_node_->ReachedLimit() || row_batch->IsFull()) {
tuple_ = NULL;
return Status::OK;
}
char* new_tuple = reinterpret_cast<char*>(tuple_);
new_tuple += tuple_byte_size_;
tuple_ = reinterpret_cast<Tuple*>(new_tuple);
}
// Need to reset the tuple_ if
// 1. eval failed (clear out null-indicator bits) OR
// 2. there are partition keys that need to be copied
// TODO: if the slots that need to be updated are very sparse (very few NULL slots
// or very few partition keys), updating all the tuple memory is probably bad
if (!conjuncts_true || template_tuple_ != NULL) {
if (template_tuple_ != NULL) {
memcpy(tuple_, template_tuple_, tuple_byte_size_);
} else {
tuple_->Init(tuple_byte_size_);
}
}
if (error_in_row) return Status("Conversion from string failed");
return Status::OK;
}
Status HdfsSequenceScanner::ReadFileHeader() {
RETURN_IF_ERROR(SerDeUtils::ReadBytes(buffered_byte_stream_.get(),
sizeof(SEQFILE_VERSION_HEADER), &scratch_buf_));
if (memcmp(&scratch_buf_[0], SEQFILE_VERSION_HEADER, sizeof(SEQFILE_VERSION_HEADER))) {
if (runtime_state_->LogHasSpace()) {
runtime_state_->error_stream() << "Invalid SEQFILE_VERSION_HEADER: '"
<< SerDeUtils::HexDump(&scratch_buf_[0], sizeof(SEQFILE_VERSION_HEADER)) << "'";
}
return Status("Invalid SEQFILE_VERSION_HEADER");
}
RETURN_IF_ERROR(SerDeUtils::ReadText(buffered_byte_stream_.get(), &scratch_buf_));
if (strncmp(&scratch_buf_[0],
HdfsSequenceScanner::SEQFILE_KEY_CLASS_NAME, scratch_buf_.size())) {
if (runtime_state_->LogHasSpace()) {
runtime_state_->error_stream() << "Invalid SEQFILE_KEY_CLASS_NAME: '"
<< string(&scratch_buf_[0], strlen(HdfsSequenceScanner::SEQFILE_KEY_CLASS_NAME))
<< "'";
}
return Status("Invalid SEQFILE_KEY_CLASS_NAME");
}
RETURN_IF_ERROR(SerDeUtils::ReadText(buffered_byte_stream_.get(), &scratch_buf_));
if (strncmp(&scratch_buf_[0], HdfsSequenceScanner::SEQFILE_VALUE_CLASS_NAME,
scratch_buf_.size())) {
if (runtime_state_->LogHasSpace()) {
runtime_state_->error_stream() << "Invalid SEQFILE_VALUE_CLASS_NAME: '"
<< string(
&scratch_buf_[0], strlen(HdfsSequenceScanner::SEQFILE_VALUE_CLASS_NAME))
<< "'";
}
return Status("Invalid SEQFILE_VALUE_CLASS_NAME");
}
RETURN_IF_ERROR(SerDeUtils::ReadBoolean(buffered_byte_stream_.get(), &is_compressed_));
RETURN_IF_ERROR(
SerDeUtils::ReadBoolean(buffered_byte_stream_.get(), &is_blk_compressed_));
if (is_compressed_) {
RETURN_IF_ERROR(
SerDeUtils::ReadText(buffered_byte_stream_.get(), &compression_codec_));
RETURN_IF_ERROR(SetCompression());
}
RETURN_IF_ERROR(ReadFileHeaderMetadata());
RETURN_IF_ERROR(ReadSync());
return Status::OK;
}
Status HdfsSequenceScanner::SetCompression() {
if (strncmp(&compression_codec_[0], HdfsSequenceScanner::SEQFILE_DEFAULT_COMPRESSION,
compression_codec_.size()) == 0 ||
strncmp(&compression_codec_[0], HdfsSequenceScanner::SEQFILE_GZIP_COMPRESSION,
compression_codec_.size()) == 0) {
decompress_block_function_ = DecompressGzipBlock;
} else if (strncmp(&compression_codec_[0],
HdfsSequenceScanner::SEQFILE_BZIP2_COMPRESSION, compression_codec_.size()) == 0) {
decompress_block_function_ = DecompressBzip2Block;
} else if (strncmp(&compression_codec_[0],
HdfsSequenceScanner::SEQFILE_SNAPPY_COMPRESSION, compression_codec_.size()) == 0) {
decompress_block_function_ = DecompressSnappyBlock;
} else {
if (runtime_state_->LogHasSpace()) {
runtime_state_->error_stream() << "Unknown Codec: "
<< string(&compression_codec_[0], compression_codec_.size());
}
return Status("Unknown Codec");
}
return Status::OK;
}
Status HdfsSequenceScanner::ReadFileHeaderMetadata() {
int map_size = 0;
RETURN_IF_ERROR(SerDeUtils::ReadInt(buffered_byte_stream_.get(), &map_size));
for (int i = 0; i < map_size; ++i) {
RETURN_IF_ERROR(SerDeUtils::SkipText(buffered_byte_stream_.get()));
RETURN_IF_ERROR(SerDeUtils::SkipText(buffered_byte_stream_.get()));
}
return Status::OK;
}
Status HdfsSequenceScanner::ReadSync() {
RETURN_IF_ERROR(
SerDeUtils::ReadBytes(buffered_byte_stream_.get(), SYNC_HASH_SIZE, sync_));
return Status::OK;
}
Status HdfsSequenceScanner::ReadBlockHeader(bool* sync) {
RETURN_IF_ERROR(
SerDeUtils::ReadInt(buffered_byte_stream_.get(), &current_block_length_));
*sync = false;
if (current_block_length_ == HdfsSequenceScanner::SYNC_MARKER) {
RETURN_IF_ERROR(CheckSync(true, NULL));
RETURN_IF_ERROR(
SerDeUtils::ReadInt(buffered_byte_stream_.get(), &current_block_length_));
*sync = true;
}
RETURN_IF_ERROR(SerDeUtils::ReadInt(buffered_byte_stream_.get(), &current_key_length_));
DCHECK_EQ(current_key_length_, SEQFILE_KEY_LENGTH);
return Status::OK;
}
Status HdfsSequenceScanner::CheckSync(bool report_error, bool* verified) {
char hash[SYNC_HASH_SIZE];
RETURN_IF_ERROR(SerDeUtils::ReadBytes(buffered_byte_stream_.get(),
HdfsSequenceScanner::SYNC_HASH_SIZE, hash));
bool sync_compares_equal = memcmp(static_cast<void*>(hash),
static_cast<void*>(sync_), HdfsSequenceScanner::SYNC_HASH_SIZE) == 0;
if (report_error && !sync_compares_equal) {
if (runtime_state_->LogHasSpace()) {
runtime_state_->error_stream() << "Bad sync hash in current HdfsSequenceScanner: "
<< buffered_byte_stream_->GetLocation() << "." << endl
<< "Expected: '"
<< SerDeUtils::HexDump(sync_, HdfsSequenceScanner::SYNC_HASH_SIZE)
<< "'" << endl
<< "Actual: '"
<< SerDeUtils::HexDump(hash, HdfsSequenceScanner::SYNC_HASH_SIZE)
<< "'" << endl;
}
return Status("Bad sync hash");
}
if (verified != NULL) *verified = sync_compares_equal;
return Status::OK;
}
Status HdfsSequenceScanner::ReadCompressedBlock(RuntimeState* state) {
int dummy;
// Read the sync indicator and check the sync block.
RETURN_IF_ERROR(SerDeUtils::ReadInt(buffered_byte_stream_.get(), &dummy));
RETURN_IF_ERROR(CheckSync(true, NULL));
RETURN_IF_ERROR(SerDeUtils::ReadVLong(buffered_byte_stream_.get(),
&num_buffered_records_in_compressed_block_));
// Read the compressed key length and key buffers, we don't need them.
RETURN_IF_ERROR(SerDeUtils::SkipText(buffered_byte_stream_.get()));
RETURN_IF_ERROR(SerDeUtils::SkipText(buffered_byte_stream_.get()));
// Read the compressed value length buffer. We don't need these either since the
// records are in Text format with length included.
RETURN_IF_ERROR(SerDeUtils::SkipText(buffered_byte_stream_.get()));
// Read the compressed value buffer from the unbuffered stream.
int block_size;
RETURN_IF_ERROR(SerDeUtils::ReadVInt(buffered_byte_stream_.get(), &block_size));
RETURN_IF_ERROR(buffered_byte_stream_->SyncParent());
{
COUNTER_SCOPED_TIMER(scan_node_->scanner_timer());
RETURN_IF_ERROR(
SerDeUtils::ReadBytes(unbuffered_byte_stream_, block_size, &scratch_buf_));
}
RETURN_IF_ERROR(buffered_byte_stream_->SeekToParent());
bool too_small = false;
do {
if (too_small || has_string_slots_ || unparsed_data_buffer_ == NULL) {
unparsed_data_buffer_ =
unparsed_data_buffer_pool_->Allocate(unparsed_data_buffer_size_);
}
RETURN_IF_ERROR(decompress_block_function_(block_size,
&scratch_buf_[0], unparsed_data_buffer_size_, unparsed_data_buffer_, &too_small));
if (too_small) {
unparsed_data_buffer_size_ *= 2;
}
} while (too_small);
next_record_in_compressed_block_ = unparsed_data_buffer_;
return Status::OK;
}

View File

@@ -0,0 +1,329 @@
// Copyright (c) 2012 Cloudera, Inc. All rights reserved.
#ifndef IMPALA_EXEC_HDFS_SEQUENCE_SCANNER_H
#define IMPALA_EXEC_HDFS_SEQUENCE_SCANNER_H
#include "exec/hdfs-scanner.h"
#include "exec/buffered-byte-stream.h"
#include "exec/delimited-text-parser.h"
namespace impala {
// This scanner parses Sequence files located in HDFS, and writes the
// content as tuples in the Impala in-memory representation of data, e.g.
// (tuples, rows, row batches).
// org.apache.hadoop.io.SequenceFile is the original SequenceFile implementation
// and should be viewed as the canonical definition of this format. If
// anything is unclear in this file you should consult the code in
// org.apache.hadoop.io.SequenceFile.
//
// The following is a pseudo-BNF grammar for SequenceFile. Comments are prefixed
// with dashes:
//
// seqfile ::=
// <file-header>
// <record-block>+
//
// record-block ::=
// <record>+
// <file-sync-hash>
//
// file-header ::=
// <file-version-header>
// <file-key-class-name>
// <file-value-class-name>
// <file-is-compressed>
// <file-is-block-compressed>
// [<file-compression-codec-class>]
// <file-header-metadata>
// <file-sync-field>
//
// file-version-header ::= Byte[4] {'S', 'E', 'Q', 6}
//
// -- The name of the Java class responsible for reading the key buffer
//
// file-key-class-name ::=
// Text {"org.apache.hadoop.io.BytesWritable"}
//
// -- The name of the Java class responsible for reading the value buffer
//
// file-value-class-name ::=
// Text {"org.apache.hadoop.io.Text"}
//
// -- Boolean variable indicating whether or not the file uses compression
// -- for key/values in this file
//
// file-is-compressed ::= Byte[1]
//
// -- A boolean field indicating whether or not the file is block compressed.
//
// file-is-block-compressed ::= Byte[1] {false}
//
// -- The Java class name of the compression codec iff <file-is-compressed>
// -- is true. The named class must implement
// -- org.apache.hadoop.io.compress.CompressionCodec.
// -- The expected value is org.apache.hadoop.io.compress.GzipCodec.
//
// file-compression-codec-class ::= Text
//
// -- A collection of key-value pairs defining metadata values for the
// -- file. The Map is serialized using standard JDK serialization, i.e.
// -- an Int corresponding to the number of key-value pairs, followed by
// -- Text key and value pairs.
//
// file-header-metadata ::= Map<Text, Text>
//
// -- A 16 byte marker generated by the writer. This marker appears at regular
// -- intervals at the beginning of records or record blocks, and is intended to
// -- enable readers to skip to a random part of the file.
// -- The sync hash is preceded by a length of -1, referred to as the sync marker.
//
// file-sync-hash ::= Byte[16]
//
// -- Records are all of one type as determined by the compression bits in the header
//
// record ::=
// <uncompressed-record> |
// <block-compressed-record> |
// <record-compressed-record>
//
// uncompressed-record ::=
// <record-length>
// <key-length>
// <key>
// <value>
//
// record-compressed-record ::=
// <record-length>
// <key-length>
// <key>
// <compressed-value>
//
// block-compressed-record ::=
// <file-sync-field>
// <key-lengths-block-size>
// <key-lengths-block>
// <keys-block-size>
// <keys-block>
// <value-lengths-block-size>
// <value-lengths-block>
// <values-block-size>
// <values-block>
//
// record-length ::= Int
// key-length ::= Int
// key-lengths-block-size ::= Int
// value-lengths-block-size ::= Int
//
// keys-block ::= Byte[keys-block-size]
// values-block ::= Byte[values-block-size]
//
// -- The key-lengths and value-lengths blocks are a sequence of lengths encoded
// -- in ZeroCompressedInteger (VInt) format.
//
// key-lengths-block ::= Byte[key-lengths-block-size]
// value-lengths-block ::= Byte[value-lengths-block-size]
//
// Byte ::= An eight-bit byte
//
// VInt ::= Variable length integer. The high-order bit of each byte
// indicates whether more bytes remain to be read. The low-order seven
// bits are appended as increasingly more significant bits in the
// resulting integer value.
//
// Int ::= A four-byte integer in big-endian format.
//
// Text ::= VInt, Chars (Length prefixed UTF-8 characters)
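To make the grammar concrete, the following is a minimal, self-contained sketch (not Impala or Hadoop code; all names are illustrative) of decoding the three primitives the grammar relies on, VInt, Int and Text, from an in-memory buffer. It follows the descriptions above literally; note that Hadoop's WritableUtils encodes VInt/VLong with a somewhat different length-prefixed scheme, so treat this only as an illustration of the comment above.

#include <cstdint>
#include <stdexcept>
#include <string>

// Decodes a VInt as described above: the high-order bit of each byte marks
// continuation, the low-order seven bits carry increasingly significant data.
inline int64_t DecodeVInt(const uint8_t* buf, size_t len, size_t* pos) {
  int64_t value = 0;
  int shift = 0;
  while (*pos < len) {
    uint8_t b = buf[(*pos)++];
    value |= static_cast<int64_t>(b & 0x7f) << shift;
    if ((b & 0x80) == 0) return value;
    shift += 7;
  }
  throw std::runtime_error("truncated VInt");
}

// Decodes a big-endian four-byte Int.
inline int32_t DecodeInt(const uint8_t* buf, size_t len, size_t* pos) {
  if (len - *pos < 4) throw std::runtime_error("truncated Int");
  uint32_t v = (static_cast<uint32_t>(buf[*pos]) << 24)
             | (static_cast<uint32_t>(buf[*pos + 1]) << 16)
             | (static_cast<uint32_t>(buf[*pos + 2]) << 8)
             | static_cast<uint32_t>(buf[*pos + 3]);
  *pos += 4;
  return static_cast<int32_t>(v);
}

// Decodes a Text: a VInt length followed by that many UTF-8 bytes.
inline std::string DecodeText(const uint8_t* buf, size_t len, size_t* pos) {
  int64_t n = DecodeVInt(buf, len, pos);
  if (n < 0 || static_cast<uint64_t>(n) > len - *pos) {
    throw std::runtime_error("truncated Text");
  }
  std::string s(reinterpret_cast<const char*>(buf + *pos), static_cast<size_t>(n));
  *pos += static_cast<size_t>(n);
  return s;
}

Reading a file header per the grammar then amounts to: check the four version-header bytes {'S', 'E', 'Q', 6}, DecodeText() twice for the key and value class names, read the two one-byte compression booleans, optionally DecodeText() for the codec class, skip the metadata map, and finally read the 16-byte sync hash.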
class HdfsSequenceScanner : public HdfsScanner {
public:
HdfsSequenceScanner(HdfsScanNode* scan_node, const TupleDescriptor* tuple_desc,
Tuple* template_tuple, MemPool* tuple_pool);
virtual Status Prepare(RuntimeState* state, ByteStream* byte_stream);
virtual Status GetNext(RuntimeState* state, RowBatch* row_batch, bool* eosr);
private:
// Sync indicator
const static int SYNC_MARKER = -1;
// Size of the sync hash field
const static int SYNC_HASH_SIZE = 16;
// The key class name located in the SeqFile Header.
// This is always "org.apache.hadoop.io.BytesWritable"
static const char* const SEQFILE_KEY_CLASS_NAME;
// The value class name located in the SeqFile Header.
// This is always "org.apache.hadoop.io.Text"
static const char* const SEQFILE_VALUE_CLASS_NAME;
// The four byte SeqFile version header present at the beginning of every
// SeqFile file: {'S', 'E', 'Q', 6}
static const uint8_t SEQFILE_VERSION_HEADER[4];
// The key should always be 4 bytes.
static const int SEQFILE_KEY_LENGTH;
// The names of the Codecs we support.
static const char* const SEQFILE_DEFAULT_COMPRESSION;
static const char* const SEQFILE_GZIP_COMPRESSION;
static const char* const SEQFILE_BZIP2_COMPRESSION;
static const char* const SEQFILE_SNAPPY_COMPRESSION;
// Size to read when searching for the first record in a split
// This should probably be derived from the environment.
const static int FILE_BLOCK_SIZE = 4096;
// Initialises any state required at the beginning of a new scan range.
// If not at the beginning of the file it will trigger a search for the
// next sync block, where the scan will start.
virtual Status InitCurrentScanRange(RuntimeState* state,
HdfsScanRange* scan_range, ByteStream* byte_stream);
// Writes the intermediate parsed data in to slots, outputting
// tuples to row_batch as they complete.
// Input Parameters:
// state: Runtime state into which we log errors
// row_batch: Row batch into which to write new tuples
// num_fields: Total number of fields contained in parsed_data_
// Input/Output Parameters
// row_idx: Index of current row in row_batch.
Status WriteFields(RuntimeState* state, RowBatch* row_batch,
int num_fields, int* row_idx);
// Find the first record of a scan range.
// If the scan range is not at the beginning of the file then this is called to
// move the buffered_byte_stream_ seek point to before the next sync field.
// If there is none present then the buffered_byte_stream_ will be beyond the
// end of the scan range and the scan will end.
Status FindFirstRecord(RuntimeState *state);
// Read the Sequence file header from the beginning of the file.
// Verifies:
// version number
// key and data classes
// Sets:
// is_compressed_
// is_blk_compressed_
// compression_codec_
// sync_
Status ReadFileHeader();
// Read the Sequence file Header Metadata section in the current file.
// We don't use this information, so it is just skipped.
Status ReadFileHeaderMetadata();
// Read and validate a sync field.
Status ReadSync();
// Read the record header, returning in 'sync' whether a sync block was read.
// Sets:
// current_block_length_
Status ReadBlockHeader(bool* sync);
// Find first record in a scan range.
// Sets the current_byte_stream_ to this record.
Status FindFirstRecord();
// Read compressed blocks and iterate through the records in each block.
// Output:
// record_ptr: pointer to the record.
// record_len: length of the record.
// eosr: set to true if we are at the end of the scan range.
Status GetRecordFromCompressedBlock(RuntimeState *state,
char** record_ptr, int64_t* record_len, bool* eosr);
// Read compressed or uncompressed records from the byte stream into memory
// in unparsed_data_buffer_pool_.
// Output:
// record_ptr: pointer to the record.
// record_len: length of the record.
// eosr: set to true if we are at the end of the scan range.
Status GetRecord(char** record_ptr, int64_t* record_len, bool* eosr);
// Read a compressed block.
// Decompress to unparsed_data_buffer_ allocated from unparsed_data_buffer_pool_.
Status ReadCompressedBlock(RuntimeState *state);
// Sets decompress_block_function_ based on the compression_codec_.
Status SetCompression();
// Read and verify a sync block.
// report_error: if false we are scanning for the beginning of a range and
// do not want to report errors.
// verified: set to true if the sync hash was correct.
Status CheckSync(bool report_error, bool* verified);
// A buffered byte stream wrapping the stream we are passed.
boost::scoped_ptr<BufferedByteStream> buffered_byte_stream_;
// Helper class for picking fields and rows from delimited text.
boost::scoped_ptr<DelimitedTextParser> delimited_text_parser_;
std::vector<DelimitedTextParser::FieldLocation> field_locations_;
// Parser to find the first record. This uses different delimiters.
boost::scoped_ptr<DelimitedTextParser> find_first_parser_;
// Helper class for converting text fields to internal types.
boost::scoped_ptr<TextConverter> text_converter_;
// Function pointer to the decompression routine for the selected codec.
// Uncompresses 'input_length' bytes from 'in' into the 'output_length'-byte
// buffer 'out'. Sets *too_small to true if 'output_length' is not big enough
// to hold the uncompressed data. (A sketch of one possible implementation
// follows this header.)
Status (*decompress_block_function_) (int input_length, char* in,
int output_length, char* out, bool* too_small);
// Runtime state for reporting file parsing errors.
RuntimeState* runtime_state_;
// The original byte stream we are passed.
ByteStream* unbuffered_byte_stream_;
// The sync hash read in from the file header.
char sync_[SYNC_HASH_SIZE];
// File compression or not.
bool is_compressed_;
// Block compression or not.
bool is_blk_compressed_;
// Compression codec specified in the Sequence file Header as a SerDe Text.
std::vector<char> compression_codec_;
// Location (file name) of previous scan range.
std::string previous_location_;
// Byte offset of the end of the scan range.
int end_of_scan_range_;
// Length of the current sequence file block (or record).
int current_block_length_;
// Length of the current key. This should always be SEQFILE_KEY_LENGTH.
int current_key_length_;
// Pool for allocating the unparsed_data_buffer_.
boost::scoped_ptr<MemPool> unparsed_data_buffer_pool_;
// Buffer for data read from HDFS or from decompressing the HDFS data.
char* unparsed_data_buffer_;
// Size of the unparsed_data_buffer_.
int64_t unparsed_data_buffer_size_;
// Number of records buffered in unparsed_data_buffer_ from block-compressed data.
int64_t num_buffered_records_in_compressed_block_;
// Next record from block compressed data.
char* next_record_in_compressed_block_;
// Temporary buffer used for reading headers and compressed data.
// It will grow to be big enough for the largest compressed record or block.
std::vector<char> scratch_buf_;
};
} // namespace impala
#endif // IMPALA_EXEC_HDFS_SEQUENCE_SCANNER_H
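As a rough illustration of the decompress_block_function_ contract declared above, the sketch below shows one possible shape for the default (zlib/deflate) codec using zlib's uncompress(). This is illustrative only: it is not the Impala implementation, bool stands in for impala::Status, and gzip/bzip2/snappy framing is not handled.

#include <zlib.h>

// Decompresses 'input_length' bytes from 'in' into the 'output_length'-byte
// buffer 'out'. If zlib reports that the output buffer is too small, sets
// *too_small so the caller can grow the buffer and retry (ReadCompressedBlock
// doubles unparsed_data_buffer_size_ in that case).
static bool ZlibDecompressBlock(int input_length, char* in,
                                int output_length, char* out, bool* too_small) {
  *too_small = false;
  uLongf dest_len = static_cast<uLongf>(output_length);
  int rc = uncompress(reinterpret_cast<Bytef*>(out), &dest_len,
                      reinterpret_cast<const Bytef*>(in),
                      static_cast<uLong>(input_length));
  if (rc == Z_BUF_ERROR) {
    *too_small = true;
    return true;  // Not a hard error; the caller retries with a bigger buffer.
  }
  return rc == Z_OK;
}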

View File

@@ -2,13 +2,13 @@
#include "runtime/runtime-state.h"
#include "exec/hdfs-text-scanner.h"
#include "util/string-parser.h"
#include "runtime/tuple.h"
#include "runtime/row-batch.h"
#include "runtime/timestamp-value.h"
#include "exec/text-converter.h"
#include "util/cpu-info.h"
#include "exec/hdfs-scan-node.h"
#include "exec/delimited-text-parser.h"
#include "exec/text-converter.inline.h"
using namespace impala;
using namespace std;
@@ -19,8 +19,8 @@ HdfsTextScanner::HdfsTextScanner(HdfsScanNode* scan_node,
boundary_mem_pool_(new MemPool()),
boundary_row_(boundary_mem_pool_.get()),
boundary_column_(boundary_mem_pool_.get()),
column_idx_(0),
slot_idx_(0),
delimited_text_parser_(NULL),
text_converter_(NULL),
byte_buffer_pool_(new MemPool()),
byte_buffer_ptr_(NULL),
@@ -32,10 +32,18 @@ HdfsTextScanner::HdfsTextScanner(HdfsScanNode* scan_node,
const HdfsTableDescriptor* hdfs_table =
static_cast<const HdfsTableDescriptor*>(tuple_desc->table_desc());
tuple_delim_ = hdfs_table->line_delim();
field_delim_ = hdfs_table->field_delim();
collection_item_delim_ = hdfs_table->collection_delim();
escape_char_ = hdfs_table->escape_char();
text_converter_.reset(new TextConverter(hdfs_table->escape_char(), tuple_pool_));
char field_delim = hdfs_table->field_delim();
char collection_delim = hdfs_table->collection_delim();
if (scan_node_->materialized_slots().size() == 0) {
field_delim = '\0';
collection_delim = '\0';
}
delimited_text_parser_.reset(new DelimitedTextParser(scan_node->column_to_slot_index(),
scan_node->GetNumPartitionKeys(), scan_node->parse_time_counter(),
hdfs_table->line_delim(), field_delim, collection_delim,
hdfs_table->escape_char()));
}
Status HdfsTextScanner::InitCurrentScanRange(RuntimeState* state,
@@ -49,7 +57,6 @@ Status HdfsTextScanner::InitCurrentScanRange(RuntimeState* state,
// entries 0 through N-1 in column_idx_to_slot_idx. If this changes, we will need
// another layer of indirection to map text-file column indexes onto the
// column_idx_to_slot_idx table used below.
column_idx_ = scan_node_->GetNumPartitionKeys();
slot_idx_ = 0;
// Pre-load byte buffer with size of entire range, if possible
@@ -59,18 +66,18 @@ Status HdfsTextScanner::InitCurrentScanRange(RuntimeState* state,
boundary_column_.Clear();
boundary_row_.Clear();
delimited_text_parser_->ParserReset();
// Offset may not point to tuple boundary
if (scan_range->offset != 0) {
int first_tuple_offset = FindFirstTupleStart(byte_buffer_, byte_buffer_read_size_);
int first_tuple_offset =
delimited_text_parser_->FindFirstTupleStart(byte_buffer_, byte_buffer_read_size_);
DCHECK_LE(first_tuple_offset, min(state->file_buffer_size(),
current_range_remaining_len_));
current_range_remaining_len_));
byte_buffer_ptr_ += first_tuple_offset;
current_range_remaining_len_ -= first_tuple_offset;
}
last_char_is_escape_ = false;
current_column_has_escape_ = false;
return Status::OK;
}
@@ -88,7 +95,7 @@ Status HdfsTextScanner::FillByteBuffer(RuntimeState* state, int64_t size) {
{
COUNTER_SCOPED_TIMER(scan_node_->scanner_timer());
RETURN_IF_ERROR(current_byte_stream_->Read(byte_buffer_, read_size,
&byte_buffer_read_size_));
&byte_buffer_read_size_));
}
byte_buffer_end_ = byte_buffer_ + byte_buffer_read_size_;
byte_buffer_ptr_ = byte_buffer_;
@@ -101,106 +108,19 @@ Status HdfsTextScanner::Prepare(RuntimeState* state, ByteStream* byte_stream) {
current_range_remaining_len_ = 0;
text_converter_.reset(new TextConverter(escape_char_, tuple_pool_));
// Allocate the scratch space for two-pass parsing. The maximum number of fields
// we can parse in one pass is the batch size (in tuples) times the number of
// fields per tuple.
// TODO: This should probably be based on L2/L3 cache sizes (as should the batch size)
field_locations_.resize(state->batch_size() * scan_node_->materialized_slots().size());
// Initialize the sse search registers.
// TODO: is this safe to do in prepare? Not sure if the compiler/system
// will manage these registers for us.
char tmp[SSEUtil::CHARS_PER_128_BIT_REGISTER];
memset(tmp, 0, sizeof(tmp));
tmp[0] = tuple_delim_;
xmm_tuple_search_ = _mm_loadu_si128(reinterpret_cast<__m128i*>(tmp));
tmp[0] = escape_char_;
xmm_escape_search_ = _mm_loadu_si128(reinterpret_cast<__m128i*>(tmp));
tmp[0] = field_delim_;
tmp[1] = collection_item_delim_;
xmm_field_search_ = _mm_loadu_si128(reinterpret_cast<__m128i*>(tmp));
return Status::OK;
}
// Find the start of the first full tuple in buffer by looking for the end of
// the previous tuple.
// TODO: most of this is not tested. We need some tailored data to exercise the boundary
// cases
int HdfsTextScanner::FindFirstTupleStart(char* buffer, int len) {
int tuple_start = 0;
char* buffer_start = buffer;
while (tuple_start < len) {
if (CpuInfo::Instance()->IsSupported(CpuInfo::SSE4_2)) {
__m128i xmm_buffer, xmm_tuple_mask;
while (len - tuple_start >= SSEUtil::CHARS_PER_128_BIT_REGISTER) {
// TODO: can we parallelize this as well? Are there multiple sse execution units?
// Load the next 16 bytes into the xmm register and do strchr for the
// tuple delimiter.
xmm_buffer = _mm_loadu_si128(reinterpret_cast<__m128i*>(buffer));
xmm_tuple_mask = _mm_cmpistrm(xmm_tuple_search_, xmm_buffer, SSEUtil::STRCHR_MODE);
int tuple_mask = _mm_extract_epi16(xmm_tuple_mask, 0);
if (tuple_mask != 0) {
for (int i = 0; i < SSEUtil::CHARS_PER_128_BIT_REGISTER; ++i) {
if ((tuple_mask & SSEUtil::SSE_BITMASK[i]) != 0) {
tuple_start += i + 1;
buffer += i + 1;
goto end;
}
}
}
tuple_start += SSEUtil::CHARS_PER_128_BIT_REGISTER;
buffer += SSEUtil::CHARS_PER_128_BIT_REGISTER;
}
} else {
for (int i = tuple_start; i < len; ++i) {
char c = *buffer++;
if (c == tuple_delim_) {
tuple_start = i + 1;
goto end;
}
}
}
end:
if (escape_char_ != '\0') {
// Scan backwards for escape characters. We do this after
// finding the tuple break rather than during the (above)
// forward scan to make the forward scan faster. This will
// perform worse if there are many characters right before the
// tuple break that are all escape characters, but that is
// unlikely.
int num_escape_chars = 0;
int before_tuple_end = tuple_start - 2;
for (; before_tuple_end >= 0; --before_tuple_end) {
if (buffer_start[before_tuple_end] == escape_char_) {
++num_escape_chars;
} else {
break;
}
}
// TODO: This sucks. All the preceding characters before the tuple delim were
// escape characters. We need to read from the previous block to see what to do.
DCHECK_GT(before_tuple_end, 0);
// An even number of escape characters means they cancel out and this tuple break
// is *not* escaped.
if (num_escape_chars % 2 == 0) {
return tuple_start;
}
} else {
return tuple_start;
}
}
return tuple_start;
}
Status HdfsTextScanner::GetNext(RuntimeState* state, RowBatch* row_batch, bool* eosr) {
AllocateTupleBuffer(row_batch);
// Index into current row in row_batch.
int row_idx = RowBatch::INVALID_ROW_INDEX;
int first_materialised_col_idx = scan_node_->GetNumPartitionKeys();
char* col_start = NULL;
// This loop contains a small state machine:
// 1. byte_buffer_ptr_ != byte_buffer_end_: no need to read more, process what's in
@@ -220,25 +140,25 @@ Status HdfsTextScanner::GetNext(RuntimeState* state, RowBatch* row_batch, bool*
// TODO: log an error, we have an incomplete tuple at the end of the file
current_range_remaining_len_ = 0;
slot_idx_ = 0;
column_idx_ = first_materialised_col_idx;
delimited_text_parser_->ParserReset();
boundary_column_.Clear();
byte_buffer_ptr_ = NULL;
continue;
}
} else {
if (current_range_remaining_len_ == 0) {
// Check if a tuple is straddling this block and the next:
// 1. boundary_column_ is not empty
// 2. column_idx_ != first_materialised_col_idx if we are halfway through
// reading a tuple
// 2. We are part-way through a tuple: !delimited_text_parser_->AtTupleStart().
// 3. We are part-way through the first column: col_start != byte_buffer_ptr_.
// We need to continue scanning until the end of the tuple. Note that
// boundary_column_ will be empty if we are on a column boundary, but could still
// be inside a tuple. Similarly column_idx_ could be first_materialised_col_idx
// if we are in the middle of reading the first column. Therefore we need both
// checks.
// We cannot use slot_idx, since that is incremented only if we are
// materialising slots, which is not true for e.g. count(*)
// boundary_column_ will be empty if we are on a column boundary,
// but could still be inside a tuple. Similarly column_idx_ could be
// first_materialised_col_idx if we are in the middle of reading the first
// column. Therefore we need both checks.
// TODO: test that hits this condition.
if (!boundary_column_.Empty() || column_idx_ != first_materialised_col_idx) {
if (!boundary_column_.Empty() || !delimited_text_parser_->AtTupleStart() ||
(col_start != NULL && col_start != byte_buffer_ptr_)) {
current_range_remaining_len_ = -1;
continue;
}
@@ -262,7 +182,6 @@ Status HdfsTextScanner::GetNext(RuntimeState* state, RowBatch* row_batch, bool*
int previous_num_rows = num_rows_returned_;
// With two pass approach, we need to save some of the state before any of the file
// was parsed
char* col_start = NULL;
char* line_start = byte_buffer_ptr_;
int num_tuples = 0;
int num_fields = 0;
@@ -272,8 +191,10 @@ Status HdfsTextScanner::GetNext(RuntimeState* state, RowBatch* row_batch, bool*
if (current_range_remaining_len_ < 0) {
max_tuples = 1;
}
RETURN_IF_ERROR(ParseFileBuffer(max_tuples, &num_tuples, &num_fields, &col_start));
char* previous_buffer_ptr = byte_buffer_ptr_;
RETURN_IF_ERROR(delimited_text_parser_->ParseFieldLocations(max_tuples,
byte_buffer_end_ - byte_buffer_ptr_, &byte_buffer_ptr_,
&field_locations_, &num_tuples, &num_fields, &col_start));
int bytes_processed = byte_buffer_ptr_ - line_start;
current_range_remaining_len_ -= bytes_processed;
@@ -281,18 +202,28 @@ Status HdfsTextScanner::GetNext(RuntimeState* state, RowBatch* row_batch, bool*
if (scan_node_->materialized_slots().size() != 0) {
if (num_fields != 0) {
// There can be one partial tuple which returned no more fields from this buffer.
DCHECK_LE(num_tuples, num_fields + 1);
if (!boundary_column_.Empty()) {
CopyBoundaryField(&field_locations_[0]);
boundary_column_.Clear();
}
RETURN_IF_ERROR(WriteFields(state, row_batch, num_fields, &row_idx, &line_start));
}
} else if (col_start != previous_buffer_ptr) {
// If we saw any delimiters, col_start will have moved; clear the boundary_row_.
boundary_row_.Clear();
}
} else if (num_tuples != 0) {
// If we are doing count(*) then we return tuples only containing partition keys
boundary_row_.Clear();
line_start = byte_buffer_ptr_;
RETURN_IF_ERROR(WriteTuples(state, row_batch, num_tuples, &row_idx));
}
// Cannot reuse file buffer if there are non-copied string slots materialized
// TODO: If the tuple data contains very sparse string slots, we waste a lot of memory.
// Instead, we should consider copying the tuples to a compact new buffer in this
// case.
// TODO: If the tuple data contains very sparse string slots, we waste a lot of
// memory. Instead, we should consider copying the tuples to a compact new buffer
// in this case.
if (num_rows_returned_ > previous_num_rows && has_string_slots_) {
reuse_byte_buffer_ = false;
}
@@ -301,8 +232,8 @@ Status HdfsTextScanner::GetNext(RuntimeState* state, RowBatch* row_batch, bool*
break;
}
// Save contents that are split across files
if (col_start != byte_buffer_ptr_) {
// Save contents that are split across buffers if we are going to return this column
if (col_start != byte_buffer_ptr_ && delimited_text_parser_->ReturnCurrentColumn()) {
boundary_column_.Append(col_start, byte_buffer_ptr_ - col_start);
boundary_row_.Append(line_start, byte_buffer_ptr_ - line_start);
}
@@ -324,12 +255,12 @@ Status HdfsTextScanner::GetNext(RuntimeState* state, RowBatch* row_batch, bool*
// GetNext() call.
if (row_batch->IsFull()) {
*eosr = false;
break;
DCHECK(delimited_text_parser_->AtTupleStart());
return Status::OK;
}
}
DCHECK_EQ(column_idx_, first_materialised_col_idx);
// This is the only non-error return path for this function. There are
// two return paths:
// 1. EOS: limit is reached or scan range is complete
@@ -341,11 +272,12 @@ Status HdfsTextScanner::GetNext(RuntimeState* state, RowBatch* row_batch, bool*
}
row_batch->tuple_data_pool()->AcquireData(tuple_pool_, !*eosr);
DCHECK(delimited_text_parser_->AtTupleStart());
return Status::OK;
}
void HdfsTextScanner::ReportRowParseError(RuntimeState* state, char* line_start,
int len) {
int len) {
++num_errors_in_file_;
if (state->LogHasSpace()) {
state->error_stream() << "file: " << current_byte_stream_->GetLocation() << endl;
@@ -377,8 +309,20 @@ Status HdfsTextScanner::WriteFields(RuntimeState* state, RowBatch* row_batch,
// Loop through all the parsed_data and parse out the values to slots
for (int n = 0; n < num_fields; ++n) {
int need_escape = false;
int len = field_locations_[n].len;
if (len < 0) {
len = -len;
need_escape = true;
}
next_line_offset += (len + 1);
WriteSlots(state, n, &next_line_offset);
boundary_row_.Clear();
if (!text_converter_->WriteSlot(state,
scan_node_->materialized_slots()[slot_idx_].second, tuple_,
field_locations_[n].start, len, false, need_escape).ok()) {
error_in_row_ = true;
}
// If slot_idx_ equals the number of materialized slots, we have completed
// parsing the tuple. At this point we can:
@@ -444,90 +388,7 @@ Status HdfsTextScanner::WriteFields(RuntimeState* state, RowBatch* row_batch,
return Status::OK;
}
Status HdfsTextScanner::WriteSlots(RuntimeState* state, int tuple_idx,
int* next_line_offset) {
boundary_row_.Clear();
SlotDescriptor* slot_desc = scan_node_->materialized_slots()[slot_idx_].second;
char* data = field_locations_[tuple_idx].start;
int len = field_locations_[tuple_idx].len;
bool need_escape = false;
if (len < 0) {
len = -len;
need_escape = true;
}
next_line_offset += (len + 1);
if (len == 0) {
tuple_->SetNull(slot_desc->null_indicator_offset());
} else {
StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS;
void* slot = tuple_->GetSlot(slot_desc->tuple_offset());
// Parse the raw-text data. At this point:
switch (slot_desc->type()) {
case TYPE_STRING:
reinterpret_cast<StringValue*>(slot)->ptr = data;
reinterpret_cast<StringValue*>(slot)->len = len;
if (need_escape) {
text_converter_->UnescapeString(reinterpret_cast<StringValue*>(slot));
}
break;
case TYPE_BOOLEAN:
*reinterpret_cast<bool*>(slot) =
StringParser::StringToBool(data, len, &parse_result);
break;
case TYPE_TINYINT:
*reinterpret_cast<int8_t*>(slot) =
StringParser::StringToInt<int8_t>(data, len, &parse_result);
break;
case TYPE_SMALLINT:
*reinterpret_cast<int16_t*>(slot) =
StringParser::StringToInt<int16_t>(data, len, &parse_result);
break;
case TYPE_INT:
*reinterpret_cast<int32_t*>(slot) =
StringParser::StringToInt<int32_t>(data, len, &parse_result);
break;
case TYPE_BIGINT:
*reinterpret_cast<int64_t*>(slot) =
StringParser::StringToInt<int64_t>(data, len, &parse_result);
break;
case TYPE_FLOAT:
*reinterpret_cast<float*>(slot) =
StringParser::StringToFloat<float>(data, len, &parse_result);
break;
case TYPE_DOUBLE:
*reinterpret_cast<double*>(slot) =
StringParser::StringToFloat<double>(data, len, &parse_result);
break;
case TYPE_TIMESTAMP: {
string strbuf(data, len);
*reinterpret_cast<TimestampValue*>(slot) = TimestampValue(strbuf);
break;
}
default:
DCHECK(false) << "bad slot type: " << TypeToString(slot_desc->type());
break;
}
// TODO: add warning for overflow case
if (parse_result == StringParser::PARSE_FAILURE) {
error_in_row_ = true;
tuple_->SetNull(slot_desc->null_indicator_offset());
if (state->LogHasSpace()) {
state->error_stream() << "Error converting column: "
<< slot_desc->col_pos() << " TO "
// TODO: num_partition_keys_ no longer visible to scanner.
// << slot_desc->col_pos() - num_partition_keys_ << " TO "
<< TypeToString(slot_desc->type()) << endl;
}
}
}
return Status::OK;
}
void HdfsTextScanner::CopyBoundaryField(FieldLocations* data) {
void HdfsTextScanner::CopyBoundaryField(DelimitedTextParser::FieldLocation* data) {
const int total_len = data->len + boundary_column_.Size();
char* str_data = reinterpret_cast<char*>(tuple_pool_->Allocate(total_len));
memcpy(str_data, boundary_column_.str().ptr, boundary_column_.Size());
@@ -536,212 +397,3 @@ void HdfsTextScanner::CopyBoundaryField(FieldLocations* data) {
data->len = total_len;
}
// Updates the values in the field and tuple masks, escaping them if necessary.
// If the character at n is an escape character, then delimiters (tuple/field/escape
// characters) at n+1 don't count.
inline void ProcessEscapeMask(int escape_mask, bool* last_char_is_escape, int* field_mask,
int* tuple_mask) {
// Escape characters can escape escape characters.
bool first_char_is_escape = *last_char_is_escape;
bool escape_next = first_char_is_escape;
for (int i = 0; i < SSEUtil::CHARS_PER_128_BIT_REGISTER; ++i) {
if (escape_next) {
escape_mask &= ~SSEUtil::SSE_BITMASK[i];
}
escape_next = escape_mask & SSEUtil::SSE_BITMASK[i];
}
// Remember last character for the next iteration
*last_char_is_escape = escape_mask &
SSEUtil::SSE_BITMASK[SSEUtil::CHARS_PER_128_BIT_REGISTER - 1];
// Shift escape mask up one so they match at the same bit index as the tuple and field mask
// (instead of being the character before) and set the correct first bit
escape_mask = escape_mask << 1 | first_char_is_escape;
// If escape_mask[n] is true, then tuple/field_mask[n] is escaped
*tuple_mask &= ~escape_mask;
*field_mask &= ~escape_mask;
}
// SSE optimized raw text file parsing. SSE4_2 added an instruction (with 3 modes) for
// text processing. The modes mimic strchr, strstr and strcmp. For text parsing, we can
// leverage the strchr functionality.
//
// The instruction operates on two sse registers:
// - the needle (what you are searching for)
// - the haystack (where you are searching in)
// Both registers can contain up to 16 characters. The result is a 16-bit mask with a bit
// set for each character in the haystack that matched any character in the needle.
// For example:
// Needle = 'abcd000000000000' (we're searching for any a's, b's, c's d's)
// Haystack = 'asdfghjklhjbdwwc' (the raw string)
// Result = '1010000000011001'
Status HdfsTextScanner::ParseFileBuffer(int max_tuples, int* num_tuples, int* num_fields,
char** column_start) {
COUNTER_SCOPED_TIMER(scan_node_->parse_time_counter());
// Start of this batch.
*column_start = byte_buffer_ptr_;
// To parse using SSE, we:
// 1. Load into different sse registers the different characters we need to search for
// - tuple breaks, field breaks, escape characters
// 2. Load 16 characters at a time into the sse register
// 3. Use the SSE instruction to do strchr on those 16 chars, the result is a bitmask
// 4. Compute the bitmask for tuple breaks, field breaks and escape characters.
// 5. If there are escape characters, fix up the matching masked bits in the field/tuple mask
// 6. Go through the mask bit by bit and write the parsed data.
// xmm registers:
// - xmm_buffer: the register holding the current (16 chars) we're working on from the
// - file
// - xmm_tuple_search_: the tuple search register. Only contains the tuple_delim char.
// - xmm_field_search_: the field search register. Contains field delim and
// collection_item delim_char
// - xmm_escape_search_: the escape search register. Only contains escape char
// - xmm_tuple_mask: the result of doing strchr for the tuple delim
// - xmm_field_mask: the result of doing strchr for the field delim
// - xmm_escape_mask: the result of doing strchr for the escape char
__m128i xmm_buffer, xmm_tuple_mask, xmm_field_mask, xmm_escape_mask;
// Length remaining of buffer to process
int remaining_len = byte_buffer_end_ - byte_buffer_ptr_;
const vector<int>& column_idx_to_slot_idx_ = scan_node_->column_to_slot_index();
if (CpuInfo::Instance()->IsSupported(CpuInfo::SSE4_2)) {
while (remaining_len >= SSEUtil::CHARS_PER_128_BIT_REGISTER) {
// Load the next 16 bytes into the xmm register
xmm_buffer = _mm_loadu_si128(reinterpret_cast<__m128i*>(byte_buffer_ptr_));
// Do the strchr for tuple and field breaks
// TODO: can we parallelize this as well? Are there multiple sse execution units?
xmm_tuple_mask = _mm_cmpistrm(xmm_tuple_search_, xmm_buffer, SSEUtil::STRCHR_MODE);
xmm_field_mask = _mm_cmpistrm(xmm_field_search_, xmm_buffer, SSEUtil::STRCHR_MODE);
// The strchr sse instruction returns the result in the lower bits of the sse
// register. Since we only process 16 characters at a time, only the lower 16 bits
// can contain non-zero values.
// _mm_extract_epi16 will extract 16 bits out of the xmm register. The second
// parameter specifies which 16 bits to extract (0 for the lowest 16 bits).
int tuple_mask = _mm_extract_epi16(xmm_tuple_mask, 0);
int field_mask = _mm_extract_epi16(xmm_field_mask, 0);
int escape_mask = 0;
// If the table does not use escape characters, skip processing for it.
if (escape_char_ != '\0') {
xmm_escape_mask = _mm_cmpistrm(xmm_escape_search_, xmm_buffer,
SSEUtil::STRCHR_MODE);
escape_mask = _mm_extract_epi16(xmm_escape_mask, 0);
ProcessEscapeMask(escape_mask, &last_char_is_escape_, &field_mask, &tuple_mask);
}
// Tuple delims are automatically field delims
field_mask |= tuple_mask;
if (field_mask != 0) {
// Loop through the mask and find the tuple/column offsets
for (int n = 0; n < SSEUtil::CHARS_PER_128_BIT_REGISTER; ++n) {
if (escape_mask != 0) {
current_column_has_escape_ =
current_column_has_escape_ || (escape_mask & SSEUtil::SSE_BITMASK[n]);
}
if (field_mask & SSEUtil::SSE_BITMASK[n]) {
char* column_end = byte_buffer_ptr_ + n;
// TODO: apparently there can be columns not in the schema which should be
// ignored. This does not handle that.
if (column_idx_to_slot_idx_[column_idx_] != HdfsScanNode::SKIP_COLUMN) {
DCHECK_LT(*num_fields, field_locations_.size());
// Found a column that needs to be parsed, write the start/len to
// 'parsed_data_'
const int len = column_end - *column_start;
field_locations_[*num_fields].start = *column_start;
if (!current_column_has_escape_) {
field_locations_[*num_fields].len = len;
} else {
field_locations_[*num_fields].len = -len;
}
if (!boundary_column_.Empty()) {
CopyBoundaryField(&field_locations_[*num_fields]);
}
++(*num_fields);
}
current_column_has_escape_ = false;
boundary_column_.Clear();
*column_start = column_end + 1;
++column_idx_;
}
if (tuple_mask & SSEUtil::SSE_BITMASK[n]) {
column_idx_ = scan_node_->GetNumPartitionKeys();
++(*num_tuples);
if (*num_tuples == max_tuples) {
byte_buffer_ptr_ += (n + 1);
last_char_is_escape_ = false;
return Status::OK;
}
}
}
} else {
current_column_has_escape_ = (current_column_has_escape_ || escape_mask);
}
remaining_len -= SSEUtil::CHARS_PER_128_BIT_REGISTER;
byte_buffer_ptr_ += SSEUtil::CHARS_PER_128_BIT_REGISTER;
}
}
// Handle the remaining characters
while (remaining_len > 0) {
bool new_tuple = false;
bool new_col = false;
if (!last_char_is_escape_) {
if (*byte_buffer_ptr_ == tuple_delim_) {
new_tuple = true;
new_col = true;
} else if (*byte_buffer_ptr_ == field_delim_
|| *byte_buffer_ptr_ == collection_item_delim_) {
new_col = true;
}
}
if (*byte_buffer_ptr_ == escape_char_) {
current_column_has_escape_ = true;
last_char_is_escape_ = !last_char_is_escape_;
} else {
last_char_is_escape_ = false;
}
if (new_col) {
if (column_idx_to_slot_idx_[column_idx_] != HdfsScanNode::SKIP_COLUMN) {
DCHECK_LT(*num_fields, field_locations_.size());
// Found a column that needs to be parsed, write the start/len to 'parsed_data_'
field_locations_[*num_fields].start = *column_start;
field_locations_[*num_fields].len = byte_buffer_ptr_ - *column_start;
if (current_column_has_escape_) field_locations_[*num_fields].len *= -1;
if (!boundary_column_.Empty()) {
CopyBoundaryField(&field_locations_[*num_fields]);
}
++(*num_fields);
}
boundary_column_.Clear();
current_column_has_escape_ = false;
*column_start = byte_buffer_ptr_ + 1;
++column_idx_;
}
if (new_tuple) {
column_idx_ = scan_node_->GetNumPartitionKeys();
++(*num_tuples);
}
--remaining_len;
++byte_buffer_ptr_;
if (*num_tuples == max_tuples) return Status::OK;
}
return Status::OK;
}

View File

@@ -1,9 +1,10 @@
// Copyright (c) 2012 Cloudera, Inc. All rights reserved.
#ifndef IMPALA_HDFS_TEXT_SCANNER_H_
#define IMPALA_HDFS_TEXT_SCANNER_H_
#ifndef IMPALA_EXEC_HDFS_TEXT_SCANNER_H
#define IMPALA_EXEC_HDFS_TEXT_SCANNER_H
#include "exec/hdfs-scanner.h"
#include "exec/delimited-text-parser.h"
namespace impala {
@@ -22,6 +23,40 @@ class HdfsTextScanner : public HdfsScanner {
const static char DELIM_INIT = -1;
const static int NEXT_BLOCK_READ_SIZE = 1024; //bytes
// Prepends field data that was from the previous file buffer (This field straddled two
// file buffers). 'data' already contains the pointer/len from the current file buffer,
// boundary_column_ contains the beginning of the data from the previous file
// buffer. This function will allocate a new string from the tuple pool, concatenate the
// two pieces and update 'data' to contain the new pointer/len.
void CopyBoundaryField(DelimitedTextParser::FieldLocation* data);
// Initialises any state required at the beginning of a new scan
// range. Here this means resetting escaping state.
virtual Status InitCurrentScanRange(RuntimeState* state, HdfsScanRange* scan_range,
ByteStream* byte_stream);
// Writes the intermediate parsed data in to slots, outputting
// tuples to row_batch as they complete.
// Input Parameters:
// state: Runtime state into which we log errors
// row_batch: Row batch into which to write new tuples
// num_fields: Total number of fields contained in parsed_data_
// Input/Output Parameters
// row_idx: Index of current row in row_batch.
// line_start: pointer to within byte_buffer where the current line starts. This is
// used for better error reporting
Status WriteFields(RuntimeState* state, RowBatch* row_batch, int num_fields,
int* row_idx, char** line_start);
// Appends the current file and line to the RuntimeState's error log (if there's space).
// Also, increments num_errors_in_file_.
void ReportRowParseError(RuntimeState* state, char* line_start, int len);
// Reads up to size bytes from byte_stream into byte_buffer_, and
// updates byte_buffer_read_size_
Status FillByteBuffer(RuntimeState* state, int64_t size);
// Memory pool for allocations into the boundary row / column
boost::scoped_ptr<MemPool> boundary_mem_pool_;
@@ -33,12 +68,15 @@ class HdfsTextScanner : public HdfsScanner {
// Helper string for dealing with columns that span file blocks.
StringBuffer boundary_column_;
// Index to keep track of the current column in the current file
int column_idx_;
// Index into materialized_slots_ for the next slot to output for the current tuple.
int slot_idx_;
// Helper class for picking fields and rows from delimited text.
boost::scoped_ptr<DelimitedTextParser> delimited_text_parser_;
// Field locations returned from the delimited text parser.
std::vector<DelimitedTextParser::FieldLocation> field_locations_;
// Helper class for converting text to other types.
boost::scoped_ptr<TextConverter> text_converter_;
@@ -70,108 +108,10 @@ class HdfsTextScanner : public HdfsScanner {
// logged.
bool error_in_row_;
// Intermediate structure used for two pass parsing approach. In the first pass,
// FieldLocations structs are filled out and contain where all the fields start and
// their lengths. In the second pass, the FieldLocations is used to write out the
// slots. We want to keep this struct as small as possible.
struct FieldLocations {
//start of field
char* start;
// Encodes the length and whether or not this fields needs to be unescaped.
// If len < 0, then the field needs to be unescaped.
int len;
};
std::vector<FieldLocations> field_locations_;
// SSE(xmm) register containing the tuple search character.
__m128i xmm_tuple_search_;
// SSE(xmm) register containing the field search character.
__m128i xmm_field_search_;
// SSE(xmm) register containing the escape search character.
__m128i xmm_escape_search_;
// Character delimiting tuples.
char tuple_delim_;
// Character delimiting fields (to become slots).
char field_delim_;
// Character delimiting collection items (to become slots).
char collection_item_delim_;
// Escape character.
char escape_char_;
// Whether or not the previous character was the escape character
bool last_char_is_escape_;
// Whether or not the current column has an escape character in it
// (and needs to be unescaped)
bool current_column_has_escape_;
// Tracks the number of bytes left to read in the current scan
// range. When <= 0, GetNext will prepare to exit.
int current_range_remaining_len_;
// Prepends field data that was from the previous file buffer (This field straddled two
// file buffers). 'data' already contains the pointer/len from the current file buffer,
// boundary_column_ contains the beginning of the data from the previous file
// buffer. This function will allocate a new string from the tuple pool, concatenate the
// two pieces and update 'data' to contain the new pointer/len.
void CopyBoundaryField(FieldLocations* data);
// Parses the current file_buffer_ for the field and tuple breaks.
// This function will write the field start & len to 'parsed_data_'
// which can then be written out to tuples.
// This function will use SSE (the Intel x86 instruction set extension
// "Streaming SIMD Extensions") if the hardware supports SSE4.2
// instructions. SSE4.2 added string processing instructions that
// allow for processing 16 characters at a time. Otherwise, this
// function will walk the file_buffer_ character by character.
// Input Parameters:
// max_tuples: The maximum number of tuples that should be parsed.
// This is used to control how the batching works.
// Output Parameters:
// num_tuples: Number of tuples parsed
// num_fields: Number of materialized fields parsed
// col_start: pointer within file_buffer_ where the next field starts
Status ParseFileBuffer(int max_tuples, int* num_tuples, int* num_fields,
char** column_start);
// Initialises any state required at the beginning of a new scan
// range. Here this means resetting escaping state.
virtual Status InitCurrentScanRange(RuntimeState* state, HdfsScanRange* scan_range,
ByteStream* byte_stream);
// Searches for the offset of the first full tuple in the supplied buffer.
int FindFirstTupleStart(char* buffer, int len);
// Writes the intermediate parsed data in to slots, outputting
// tuples to row_batch as they complete.
// Input Parameters:
// state: Runtime state into which we log errors
// row_batch: Row batch into which to write new tuples
// first_column_idx: The col idx for the raw file associated with parsed_data_[0]
// num_fields: Total number of fields contained in parsed_data_
// Input/Output Parameters
// row_idx: Index of current row in row_batch.
// line_start: pointer to within byte_buffer where the current line starts. This is
// used for better error reporting
Status WriteFields(RuntimeState* state, RowBatch* row_batch, int num_fields,
int* row_idx, char** line_start);
Status WriteSlots(RuntimeState* state, int tuple_idx, int* next_line_offset);
// Appends the current file and line to the RuntimeState's error log (if there's space).
// Also, increments num_errors_in_file_.
void ReportRowParseError(RuntimeState* state, char* line_start, int len);
// Reads up to size bytes from byte_stream into byte_buffer_, and
// updates byte_buffer_read_size_
Status FillByteBuffer(RuntimeState* state, int64_t size);
};
}

View File

@@ -23,16 +23,10 @@ Status SerDeUtils::ReadBoolean(ByteStream* byte_stream, bool* boolean) {
}
Status SerDeUtils::ReadInt(ByteStream* byte_stream, int32_t* integer) {
uint8_t buf[sizeof(int)];
char buf[sizeof(int32_t)];
RETURN_IF_ERROR(SerDeUtils::ReadBytes(byte_stream, sizeof(int32_t),
reinterpret_cast<char*>(&buf)));
*integer =
((buf[0] & 0xff) << 24)
| ((buf[1] & 0xff) << 16)
| ((buf[2] & 0xff) << 8)
| (buf[3] & 0xff);
return Status::OK;
return SerDeUtils::ReadInt(buf, integer);
}
Status SerDeUtils::ReadVLong(ByteStream* byte_stream, int64_t* vlong) {
@@ -145,6 +139,13 @@ Status SerDeUtils::ReadText(ByteStream* byte_stream, std::vector<char>* text) {
return Status::OK;
}
Status SerDeUtils::SkipText(ByteStream* byte_stream) {
int32_t length;
RETURN_IF_ERROR(ReadVInt(byte_stream, &length));
RETURN_IF_ERROR(SkipBytes(byte_stream, length));
return Status::OK;
}
std::string SerDeUtils::HexDump(const char* buf, int64_t length) {
std::stringstream ss;
ss << std::hex;

View File

@@ -33,6 +33,18 @@ public:
// Equivalent to java.io.DataInput.readInt()
static Status ReadInt(ByteStream* byte_stream, int32_t* integer);
// Read an Integer from a buffer.
static Status ReadInt(char* in_buf, int32_t* integer) {
// TODO: all buffers should be typed to uint8_t*
uint8_t* buf = reinterpret_cast<uint8_t*>(in_buf);
*integer =
(buf[0] << 24)
| (buf[1] << 16)
| (buf[2] << 8)
| buf[3];
return Status::OK;
}
// Read a variable-length Long value written using Writable serialization.
// Ref: org.apache.hadoop.io.WritableUtils.readVLong()
static Status ReadVLong(ByteStream* byte_stream, int64_t* vlong);
@@ -62,6 +74,9 @@ public:
// Ref: org.apache.hadoop.io.WritableUtils.readString()
static Status ReadText(ByteStream* byte_stream, std::vector<char>* text);
// Skip this text object.
static Status SkipText(ByteStream* byte_stream);
// Dump the first length bytes of buf to a Hex string.
static std::string HexDump(const char* buf, int64_t length);

View File

@@ -1,10 +1,12 @@
// Copyright (c) 2011 Cloudera, Inc. All rights reserved.
#include "runtime/runtime-state.h"
#include "text-converter.h"
#include <boost/algorithm/string.hpp>
#include <glog/logging.h>
#include "runtime/descriptors.h"
#include "runtime/tuple.h"
#include "util/string-parser.h"
#include "runtime/string-value.h"
#include "runtime/timestamp-value.h"
#include "runtime/mem-pool.h"
@@ -18,103 +20,6 @@ TextConverter::TextConverter(char escape_char, MemPool* var_len_pool)
var_len_pool_(var_len_pool) {
}
bool TextConverter::ConvertAndWriteSlotBytes(const char* begin, const char* end, Tuple* tuple,
const SlotDescriptor* slot_desc, bool copy_string, bool unescape_string) {
// Check for null columns.
// The below code implies that unquoted empty strings
// such as "...,,..." become NULLs, and not empty strings.
if (begin == end) {
tuple->SetNull(slot_desc->null_indicator_offset());
return true;
}
// Will be changed in conversion functions for error checking.
char* end_ptr = const_cast<char*>(end);
// TODO: Handle out-of-range conditions.
switch (slot_desc->type()) {
case TYPE_BOOLEAN: {
void* slot = tuple->GetSlot(slot_desc->tuple_offset());
if (iequals(begin, "true")) {
*reinterpret_cast<char*>(slot) = true;
} else if (iequals(begin, "false")) {
*reinterpret_cast<char*>(slot) = false;
} else {
// Inconvertible value. Set to NULL after switch statement.
end_ptr = const_cast<char*>(begin);
}
break;
}
case TYPE_TINYINT: {
void* slot = tuple->GetSlot(slot_desc->tuple_offset());
*reinterpret_cast<int8_t*>(slot) =
static_cast<int8_t>(strtol(begin, &end_ptr, 0));
break;
}
case TYPE_SMALLINT: {
void* slot = tuple->GetSlot(slot_desc->tuple_offset());
*reinterpret_cast<int16_t*>(slot) =
static_cast<int16_t>(strtol(begin, &end_ptr, 0));
break;
}
case TYPE_INT: {
void* slot = tuple->GetSlot(slot_desc->tuple_offset());
*reinterpret_cast<int32_t*>(slot) =
static_cast<int32_t>(strtol(begin, &end_ptr, 0));
break;
}
case TYPE_BIGINT: {
void* slot = tuple->GetSlot(slot_desc->tuple_offset());
*reinterpret_cast<int64_t*>(slot) = strtol(begin, &end_ptr, 0);
break;
}
case TYPE_FLOAT: {
void* slot = tuple->GetSlot(slot_desc->tuple_offset());
*reinterpret_cast<float*>(slot) =
static_cast<float>(strtod(begin, &end_ptr));
break;
}
case TYPE_DOUBLE: {
void* slot = tuple->GetSlot(slot_desc->tuple_offset());
*reinterpret_cast<double*>(slot) = strtod(begin, &end_ptr);
break;
}
case TYPE_STRING: {
StringValue* slot = tuple->GetStringSlot(slot_desc->tuple_offset());
const char* data_start = NULL;
slot->len = end - begin;
data_start = begin;
if (!copy_string) {
DCHECK(!unescape_string);
slot->ptr = const_cast<char*>(data_start);
} else {
char* slot_data = reinterpret_cast<char*>(var_len_pool_->Allocate(slot->len));
if (unescape_string) {
UnescapeString(data_start, slot_data, &slot->len);
} else {
memcpy(slot_data, data_start, slot->len);
}
slot->ptr = slot_data;
}
break;
}
case TYPE_TIMESTAMP : {
void* slot = tuple->GetSlot(slot_desc->tuple_offset());
string strbuf(begin, end - begin);
*reinterpret_cast<TimestampValue*>(slot) = TimestampValue(strbuf);
break;
}
default:
DCHECK(false) << "bad slot type: " << TypeToString(slot_desc->type());
}
// Set NULL if inconvertible.
if (*end_ptr != '\0' && slot_desc->type() != TYPE_STRING) {
tuple->SetNull(slot_desc->null_indicator_offset());
return false;
}
return true;
}
void TextConverter::UnescapeString(StringValue* value) {
char* new_data = reinterpret_cast<char*>(var_len_pool_->Allocate(value->len));
UnescapeString(value->ptr, new_data, &value->len);

View File

@@ -3,29 +3,32 @@
#ifndef IMPALA_EXEC_TEXT_CONVERTER_H
#define IMPALA_EXEC_TEXT_CONVERTER_H
#include "runtime/runtime-state.h"
namespace impala {
class Tuple;
class SlotDescriptor;
class MemPool;
class StringValue;
class Status;
// Helper class for dealing with text data, e.g., converting text data to numeric types, etc.
// Helper class for dealing with text data, e.g., converting text data to
// numeric types, etc.
class TextConverter {
public:
TextConverter(char escape_char, MemPool* var_len_pool);
// Converts slot data (begin, end) into type of slot_desc,
// Converts slot data, of length 'len', into type of slot_desc,
// and writes the result into the tuple's slot.
// copy_string indicates whether we need to make a separate copy of the string data:
// For regular unescaped strings, we point to the original data in the file_buf_.
// For regular escaped strings,
// we copy the unescaped string into a separate buffer and point to it.
// Unsuccessful conversions are turned into NULLs.
// Returns true if value was converted and written successfully, false otherwise.
bool ConvertAndWriteSlotBytes(const char* begin,
const char* end, Tuple* tuple, const SlotDescriptor* slot_desc,
bool copy_string, bool unescape_string);
// Returns Status::OK if the value was written successfully, error otherwise
Status WriteSlot(RuntimeState* state, const SlotDescriptor* slot_desc,
Tuple* tuple, const char* data, int len,
bool copy_string, bool need_escape);
// Removes escape characters from len characters of the null-terminated string src,
// and copies the unescaped string into dest, changing *len to the unescaped length.

View File

@@ -0,0 +1,104 @@
// Copyright (c) 2012 Cloudera, Inc. All rights reserved.
#include "runtime/runtime-state.h"
#include "text-converter.h"
#include <boost/algorithm/string.hpp>
#include <glog/logging.h>
#include "runtime/descriptors.h"
#include "runtime/tuple.h"
#include "util/string-parser.h"
#include "runtime/string-value.h"
#include "runtime/timestamp-value.h"
#include "runtime/mem-pool.h"
using namespace boost;
using namespace impala;
using namespace std;
// TODO: Needs to be codegen rather than inline.
inline Status TextConverter::WriteSlot(RuntimeState* state,
const SlotDescriptor* slot_desc, Tuple* tuple,
const char* data, int len,
bool copy_string, bool need_escape) {
bool fail = false;
if (len == 0) {
tuple->SetNull(slot_desc->null_indicator_offset());
} else {
StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS;
void* slot = tuple->GetSlot(slot_desc->tuple_offset());
// Parse the raw-text data. Translate the text string to internal format.
switch (slot_desc->type()) {
case TYPE_STRING: {
StringValue* str_slot = reinterpret_cast<StringValue*>(slot);
str_slot->ptr = const_cast<char*>(data);
str_slot->len = len;
if (copy_string || need_escape) {
char* slot_data = reinterpret_cast<char*>(var_len_pool_->Allocate(len));
if (need_escape) {
UnescapeString(data, slot_data, &str_slot->len);
} else {
memcpy(slot_data, data, str_slot->len);
}
str_slot->ptr = slot_data;
}
break;
}
case TYPE_BOOLEAN:
*reinterpret_cast<bool*>(slot) =
StringParser::StringToBool(data, len, &parse_result);
break;
case TYPE_TINYINT:
*reinterpret_cast<int8_t*>(slot) =
StringParser::StringToInt<int8_t>(data, len, &parse_result);
break;
case TYPE_SMALLINT:
*reinterpret_cast<int16_t*>(slot) =
StringParser::StringToInt<int16_t>(data, len, &parse_result);
break;
case TYPE_INT:
*reinterpret_cast<int32_t*>(slot) =
StringParser::StringToInt<int32_t>(data, len, &parse_result);
break;
case TYPE_BIGINT:
*reinterpret_cast<int64_t*>(slot) =
StringParser::StringToInt<int64_t>(data, len, &parse_result);
break;
case TYPE_FLOAT:
*reinterpret_cast<float*>(slot) =
StringParser::StringToFloat<float>(data, len, &parse_result);
break;
case TYPE_DOUBLE:
*reinterpret_cast<double*>(slot) =
StringParser::StringToFloat<double>(data, len, &parse_result);
break;
case TYPE_TIMESTAMP: {
string strbuf(data, len);
*reinterpret_cast<TimestampValue*>(slot) = TimestampValue(strbuf);
break;
}
default:
DCHECK(false) << "bad slot type: " << TypeToString(slot_desc->type());
break;
}
// TODO: add warning for overflow case
if (parse_result == StringParser::PARSE_FAILURE) {
fail = true;
tuple->SetNull(slot_desc->null_indicator_offset());
if (state->LogHasSpace()) {
state->error_stream()
<< "Error converting column: " << slot_desc->col_pos() << " TO "
// TODO: num_partition_keys_ no longer visible to scanner.
// << slot_desc->col_pos() - num_partition_keys_ << " TO "
<< TypeToString(slot_desc->type()) << ". Data is: " << string(data, len) << endl;
}
}
}
if (fail) return Status("Conversion from text failed");
return Status::OK;
}

View File

@@ -64,6 +64,7 @@ target_link_libraries(expr-test
gtest
${Boost_LIBRARIES}
${LLVM_MODULE_LIBS}
-lz -lbz2 -lsnappy
)
add_test(expr-test ${BUILD_OUTPUT_ROOT_DIRECTORY}/exprs/expr-test)

View File

@@ -32,6 +32,7 @@ target_link_libraries(Runtime
Exec
TestUtil
${Boost_LIBRARIES}
-lz -lbz2 -lsnappy
)
add_executable(mem-pool-test
@@ -98,6 +99,7 @@ target_link_libraries(data-stream-test
gtest
${Boost_LIBRARIES}
${LLVM_MODULE_LIBS}
-lz -lbz2 -lsnappy
)
add_test(mem-pool-test ${BUILD_OUTPUT_ROOT_DIRECTORY}/runtime/mem-pool-test)

View File

@@ -231,6 +231,7 @@ Status DescriptorTbl::Create(ObjectPool* pool, const TDescriptorTable& thrift_tb
switch (tdesc.tableType) {
case TTableType::HDFS_TEXT_TABLE:
case TTableType::HDFS_RCFILE_TABLE:
case TTableType::HDFS_SEQFILE_TABLE:
desc = pool->Add(new HdfsTableDescriptor(tdesc));
break;
case TTableType::HBASE_TABLE:

View File

@@ -46,6 +46,7 @@ target_link_libraries(backend
gflagsstatic
# tcmallocstatic
pprofstatic
-lz -lbz2 -lsnappy
)
add_executable(runquery
@@ -84,6 +85,7 @@ target_link_libraries(runquery
gflagsstatic
tcmallocstatic
pprofstatic
-lz
)
add_executable(impalad

View File

@@ -19,6 +19,7 @@ target_link_libraries(TestUtil
ImpalaThrift
glogstatic
gflagsstatic
-lz -lbz2 -lsnappy
)
add_executable(query-jitter

View File

@@ -13,7 +13,7 @@ make -j
cd $IMPALA_HOME
# Run sample queries - outputs .gcda files
be/build/release/service/runquery -query="select count(field) from grep1gb where field like '%xyz%';select sourceIP, SUM(adRevenue) FROM uservisits GROUP by sourceIP order by SUM(adRevenue) desc limit 10;select uv.sourceip, avg(r.pagerank), sum(uv.adrevenue) as totalrevenue from uservisits uv join rankings r on (r.pageurl = uv.desturl) where uv.visitdate > '1999-01-01' and uv.visitdate < '2000-01-01' group by uv.sourceip order by totalrevenue desc limit 1" -profile_output_file=""
be/build/release/service/runquery -query="select count(field) from grep1gb where field like '%xyz%';select count(field) from grep1gb_seq_snap where field like '%xyz%';select sourceIP, SUM(adRevenue) FROM uservisits_seq GROUP by sourceIP order by SUM(adRevenue) desc limit 10;select sourceIP, SUM(adRevenue) FROM uservisits GROUP by sourceIP order by SUM(adRevenue) desc limit 10;select uv.sourceip, avg(r.pagerank), sum(uv.adrevenue) as totalrevenue from uservisits uv join rankings r on (r.pageurl = uv.desturl) where uv.visitdate > '1999-01-01' and uv.visitdate < '2000-01-01' group by uv.sourceip order by totalrevenue desc limit 1" -profile_output_file=""
# Build again using the PGO data
cmake -DCMAKE_BUILD_TYPE=PROFILE_BUILD .

View File

@@ -214,10 +214,17 @@ queries = [
["select count(*) from grep1gb", 5, 5],
["select count(field) from grep1gb", 0, 5],
["select count(field) from grep1gb where field like '%xyz%'", 0, 5],
["select count(*) from grep1gb_seq_snap", 5, 5],
["select count(field) from grep1gb_seq_snap", 0, 5],
["select count(field) from grep1gb_seq_snap where field like '%xyz%'", 0, 5],
["select uv.sourceip, avg(r.pagerank), sum(uv.adrevenue) as totalrevenue "\
"from uservisits uv join rankings r on (r.pageurl = uv.desturl) "\
"where uv.visitdate > '1999-01-01' and uv.visitdate < '2000-01-01' "\
"group by uv.sourceip order by totalrevenue desc limit 1", 5, 5],
["select uv.sourceip, avg(r.pagerank), sum(uv.adrevenue) as totalrevenue "\
"from uservisits_seq uv join rankings r on (r.pageurl = uv.desturl) "\
"where uv.visitdate > '1999-01-01' and uv.visitdate < '2000-01-01' "\
"group by uv.sourceip order by totalrevenue desc limit 1", 5, 5],
["select sourceIP, SUM(adRevenue) FROM uservisits GROUP by sourceIP "\
"order by SUM(adRevenue) desc limit 10", 5, 5],
["select pageRank, pageURL from rankings where pageRank > 10 "\

View File

@@ -19,6 +19,7 @@ struct TSlotDescriptor {
enum TTableType {
HDFS_TEXT_TABLE,
HDFS_RCFILE_TABLE,
HDFS_SEQFILE_TABLE,
HBASE_TABLE
}

View File

@@ -9,6 +9,7 @@ include "Types.thrift"
enum TPlanNodeType {
HDFS_TEXT_SCAN_NODE,
HDFS_RCFILE_SCAN_NODE,
HDFS_SEQFILE_SCAN_NODE,
HBASE_SCAN_NODE,
HASH_JOIN_NODE,
AGGREGATION_NODE,

View File

@@ -0,0 +1,41 @@
// Copyright (c) 2011 Cloudera, Inc. All rights reserved.
package com.cloudera.impala.catalog;
import java.util.List;
import com.cloudera.impala.analysis.Expr;
import com.cloudera.impala.planner.DataSink;
import com.cloudera.impala.thrift.TTableDescriptor;
import com.cloudera.impala.thrift.TTableType;
/**
* Sequence Table.
*
*/
public class HdfsSeqFileTable extends HdfsTable {
// Input format class for Sequence tables read by Hive.
private static final String sequenceFileInputFormat =
"org.apache.hadoop.mapred.SequenceFileInputFormat";
protected HdfsSeqFileTable(TableId id, Db db, String name, String owner) {
super(id, db, name, owner);
}
@Override
public TTableDescriptor toThrift() {
TTableDescriptor tTable = super.toThrift();
tTable.setTableType(TTableType.HDFS_SEQFILE_TABLE);
return tTable;
}
public static boolean isSeqFileTable(org.apache.hadoop.hive.metastore.api.Table msTbl) {
return msTbl.getSd().getInputFormat().equals(sequenceFileInputFormat);
}
@Override
public DataSink createDataSink(List<Expr> partitionKeyExprs, boolean overwrite) {
throw new UnsupportedOperationException("HdfsSeqFile Output Sink not implemented.");
}
}

View File

@@ -25,9 +25,6 @@ import com.google.common.collect.Maps;
* for the clustering columns, those two rows are most likely colocated. Note that this
* is more general than Hive's CLUSTER BY ... INTO BUCKETS clause (which partitions
* a key range into a fixed number of buckets).
*
* Current subclasses are HdfsTextTable, HdfsRCFileTable, and HBaseTable.
*
*/
public abstract class Table {
protected final TableId id;
@@ -81,7 +78,7 @@ public abstract class Table {
* @param db
* @param tblName
* @return
* new instance of Hdfs[Text|RCFile]Table or HBaseTable
* new instance of Hdfs[Text|RCFile|Seq]Table or HBaseTable
* null if loading table failed
*/
public static Table load(TableId id, HiveMetaStoreClient client, Db db,
@@ -98,6 +95,8 @@ public abstract class Table {
table = new HdfsTextTable(id, db, tblName, msTbl.getOwner());
} else if (HdfsRCFileTable.isRCFileTable(msTbl)) {
table = new HdfsRCFileTable(id, db, tblName, msTbl.getOwner());
} else if (HdfsSeqFileTable.isSeqFileTable(msTbl)) {
table = new HdfsSeqFileTable(id, db, tblName, msTbl.getOwner());
} else {
throw new UnsupportedOperationException("Unrecognized table type");
}
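
Table.load above chooses the concrete catalog class by matching the metastore input-format string, which is why adding SequenceFile support only required the new HdfsSeqFileTable.isSeqFileTable check. Below is a minimal stand-alone sketch of that dispatch; the demo class and the RCFile input-format string are assumptions for illustration, and only the SequenceFileInputFormat string comes from this commit.

// Hypothetical sketch of the input-format dispatch performed by Table.load; not Impala code.
public class TableTypeDispatchDemo {
  // Taken from HdfsSeqFileTable above.
  private static final String SEQUENCE_FILE_INPUT_FORMAT =
      "org.apache.hadoop.mapred.SequenceFileInputFormat";
  // Assumed value used by the corresponding RCFile check; shown only for illustration.
  private static final String RCFILE_INPUT_FORMAT =
      "org.apache.hadoop.hive.ql.io.RCFileInputFormat";

  // Mirrors the if/else chain in Table.load: match on the metastore input format and
  // throw for anything the catalog does not recognize (text and HBase checks omitted).
  static String tableClassFor(String inputFormat) {
    if (SEQUENCE_FILE_INPUT_FORMAT.equals(inputFormat)) {
      return "HdfsSeqFileTable";
    } else if (RCFILE_INPUT_FORMAT.equals(inputFormat)) {
      return "HdfsRCFileTable";
    }
    throw new UnsupportedOperationException("Unrecognized table type: " + inputFormat);
  }

  public static void main(String[] args) {
    // Prints "HdfsSeqFileTable" for a table stored as SEQUENCEFILE by Hive.
    System.out.println(tableClassFor("org.apache.hadoop.mapred.SequenceFileInputFormat"));
  }
}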

View File

@@ -0,0 +1,24 @@
// Copyright (c) 2012 Cloudera, Inc. All rights reserved.
package com.cloudera.impala.planner;
import com.cloudera.impala.analysis.TupleDescriptor;
import com.cloudera.impala.catalog.HdfsTable;
import com.cloudera.impala.thrift.TPlanNode;
import com.cloudera.impala.thrift.TPlanNodeType;
/**
* HdfsSeqFileScanNode.
*
*/
public class HdfsSeqFileScanNode extends HdfsScanNode {
public HdfsSeqFileScanNode(int id, TupleDescriptor desc, HdfsTable tbl) {
super(id, desc, tbl);
}
@Override
protected void toThrift(TPlanNode msg) {
super.toThrift(msg);
msg.node_type = TPlanNodeType.HDFS_SEQFILE_SCAN_NODE;
}
}

View File

@@ -28,6 +28,7 @@ import com.cloudera.impala.analysis.TableRef;
import com.cloudera.impala.analysis.TupleDescriptor;
import com.cloudera.impala.analysis.TupleId;
import com.cloudera.impala.catalog.HdfsRCFileTable;
import com.cloudera.impala.catalog.HdfsSeqFileTable;
import com.cloudera.impala.catalog.HdfsTextTable;
import com.cloudera.impala.catalog.PrimitiveType;
import com.cloudera.impala.common.InternalException;
@@ -184,6 +185,10 @@ public class Planner {
// Hive RCFile table
scanNode = new HdfsRCFileScanNode(
getNextNodeId(), tblRef.getDesc(), (HdfsRCFileTable) tblRef.getTable());
} else if (tblRef.getTable() instanceof HdfsSeqFileTable) {
// Hive Sequence table
scanNode = new HdfsSeqFileScanNode(
getNextNodeId(), tblRef.getDesc(), (HdfsSeqFileTable) tblRef.getTable());
} else {
// HBase table
scanNode = new HBaseScanNode(getNextNodeId(), tblRef.getDesc());

View File

@@ -23,6 +23,7 @@ public class DataErrorsTest {
private static Executor executor;
private static StringBuilder testErrorLog;
private final String testDir = "DataErrorsTest";
private static ArrayList<String> tableList;
@BeforeClass
public static void setUp() throws Exception {
@@ -30,81 +31,91 @@ public class DataErrorsTest {
catalog = new Catalog(client);
executor = new Executor(catalog);
testErrorLog = new StringBuilder();
tableList = new ArrayList<String>();
tableList.add("");
tableList.add("_rc");
tableList.add("_seq");
tableList.add("_seq_def");
tableList.add("_seq_gzip");
tableList.add("_seq_bzip");
tableList.add("_seq_snap");
tableList.add("_seq_record_def");
tableList.add("_seq_record_gzip");
tableList.add("_seq_record_bzip");
tableList.add("_seq_record_snap");
}
private void runErrorTestFile(String testFile, boolean abortOnError, int maxErrors) {
private void runErrorTestFile(String testFile, boolean abortOnError, int maxErrors,
ArrayList<String> tables) {
StringBuilder errorLog = new StringBuilder();
String fileName = testDir + "/" + testFile + ".test";
TestFileParser queryFileParser = new TestFileParser(fileName);
queryFileParser.parseFile();
for (TestCase testCase : queryFileParser.getTestCases()) {
ArrayList<String> expectedErrors = testCase.getSectionContents(Section.ERRORS);
// The test file is assumed to contain all errors. We may only want to compare a few of them.
int errorsToCompare = Math.min(expectedErrors.size(), maxErrors);
int lastLine = 0;
int errorCount = 0;
for (String line : expectedErrors) {
// Indicates the last line of one error message.
// The final line of an Hdfs error message starts with "line:",
// and for Hbase tables with "row key:".
if (line.startsWith("line:") || line.startsWith("row key:")) {
errorCount++;
}
lastLine++;
if (errorCount >= errorsToCompare) {
break;
for (int f = 0; f < (tables == null ? 1 : tables.size()); f++) {
queryFileParser.parseFile(tables == null ? null : tables.get(f));
for (TestCase testCase : queryFileParser.getTestCases()) {
ArrayList<String> expectedErrors = testCase.getSectionContents(Section.ERRORS);
// The test file is assumed to contain all errors.
// We may only want to compare a few of them.
int errorsToCompare = Math.min(expectedErrors.size(), maxErrors);
int lastLine = 0;
int errorCount = 0;
for (String line : expectedErrors) {
// Indicates the last line of one error message.
// The final line of an Hdfs error message starts with "line:",
// and for Hbase tables with "row key:".
if (line.startsWith("line:") || line.startsWith("row key:")) {
errorCount++;
}
lastLine++;
if (errorCount >= errorsToCompare) {
break;
}
while (expectedErrors.size() > lastLine) {
expectedErrors.remove(expectedErrors.size() - 1);
}
// File error entries must be sorted by filename within .test file.
ArrayList<String> expectedFileErrors =
testCase.getSectionContents(Section.FILEERRORS);
if (abortOnError && !expectedFileErrors.isEmpty()) {
String[] fileErrSplits = expectedFileErrors.get(0).split(",");
// We are expecting only a single file with a single error.
String expectedFileError = fileErrSplits[0] + ",1";
expectedFileErrors.clear();
expectedFileErrors.add(expectedFileError);
}
// run query 3 ways: with backend's default batch size, with small batch size,
// and with batch size of 1, which should trigger a lot of corner cases
// in the execution engine code
String query = testCase.getQuery();
TestUtils.runQuery(executor, query, 1, 0,
abortOnError, maxErrors, testCase.getStartingLineNum(), null, null, null,
expectedErrors, expectedFileErrors, testErrorLog);
TestUtils.runQuery(executor, query, 1, 16,
abortOnError, maxErrors, testCase.getStartingLineNum(), null, null, null,
expectedErrors, expectedFileErrors, testErrorLog);
TestUtils.runQuery(executor, query, 1, 1,
abortOnError, maxErrors, testCase.getStartingLineNum(), null, null, null,
expectedErrors, expectedFileErrors, testErrorLog);
}
}
while (expectedErrors.size() > lastLine) {
expectedErrors.remove(expectedErrors.size() - 1);
}
// File error entries must be sorted by filename within .test file.
ArrayList<String> expectedFileErrors = testCase.getSectionContents(Section.FILEERRORS);
if (abortOnError && !expectedFileErrors.isEmpty()) {
String[] fileErrSplits = expectedFileErrors.get(0).split(",");
// We are expecting only a single file with a single error.
String expectedFileError = fileErrSplits[0] + ",1";
expectedFileErrors.clear();
expectedFileErrors.add(expectedFileError);
}
// run query 3 ways: with backend's default batch size, with small batch size,
// and with batch size of 1, which should trigger a lot of corner cases
// in the execution engine code
String query = testCase.getQuery();
TestUtils.runQuery(executor, query,
1, 0, abortOnError, maxErrors, testCase.getStartingLineNum(), null, null, null,
expectedErrors, expectedFileErrors, testErrorLog);
TestUtils.runQuery(executor, query,
1, 16, abortOnError, maxErrors, testCase.getStartingLineNum(), null, null, null,
expectedErrors, expectedFileErrors, testErrorLog);
TestUtils.runQuery(executor, query,
1, 1, abortOnError, maxErrors, testCase.getStartingLineNum(), null, null, null,
expectedErrors, expectedFileErrors, testErrorLog);
}
if (errorLog.length() != 0) {
fail(errorLog.toString());
if (errorLog.length() != 0) {
fail(errorLog.toString());
}
}
}
@Test
public void TestHdfsScanNodeErrors() {
runErrorTestFile("hdfs-scan-node-errors", false, 100);
runErrorTestFile("hdfs-scan-node-errors", false, 5);
runErrorTestFile("hdfs-scan-node-errors", true, 1);
}
@Test
public void TestHdfsRCFileScanNodeErrors() {
runErrorTestFile("hdfs-rcfile-scan-node-errors", false, 100);
runErrorTestFile("hdfs-rcfile-scan-node-errors", false, 5);
runErrorTestFile("hdfs-rcfile-scan-node-errors", true, 1);
runErrorTestFile("hdfs-scan-node-errors", false, 100, tableList);
runErrorTestFile("hdfs-scan-node-errors", false, 5, tableList);
runErrorTestFile("hdfs-scan-node-errors", true, 1, tableList);
}
@Test
public void TestHBaseScanNodeErrors() {
runErrorTestFile("hbase-scan-node-errors", false, 100);
runErrorTestFile("hbase-scan-node-errors", false, 5);
runErrorTestFile("hbase-scan-node-errors", true, 1);
runErrorTestFile("hbase-scan-node-errors", false, 100, null);
runErrorTestFile("hbase-scan-node-errors", false, 5, null);
runErrorTestFile("hbase-scan-node-errors", true, 1, null);
}
}
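
The trimming logic in runErrorTestFile above relies on each expected error in a .test file ending with a line that starts with "line:" (HDFS scans) or "row key:" (HBase scans); only the first maxErrors complete messages are kept for comparison. A self-contained sketch of that step follows (hypothetical demo class, same counting logic as the test above).

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

// Hypothetical illustration of the expected-error trimming in DataErrorsTest; not Impala code.
public class ExpectedErrorTrimDemo {
  static List<String> trim(List<String> expectedErrors, int maxErrors) {
    int errorsToCompare = Math.min(expectedErrors.size(), maxErrors);
    int lastLine = 0;
    int errorCount = 0;
    for (String line : expectedErrors) {
      // A line starting with "line:" or "row key:" closes one error message.
      if (line.startsWith("line:") || line.startsWith("row key:")) {
        errorCount++;
      }
      lastLine++;
      if (errorCount >= errorsToCompare) {
        break;
      }
    }
    List<String> kept = new ArrayList<String>(expectedErrors);
    while (kept.size() > lastLine) {
      kept.remove(kept.size() - 1);
    }
    return kept;
  }

  public static void main(String[] args) {
    List<String> errors = Arrays.asList(
        "Error converting column: 1 TO BOOL",
        "line: 3",
        "Error converting column: 2 TO TINYINT",
        "line: 7");
    // Keeps only the first complete error message and its terminating "line:" line.
    System.out.println(trim(errors, 1));
  }
}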

View File

@@ -21,43 +21,64 @@ public class QueryTest {
private static Catalog catalog;
private static Executor executor;
private final String testDir = "QueryTest";
private static ArrayList<String> tableSubstitutionList;
@BeforeClass
public static void setUp() throws Exception {
HiveMetaStoreClient client = TestSchemaUtils.createClient();
catalog = new Catalog(client);
executor = new Executor(catalog);
tableSubstitutionList = new ArrayList<String>();
tableSubstitutionList.add("");
tableSubstitutionList.add("_rc");
tableSubstitutionList.add("_seq");
tableSubstitutionList.add("_seq_def");
tableSubstitutionList.add("_seq_gzip");
tableSubstitutionList.add("_seq_bzip");
tableSubstitutionList.add("_seq_snap");
tableSubstitutionList.add("_seq_record_def");
tableSubstitutionList.add("_seq_record_gzip");
tableSubstitutionList.add("_seq_record_bzip");
tableSubstitutionList.add("_seq_record_snap");
}
private void runQueryTestFile(String testFile, boolean abortOnError, int maxErrors) {
runQueryTestFile(testFile, abortOnError, maxErrors, null);
}
private void runQueryTestFile(String testFile, boolean abortOnError, int maxErrors,
ArrayList<String> tables) {
String fileName = testDir + "/" + testFile + ".test";
TestFileParser queryFileParser = new TestFileParser(fileName);
queryFileParser.parseFile();
StringBuilder errorLog = new StringBuilder();
for (TestCase testCase : queryFileParser.getTestCases()) {
ArrayList<String> expectedTypes =
testCase.getSectionContents(Section.TYPES);
ArrayList<String> expectedResults =
testCase.getSectionContents(Section.RESULTS);
// run each test against all possible combinations of batch sizes and
// number of execution nodes
int[] batchSizes = {0, 16, 1};
int[] numNodes = {1, 2, 3, 0};
for (int i = 0; i < batchSizes.length; ++i) {
for (int j = 0; j < numNodes.length; ++j) {
TestUtils.runQuery(
executor, testCase.getSectionAsString(Section.QUERY, false, " "),
numNodes[j], batchSizes[i], abortOnError, maxErrors,
testCase.getStartingLineNum(), null, expectedTypes,
expectedResults, null, null, errorLog);
for (int f = 0; f < (tables == null ? 1 : tables.size()); f++) {
queryFileParser.parseFile(tables == null ? null : tables.get(f));
StringBuilder errorLog = new StringBuilder();
for (TestCase testCase : queryFileParser.getTestCases()) {
ArrayList<String> expectedTypes =
testCase.getSectionContents(Section.TYPES);
ArrayList<String> expectedResults =
testCase.getSectionContents(Section.RESULTS);
// run each test against all possible combinations of batch sizes and
// number of execution nodes
int[] batchSizes = {0, 16, 1};
int[] numNodes = {1, 2, 3, 0};
for (int i = 0; i < batchSizes.length; ++i) {
for (int j = 0; j < numNodes.length; ++j) {
TestUtils.runQuery(
executor, testCase.getSectionAsString(Section.QUERY, false, " "),
numNodes[j], batchSizes[i], abortOnError, maxErrors,
testCase.getStartingLineNum(), null, expectedTypes,
expectedResults, null, null, errorLog);
}
}
}
}
if (errorLog.length() != 0) {
fail(errorLog.toString());
if (errorLog.length() != 0) {
fail(errorLog.toString());
}
}
}
@Test
public void TestDistinct() {
runQueryTestFile("distinct", false, 1000);
@@ -74,23 +95,13 @@ public class QueryTest {
}
@Test
public void TestHdfsTextScanNode() {
runQueryTestFile("hdfs-scan-node", false, 1000);
public void TestHdfsScanNode() {
runQueryTestFile("hdfs-scan-node", false, 1000, tableSubsitutionList);
}
@Test
public void TestHdfsTextPartitions() {
runQueryTestFile("hdfs-partitions", false, 1000);
}
@Test
public void TestHdfsRCFileScanNode() {
runQueryTestFile("hdfs-rcfile-scan-node", false, 1000);
}
@Test
public void TestHdfsRCFilePartitions() {
runQueryTestFile("hdfs-rcfile-partitions", false, 1000);
public void TestFilePartitions() {
runQueryTestFile("hdfs-partitions", false, 1000, tableSubstitutionList);
}
@Test

View File

@@ -141,6 +141,7 @@ public class TestFileParser {
private final String fileName;
private InputStream stream;
private Scanner scanner;
private String table;
/**
* For backwards compatibility, if no title is found this is the order in which
@@ -160,7 +161,8 @@ public class TestFileParser {
/**
* Initialises the scanner and the input stream corresponding to the test file name
*/
private void open() {
private void open(String table) {
this.table = table;
try {
ClassLoader classLoader = Thread.currentThread().getContextClassLoader();
stream = classLoader.getResourceAsStream(fileName);
@@ -218,6 +220,9 @@ public class TestFileParser {
sectionContents = Lists.newArrayList();
} else {
if (table != null && currentSection == Section.QUERY) {
line = line.replaceAll("\\$TABLE", table);
}
sectionContents.add(line);
}
}
@@ -229,7 +234,11 @@ public class TestFileParser {
* Parses a test file in its entirety and constructs a list of TestCases.
*/
public void parseFile() {
open();
parseFile(null);
}
public void parseFile(String table) {
open(table);
while (scanner.hasNextLine()) {
testCases.add(parseOneTestCase());
}
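
With the parseFile(table) overload above, each query line in a .test file can carry a $TABLE placeholder that is expanded once per suffix in the test's table list, so one file exercises the text, RCFile, and all SequenceFile/compression variants. A small self-contained sketch of that substitution follows (hypothetical demo class; the replaceAll call mirrors the parser code above).

import java.util.Arrays;
import java.util.List;

// Hypothetical illustration of the $TABLE expansion done by TestFileParser; not Impala code.
public class TableSubstitutionDemo {
  // Mirrors line.replaceAll("\\$TABLE", table) from the parser above.
  static String expand(String queryLine, String suffix) {
    return queryLine.replaceAll("\\$TABLE", suffix);
  }

  public static void main(String[] args) {
    List<String> suffixes = Arrays.asList("", "_rc", "_seq", "_seq_snap");
    String line = "select count(*) from alltypes$TABLE where year=2009";
    for (String suffix : suffixes) {
      // "" targets the text table, "_rc" the RCFile table, "_seq_snap" the
      // snappy-compressed SequenceFile table, and so on.
      System.out.println(expand(line, suffix));
    }
  }
}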

View File

@@ -1,4 +1,5 @@
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col from alltypeserror
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col from alltypeserror$TABLE
---- ERRORS
Error converting column: 1 TO BOOL
file: alltypeserror/year=2009/month=1/0901.txt
@@ -58,7 +59,7 @@ file: alltypeserror/year=2009/month=1/0901.txt,8
file: alltypeserror/year=2009/month=2/0902.txt,3
file: alltypeserror/year=2009/month=3/0903.txt,4
====
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col from alltypeserrornonulls
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col from alltypeserrornonulls$TABLE
---- ERRORS
Error converting column: 1 TO BOOL
file: alltypeserrornonulls/year=2009/month=1/0901.txt

View File

@@ -1,11 +1,11 @@
select year, count(*) from alltypes group by 1
select year, count(*) from alltypes$TABLE group by 1
----
int, bigint
----
2009,3650
2010,3650
====
select month, count(*) from alltypes group by 1
select month, count(*) from alltypes$TABLE group by 1
----
int, bigint
----
@@ -22,7 +22,7 @@ int, bigint
6,600
7,620
====
select year, month, count(*) from alltypes group by 1, 2
select year, month, count(*) from alltypes$TABLE group by 1, 2
----
int, int, bigint
----
@@ -51,82 +51,82 @@ int, int, bigint
2009,1,310
2009,2,280
====
select count(*) from alltypes where year=2009
select count(*) from alltypes$TABLE where year=2009
----
bigint
----
3650
====
# still works if 'year' needs a cast
select count(*) from alltypes where year = 2009.0
select count(*) from alltypes$TABLE where year = 2009.0
----
bigint
----
3650
====
# finds bindings for partition keys regardless of order of operands
select count(*) from alltypes where 2009 = year
select count(*) from alltypes$TABLE where 2009 = year
----
bigint
----
3650
====
select count(*) from alltypes where 2009.0 = year
select count(*) from alltypes$TABLE where 2009.0 = year
----
bigint
----
3650
====
select count(*) from alltypes where month=1
select count(*) from alltypes$TABLE where month=1
----
bigint
----
620
====
select count(*) from alltypes where year=2009 and month=1
select count(*) from alltypes$TABLE where year=2009 and month=1
----
bigint
----
310
====
select count(*) from alltypes where year=2009 and month > 6
select count(*) from alltypes$TABLE where year=2009 and month > 6
----
bigint
----
1840
====
select count(*) from alltypes where year=2009 and month < 6
select count(*) from alltypes$TABLE where year=2009 and month < 6
----
bigint
----
1510
====
select count(*) from alltypes where year<=2009 and month < 6
select count(*) from alltypes$TABLE where year<=2009 and month < 6
----
bigint
----
1510
====
select count(*) from alltypes where month < 9 and month > 6
select count(*) from alltypes$TABLE where month < 9 and month > 6
----
bigint
----
1240
====
select count(*) from alltypes where year < 2010 and year < 2009 and month > 6
select count(*) from alltypes$TABLE where year < 2010 and year < 2009 and month > 6
----
bigint
----
0
====
select count(*) from alltypes where year < 2010 and month > 6 and month > 12
select count(*) from alltypes$TABLE where year < 2010 and month > 6 and month > 12
----
bigint
----
0
====
# Test multi files partitioned table (hdfs)
select count(*) from alltypesaggmultifiles
select count(*) from alltypesaggmultifiles$TABLE
----
bigint
----

View File

@@ -1,134 +0,0 @@
select year, count(*) from alltypes_rc group by 1
----
int, bigint
----
2009,3650
2010,3650
====
select month, count(*) from alltypes_rc group by 1
----
int, bigint
----
8,620
9,600
10,620
11,600
12,620
1,620
2,560
3,620
4,600
5,620
6,600
7,620
====
select year, month, count(*) from alltypes_rc group by 1, 2
----
int, int, bigint
----
2010,2,280
2009,5,310
2010,1,310
2009,6,300
2009,3,310
2009,4,300
2009,9,300
2009,10,310
2010,6,300
2009,7,310
2010,5,310
2009,8,310
2010,4,300
2010,3,310
2010,10,310
2009,11,300
2010,9,300
2009,12,310
2010,8,310
2010,7,310
2010,12,310
2010,11,300
2009,1,310
2009,2,280
====
select count(*) from alltypes_rc where year=2009
----
bigint
----
3650
====
# still works if 'year' needs a cast
select count(*) from alltypes_rc where year = 2009.0
----
bigint
----
3650
====
# finds bindings for partition keys regardless of order of operands
select count(*) from alltypes_rc where 2009 = year
----
bigint
----
3650
====
select count(*) from alltypes_rc where 2009.0 = year
----
bigint
----
3650
====
select count(*) from alltypes_rc where month=1
----
bigint
----
620
====
select count(*) from alltypes_rc where year=2009 and month=1
----
bigint
----
310
====
select count(*) from alltypes_rc where year=2009 and month > 6
----
bigint
----
1840
====
select count(*) from alltypes_rc where year=2009 and month < 6
----
bigint
----
1510
====
select count(*) from alltypes_rc where year<=2009 and month < 6
----
bigint
----
1510
====
select count(*) from alltypes_rc where month < 9 and month > 6
----
bigint
----
1240
====
select count(*) from alltypes_rc where year < 2010 and year < 2009 and month > 6
----
bigint
----
0
====
select count(*) from alltypes_rc where year < 2010 and month > 6 and month > 12
----
bigint
----
0
====
# Test multi files partitioned table (rc)
select count(*) from alltypesaggmultifiles_rc
----
bigint
----
10000
====

View File

@@ -1,509 +0,0 @@
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col,
double_col, date_string_col, string_col
from alltypessmall_rc
-----
int,boolean,tinyint,smallint,int,bigint,float,double,string,string
-----
0,true,0,0,0,0,0,0,'01/01/09','0'
1,false,1,1,1,10,1.1,10.1,'01/01/09','1'
2,true,2,2,2,20,2.2,20.2,'01/01/09','2'
3,false,3,3,3,30,3.3,30.3,'01/01/09','3'
4,true,4,4,4,40,4.4,40.4,'01/01/09','4'
5,false,5,5,5,50,5.5,50.5,'01/01/09','5'
6,true,6,6,6,60,6.6,60.6,'01/01/09','6'
7,false,7,7,7,70,7.7,70.7,'01/01/09','7'
8,true,8,8,8,80,8.8,80.8,'01/01/09','8'
9,false,9,9,9,90,9.9,90.9,'01/01/09','9'
10,true,0,0,0,0,0,0,'01/02/09','0'
11,false,1,1,1,10,1.1,10.1,'01/02/09','1'
12,true,2,2,2,20,2.2,20.2,'01/02/09','2'
13,false,3,3,3,30,3.3,30.3,'01/02/09','3'
14,true,4,4,4,40,4.4,40.4,'01/02/09','4'
15,false,5,5,5,50,5.5,50.5,'01/02/09','5'
16,true,6,6,6,60,6.6,60.6,'01/02/09','6'
17,false,7,7,7,70,7.7,70.7,'01/02/09','7'
18,true,8,8,8,80,8.8,80.8,'01/02/09','8'
19,false,9,9,9,90,9.9,90.9,'01/02/09','9'
20,true,0,0,0,0,0,0,'01/03/09','0'
21,false,1,1,1,10,1.1,10.1,'01/03/09','1'
22,true,2,2,2,20,2.2,20.2,'01/03/09','2'
23,false,3,3,3,30,3.3,30.3,'01/03/09','3'
24,true,4,4,4,40,4.4,40.4,'01/03/09','4'
25,false,0,0,0,0,0,0,'02/01/09','0'
26,true,1,1,1,10,1.1,10.1,'02/01/09','1'
27,false,2,2,2,20,2.2,20.2,'02/01/09','2'
28,true,3,3,3,30,3.3,30.3,'02/01/09','3'
29,false,4,4,4,40,4.4,40.4,'02/01/09','4'
30,true,5,5,5,50,5.5,50.5,'02/01/09','5'
31,false,6,6,6,60,6.6,60.6,'02/01/09','6'
32,true,7,7,7,70,7.7,70.7,'02/01/09','7'
33,false,8,8,8,80,8.8,80.8,'02/01/09','8'
34,true,9,9,9,90,9.9,90.9,'02/01/09','9'
35,false,0,0,0,0,0,0,'02/02/09','0'
36,true,1,1,1,10,1.1,10.1,'02/02/09','1'
37,false,2,2,2,20,2.2,20.2,'02/02/09','2'
38,true,3,3,3,30,3.3,30.3,'02/02/09','3'
39,false,4,4,4,40,4.4,40.4,'02/02/09','4'
40,true,5,5,5,50,5.5,50.5,'02/02/09','5'
41,false,6,6,6,60,6.6,60.6,'02/02/09','6'
42,true,7,7,7,70,7.7,70.7,'02/02/09','7'
43,false,8,8,8,80,8.8,80.8,'02/02/09','8'
44,true,9,9,9,90,9.9,90.9,'02/02/09','9'
45,false,0,0,0,0,0,0,'02/03/09','0'
46,true,1,1,1,10,1.1,10.1,'02/03/09','1'
47,false,2,2,2,20,2.2,20.2,'02/03/09','2'
48,true,3,3,3,30,3.3,30.3,'02/03/09','3'
49,false,4,4,4,40,4.4,40.4,'02/03/09','4'
50,true,0,0,0,0,0,0,'03/01/09','0'
51,false,1,1,1,10,1.1,10.1,'03/01/09','1'
52,true,2,2,2,20,2.2,20.2,'03/01/09','2'
53,false,3,3,3,30,3.3,30.3,'03/01/09','3'
54,true,4,4,4,40,4.4,40.4,'03/01/09','4'
55,false,5,5,5,50,5.5,50.5,'03/01/09','5'
56,true,6,6,6,60,6.6,60.6,'03/01/09','6'
57,false,7,7,7,70,7.7,70.7,'03/01/09','7'
58,true,8,8,8,80,8.8,80.8,'03/01/09','8'
59,false,9,9,9,90,9.9,90.9,'03/01/09','9'
60,true,0,0,0,0,0,0,'03/02/09','0'
61,false,1,1,1,10,1.1,10.1,'03/02/09','1'
62,true,2,2,2,20,2.2,20.2,'03/02/09','2'
63,false,3,3,3,30,3.3,30.3,'03/02/09','3'
64,true,4,4,4,40,4.4,40.4,'03/02/09','4'
65,false,5,5,5,50,5.5,50.5,'03/02/09','5'
66,true,6,6,6,60,6.6,60.6,'03/02/09','6'
67,false,7,7,7,70,7.7,70.7,'03/02/09','7'
68,true,8,8,8,80,8.8,80.8,'03/02/09','8'
69,false,9,9,9,90,9.9,90.9,'03/02/09','9'
70,true,0,0,0,0,0,0,'03/03/09','0'
71,false,1,1,1,10,1.1,10.1,'03/03/09','1'
72,true,2,2,2,20,2.2,20.2,'03/03/09','2'
73,false,3,3,3,30,3.3,30.3,'03/03/09','3'
74,true,4,4,4,40,4.4,40.4,'03/03/09','4'
75,false,0,0,0,0,0,0,'04/01/09','0'
76,true,1,1,1,10,1.1,10.1,'04/01/09','1'
77,false,2,2,2,20,2.2,20.2,'04/01/09','2'
78,true,3,3,3,30,3.3,30.3,'04/01/09','3'
79,false,4,4,4,40,4.4,40.4,'04/01/09','4'
80,true,5,5,5,50,5.5,50.5,'04/01/09','5'
81,false,6,6,6,60,6.6,60.6,'04/01/09','6'
82,true,7,7,7,70,7.7,70.7,'04/01/09','7'
83,false,8,8,8,80,8.8,80.8,'04/01/09','8'
84,true,9,9,9,90,9.9,90.9,'04/01/09','9'
85,false,0,0,0,0,0,0,'04/02/09','0'
86,true,1,1,1,10,1.1,10.1,'04/02/09','1'
87,false,2,2,2,20,2.2,20.2,'04/02/09','2'
88,true,3,3,3,30,3.3,30.3,'04/02/09','3'
89,false,4,4,4,40,4.4,40.4,'04/02/09','4'
90,true,5,5,5,50,5.5,50.5,'04/02/09','5'
91,false,6,6,6,60,6.6,60.6,'04/02/09','6'
92,true,7,7,7,70,7.7,70.7,'04/02/09','7'
93,false,8,8,8,80,8.8,80.8,'04/02/09','8'
94,true,9,9,9,90,9.9,90.9,'04/02/09','9'
95,false,0,0,0,0,0,0,'04/03/09','0'
96,true,1,1,1,10,1.1,10.1,'04/03/09','1'
97,false,2,2,2,20,2.2,20.2,'04/03/09','2'
98,true,3,3,3,30,3.3,30.3,'04/03/09','3'
99,false,4,4,4,40,4.4,40.4,'04/03/09','4'
=====
select id from alltypessmall_rc
-----
int
-----
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
=====
select * from alltypessmall_rc
-----
int,int,int,boolean,tinyint,smallint,int,bigint,float,double,string,string,timestamp
-----
2009,1,0,true,0,0,0,0,0,0,'01/01/09','0',2009-01-01 00:00:00
2009,1,1,false,1,1,1,10,1.1,10.1,'01/01/09','1',2009-01-01 00:01:00
2009,1,10,true,0,0,0,0,0,0,'01/02/09','0',2009-01-02 00:10:00.450000000
2009,1,11,false,1,1,1,10,1.1,10.1,'01/02/09','1',2009-01-02 00:11:00.450000000
2009,1,12,true,2,2,2,20,2.2,20.2,'01/02/09','2',2009-01-02 00:12:00.460000000
2009,1,13,false,3,3,3,30,3.3,30.3,'01/02/09','3',2009-01-02 00:13:00.480000000
2009,1,14,true,4,4,4,40,4.4,40.4,'01/02/09','4',2009-01-02 00:14:00.510000000
2009,1,15,false,5,5,5,50,5.5,50.5,'01/02/09','5',2009-01-02 00:15:00.550000000
2009,1,16,true,6,6,6,60,6.6,60.6,'01/02/09','6',2009-01-02 00:16:00.600000000
2009,1,17,false,7,7,7,70,7.7,70.7,'01/02/09','7',2009-01-02 00:17:00.660000000
2009,1,18,true,8,8,8,80,8.8,80.8,'01/02/09','8',2009-01-02 00:18:00.730000000
2009,1,19,false,9,9,9,90,9.9,90.9,'01/02/09','9',2009-01-02 00:19:00.810000000
2009,1,2,true,2,2,2,20,2.2,20.2,'01/01/09','2',2009-01-01 00:02:00.100000000
2009,1,20,true,0,0,0,0,0,0,'01/03/09','0',2009-01-03 00:20:00.900000000
2009,1,21,false,1,1,1,10,1.1,10.1,'01/03/09','1',2009-01-03 00:21:00.900000000
2009,1,22,true,2,2,2,20,2.2,20.2,'01/03/09','2',2009-01-03 00:22:00.910000000
2009,1,23,false,3,3,3,30,3.3,30.3,'01/03/09','3',2009-01-03 00:23:00.930000000
2009,1,24,true,4,4,4,40,4.4,40.4,'01/03/09','4',2009-01-03 00:24:00.960000000
2009,1,3,false,3,3,3,30,3.3,30.3,'01/01/09','3',2009-01-01 00:03:00.300000000
2009,1,4,true,4,4,4,40,4.4,40.4,'01/01/09','4',2009-01-01 00:04:00.600000000
2009,1,5,false,5,5,5,50,5.5,50.5,'01/01/09','5',2009-01-01 00:05:00.100000000
2009,1,6,true,6,6,6,60,6.6,60.6,'01/01/09','6',2009-01-01 00:06:00.150000000
2009,1,7,false,7,7,7,70,7.7,70.7,'01/01/09','7',2009-01-01 00:07:00.210000000
2009,1,8,true,8,8,8,80,8.8,80.8,'01/01/09','8',2009-01-01 00:08:00.280000000
2009,1,9,false,9,9,9,90,9.9,90.9,'01/01/09','9',2009-01-01 00:09:00.360000000
2009,2,25,false,0,0,0,0,0,0,'02/01/09','0',2009-02-01 00:00:00
2009,2,26,true,1,1,1,10,1.1,10.1,'02/01/09','1',2009-02-01 00:01:00
2009,2,27,false,2,2,2,20,2.2,20.2,'02/01/09','2',2009-02-01 00:02:00.100000000
2009,2,28,true,3,3,3,30,3.3,30.3,'02/01/09','3',2009-02-01 00:03:00.300000000
2009,2,29,false,4,4,4,40,4.4,40.4,'02/01/09','4',2009-02-01 00:04:00.600000000
2009,2,30,true,5,5,5,50,5.5,50.5,'02/01/09','5',2009-02-01 00:05:00.100000000
2009,2,31,false,6,6,6,60,6.6,60.6,'02/01/09','6',2009-02-01 00:06:00.150000000
2009,2,32,true,7,7,7,70,7.7,70.7,'02/01/09','7',2009-02-01 00:07:00.210000000
2009,2,33,false,8,8,8,80,8.8,80.8,'02/01/09','8',2009-02-01 00:08:00.280000000
2009,2,34,true,9,9,9,90,9.9,90.9,'02/01/09','9',2009-02-01 00:09:00.360000000
2009,2,35,false,0,0,0,0,0,0,'02/02/09','0',2009-02-02 00:10:00.450000000
2009,2,36,true,1,1,1,10,1.1,10.1,'02/02/09','1',2009-02-02 00:11:00.450000000
2009,2,37,false,2,2,2,20,2.2,20.2,'02/02/09','2',2009-02-02 00:12:00.460000000
2009,2,38,true,3,3,3,30,3.3,30.3,'02/02/09','3',2009-02-02 00:13:00.480000000
2009,2,39,false,4,4,4,40,4.4,40.4,'02/02/09','4',2009-02-02 00:14:00.510000000
2009,2,40,true,5,5,5,50,5.5,50.5,'02/02/09','5',2009-02-02 00:15:00.550000000
2009,2,41,false,6,6,6,60,6.6,60.6,'02/02/09','6',2009-02-02 00:16:00.600000000
2009,2,42,true,7,7,7,70,7.7,70.7,'02/02/09','7',2009-02-02 00:17:00.660000000
2009,2,43,false,8,8,8,80,8.8,80.8,'02/02/09','8',2009-02-02 00:18:00.730000000
2009,2,44,true,9,9,9,90,9.9,90.9,'02/02/09','9',2009-02-02 00:19:00.810000000
2009,2,45,false,0,0,0,0,0,0,'02/03/09','0',2009-02-03 00:20:00.900000000
2009,2,46,true,1,1,1,10,1.1,10.1,'02/03/09','1',2009-02-03 00:21:00.900000000
2009,2,47,false,2,2,2,20,2.2,20.2,'02/03/09','2',2009-02-03 00:22:00.910000000
2009,2,48,true,3,3,3,30,3.3,30.3,'02/03/09','3',2009-02-03 00:23:00.930000000
2009,2,49,false,4,4,4,40,4.4,40.4,'02/03/09','4',2009-02-03 00:24:00.960000000
2009,3,50,true,0,0,0,0,0,0,'03/01/09','0',2009-03-01 00:00:00
2009,3,51,false,1,1,1,10,1.1,10.1,'03/01/09','1',2009-03-01 00:01:00
2009,3,52,true,2,2,2,20,2.2,20.2,'03/01/09','2',2009-03-01 00:02:00.100000000
2009,3,53,false,3,3,3,30,3.3,30.3,'03/01/09','3',2009-03-01 00:03:00.300000000
2009,3,54,true,4,4,4,40,4.4,40.4,'03/01/09','4',2009-03-01 00:04:00.600000000
2009,3,55,false,5,5,5,50,5.5,50.5,'03/01/09','5',2009-03-01 00:05:00.100000000
2009,3,56,true,6,6,6,60,6.6,60.6,'03/01/09','6',2009-03-01 00:06:00.150000000
2009,3,57,false,7,7,7,70,7.7,70.7,'03/01/09','7',2009-03-01 00:07:00.210000000
2009,3,58,true,8,8,8,80,8.8,80.8,'03/01/09','8',2009-03-01 00:08:00.280000000
2009,3,59,false,9,9,9,90,9.9,90.9,'03/01/09','9',2009-03-01 00:09:00.360000000
2009,3,60,true,0,0,0,0,0,0,'03/02/09','0',2009-03-02 00:10:00.450000000
2009,3,61,false,1,1,1,10,1.1,10.1,'03/02/09','1',2009-03-02 00:11:00.450000000
2009,3,62,true,2,2,2,20,2.2,20.2,'03/02/09','2',2009-03-02 00:12:00.460000000
2009,3,63,false,3,3,3,30,3.3,30.3,'03/02/09','3',2009-03-02 00:13:00.480000000
2009,3,64,true,4,4,4,40,4.4,40.4,'03/02/09','4',2009-03-02 00:14:00.510000000
2009,3,65,false,5,5,5,50,5.5,50.5,'03/02/09','5',2009-03-02 00:15:00.550000000
2009,3,66,true,6,6,6,60,6.6,60.6,'03/02/09','6',2009-03-02 00:16:00.600000000
2009,3,67,false,7,7,7,70,7.7,70.7,'03/02/09','7',2009-03-02 00:17:00.660000000
2009,3,68,true,8,8,8,80,8.8,80.8,'03/02/09','8',2009-03-02 00:18:00.730000000
2009,3,69,false,9,9,9,90,9.9,90.9,'03/02/09','9',2009-03-02 00:19:00.810000000
2009,3,70,true,0,0,0,0,0,0,'03/03/09','0',2009-03-03 00:20:00.900000000
2009,3,71,false,1,1,1,10,1.1,10.1,'03/03/09','1',2009-03-03 00:21:00.900000000
2009,3,72,true,2,2,2,20,2.2,20.2,'03/03/09','2',2009-03-03 00:22:00.910000000
2009,3,73,false,3,3,3,30,3.3,30.3,'03/03/09','3',2009-03-03 00:23:00.930000000
2009,3,74,true,4,4,4,40,4.4,40.4,'03/03/09','4',2009-03-03 00:24:00.960000000
2009,4,75,false,0,0,0,0,0,0,'04/01/09','0',2009-04-01 00:00:00
2009,4,76,true,1,1,1,10,1.1,10.1,'04/01/09','1',2009-04-01 00:01:00
2009,4,77,false,2,2,2,20,2.2,20.2,'04/01/09','2',2009-04-01 00:02:00.100000000
2009,4,78,true,3,3,3,30,3.3,30.3,'04/01/09','3',2009-04-01 00:03:00.300000000
2009,4,79,false,4,4,4,40,4.4,40.4,'04/01/09','4',2009-04-01 00:04:00.600000000
2009,4,80,true,5,5,5,50,5.5,50.5,'04/01/09','5',2009-04-01 00:05:00.100000000
2009,4,81,false,6,6,6,60,6.6,60.6,'04/01/09','6',2009-04-01 00:06:00.150000000
2009,4,82,true,7,7,7,70,7.7,70.7,'04/01/09','7',2009-04-01 00:07:00.210000000
2009,4,83,false,8,8,8,80,8.8,80.8,'04/01/09','8',2009-04-01 00:08:00.280000000
2009,4,84,true,9,9,9,90,9.9,90.9,'04/01/09','9',2009-04-01 00:09:00.360000000
2009,4,85,false,0,0,0,0,0,0,'04/02/09','0',2009-04-02 00:10:00.450000000
2009,4,86,true,1,1,1,10,1.1,10.1,'04/02/09','1',2009-04-02 00:11:00.450000000
2009,4,87,false,2,2,2,20,2.2,20.2,'04/02/09','2',2009-04-02 00:12:00.460000000
2009,4,88,true,3,3,3,30,3.3,30.3,'04/02/09','3',2009-04-02 00:13:00.480000000
2009,4,89,false,4,4,4,40,4.4,40.4,'04/02/09','4',2009-04-02 00:14:00.510000000
2009,4,90,true,5,5,5,50,5.5,50.5,'04/02/09','5',2009-04-02 00:15:00.550000000
2009,4,91,false,6,6,6,60,6.6,60.6,'04/02/09','6',2009-04-02 00:16:00.600000000
2009,4,92,true,7,7,7,70,7.7,70.7,'04/02/09','7',2009-04-02 00:17:00.660000000
2009,4,93,false,8,8,8,80,8.8,80.8,'04/02/09','8',2009-04-02 00:18:00.730000000
2009,4,94,true,9,9,9,90,9.9,90.9,'04/02/09','9',2009-04-02 00:19:00.810000000
2009,4,95,false,0,0,0,0,0,0,'04/03/09','0',2009-04-03 00:20:00.900000000
2009,4,96,true,1,1,1,10,1.1,10.1,'04/03/09','1',2009-04-03 00:21:00.900000000
2009,4,97,false,2,2,2,20,2.2,20.2,'04/03/09','2',2009-04-03 00:22:00.910000000
2009,4,98,true,3,3,3,30,3.3,30.3,'04/03/09','3',2009-04-03 00:23:00.930000000
2009,4,99,false,4,4,4,40,4.4,40.4,'04/03/09','4',2009-04-03 00:24:00.960000000
=====
select month, date_string_col, year from alltypessmall_rc
-----
int,string,int
-----
1,'01/01/09',2009
1,'01/01/09',2009
1,'01/01/09',2009
1,'01/01/09',2009
1,'01/01/09',2009
1,'01/01/09',2009
1,'01/01/09',2009
1,'01/01/09',2009
1,'01/01/09',2009
1,'01/01/09',2009
1,'01/02/09',2009
1,'01/02/09',2009
1,'01/02/09',2009
1,'01/02/09',2009
1,'01/02/09',2009
1,'01/02/09',2009
1,'01/02/09',2009
1,'01/02/09',2009
1,'01/02/09',2009
1,'01/02/09',2009
1,'01/03/09',2009
1,'01/03/09',2009
1,'01/03/09',2009
1,'01/03/09',2009
1,'01/03/09',2009
2,'02/01/09',2009
2,'02/01/09',2009
2,'02/01/09',2009
2,'02/01/09',2009
2,'02/01/09',2009
2,'02/01/09',2009
2,'02/01/09',2009
2,'02/01/09',2009
2,'02/01/09',2009
2,'02/01/09',2009
2,'02/02/09',2009
2,'02/02/09',2009
2,'02/02/09',2009
2,'02/02/09',2009
2,'02/02/09',2009
2,'02/02/09',2009
2,'02/02/09',2009
2,'02/02/09',2009
2,'02/02/09',2009
2,'02/02/09',2009
2,'02/03/09',2009
2,'02/03/09',2009
2,'02/03/09',2009
2,'02/03/09',2009
2,'02/03/09',2009
3,'03/01/09',2009
3,'03/01/09',2009
3,'03/01/09',2009
3,'03/01/09',2009
3,'03/01/09',2009
3,'03/01/09',2009
3,'03/01/09',2009
3,'03/01/09',2009
3,'03/01/09',2009
3,'03/01/09',2009
3,'03/02/09',2009
3,'03/02/09',2009
3,'03/02/09',2009
3,'03/02/09',2009
3,'03/02/09',2009
3,'03/02/09',2009
3,'03/02/09',2009
3,'03/02/09',2009
3,'03/02/09',2009
3,'03/02/09',2009
3,'03/03/09',2009
3,'03/03/09',2009
3,'03/03/09',2009
3,'03/03/09',2009
3,'03/03/09',2009
4,'04/01/09',2009
4,'04/01/09',2009
4,'04/01/09',2009
4,'04/01/09',2009
4,'04/01/09',2009
4,'04/01/09',2009
4,'04/01/09',2009
4,'04/01/09',2009
4,'04/01/09',2009
4,'04/01/09',2009
4,'04/02/09',2009
4,'04/02/09',2009
4,'04/02/09',2009
4,'04/02/09',2009
4,'04/02/09',2009
4,'04/02/09',2009
4,'04/02/09',2009
4,'04/02/09',2009
4,'04/02/09',2009
4,'04/02/09',2009
4,'04/03/09',2009
4,'04/03/09',2009
4,'04/03/09',2009
4,'04/03/09',2009
4,'04/03/09',2009
========
select id from alltypessmall_rc where id = 10
----
int
----
10
====
# We expect that conversion errors are turned into NULLs
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col
from alltypeserror_rc
-----
int,boolean,tinyint,smallint,int,bigint,float,double,string,string
-----
0,NULL,NULL,0,0,0,0,0,'01/01/09','0'
1,NULL,NULL,1,1,10,1,10.1,'01/01/09','1'
2,true,NULL,NULL,2,20,2,20.2,'01/01/09','2'
3,false,3,NULL,NULL,30,3,30.3,'01/01/09','3'
4,true,4,4,NULL,NULL,4,40.4,'01/01/09','4'
5,false,5,5,5,NULL,NULL,50.5,'01/01/09','5'
6,true,6,6,6,60,NULL,NULL,'01/01/09','6'
7,NULL,NULL,7,7,70,7,NULL,'01/01/09','7'
8,false,NULL,NULL,8,80,8,80.8,'01/01/09','8'
9,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'01/01/09','9'
10,NULL,NULL,NULL,0,0,0,0,'02/01/09','0'
11,false,NULL,NULL,NULL,10,1,10.1,'02/01/09','1'
12,true,2,NULL,NULL,NULL,2,20.2,'02/01/09','2'
13,false,3,3,NULL,NULL,NULL,NULL,'02/01/09','3'
14,true,4,4,4,40,NULL,NULL,'02/01/09','4'
15,false,NULL,5,5,50,5,50.5,'02/01/09','5'
16,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'02/01/09','6'
17,false,7,7,7,70,7,NULL,'02/01/09','7'
18,true,8,8,8,80,8,80.8,'02/01/09','8'
19,false,9,9,9,90,9,90.9,'02/01/09','9'
20,true,0,0,0,0,0,0,'03/01/09','0'
21,false,1,1,1,10,1,10.1,'03/01/09','1'
22,true,2,2,2,20,2,20.2,'03/01/09','2'
23,false,3,NULL,3,30,3,30.3,'03/01/09','3'
24,true,4,4,4,40,4,40.4,'03/01/09','4'
25,false,5,5,NULL,50,5,50.5,'03/01/09','5'
26,true,6,6,6,60,6,60.6,'03/01/09','6'
27,false,NULL,7,7,70,7,70.7,'03/01/09','7'
28,true,8,8,8,80,8,80.8,'03/01/09','8'
29,false,9,9,NULL,90,9,90.9,'03/01/09','9'
====
# We expect that conversion errors are turned into NULLs
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col
from alltypeserrornonulls_rc
-----
int,boolean,tinyint,smallint,int,bigint,float,double,string,string
-----
0,true,0,0,0,0,0,0,'01/01/09','0'
1,NULL,1,1,1,10,1,10.1,'01/01/09','1'
2,true,NULL,2,2,20,2,20.2,'01/01/09','2'
3,false,3,NULL,3,30,3,30.3,'01/01/09','3'
4,true,4,4,NULL,40,4,40.4,'01/01/09','4'
5,false,5,5,5,NULL,5,50.5,'01/01/09','5'
6,true,6,6,6,60,NULL,60.6,'01/01/09','6'
7,false,7,7,7,70,7,NULL,'01/01/09','7'
8,false,8,8,8,80,8,80.8,'01/01/09','8'
9,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'01/01/09','9'
10,true,0,0,0,0,0,0,'02/01/09','0'
11,false,1,1,1,10,1,10.1,'02/01/09','1'
12,true,2,2,2,20,2,20.2,'02/01/09','2'
13,false,3,3,3,30,NULL,NULL,'02/01/09','3'
14,true,4,4,4,40,4,40.4,'02/01/09','4'
15,false,NULL,5,5,50,5,50.5,'02/01/09','5'
16,true,6,6,6,60,6,60.6,'02/01/09','6'
17,false,7,7,7,70,7,NULL,'02/01/09','7'
18,true,8,8,8,80,8,80.8,'02/01/09','8'
19,false,9,9,9,90,9,90.9,'02/01/09','9'
20,true,0,0,0,0,0,0,'03/01/09','0'
21,false,1,1,1,10,1,10.1,'03/01/09','1'
22,true,2,2,2,20,2,20.2,'03/01/09','2'
23,false,3,NULL,3,30,3,30.3,'03/01/09','3'
24,true,4,4,4,40,4,40.4,'03/01/09','4'
25,false,5,5,NULL,50,5,50.5,'03/01/09','5'
26,true,6,6,6,60,6,60.6,'03/01/09','6'
27,false,NULL,7,7,70,7,70.7,'03/01/09','7'
28,true,8,8,8,80,8,80.8,'03/01/09','8'
29,false,9,9,NULL,90,9,90.9,'03/01/09','9'
====
# Test multi files non-partitioned table (rc)
select count(*) from alltypesaggmultifilesnopart_rc
----
bigint
----
10000
====

View File

@@ -1,6 +1,6 @@
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col,
double_col, date_string_col, string_col, timestamp_col
from alltypessmall
from alltypessmall$TABLE
-----
int,boolean,tinyint,smallint,int,bigint,float,double,string,string,timestamp
-----
@@ -105,7 +105,7 @@ int,boolean,tinyint,smallint,int,bigint,float,double,string,string,timestamp
98,true,3,3,3,30,3.3,30.3,'04/03/09','3',2009-04-03 00:23:00.930000000
99,false,4,4,4,40,4.4,40.4,'04/03/09','4',2009-04-03 00:24:00.960000000
=====
select id from alltypessmall
select id from alltypessmall$TABLE
-----
int
-----
@@ -210,7 +210,7 @@ int
98
99
=====
select * from alltypessmall
select * from alltypessmall$TABLE
-----
int,int,int,boolean,tinyint,smallint,int,bigint,float,double,string,string,timestamp
-----
@@ -315,7 +315,7 @@ int,int,int,boolean,tinyint,smallint,int,bigint,float,double,string,string,times
2009,4,98,true,3,3,3,30,3.3,30.3,'04/03/09','3',2009-04-03 00:23:00.930000000
2009,4,99,false,4,4,4,40,4.4,40.4,'04/03/09','4',2009-04-03 00:24:00.960000000
=====
select month, date_string_col, year from alltypessmall
select month, date_string_col, year from alltypessmall$TABLE
-----
int,string,int
-----
@@ -420,7 +420,7 @@ int,string,int
4,'04/03/09',2009
4,'04/03/09',2009
========
select id from alltypessmall where id = 10
select id from alltypessmall$TABLE where id = 10
----
int
----
@@ -428,7 +428,7 @@ int
====
# We expect that conversion errors are turned into NULLs
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col
from alltypeserror
from alltypeserror$TABLE
-----
int,boolean,tinyint,smallint,int,bigint,float,double,string,string
-----
@@ -465,7 +465,7 @@ int,boolean,tinyint,smallint,int,bigint,float,double,string,string
====
# We expect that conversion errors are turned into NULLs
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col
from alltypeserrornonulls
from alltypeserrornonulls$TABLE
-----
int,boolean,tinyint,smallint,int,bigint,float,double,string,string
-----
@@ -502,7 +502,7 @@ int,boolean,tinyint,smallint,int,bigint,float,double,string,string
====
# partition key values are materialized correctly across file boundaries
select day, month, year, string_col
from alltypesagg
from alltypesagg$TABLE
where string_col = '0'
-----
int, int, int, string
@@ -539,7 +539,7 @@ tinyint, smallint, int, bigint, float, double
-128,-32768,-2147483648,-9223372036854775808,-inf,-inf
====
# Test multi files non-partitioned table (hdfs)
select count(*) from AllTypesAggMultiFilesNoPart
select count(*) from AllTypesAggMultiFilesNoPart$TABLE
----
bigint
----

View File

@@ -28,3 +28,21 @@ CREATE TABLE UserVisits (
avgTimeOnSite int)
row format delimited fields terminated by '|' stored as textfile;
DROP TABLE IF EXISTS UserVisits_seq;
CREATE TABLE UserVisits_seq (
sourceIP string,
destURL string,
visitDate string,
adRevenue float,
userAgent string,
cCode string,
lCode string,
sKeyword string,
avgTimeOnSite int)
STORED AS SEQUENCEFILE;
DROP TABLE IF EXISTS Grep1GB_seq_snap;
CREATE TABLE Grep1GB_seq_snap (
field string)
partitioned by (chunk int)
STORED AS SEQUENCEFILE;

View File

@@ -45,24 +45,146 @@ CREATE TABLE AllTypes_rc (
partitioned by (year int, month int)
STORED AS RCFILE;
DROP TABLE IF EXISTS AllTypes_seq;
CREATE TABLE AllTypes_seq (
id int,
bool_col boolean,
tinyint_col tinyint,
smallint_col smallint,
int_col int,
bigint_col bigint,
float_col float,
double_col double,
date_string_col string,
string_col string,
timestamp_col timestamp)
partitioned by (year int, month int)
STORED AS SEQUENCEFILE;
DROP TABLE IF EXISTS AllTypes_seq_def;
CREATE TABLE AllTypes_seq_def LIKE AllTypes_seq;
DROP TABLE IF EXISTS AllTypes_seq_gzip;
CREATE TABLE AllTypes_seq_gzip LIKE AllTypes_seq;
DROP TABLE IF EXISTS AllTypes_seq_bzip;
CREATE TABLE AllTypes_seq_bzip LIKE AllTypes_seq;
DROP TABLE IF EXISTS AllTypes_seq_snap;
CREATE TABLE AllTypes_seq_snap LIKE AllTypes_seq;
DROP TABLE IF EXISTS AllTypes_seq_record_def;
CREATE TABLE AllTypes_seq_record_def LIKE AllTypes_seq;
DROP TABLE IF EXISTS AllTypes_seq_record_gzip;
CREATE TABLE AllTypes_seq_record_gzip LIKE AllTypes_seq;
DROP TABLE IF EXISTS AllTypes_seq_record_bzip;
CREATE TABLE AllTypes_seq_record_bzip LIKE AllTypes_seq;
DROP TABLE IF EXISTS AllTypes_seq_record_snap;
CREATE TABLE AllTypes_seq_record_snap LIKE AllTypes_seq;
DROP TABLE IF EXISTS AllTypesSmall;
CREATE TABLE AllTypesSmall LIKE AllTypes;
DROP TABLE IF EXISTS AllTypesSmall_rc;
CREATE TABLE AllTypesSmall_rc LIKE AllTypes_rc;
DROP TABLE IF EXISTS AllTypesSmall_seq;
CREATE TABLE AllTypesSmall_seq LIKE AllTypes_seq;
DROP TABLE IF EXISTS AllTypesSmall_seq_def;
CREATE TABLE AllTypesSmall_seq_def LIKE AllTypes_seq;
DROP TABLE IF EXISTS AllTypesSmall_seq_gzip;
CREATE TABLE AllTypesSmall_seq_gzip LIKE AllTypes_seq;
DROP TABLE IF EXISTS AllTypesSmall_seq_bzip;
CREATE TABLE AllTypesSmall_seq_bzip LIKE AllTypes_seq;
DROP TABLE IF EXISTS AllTypesSmall_seq_snap;
CREATE TABLE AllTypesSmall_seq_snap LIKE AllTypes_seq;
DROP TABLE IF EXISTS AllTypesSmall_seq_record_def;
CREATE TABLE AllTypesSmall_seq_record_def LIKE AllTypes_seq;
DROP TABLE IF EXISTS AllTypesSmall_seq_record_gzip;
CREATE TABLE AllTypesSmall_seq_record_gzip LIKE AllTypes_seq;
DROP TABLE IF EXISTS AllTypesSmall_seq_record_bzip;
CREATE TABLE AllTypesSmall_seq_record_bzip LIKE AllTypes_seq;
DROP TABLE IF EXISTS AllTypesSmall_seq_record_snap;
CREATE TABLE AllTypesSmall_seq_record_snap LIKE AllTypes_seq;
DROP TABLE IF EXISTS AlltypesError;
CREATE TABLE AllTypesError LIKE AllTypes;
DROP TABLE IF EXISTS AlltypesError_rc;
CREATE TABLE AllTypesError_rc LIKE AllTypes_rc;
DROP TABLE IF EXISTS AlltypesError_seq;
CREATE TABLE AllTypesError_seq LIKE AllTypes_seq;
DROP TABLE IF EXISTS AlltypesError_seq_def;
CREATE TABLE AllTypesError_seq_def LIKE AllTypes_seq;
DROP TABLE IF EXISTS AlltypesError_seq_gzip;
CREATE TABLE AllTypesError_seq_gzip LIKE AllTypes_seq;
DROP TABLE IF EXISTS AlltypesError_seq_bzip;
CREATE TABLE AllTypesError_seq_bzip LIKE AllTypes_seq;
DROP TABLE IF EXISTS AlltypesError_seq_snap;
CREATE TABLE AllTypesError_seq_snap LIKE AllTypes_seq;
DROP TABLE IF EXISTS AlltypesError_seq_record_def;
CREATE TABLE AllTypesError_seq_record_def LIKE AllTypes_seq;
DROP TABLE IF EXISTS AlltypesError_seq_record_gzip;
CREATE TABLE AllTypesError_seq_record_gzip LIKE AllTypes_seq;
DROP TABLE IF EXISTS AlltypesError_seq_record_bzip;
CREATE TABLE AllTypesError_seq_record_bzip LIKE AllTypes_seq;
DROP TABLE IF EXISTS AlltypesError_seq_record_snap;
CREATE TABLE AllTypesError_seq_record_snap LIKE AllTypes_seq;
DROP TABLE IF EXISTS AlltypesErrorNoNulls;
CREATE TABLE AllTypesErrorNoNulls LIKE AllTypes;
DROP TABLE IF EXISTS AlltypesErrorNoNulls_rc;
CREATE TABLE AllTypesErrorNoNulls_rc LIKE AllTypes_rc;
DROP TABLE IF EXISTS AlltypesErrorNoNulls_seq;
CREATE TABLE AllTypesErrorNoNulls_seq LIKE AllTypes_seq;
DROP TABLE IF EXISTS AlltypesErrorNoNulls_seq_def;
CREATE TABLE AllTypesErrorNoNulls_seq_def LIKE AllTypes_seq;
DROP TABLE IF EXISTS AlltypesErrorNoNulls_seq_gzip;
CREATE TABLE AllTypesErrorNoNulls_seq_gzip LIKE AllTypes_seq;
DROP TABLE IF EXISTS AlltypesErrorNoNulls_seq_bzip;
CREATE TABLE AllTypesErrorNoNulls_seq_bzip LIKE AllTypes_seq;
DROP TABLE IF EXISTS AlltypesErrorNoNulls_seq_snap;
CREATE TABLE AllTypesErrorNoNulls_seq_snap LIKE AllTypes_seq;
DROP TABLE IF EXISTS AlltypesErrorNoNulls_seq_record_def;
CREATE TABLE AllTypesErrorNoNulls_seq_record_def LIKE AllTypes_seq;
DROP TABLE IF EXISTS AlltypesErrorNoNulls_seq_record_gzip;
CREATE TABLE AllTypesErrorNoNulls_seq_record_gzip LIKE AllTypes_seq;
DROP TABLE IF EXISTS AlltypesErrorNoNulls_seq_record_bzip;
CREATE TABLE AllTypesErrorNoNulls_seq_record_bzip LIKE AllTypes_seq;
DROP TABLE IF EXISTS AlltypesErrorNoNulls_seq_record_snap;
CREATE TABLE AllTypesErrorNoNulls_seq_record_snap LIKE AllTypes_seq;
DROP TABLE IF EXISTS AllTypesAgg;
CREATE TABLE AllTypesAgg (
id int,
@@ -95,12 +217,79 @@ CREATE TABLE AllTypesAgg_rc (
partitioned by (year int, month int, day int)
STORED AS RCFILE;
DROP TABLE IF EXISTS AllTypesAgg_seq;
CREATE TABLE AllTypesAgg_seq (
id int,
bool_col boolean,
tinyint_col tinyint,
smallint_col smallint,
int_col int,
bigint_col bigint,
float_col float,
double_col double,
date_string_col string,
string_col string,
timestamp_col timestamp)
partitioned by (year int, month int, day int)
STORED AS SEQUENCEFILE;
DROP TABLE IF EXISTS AllTypesAgg_seq_def;
CREATE TABLE AllTypesAgg_seq_def LIKE AllTypesAgg_seq;
DROP TABLE IF EXISTS AllTypesAgg_seq_gzip;
CREATE TABLE AllTypesAgg_seq_gzip LIKE AllTypesAgg_seq;
DROP TABLE IF EXISTS AllTypesAgg_seq_bzip;
CREATE TABLE AllTypesAgg_seq_bzip LIKE AllTypesAgg_seq;
DROP TABLE IF EXISTS AllTypesAgg_seq_snap;
CREATE TABLE AllTypesAgg_seq_snap LIKE AllTypesAgg_seq;
DROP TABLE IF EXISTS AllTypesAgg_seq_record_def;
CREATE TABLE AllTypesAgg_seq_record_def LIKE AllTypesAgg_seq;
DROP TABLE IF EXISTS AllTypesAgg_seq_record_gzip;
CREATE TABLE AllTypesAgg_seq_record_gzip LIKE AllTypesAgg_seq;
DROP TABLE IF EXISTS AllTypesAgg_seq_record_bzip;
CREATE TABLE AllTypesAgg_seq_record_bzip LIKE AllTypesAgg_seq;
DROP TABLE IF EXISTS AllTypesAgg_seq_record_snap;
CREATE TABLE AllTypesAgg_seq_record_snap LIKE AllTypesAgg_seq;
DROP TABLE IF EXISTS AllTypesAggNoNulls;
CREATE TABLE AllTypesAggNoNulls LIKE AllTypesAgg;
DROP TABLE IF EXISTS AllTypesAggNoNulls_rc;
CREATE TABLE AllTypesAggNoNulls_rc LIKE AllTypesAgg_rc;
DROP TABLE IF EXISTS AllTypesAggNoNulls_seq;
CREATE TABLE AllTypesAggNoNulls_seq LIKE AllTypesAgg_seq;
DROP TABLE IF EXISTS AllTypesAggNoNulls_seq_def;
CREATE TABLE AllTypesAggNoNulls_seq_def LIKE AllTypesAgg_seq;
DROP TABLE IF EXISTS AllTypesAggNoNulls_seq_gzip;
CREATE TABLE AllTypesAggNoNulls_seq_gzip LIKE AllTypesAgg_seq;
DROP TABLE IF EXISTS AllTypesAggNoNulls_seq_bzip;
CREATE TABLE AllTypesAggNoNulls_seq_bzip LIKE AllTypesAgg_seq;
DROP TABLE IF EXISTS AllTypesAggNoNulls_seq_snap;
CREATE TABLE AllTypesAggNoNulls_seq_snap LIKE AllTypesAgg_seq;
DROP TABLE IF EXISTS AllTypesAggNoNulls_seq_record_def;
CREATE TABLE AllTypesAggNoNulls_seq_record_def LIKE AllTypesAgg_seq;
DROP TABLE IF EXISTS AllTypesAggNoNulls_seq_record_gzip;
CREATE TABLE AllTypesAggNoNulls_seq_record_gzip LIKE AllTypesAgg_seq;
DROP TABLE IF EXISTS AllTypesAggNoNulls_seq_record_bzip;
CREATE TABLE AllTypesAggNoNulls_seq_record_bzip LIKE AllTypesAgg_seq;
DROP TABLE IF EXISTS AllTypesAggNoNulls_seq_record_snap;
CREATE TABLE AllTypesAggNoNulls_seq_record_snap LIKE AllTypesAgg_seq;
DROP TABLE IF EXISTS DelimErrorTable;
CREATE TABLE DelimErrorTable (
id int,
@@ -126,6 +315,37 @@ CREATE TABLE TestTbl_rc (
zip int)
STORED AS RCFILE;
DROP TABLE IF EXISTS TestTbl_seq;
CREATE TABLE TestTbl_seq (
id bigint,
name string,
zip int)
STORED AS SEQUENCEFILE;
DROP TABLE IF EXISTS TestTbl_seq_def;
CREATE TABLE TestTbl_seq_def LIKE TestTbl_seq;
DROP TABLE IF EXISTS TestTbl_seq_gzip;
CREATE TABLE TestTbl_seq_gzip LIKE TestTbl_seq;
DROP TABLE IF EXISTS TestTbl_seq_bzip;
CREATE TABLE TestTbl_seq_bzip LIKE TestTbl_seq;
DROP TABLE IF EXISTS TestTbl_seq_snap;
CREATE TABLE TestTbl_seq_snap LIKE TestTbl_seq;
DROP TABLE IF EXISTS TestTbl_seq_record_def;
CREATE TABLE TestTbl_seq_record_def LIKE TestTbl_seq;
DROP TABLE IF EXISTS TestTbl_seq_record_gzip;
CREATE TABLE TestTbl_seq_record_gzip LIKE TestTbl_seq;
DROP TABLE IF EXISTS TestTbl_seq_record_bzip;
CREATE TABLE TestTbl_seq_record_bzip LIKE TestTbl_seq;
DROP TABLE IF EXISTS TestTbl_seq_record_snap;
CREATE TABLE TestTbl_seq_record_snap LIKE TestTbl_seq;
DROP TABLE IF EXISTS DimTbl;
CREATE TABLE DimTbl (
id bigint,
@@ -368,6 +588,150 @@ CREATE TABLE AllTypesAggMultiFiles_rc (
partitioned by (year int, month int, day int)
STORED AS RCFILE;
DROP TABLE IF EXISTS AllTypesAggMultiFiles_seq;
CREATE TABLE AllTypesAggMultiFiles_seq (
id int,
bool_col boolean,
tinyint_col tinyint,
smallint_col smallint,
int_col int,
bigint_col bigint,
float_col float,
double_col double,
date_string_col string,
string_col string,
timestamp_col timestamp)
partitioned by (year int, month int, day int)
STORED AS SEQUENCEFILE;
DROP TABLE IF EXISTS AllTypesAggMultiFiles_seq_def;
CREATE TABLE AllTypesAggMultiFiles_seq_def (
id int,
bool_col boolean,
tinyint_col tinyint,
smallint_col smallint,
int_col int,
bigint_col bigint,
float_col float,
double_col double,
date_string_col string,
string_col string,
timestamp_col timestamp)
partitioned by (year int, month int, day int)
STORED AS SEQUENCEFILE;
DROP TABLE IF EXISTS AllTypesAggMultiFiles_seq_gzip;
CREATE TABLE AllTypesAggMultiFiles_seq_gzip (
id int,
bool_col boolean,
tinyint_col tinyint,
smallint_col smallint,
int_col int,
bigint_col bigint,
float_col float,
double_col double,
date_string_col string,
string_col string,
timestamp_col timestamp)
partitioned by (year int, month int, day int)
STORED AS SEQUENCEFILE;
DROP TABLE IF EXISTS AllTypesAggMultiFiles_seq_bzip;
CREATE TABLE AllTypesAggMultiFiles_seq_bzip (
id int,
bool_col boolean,
tinyint_col tinyint,
smallint_col smallint,
int_col int,
bigint_col bigint,
float_col float,
double_col double,
date_string_col string,
string_col string,
timestamp_col timestamp)
partitioned by (year int, month int, day int)
STORED AS SEQUENCEFILE;
DROP TABLE IF EXISTS AllTypesAggMultiFiles_seq_snap;
CREATE TABLE AllTypesAggMultiFiles_seq_snap (
id int,
bool_col boolean,
tinyint_col tinyint,
smallint_col smallint,
int_col int,
bigint_col bigint,
float_col float,
double_col double,
date_string_col string,
string_col string,
timestamp_col timestamp)
partitioned by (year int, month int, day int)
STORED AS SEQUENCEFILE;
DROP TABLE IF EXISTS AllTypesAggMultiFiles_seq_record_def;
CREATE TABLE AllTypesAggMultiFiles_seq_record_def (
id int,
bool_col boolean,
tinyint_col tinyint,
smallint_col smallint,
int_col int,
bigint_col bigint,
float_col float,
double_col double,
date_string_col string,
string_col string,
timestamp_col timestamp)
partitioned by (year int, month int, day int)
STORED AS SEQUENCEFILE;
DROP TABLE IF EXISTS AllTypesAggMultiFiles_seq_record_gzip;
CREATE TABLE AllTypesAggMultiFiles_seq_record_gzip (
id int,
bool_col boolean,
tinyint_col tinyint,
smallint_col smallint,
int_col int,
bigint_col bigint,
float_col float,
double_col double,
date_string_col string,
string_col string,
timestamp_col timestamp)
partitioned by (year int, month int, day int)
STORED AS SEQUENCEFILE;
DROP TABLE IF EXISTS AllTypesAggMultiFiles_seq_record_bzip;
CREATE TABLE AllTypesAggMultiFiles_seq_record_bzip (
id int,
bool_col boolean,
tinyint_col tinyint,
smallint_col smallint,
int_col int,
bigint_col bigint,
float_col float,
double_col double,
date_string_col string,
string_col string,
timestamp_col timestamp)
partitioned by (year int, month int, day int)
STORED AS SEQUENCEFILE;
DROP TABLE IF EXISTS AllTypesAggMultiFiles_seq_record_snap;
CREATE TABLE AllTypesAggMultiFiles_seq_record_snap (
id int,
bool_col boolean,
tinyint_col tinyint,
smallint_col smallint,
int_col int,
bigint_col bigint,
float_col float,
double_col double,
date_string_col string,
string_col string,
timestamp_col timestamp)
partitioned by (year int, month int, day int)
STORED AS SEQUENCEFILE;
DROP TABLE IF EXISTS AllTypesAggMultiFilesNoPart;
CREATE TABLE AllTypesAggMultiFilesNoPart (
id int,
@@ -411,3 +775,138 @@ CREATE TABLE AllTypesAggMultiFilesNoPart_rc (
string_col string,
timestamp_col timestamp)
STORED AS RCFILE;
DROP TABLE IF EXISTS AllTypesAggMultiFilesNoPart_seq;
CREATE TABLE AllTypesAggMultiFilesNoPart_seq (
id int,
bool_col boolean,
tinyint_col tinyint,
smallint_col smallint,
int_col int,
bigint_col bigint,
float_col float,
double_col double,
date_string_col string,
string_col string,
timestamp_col timestamp)
STORED AS SEQUENCEFILE;
DROP TABLE IF EXISTS AllTypesAggMultiFilesNoPart_seq_def;
CREATE TABLE AllTypesAggMultiFilesNoPart_seq_def (
id int,
bool_col boolean,
tinyint_col tinyint,
smallint_col smallint,
int_col int,
bigint_col bigint,
float_col float,
double_col double,
date_string_col string,
string_col string,
timestamp_col timestamp)
STORED AS SEQUENCEFILE;
DROP TABLE IF EXISTS AllTypesAggMultiFilesNoPart_seq_gzip;
CREATE TABLE AllTypesAggMultiFilesNoPart_seq_gzip (
id int,
bool_col boolean,
tinyint_col tinyint,
smallint_col smallint,
int_col int,
bigint_col bigint,
float_col float,
double_col double,
date_string_col string,
string_col string,
timestamp_col timestamp)
STORED AS SEQUENCEFILE;
DROP TABLE IF EXISTS AllTypesAggMultiFilesNoPart_seq_bzip;
CREATE TABLE AllTypesAggMultiFilesNoPart_seq_bzip (
id int,
bool_col boolean,
tinyint_col tinyint,
smallint_col smallint,
int_col int,
bigint_col bigint,
float_col float,
double_col double,
date_string_col string,
string_col string,
timestamp_col timestamp)
STORED AS SEQUENCEFILE;
DROP TABLE IF EXISTS AllTypesAggMultiFilesNoPart_seq_snap;
CREATE TABLE AllTypesAggMultiFilesNoPart_seq_snap (
id int,
bool_col boolean,
tinyint_col tinyint,
smallint_col smallint,
int_col int,
bigint_col bigint,
float_col float,
double_col double,
date_string_col string,
string_col string,
timestamp_col timestamp)
STORED AS SEQUENCEFILE;
DROP TABLE IF EXISTS AllTypesAggMultiFilesNoPart_seq_record_def;
CREATE TABLE AllTypesAggMultiFilesNoPart_seq_record_def (
id int,
bool_col boolean,
tinyint_col tinyint,
smallint_col smallint,
int_col int,
bigint_col bigint,
float_col float,
double_col double,
date_string_col string,
string_col string,
timestamp_col timestamp)
STORED AS SEQUENCEFILE;
DROP TABLE IF EXISTS AllTypesAggMultiFilesNoPart_seq_record_gzip;
CREATE TABLE AllTypesAggMultiFilesNoPart_seq_record_gzip (
id int,
bool_col boolean,
tinyint_col tinyint,
smallint_col smallint,
int_col int,
bigint_col bigint,
float_col float,
double_col double,
date_string_col string,
string_col string,
timestamp_col timestamp)
STORED AS SEQUENCEFILE;
DROP TABLE IF EXISTS AllTypesAggMultiFilesNoPart_seq_record_bzip;
CREATE TABLE AllTypesAggMultiFilesNoPart_seq_record_bzip (
id int,
bool_col boolean,
tinyint_col tinyint,
smallint_col smallint,
int_col int,
bigint_col bigint,
float_col float,
double_col double,
date_string_col string,
string_col string,
timestamp_col timestamp)
STORED AS SEQUENCEFILE;
DROP TABLE IF EXISTS AllTypesAggMultiFilesNoPart_seq_record_snap;
CREATE TABLE AllTypesAggMultiFilesNoPart_seq_record_snap (
id int,
bool_col boolean,
tinyint_col tinyint,
smallint_col smallint,
int_col int,
bigint_col bigint,
float_col float,
double_col double,
date_string_col string,
string_col string,
timestamp_col timestamp)
STORED AS SEQUENCEFILE;

View File

@@ -14,3 +14,11 @@ LOAD DATA LOCAL INPATH '${env:IMPALA_HOME}/testdata/hive_benchmark/grep10GB/part
LOAD DATA LOCAL INPATH '${env:IMPALA_HOME}/testdata/hive_benchmark/html1GB/Rankings.dat' OVERWRITE INTO TABLE Rankings;
LOAD DATA LOCAL INPATH '${env:IMPALA_HOME}/testdata/hive_benchmark/html1GB/UserVisits.dat' OVERWRITE INTO TABLE UserVisits;
INSERT OVERWRITE TABLE UserVisits_seq SELECT * from UserVisits;
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.exec.dynamic.partition=true;
SET hive.exec.compress.output=true;
set mapred.output.compression.type=BLOCK;
INSERT OVERWRITE TABLE Grep1GB_seq_snap PARTITION (chunk) select * from Grep1GB;

testdata/bin/load-raw-data.sql (vendored, new file, 1030 lines): file diff suppressed because it is too large.

testdata/bin/load.sql (vendored, 1250 lines): file diff suppressed because it is too large.