Fix snappy block decompressor and mem pool usage in rc file.

Nong Li
2013-01-25 10:24:35 -08:00
committed by Henry Robinson
parent 99aa94149a
commit e11c5a772c
11 changed files with 153 additions and 82 deletions

View File

@@ -61,6 +61,13 @@ BaseSequenceScanner::~BaseSequenceScanner() {
       data_buffer_pool_->peak_allocated_bytes());
 }
 
+Status BaseSequenceScanner::Prepare() {
+  RETURN_IF_ERROR(HdfsScanner::Prepare());
+  decompress_timer_ = ADD_COUNTER(
+      scan_node_->runtime_profile(), "DecompressionTime", TCounterType::CPU_TICKS);
+  return Status::OK;
+}
+
 Status BaseSequenceScanner::Close() {
   context_->AcquirePool(data_buffer_pool_.get());
   context_->Flush();

View File

@@ -38,6 +38,7 @@ class BaseSequenceScanner : public HdfsScanner {
   // Issue the initial ranges for all sequence container files.
   static void IssueInitialRanges(HdfsScanNode*, const std::vector<HdfsFileDesc*>&);
 
+  virtual Status Prepare();
   virtual Status Close();
   virtual Status ProcessScanRange(ScanRangeContext* context);
@@ -137,6 +138,9 @@ class BaseSequenceScanner : public HdfsScanner {
   // Pool to allocate per data block memory. This should be used with the
   // decompressor and any other per data block allocations.
   boost::scoped_ptr<MemPool> data_buffer_pool_;
+
+  // Time spent decompressing bytes
+  RuntimeProfile::Counter* decompress_timer_;
 };
 
 }

View File

@@ -58,7 +58,7 @@ HdfsRCFileScanner::~HdfsRCFileScanner() {
 }
 
 Status HdfsRCFileScanner::Prepare() {
-  RETURN_IF_ERROR(HdfsScanner::Prepare());
+  RETURN_IF_ERROR(BaseSequenceScanner::Prepare());
   text_converter_.reset(new TextConverter(0));
@@ -228,6 +228,12 @@ void HdfsRCFileScanner::ResetRowGroup() {
     columns_[i].current_field_len = 0;
     columns_[i].current_field_len_rep = 0;
   }
+
+  if (!context_->compact_data()) {
+    // We are done with this row group, pass along non-compact external buffers
+    context_->AcquirePool(data_buffer_pool_.get());
+    row_group_buffer_size_ = 0;
+  }
 }
 
 Status HdfsRCFileScanner::ReadRowGroup() {
@@ -236,7 +242,7 @@ Status HdfsRCFileScanner::ReadRowGroup() {
   while (num_rows_ == 0) {
     RETURN_IF_ERROR(ReadRowGroupHeader());
     RETURN_IF_ERROR(ReadKeyBuffers());
-    if (has_noncompact_strings_ || row_group_buffer_size_ < row_group_length_) {
+    if (context_->compact_data() || row_group_buffer_size_ < row_group_length_) {
       // Allocate a new buffer for reading the row group. Row groups have a
       // fixed number of rows so take a guess at how big it will be based on
       // the previous row group size.
@@ -491,7 +497,7 @@ Status HdfsRCFileScanner::ProcessRange() {
         reinterpret_cast<const char*>(row_group_buffer_ + row_group_length_));
 
     if (!text_converter_->WriteSlot(slot_desc, tuple,
-        col_start, field_len, !has_noncompact_strings_, false, pool)) {
+        col_start, field_len, context_->compact_data(), false, pool)) {
       ReportColumnParseError(slot_desc, col_start, field_len);
       error_in_row = true;
     }

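Note: the ResetRowGroup() hunk above is the mem pool fix from the commit title. In non-compact mode the materialized rows still point into the row group's data buffers, so the scanner must transfer those buffers to the context instead of recycling them. A toy sketch of that hand-off; ToyPool is a hypothetical stand-in, not Impala's real MemPool API:

#include <stdint.h>
#include <vector>

struct ToyPool {
  // Each allocation is tracked as a separately owned chunk.
  std::vector<std::vector<uint8_t> > chunks;

  uint8_t* Allocate(size_t n) {
    chunks.push_back(std::vector<uint8_t>(n));
    return &chunks.back()[0];
  }

  // Take ownership of every chunk in 'src'. Rows pointing into src's memory
  // stay valid because the chunks now live as long as this pool; src is left
  // empty for the next row group.
  void AcquirePool(ToyPool* src) {
    chunks.insert(chunks.end(), src->chunks.begin(), src->chunks.end());
    src->chunks.clear();
  }
};

Resetting row_group_buffer_size_ to 0 after the hand-off forces the next ReadRowGroup() to allocate a fresh buffer rather than reuse memory the scanner no longer owns.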
View File

@@ -139,7 +139,7 @@
 //
 // rowgroup-key-length ::= Int
 //
-// -- Total compressed length in bytes of the rowgroup's key sections.
+// -- Total compressed length in bytes of the rowgroup's key sections.
 //
 // rowgroup-compressed-key-length ::= Int
 //

View File

@@ -92,16 +92,13 @@ Status HdfsSequenceScanner::InitNewRange() {
 }
 
 Status HdfsSequenceScanner::Prepare() {
-  RETURN_IF_ERROR(HdfsScanner::Prepare());
+  RETURN_IF_ERROR(BaseSequenceScanner::Prepare());
 
   // Allocate the scratch space for two pass parsing. The most fields we can go
   // through in one parse pass is the batch size (tuples) * the number of fields per tuple
   // TODO: This should probably be based on L2/L3 cache sizes (as should the batch size)
   record_locations_.resize(state_->batch_size());
   field_locations_.resize(state_->batch_size() * scan_node_->materialized_slots().size());
-  decompress_timer_ = ADD_COUNTER(
-      scan_node_->runtime_profile(), "DecompressionTime", TCounterType::CPU_TICKS);
   return Status::OK;
 }

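Note: both scanner subclasses now chain Prepare() through BaseSequenceScanner, which registers the shared DecompressionTime counter exactly once. A minimal sketch of that chaining pattern, with a toy Status type standing in for impala::Status (these are not the real classes):

struct Status { bool ok; Status() : ok(true) {} };  // toy stand-in

struct HdfsScanner {
  virtual ~HdfsScanner() {}
  virtual Status Prepare() { return Status(); }
};

struct BaseSequenceScanner : public HdfsScanner {
  virtual Status Prepare() {
    Status s = HdfsScanner::Prepare();
    if (!s.ok) return s;
    // Shared, one-time setup lives here (e.g. the "DecompressionTime" counter).
    return s;
  }
};

struct HdfsSequenceScanner : public BaseSequenceScanner {
  virtual Status Prepare() {
    // Call the direct base class, not HdfsScanner, or the shared setup is skipped.
    Status s = BaseSequenceScanner::Prepare();
    if (!s.ok) return s;
    // Subclass-specific scratch allocation (record_locations_ etc.) goes here.
    return s;
  }
};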
View File

@@ -247,9 +247,6 @@ class HdfsSequenceScanner : public BaseSequenceScanner {
   // Next record from block compressed data.
   uint8_t* next_record_in_compressed_block_;
-
-  // Time spent decompressing bytes
-  RuntimeProfile::Counter* decompress_timer_;
 };
 
 }  // namespace impala

View File

@@ -84,14 +84,15 @@ class Codec {
   virtual ~Codec() {}
 
-  // Process a block of data. The operator will allocate the output buffer
-  // if output_length is passed as 0 and return the length in output_length.
-  // If it is non-zero the length must be the correct size to hold the transformed output.
+  // Process a block of data, either compressing or decompressing it.
+  // If *output_length is 0, the function will allocate from its mempool.
+  // If *output_length is non-zero, it should be the length of *output and must
+  // be exactly the size of the transformed output.
   // Inputs:
   //   input_length: length of the data to process
   //   input: data to process
   // In/Out:
-  //   output_length: Length of the output, if known, 0 otherwise.
+  //   output_length: Length of the output, if known, 0 otherwise.
   // Output:
   //   output: Pointer to processed data
   virtual Status ProcessBlock(int input_length, uint8_t* input,
@@ -99,13 +100,14 @@ class Codec {
   // Return the name of a compression algorithm.
   static std::string GetCodecName(THdfsCompression::type);
 
+  // Largest block we will compress/decompress: 2GB.
+  // We are dealing with compressed blocks that are never this big but we
+  // want to guard against a corrupt file that has the block length as some
+  // large number.
+  static const int MAX_BLOCK_SIZE = (2L * 1024 * 1024 * 1024) - 1;
+
  protected:
   // Create a compression operator
   // Inputs:
   //   mem_pool: memory pool to allocate the output buffer, this implies that the

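Note: the reworded ProcessBlock() contract above supports two calling modes. Here is a sketch of a caller under that contract; DecompressExample is illustrative and not part of this commit, and includes and buffer setup are elided:

Status DecompressExample(Codec* codec, uint8_t* input, int input_length) {
  // Mode 1: output size unknown. Pass *output_length == 0 and the codec
  // allocates from its MemPool, returning the actual size in output_length.
  int output_length = 0;
  uint8_t* output = NULL;
  RETURN_IF_ERROR(codec->ProcessBlock(input_length, input, &output_length, &output));

  // Mode 2: output size known. Point 'output' at a preallocated buffer and set
  // *output_length to its size, which must exactly match the transformed output.
  return Status::OK;
}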
View File

@@ -33,7 +33,7 @@ class GzipCompressor : public Codec {
   GzipCompressor(MemPool* mem_pool, bool reuse_buffer, bool is_gzip);
   virtual ~GzipCompressor();
 
-  //Process a block of data.
+  // Process a block of data.
   virtual Status ProcessBlock(int input_length, uint8_t* input,
       int* output_length, uint8_t** output);
@@ -51,7 +51,6 @@ class GzipCompressor : public Codec {
   // These are magic numbers from zlib.h. Not clear why they are not defined there.
   const static int WINDOW_BITS = 15;  // Maximum window size
   const static int GZIP_CODEC = 16;   // Output Gzip.
 };
 
 class BzipCompressor : public Codec {
@@ -59,12 +58,11 @@ class BzipCompressor : public Codec {
   BzipCompressor(MemPool* mem_pool, bool reuse_buffer);
   virtual ~BzipCompressor() { }
 
-  //Process a block of data.
+  // Process a block of data.
   virtual Status ProcessBlock(int input_length, uint8_t* input,
       int* output_length, uint8_t** output);
 
   // Initialize the compressor.
   virtual Status Init() { return Status::OK; }
 };
 
 class SnappyBlockCompressor : public Codec {
@@ -72,14 +70,13 @@ class SnappyBlockCompressor : public Codec {
   SnappyBlockCompressor(MemPool* mem_pool, bool reuse_buffer);
   virtual ~SnappyBlockCompressor() { }
 
-  //Process a block of data.
+  // Process a block of data.
   virtual Status ProcessBlock(int input_length, uint8_t* input,
       int* output_length, uint8_t** output);
 
  protected:
   // Snappy does not need initialization
   virtual Status Init() { return Status::OK; }
 };
 
 class SnappyCompressor : public Codec {
@@ -87,14 +84,13 @@ class SnappyCompressor : public Codec {
   SnappyCompressor(MemPool* mem_pool, bool reuse_buffer);
   virtual ~SnappyCompressor() { }
 
-  //Process a block of data.
+  // Process a block of data.
   virtual Status ProcessBlock(int input_length, uint8_t* input,
       int* output_length, uint8_t** output);
 
  protected:
   // Snappy does not need initialization
   virtual Status Init() { return Status::OK; }
 };
 
 }

View File

@@ -27,7 +27,6 @@ using namespace std;
 using namespace boost;
 using namespace impala;
 
-//TODO: Decompressors should log their errors.
 
 GzipDecompressor::GzipDecompressor(MemPool* mem_pool, bool reuse_buffer)
   : Codec(mem_pool, reuse_buffer) {
   bzero(&stream_, sizeof(stream_));
@@ -87,7 +86,6 @@ Status GzipDecompressor::ProcessBlock(int input_length, uint8_t* input,
       }
       out_buffer_ = temp_memory_pool_.Allocate(buffer_length_);
       if (inflateReset(&stream_) != Z_OK) {
-        DCHECK(false);
         return Status("zlib inflateEnd failed: " + string(stream_.msg));
       }
       continue;
@@ -96,7 +94,6 @@ Status GzipDecompressor::ProcessBlock(int input_length, uint8_t* input,
     }
   }
 
   if (inflateReset(&stream_) != Z_OK) {
-    DCHECK(false);
     return Status("zlib inflateEnd failed: " + string(stream_.msg));
   }
@@ -204,57 +201,121 @@ SnappyBlockDecompressor::SnappyBlockDecompressor(MemPool* mem_pool, bool reuse_b
   : Codec(mem_pool, reuse_buffer) {
 }
 
-Status SnappyBlockDecompressor::ProcessBlock(int input_length, uint8_t* input,
-    int* output_length, uint8_t** output) {
-  // Hadoop uses a block compression scheme on top of snappy. First there is
-  // an integer which is the size of the decompressed data followed by a
-  // sequence of compressed blocks each preceded with an integer size.
-  int32_t length = SerDeUtils::GetInt(input);
-  DCHECK(*output_length == 0 || length == *output_length);
-
-  // If length is non-zero then the output has been allocated.
-  if (*output_length != 0) {
-    buffer_length_ = *output_length;
-    out_buffer_ = *output;
-  } else if (!reuse_buffer_ || out_buffer_ == NULL || buffer_length_ < length) {
-    buffer_length_ = length;
-    if (buffer_length_ > MAX_BLOCK_SIZE) {
-      return Status("Decompressor: block size is too big");
-    }
-    out_buffer_ = memory_pool_->Allocate(buffer_length_);
-  }
-
-  input += sizeof(length);
-  input_length -= sizeof(length);
-
-  uint8_t* outp = out_buffer_;
-  do {
-    // Read the length of the next block.
-    length = SerDeUtils::GetInt(input);
-    if (length == 0) break;
-    input += sizeof(length);
-    input_length -= sizeof(length);
-
-    // Read how big the output will be.
-    size_t uncompressed_length;
-    if (!snappy::GetUncompressedLength(reinterpret_cast<const char*>(input),
-        input_length, &uncompressed_length)) {
-      return Status("Snappy: GetUncompressedLength failed");
-    }
-    DCHECK_GT(uncompressed_length, 0);
-
-    if (!snappy::RawUncompress(reinterpret_cast<const char*>(input),
-        static_cast<size_t>(length), reinterpret_cast<char*>(outp))) {
-      return Status("Snappy: RawUncompress failed");
-    }
-    input += length;
-    input_length -= length;
-    outp += uncompressed_length;
-  } while (input_length > 0);
-
-  *output = out_buffer_;
-  if (*output_length == 0) *output_length = outp - out_buffer_;
+// Hadoop uses a block compression scheme on top of snappy. As per the hadoop docs
+// the input is split into blocks. Each block "contains the uncompressed length for
+// the block followed by one or more length-prefixed blocks of compressed data."
+// This is essentially blocks of blocks.
+// The outer block consists of:
+//   - 4-byte big endian uncompressed_size
+//   < inner blocks >
+//   ... repeated until input_len is consumed ...
+// The inner blocks have:
+//   - 4-byte big endian compressed_size
+//   < snappy compressed block >
+//   - 4-byte big endian compressed_size
+//   < snappy compressed block >
+//   ... repeated until uncompressed_size from outer block is consumed ...
+
+// Utility function to decompress snappy block compressed data. If size_only is
+// true, this function does not decompress but only computes the output size and
+// writes the result to *output_len.
+// If size_only is false, output must be preallocated to *output_len bytes, which
+// needs to be exactly big enough to hold the decompressed output.
+// size_only is an O(1) operation (it just reads a single varint for each snappy
+// block).
+static Status SnappyBlockDecompress(int input_len, uint8_t* input, bool size_only,
+    int* output_len, char* output) {
+  int uncompressed_total_len = 0;
+  while (input_len > 0) {
+    size_t uncompressed_block_len = SerDeUtils::GetInt(input);
+    input += sizeof(int32_t);
+    input_len -= sizeof(int32_t);
+
+    if (uncompressed_block_len > Codec::MAX_BLOCK_SIZE || uncompressed_block_len == 0) {
+      if (uncompressed_total_len == 0) {
+        return Status("Decompressor: block size is too big. Data is likely corrupt.");
+      }
+      break;
+    }
+
+    if (!size_only) {
+      int remaining_output_size = *output_len - uncompressed_total_len;
+      DCHECK_GE(remaining_output_size, uncompressed_block_len);
+    }
+
+    while (uncompressed_block_len > 0) {
+      // Read the length of the next snappy compressed block.
+      size_t compressed_len = SerDeUtils::GetInt(input);
+      input += sizeof(int32_t);
+      input_len -= sizeof(int32_t);
+
+      if (compressed_len == 0 || compressed_len > input_len) {
+        if (uncompressed_total_len == 0) {
+          return Status(
+              "Decompressor: invalid compressed length. Data is likely corrupt.");
+        }
+        input_len = 0;
+        break;
+      }
+
+      // Read how big the output will be.
+      size_t uncompressed_len;
+      if (!snappy::GetUncompressedLength(reinterpret_cast<char*>(input),
+          input_len, &uncompressed_len)) {
+        if (uncompressed_total_len == 0) {
+          return Status("Snappy: GetUncompressedLength failed");
+        }
+        input_len = 0;
+        break;
+      }
+      DCHECK_GT(uncompressed_len, 0);
+
+      if (!size_only) {
+        // Decompress this snappy block.
+        if (!snappy::RawUncompress(reinterpret_cast<char*>(input),
+            compressed_len, output)) {
+          return Status("Snappy: RawUncompress failed");
+        }
+        output += uncompressed_len;
+      }
+      input += compressed_len;
+      input_len -= compressed_len;
+      uncompressed_block_len -= uncompressed_len;
+      uncompressed_total_len += uncompressed_len;
+    }
+  }
+
+  if (size_only) {
+    *output_len = uncompressed_total_len;
+  } else if (*output_len != uncompressed_total_len) {
+    return Status("Snappy: Decompressed size is not correct.");
+  }
+  return Status::OK;
+}
+
+Status SnappyBlockDecompressor::ProcessBlock(int input_len, uint8_t* input,
+    int* output_len, uint8_t** output) {
+  if (*output_len == 0) {
+    // If we don't know the size beforehand, compute it.
+    RETURN_IF_ERROR(SnappyBlockDecompress(input_len, input, true, output_len, NULL));
+    DCHECK_NE(*output_len, 0);
+
+    if (!reuse_buffer_ || out_buffer_ == NULL || buffer_length_ < *output_len) {
+      // Need to allocate a new buffer.
+      buffer_length_ = *output_len;
+      out_buffer_ = memory_pool_->Allocate(buffer_length_);
+    }
+    *output = out_buffer_;
+  }
+  DCHECK(*output != NULL);
+
+  if (*output_len > MAX_BLOCK_SIZE) {
+    // TODO: is this check really robust?
+    return Status("Decompressor: block size is too big. Data is likely corrupt.");
+  }
+
+  char* out_ptr = reinterpret_cast<char*>(*output);
+  RETURN_IF_ERROR(SnappyBlockDecompress(input_len, input, false, output_len, out_ptr));
   return Status::OK;
 }

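Note: for a concrete picture of the layout SnappyBlockDecompress() walks, here is a sketch of the producer side, framing one buffer as a single outer block holding a single inner block. HadoopSnappyFrame and PutInt are made-up helpers, not part of this commit; the 4-byte length prefixes are written big endian, matching what SerDeUtils::GetInt reads, and real writers may emit several inner blocks per outer block.

#include <snappy.h>

#include <stdint.h>
#include <string>

// Append a 4-byte big-endian length prefix.
static void PutInt(std::string* out, uint32_t v) {
  char buf[4];
  buf[0] = (v >> 24) & 0xFF;
  buf[1] = (v >> 16) & 0xFF;
  buf[2] = (v >> 8) & 0xFF;
  buf[3] = v & 0xFF;
  out->append(buf, 4);
}

std::string HadoopSnappyFrame(const std::string& data) {
  std::string compressed;
  snappy::Compress(data.data(), data.size(), &compressed);

  std::string framed;
  PutInt(&framed, data.size());        // outer block: uncompressed_size
  PutInt(&framed, compressed.size());  // inner block: compressed_size
  framed += compressed;                // snappy compressed bytes
  return framed;
}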
View File

@@ -30,7 +30,7 @@ class GzipDecompressor : public Codec {
   GzipDecompressor(MemPool* mem_pool, bool reuse_buffer);
   virtual ~GzipDecompressor();
 
-  //Process a block of data.
+  // Process a block of data.
   virtual Status ProcessBlock(int input_length, uint8_t* input,
       int* output_length, uint8_t** output);
@@ -51,13 +51,12 @@ class BzipDecompressor : public Codec {
   BzipDecompressor(MemPool* mem_pool, bool reuse_buffer);
   virtual ~BzipDecompressor() { }
 
-  //Process a block of data.
+  // Process a block of data.
   virtual Status ProcessBlock(int input_length, uint8_t* input,
       int* output_length, uint8_t** output);
 
  protected:
   // Bzip does not need initialization
-  virtual Status Init() { return Status::OK; }
+  virtual Status Init() { return Status::OK; }
 };
 
 class SnappyDecompressor : public Codec {
@@ -65,7 +64,7 @@ class SnappyDecompressor : public Codec {
   SnappyDecompressor(MemPool* mem_pool, bool reuse_buffer);
   virtual ~SnappyDecompressor() { }
 
-  //Process a block of data.
+  // Process a block of data.
   virtual Status ProcessBlock(int input_length, uint8_t* input,
       int* output_length, uint8_t** output);
@@ -74,6 +73,7 @@ class SnappyDecompressor : public Codec {
   virtual Status Init() { return Status::OK; }
 };
 
 class SnappyBlockDecompressor : public Codec {
  public:
   SnappyBlockDecompressor(MemPool* mem_pool, bool reuse_buffer);
@@ -86,7 +86,6 @@ class SnappyBlockDecompressor : public Codec {
  protected:
   // Snappy does not need initialization
   virtual Status Init() { return Status::OK; }
 };
 
 }

View File

@@ -1,6 +1,7 @@
 #!/usr/bin/env python
 # Copyright (c) 2012 Cloudera, Inc. All rights reserved.
+import pytest
 
 from os.path import join
 from subprocess import call
 from tests.common.test_vector import *
@@ -33,6 +34,7 @@ class TestCompressedFormats(ImpalaTestSuite):
     cls.TestMatrix.add_dimension(\
         TestDimension('compression_format', *compression_formats))
 
+  @pytest.mark.execute_serially
   def test_compressed_formats(self, vector):
     file_format = vector.get_value('file_format')
     extension, suffix = vector.get_value('compression_format')