Make CLZWDecoder decode in a single pass.

Currently, CLZWDecoder does decoding with 2 passes. The first decode
pass calculates how much space the decoded data takes. Then the caller
has to allocate a buffer of that size. The second decode pass fills in
the buffer with the decoded data.

This CL changes decoding to do all the work in a single pass. The decode
function allocates a small buffer and fills it while decoding. When the
buffer is full, the decode function increases the buffer size
geometrically. This improves overall rendering speed for LZW encoded
PDFs by as much as 4%.

Change-Id: Ieec2e1eda7347a46af434dfba0ac728361d3b5c2
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/57490
Commit-Queue: Lei Zhang <thestig@chromium.org>
Reviewed-by: Tom Sepez <tsepez@chromium.org>
diff --git a/core/fxcodec/flate/flatemodule.cpp b/core/fxcodec/flate/flatemodule.cpp
index 4173e70..31aa519 100644
--- a/core/fxcodec/flate/flatemodule.cpp
+++ b/core/fxcodec/flate/flatemodule.cpp
@@ -110,17 +110,23 @@
  public:
   CLZWDecoder(pdfium::span<const uint8_t> src_span, bool early_change);
 
-  bool Decode(uint8_t* dest_buf, uint32_t dest_size, uint32_t src_size);
+  bool Decode();
   uint32_t GetSrcSize() const { return (src_bit_pos_ + 7) / 8; }
   uint32_t GetDestSize() const { return dest_byte_pos_; }
+  std::unique_ptr<uint8_t, FxFreeDeleter> TakeDestBuf() {
+    return std::move(dest_buf_);
+  }
 
  private:
   void AddCode(uint32_t prefix_code, uint8_t append_char);
   void DecodeString(uint32_t code);
+  void ExpandDestBuf(uint32_t additional_size);
 
   pdfium::span<const uint8_t> const src_span_;
+  std::unique_ptr<uint8_t, FxFreeDeleter> dest_buf_;
   uint32_t src_bit_pos_ = 0;
-  uint32_t dest_byte_pos_ = 0;
+  uint32_t dest_buf_size_ = 0;  // Actual allocated size.
+  uint32_t dest_byte_pos_ = 0;  // Size used.
   uint32_t stack_len_ = 0;
   uint8_t decode_stack_[4000];
   const uint8_t early_change_;
@@ -165,13 +171,28 @@
   decode_stack_[stack_len_++] = static_cast<uint8_t>(code);
 }
 
-bool CLZWDecoder::Decode(uint8_t* dest_buf,
-                         uint32_t dest_size,
-                         uint32_t src_size) {
+void CLZWDecoder::ExpandDestBuf(uint32_t additional_size) {
+  FX_SAFE_UINT32 new_size = std::max(dest_buf_size_ / 2, additional_size);
+  new_size += dest_buf_size_;
+  if (!new_size.IsValid()) {
+    dest_buf_.reset();
+    return;
+  }
+
+  dest_buf_size_ = new_size.ValueOrDie();
+  dest_buf_.reset(FX_Realloc(uint8_t, dest_buf_.release(), dest_buf_size_));
+}
+
+bool CLZWDecoder::Decode() {
   uint32_t old_code = 0xFFFFFFFF;
   uint8_t last_char = 0;
+
+  // In one PDF test set, 40% of Decode() calls did not need to realloc with
+  // this size.
+  dest_buf_size_ = 512;
+  dest_buf_.reset(FX_Alloc(uint8_t, dest_buf_size_));
   while (1) {
-    if (src_bit_pos_ + code_len_ > src_size * 8)
+    if (src_bit_pos_ + code_len_ > src_span_.size() * 8)
       break;
 
     int byte_pos = src_bit_pos_ / 8;
@@ -193,11 +214,13 @@
     src_bit_pos_ += code_len_;
 
     if (code < 256) {
-      if (dest_byte_pos_ == dest_size)
-        return false;
+      if (dest_byte_pos_ >= dest_buf_size_) {
+        ExpandDestBuf(dest_byte_pos_ - dest_buf_size_ + 1);
+        if (!dest_buf_)
+          return false;
+      }
 
-      if (dest_buf)
-        dest_buf[dest_byte_pos_] = (uint8_t)code;
+      dest_buf_.get()[dest_byte_pos_] = (uint8_t)code;
       dest_byte_pos_++;
       last_char = (uint8_t)code;
       if (old_code != 0xFFFFFFFF)
@@ -228,15 +251,20 @@
       DecodeString(code);
     }
 
-    FX_SAFE_UINT32 required_size = dest_byte_pos_;
-    required_size += stack_len_;
-    if (!required_size.IsValid() || required_size.ValueOrDie() > dest_size)
+    FX_SAFE_UINT32 safe_required_size = dest_byte_pos_;
+    safe_required_size += stack_len_;
+    if (!safe_required_size.IsValid())
       return false;
 
-    if (dest_buf) {
-      for (uint32_t i = 0; i < stack_len_; i++)
-        dest_buf[dest_byte_pos_ + i] = decode_stack_[stack_len_ - i - 1];
+    uint32_t required_size = safe_required_size.ValueOrDie();
+    if (required_size > dest_buf_size_) {
+      ExpandDestBuf(required_size - dest_buf_size_);
+      if (!dest_buf_)
+        return false;
     }
+
+    for (uint32_t i = 0; i < stack_len_; i++)
+      dest_buf_.get()[dest_byte_pos_ + i] = decode_stack_[stack_len_ - i - 1];
     dest_byte_pos_ += stack_len_;
     last_char = decode_stack_[stack_len_ - 1];
     if (old_code >= 258 && old_code - 258 >= current_code_)
@@ -790,16 +818,12 @@
 
   if (bLZW) {
     auto decoder = pdfium::MakeUnique<CLZWDecoder>(src_span, bEarlyChange);
-    *dest_size = 0xFFFFFFFF;
-    offset = src_span.size();
-    if (!decoder->Decode(nullptr, *dest_size, offset))
+    if (!decoder->Decode())
       return FX_INVALID_OFFSET;
 
     offset = decoder->GetSrcSize();
     *dest_size = decoder->GetDestSize();
-    decoder = pdfium::MakeUnique<CLZWDecoder>(src_span, bEarlyChange);
-    dest_buf->reset(FX_Alloc(uint8_t, *dest_size));
-    decoder->Decode(dest_buf->get(), *dest_size, offset);
+    *dest_buf = decoder->TakeDestBuf();
   } else {
     FlateUncompress(src_span, estimated_size, dest_buf, dest_size, &offset);
   }