Split PDF_DataDecode() in two

Currently PDF_DataDecode() does two separate tasks. First, it builds
|DecoderArray| from the stream dictionary's filter and decode-parameter
entries, validating the decoder pipeline. Then it runs the decoders from
the array, in order, on the stream's data.

Moving the first task into a new GetDecoderArray() function, whose
result is passed to PDF_DataDecode(), will simplify PDF_DataDecode()
while making it easy to alter the array of decoders in the future.

Bug: pdfium:1327
Change-Id: Idf9a78bda98f5d5cbcacea3cbe13e84c0211ebfa
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/56170
Reviewed-by: Lei Zhang <thestig@chromium.org>
Commit-Queue: Lei Zhang <thestig@chromium.org>
diff --git a/core/fpdfapi/parser/cpdf_stream_acc.cpp b/core/fpdfapi/parser/cpdf_stream_acc.cpp
index fd5b857..2666fe6 100644
--- a/core/fpdfapi/parser/cpdf_stream_acc.cpp
+++ b/core/fpdfapi/parser/cpdf_stream_acc.cpp
@@ -7,6 +7,7 @@
 #include "core/fpdfapi/parser/cpdf_stream_acc.h"
 
 #include <utility>
+#include <vector>
 
 #include "core/fpdfapi/parser/cpdf_dictionary.h"
 #include "core/fpdfapi/parser/cpdf_stream.h"
@@ -118,8 +119,14 @@
 
   std::unique_ptr<uint8_t, FxFreeDeleter> pDecodedData;
   uint32_t dwDecodedSize = 0;
-  if (!PDF_DataDecode({pSrcData.Get(), dwSrcSize}, m_pStream->GetDict(),
-                      estimated_size, bImageAcc, &pDecodedData, &dwDecodedSize,
+
+  Optional<std::vector<std::pair<ByteString, const CPDF_Object*>>>
+      decoder_array = GetDecoderArray(m_pStream->GetDict());
+  if (!decoder_array.has_value())
+    return;
+
+  if (!PDF_DataDecode({pSrcData.Get(), dwSrcSize}, estimated_size, bImageAcc,
+                      decoder_array.value(), &pDecodedData, &dwDecodedSize,
                       &m_ImageDecoder, &m_pImageParam)) {
     m_pData = std::move(pSrcData);
     m_dwSize = dwSrcSize;
diff --git a/core/fpdfapi/parser/fpdf_parser_decode.cpp b/core/fpdfapi/parser/fpdf_parser_decode.cpp
index 0dccfbd..ed3d08a 100644
--- a/core/fpdfapi/parser/fpdf_parser_decode.cpp
+++ b/core/fpdfapi/parser/fpdf_parser_decode.cpp
@@ -365,46 +365,52 @@
                                        estimated_size, dest_buf, dest_size);
 }
 
-bool PDF_DataDecode(pdfium::span<const uint8_t> src_span,
-                    const CPDF_Dictionary* pDict,
-                    uint32_t last_estimated_size,
-                    bool bImageAcc,
-                    std::unique_ptr<uint8_t, FxFreeDeleter>* dest_buf,
-                    uint32_t* dest_size,
-                    ByteString* ImageEncoding,
-                    UnownedPtr<const CPDF_Dictionary>* pImageParams) {
+Optional<std::vector<std::pair<ByteString, const CPDF_Object*>>>
+GetDecoderArray(const CPDF_Dictionary* pDict) {
   const CPDF_Object* pDecoder = pDict->GetDirectObjectFor("Filter");
   if (!pDecoder || (!pDecoder->IsArray() && !pDecoder->IsName()))
-    return false;
+    return {};
 
   const CPDF_Object* pParams =
       pDict->GetDirectObjectFor(pdfium::stream::kDecodeParms);
 
-  std::vector<std::pair<ByteString, const CPDF_Object*>> DecoderArray;
+  std::vector<std::pair<ByteString, const CPDF_Object*>> decoder_array;
   if (const CPDF_Array* pDecoders = pDecoder->AsArray()) {
     if (!ValidateDecoderPipeline(pDecoders))
-      return false;
+      return {};
 
     const CPDF_Array* pParamsArray = ToArray(pParams);
     for (size_t i = 0; i < pDecoders->size(); ++i) {
-      DecoderArray.push_back(
+      decoder_array.push_back(
           {pDecoders->GetStringAt(i),
            pParamsArray ? pParamsArray->GetDictAt(i) : nullptr});
     }
   } else {
-    DecoderArray.push_back(
+    decoder_array.push_back(
         {pDecoder->GetString(), pParams ? pParams->GetDict() : nullptr});
   }
 
+  return decoder_array;
+}
+
+bool PDF_DataDecode(
+    pdfium::span<const uint8_t> src_span,
+    uint32_t last_estimated_size,
+    bool bImageAcc,
+    const std::vector<std::pair<ByteString, const CPDF_Object*>>& decoder_array,
+    std::unique_ptr<uint8_t, FxFreeDeleter>* dest_buf,
+    uint32_t* dest_size,
+    ByteString* ImageEncoding,
+    UnownedPtr<const CPDF_Dictionary>* pImageParams) {
   std::unique_ptr<uint8_t, FxFreeDeleter> result;
   // May be changed to point to |result| in the for-loop below. So put it below
   // |result| and let it get destroyed first.
   pdfium::span<const uint8_t> last_span = src_span;
-  size_t nSize = DecoderArray.size();
+  size_t nSize = decoder_array.size();
   for (size_t i = 0; i < nSize; ++i) {
     int estimated_size = i == nSize - 1 ? last_estimated_size : 0;
-    ByteString decoder = DecoderArray[i].first;
-    const CPDF_Dictionary* pParam = ToDictionary(DecoderArray[i].second);
+    ByteString decoder = decoder_array[i].first;
+    const CPDF_Dictionary* pParam = ToDictionary(decoder_array[i].second);
     std::unique_ptr<uint8_t, FxFreeDeleter> new_buf;
     uint32_t new_size = 0xFFFFFFFF;
     uint32_t offset = FX_INVALID_OFFSET;
diff --git a/core/fpdfapi/parser/fpdf_parser_decode.h b/core/fpdfapi/parser/fpdf_parser_decode.h
index 6442bf1..197fe07 100644
--- a/core/fpdfapi/parser/fpdf_parser_decode.h
+++ b/core/fpdfapi/parser/fpdf_parser_decode.h
@@ -8,14 +8,18 @@
 #define CORE_FPDFAPI_PARSER_FPDF_PARSER_DECODE_H_
 
 #include <memory>
+#include <utility>
+#include <vector>
 
 #include "core/fxcrt/fx_memory.h"
 #include "core/fxcrt/fx_string.h"
 #include "core/fxcrt/unowned_ptr.h"
+#include "third_party/base/optional.h"
 #include "third_party/base/span.h"
 
 class CPDF_Array;
 class CPDF_Dictionary;
+class CPDF_Object;
 
 namespace fxcodec {
 class ScanlineDecoder;
@@ -71,13 +75,17 @@
                           std::unique_ptr<uint8_t, FxFreeDeleter>* dest_buf,
                           uint32_t* dest_size);
 
-bool PDF_DataDecode(pdfium::span<const uint8_t> src_span,
-                    const CPDF_Dictionary* pDict,
-                    uint32_t estimated_size,
-                    bool bImageAcc,
-                    std::unique_ptr<uint8_t, FxFreeDeleter>* dest_buf,
-                    uint32_t* dest_size,
-                    ByteString* ImageEncoding,
-                    UnownedPtr<const CPDF_Dictionary>* pImageParms);
+Optional<std::vector<std::pair<ByteString, const CPDF_Object*>>>
+GetDecoderArray(const CPDF_Dictionary* pDict);
+
+bool PDF_DataDecode(
+    pdfium::span<const uint8_t> src_span,
+    uint32_t estimated_size,
+    bool bImageAcc,
+    const std::vector<std::pair<ByteString, const CPDF_Object*>>& decoder_array,
+    std::unique_ptr<uint8_t, FxFreeDeleter>* dest_buf,
+    uint32_t* dest_size,
+    ByteString* ImageEncoding,
+    UnownedPtr<const CPDF_Dictionary>* pImageParms);
 
 #endif  // CORE_FPDFAPI_PARSER_FPDF_PARSER_DECODE_H_