Fix parsing of PDFs with multiple cross reference streams

The fix for crbug.com/40932618 removed the ability to follow a chain of
trailers in hybrid-reference files. While that met the spec, it
regressed other PDFs because there was no call to code that follows the
chain of cross reference streams in hybrid-reference files. Add that
missing code to fix the regression.

Bug: 332462378
Change-Id: I520994ec2273f608f63849a3abdd41005abc19df
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/118330
Reviewed-by: Thomas Sepez <tsepez@google.com>
Reviewed-by: Tom Sepez <tsepez@chromium.org>
Commit-Queue: Lei Zhang <thestig@chromium.org>
diff --git a/core/fpdfapi/parser/cpdf_parser.cpp b/core/fpdfapi/parser/cpdf_parser.cpp
index 50782dc..dcdcb7b 100644
--- a/core/fpdfapi/parser/cpdf_parser.cpp
+++ b/core/fpdfapi/parser/cpdf_parser.cpp
@@ -387,43 +387,52 @@
     m_CrossRefTable->SetObjectMapSize(xrefsize);
 
   FX_FILESIZE xref_stm = GetTrailer()->GetDirectIntegerFor("XRefStm");
+  if (xref_stm > 0) {
+    // In hybrid-reference files, use /Prev from the cross-reference stream and
+    // ignore the /Prev in the trailer.
+    // See ISO 32000-1:2008 spec, table 17.
+    if (!LoadAllSecondaryCrossRefStreams(xref_stm)) {
+      return false;
+    }
+
+    // Then load the reference table in the trailer.
+    return LoadCrossRefTable(xref_offset, /*skip=*/false);
+  }
+
+  // Otherwise, follow the /Prev from the trailer.
   std::vector<FX_FILESIZE> xref_stream_list{xref_stm};
   std::vector<FX_FILESIZE> xref_list{xref_offset};
   std::set<FX_FILESIZE> seen_xref_offset{xref_offset};
 
-  // Ignore /Prev for hybrid-reference files.
-  // See ISO 32000-1:2008 spec, table 17.
-  if (!xref_stm) {
-    // When the trailer doesn't have Prev entry or Prev entry value is not
-    // numerical, GetDirectInteger() returns 0. Loading will end.
-    xref_offset = GetTrailer()->GetDirectIntegerFor("Prev");
-    while (xref_offset > 0) {
-      // Check for circular references.
-      if (pdfium::Contains(seen_xref_offset, xref_offset)) {
-        return false;
-      }
-
-      seen_xref_offset.insert(xref_offset);
-      xref_list.insert(xref_list.begin(), xref_offset);
-
-      // SLOW ...
-      LoadCrossRefTable(xref_offset, /*skip=*/true);
-
-      RetainPtr<CPDF_Dictionary> trailer_dict = LoadTrailer();
-      if (!trailer_dict) {
-        return false;
-      }
-
-      xref_offset = trailer_dict->GetDirectIntegerFor("Prev");
-      xref_stm = trailer_dict->GetIntegerFor("XRefStm");
-      xref_stream_list.insert(xref_stream_list.begin(), xref_stm);
-
-      // SLOW ...
-      m_CrossRefTable = CPDF_CrossRefTable::MergeUp(
-          std::make_unique<CPDF_CrossRefTable>(std::move(trailer_dict),
-                                               kNoTrailerObjectNumber),
-          std::move(m_CrossRefTable));
+  // When the trailer doesn't have Prev entry or Prev entry value is not
+  // numerical, GetDirectInteger() returns 0. Loading will end.
+  xref_offset = GetTrailer()->GetDirectIntegerFor("Prev");
+  while (xref_offset > 0) {
+    // Check for circular references.
+    if (pdfium::Contains(seen_xref_offset, xref_offset)) {
+      return false;
     }
+
+    seen_xref_offset.insert(xref_offset);
+    xref_list.insert(xref_list.begin(), xref_offset);
+
+    // SLOW ...
+    LoadCrossRefTable(xref_offset, /*skip=*/true);
+
+    RetainPtr<CPDF_Dictionary> trailer_dict = LoadTrailer();
+    if (!trailer_dict) {
+      return false;
+    }
+
+    xref_offset = trailer_dict->GetDirectIntegerFor("Prev");
+    xref_stm = trailer_dict->GetIntegerFor("XRefStm");
+    xref_stream_list.insert(xref_stream_list.begin(), xref_stm);
+
+    // SLOW ...
+    m_CrossRefTable = CPDF_CrossRefTable::MergeUp(
+        std::make_unique<CPDF_CrossRefTable>(std::move(trailer_dict),
+                                             kNoTrailerObjectNumber),
+        std::move(m_CrossRefTable));
   }
 
   for (size_t i = 0; i < xref_list.size(); ++i) {
@@ -685,6 +694,16 @@
     return false;
   }
 
+  if (!LoadAllSecondaryCrossRefStreams(xref_offset)) {
+    return false;
+  }
+
+  m_ObjectStreamMap.clear();
+  m_bXRefStream = true;
+  return true;
+}
+
+bool CPDF_Parser::LoadAllSecondaryCrossRefStreams(FX_FILESIZE xref_offset) {
   std::set<FX_FILESIZE> seen_xref_offset;
   while (xref_offset > 0) {
     seen_xref_offset.insert(xref_offset);
@@ -710,8 +729,6 @@
       return false;
     }
   }
-  m_ObjectStreamMap.clear();
-  m_bXRefStream = true;
   return true;
 }
 
diff --git a/core/fpdfapi/parser/cpdf_parser.h b/core/fpdfapi/parser/cpdf_parser.h
index 4bf92fd..aaed8bc 100644
--- a/core/fpdfapi/parser/cpdf_parser.h
+++ b/core/fpdfapi/parser/cpdf_parser.h
@@ -145,6 +145,7 @@
 
   bool LoadAllCrossRefTable(FX_FILESIZE xref_offset);
   bool LoadAllCrossRefStream(FX_FILESIZE xref_offset);
+  bool LoadAllSecondaryCrossRefStreams(FX_FILESIZE xref_offset);
   bool LoadCrossRefStream(FX_FILESIZE* pos, bool is_main_xref);
   void ProcessCrossRefStreamEntry(pdfium::span<const uint8_t> entry_span,
                                   pdfium::span<const uint32_t> field_widths,
diff --git a/testing/SUPPRESSIONS b/testing/SUPPRESSIONS
index f7b7efa..96c84d12 100644
--- a/testing/SUPPRESSIONS
+++ b/testing/SUPPRESSIONS
@@ -662,9 +662,6 @@
 # TODO(chromium:237527): Remove after associated bug is fixed
 bug_237527_1.in * * * *
 
-# TODO(crbug.com/332462378): Remove after associated bug is fixed
-bug_332462378.pdf * * * *
-
 # TODO(chromium:451366): Remove after associated bug is fixed
 bug_451366.in * * * agg