Fix parsing of PDFs with multiple cross reference streams
The fix for crbug.com/40932618 removed the ability to follow a chain of
trailers in hybrid-reference files. While that met the spec, it
regressed other PDFs because there was no call to code that follows the
chain of cross reference streams in hybrid-reference files. Add that
missing code to fix the regression.
Bug: 332462378
Change-Id: I520994ec2273f608f63849a3abdd41005abc19df
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/118330
Reviewed-by: Thomas Sepez <tsepez@google.com>
Reviewed-by: Tom Sepez <tsepez@chromium.org>
Commit-Queue: Lei Zhang <thestig@chromium.org>
diff --git a/core/fpdfapi/parser/cpdf_parser.cpp b/core/fpdfapi/parser/cpdf_parser.cpp
index 50782dc..dcdcb7b 100644
--- a/core/fpdfapi/parser/cpdf_parser.cpp
+++ b/core/fpdfapi/parser/cpdf_parser.cpp
@@ -387,43 +387,52 @@
m_CrossRefTable->SetObjectMapSize(xrefsize);
FX_FILESIZE xref_stm = GetTrailer()->GetDirectIntegerFor("XRefStm");
+ if (xref_stm > 0) {
+ // In hybrid-reference files, use /Prev from the cross-reference stream and
+ // ignore the /Prev in the trailer.
+ // See ISO 32000-1:2008 spec, table 17.
+ if (!LoadAllSecondaryCrossRefStreams(xref_stm)) {
+ return false;
+ }
+
+ // Then load the reference table in the trailer.
+ return LoadCrossRefTable(xref_offset, /*skip=*/false);
+ }
+
+ // Otherwise, follow the /Prev from the trailer.
std::vector<FX_FILESIZE> xref_stream_list{xref_stm};
std::vector<FX_FILESIZE> xref_list{xref_offset};
std::set<FX_FILESIZE> seen_xref_offset{xref_offset};
- // Ignore /Prev for hybrid-reference files.
- // See ISO 32000-1:2008 spec, table 17.
- if (!xref_stm) {
- // When the trailer doesn't have Prev entry or Prev entry value is not
- // numerical, GetDirectInteger() returns 0. Loading will end.
- xref_offset = GetTrailer()->GetDirectIntegerFor("Prev");
- while (xref_offset > 0) {
- // Check for circular references.
- if (pdfium::Contains(seen_xref_offset, xref_offset)) {
- return false;
- }
-
- seen_xref_offset.insert(xref_offset);
- xref_list.insert(xref_list.begin(), xref_offset);
-
- // SLOW ...
- LoadCrossRefTable(xref_offset, /*skip=*/true);
-
- RetainPtr<CPDF_Dictionary> trailer_dict = LoadTrailer();
- if (!trailer_dict) {
- return false;
- }
-
- xref_offset = trailer_dict->GetDirectIntegerFor("Prev");
- xref_stm = trailer_dict->GetIntegerFor("XRefStm");
- xref_stream_list.insert(xref_stream_list.begin(), xref_stm);
-
- // SLOW ...
- m_CrossRefTable = CPDF_CrossRefTable::MergeUp(
- std::make_unique<CPDF_CrossRefTable>(std::move(trailer_dict),
- kNoTrailerObjectNumber),
- std::move(m_CrossRefTable));
+ // When the trailer doesn't have Prev entry or Prev entry value is not
+ // numerical, GetDirectInteger() returns 0. Loading will end.
+ xref_offset = GetTrailer()->GetDirectIntegerFor("Prev");
+ while (xref_offset > 0) {
+ // Check for circular references.
+ if (pdfium::Contains(seen_xref_offset, xref_offset)) {
+ return false;
}
+
+ seen_xref_offset.insert(xref_offset);
+ xref_list.insert(xref_list.begin(), xref_offset);
+
+ // SLOW ...
+ LoadCrossRefTable(xref_offset, /*skip=*/true);
+
+ RetainPtr<CPDF_Dictionary> trailer_dict = LoadTrailer();
+ if (!trailer_dict) {
+ return false;
+ }
+
+ xref_offset = trailer_dict->GetDirectIntegerFor("Prev");
+ xref_stm = trailer_dict->GetIntegerFor("XRefStm");
+ xref_stream_list.insert(xref_stream_list.begin(), xref_stm);
+
+ // SLOW ...
+ m_CrossRefTable = CPDF_CrossRefTable::MergeUp(
+ std::make_unique<CPDF_CrossRefTable>(std::move(trailer_dict),
+ kNoTrailerObjectNumber),
+ std::move(m_CrossRefTable));
}
for (size_t i = 0; i < xref_list.size(); ++i) {
@@ -685,6 +694,16 @@
return false;
}
+ if (!LoadAllSecondaryCrossRefStreams(xref_offset)) {
+ return false;
+ }
+
+ m_ObjectStreamMap.clear();
+ m_bXRefStream = true;
+ return true;
+}
+
+bool CPDF_Parser::LoadAllSecondaryCrossRefStreams(FX_FILESIZE xref_offset) {
std::set<FX_FILESIZE> seen_xref_offset;
while (xref_offset > 0) {
seen_xref_offset.insert(xref_offset);
@@ -710,8 +729,6 @@
return false;
}
}
- m_ObjectStreamMap.clear();
- m_bXRefStream = true;
return true;
}
diff --git a/core/fpdfapi/parser/cpdf_parser.h b/core/fpdfapi/parser/cpdf_parser.h
index 4bf92fd..aaed8bc 100644
--- a/core/fpdfapi/parser/cpdf_parser.h
+++ b/core/fpdfapi/parser/cpdf_parser.h
@@ -145,6 +145,7 @@
bool LoadAllCrossRefTable(FX_FILESIZE xref_offset);
bool LoadAllCrossRefStream(FX_FILESIZE xref_offset);
+ bool LoadAllSecondaryCrossRefStreams(FX_FILESIZE xref_offset);
bool LoadCrossRefStream(FX_FILESIZE* pos, bool is_main_xref);
void ProcessCrossRefStreamEntry(pdfium::span<const uint8_t> entry_span,
pdfium::span<const uint32_t> field_widths,
diff --git a/testing/SUPPRESSIONS b/testing/SUPPRESSIONS
index f7b7efa..96c84d12 100644
--- a/testing/SUPPRESSIONS
+++ b/testing/SUPPRESSIONS
@@ -662,9 +662,6 @@
# TODO(chromium:237527): Remove after associated bug is fixed
bug_237527_1.in * * * *
-# TODO(crbug.com/332462378): Remove after associated bug is fixed
-bug_332462378.pdf * * * *
-
# TODO(chromium:451366): Remove after associated bug is fixed
bug_451366.in * * * agg