Save the trailer's object number when parsing
CPDF_CrossRefTable stores a CPDF_Dictionary for the trailer dictionary
object. For V5 trailers, the dictionary may be part of a stream object.
Since the dictionary is inline, the CPDF_Dictionary that represents it
does not know the object number. Store this object number in
CPDF_CrossRefTable, alongside the CPDF_Dictionary, and expose it in a
pass-through method in CPDF_Parser.
Having this object number will make PDF object tree traversal return
more accurate results in an upcoming CL.
Bug: chromium:1428724,pdfium:1409
Change-Id: I86f980a09d2214c50412ce65a905dd92ebc85a6a
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/105610
Commit-Queue: Lei Zhang <thestig@chromium.org>
Reviewed-by: Tom Sepez <tsepez@chromium.org>
diff --git a/core/fpdfapi/parser/cpdf_cross_ref_table.cpp b/core/fpdfapi/parser/cpdf_cross_ref_table.cpp
index 2474931..0bb5bb6 100644
--- a/core/fpdfapi/parser/cpdf_cross_ref_table.cpp
+++ b/core/fpdfapi/parser/cpdf_cross_ref_table.cpp
@@ -27,8 +27,10 @@
CPDF_CrossRefTable::CPDF_CrossRefTable() = default;
-CPDF_CrossRefTable::CPDF_CrossRefTable(RetainPtr<CPDF_Dictionary> trailer)
- : trailer_(std::move(trailer)) {}
+CPDF_CrossRefTable::CPDF_CrossRefTable(RetainPtr<CPDF_Dictionary> trailer,
+ uint32_t trailer_object_number)
+ : trailer_(std::move(trailer)),
+ trailer_object_number_(trailer_object_number) {}
CPDF_CrossRefTable::~CPDF_CrossRefTable() = default;
@@ -90,8 +92,10 @@
info.pos = 0;
}
-void CPDF_CrossRefTable::SetTrailer(RetainPtr<CPDF_Dictionary> trailer) {
+void CPDF_CrossRefTable::SetTrailer(RetainPtr<CPDF_Dictionary> trailer,
+ uint32_t trailer_object_number) {
trailer_ = std::move(trailer);
+ trailer_object_number_ = trailer_object_number;
}
const CPDF_CrossRefTable::ObjectInfo* CPDF_CrossRefTable::GetObjectInfo(
diff --git a/core/fpdfapi/parser/cpdf_cross_ref_table.h b/core/fpdfapi/parser/cpdf_cross_ref_table.h
index c1d639f..246e129 100644
--- a/core/fpdfapi/parser/cpdf_cross_ref_table.h
+++ b/core/fpdfapi/parser/cpdf_cross_ref_table.h
@@ -49,7 +49,8 @@
std::unique_ptr<CPDF_CrossRefTable> top);
CPDF_CrossRefTable();
- explicit CPDF_CrossRefTable(RetainPtr<CPDF_Dictionary> trailer);
+ CPDF_CrossRefTable(RetainPtr<CPDF_Dictionary> trailer,
+ uint32_t trailer_object_number);
~CPDF_CrossRefTable();
void AddCompressed(uint32_t obj_num,
@@ -58,7 +59,9 @@
void AddNormal(uint32_t obj_num, uint16_t gen_num, FX_FILESIZE pos);
void SetFree(uint32_t obj_num);
- void SetTrailer(RetainPtr<CPDF_Dictionary> trailer);
+ void SetTrailer(RetainPtr<CPDF_Dictionary> trailer,
+ uint32_t trailer_object_number);
+ uint32_t trailer_object_number() const { return trailer_object_number_; }
const CPDF_Dictionary* trailer() const { return trailer_.Get(); }
CPDF_Dictionary* GetMutableTrailerForTesting() { return trailer_.Get(); }
@@ -77,6 +80,10 @@
void UpdateTrailer(RetainPtr<CPDF_Dictionary> new_trailer);
RetainPtr<CPDF_Dictionary> trailer_;
+ // `trailer_` can be the dictionary part of a XRef stream object. Since it is
+ // inline, it has no object number. Store the stream's object number, or 0 if
+ // there is none.
+ uint32_t trailer_object_number_ = 0;
std::map<uint32_t, ObjectInfo> objects_info_;
};
diff --git a/core/fpdfapi/parser/cpdf_parser.cpp b/core/fpdfapi/parser/cpdf_parser.cpp
index e2bbfbf..302aec1 100644
--- a/core/fpdfapi/parser/cpdf_parser.cpp
+++ b/core/fpdfapi/parser/cpdf_parser.cpp
@@ -51,6 +51,9 @@
// dictionary.
constexpr size_t kMinFieldCount = 3;
+// V4 trailers are inline.
+constexpr uint32_t kNoV4TrailerObjectNumber = 0;
+
struct CrossRefV5IndexEntry {
uint32_t start_obj_num;
uint32_t obj_count;
@@ -378,7 +381,7 @@
if (!trailer)
return false;
- m_CrossRefTable->SetTrailer(std::move(trailer));
+ m_CrossRefTable->SetTrailer(std::move(trailer), kNoV4TrailerObjectNumber);
const int32_t xrefsize = GetTrailer()->GetDirectIntegerFor("Size");
if (xrefsize > 0 && xrefsize <= kMaxXRefSize)
ShrinkObjectMap(xrefsize);
@@ -412,7 +415,8 @@
// SLOW ...
m_CrossRefTable = CPDF_CrossRefTable::MergeUp(
- std::make_unique<CPDF_CrossRefTable>(std::move(pDict)),
+ std::make_unique<CPDF_CrossRefTable>(std::move(pDict),
+ kNoV4TrailerObjectNumber),
std::move(m_CrossRefTable));
}
@@ -451,7 +455,8 @@
// Merge the trailers.
m_CrossRefTable = CPDF_CrossRefTable::MergeUp(
- std::make_unique<CPDF_CrossRefTable>(std::move(main_trailer)),
+ std::make_unique<CPDF_CrossRefTable>(std::move(main_trailer),
+ kNoV4TrailerObjectNumber),
std::move(m_CrossRefTable));
// Now GetTrailer() returns the merged trailer, where /Prev is from the
@@ -478,7 +483,8 @@
// SLOW ...
m_CrossRefTable = CPDF_CrossRefTable::MergeUp(
- std::make_unique<CPDF_CrossRefTable>(std::move(pDict)),
+ std::make_unique<CPDF_CrossRefTable>(std::move(pDict),
+ kNoV4TrailerObjectNumber),
std::move(m_CrossRefTable));
}
@@ -693,11 +699,17 @@
} else if (word == "trailer") {
RetainPtr<CPDF_Object> pTrailer = m_pSyntax->GetObjectBody(nullptr);
if (pTrailer) {
+ CPDF_Stream* stream_trailer = pTrailer->AsMutableStream();
+ // Grab the object number from `pTrailer` before potentially calling
+ // std::move(pTrailer) below.
+ const uint32_t trailer_object_number = pTrailer->GetObjNum();
+ RetainPtr<CPDF_Dictionary> trailer_dict =
+ stream_trailer ? stream_trailer->GetMutableDict()
+ : ToDictionary(std::move(pTrailer));
cross_ref_table = CPDF_CrossRefTable::MergeUp(
std::move(cross_ref_table),
- std::make_unique<CPDF_CrossRefTable>(ToDictionary(
- pTrailer->IsStream() ? pTrailer->AsStream()->GetDict()->Clone()
- : std::move(pTrailer))));
+ std::make_unique<CPDF_CrossRefTable>(std::move(trailer_dict),
+ trailer_object_number));
}
} else if (word == "obj" && numbers.size() == 2u) {
const FX_FILESIZE obj_pos = numbers[0].second;
@@ -713,7 +725,8 @@
cross_ref_table = CPDF_CrossRefTable::MergeUp(
std::move(cross_ref_table),
std::make_unique<CPDF_CrossRefTable>(
- ToDictionary(pStream->GetDict()->Clone())));
+ ToDictionary(pStream->GetDict()->Clone()),
+ pStream->GetObjNum()));
}
if (obj_num < kMaxObjectNumber) {
@@ -763,12 +776,13 @@
RetainPtr<CPDF_Dictionary> pNewTrailer = ToDictionary(pDict->Clone());
if (bMainXRef) {
- m_CrossRefTable =
- std::make_unique<CPDF_CrossRefTable>(std::move(pNewTrailer));
+ m_CrossRefTable = std::make_unique<CPDF_CrossRefTable>(
+ std::move(pNewTrailer), pStream->GetObjNum());
m_CrossRefTable->ShrinkObjectMap(size);
} else {
m_CrossRefTable = CPDF_CrossRefTable::MergeUp(
- std::make_unique<CPDF_CrossRefTable>(std::move(pNewTrailer)),
+ std::make_unique<CPDF_CrossRefTable>(std::move(pNewTrailer),
+ pStream->GetObjNum()),
std::move(m_CrossRefTable));
}
@@ -912,6 +926,10 @@
return m_CrossRefTable->GetMutableTrailerForTesting();
}
+uint32_t CPDF_Parser::GetTrailerObjectNumber() const {
+ return m_CrossRefTable->trailer_object_number();
+}
+
RetainPtr<CPDF_Dictionary> CPDF_Parser::GetCombinedTrailer() const {
return m_CrossRefTable->trailer()
? ToDictionary(m_CrossRefTable->trailer()->Clone())
@@ -1076,7 +1094,7 @@
if (!trailer)
return SUCCESS;
- m_CrossRefTable->SetTrailer(std::move(trailer));
+ m_CrossRefTable->SetTrailer(std::move(trailer), kNoV4TrailerObjectNumber);
const int32_t xrefsize = GetTrailer()->GetDirectIntegerFor("Size");
if (xrefsize > 0) {
// Check if `xrefsize` is correct. If it is incorrect, give up and rebuild
diff --git a/core/fpdfapi/parser/cpdf_parser.h b/core/fpdfapi/parser/cpdf_parser.h
index 5d036a2..21dd8aa 100644
--- a/core/fpdfapi/parser/cpdf_parser.h
+++ b/core/fpdfapi/parser/cpdf_parser.h
@@ -79,6 +79,7 @@
const CPDF_Dictionary* GetTrailer() const;
CPDF_Dictionary* GetMutableTrailerForTesting();
+ uint32_t GetTrailerObjectNumber() const;
// Returns a new trailer which combines the last read trailer with the /Root
// and /Info from previous ones.
diff --git a/core/fpdfapi/parser/cpdf_parser_unittest.cpp b/core/fpdfapi/parser/cpdf_parser_unittest.cpp
index da68641..efde90e 100644
--- a/core/fpdfapi/parser/cpdf_parser_unittest.cpp
+++ b/core/fpdfapi/parser/cpdf_parser_unittest.cpp
@@ -102,6 +102,10 @@
EXPECT_EQ(offsets[i], GetObjInfo(parser, i).pos);
for (size_t i = 0; i < std::size(versions); ++i)
EXPECT_EQ(versions[i], GetObjInfo(parser, i).gennum);
+
+ const CPDF_CrossRefTable* cross_ref_table = parser.GetCrossRefTable();
+ ASSERT_TRUE(cross_ref_table);
+ EXPECT_EQ(0u, cross_ref_table->trailer_object_number());
}
TEST(ParserTest, RebuildCrossRefFailed) {
@@ -334,6 +338,10 @@
CPDF_TestParser parser;
parser.InitTestFromBufferWithOffset(data, kTestHeaderOffset);
EXPECT_TRUE(parser.ParseLinearizedHeader());
+
+ const CPDF_CrossRefTable* cross_ref_table = parser.GetCrossRefTable();
+ ASSERT_TRUE(cross_ref_table);
+ EXPECT_EQ(0u, cross_ref_table->trailer_object_number());
}
TEST(ParserTest, BadStartXrefShouldNotBuildCrossRefTable) {
@@ -434,8 +442,11 @@
"%%EOF\n";
ASSERT_TRUE(parser.InitTestFromBuffer(kData));
EXPECT_EQ(CPDF_Parser::SUCCESS, parser.StartParseInternal());
- ASSERT_TRUE(parser.GetCrossRefTable());
- const auto& objects_info = parser.GetCrossRefTable()->objects_info();
+
+ const CPDF_CrossRefTable* cross_ref_table = parser.GetCrossRefTable();
+ ASSERT_TRUE(cross_ref_table);
+ EXPECT_EQ(7u, cross_ref_table->trailer_object_number());
+ const auto& objects_info = cross_ref_table->objects_info();
EXPECT_EQ(2u, objects_info.size());
// Skip over the first object, and continue parsing the remaining objects.