Save the trailer's object number when parsing

CPDF_CrossRefTable stores a CPDF_Dictionary for the trailer dictionary
object. For V5 trailers, the dictionary may be part of a stream object.
Since the dictionary is inline, the CPDF_Dictionary that represents it
does not know the object number. Store this object number in
CPDF_CrossRefTable, alongside the CPDF_Dictionary, and expose it in a
pass-through method in CPDF_Parser.

Having this object number will make PDF object tree traversal return
more accurate results in an upcoming CL.

Bug: chromium:1428724,pdfium:1409
Change-Id: I86f980a09d2214c50412ce65a905dd92ebc85a6a
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/105610
Commit-Queue: Lei Zhang <thestig@chromium.org>
Reviewed-by: Tom Sepez <tsepez@chromium.org>
diff --git a/core/fpdfapi/parser/cpdf_cross_ref_table.cpp b/core/fpdfapi/parser/cpdf_cross_ref_table.cpp
index 2474931..0bb5bb6 100644
--- a/core/fpdfapi/parser/cpdf_cross_ref_table.cpp
+++ b/core/fpdfapi/parser/cpdf_cross_ref_table.cpp
@@ -27,8 +27,10 @@
 
 CPDF_CrossRefTable::CPDF_CrossRefTable() = default;
 
-CPDF_CrossRefTable::CPDF_CrossRefTable(RetainPtr<CPDF_Dictionary> trailer)
-    : trailer_(std::move(trailer)) {}
+CPDF_CrossRefTable::CPDF_CrossRefTable(RetainPtr<CPDF_Dictionary> trailer,
+                                       uint32_t trailer_object_number)
+    : trailer_(std::move(trailer)),
+      trailer_object_number_(trailer_object_number) {}
 
 CPDF_CrossRefTable::~CPDF_CrossRefTable() = default;
 
@@ -90,8 +92,10 @@
   info.pos = 0;
 }
 
-void CPDF_CrossRefTable::SetTrailer(RetainPtr<CPDF_Dictionary> trailer) {
+void CPDF_CrossRefTable::SetTrailer(RetainPtr<CPDF_Dictionary> trailer,
+                                    uint32_t trailer_object_number) {
   trailer_ = std::move(trailer);
+  trailer_object_number_ = trailer_object_number;
 }
 
 const CPDF_CrossRefTable::ObjectInfo* CPDF_CrossRefTable::GetObjectInfo(
diff --git a/core/fpdfapi/parser/cpdf_cross_ref_table.h b/core/fpdfapi/parser/cpdf_cross_ref_table.h
index c1d639f..246e129 100644
--- a/core/fpdfapi/parser/cpdf_cross_ref_table.h
+++ b/core/fpdfapi/parser/cpdf_cross_ref_table.h
@@ -49,7 +49,8 @@
       std::unique_ptr<CPDF_CrossRefTable> top);
 
   CPDF_CrossRefTable();
-  explicit CPDF_CrossRefTable(RetainPtr<CPDF_Dictionary> trailer);
+  CPDF_CrossRefTable(RetainPtr<CPDF_Dictionary> trailer,
+                     uint32_t trailer_object_number);
   ~CPDF_CrossRefTable();
 
   void AddCompressed(uint32_t obj_num,
@@ -58,7 +59,9 @@
   void AddNormal(uint32_t obj_num, uint16_t gen_num, FX_FILESIZE pos);
   void SetFree(uint32_t obj_num);
 
-  void SetTrailer(RetainPtr<CPDF_Dictionary> trailer);
+  void SetTrailer(RetainPtr<CPDF_Dictionary> trailer,
+                  uint32_t trailer_object_number);
+  uint32_t trailer_object_number() const { return trailer_object_number_; }
   const CPDF_Dictionary* trailer() const { return trailer_.Get(); }
   CPDF_Dictionary* GetMutableTrailerForTesting() { return trailer_.Get(); }
 
@@ -77,6 +80,10 @@
   void UpdateTrailer(RetainPtr<CPDF_Dictionary> new_trailer);
 
   RetainPtr<CPDF_Dictionary> trailer_;
+  // `trailer_` can be the dictionary part of a XRef stream object. Since it is
+  // inline, it has no object number. Store the stream's object number, or 0 if
+  // there is none.
+  uint32_t trailer_object_number_ = 0;
   std::map<uint32_t, ObjectInfo> objects_info_;
 };
 
diff --git a/core/fpdfapi/parser/cpdf_parser.cpp b/core/fpdfapi/parser/cpdf_parser.cpp
index e2bbfbf..302aec1 100644
--- a/core/fpdfapi/parser/cpdf_parser.cpp
+++ b/core/fpdfapi/parser/cpdf_parser.cpp
@@ -51,6 +51,9 @@
 // dictionary.
 constexpr size_t kMinFieldCount = 3;
 
+// V4 trailers are inline.
+constexpr uint32_t kNoV4TrailerObjectNumber = 0;
+
 struct CrossRefV5IndexEntry {
   uint32_t start_obj_num;
   uint32_t obj_count;
@@ -378,7 +381,7 @@
   if (!trailer)
     return false;
 
-  m_CrossRefTable->SetTrailer(std::move(trailer));
+  m_CrossRefTable->SetTrailer(std::move(trailer), kNoV4TrailerObjectNumber);
   const int32_t xrefsize = GetTrailer()->GetDirectIntegerFor("Size");
   if (xrefsize > 0 && xrefsize <= kMaxXRefSize)
     ShrinkObjectMap(xrefsize);
@@ -412,7 +415,8 @@
 
     // SLOW ...
     m_CrossRefTable = CPDF_CrossRefTable::MergeUp(
-        std::make_unique<CPDF_CrossRefTable>(std::move(pDict)),
+        std::make_unique<CPDF_CrossRefTable>(std::move(pDict),
+                                             kNoV4TrailerObjectNumber),
         std::move(m_CrossRefTable));
   }
 
@@ -451,7 +455,8 @@
 
   // Merge the trailers.
   m_CrossRefTable = CPDF_CrossRefTable::MergeUp(
-      std::make_unique<CPDF_CrossRefTable>(std::move(main_trailer)),
+      std::make_unique<CPDF_CrossRefTable>(std::move(main_trailer),
+                                           kNoV4TrailerObjectNumber),
       std::move(m_CrossRefTable));
 
   // Now GetTrailer() returns the merged trailer, where /Prev is from the
@@ -478,7 +483,8 @@
 
     // SLOW ...
     m_CrossRefTable = CPDF_CrossRefTable::MergeUp(
-        std::make_unique<CPDF_CrossRefTable>(std::move(pDict)),
+        std::make_unique<CPDF_CrossRefTable>(std::move(pDict),
+                                             kNoV4TrailerObjectNumber),
         std::move(m_CrossRefTable));
   }
 
@@ -693,11 +699,17 @@
     } else if (word == "trailer") {
       RetainPtr<CPDF_Object> pTrailer = m_pSyntax->GetObjectBody(nullptr);
       if (pTrailer) {
+        CPDF_Stream* stream_trailer = pTrailer->AsMutableStream();
+        // Grab the object number from `pTrailer` before potentially calling
+        // std::move(pTrailer) below.
+        const uint32_t trailer_object_number = pTrailer->GetObjNum();
+        RetainPtr<CPDF_Dictionary> trailer_dict =
+            stream_trailer ? stream_trailer->GetMutableDict()
+                           : ToDictionary(std::move(pTrailer));
         cross_ref_table = CPDF_CrossRefTable::MergeUp(
             std::move(cross_ref_table),
-            std::make_unique<CPDF_CrossRefTable>(ToDictionary(
-                pTrailer->IsStream() ? pTrailer->AsStream()->GetDict()->Clone()
-                                     : std::move(pTrailer))));
+            std::make_unique<CPDF_CrossRefTable>(std::move(trailer_dict),
+                                                 trailer_object_number));
       }
     } else if (word == "obj" && numbers.size() == 2u) {
       const FX_FILESIZE obj_pos = numbers[0].second;
@@ -713,7 +725,8 @@
         cross_ref_table = CPDF_CrossRefTable::MergeUp(
             std::move(cross_ref_table),
             std::make_unique<CPDF_CrossRefTable>(
-                ToDictionary(pStream->GetDict()->Clone())));
+                ToDictionary(pStream->GetDict()->Clone()),
+                pStream->GetObjNum()));
       }
 
       if (obj_num < kMaxObjectNumber) {
@@ -763,12 +776,13 @@
 
   RetainPtr<CPDF_Dictionary> pNewTrailer = ToDictionary(pDict->Clone());
   if (bMainXRef) {
-    m_CrossRefTable =
-        std::make_unique<CPDF_CrossRefTable>(std::move(pNewTrailer));
+    m_CrossRefTable = std::make_unique<CPDF_CrossRefTable>(
+        std::move(pNewTrailer), pStream->GetObjNum());
     m_CrossRefTable->ShrinkObjectMap(size);
   } else {
     m_CrossRefTable = CPDF_CrossRefTable::MergeUp(
-        std::make_unique<CPDF_CrossRefTable>(std::move(pNewTrailer)),
+        std::make_unique<CPDF_CrossRefTable>(std::move(pNewTrailer),
+                                             pStream->GetObjNum()),
         std::move(m_CrossRefTable));
   }
 
@@ -912,6 +926,10 @@
   return m_CrossRefTable->GetMutableTrailerForTesting();
 }
 
+uint32_t CPDF_Parser::GetTrailerObjectNumber() const {
+  return m_CrossRefTable->trailer_object_number();
+}
+
 RetainPtr<CPDF_Dictionary> CPDF_Parser::GetCombinedTrailer() const {
   return m_CrossRefTable->trailer()
              ? ToDictionary(m_CrossRefTable->trailer()->Clone())
@@ -1076,7 +1094,7 @@
     if (!trailer)
       return SUCCESS;
 
-    m_CrossRefTable->SetTrailer(std::move(trailer));
+    m_CrossRefTable->SetTrailer(std::move(trailer), kNoV4TrailerObjectNumber);
     const int32_t xrefsize = GetTrailer()->GetDirectIntegerFor("Size");
     if (xrefsize > 0) {
       // Check if `xrefsize` is correct. If it is incorrect, give up and rebuild
diff --git a/core/fpdfapi/parser/cpdf_parser.h b/core/fpdfapi/parser/cpdf_parser.h
index 5d036a2..21dd8aa 100644
--- a/core/fpdfapi/parser/cpdf_parser.h
+++ b/core/fpdfapi/parser/cpdf_parser.h
@@ -79,6 +79,7 @@
 
   const CPDF_Dictionary* GetTrailer() const;
   CPDF_Dictionary* GetMutableTrailerForTesting();
+  uint32_t GetTrailerObjectNumber() const;
 
   // Returns a new trailer which combines the last read trailer with the /Root
   // and /Info from previous ones.
diff --git a/core/fpdfapi/parser/cpdf_parser_unittest.cpp b/core/fpdfapi/parser/cpdf_parser_unittest.cpp
index da68641..efde90e 100644
--- a/core/fpdfapi/parser/cpdf_parser_unittest.cpp
+++ b/core/fpdfapi/parser/cpdf_parser_unittest.cpp
@@ -102,6 +102,10 @@
     EXPECT_EQ(offsets[i], GetObjInfo(parser, i).pos);
   for (size_t i = 0; i < std::size(versions); ++i)
     EXPECT_EQ(versions[i], GetObjInfo(parser, i).gennum);
+
+  const CPDF_CrossRefTable* cross_ref_table = parser.GetCrossRefTable();
+  ASSERT_TRUE(cross_ref_table);
+  EXPECT_EQ(0u, cross_ref_table->trailer_object_number());
 }
 
 TEST(ParserTest, RebuildCrossRefFailed) {
@@ -334,6 +338,10 @@
   CPDF_TestParser parser;
   parser.InitTestFromBufferWithOffset(data, kTestHeaderOffset);
   EXPECT_TRUE(parser.ParseLinearizedHeader());
+
+  const CPDF_CrossRefTable* cross_ref_table = parser.GetCrossRefTable();
+  ASSERT_TRUE(cross_ref_table);
+  EXPECT_EQ(0u, cross_ref_table->trailer_object_number());
 }
 
 TEST(ParserTest, BadStartXrefShouldNotBuildCrossRefTable) {
@@ -434,8 +442,11 @@
       "%%EOF\n";
   ASSERT_TRUE(parser.InitTestFromBuffer(kData));
   EXPECT_EQ(CPDF_Parser::SUCCESS, parser.StartParseInternal());
-  ASSERT_TRUE(parser.GetCrossRefTable());
-  const auto& objects_info = parser.GetCrossRefTable()->objects_info();
+
+  const CPDF_CrossRefTable* cross_ref_table = parser.GetCrossRefTable();
+  ASSERT_TRUE(cross_ref_table);
+  EXPECT_EQ(7u, cross_ref_table->trailer_object_number());
+  const auto& objects_info = cross_ref_table->objects_info();
   EXPECT_EQ(2u, objects_info.size());
 
   // Skip over the first object, and continue parsing the remaining objects.