Change CPDF_ObjectStream's underlying storage to a vector.

Use a vector because the object stream is intended to be accessed using
indices into the vector. This CL is a stepping stone towards that goal.
Though it temporarily makes object access slower by requiring a vector
traversal.

Bug: pdfium:1733
Change-Id: I6124290bc9add0ca7d13a3a069ec7f98f4d14e99
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/86373
Reviewed-by: Tom Sepez <tsepez@chromium.org>
Commit-Queue: Lei Zhang <thestig@chromium.org>
diff --git a/core/fpdfapi/parser/cpdf_object_stream.cpp b/core/fpdfapi/parser/cpdf_object_stream.cpp
index b6646f9..320c48c 100644
--- a/core/fpdfapi/parser/cpdf_object_stream.cpp
+++ b/core/fpdfapi/parser/cpdf_object_stream.cpp
@@ -16,7 +16,7 @@
 #include "core/fxcrt/cfx_readonlymemorystream.h"
 #include "core/fxcrt/fx_safe_types.h"
 #include "third_party/base/check.h"
-#include "third_party/base/containers/contains.h"
+#include "third_party/base/containers/adapters.h"
 #include "third_party/base/ptr_util.h"
 
 namespace {
@@ -72,23 +72,20 @@
 
 CPDF_ObjectStream::~CPDF_ObjectStream() = default;
 
-bool CPDF_ObjectStream::HasObject(uint32_t obj_number) const {
-  return pdfium::Contains(objects_offsets_, obj_number);
-}
-
 RetainPtr<CPDF_Object> CPDF_ObjectStream::ParseObject(
     CPDF_IndirectObjectHolder* pObjList,
     uint32_t obj_number) const {
-  const auto it = objects_offsets_.find(obj_number);
-  if (it == objects_offsets_.end())
-    return nullptr;
+  for (const ObjectInfo& info : pdfium::base::Reversed(object_info_)) {
+    if (info.obj_num != obj_number)
+      continue;
 
-  RetainPtr<CPDF_Object> result = ParseObjectAtOffset(pObjList, it->second);
-  if (!result)
-    return nullptr;
-
-  result->SetObjNum(obj_number);
-  return result;
+    RetainPtr<CPDF_Object> result =
+        ParseObjectAtOffset(pObjList, info.obj_offset);
+    if (result)
+      result->SetObjNum(obj_number);
+    return result;
+  }
+  return nullptr;
 }
 
 void CPDF_ObjectStream::Init(const CPDF_Stream* stream) {
@@ -111,7 +108,7 @@
     if (!obj_num)
       continue;
 
-    objects_offsets_[obj_num] = obj_offset;
+    object_info_.push_back({obj_num, obj_offset});
   }
 }
 
diff --git a/core/fpdfapi/parser/cpdf_object_stream.h b/core/fpdfapi/parser/cpdf_object_stream.h
index 394fd53..5b3195e 100644
--- a/core/fpdfapi/parser/cpdf_object_stream.h
+++ b/core/fpdfapi/parser/cpdf_object_stream.h
@@ -5,8 +5,8 @@
 #ifndef CORE_FPDFAPI_PARSER_CPDF_OBJECT_STREAM_H_
 #define CORE_FPDFAPI_PARSER_CPDF_OBJECT_STREAM_H_
 
-#include <map>
 #include <memory>
+#include <vector>
 
 #include "core/fpdfapi/parser/cpdf_object.h"
 #include "core/fxcrt/retain_ptr.h"
@@ -19,16 +19,25 @@
 // See ISO 32000-1:2008 spec, section 7.5.7.
 class CPDF_ObjectStream {
  public:
+  struct ObjectInfo {
+    ObjectInfo(uint32_t obj_num, uint32_t obj_offset)
+        : obj_num(obj_num), obj_offset(obj_offset) {}
+
+    bool operator==(const ObjectInfo& that) const {
+      return obj_num == that.obj_num && obj_offset == that.obj_offset;
+    }
+
+    uint32_t obj_num;
+    uint32_t obj_offset;
+  };
+
   static std::unique_ptr<CPDF_ObjectStream> Create(const CPDF_Stream* stream);
 
   ~CPDF_ObjectStream();
 
-  bool HasObject(uint32_t obj_number) const;
   RetainPtr<CPDF_Object> ParseObject(CPDF_IndirectObjectHolder* pObjList,
                                      uint32_t obj_number) const;
-  const std::map<uint32_t, uint32_t>& objects_offsets() const {
-    return objects_offsets_;
-  }
+  const std::vector<ObjectInfo>& object_info() const { return object_info_; }
 
  private:
   explicit CPDF_ObjectStream(const CPDF_Stream* stream);
@@ -40,7 +49,7 @@
 
   RetainPtr<IFX_SeekableReadStream> data_stream_;
   int first_object_offset_ = 0;
-  std::map<uint32_t, uint32_t> objects_offsets_;
+  std::vector<ObjectInfo> object_info_;
 };
 
 #endif  // CORE_FPDFAPI_PARSER_CPDF_OBJECT_STREAM_H_
diff --git a/core/fpdfapi/parser/cpdf_object_stream_unittest.cpp b/core/fpdfapi/parser/cpdf_object_stream_unittest.cpp
index 87b5cbf..ffabeea 100644
--- a/core/fpdfapi/parser/cpdf_object_stream_unittest.cpp
+++ b/core/fpdfapi/parser/cpdf_object_stream_unittest.cpp
@@ -15,7 +15,6 @@
 #include "third_party/base/cxx17_backports.h"
 
 using testing::ElementsAre;
-using testing::Pair;
 
 namespace {
 
@@ -40,8 +39,10 @@
   auto obj_stream = CPDF_ObjectStream::Create(stream.Get());
   ASSERT_TRUE(obj_stream);
 
-  EXPECT_THAT(obj_stream->objects_offsets(),
-              ElementsAre(Pair(10, 0), Pair(11, 14), Pair(12, 21)));
+  EXPECT_THAT(obj_stream->object_info(),
+              ElementsAre(CPDF_ObjectStream::ObjectInfo(10, 0),
+                          CPDF_ObjectStream::ObjectInfo(11, 14),
+                          CPDF_ObjectStream::ObjectInfo(12, 21)));
 
   CPDF_IndirectObjectHolder holder;
   RetainPtr<CPDF_Object> obj10 = obj_stream->ParseObject(&holder, 10);
@@ -188,8 +189,10 @@
   auto obj_stream = CPDF_ObjectStream::Create(stream.Get());
   ASSERT_TRUE(obj_stream);
 
-  EXPECT_THAT(obj_stream->objects_offsets(),
-              ElementsAre(Pair(10, 0), Pair(11, 14), Pair(12, 21)));
+  EXPECT_THAT(obj_stream->object_info(),
+              ElementsAre(CPDF_ObjectStream::ObjectInfo(10, 0),
+                          CPDF_ObjectStream::ObjectInfo(11, 14),
+                          CPDF_ObjectStream::ObjectInfo(12, 21)));
 
   CPDF_IndirectObjectHolder holder;
   EXPECT_FALSE(obj_stream->ParseObject(&holder, 10));
@@ -208,8 +211,9 @@
   auto obj_stream = CPDF_ObjectStream::Create(stream.Get());
   ASSERT_TRUE(obj_stream);
 
-  EXPECT_THAT(obj_stream->objects_offsets(),
-              ElementsAre(Pair(10, 0), Pair(11, 14)));
+  EXPECT_THAT(obj_stream->object_info(),
+              ElementsAre(CPDF_ObjectStream::ObjectInfo(10, 0),
+                          CPDF_ObjectStream::ObjectInfo(11, 14)));
 
   CPDF_IndirectObjectHolder holder;
   RetainPtr<CPDF_Object> obj10 = obj_stream->ParseObject(&holder, 10);
@@ -239,8 +243,11 @@
   ASSERT_TRUE(obj_stream);
 
   // TODO(thestig): Can this avoid finding object 2?
-  EXPECT_THAT(obj_stream->objects_offsets(),
-              ElementsAre(Pair(2, 3), Pair(10, 0), Pair(11, 14), Pair(12, 21)));
+  EXPECT_THAT(obj_stream->object_info(),
+              ElementsAre(CPDF_ObjectStream::ObjectInfo(10, 0),
+                          CPDF_ObjectStream::ObjectInfo(11, 14),
+                          CPDF_ObjectStream::ObjectInfo(12, 21),
+                          CPDF_ObjectStream::ObjectInfo(2, 3)));
 
   CPDF_IndirectObjectHolder holder;
   EXPECT_FALSE(obj_stream->ParseObject(&holder, 2));
@@ -258,8 +265,9 @@
   auto obj_stream = CPDF_ObjectStream::Create(stream.Get());
   ASSERT_TRUE(obj_stream);
 
-  EXPECT_THAT(obj_stream->objects_offsets(),
-              ElementsAre(Pair(10, 0), Pair(12, 21)));
+  EXPECT_THAT(obj_stream->object_info(),
+              ElementsAre(CPDF_ObjectStream::ObjectInfo(10, 0),
+                          CPDF_ObjectStream::ObjectInfo(12, 21)));
 }
 
 TEST(CPDF_ObjectStreamTest, StreamDictGarbageObjectOffset) {
@@ -275,8 +283,10 @@
   ASSERT_TRUE(obj_stream);
 
   // TODO(thestig): Should object 11 be rejected?
-  EXPECT_THAT(obj_stream->objects_offsets(),
-              ElementsAre(Pair(10, 0), Pair(11, 0), Pair(12, 21)));
+  EXPECT_THAT(obj_stream->object_info(),
+              ElementsAre(CPDF_ObjectStream::ObjectInfo(10, 0),
+                          CPDF_ObjectStream::ObjectInfo(11, 0),
+                          CPDF_ObjectStream::ObjectInfo(12, 21)));
 
   CPDF_IndirectObjectHolder holder;
   RetainPtr<CPDF_Object> obj10 = obj_stream->ParseObject(&holder, 10);
@@ -305,8 +315,10 @@
   ASSERT_TRUE(obj_stream);
 
   // TODO(thestig): Should object 11 be rejected?
-  EXPECT_THAT(obj_stream->objects_offsets(),
-              ElementsAre(Pair(10, 0), Pair(11, 4294967295), Pair(12, 21)));
+  EXPECT_THAT(obj_stream->object_info(),
+              ElementsAre(CPDF_ObjectStream::ObjectInfo(10, 0),
+                          CPDF_ObjectStream::ObjectInfo(11, 4294967295),
+                          CPDF_ObjectStream::ObjectInfo(12, 21)));
 
   CPDF_IndirectObjectHolder holder;
   EXPECT_FALSE(obj_stream->ParseObject(&holder, 11));
@@ -325,8 +337,10 @@
   ASSERT_TRUE(obj_stream);
 
   // TODO(thestig): Should object 11 be rejected?
-  EXPECT_THAT(obj_stream->objects_offsets(),
-              ElementsAre(Pair(10, 0), Pair(11, 999), Pair(12, 21)));
+  EXPECT_THAT(obj_stream->object_info(),
+              ElementsAre(CPDF_ObjectStream::ObjectInfo(10, 0),
+                          CPDF_ObjectStream::ObjectInfo(11, 999),
+                          CPDF_ObjectStream::ObjectInfo(12, 21)));
 
   CPDF_IndirectObjectHolder holder;
   EXPECT_FALSE(obj_stream->ParseObject(&holder, 11));
@@ -345,8 +359,10 @@
   ASSERT_TRUE(obj_stream);
 
   // TODO(thestig): Should object 10 be at offset 0 instead?
-  EXPECT_THAT(obj_stream->objects_offsets(),
-              ElementsAre(Pair(10, 14), Pair(12, 21)));
+  EXPECT_THAT(obj_stream->object_info(),
+              ElementsAre(CPDF_ObjectStream::ObjectInfo(10, 0),
+                          CPDF_ObjectStream::ObjectInfo(10, 14),
+                          CPDF_ObjectStream::ObjectInfo(12, 21)));
 
   CPDF_IndirectObjectHolder holder;
   RetainPtr<CPDF_Object> obj10 = obj_stream->ParseObject(&holder, 10);
@@ -376,8 +392,10 @@
   auto obj_stream = CPDF_ObjectStream::Create(stream.Get());
   ASSERT_TRUE(obj_stream);
 
-  EXPECT_THAT(obj_stream->objects_offsets(),
-              ElementsAre(Pair(10, 21), Pair(11, 0), Pair(12, 14)));
+  EXPECT_THAT(obj_stream->object_info(),
+              ElementsAre(CPDF_ObjectStream::ObjectInfo(11, 0),
+                          CPDF_ObjectStream::ObjectInfo(12, 14),
+                          CPDF_ObjectStream::ObjectInfo(10, 21)));
 
   CPDF_IndirectObjectHolder holder;
   RetainPtr<CPDF_Object> obj10 = obj_stream->ParseObject(&holder, 10);
@@ -415,8 +433,10 @@
   auto obj_stream = CPDF_ObjectStream::Create(stream.Get());
   ASSERT_TRUE(obj_stream);
 
-  EXPECT_THAT(obj_stream->objects_offsets(),
-              ElementsAre(Pair(10, 21), Pair(11, 0), Pair(12, 14)));
+  EXPECT_THAT(obj_stream->object_info(),
+              ElementsAre(CPDF_ObjectStream::ObjectInfo(10, 21),
+                          CPDF_ObjectStream::ObjectInfo(11, 0),
+                          CPDF_ObjectStream::ObjectInfo(12, 14)));
 
   CPDF_IndirectObjectHolder holder;
   RetainPtr<CPDF_Object> obj10 = obj_stream->ParseObject(&holder, 10);
diff --git a/core/fpdfapi/parser/cpdf_parser.cpp b/core/fpdfapi/parser/cpdf_parser.cpp
index 2cf653a..bed2a14 100644
--- a/core/fpdfapi/parser/cpdf_parser.cpp
+++ b/core/fpdfapi/parser/cpdf_parser.cpp
@@ -701,9 +701,9 @@
         cross_ref_table->AddNormal(obj_num, gen_num, obj_pos);
         if (const auto object_stream =
                 CPDF_ObjectStream::Create(pStream.Get())) {
-          for (const auto& it : object_stream->objects_offsets()) {
-            if (it.first < kMaxObjectNumber)
-              cross_ref_table->AddCompressed(it.first, obj_num);
+          for (const auto& info : object_stream->object_info()) {
+            if (info.obj_num < kMaxObjectNumber)
+              cross_ref_table->AddCompressed(info.obj_num, obj_num);
           }
         }
       }