Take object index into account for compressed objects.

For a compressed object, the third column of its xref table entry stores
the object's index within the object stream that contains it. Since
CPDF_ObjectStream now stores its object info in a vector, simply index
into that vector to look up the compressed object's offset in the object
stream.

This resolves the ambiguity that arises when an object stream contains
multiple objects with the same object number.

Bug: pdfium:1733
Change-Id: I8a9f52160a3cca6c6c4322f746f9cc599499c21e
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/86374
Commit-Queue: Lei Zhang <thestig@chromium.org>
Reviewed-by: Tom Sepez <tsepez@chromium.org>
diff --git a/core/fpdfapi/parser/cpdf_cross_ref_table.cpp b/core/fpdfapi/parser/cpdf_cross_ref_table.cpp
index bae1962..6400208 100644
--- a/core/fpdfapi/parser/cpdf_cross_ref_table.cpp
+++ b/core/fpdfapi/parser/cpdf_cross_ref_table.cpp
@@ -33,7 +33,8 @@
 CPDF_CrossRefTable::~CPDF_CrossRefTable() = default;
 
 void CPDF_CrossRefTable::AddCompressed(uint32_t obj_num,
-                                       uint32_t archive_obj_num) {
+                                       uint32_t archive_obj_num,
+                                       uint32_t archive_obj_index) {
   if (obj_num >= CPDF_Parser::kMaxObjectNumber ||
       archive_obj_num >= CPDF_Parser::kMaxObjectNumber) {
     NOTREACHED();
@@ -48,7 +49,8 @@
     return;
 
   info.type = ObjectType::kCompressed;
-  info.archive_obj_num = archive_obj_num;
+  info.archive.obj_num = archive_obj_num;
+  info.archive.obj_index = archive_obj_index;
   info.gennum = 0;
 
   objects_info_[archive_obj_num].type = ObjectType::kObjStream;
diff --git a/core/fpdfapi/parser/cpdf_cross_ref_table.h b/core/fpdfapi/parser/cpdf_cross_ref_table.h
index c5f004d..304f5d7 100644
--- a/core/fpdfapi/parser/cpdf_cross_ref_table.h
+++ b/core/fpdfapi/parser/cpdf_cross_ref_table.h
@@ -28,12 +28,15 @@
 
   struct ObjectInfo {
     ObjectInfo() : pos(0), type(ObjectType::kFree), gennum(0) {}
-    // if type is ObjectType::kCompressed the archive_obj_num should be used.
-    // if type is ObjectType::kNotCompressed the pos should be used.
-    // In other cases its are unused.
+    // If `type` is `ObjectType::kCompressed`, `archive` should be used.
+    // If `type` is `ObjectType::kNotCompressed`, `pos` should be used.
+    // In other cases, it is unused.
     union {
       FX_FILESIZE pos;
-      uint32_t archive_obj_num;
+      struct {
+        uint32_t obj_num;
+        uint32_t obj_index;
+      } archive;
     };
     ObjectType type;
     uint16_t gennum;
@@ -48,7 +51,9 @@
   explicit CPDF_CrossRefTable(RetainPtr<CPDF_Dictionary> trailer);
   ~CPDF_CrossRefTable();
 
-  void AddCompressed(uint32_t obj_num, uint32_t archive_obj_num);
+  void AddCompressed(uint32_t obj_num,
+                     uint32_t archive_obj_num,
+                     uint32_t archive_obj_index);
   void AddNormal(uint32_t obj_num, uint16_t gen_num, FX_FILESIZE pos);
   void SetFree(uint32_t obj_num);
 
diff --git a/core/fpdfapi/parser/cpdf_object_stream.cpp b/core/fpdfapi/parser/cpdf_object_stream.cpp
index 320c48c..18c02be 100644
--- a/core/fpdfapi/parser/cpdf_object_stream.cpp
+++ b/core/fpdfapi/parser/cpdf_object_stream.cpp
@@ -16,7 +16,6 @@
 #include "core/fxcrt/cfx_readonlymemorystream.h"
 #include "core/fxcrt/fx_safe_types.h"
 #include "third_party/base/check.h"
-#include "third_party/base/containers/adapters.h"
 #include "third_party/base/ptr_util.h"
 
 namespace {
@@ -74,18 +73,20 @@
 
 RetainPtr<CPDF_Object> CPDF_ObjectStream::ParseObject(
     CPDF_IndirectObjectHolder* pObjList,
-    uint32_t obj_number) const {
-  for (const ObjectInfo& info : pdfium::base::Reversed(object_info_)) {
-    if (info.obj_num != obj_number)
-      continue;
+    uint32_t obj_number,
+    uint32_t archive_obj_index) const {
+  if (archive_obj_index >= object_info_.size())
+    return nullptr;
 
-    RetainPtr<CPDF_Object> result =
-        ParseObjectAtOffset(pObjList, info.obj_offset);
-    if (result)
-      result->SetObjNum(obj_number);
-    return result;
-  }
-  return nullptr;
+  const auto& info = object_info_[archive_obj_index];
+  if (info.obj_num != obj_number)
+    return nullptr;
+
+  RetainPtr<CPDF_Object> result =
+      ParseObjectAtOffset(pObjList, info.obj_offset);
+  if (result)
+    result->SetObjNum(obj_number);
+  return result;
 }
 
 void CPDF_ObjectStream::Init(const CPDF_Stream* stream) {
diff --git a/core/fpdfapi/parser/cpdf_object_stream.h b/core/fpdfapi/parser/cpdf_object_stream.h
index 5b3195e..bc09db3 100644
--- a/core/fpdfapi/parser/cpdf_object_stream.h
+++ b/core/fpdfapi/parser/cpdf_object_stream.h
@@ -36,7 +36,8 @@
   ~CPDF_ObjectStream();
 
   RetainPtr<CPDF_Object> ParseObject(CPDF_IndirectObjectHolder* pObjList,
-                                     uint32_t obj_number) const;
+                                     uint32_t obj_number,
+                                     uint32_t archive_obj_index) const;
   const std::vector<ObjectInfo>& object_info() const { return object_info_; }
 
  private:
diff --git a/core/fpdfapi/parser/cpdf_object_stream_unittest.cpp b/core/fpdfapi/parser/cpdf_object_stream_unittest.cpp
index ffabeea..835eb60 100644
--- a/core/fpdfapi/parser/cpdf_object_stream_unittest.cpp
+++ b/core/fpdfapi/parser/cpdf_object_stream_unittest.cpp
@@ -44,24 +44,36 @@
                           CPDF_ObjectStream::ObjectInfo(11, 14),
                           CPDF_ObjectStream::ObjectInfo(12, 21)));
 
+  // Check expected indices.
   CPDF_IndirectObjectHolder holder;
-  RetainPtr<CPDF_Object> obj10 = obj_stream->ParseObject(&holder, 10);
+  RetainPtr<CPDF_Object> obj10 = obj_stream->ParseObject(&holder, 10, 0);
   ASSERT_TRUE(obj10);
   EXPECT_EQ(10u, obj10->GetObjNum());
   EXPECT_EQ(0u, obj10->GetGenNum());
   EXPECT_TRUE(obj10->IsDictionary());
 
-  RetainPtr<CPDF_Object> obj11 = obj_stream->ParseObject(&holder, 11);
+  RetainPtr<CPDF_Object> obj11 = obj_stream->ParseObject(&holder, 11, 1);
   ASSERT_TRUE(obj11);
   EXPECT_EQ(11u, obj11->GetObjNum());
   EXPECT_EQ(0u, obj11->GetGenNum());
   EXPECT_TRUE(obj11->IsArray());
 
-  RetainPtr<CPDF_Object> obj12 = obj_stream->ParseObject(&holder, 12);
+  RetainPtr<CPDF_Object> obj12 = obj_stream->ParseObject(&holder, 12, 2);
   ASSERT_TRUE(obj12);
   EXPECT_EQ(12u, obj12->GetObjNum());
   EXPECT_EQ(0u, obj12->GetGenNum());
   EXPECT_TRUE(obj12->IsNumber());
+
+  // Check bad indices.
+  EXPECT_FALSE(obj_stream->ParseObject(&holder, 10, 1));
+  EXPECT_FALSE(obj_stream->ParseObject(&holder, 10, 2));
+  EXPECT_FALSE(obj_stream->ParseObject(&holder, 10, 3));
+  EXPECT_FALSE(obj_stream->ParseObject(&holder, 11, 0));
+  EXPECT_FALSE(obj_stream->ParseObject(&holder, 11, 2));
+  EXPECT_FALSE(obj_stream->ParseObject(&holder, 11, 3));
+  EXPECT_FALSE(obj_stream->ParseObject(&holder, 12, 0));
+  EXPECT_FALSE(obj_stream->ParseObject(&holder, 12, 1));
+  EXPECT_FALSE(obj_stream->ParseObject(&holder, 12, 3));
 }
 
 TEST(CPDF_ObjectStreamTest, StreamNoDict) {
@@ -195,9 +207,9 @@
                           CPDF_ObjectStream::ObjectInfo(12, 21)));
 
   CPDF_IndirectObjectHolder holder;
-  EXPECT_FALSE(obj_stream->ParseObject(&holder, 10));
-  EXPECT_FALSE(obj_stream->ParseObject(&holder, 11));
-  EXPECT_FALSE(obj_stream->ParseObject(&holder, 12));
+  EXPECT_FALSE(obj_stream->ParseObject(&holder, 10, 0));
+  EXPECT_FALSE(obj_stream->ParseObject(&holder, 11, 1));
+  EXPECT_FALSE(obj_stream->ParseObject(&holder, 12, 2));
 }
 
 TEST(CPDF_ObjectStreamTest, StreamDictTooFewCount) {
@@ -216,19 +228,19 @@
                           CPDF_ObjectStream::ObjectInfo(11, 14)));
 
   CPDF_IndirectObjectHolder holder;
-  RetainPtr<CPDF_Object> obj10 = obj_stream->ParseObject(&holder, 10);
+  RetainPtr<CPDF_Object> obj10 = obj_stream->ParseObject(&holder, 10, 0);
   ASSERT_TRUE(obj10);
   EXPECT_EQ(10u, obj10->GetObjNum());
   EXPECT_EQ(0u, obj10->GetGenNum());
   EXPECT_TRUE(obj10->IsDictionary());
 
-  RetainPtr<CPDF_Object> obj11 = obj_stream->ParseObject(&holder, 11);
+  RetainPtr<CPDF_Object> obj11 = obj_stream->ParseObject(&holder, 11, 1);
   ASSERT_TRUE(obj11);
   EXPECT_EQ(11u, obj11->GetObjNum());
   EXPECT_EQ(0u, obj11->GetGenNum());
   EXPECT_TRUE(obj11->IsArray());
 
-  EXPECT_FALSE(obj_stream->ParseObject(&holder, 12));
+  EXPECT_FALSE(obj_stream->ParseObject(&holder, 12, 2));
 }
 
 TEST(CPDF_ObjectStreamTest, StreamDictTooManyObject) {
@@ -250,7 +262,11 @@
                           CPDF_ObjectStream::ObjectInfo(2, 3)));
 
   CPDF_IndirectObjectHolder holder;
-  EXPECT_FALSE(obj_stream->ParseObject(&holder, 2));
+  EXPECT_FALSE(obj_stream->ParseObject(&holder, 2, 0));
+  EXPECT_FALSE(obj_stream->ParseObject(&holder, 2, 1));
+  EXPECT_FALSE(obj_stream->ParseObject(&holder, 2, 2));
+  EXPECT_FALSE(obj_stream->ParseObject(&holder, 2, 3));
+  EXPECT_FALSE(obj_stream->ParseObject(&holder, 2, 4));
 }
 
 TEST(CPDF_ObjectStreamTest, StreamDictGarbageObjNum) {
@@ -289,13 +305,13 @@
                           CPDF_ObjectStream::ObjectInfo(12, 21)));
 
   CPDF_IndirectObjectHolder holder;
-  RetainPtr<CPDF_Object> obj10 = obj_stream->ParseObject(&holder, 10);
+  RetainPtr<CPDF_Object> obj10 = obj_stream->ParseObject(&holder, 10, 0);
   ASSERT_TRUE(obj10);
   EXPECT_EQ(10u, obj10->GetObjNum());
   EXPECT_EQ(0u, obj10->GetGenNum());
   EXPECT_TRUE(obj10->IsDictionary());
 
-  RetainPtr<CPDF_Object> obj11 = obj_stream->ParseObject(&holder, 11);
+  RetainPtr<CPDF_Object> obj11 = obj_stream->ParseObject(&holder, 11, 1);
   ASSERT_TRUE(obj11);
   EXPECT_EQ(11u, obj11->GetObjNum());
   EXPECT_EQ(0u, obj11->GetGenNum());
@@ -321,7 +337,7 @@
                           CPDF_ObjectStream::ObjectInfo(12, 21)));
 
   CPDF_IndirectObjectHolder holder;
-  EXPECT_FALSE(obj_stream->ParseObject(&holder, 11));
+  EXPECT_FALSE(obj_stream->ParseObject(&holder, 11, 1));
 }
 
 TEST(CPDF_ObjectStreamTest, StreamDictObjectOffsetTooBig) {
@@ -343,7 +359,7 @@
                           CPDF_ObjectStream::ObjectInfo(12, 21)));
 
   CPDF_IndirectObjectHolder holder;
-  EXPECT_FALSE(obj_stream->ParseObject(&holder, 11));
+  EXPECT_FALSE(obj_stream->ParseObject(&holder, 11, 1));
 }
 
 TEST(CPDF_ObjectStreamTest, StreamDictDuplicateObjNum) {
@@ -358,20 +374,28 @@
   auto obj_stream = CPDF_ObjectStream::Create(stream.Get());
   ASSERT_TRUE(obj_stream);
 
-  // TODO(thestig): Should object 10 be at offset 0 instead?
   EXPECT_THAT(obj_stream->object_info(),
               ElementsAre(CPDF_ObjectStream::ObjectInfo(10, 0),
                           CPDF_ObjectStream::ObjectInfo(10, 14),
                           CPDF_ObjectStream::ObjectInfo(12, 21)));
 
   CPDF_IndirectObjectHolder holder;
-  RetainPtr<CPDF_Object> obj10 = obj_stream->ParseObject(&holder, 10);
+  RetainPtr<CPDF_Object> obj10 = obj_stream->ParseObject(&holder, 10, 0);
+  ASSERT_TRUE(obj10);
+  EXPECT_EQ(10u, obj10->GetObjNum());
+  EXPECT_EQ(0u, obj10->GetGenNum());
+  EXPECT_TRUE(obj10->IsDictionary());
+
+  obj10 = obj_stream->ParseObject(&holder, 10, 1);
   ASSERT_TRUE(obj10);
   EXPECT_EQ(10u, obj10->GetObjNum());
   EXPECT_EQ(0u, obj10->GetGenNum());
   EXPECT_TRUE(obj10->IsArray());
 
-  RetainPtr<CPDF_Object> obj12 = obj_stream->ParseObject(&holder, 12);
+  EXPECT_FALSE(obj_stream->ParseObject(&holder, 10, 2));
+  EXPECT_FALSE(obj_stream->ParseObject(&holder, 10, 3));
+
+  RetainPtr<CPDF_Object> obj12 = obj_stream->ParseObject(&holder, 12, 2);
   ASSERT_TRUE(obj12);
   EXPECT_EQ(12u, obj12->GetObjNum());
   EXPECT_EQ(0u, obj12->GetGenNum());
@@ -398,19 +422,19 @@
                           CPDF_ObjectStream::ObjectInfo(10, 21)));
 
   CPDF_IndirectObjectHolder holder;
-  RetainPtr<CPDF_Object> obj10 = obj_stream->ParseObject(&holder, 10);
+  RetainPtr<CPDF_Object> obj10 = obj_stream->ParseObject(&holder, 10, 2);
   ASSERT_TRUE(obj10);
   EXPECT_EQ(10u, obj10->GetObjNum());
   EXPECT_EQ(0u, obj10->GetGenNum());
   EXPECT_TRUE(obj10->IsNumber());
 
-  RetainPtr<CPDF_Object> obj11 = obj_stream->ParseObject(&holder, 11);
+  RetainPtr<CPDF_Object> obj11 = obj_stream->ParseObject(&holder, 11, 0);
   ASSERT_TRUE(obj11);
   EXPECT_EQ(11u, obj11->GetObjNum());
   EXPECT_EQ(0u, obj11->GetGenNum());
   EXPECT_TRUE(obj11->IsDictionary());
 
-  RetainPtr<CPDF_Object> obj12 = obj_stream->ParseObject(&holder, 12);
+  RetainPtr<CPDF_Object> obj12 = obj_stream->ParseObject(&holder, 12, 1);
   ASSERT_TRUE(obj12);
   EXPECT_EQ(12u, obj12->GetObjNum());
   EXPECT_EQ(0u, obj12->GetGenNum());
@@ -439,19 +463,19 @@
                           CPDF_ObjectStream::ObjectInfo(12, 14)));
 
   CPDF_IndirectObjectHolder holder;
-  RetainPtr<CPDF_Object> obj10 = obj_stream->ParseObject(&holder, 10);
+  RetainPtr<CPDF_Object> obj10 = obj_stream->ParseObject(&holder, 10, 0);
   ASSERT_TRUE(obj10);
   EXPECT_EQ(10u, obj10->GetObjNum());
   EXPECT_EQ(0u, obj10->GetGenNum());
   EXPECT_TRUE(obj10->IsNumber());
 
-  RetainPtr<CPDF_Object> obj11 = obj_stream->ParseObject(&holder, 11);
+  RetainPtr<CPDF_Object> obj11 = obj_stream->ParseObject(&holder, 11, 1);
   ASSERT_TRUE(obj11);
   EXPECT_EQ(11u, obj11->GetObjNum());
   EXPECT_EQ(0u, obj11->GetGenNum());
   EXPECT_TRUE(obj11->IsDictionary());
 
-  RetainPtr<CPDF_Object> obj12 = obj_stream->ParseObject(&holder, 12);
+  RetainPtr<CPDF_Object> obj12 = obj_stream->ParseObject(&holder, 12, 2);
   ASSERT_TRUE(obj12);
   EXPECT_EQ(12u, obj12->GetObjNum());
   EXPECT_EQ(0u, obj12->GetGenNum());
diff --git a/core/fpdfapi/parser/cpdf_parser.cpp b/core/fpdfapi/parser/cpdf_parser.cpp
index bed2a14..07b8d5b 100644
--- a/core/fpdfapi/parser/cpdf_parser.cpp
+++ b/core/fpdfapi/parser/cpdf_parser.cpp
@@ -621,7 +621,8 @@
         m_CrossRefTable->AddNormal(obj.obj_num, obj.info.gennum, obj.info.pos);
         break;
       case ObjectType::kCompressed:
-        m_CrossRefTable->AddCompressed(obj.obj_num, obj.info.archive_obj_num);
+        m_CrossRefTable->AddCompressed(obj.obj_num, obj.info.archive.obj_num,
+                                       obj.info.archive.obj_index);
         break;
       default:
         NOTREACHED();
@@ -699,11 +700,13 @@
 
       if (obj_num < kMaxObjectNumber) {
         cross_ref_table->AddNormal(obj_num, gen_num, obj_pos);
-        if (const auto object_stream =
-                CPDF_ObjectStream::Create(pStream.Get())) {
-          for (const auto& info : object_stream->object_info()) {
+        const auto object_stream = CPDF_ObjectStream::Create(pStream.Get());
+        if (object_stream) {
+          const auto& object_info = object_stream->object_info();
+          for (size_t i = 0; i < object_info.size(); ++i) {
+            const auto& info = object_info[i];
             if (info.obj_num < kMaxObjectNumber)
-              cross_ref_table->AddCompressed(info.obj_num, obj_num);
+              cross_ref_table->AddCompressed(info.obj_num, obj_num, i);
           }
         }
       }
@@ -842,7 +845,9 @@
   if (!IsValidObjectNumber(archive_obj_num))
     return;
 
-  m_CrossRefTable->AddCompressed(obj_num, archive_obj_num);
+  const uint32_t archive_obj_index = GetVarInt(
+      entry_span.subspan(field_widths[0] + field_widths[1], field_widths[2]));
+  m_CrossRefTable->AddCompressed(obj_num, archive_obj_num, archive_obj_index);
 }
 
 const CPDF_Array* CPDF_Parser::GetIDArray() const {
@@ -925,12 +930,13 @@
   if (GetObjectType(objnum) != ObjectType::kCompressed)
     return nullptr;
 
-  const CPDF_ObjectStream* pObjStream =
-      GetObjectStream(m_CrossRefTable->GetObjectInfo(objnum)->archive_obj_num);
+  const ObjectInfo& info = *m_CrossRefTable->GetObjectInfo(objnum);
+  const CPDF_ObjectStream* pObjStream = GetObjectStream(info.archive.obj_num);
   if (!pObjStream)
     return nullptr;
 
-  return pObjStream->ParseObject(m_pObjectsHolder.Get(), objnum);
+  return pObjStream->ParseObject(m_pObjectsHolder.Get(), objnum,
+                                 info.archive.obj_index);
 }
 
 const CPDF_ObjectStream* CPDF_Parser::GetObjectStream(uint32_t object_number) {
diff --git a/testing/SUPPRESSIONS b/testing/SUPPRESSIONS
index 3d687da..1ed29ab 100644
--- a/testing/SUPPRESSIONS
+++ b/testing/SUPPRESSIONS
@@ -344,9 +344,6 @@
 # TODO(pdfium:1571): Remove after associated bug is fixed
 bug_1571.in * * * *
 
-# TODO(pdfium:1733): Remove after associated bug is fixed
-bug_1733.in * * * *
-
 # TODO(chromium:237527): Remove after associated bug is fixed
 bug_237527_1.in * * * *