Change CPDF_ObjectStream's underlying storage to a vector.
Use a vector because the object stream is intended to be accessed using
indices into the vector. This CL is a stepping stone towards that goal.
Though it temporarily makes object access slower by requiring a vector
traversal.
Bug: pdfium:1733
Change-Id: I6124290bc9add0ca7d13a3a069ec7f98f4d14e99
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/86373
Reviewed-by: Tom Sepez <tsepez@chromium.org>
Commit-Queue: Lei Zhang <thestig@chromium.org>
diff --git a/core/fpdfapi/parser/cpdf_object_stream.cpp b/core/fpdfapi/parser/cpdf_object_stream.cpp
index b6646f9..320c48c 100644
--- a/core/fpdfapi/parser/cpdf_object_stream.cpp
+++ b/core/fpdfapi/parser/cpdf_object_stream.cpp
@@ -16,7 +16,7 @@
#include "core/fxcrt/cfx_readonlymemorystream.h"
#include "core/fxcrt/fx_safe_types.h"
#include "third_party/base/check.h"
-#include "third_party/base/containers/contains.h"
+#include "third_party/base/containers/adapters.h"
#include "third_party/base/ptr_util.h"
namespace {
@@ -72,23 +72,20 @@
CPDF_ObjectStream::~CPDF_ObjectStream() = default;
-bool CPDF_ObjectStream::HasObject(uint32_t obj_number) const {
- return pdfium::Contains(objects_offsets_, obj_number);
-}
-
RetainPtr<CPDF_Object> CPDF_ObjectStream::ParseObject(
CPDF_IndirectObjectHolder* pObjList,
uint32_t obj_number) const {
- const auto it = objects_offsets_.find(obj_number);
- if (it == objects_offsets_.end())
- return nullptr;
+ for (const ObjectInfo& info : pdfium::base::Reversed(object_info_)) {
+ if (info.obj_num != obj_number)
+ continue;
- RetainPtr<CPDF_Object> result = ParseObjectAtOffset(pObjList, it->second);
- if (!result)
- return nullptr;
-
- result->SetObjNum(obj_number);
- return result;
+ RetainPtr<CPDF_Object> result =
+ ParseObjectAtOffset(pObjList, info.obj_offset);
+ if (result)
+ result->SetObjNum(obj_number);
+ return result;
+ }
+ return nullptr;
}
void CPDF_ObjectStream::Init(const CPDF_Stream* stream) {
@@ -111,7 +108,7 @@
if (!obj_num)
continue;
- objects_offsets_[obj_num] = obj_offset;
+ object_info_.push_back({obj_num, obj_offset});
}
}
diff --git a/core/fpdfapi/parser/cpdf_object_stream.h b/core/fpdfapi/parser/cpdf_object_stream.h
index 394fd53..5b3195e 100644
--- a/core/fpdfapi/parser/cpdf_object_stream.h
+++ b/core/fpdfapi/parser/cpdf_object_stream.h
@@ -5,8 +5,8 @@
#ifndef CORE_FPDFAPI_PARSER_CPDF_OBJECT_STREAM_H_
#define CORE_FPDFAPI_PARSER_CPDF_OBJECT_STREAM_H_
-#include <map>
#include <memory>
+#include <vector>
#include "core/fpdfapi/parser/cpdf_object.h"
#include "core/fxcrt/retain_ptr.h"
@@ -19,16 +19,25 @@
// See ISO 32000-1:2008 spec, section 7.5.7.
class CPDF_ObjectStream {
public:
+ struct ObjectInfo {
+ ObjectInfo(uint32_t obj_num, uint32_t obj_offset)
+ : obj_num(obj_num), obj_offset(obj_offset) {}
+
+ bool operator==(const ObjectInfo& that) const {
+ return obj_num == that.obj_num && obj_offset == that.obj_offset;
+ }
+
+ uint32_t obj_num;
+ uint32_t obj_offset;
+ };
+
static std::unique_ptr<CPDF_ObjectStream> Create(const CPDF_Stream* stream);
~CPDF_ObjectStream();
- bool HasObject(uint32_t obj_number) const;
RetainPtr<CPDF_Object> ParseObject(CPDF_IndirectObjectHolder* pObjList,
uint32_t obj_number) const;
- const std::map<uint32_t, uint32_t>& objects_offsets() const {
- return objects_offsets_;
- }
+ const std::vector<ObjectInfo>& object_info() const { return object_info_; }
private:
explicit CPDF_ObjectStream(const CPDF_Stream* stream);
@@ -40,7 +49,7 @@
RetainPtr<IFX_SeekableReadStream> data_stream_;
int first_object_offset_ = 0;
- std::map<uint32_t, uint32_t> objects_offsets_;
+ std::vector<ObjectInfo> object_info_;
};
#endif // CORE_FPDFAPI_PARSER_CPDF_OBJECT_STREAM_H_
diff --git a/core/fpdfapi/parser/cpdf_object_stream_unittest.cpp b/core/fpdfapi/parser/cpdf_object_stream_unittest.cpp
index 87b5cbf..ffabeea 100644
--- a/core/fpdfapi/parser/cpdf_object_stream_unittest.cpp
+++ b/core/fpdfapi/parser/cpdf_object_stream_unittest.cpp
@@ -15,7 +15,6 @@
#include "third_party/base/cxx17_backports.h"
using testing::ElementsAre;
-using testing::Pair;
namespace {
@@ -40,8 +39,10 @@
auto obj_stream = CPDF_ObjectStream::Create(stream.Get());
ASSERT_TRUE(obj_stream);
- EXPECT_THAT(obj_stream->objects_offsets(),
- ElementsAre(Pair(10, 0), Pair(11, 14), Pair(12, 21)));
+ EXPECT_THAT(obj_stream->object_info(),
+ ElementsAre(CPDF_ObjectStream::ObjectInfo(10, 0),
+ CPDF_ObjectStream::ObjectInfo(11, 14),
+ CPDF_ObjectStream::ObjectInfo(12, 21)));
CPDF_IndirectObjectHolder holder;
RetainPtr<CPDF_Object> obj10 = obj_stream->ParseObject(&holder, 10);
@@ -188,8 +189,10 @@
auto obj_stream = CPDF_ObjectStream::Create(stream.Get());
ASSERT_TRUE(obj_stream);
- EXPECT_THAT(obj_stream->objects_offsets(),
- ElementsAre(Pair(10, 0), Pair(11, 14), Pair(12, 21)));
+ EXPECT_THAT(obj_stream->object_info(),
+ ElementsAre(CPDF_ObjectStream::ObjectInfo(10, 0),
+ CPDF_ObjectStream::ObjectInfo(11, 14),
+ CPDF_ObjectStream::ObjectInfo(12, 21)));
CPDF_IndirectObjectHolder holder;
EXPECT_FALSE(obj_stream->ParseObject(&holder, 10));
@@ -208,8 +211,9 @@
auto obj_stream = CPDF_ObjectStream::Create(stream.Get());
ASSERT_TRUE(obj_stream);
- EXPECT_THAT(obj_stream->objects_offsets(),
- ElementsAre(Pair(10, 0), Pair(11, 14)));
+ EXPECT_THAT(obj_stream->object_info(),
+ ElementsAre(CPDF_ObjectStream::ObjectInfo(10, 0),
+ CPDF_ObjectStream::ObjectInfo(11, 14)));
CPDF_IndirectObjectHolder holder;
RetainPtr<CPDF_Object> obj10 = obj_stream->ParseObject(&holder, 10);
@@ -239,8 +243,11 @@
ASSERT_TRUE(obj_stream);
// TODO(thestig): Can this avoid finding object 2?
- EXPECT_THAT(obj_stream->objects_offsets(),
- ElementsAre(Pair(2, 3), Pair(10, 0), Pair(11, 14), Pair(12, 21)));
+ EXPECT_THAT(obj_stream->object_info(),
+ ElementsAre(CPDF_ObjectStream::ObjectInfo(10, 0),
+ CPDF_ObjectStream::ObjectInfo(11, 14),
+ CPDF_ObjectStream::ObjectInfo(12, 21),
+ CPDF_ObjectStream::ObjectInfo(2, 3)));
CPDF_IndirectObjectHolder holder;
EXPECT_FALSE(obj_stream->ParseObject(&holder, 2));
@@ -258,8 +265,9 @@
auto obj_stream = CPDF_ObjectStream::Create(stream.Get());
ASSERT_TRUE(obj_stream);
- EXPECT_THAT(obj_stream->objects_offsets(),
- ElementsAre(Pair(10, 0), Pair(12, 21)));
+ EXPECT_THAT(obj_stream->object_info(),
+ ElementsAre(CPDF_ObjectStream::ObjectInfo(10, 0),
+ CPDF_ObjectStream::ObjectInfo(12, 21)));
}
TEST(CPDF_ObjectStreamTest, StreamDictGarbageObjectOffset) {
@@ -275,8 +283,10 @@
ASSERT_TRUE(obj_stream);
// TODO(thestig): Should object 11 be rejected?
- EXPECT_THAT(obj_stream->objects_offsets(),
- ElementsAre(Pair(10, 0), Pair(11, 0), Pair(12, 21)));
+ EXPECT_THAT(obj_stream->object_info(),
+ ElementsAre(CPDF_ObjectStream::ObjectInfo(10, 0),
+ CPDF_ObjectStream::ObjectInfo(11, 0),
+ CPDF_ObjectStream::ObjectInfo(12, 21)));
CPDF_IndirectObjectHolder holder;
RetainPtr<CPDF_Object> obj10 = obj_stream->ParseObject(&holder, 10);
@@ -305,8 +315,10 @@
ASSERT_TRUE(obj_stream);
// TODO(thestig): Should object 11 be rejected?
- EXPECT_THAT(obj_stream->objects_offsets(),
- ElementsAre(Pair(10, 0), Pair(11, 4294967295), Pair(12, 21)));
+ EXPECT_THAT(obj_stream->object_info(),
+ ElementsAre(CPDF_ObjectStream::ObjectInfo(10, 0),
+ CPDF_ObjectStream::ObjectInfo(11, 4294967295),
+ CPDF_ObjectStream::ObjectInfo(12, 21)));
CPDF_IndirectObjectHolder holder;
EXPECT_FALSE(obj_stream->ParseObject(&holder, 11));
@@ -325,8 +337,10 @@
ASSERT_TRUE(obj_stream);
// TODO(thestig): Should object 11 be rejected?
- EXPECT_THAT(obj_stream->objects_offsets(),
- ElementsAre(Pair(10, 0), Pair(11, 999), Pair(12, 21)));
+ EXPECT_THAT(obj_stream->object_info(),
+ ElementsAre(CPDF_ObjectStream::ObjectInfo(10, 0),
+ CPDF_ObjectStream::ObjectInfo(11, 999),
+ CPDF_ObjectStream::ObjectInfo(12, 21)));
CPDF_IndirectObjectHolder holder;
EXPECT_FALSE(obj_stream->ParseObject(&holder, 11));
@@ -345,8 +359,10 @@
ASSERT_TRUE(obj_stream);
// TODO(thestig): Should object 10 be at offset 0 instead?
- EXPECT_THAT(obj_stream->objects_offsets(),
- ElementsAre(Pair(10, 14), Pair(12, 21)));
+ EXPECT_THAT(obj_stream->object_info(),
+ ElementsAre(CPDF_ObjectStream::ObjectInfo(10, 0),
+ CPDF_ObjectStream::ObjectInfo(10, 14),
+ CPDF_ObjectStream::ObjectInfo(12, 21)));
CPDF_IndirectObjectHolder holder;
RetainPtr<CPDF_Object> obj10 = obj_stream->ParseObject(&holder, 10);
@@ -376,8 +392,10 @@
auto obj_stream = CPDF_ObjectStream::Create(stream.Get());
ASSERT_TRUE(obj_stream);
- EXPECT_THAT(obj_stream->objects_offsets(),
- ElementsAre(Pair(10, 21), Pair(11, 0), Pair(12, 14)));
+ EXPECT_THAT(obj_stream->object_info(),
+ ElementsAre(CPDF_ObjectStream::ObjectInfo(11, 0),
+ CPDF_ObjectStream::ObjectInfo(12, 14),
+ CPDF_ObjectStream::ObjectInfo(10, 21)));
CPDF_IndirectObjectHolder holder;
RetainPtr<CPDF_Object> obj10 = obj_stream->ParseObject(&holder, 10);
@@ -415,8 +433,10 @@
auto obj_stream = CPDF_ObjectStream::Create(stream.Get());
ASSERT_TRUE(obj_stream);
- EXPECT_THAT(obj_stream->objects_offsets(),
- ElementsAre(Pair(10, 21), Pair(11, 0), Pair(12, 14)));
+ EXPECT_THAT(obj_stream->object_info(),
+ ElementsAre(CPDF_ObjectStream::ObjectInfo(10, 21),
+ CPDF_ObjectStream::ObjectInfo(11, 0),
+ CPDF_ObjectStream::ObjectInfo(12, 14)));
CPDF_IndirectObjectHolder holder;
RetainPtr<CPDF_Object> obj10 = obj_stream->ParseObject(&holder, 10);
diff --git a/core/fpdfapi/parser/cpdf_parser.cpp b/core/fpdfapi/parser/cpdf_parser.cpp
index 2cf653a..bed2a14 100644
--- a/core/fpdfapi/parser/cpdf_parser.cpp
+++ b/core/fpdfapi/parser/cpdf_parser.cpp
@@ -701,9 +701,9 @@
cross_ref_table->AddNormal(obj_num, gen_num, obj_pos);
if (const auto object_stream =
CPDF_ObjectStream::Create(pStream.Get())) {
- for (const auto& it : object_stream->objects_offsets()) {
- if (it.first < kMaxObjectNumber)
- cross_ref_table->AddCompressed(it.first, obj_num);
+ for (const auto& info : object_stream->object_info()) {
+ if (info.obj_num < kMaxObjectNumber)
+ cross_ref_table->AddCompressed(info.obj_num, obj_num);
}
}
}