Cherry-pick code to improve PDF object tracking
This cherry-picks the following CLs and squashes them into a single CL
for easier merging:
Add RemoveTextObjectWithTwoPagesSharingContentStreamAndResources test
Change-Id: I5fc0f0888d71368c0dd257931e4a1013301f639f
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/105212
(cherry picked from commit 7bfe65fa528d99232031ac203b2e56da44643db3)
Add RemoveTextObjectWithTwoPagesSharingContentArrayAndResources test
Change-Id: I396d1cf0a9d3da88337c459aa7ef6f6ec189bb1d
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/105491
(cherry picked from commit f3d0f929f461effa86ac27716bd304ba7d534445)
Switch CPDF_PageContentManager to have a CPDF_Document pointer
Change-Id: Iebc74c98071241ea220a185d43937749b9885c76
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/105531
(cherry picked from commit 0a111609332bb25f78e7485778f3457648954493)
Fix nits in cpdf_pagecontentmanager.h
Change-Id: I512e5b3d5b976a2485196ae765dd8c1c98275e67
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/105472
(cherry picked from commit 4e8a0feaafffd19f747e373bd9f98927ddd3a61f)
Encapsulate more in CPDF_PageContentManager
Change-Id: I198bcd4972603989123b01ff53bd455395f26d5b
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/105530
(cherry picked from commit 06a0689d429fde72152dc6c9c2245b86c045ee8a)
Add some using statements in fpdf_save_embeddertest.cpp
Change-Id: If1b9064532740e5de7074f747c23cc0de878ca0c
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/105552
(cherry picked from commit 5018240ebf009a67fc9b2613a6f3f8bda1e80c03)
Test trailer generation in FPDFSaveEmbedderTest.SaveLinearizedDoc
Change-Id: I3dfc67b5839af85f73c15555cf23f2fe9b9c687a
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/105533
(cherry picked from commit 1f4904f680cd7a63d96f072a371ef20ce939cd7a)
Check for removed resources in saved output in RemoveTextObject test
Change-Id: Ia934158b5fd72a42fba0125aa1637d1c980bda3c
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/105570
(cherry picked from commit 4d48929e3ffc3d43f2b72383a761d3df6e859e57)
Add RemoveTextObjectWithTwoPagesSharingResources test
Change-Id: If14cb333430a5bb11e50fbb6fd86f1898cf5f29a
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/105590
(cherry picked from commit 01ea024b74dc8fcc16001792ef5b20b53f3959f0)
Save the trailer's object number when parsing
Change-Id: I86f980a09d2214c50412ce65a905dd92ebc85a6a
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/105610
(cherry picked from commit 1e5dee361ee112e2b152ae7890a0fef567ccc4e9)
Add object tree traversal utility functions
Change-Id: I28817068d50c79f36d12bd50c1910937241194a5
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/105611
(cherry picked from commit 01b85b79a2bba7e895a18a32dbee840eae81eb31)
Avoid generating PDFs with unreferenced objects
Change-Id: I4c9d447ab745732909c4f7b5c6061886428a92dd
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/105612
(cherry picked from commit 28f8db4c040f33c1d0955747e2aef11d3803f321)
Keep track of Font and XObject resources
Change-Id: I510e6c51eda28535ed00e87b6e10971f7178122c
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/105613
(cherry picked from commit 69703b37cc02aceac37b504d34b50f1c3c24302a)
Keep track of ExtGState resources
Change-Id: I786a515b4ddcfa9ea2dccb94d3d7ad6a189ec7ce
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/105614
(cherry picked from commit 9b9bf7539e7795c44a2b9985232a8cda652b53f0)
Remove a duplicate FPDFEditEmbedderTest test case
Change-Id: Ieb0806520af652501930ad10fcdbfd33b5952c9d
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/105710
(cherry picked from commit 02516521c768c4a07ffeef6f1f32ad72cfd7c1d1)
Add RemoveTextObjectWithTwoPagesSharingResourcesDict test case
Change-Id: I30d4f35e41c5be78568aab118eb90d460d1c030e
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/105730
(cherry picked from commit 6c3577ca2ff7f3e6dc498f1b61be384f29db18d3)
Split CPDF_PageContentGenerator::UpdateContentStreams()
Change-Id: I9a688fd486bb851dceedca633856bbe5471b9b71
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/105732
(cherry picked from commit 31f23b9263ab8de97b6884bca23dc30a3c520e1a)
Do copy-on-write in CPDF_PageContentGenerator
Change-Id: I9e5659421ee6e6d8b7807bc4159fe086f70982ef
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/105733
(cherry picked from commit 692f0719e4cd4387daca51fb3a0151929648aa11)
Do copy-on-write in CPDF_PageContentManager
Change-Id: I4b52894ab44889bae0df9415542f018c91436c1a
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/105630
(cherry picked from commit ef30200275bbfdea90782f1a1d62c0474aab0e74)
Bug: chromium:1428724,pdfium:2012
Change-Id: I7148a4d6c30666792ea0c8cc6ae5186495afb343
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/106190
Reviewed-by: Tom Sepez <tsepez@chromium.org>
diff --git a/core/fpdfapi/edit/cpdf_creator.cpp b/core/fpdfapi/edit/cpdf_creator.cpp
index 3c9675e..9a2b88b 100644
--- a/core/fpdfapi/edit/cpdf_creator.cpp
+++ b/core/fpdfapi/edit/cpdf_creator.cpp
@@ -9,6 +9,7 @@
#include <stdint.h>
#include <algorithm>
+#include <set>
#include <utility>
#include "core/fpdfapi/parser/cpdf_array.h"
@@ -22,6 +23,7 @@
#include "core/fpdfapi/parser/cpdf_security_handler.h"
#include "core/fpdfapi/parser/cpdf_string.h"
#include "core/fpdfapi/parser/fpdf_parser_utility.h"
+#include "core/fpdfapi/parser/object_tree_traversal_util.h"
#include "core/fxcrt/data_vector.h"
#include "core/fxcrt/fx_extension.h"
#include "core/fxcrt/fx_random.h"
@@ -163,13 +165,30 @@
}
bool CPDF_Creator::WriteOldObjs() {
- uint32_t nLastObjNum = m_pParser->GetLastObjNum();
- if (!m_pParser->IsValidObjectNumber(nLastObjNum))
+ const uint32_t nLastObjNum = m_pParser->GetLastObjNum();
+ if (!m_pParser->IsValidObjectNumber(nLastObjNum)) {
return true;
+ }
+ if (m_CurObjNum > nLastObjNum) {
+ return true;
+ }
+ const std::set<uint32_t> objects_with_refs =
+ GetObjectsWithReferences(m_pDocument);
+ uint32_t last_object_number_written = 0;
for (uint32_t objnum = m_CurObjNum; objnum <= nLastObjNum; ++objnum) {
- if (!WriteOldIndirectObject(objnum))
+ if (!pdfium::Contains(objects_with_refs, objnum)) {
+ continue;
+ }
+ if (!WriteOldIndirectObject(objnum)) {
return false;
+ }
+ last_object_number_written = objnum;
+ }
+ // If there are no new objects to write, then adjust `m_dwLastObjNum` if
+ // needed to reflect the actual last object number.
+ if (m_NewObjNumArray.empty()) {
+ m_dwLastObjNum = last_object_number_written;
}
return true;
}
diff --git a/core/fpdfapi/edit/cpdf_pagecontentgenerator.cpp b/core/fpdfapi/edit/cpdf_pagecontentgenerator.cpp
index 734a717..8717028 100644
--- a/core/fpdfapi/edit/cpdf_pagecontentgenerator.cpp
+++ b/core/fpdfapi/edit/cpdf_pagecontentgenerator.cpp
@@ -13,6 +13,7 @@
#include <tuple>
#include <utility>
+#include "constants/page_object.h"
#include "core/fpdfapi/edit/cpdf_contentstream_write_utils.h"
#include "core/fpdfapi/edit/cpdf_pagecontentmanager.h"
#include "core/fpdfapi/edit/cpdf_stringarchivestream.h"
@@ -37,6 +38,7 @@
#include "core/fpdfapi/parser/cpdf_stream.h"
#include "core/fpdfapi/parser/fpdf_parser_decode.h"
#include "core/fpdfapi/parser/fpdf_parser_utility.h"
+#include "core/fpdfapi/parser/object_tree_traversal_util.h"
#include "third_party/base/check.h"
#include "third_party/base/containers/contains.h"
#include "third_party/base/notreached.h"
@@ -45,6 +47,10 @@
namespace {
+// Key: The resource type.
+// Value: The resource names of a given type.
+using ResourcesMap = std::map<ByteString, std::set<ByteString>>;
+
bool GetColor(const CPDF_Color* pColor, float* rgb) {
int intRGB[3];
if (!pColor || !pColor->IsColorSpaceRGB() ||
@@ -57,6 +63,68 @@
return true;
}
+void RecordPageObjectResourceUsage(const CPDF_PageObject* page_object,
+ ResourcesMap& seen_resources) {
+ const ByteString& resource_name = page_object->GetResourceName();
+ if (!resource_name.IsEmpty()) {
+ switch (page_object->GetType()) {
+ case CPDF_PageObject::Type::kText:
+ seen_resources["Font"].insert(resource_name);
+ break;
+ case CPDF_PageObject::Type::kImage:
+ case CPDF_PageObject::Type::kForm:
+ seen_resources["XObject"].insert(resource_name);
+ break;
+ case CPDF_PageObject::Type::kPath:
+ break;
+ case CPDF_PageObject::Type::kShading:
+ break;
+ }
+ }
+ const ByteString& graphics_resource_name =
+ page_object->GetGraphicsResourceName();
+ if (!graphics_resource_name.IsEmpty()) {
+ seen_resources["ExtGState"].insert(graphics_resource_name);
+ }
+}
+
+void RemoveUnusedResources(RetainPtr<CPDF_Dictionary> resources_dict,
+ const ResourcesMap& resources_in_use) {
+ // TODO(thestig): Remove other unused resource types:
+ // - ColorSpace
+ // - Pattern
+ // - Shading
+ static constexpr const char* kResourceKeys[] = {"ExtGState", "Font",
+ "XObject"};
+ for (const char* resource_key : kResourceKeys) {
+ RetainPtr<CPDF_Dictionary> resource_dict =
+ resources_dict->GetMutableDictFor(resource_key);
+ if (!resource_dict) {
+ continue;
+ }
+
+ std::vector<ByteString> keys;
+ {
+ CPDF_DictionaryLocker resource_dict_locker(resource_dict);
+ for (auto& it : resource_dict_locker) {
+ keys.push_back(it.first);
+ }
+ }
+
+ auto it = resources_in_use.find(resource_key);
+ const std::set<ByteString>* resource_in_use_of_current_type =
+ it != resources_in_use.end() ? &it->second : nullptr;
+ for (const ByteString& key : keys) {
+ if (resource_in_use_of_current_type &&
+ pdfium::Contains(*resource_in_use_of_current_type, key)) {
+ continue;
+ }
+
+ resource_dict->RemoveFor(key.AsStringView());
+ }
+ }
+}
+
} // namespace
CPDF_PageContentGenerator::CPDF_PageContentGenerator(
@@ -72,7 +140,15 @@
void CPDF_PageContentGenerator::GenerateContent() {
DCHECK(m_pObjHolder->IsPage());
- UpdateContentStreams(GenerateModifiedStreams());
+ std::map<int32_t, fxcrt::ostringstream> new_stream_data =
+ GenerateModifiedStreams();
+ // If no streams were regenerated or removed, nothing to do here.
+ if (new_stream_data.empty()) {
+ return;
+ }
+
+ UpdateContentStreams(std::move(new_stream_data));
+ UpdateResourcesDict();
}
std::map<int32_t, fxcrt::ostringstream>
@@ -141,12 +217,10 @@
void CPDF_PageContentGenerator::UpdateContentStreams(
std::map<int32_t, fxcrt::ostringstream>&& new_stream_data) {
- // If no streams were regenerated or removed, nothing to do here.
- if (new_stream_data.empty())
- return;
+ CHECK(!new_stream_data.empty());
// Make sure default graphics are created.
- GetOrCreateDefaultGraphics();
+ m_DefaultGraphicsName = GetOrCreateDefaultGraphics();
CPDF_PageContentManager page_content_manager(m_pObjHolder, m_pDocument);
for (auto& pair : new_stream_data) {
@@ -160,18 +234,42 @@
continue;
}
- RetainPtr<CPDF_Stream> old_stream =
- page_content_manager.GetStreamByIndex(stream_index);
- DCHECK(old_stream);
-
- // If buf is now empty, remove the stream instead of setting the data.
- if (buf->tellp() <= 0)
- page_content_manager.ScheduleRemoveStreamByIndex(stream_index);
- else
- old_stream->SetDataFromStringstreamAndRemoveFilter(buf);
+ page_content_manager.UpdateStream(stream_index, buf);
}
}
+void CPDF_PageContentGenerator::UpdateResourcesDict() {
+ RetainPtr<CPDF_Dictionary> resources = m_pObjHolder->GetMutableResources();
+ if (!resources) {
+ return;
+ }
+
+ const uint32_t resources_object_number = resources->GetObjNum();
+ if (resources_object_number) {
+ // If `resources` is not an inline object, then do not modify it directly if
+ // it has multiple references.
+ if (pdfium::Contains(GetObjectsWithMultipleReferences(m_pDocument),
+ resources_object_number)) {
+ resources = pdfium::WrapRetain(resources->Clone()->AsMutableDictionary());
+ const uint32_t clone_object_number =
+ m_pDocument->AddIndirectObject(resources);
+ m_pObjHolder->SetResources(resources);
+ m_pObjHolder->GetMutableDict()->SetNewFor<CPDF_Reference>(
+ pdfium::page_object::kResources, m_pDocument, clone_object_number);
+ }
+ }
+
+ ResourcesMap seen_resources;
+ for (auto& page_object : m_pageObjects) {
+ RecordPageObjectResourceUsage(page_object, seen_resources);
+ }
+ if (!m_DefaultGraphicsName.IsEmpty()) {
+ seen_resources["ExtGState"].insert(m_DefaultGraphicsName);
+ }
+
+ RemoveUnusedResources(std::move(resources), seen_resources);
+}
+
ByteString CPDF_PageContentGenerator::RealizeResource(
const CPDF_Object* pResource,
const ByteString& bsType) const {
@@ -179,7 +277,8 @@
if (!m_pObjHolder->GetResources()) {
m_pObjHolder->SetResources(m_pDocument->NewIndirect<CPDF_Dictionary>());
m_pObjHolder->GetMutableDict()->SetNewFor<CPDF_Reference>(
- "Resources", m_pDocument, m_pObjHolder->GetResources()->GetObjNum());
+ pdfium::page_object::kResources, m_pDocument,
+ m_pObjHolder->GetResources()->GetObjNum());
}
RetainPtr<CPDF_Dictionary> pResList =
@@ -321,6 +420,8 @@
pImage->ConvertStreamToIndirectObject();
ByteString name = RealizeResource(pStream, "XObject");
+ pImageObj->SetResourceName(name);
+
if (bWasInline) {
auto* pPageData = CPDF_DocPageData::FromDocument(m_pDocument);
pImageObj->SetImage(pPageData->GetImage(pStream->GetObjNum()));
@@ -340,10 +441,11 @@
if (!pStream)
return;
+ ByteString name = RealizeResource(pStream.Get(), "XObject");
+ pFormObj->SetResourceName(name);
+
*buf << "q\n";
WriteMatrix(*buf, pFormObj->form_matrix()) << " cm ";
-
- ByteString name = RealizeResource(pStream.Get(), "XObject");
*buf << "/" << PDF_NameEncode(name) << " Do Q\n";
}
@@ -498,6 +600,7 @@
}
m_pDocument->AddIndirectObject(gsDict);
name = RealizeResource(std::move(gsDict), "ExtGState");
+ pPageObj->SetGraphicsResourceName(name);
m_pObjHolder->GraphicsMapInsert(graphD, name);
}
*buf << "/" << PDF_NameEncode(name) << " gs ";
@@ -508,8 +611,8 @@
*buf << "0 0 0 RG 0 0 0 rg 1 w "
<< static_cast<int>(CFX_GraphStateData::LineCap::kButt) << " J "
<< static_cast<int>(CFX_GraphStateData::LineJoin::kMiter) << " j\n";
- ByteString name = GetOrCreateDefaultGraphics();
- *buf << "/" << PDF_NameEncode(name) << " gs ";
+ m_DefaultGraphicsName = GetOrCreateDefaultGraphics();
+ *buf << "/" << PDF_NameEncode(m_DefaultGraphicsName) << " gs ";
}
ByteString CPDF_PageContentGenerator::GetOrCreateDefaultGraphics() const {
@@ -562,10 +665,10 @@
}
data.baseFont = pFont->GetBaseFontName();
- ByteString dictName;
+ ByteString dict_name;
absl::optional<ByteString> maybe_name = m_pObjHolder->FontsMapSearch(data);
if (maybe_name.has_value()) {
- dictName = std::move(maybe_name.value());
+ dict_name = std::move(maybe_name.value());
} else {
RetainPtr<const CPDF_Object> pIndirectFont = pFont->GetFontDict();
if (pIndirectFont->IsInline()) {
@@ -581,10 +684,12 @@
m_pDocument->AddIndirectObject(pFontDict);
pIndirectFont = std::move(pFontDict);
}
- dictName = RealizeResource(std::move(pIndirectFont), "Font");
- m_pObjHolder->FontsMapInsert(data, dictName);
+ dict_name = RealizeResource(std::move(pIndirectFont), "Font");
+ m_pObjHolder->FontsMapInsert(data, dict_name);
}
- *buf << "/" << PDF_NameEncode(dictName) << " ";
+ pTextObj->SetResourceName(dict_name);
+
+ *buf << "/" << PDF_NameEncode(dict_name) << " ";
WriteFloat(*buf, pTextObj->GetFontSize()) << " Tf ";
*buf << static_cast<int>(pTextObj->GetTextRenderMode()) << " Tr ";
ByteString text;
diff --git a/core/fpdfapi/edit/cpdf_pagecontentgenerator.h b/core/fpdfapi/edit/cpdf_pagecontentgenerator.h
index 45894f3..06bb239 100644
--- a/core/fpdfapi/edit/cpdf_pagecontentgenerator.h
+++ b/core/fpdfapi/edit/cpdf_pagecontentgenerator.h
@@ -59,18 +59,24 @@
// streams are not touched.
std::map<int32_t, fxcrt::ostringstream> GenerateModifiedStreams();
- // Add buffer as a stream in page's 'Contents'
+ // For each entry in `new_stream_data`, adds the string buffer to the page's
+ // content stream.
void UpdateContentStreams(
std::map<int32_t, fxcrt::ostringstream>&& new_stream_data);
- // Set the stream index of all page objects with stream index ==
+ // Sets the stream index of all page objects with stream index ==
// |CPDF_PageObject::kNoContentStream|. These are new objects that had not
// been parsed from or written to any content stream yet.
void UpdateStreamlessPageObjects(int new_content_stream_index);
+ // Updates the resource dictionary for `m_pObjHolder` to account for all the
+ // changes.
+ void UpdateResourcesDict();
+
UnownedPtr<CPDF_PageObjectHolder> const m_pObjHolder;
UnownedPtr<CPDF_Document> const m_pDocument;
std::vector<UnownedPtr<CPDF_PageObject>> m_pageObjects;
+ ByteString m_DefaultGraphicsName;
};
#endif // CORE_FPDFAPI_EDIT_CPDF_PAGECONTENTGENERATOR_H_
diff --git a/core/fpdfapi/edit/cpdf_pagecontentmanager.cpp b/core/fpdfapi/edit/cpdf_pagecontentmanager.cpp
index de977c9..4be22e6 100644
--- a/core/fpdfapi/edit/cpdf_pagecontentmanager.cpp
+++ b/core/fpdfapi/edit/cpdf_pagecontentmanager.cpp
@@ -4,8 +4,12 @@
#include "core/fpdfapi/edit/cpdf_pagecontentmanager.h"
+#include <stdint.h>
+
#include <map>
#include <numeric>
+#include <set>
+#include <sstream>
#include <utility>
#include <vector>
@@ -16,21 +20,25 @@
#include "core/fpdfapi/parser/cpdf_document.h"
#include "core/fpdfapi/parser/cpdf_reference.h"
#include "core/fpdfapi/parser/cpdf_stream.h"
+#include "core/fpdfapi/parser/object_tree_traversal_util.h"
#include "third_party/abseil-cpp/absl/types/variant.h"
#include "third_party/base/check.h"
#include "third_party/base/containers/adapters.h"
+#include "third_party/base/containers/contains.h"
#include "third_party/base/numerics/safe_conversions.h"
CPDF_PageContentManager::CPDF_PageContentManager(
CPDF_PageObjectHolder* page_obj_holder,
- CPDF_IndirectObjectHolder* indirect_obj_holder)
+ CPDF_Document* document)
: page_obj_holder_(page_obj_holder),
- indirect_obj_holder_(indirect_obj_holder) {
+ document_(document),
+ objects_with_multi_refs_(GetObjectsWithMultipleReferences(document_)) {
RetainPtr<CPDF_Dictionary> page_dict = page_obj_holder_->GetMutableDict();
RetainPtr<CPDF_Object> contents_obj =
page_dict->GetMutableObjectFor("Contents");
RetainPtr<CPDF_Array> contents_array = ToArray(contents_obj);
if (contents_array) {
+ CHECK(contents_array->IsInline());
contents_ = std::move(contents_array);
return;
}
@@ -43,10 +51,19 @@
return;
contents_array.Reset(indirect_obj->AsMutableArray());
- if (contents_array)
- contents_ = std::move(contents_array);
- else if (indirect_obj->IsStream())
+ if (contents_array) {
+ if (pdfium::Contains(objects_with_multi_refs_,
+ contents_array->GetObjNum())) {
+ RetainPtr<CPDF_Array> cloned_contents_array =
+ pdfium::WrapRetain(contents_array->Clone()->AsMutableArray());
+ page_dict->SetFor("Contents", cloned_contents_array);
+ contents_ = std::move(cloned_contents_array);
+ } else {
+ contents_ = std::move(contents_array);
+ }
+ } else if (indirect_obj->IsStream()) {
contents_ = pdfium::WrapRetain(indirect_obj->AsMutableStream());
+ }
}
}
@@ -75,21 +92,21 @@
}
size_t CPDF_PageContentManager::AddStream(fxcrt::ostringstream* buf) {
- auto new_stream = indirect_obj_holder_->NewIndirect<CPDF_Stream>();
+ auto new_stream = document_->NewIndirect<CPDF_Stream>();
new_stream->SetDataFromStringstream(buf);
// If there is one Content stream (not in an array), now there will be two, so
// create an array with the old and the new one. The new one's index is 1.
RetainPtr<CPDF_Stream> contents_stream = GetContentsStream();
if (contents_stream) {
- auto new_contents_array = indirect_obj_holder_->NewIndirect<CPDF_Array>();
- new_contents_array->AppendNew<CPDF_Reference>(indirect_obj_holder_,
+ auto new_contents_array = document_->NewIndirect<CPDF_Array>();
+ new_contents_array->AppendNew<CPDF_Reference>(document_,
contents_stream->GetObjNum());
- new_contents_array->AppendNew<CPDF_Reference>(indirect_obj_holder_,
+ new_contents_array->AppendNew<CPDF_Reference>(document_,
new_stream->GetObjNum());
RetainPtr<CPDF_Dictionary> page_dict = page_obj_holder_->GetMutableDict();
- page_dict->SetNewFor<CPDF_Reference>("Contents", indirect_obj_holder_,
+ page_dict->SetNewFor<CPDF_Reference>("Contents", document_,
new_contents_array->GetObjNum());
contents_ = std::move(new_contents_array);
return 1;
@@ -98,7 +115,7 @@
// If there is an array, just add the new stream to it, at the last position.
RetainPtr<CPDF_Array> contents_array = GetContentsArray();
if (contents_array) {
- contents_array->AppendNew<CPDF_Reference>(indirect_obj_holder_,
+ contents_array->AppendNew<CPDF_Reference>(document_,
new_stream->GetObjNum());
return contents_array->size() - 1;
}
@@ -106,12 +123,52 @@
// There were no Contents, so add the new stream as the single Content stream.
// Its index is 0.
RetainPtr<CPDF_Dictionary> page_dict = page_obj_holder_->GetMutableDict();
- page_dict->SetNewFor<CPDF_Reference>("Contents", indirect_obj_holder_,
+ page_dict->SetNewFor<CPDF_Reference>("Contents", document_,
new_stream->GetObjNum());
contents_ = std::move(new_stream);
return 0;
}
+void CPDF_PageContentManager::UpdateStream(size_t stream_index,
+ fxcrt::ostringstream* buf) {
+ // If `buf` is now empty, remove the stream instead of setting the data.
+ if (buf->tellp() <= 0) {
+ ScheduleRemoveStreamByIndex(stream_index);
+ return;
+ }
+
+ RetainPtr<CPDF_Stream> existing_stream = GetStreamByIndex(stream_index);
+ CHECK(existing_stream);
+ if (!pdfium::Contains(objects_with_multi_refs_,
+ existing_stream->GetObjNum())) {
+ existing_stream->SetDataFromStringstreamAndRemoveFilter(buf);
+ return;
+ }
+
+ if (GetContentsStream()) {
+ auto new_stream = document_->NewIndirect<CPDF_Stream>();
+ new_stream->SetDataFromStringstream(buf);
+ RetainPtr<CPDF_Dictionary> page_dict = page_obj_holder_->GetMutableDict();
+ page_dict->SetNewFor<CPDF_Reference>("Contents", document_,
+ new_stream->GetObjNum());
+ }
+
+ RetainPtr<CPDF_Array> contents_array = GetContentsArray();
+ if (!contents_array) {
+ return;
+ }
+
+ RetainPtr<CPDF_Reference> stream_reference =
+ ToReference(contents_array->GetMutableObjectAt(stream_index));
+ if (!stream_reference) {
+ return;
+ }
+
+ auto new_stream = document_->NewIndirect<CPDF_Stream>();
+ new_stream->SetDataFromStringstream(buf);
+ stream_reference->SetRef(document_, new_stream->GetObjNum());
+}
+
void CPDF_PageContentManager::ScheduleRemoveStreamByIndex(size_t stream_index) {
streams_to_remove_.insert(stream_index);
}
diff --git a/core/fpdfapi/edit/cpdf_pagecontentmanager.h b/core/fpdfapi/edit/cpdf_pagecontentmanager.h
index 301384a..5785fc3 100644
--- a/core/fpdfapi/edit/cpdf_pagecontentmanager.h
+++ b/core/fpdfapi/edit/cpdf_pagecontentmanager.h
@@ -5,6 +5,8 @@
#ifndef CORE_FPDFAPI_EDIT_CPDF_PAGECONTENTMANAGER_H_
#define CORE_FPDFAPI_EDIT_CPDF_PAGECONTENTMANAGER_H_
+#include <stdint.h>
+
#include <set>
#include "core/fxcrt/fx_string_wrappers.h"
@@ -13,40 +15,44 @@
#include "third_party/abseil-cpp/absl/types/variant.h"
class CPDF_Array;
-class CPDF_IndirectObjectHolder;
+class CPDF_Document;
class CPDF_PageObjectHolder;
class CPDF_Stream;
class CPDF_PageContentManager {
public:
CPDF_PageContentManager(CPDF_PageObjectHolder* page_obj_holder,
- CPDF_IndirectObjectHolder* indirect_obj_holder);
+ CPDF_Document* document);
~CPDF_PageContentManager();
- // Gets the Content stream at a given index. If Contents is a single stream
- // rather than an array, it is retrievable at index 0.
- RetainPtr<CPDF_Stream> GetStreamByIndex(size_t stream_index);
-
// Adds a new Content stream. Its index in the array will be returned, or 0
// if Contents is not an array, but only a single stream.
size_t AddStream(fxcrt::ostringstream* buf);
- // Schedule the removal of the Content stream at a given index. It will be
+ // Changes the stream at `stream_index` to contain the data in `buf`. If `buf`
+ // is empty, then schedules the removal of the stream instead.
+ void UpdateStream(size_t stream_index, fxcrt::ostringstream* buf);
+
+ private:
+ // Gets the Content stream at a given index. If Contents is a single stream
+ // rather than an array, it is retrievable at index 0.
+ RetainPtr<CPDF_Stream> GetStreamByIndex(size_t stream_index);
+
+ // Schedules the removal of the Content stream at a given index. It will be
// removed upon CPDF_PageContentManager destruction.
void ScheduleRemoveStreamByIndex(size_t stream_index);
- private:
- // Remove all Content streams for which ScheduleRemoveStreamByIndex() was
+ // Removes all Content streams for which ScheduleRemoveStreamByIndex() was
// called. Update the content stream of all page objects with the shifted
// indexes.
void ExecuteScheduledRemovals();
- private:
RetainPtr<CPDF_Stream> GetContentsStream();
RetainPtr<CPDF_Array> GetContentsArray();
UnownedPtr<CPDF_PageObjectHolder> const page_obj_holder_;
- UnownedPtr<CPDF_IndirectObjectHolder> const indirect_obj_holder_;
+ UnownedPtr<CPDF_Document> const document_;
+ const std::set<uint32_t> objects_with_multi_refs_;
// When holding a CPDF_Stream, the pointer may be null.
absl::variant<RetainPtr<CPDF_Stream>, RetainPtr<CPDF_Array>> contents_;
std::set<size_t> streams_to_remove_;
diff --git a/core/fpdfapi/font/cpdf_font.h b/core/fpdfapi/font/cpdf_font.h
index dee1724..a74cba9 100644
--- a/core/fpdfapi/font/cpdf_font.h
+++ b/core/fpdfapi/font/cpdf_font.h
@@ -133,6 +133,9 @@
CFX_Font* GetFontFallback(int position);
+ const ByteString& GetResourceName() const { return m_ResourceName; }
+ void SetResourceName(const ByteString& name) { m_ResourceName = name; }
+
protected:
CPDF_Font(CPDF_Document* pDocument, RetainPtr<CPDF_Dictionary> pFontDict);
@@ -163,6 +166,7 @@
void CheckFontMetrics();
UnownedPtr<CPDF_Document> const m_pDocument;
+ ByteString m_ResourceName; // The resource name for this font.
CFX_Font m_Font;
std::vector<std::unique_ptr<CFX_Font>> m_FontFallbacks;
RetainPtr<CPDF_StreamAcc> m_pFontFile;
diff --git a/core/fpdfapi/page/cpdf_allstates.cpp b/core/fpdfapi/page/cpdf_allstates.cpp
index 07d4c4f..a4330db 100644
--- a/core/fpdfapi/page/cpdf_allstates.cpp
+++ b/core/fpdfapi/page/cpdf_allstates.cpp
@@ -16,6 +16,7 @@
#include "core/fpdfapi/parser/cpdf_array.h"
#include "core/fpdfapi/parser/cpdf_dictionary.h"
#include "core/fpdfapi/parser/fpdf_parser_utility.h"
+#include "core/fxcrt/bytestring.h"
#include "core/fxge/cfx_graphstatedata.h"
#include "third_party/base/cxx17_backports.h"
@@ -25,6 +26,7 @@
void CPDF_AllStates::Copy(const CPDF_AllStates& src) {
CopyStates(src);
+ m_GraphicsResourceName = src.m_GraphicsResourceName;
m_TextMatrix = src.m_TextMatrix;
m_ParentMatrix = src.m_ParentMatrix;
m_CTM = src.m_CTM;
diff --git a/core/fpdfapi/page/cpdf_allstates.h b/core/fpdfapi/page/cpdf_allstates.h
index 0ae2802..63eb527 100644
--- a/core/fpdfapi/page/cpdf_allstates.h
+++ b/core/fpdfapi/page/cpdf_allstates.h
@@ -8,6 +8,7 @@
#define CORE_FPDFAPI_PAGE_CPDF_ALLSTATES_H_
#include "core/fpdfapi/page/cpdf_graphicstates.h"
+#include "core/fxcrt/bytestring.h"
#include "core/fxcrt/fx_coordinates.h"
class CPDF_Array;
@@ -24,6 +25,7 @@
CPDF_StreamContentParser* pParser);
void SetLineDash(const CPDF_Array* pArray, float phase, float scale);
+ ByteString m_GraphicsResourceName;
CFX_Matrix m_TextMatrix;
CFX_Matrix m_CTM;
CFX_Matrix m_ParentMatrix;
diff --git a/core/fpdfapi/page/cpdf_pageobject.h b/core/fpdfapi/page/cpdf_pageobject.h
index b5f5e3f..7d9d015 100644
--- a/core/fpdfapi/page/cpdf_pageobject.h
+++ b/core/fpdfapi/page/cpdf_pageobject.h
@@ -11,6 +11,7 @@
#include "core/fpdfapi/page/cpdf_contentmarks.h"
#include "core/fpdfapi/page/cpdf_graphicstates.h"
+#include "core/fxcrt/bytestring.h"
#include "core/fxcrt/fx_coordinates.h"
class CPDF_FormObject;
@@ -88,6 +89,18 @@
m_ContentStream = new_content_stream;
}
+ const ByteString& GetResourceName() const { return m_ResourceName; }
+ void SetResourceName(const ByteString& resource_name) {
+ m_ResourceName = resource_name;
+ }
+
+ const ByteString& GetGraphicsResourceName() const {
+ return m_GraphicsResourceName;
+ }
+ void SetGraphicsResourceName(const ByteString& resource_name) {
+ m_GraphicsResourceName = resource_name;
+ }
+
protected:
void CopyData(const CPDF_PageObject* pSrcObject);
@@ -97,6 +110,8 @@
CPDF_ContentMarks m_ContentMarks;
bool m_bDirty = false;
int32_t m_ContentStream;
+ ByteString m_ResourceName; // The resource name for this object.
+ ByteString m_GraphicsResourceName; // Like `m_ResourceName` but for graphics.
};
#endif // CORE_FPDFAPI_PAGE_CPDF_PAGEOBJECT_H_
diff --git a/core/fpdfapi/page/cpdf_streamcontentparser.cpp b/core/fpdfapi/page/cpdf_streamcontentparser.cpp
index c7c2911..3bb3471 100644
--- a/core/fpdfapi/page/cpdf_streamcontentparser.cpp
+++ b/core/fpdfapi/page/cpdf_streamcontentparser.cpp
@@ -35,6 +35,7 @@
#include "core/fpdfapi/parser/cpdf_stream.h"
#include "core/fpdfapi/parser/fpdf_parser_utility.h"
#include "core/fxcrt/autonuller.h"
+#include "core/fxcrt/bytestring.h"
#include "core/fxcrt/fx_safe_types.h"
#include "core/fxcrt/scoped_set_insertion.h"
#include "core/fxcrt/stl_util.h"
@@ -426,6 +427,7 @@
if (bText) {
pObj->m_TextState = m_pCurStates->m_TextState;
}
+ pObj->SetGraphicsResourceName(m_pCurStates->m_GraphicsResourceName);
}
// static
@@ -650,7 +652,8 @@
if (m_pSyntax->GetWord() == "EI")
break;
}
- CPDF_ImageObject* pObj = AddImageFromStream(std::move(pStream));
+ CPDF_ImageObject* pObj =
+ AddImageFromStream(std::move(pStream), /*resource_name=*/"");
// Record the bounding box of this image, so rendering code can draw it
// properly.
if (pObj && pObj->GetImage()->IsMask())
@@ -742,14 +745,15 @@
type = pXObject->GetDict()->GetByteStringFor("Subtype");
if (type == "Form") {
- AddForm(std::move(pXObject));
+ AddForm(std::move(pXObject), name);
return;
}
if (type == "Image") {
CPDF_ImageObject* pObj =
- pXObject->IsInline() ? AddImageFromStream(ToStream(pXObject->Clone()))
- : AddImageFromStreamObjNum(pXObject->GetObjNum());
+ pXObject->IsInline()
+ ? AddImageFromStream(ToStream(pXObject->Clone()), name)
+ : AddImageFromStreamObjNum(pXObject->GetObjNum(), name);
m_LastImageName = std::move(name);
if (pObj) {
@@ -760,7 +764,8 @@
}
}
-void CPDF_StreamContentParser::AddForm(RetainPtr<CPDF_Stream> pStream) {
+void CPDF_StreamContentParser::AddForm(RetainPtr<CPDF_Stream> pStream,
+ const ByteString& name) {
CPDF_AllStates status;
status.m_GeneralState = m_pCurStates->m_GeneralState;
status.m_GraphState = m_pCurStates->m_GraphState;
@@ -773,6 +778,8 @@
CFX_Matrix matrix = m_pCurStates->m_CTM * m_mtContentToUser;
auto pFormObj = std::make_unique<CPDF_FormObject>(GetCurrentStreamIndex(),
std::move(form), matrix);
+ pFormObj->SetResourceName(name);
+ pFormObj->SetGraphicsResourceName(m_pCurStates->m_GraphicsResourceName);
if (!m_pObjectHolder->BackgroundAlphaNeeded() &&
pFormObj->form()->BackgroundAlphaNeeded()) {
m_pObjectHolder->SetBackgroundAlphaNeeded(true);
@@ -783,11 +790,13 @@
}
CPDF_ImageObject* CPDF_StreamContentParser::AddImageFromStream(
- RetainPtr<CPDF_Stream> pStream) {
+ RetainPtr<CPDF_Stream> pStream,
+ const ByteString& name) {
if (!pStream)
return nullptr;
auto pImageObj = std::make_unique<CPDF_ImageObject>(GetCurrentStreamIndex());
+ pImageObj->SetResourceName(name);
pImageObj->SetImage(
pdfium::MakeRetain<CPDF_Image>(m_pDocument, std::move(pStream)));
@@ -795,8 +804,10 @@
}
CPDF_ImageObject* CPDF_StreamContentParser::AddImageFromStreamObjNum(
- uint32_t stream_obj_num) {
+ uint32_t stream_obj_num,
+ const ByteString& name) {
auto pImageObj = std::make_unique<CPDF_ImageObject>(GetCurrentStreamIndex());
+ pImageObj->SetResourceName(name);
pImageObj->SetImage(
CPDF_DocPageData::FromDocument(m_pDocument)->GetImage(stream_obj_num));
@@ -807,6 +818,7 @@
DCHECK(m_pLastImage);
auto pImageObj = std::make_unique<CPDF_ImageObject>(GetCurrentStreamIndex());
+ pImageObj->SetResourceName(m_LastImageName);
pImageObj->SetImage(CPDF_DocPageData::FromDocument(m_pDocument)
->GetImage(m_pLastImage->GetStream()->GetObjNum()));
@@ -892,6 +904,7 @@
if (!pGS)
return;
+ m_pCurStates->m_GraphicsResourceName = name;
m_pCurStates->ProcessExtGS(pGS.Get(), this);
}
@@ -1146,9 +1159,14 @@
}
RetainPtr<CPDF_Font> pFont = CPDF_DocPageData::FromDocument(m_pDocument)
->GetFont(std::move(pFontDict));
- if (pFont && pFont->IsType3Font()) {
- pFont->AsType3Font()->SetPageResources(m_pResources.Get());
- pFont->AsType3Font()->CheckType3FontMetrics();
+ if (pFont) {
+ // Save `name` for later retrieval by the CPDF_TextObject that uses the
+ // font.
+ pFont->SetResourceName(name);
+ if (pFont->IsType3Font()) {
+ pFont->AsType3Font()->SetPageResources(m_pResources.Get());
+ pFont->AsType3Font()->CheckType3FontMetrics();
+ }
}
return pFont;
}
@@ -1223,6 +1241,7 @@
: m_pCurStates->m_TextState.GetTextMode();
{
auto pText = std::make_unique<CPDF_TextObject>(GetCurrentStreamIndex());
+ pText->SetResourceName(pFont->GetResourceName());
SetGraphicStates(pText.get(), true, true, true);
if (TextRenderingModeIsStrokeMode(text_mode)) {
pdfium::span<float> pCTM = pText->m_TextState.GetMutableCTM();
diff --git a/core/fpdfapi/page/cpdf_streamcontentparser.h b/core/fpdfapi/page/cpdf_streamcontentparser.h
index 86da47d..276dc72 100644
--- a/core/fpdfapi/page/cpdf_streamcontentparser.h
+++ b/core/fpdfapi/page/cpdf_streamcontentparser.h
@@ -119,11 +119,13 @@
void AddPathRect(float x, float y, float w, float h);
void AddPathObject(CFX_FillRenderOptions::FillType fill_type,
RenderType render_type);
- CPDF_ImageObject* AddImageFromStream(RetainPtr<CPDF_Stream> pStream);
- CPDF_ImageObject* AddImageFromStreamObjNum(uint32_t stream_obj_num);
+ CPDF_ImageObject* AddImageFromStream(RetainPtr<CPDF_Stream> pStream,
+ const ByteString& name);
+ CPDF_ImageObject* AddImageFromStreamObjNum(uint32_t stream_obj_num,
+ const ByteString& name);
CPDF_ImageObject* AddLastImage();
- void AddForm(RetainPtr<CPDF_Stream> pStream);
+ void AddForm(RetainPtr<CPDF_Stream> pStream, const ByteString& name);
void SetGraphicStates(CPDF_PageObject* pObj,
bool bColor,
bool bText,
diff --git a/core/fpdfapi/parser/BUILD.gn b/core/fpdfapi/parser/BUILD.gn
index bdc2e86..e3152d8 100644
--- a/core/fpdfapi/parser/BUILD.gn
+++ b/core/fpdfapi/parser/BUILD.gn
@@ -73,6 +73,8 @@
"fpdf_parser_decode.h",
"fpdf_parser_utility.cpp",
"fpdf_parser_utility.h",
+ "object_tree_traversal_util.cpp",
+ "object_tree_traversal_util.h",
]
configs += [ "../../../:pdfium_strict_config" ]
deps = [
@@ -151,6 +153,7 @@
"cpdf_parser_embeddertest.cpp",
"cpdf_security_handler_embeddertest.cpp",
"fpdf_parser_decode_embeddertest.cpp",
+ "object_tree_traversal_util_embeddertest.cpp",
]
deps = [
":parser",
diff --git a/core/fpdfapi/parser/cpdf_cross_ref_table.cpp b/core/fpdfapi/parser/cpdf_cross_ref_table.cpp
index 2474931..0bb5bb6 100644
--- a/core/fpdfapi/parser/cpdf_cross_ref_table.cpp
+++ b/core/fpdfapi/parser/cpdf_cross_ref_table.cpp
@@ -27,8 +27,10 @@
CPDF_CrossRefTable::CPDF_CrossRefTable() = default;
-CPDF_CrossRefTable::CPDF_CrossRefTable(RetainPtr<CPDF_Dictionary> trailer)
- : trailer_(std::move(trailer)) {}
+CPDF_CrossRefTable::CPDF_CrossRefTable(RetainPtr<CPDF_Dictionary> trailer,
+ uint32_t trailer_object_number)
+ : trailer_(std::move(trailer)),
+ trailer_object_number_(trailer_object_number) {}
CPDF_CrossRefTable::~CPDF_CrossRefTable() = default;
@@ -90,8 +92,10 @@
info.pos = 0;
}
-void CPDF_CrossRefTable::SetTrailer(RetainPtr<CPDF_Dictionary> trailer) {
+void CPDF_CrossRefTable::SetTrailer(RetainPtr<CPDF_Dictionary> trailer,
+ uint32_t trailer_object_number) {
trailer_ = std::move(trailer);
+ trailer_object_number_ = trailer_object_number;
}
const CPDF_CrossRefTable::ObjectInfo* CPDF_CrossRefTable::GetObjectInfo(
diff --git a/core/fpdfapi/parser/cpdf_cross_ref_table.h b/core/fpdfapi/parser/cpdf_cross_ref_table.h
index c1d639f..246e129 100644
--- a/core/fpdfapi/parser/cpdf_cross_ref_table.h
+++ b/core/fpdfapi/parser/cpdf_cross_ref_table.h
@@ -49,7 +49,8 @@
std::unique_ptr<CPDF_CrossRefTable> top);
CPDF_CrossRefTable();
- explicit CPDF_CrossRefTable(RetainPtr<CPDF_Dictionary> trailer);
+ CPDF_CrossRefTable(RetainPtr<CPDF_Dictionary> trailer,
+ uint32_t trailer_object_number);
~CPDF_CrossRefTable();
void AddCompressed(uint32_t obj_num,
@@ -58,7 +59,9 @@
void AddNormal(uint32_t obj_num, uint16_t gen_num, FX_FILESIZE pos);
void SetFree(uint32_t obj_num);
- void SetTrailer(RetainPtr<CPDF_Dictionary> trailer);
+ void SetTrailer(RetainPtr<CPDF_Dictionary> trailer,
+ uint32_t trailer_object_number);
+ uint32_t trailer_object_number() const { return trailer_object_number_; }
const CPDF_Dictionary* trailer() const { return trailer_.Get(); }
CPDF_Dictionary* GetMutableTrailerForTesting() { return trailer_.Get(); }
@@ -77,6 +80,10 @@
void UpdateTrailer(RetainPtr<CPDF_Dictionary> new_trailer);
RetainPtr<CPDF_Dictionary> trailer_;
+ // `trailer_` can be the dictionary part of an XRef stream object. That
+ // dictionary is inline and has no object number of its own, so store the
+ // stream's object number here, or 0 if there is none.
+ uint32_t trailer_object_number_ = 0;
std::map<uint32_t, ObjectInfo> objects_info_;
};
diff --git a/core/fpdfapi/parser/cpdf_parser.cpp b/core/fpdfapi/parser/cpdf_parser.cpp
index e2bbfbf..302aec1 100644
--- a/core/fpdfapi/parser/cpdf_parser.cpp
+++ b/core/fpdfapi/parser/cpdf_parser.cpp
@@ -51,6 +51,9 @@
// dictionary.
constexpr size_t kMinFieldCount = 3;
+// V4 trailers are inline.
+constexpr uint32_t kNoV4TrailerObjectNumber = 0;
+
struct CrossRefV5IndexEntry {
uint32_t start_obj_num;
uint32_t obj_count;
@@ -378,7 +381,7 @@
if (!trailer)
return false;
- m_CrossRefTable->SetTrailer(std::move(trailer));
+ m_CrossRefTable->SetTrailer(std::move(trailer), kNoV4TrailerObjectNumber);
const int32_t xrefsize = GetTrailer()->GetDirectIntegerFor("Size");
if (xrefsize > 0 && xrefsize <= kMaxXRefSize)
ShrinkObjectMap(xrefsize);
@@ -412,7 +415,8 @@
// SLOW ...
m_CrossRefTable = CPDF_CrossRefTable::MergeUp(
- std::make_unique<CPDF_CrossRefTable>(std::move(pDict)),
+ std::make_unique<CPDF_CrossRefTable>(std::move(pDict),
+ kNoV4TrailerObjectNumber),
std::move(m_CrossRefTable));
}
@@ -451,7 +455,8 @@
// Merge the trailers.
m_CrossRefTable = CPDF_CrossRefTable::MergeUp(
- std::make_unique<CPDF_CrossRefTable>(std::move(main_trailer)),
+ std::make_unique<CPDF_CrossRefTable>(std::move(main_trailer),
+ kNoV4TrailerObjectNumber),
std::move(m_CrossRefTable));
// Now GetTrailer() returns the merged trailer, where /Prev is from the
@@ -478,7 +483,8 @@
// SLOW ...
m_CrossRefTable = CPDF_CrossRefTable::MergeUp(
- std::make_unique<CPDF_CrossRefTable>(std::move(pDict)),
+ std::make_unique<CPDF_CrossRefTable>(std::move(pDict),
+ kNoV4TrailerObjectNumber),
std::move(m_CrossRefTable));
}
@@ -693,11 +699,17 @@
} else if (word == "trailer") {
RetainPtr<CPDF_Object> pTrailer = m_pSyntax->GetObjectBody(nullptr);
if (pTrailer) {
+ CPDF_Stream* stream_trailer = pTrailer->AsMutableStream();
+ // Grab the object number from `pTrailer` before potentially calling
+ // std::move(pTrailer) below.
+ const uint32_t trailer_object_number = pTrailer->GetObjNum();
+ RetainPtr<CPDF_Dictionary> trailer_dict =
+ stream_trailer ? stream_trailer->GetMutableDict()
+ : ToDictionary(std::move(pTrailer));
cross_ref_table = CPDF_CrossRefTable::MergeUp(
std::move(cross_ref_table),
- std::make_unique<CPDF_CrossRefTable>(ToDictionary(
- pTrailer->IsStream() ? pTrailer->AsStream()->GetDict()->Clone()
- : std::move(pTrailer))));
+ std::make_unique<CPDF_CrossRefTable>(std::move(trailer_dict),
+ trailer_object_number));
}
} else if (word == "obj" && numbers.size() == 2u) {
const FX_FILESIZE obj_pos = numbers[0].second;
@@ -713,7 +725,8 @@
cross_ref_table = CPDF_CrossRefTable::MergeUp(
std::move(cross_ref_table),
std::make_unique<CPDF_CrossRefTable>(
- ToDictionary(pStream->GetDict()->Clone())));
+ ToDictionary(pStream->GetDict()->Clone()),
+ pStream->GetObjNum()));
}
if (obj_num < kMaxObjectNumber) {
@@ -763,12 +776,13 @@
RetainPtr<CPDF_Dictionary> pNewTrailer = ToDictionary(pDict->Clone());
if (bMainXRef) {
- m_CrossRefTable =
- std::make_unique<CPDF_CrossRefTable>(std::move(pNewTrailer));
+ m_CrossRefTable = std::make_unique<CPDF_CrossRefTable>(
+ std::move(pNewTrailer), pStream->GetObjNum());
m_CrossRefTable->ShrinkObjectMap(size);
} else {
m_CrossRefTable = CPDF_CrossRefTable::MergeUp(
- std::make_unique<CPDF_CrossRefTable>(std::move(pNewTrailer)),
+ std::make_unique<CPDF_CrossRefTable>(std::move(pNewTrailer),
+ pStream->GetObjNum()),
std::move(m_CrossRefTable));
}
@@ -912,6 +926,10 @@
return m_CrossRefTable->GetMutableTrailerForTesting();
}
+uint32_t CPDF_Parser::GetTrailerObjectNumber() const {
+ return m_CrossRefTable->trailer_object_number();
+}
+
RetainPtr<CPDF_Dictionary> CPDF_Parser::GetCombinedTrailer() const {
return m_CrossRefTable->trailer()
? ToDictionary(m_CrossRefTable->trailer()->Clone())
@@ -1076,7 +1094,7 @@
if (!trailer)
return SUCCESS;
- m_CrossRefTable->SetTrailer(std::move(trailer));
+ m_CrossRefTable->SetTrailer(std::move(trailer), kNoV4TrailerObjectNumber);
const int32_t xrefsize = GetTrailer()->GetDirectIntegerFor("Size");
if (xrefsize > 0) {
// Check if `xrefsize` is correct. If it is incorrect, give up and rebuild
diff --git a/core/fpdfapi/parser/cpdf_parser.h b/core/fpdfapi/parser/cpdf_parser.h
index 5d036a2..21dd8aa 100644
--- a/core/fpdfapi/parser/cpdf_parser.h
+++ b/core/fpdfapi/parser/cpdf_parser.h
@@ -79,6 +79,7 @@
const CPDF_Dictionary* GetTrailer() const;
CPDF_Dictionary* GetMutableTrailerForTesting();
+ uint32_t GetTrailerObjectNumber() const;
// Returns a new trailer which combines the last read trailer with the /Root
// and /Info from previous ones.
diff --git a/core/fpdfapi/parser/cpdf_parser_unittest.cpp b/core/fpdfapi/parser/cpdf_parser_unittest.cpp
index da68641..efde90e 100644
--- a/core/fpdfapi/parser/cpdf_parser_unittest.cpp
+++ b/core/fpdfapi/parser/cpdf_parser_unittest.cpp
@@ -102,6 +102,10 @@
EXPECT_EQ(offsets[i], GetObjInfo(parser, i).pos);
for (size_t i = 0; i < std::size(versions); ++i)
EXPECT_EQ(versions[i], GetObjInfo(parser, i).gennum);
+
+ const CPDF_CrossRefTable* cross_ref_table = parser.GetCrossRefTable();
+ ASSERT_TRUE(cross_ref_table);
+ EXPECT_EQ(0u, cross_ref_table->trailer_object_number());
}
TEST(ParserTest, RebuildCrossRefFailed) {
@@ -334,6 +338,10 @@
CPDF_TestParser parser;
parser.InitTestFromBufferWithOffset(data, kTestHeaderOffset);
EXPECT_TRUE(parser.ParseLinearizedHeader());
+
+ const CPDF_CrossRefTable* cross_ref_table = parser.GetCrossRefTable();
+ ASSERT_TRUE(cross_ref_table);
+ EXPECT_EQ(0u, cross_ref_table->trailer_object_number());
}
TEST(ParserTest, BadStartXrefShouldNotBuildCrossRefTable) {
@@ -434,8 +442,11 @@
"%%EOF\n";
ASSERT_TRUE(parser.InitTestFromBuffer(kData));
EXPECT_EQ(CPDF_Parser::SUCCESS, parser.StartParseInternal());
- ASSERT_TRUE(parser.GetCrossRefTable());
- const auto& objects_info = parser.GetCrossRefTable()->objects_info();
+
+ const CPDF_CrossRefTable* cross_ref_table = parser.GetCrossRefTable();
+ ASSERT_TRUE(cross_ref_table);
+ EXPECT_EQ(7u, cross_ref_table->trailer_object_number());
+ const auto& objects_info = cross_ref_table->objects_info();
EXPECT_EQ(2u, objects_info.size());
// Skip over the first object, and continue parsing the remaining objects.
diff --git a/core/fpdfapi/parser/cpdf_reference.h b/core/fpdfapi/parser/cpdf_reference.h
index df24458..241b398 100644
--- a/core/fpdfapi/parser/cpdf_reference.h
+++ b/core/fpdfapi/parser/cpdf_reference.h
@@ -32,6 +32,7 @@
CPDF_IndirectObjectHolder* holder) const override;
uint32_t GetRefObjNum() const { return m_RefObjNum; }
+ bool HasIndirectObjectHolder() const { return !!m_pObjList; }
void SetRef(CPDF_IndirectObjectHolder* pDoc, uint32_t objnum);
private:
diff --git a/core/fpdfapi/parser/object_tree_traversal_util.cpp b/core/fpdfapi/parser/object_tree_traversal_util.cpp
new file mode 100644
index 0000000..e1a0d52
--- /dev/null
+++ b/core/fpdfapi/parser/object_tree_traversal_util.cpp
@@ -0,0 +1,221 @@
+// Copyright 2023 The PDFium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "core/fpdfapi/parser/object_tree_traversal_util.h"
+
+#include <stdint.h>
+
+#include <map>
+#include <queue>
+#include <set>
+#include <utility>
+#include <vector>
+
+#include "core/fpdfapi/parser/cpdf_array.h"
+#include "core/fpdfapi/parser/cpdf_dictionary.h"
+#include "core/fpdfapi/parser/cpdf_document.h"
+#include "core/fpdfapi/parser/cpdf_reference.h"
+#include "core/fpdfapi/parser/cpdf_stream.h"
+#include "third_party/base/check.h"
+#include "third_party/base/containers/contains.h"
+
+namespace {
+
+class ObjectTreeTraverser {
+ public:
+ explicit ObjectTreeTraverser(const CPDF_Document* document)
+ : document_(document) {
+ const CPDF_Parser* parser = document_->GetParser();
+ const CPDF_Dictionary* trailer = parser ? parser->GetTrailer() : nullptr;
+ const CPDF_Dictionary* root = trailer ? trailer : document_->GetRoot();
+ const uint32_t root_object_number =
+ trailer ? parser->GetTrailerObjectNumber() : root->GetObjNum();
+ // If `root` is a trailer, then it may not have an object number, as many
+ // trailers are inlined.
+ if (root_object_number) {
+ referenced_objects_[root_object_number] = 1;
+ object_number_map_[root] = root_object_number;
+ }
+
+ object_queue_.push(pdfium::WrapRetain(root));
+ seen_objects_.insert(root);
+ }
+ ~ObjectTreeTraverser() = default;
+
+ void Traverse() { CalculateReferenceCounts(GetReferenceEntries()); }
+
+ const std::map<uint32_t, int>& referenced_objects() {
+ return referenced_objects_;
+ }
+
+ private:
+ struct ReferenceEntry {
+ uint32_t ref_object_number;
+ uint32_t referenced_object_number;
+ };
+
+ std::vector<ReferenceEntry> GetReferenceEntries() {
+ std::vector<ReferenceEntry> reference_entries;
+ while (!object_queue_.empty()) {
+ RetainPtr<const CPDF_Object> current_object = object_queue_.front();
+ object_queue_.pop();
+
+ switch (current_object->GetType()) {
+ case CPDF_Object::kArray: {
+ CPDF_ArrayLocker locker(current_object->AsArray());
+ for (const auto& it : locker) {
+ PushNewObject(current_object, it);
+ }
+ break;
+ }
+ case CPDF_Object::kDictionary: {
+ CPDF_DictionaryLocker locker(current_object->AsDictionary());
+ for (const auto& it : locker) {
+ PushNewObject(current_object, it.second);
+ }
+ break;
+ }
+ case CPDF_Object::kReference: {
+ const CPDF_Reference* ref_object = current_object->AsReference();
+ const uint32_t ref_object_number = GetObjectNumber(ref_object);
+ const uint32_t referenced_object_number = ref_object->GetRefObjNum();
+ CHECK(referenced_object_number);
+
+ RetainPtr<const CPDF_Object> referenced_object;
+ if (ref_object->HasIndirectObjectHolder()) {
+ // Calling GetIndirectObject() does not work for normal references.
+ referenced_object = ref_object->GetDirect();
+ } else {
+ // Calling GetDirect() does not work for references from trailers.
+ referenced_object =
+ document_->GetIndirectObject(referenced_object_number);
+ }
+ // Unlike the other object types, CPDF_Reference can point at nullptr.
+ if (referenced_object) {
+ reference_entries.push_back(
+ {ref_object_number, referenced_object_number});
+ PushNewObject(ref_object, referenced_object);
+ }
+ break;
+ }
+ case CPDF_Object::kStream: {
+ RetainPtr<const CPDF_Dictionary> dict =
+ current_object->AsStream()->GetDict();
+ CHECK(dict->IsInline()); // i.e. No object number.
+ CPDF_DictionaryLocker locker(dict);
+ for (const auto& it : locker) {
+ PushNewObject(current_object, it.second);
+ }
+ break;
+ }
+ default: {
+ break;
+ }
+ }
+ }
+ return reference_entries;
+ }
+
+ void CalculateReferenceCounts(
+ const std::vector<ReferenceEntry>& reference_entries) {
+ // Tracks PDF objects that referenced other PDF objects, identified by their
+ // object numbers. Never 0.
+ std::set<uint32_t> seen_ref_objects;
+
+ for (const ReferenceEntry& entry : reference_entries) {
+ // Make sure this is not a self-reference.
+ if (entry.referenced_object_number == entry.ref_object_number) {
+ continue;
+ }
+
+ // Make sure this is not a circular reference.
+ if (pdfium::Contains(seen_ref_objects, entry.ref_object_number) &&
+ pdfium::Contains(seen_ref_objects, entry.referenced_object_number)) {
+ continue;
+ }
+
+ ++referenced_objects_[entry.referenced_object_number];
+ if (entry.ref_object_number) {
+ seen_ref_objects.insert(entry.ref_object_number);
+ }
+ }
+ }
+
+ void PushNewObject(const CPDF_Object* parent_object,
+ RetainPtr<const CPDF_Object> child_object) {
+ CHECK(parent_object);
+ CHECK(child_object);
+ const bool inserted = seen_objects_.insert(child_object).second;
+ if (!inserted) {
+ return;
+ }
+ const uint32_t child_object_number = child_object->GetObjNum();
+ if (child_object_number) {
+ object_number_map_[child_object] = child_object_number;
+ } else {
+ // This search can fail for inlined trailers.
+ auto it = object_number_map_.find(parent_object);
+ if (it != object_number_map_.end()) {
+ object_number_map_[child_object] = it->second;
+ }
+ }
+ object_queue_.push(std::move(child_object));
+ }
+
+ // Returns 0 if not found.
+ uint32_t GetObjectNumber(const CPDF_Object* object) const {
+ auto it = object_number_map_.find(object);
+ return it != object_number_map_.end() ? it->second : 0;
+ }
+
+ const CPDF_Document* const document_;
+
+ // Queue of objects to traverse.
+ // - Pointers in the queue are non-null.
+ // - The same pointer never enters the queue twice.
+ std::queue<RetainPtr<const CPDF_Object>> object_queue_;
+
+ // Map of objects to "top-level" object numbers. For inline objects, this is
+ // the ancestor object with an object number. The keys are non-null and the
+ // values are never 0.
+ // This is used to prevent self-references, as a single PDF object, with
+ // inlined objects, is represented by multiple CPDF_Objects.
+ std::map<const CPDF_Object*, uint32_t> object_number_map_;
+
+ // Tracks traversed objects to prevent duplicates from getting into
+ // `object_queue_` and `object_number_map_`.
+ std::set<const CPDF_Object*> seen_objects_;
+
+ // Tracks which PDF objects are referenced.
+ // Key: object number
+ // Value: number of times referenced
+ std::map<uint32_t, int> referenced_objects_;
+};
+
+} // namespace
+
+std::set<uint32_t> GetObjectsWithReferences(const CPDF_Document* document) {
+ ObjectTreeTraverser traverser(document);
+ traverser.Traverse();
+
+ std::set<uint32_t> results;
+ for (const auto& it : traverser.referenced_objects()) {
+ results.insert(it.first);
+ }
+ return results;
+}
+
+std::set<uint32_t> GetObjectsWithMultipleReferences(
+ const CPDF_Document* document) {
+ ObjectTreeTraverser traverser(document);
+ traverser.Traverse();
+
+ std::set<uint32_t> results;
+ for (const auto& it : traverser.referenced_objects()) {
+ if (it.second > 1) {
+ results.insert(it.first);
+ }
+ }
+ return results;
+}
diff --git a/core/fpdfapi/parser/object_tree_traversal_util.h b/core/fpdfapi/parser/object_tree_traversal_util.h
new file mode 100644
index 0000000..e9db96d
--- /dev/null
+++ b/core/fpdfapi/parser/object_tree_traversal_util.h
@@ -0,0 +1,46 @@
+// Copyright 2023 The PDFium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CORE_FPDFAPI_PARSER_OBJECT_TREE_TRAVERSAL_UTIL_H_
+#define CORE_FPDFAPI_PARSER_OBJECT_TREE_TRAVERSAL_UTIL_H_
+
+#include <stdint.h>
+
+#include <set>
+
+class CPDF_Document;
+
+// Traverses `document` starting with its trailer, if it has one, or starting at
+// the catalog, which always exists. The trailer should have a reference to the
+// catalog. The traversal avoids cycles.
+// Returns all the PDF objects (not CPDF_Objects) the traversal reached as a set
+// of object numbers.
+std::set<uint32_t> GetObjectsWithReferences(const CPDF_Document* document);
+
+// Same as GetObjectsWithReferences(), but only returns the objects with
+// multiple references. References that would create a cycle are ignored.
+//
+// In this example, where (A) is the root node:
+//
+// A -> B
+// A -> C
+// B -> D
+// C -> D
+//
+// GetObjectsWithMultipleReferences() returns {D}, since both (B) and (C)
+// reference (D), and there are no cycles.
+//
+// In this example, where (A) is the root node:
+//
+// A -> B
+// B -> C
+// C -> B
+//
+// GetObjectsWithMultipleReferences() returns {}, even though both (A) and (C)
+// reference (B). Since (B) -> (C) -> (B) creates a cycle, the (C) -> (B)
+// reference does not count.
+std::set<uint32_t> GetObjectsWithMultipleReferences(
+ const CPDF_Document* document);
+
+#endif // CORE_FPDFAPI_PARSER_OBJECT_TREE_TRAVERSAL_UTIL_H_
diff --git a/core/fpdfapi/parser/object_tree_traversal_util_embeddertest.cpp b/core/fpdfapi/parser/object_tree_traversal_util_embeddertest.cpp
new file mode 100644
index 0000000..c90c77e
--- /dev/null
+++ b/core/fpdfapi/parser/object_tree_traversal_util_embeddertest.cpp
@@ -0,0 +1,96 @@
+// Copyright 2023 The PDFium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "core/fpdfapi/parser/object_tree_traversal_util.h"
+
+#include <stdint.h>
+
+#include <set>
+
+#include "core/fpdfapi/parser/cpdf_document.h"
+#include "testing/embedder_test.h"
+#include "testing/gmock/include/gmock/gmock.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+using testing::UnorderedElementsAreArray;
+using ObjectTreeTraversalUtilEmbedderTest = EmbedderTest;
+
+namespace {
+
+CPDF_Document* GetCPDFDocument(FPDF_DOCUMENT document) {
+ // This is cheating slightly to avoid a layering violation, since this file
+ // cannot include fpdfsdk/cpdfsdk_helpers.h to get access to
+ // CPDFDocumentFromFPDFDocument().
+ return reinterpret_cast<CPDF_Document*>((document));
+}
+
+} // namespace
+
+TEST_F(ObjectTreeTraversalUtilEmbedderTest, GetObjectsWithReferencesBasic) {
+ ASSERT_TRUE(OpenDocument("hello_world.pdf"));
+ CPDF_Document* doc = GetCPDFDocument(document());
+ std::set<uint32_t> referenced_objects = GetObjectsWithReferences(doc);
+ EXPECT_THAT(referenced_objects,
+ UnorderedElementsAreArray({1, 2, 3, 4, 5, 6}));
+}
+
+TEST_F(ObjectTreeTraversalUtilEmbedderTest, GetObjectsWithReferencesNewDoc) {
+ ScopedFPDFDocument new_doc(FPDF_CreateNewDocument());
+ CPDF_Document* doc = GetCPDFDocument(new_doc.get());
+ std::set<uint32_t> referenced_objects = GetObjectsWithReferences(doc);
+ // Empty documents have a catalog and an empty pages object.
+ EXPECT_THAT(referenced_objects, UnorderedElementsAreArray({1, 2}));
+}
+
+TEST_F(ObjectTreeTraversalUtilEmbedderTest,
+ GetObjectsWithReferencesCircularRefs) {
+ ASSERT_TRUE(OpenDocument("circular_viewer_ref.pdf"));
+ CPDF_Document* doc = GetCPDFDocument(document());
+ std::set<uint32_t> referenced_objects = GetObjectsWithReferences(doc);
+ // The trailer points at a catalog, and the catalog only references itself.
+ EXPECT_THAT(referenced_objects, UnorderedElementsAreArray({1}));
+}
+
+TEST_F(ObjectTreeTraversalUtilEmbedderTest,
+ GetObjectsWithReferencesCrossRefStream) {
+ ASSERT_TRUE(OpenDocument("bug_1399.pdf"));
+ CPDF_Document* doc = GetCPDFDocument(document());
+ std::set<uint32_t> referenced_objects = GetObjectsWithReferences(doc);
+ // The trailer is the dictionary inside /XRef object 16 0. Note that it
+ // references object 3 0, but the rest of the document does not.
+ EXPECT_THAT(referenced_objects,
+ UnorderedElementsAreArray({1, 2, 3, 4, 5, 12, 13, 14, 16}));
+}
+
+TEST_F(ObjectTreeTraversalUtilEmbedderTest,
+ GetObjectsWithMultipleReferencesBasic) {
+ ASSERT_TRUE(OpenDocument("hello_world.pdf"));
+ CPDF_Document* doc = GetCPDFDocument(document());
+ std::set<uint32_t> referenced_objects = GetObjectsWithMultipleReferences(doc);
+ EXPECT_TRUE(referenced_objects.empty());
+}
+
+TEST_F(ObjectTreeTraversalUtilEmbedderTest,
+ GetObjectsWithMultipleReferencesNewDoc) {
+ ScopedFPDFDocument new_doc(FPDF_CreateNewDocument());
+ CPDF_Document* doc = GetCPDFDocument(new_doc.get());
+ std::set<uint32_t> referenced_objects = GetObjectsWithMultipleReferences(doc);
+ EXPECT_TRUE(referenced_objects.empty());
+}
+
+TEST_F(ObjectTreeTraversalUtilEmbedderTest,
+ GetObjectsWithMultipleReferencesCircularRefs) {
+ ASSERT_TRUE(OpenDocument("circular_viewer_ref.pdf"));
+ CPDF_Document* doc = GetCPDFDocument(document());
+ std::set<uint32_t> referenced_objects = GetObjectsWithMultipleReferences(doc);
+ EXPECT_TRUE(referenced_objects.empty());
+}
+
+TEST_F(ObjectTreeTraversalUtilEmbedderTest,
+ GetObjectsWithMultipleReferencesSharedObjects) {
+ ASSERT_TRUE(OpenDocument("hello_world_2_pages.pdf"));
+ CPDF_Document* doc = GetCPDFDocument(document());
+ std::set<uint32_t> referenced_objects = GetObjectsWithMultipleReferences(doc);
+ EXPECT_THAT(referenced_objects, UnorderedElementsAreArray({5, 6, 7}));
+}
diff --git a/fpdfsdk/fpdf_edit_embeddertest.cpp b/fpdfsdk/fpdf_edit_embeddertest.cpp
index ce462d4..91b9d6b 100644
--- a/fpdfsdk/fpdf_edit_embeddertest.cpp
+++ b/fpdfsdk/fpdf_edit_embeddertest.cpp
@@ -37,6 +37,8 @@
#include "third_party/base/check.h"
using pdfium::HelloWorldChecksum;
+using testing::HasSubstr;
+using testing::Not;
namespace {
@@ -904,6 +906,7 @@
{
ScopedFPDFPageObject page_object(FPDFPage_GetObject(page, 0));
ASSERT_TRUE(page_object);
+ ASSERT_EQ(FPDF_PAGEOBJ_TEXT, FPDFPageObj_GetType(page_object.get()));
EXPECT_TRUE(FPDFPage_RemoveObject(page, page_object.get()));
}
ASSERT_EQ(1, FPDFPage_CountObjects(page));
@@ -925,9 +928,197 @@
ASSERT_TRUE(FPDF_SaveAsCopy(document(), this, 0));
VerifySavedDocument(200, 200, FirstRemovedChecksum());
+ // Verify removed/renamed resources are no longer there.
+ EXPECT_THAT(GetString(), Not(HasSubstr("/F1")));
+ EXPECT_THAT(GetString(), Not(HasSubstr("/F2")));
+ EXPECT_THAT(GetString(), Not(HasSubstr("/Times-Roman")));
+
UnloadPage(page);
}
+TEST_F(FPDFEditEmbedderTest,
+ RemoveTextObjectWithTwoPagesSharingContentStreamAndResources) {
+ // Load document with some text.
+ ASSERT_TRUE(OpenDocument("hello_world_2_pages.pdf"));
+ FPDF_PAGE page1 = LoadPage(0);
+ ASSERT_TRUE(page1);
+ FPDF_PAGE page2 = LoadPage(1);
+ ASSERT_TRUE(page2);
+
+ // Show what the original file looks like.
+ {
+ ScopedFPDFBitmap page1_bitmap = RenderPage(page1);
+ CompareBitmap(page1_bitmap.get(), 200, 200, HelloWorldChecksum());
+ ScopedFPDFBitmap page2_bitmap = RenderPage(page2);
+ CompareBitmap(page2_bitmap.get(), 200, 200, HelloWorldChecksum());
+ }
+
+ // Get the "Hello, world!" text object from page 1 and remove it.
+ ASSERT_EQ(2, FPDFPage_CountObjects(page1));
+ {
+ ScopedFPDFPageObject page_object(FPDFPage_GetObject(page1, 0));
+ ASSERT_TRUE(page_object);
+ ASSERT_EQ(FPDF_PAGEOBJ_TEXT, FPDFPageObj_GetType(page_object.get()));
+ EXPECT_TRUE(FPDFPage_RemoveObject(page1, page_object.get()));
+ }
+ ASSERT_EQ(1, FPDFPage_CountObjects(page1));
+
+ // Verify the "Hello, world!" text is gone from page 1.
+ {
+ ScopedFPDFBitmap page1_bitmap = RenderPage(page1);
+ CompareBitmap(page1_bitmap.get(), 200, 200, FirstRemovedChecksum());
+ ScopedFPDFBitmap page2_bitmap = RenderPage(page2);
+ CompareBitmap(page2_bitmap.get(), 200, 200, HelloWorldChecksum());
+ }
+
+ // Verify the rendering again after calling FPDFPage_GenerateContent().
+ ASSERT_TRUE(FPDFPage_GenerateContent(page1));
+ {
+ ScopedFPDFBitmap page1_bitmap = RenderPage(page1);
+ CompareBitmap(page1_bitmap.get(), 200, 200, FirstRemovedChecksum());
+ ScopedFPDFBitmap page2_bitmap = RenderPage(page2);
+ CompareBitmap(page2_bitmap.get(), 200, 200, HelloWorldChecksum());
+ }
+
+ // Save the document and verify it after reloading.
+ ASSERT_TRUE(FPDF_SaveAsCopy(document(), this, 0));
+ ASSERT_TRUE(OpenSavedDocument());
+ FPDF_PAGE saved_page1 = LoadSavedPage(0);
+ VerifySavedRendering(saved_page1, 200, 200, FirstRemovedChecksum());
+ CloseSavedPage(saved_page1);
+ FPDF_PAGE saved_page2 = LoadSavedPage(1);
+ VerifySavedRendering(saved_page2, 200, 200, HelloWorldChecksum());
+ CloseSavedPage(saved_page2);
+ CloseSavedDocument();
+
+ std::vector<std::string> split_saved_data = StringSplit(GetString(), '\n');
+ // Verify removed/renamed resources appear in the saved PDF the correct
+ // number of times.
+ EXPECT_THAT(split_saved_data, Contains(HasSubstr("/F1")).Times(1));
+ EXPECT_THAT(split_saved_data, Contains(HasSubstr("/F2")).Times(1));
+ EXPECT_THAT(split_saved_data, Contains(HasSubstr("/Times-Roman")).Times(1));
+
+ UnloadPage(page1);
+ UnloadPage(page2);
+}
+
+TEST_F(FPDFEditEmbedderTest,
+ RemoveTextObjectWithTwoPagesSharingContentArrayAndResources) {
+ // Load document with some text.
+ ASSERT_TRUE(OpenDocument("hello_world_2_pages_split_streams.pdf"));
+ FPDF_PAGE page1 = LoadPage(0);
+ ASSERT_TRUE(page1);
+ FPDF_PAGE page2 = LoadPage(1);
+ ASSERT_TRUE(page2);
+
+ // Show what the original file looks like.
+ {
+ ScopedFPDFBitmap page1_bitmap = RenderPage(page1);
+ CompareBitmap(page1_bitmap.get(), 200, 200, HelloWorldChecksum());
+ ScopedFPDFBitmap page2_bitmap = RenderPage(page2);
+ CompareBitmap(page2_bitmap.get(), 200, 200, HelloWorldChecksum());
+ }
+
+ // Get the "Hello, world!" text object from page 1 and remove it.
+ ASSERT_EQ(2, FPDFPage_CountObjects(page1));
+ {
+ ScopedFPDFPageObject page_object(FPDFPage_GetObject(page1, 0));
+ ASSERT_TRUE(page_object);
+ ASSERT_EQ(FPDF_PAGEOBJ_TEXT, FPDFPageObj_GetType(page_object.get()));
+ EXPECT_TRUE(FPDFPage_RemoveObject(page1, page_object.get()));
+ }
+ ASSERT_EQ(1, FPDFPage_CountObjects(page1));
+
+ // Verify the "Hello, world!" text is gone from page 1.
+ {
+ ScopedFPDFBitmap page1_bitmap = RenderPage(page1);
+ CompareBitmap(page1_bitmap.get(), 200, 200, FirstRemovedChecksum());
+ ScopedFPDFBitmap page2_bitmap = RenderPage(page2);
+ CompareBitmap(page2_bitmap.get(), 200, 200, HelloWorldChecksum());
+ }
+
+ // Verify the rendering again after calling FPDFPage_GenerateContent().
+ ASSERT_TRUE(FPDFPage_GenerateContent(page1));
+ {
+ ScopedFPDFBitmap page1_bitmap = RenderPage(page1);
+ CompareBitmap(page1_bitmap.get(), 200, 200, FirstRemovedChecksum());
+ ScopedFPDFBitmap page2_bitmap = RenderPage(page2);
+ CompareBitmap(page2_bitmap.get(), 200, 200, HelloWorldChecksum());
+ }
+
+ // Save the document and verify it after reloading.
+ ASSERT_TRUE(FPDF_SaveAsCopy(document(), this, 0));
+ ASSERT_TRUE(OpenSavedDocument());
+ FPDF_PAGE saved_page1 = LoadSavedPage(0);
+ VerifySavedRendering(saved_page1, 200, 200, FirstRemovedChecksum());
+ CloseSavedPage(saved_page1);
+ FPDF_PAGE saved_page2 = LoadSavedPage(1);
+ VerifySavedRendering(saved_page2, 200, 200, HelloWorldChecksum());
+ CloseSavedPage(saved_page2);
+ CloseSavedDocument();
+
+ UnloadPage(page1);
+ UnloadPage(page2);
+}
+
+TEST_F(FPDFEditEmbedderTest, RemoveTextObjectWithTwoPagesSharingResourcesDict) {
+ // Load document with some text.
+ ASSERT_TRUE(OpenDocument("hello_world_2_pages_shared_resources_dict.pdf"));
+ FPDF_PAGE page1 = LoadPage(0);
+ ASSERT_TRUE(page1);
+ FPDF_PAGE page2 = LoadPage(1);
+ ASSERT_TRUE(page2);
+
+ // Show what the original file looks like.
+ {
+ ScopedFPDFBitmap page1_bitmap = RenderPage(page1);
+ CompareBitmap(page1_bitmap.get(), 200, 200, HelloWorldChecksum());
+ ScopedFPDFBitmap page2_bitmap = RenderPage(page2);
+ CompareBitmap(page2_bitmap.get(), 200, 200, HelloWorldChecksum());
+ }
+
+ // Get the "Hello, world!" text object from page 1 and remove it.
+ ASSERT_EQ(2, FPDFPage_CountObjects(page1));
+ {
+ ScopedFPDFPageObject page_object(FPDFPage_GetObject(page1, 0));
+ ASSERT_TRUE(page_object);
+ ASSERT_EQ(FPDF_PAGEOBJ_TEXT, FPDFPageObj_GetType(page_object.get()));
+ EXPECT_TRUE(FPDFPage_RemoveObject(page1, page_object.get()));
+ }
+ ASSERT_EQ(1, FPDFPage_CountObjects(page1));
+
+ // Verify the "Hello, world!" text is gone from page 1.
+ {
+ ScopedFPDFBitmap page1_bitmap = RenderPage(page1);
+ CompareBitmap(page1_bitmap.get(), 200, 200, FirstRemovedChecksum());
+ ScopedFPDFBitmap page2_bitmap = RenderPage(page2);
+ CompareBitmap(page2_bitmap.get(), 200, 200, HelloWorldChecksum());
+ }
+
+ // Verify the rendering again after calling FPDFPage_GenerateContent().
+ ASSERT_TRUE(FPDFPage_GenerateContent(page1));
+ {
+ ScopedFPDFBitmap page1_bitmap = RenderPage(page1);
+ CompareBitmap(page1_bitmap.get(), 200, 200, FirstRemovedChecksum());
+ ScopedFPDFBitmap page2_bitmap = RenderPage(page2);
+ CompareBitmap(page2_bitmap.get(), 200, 200, HelloWorldChecksum());
+ }
+
+ // Save the document and verify it after reloading.
+ ASSERT_TRUE(FPDF_SaveAsCopy(document(), this, 0));
+ ASSERT_TRUE(OpenSavedDocument());
+ FPDF_PAGE saved_page1 = LoadSavedPage(0);
+ VerifySavedRendering(saved_page1, 200, 200, FirstRemovedChecksum());
+ CloseSavedPage(saved_page1);
+ FPDF_PAGE saved_page2 = LoadSavedPage(1);
+ VerifySavedRendering(saved_page2, 200, 200, HelloWorldChecksum());
+ CloseSavedPage(saved_page2);
+ CloseSavedDocument();
+
+ UnloadPage(page1);
+ UnloadPage(page2);
+}
+
void CheckMarkCounts(FPDF_PAGE page,
int start_from,
int expected_object_count,
@@ -2874,10 +3065,11 @@
"5384da3406d62360ffb5cac4476fff1c");
}
- // Never mind, my new favorite color is blue, increase alpha
+ // Never mind, my new favorite color is blue, increase alpha.
+ // The red graphics state goes away.
EXPECT_TRUE(FPDFPageObj_SetFillColor(rect, 0, 0, 255, 180));
EXPECT_TRUE(FPDFPage_GenerateContent(page));
- EXPECT_EQ(3u, graphics_dict->size());
+ EXPECT_EQ(2u, graphics_dict->size());
// Check that bitmap displays changed content
{
@@ -2888,7 +3080,7 @@
// And now generate, without changes
EXPECT_TRUE(FPDFPage_GenerateContent(page));
- EXPECT_EQ(3u, graphics_dict->size());
+ EXPECT_EQ(2u, graphics_dict->size());
{
ScopedFPDFBitmap page_bitmap = RenderPage(page);
CompareBitmap(page_bitmap.get(), 612, 792,
@@ -2911,7 +3103,7 @@
// Generate yet again, check dicts are reasonably sized
EXPECT_TRUE(FPDFPage_GenerateContent(page));
- EXPECT_EQ(3u, graphics_dict->size());
+ EXPECT_EQ(2u, graphics_dict->size());
EXPECT_EQ(1u, font_dict->size());
FPDF_ClosePage(page);
}
diff --git a/fpdfsdk/fpdf_save_embeddertest.cpp b/fpdfsdk/fpdf_save_embeddertest.cpp
index 413bd91..3f5efd8 100644
--- a/fpdfsdk/fpdf_save_embeddertest.cpp
+++ b/fpdfsdk/fpdf_save_embeddertest.cpp
@@ -15,41 +15,45 @@
#include "testing/gmock/include/gmock/gmock-matchers.h"
#include "testing/gtest/include/gtest/gtest.h"
+using testing::HasSubstr;
+using testing::Not;
+using testing::StartsWith;
+
class FPDFSaveEmbedderTest : public EmbedderTest {};
TEST_F(FPDFSaveEmbedderTest, SaveSimpleDoc) {
ASSERT_TRUE(OpenDocument("hello_world.pdf"));
EXPECT_TRUE(FPDF_SaveAsCopy(document(), this, 0));
- EXPECT_THAT(GetString(), testing::StartsWith("%PDF-1.7\r\n"));
+ EXPECT_THAT(GetString(), StartsWith("%PDF-1.7\r\n"));
EXPECT_EQ(805u, GetString().size());
}
TEST_F(FPDFSaveEmbedderTest, SaveSimpleDocWithVersion) {
ASSERT_TRUE(OpenDocument("hello_world.pdf"));
EXPECT_TRUE(FPDF_SaveWithVersion(document(), this, 0, 14));
- EXPECT_THAT(GetString(), testing::StartsWith("%PDF-1.4\r\n"));
+ EXPECT_THAT(GetString(), StartsWith("%PDF-1.4\r\n"));
EXPECT_EQ(805u, GetString().size());
}
TEST_F(FPDFSaveEmbedderTest, SaveSimpleDocWithBadVersion) {
ASSERT_TRUE(OpenDocument("hello_world.pdf"));
EXPECT_TRUE(FPDF_SaveWithVersion(document(), this, 0, -1));
- EXPECT_THAT(GetString(), testing::StartsWith("%PDF-1.7\r\n"));
+ EXPECT_THAT(GetString(), StartsWith("%PDF-1.7\r\n"));
ClearString();
EXPECT_TRUE(FPDF_SaveWithVersion(document(), this, 0, 0));
- EXPECT_THAT(GetString(), testing::StartsWith("%PDF-1.7\r\n"));
+ EXPECT_THAT(GetString(), StartsWith("%PDF-1.7\r\n"));
ClearString();
EXPECT_TRUE(FPDF_SaveWithVersion(document(), this, 0, 18));
- EXPECT_THAT(GetString(), testing::StartsWith("%PDF-1.7\r\n"));
+ EXPECT_THAT(GetString(), StartsWith("%PDF-1.7\r\n"));
}
TEST_F(FPDFSaveEmbedderTest, SaveSimpleDocIncremental) {
ASSERT_TRUE(OpenDocument("hello_world.pdf"));
EXPECT_TRUE(FPDF_SaveWithVersion(document(), this, FPDF_INCREMENTAL, 14));
// Version gets taken as-is from input document.
- EXPECT_THAT(GetString(), testing::StartsWith("%PDF-1.7\n%\xa0\xf2\xa4\xf4"));
+ EXPECT_THAT(GetString(), StartsWith("%PDF-1.7\n%\xa0\xf2\xa4\xf4"));
// Additional output produced vs. non incremental.
EXPECT_EQ(985u, GetString().size());
}
@@ -57,21 +61,21 @@
TEST_F(FPDFSaveEmbedderTest, SaveSimpleDocNoIncremental) {
ASSERT_TRUE(OpenDocument("hello_world.pdf"));
EXPECT_TRUE(FPDF_SaveWithVersion(document(), this, FPDF_NO_INCREMENTAL, 14));
- EXPECT_THAT(GetString(), testing::StartsWith("%PDF-1.4\r\n"));
+ EXPECT_THAT(GetString(), StartsWith("%PDF-1.4\r\n"));
EXPECT_EQ(805u, GetString().size());
}
TEST_F(FPDFSaveEmbedderTest, SaveSimpleDocRemoveSecurity) {
ASSERT_TRUE(OpenDocument("hello_world.pdf"));
EXPECT_TRUE(FPDF_SaveWithVersion(document(), this, FPDF_REMOVE_SECURITY, 14));
- EXPECT_THAT(GetString(), testing::StartsWith("%PDF-1.4\r\n"));
+ EXPECT_THAT(GetString(), StartsWith("%PDF-1.4\r\n"));
EXPECT_EQ(805u, GetString().size());
}
TEST_F(FPDFSaveEmbedderTest, SaveSimpleDocBadFlags) {
ASSERT_TRUE(OpenDocument("hello_world.pdf"));
EXPECT_TRUE(FPDF_SaveWithVersion(document(), this, 999999, 14));
- EXPECT_THAT(GetString(), testing::StartsWith("%PDF-1.4\r\n"));
+ EXPECT_THAT(GetString(), StartsWith("%PDF-1.4\r\n"));
EXPECT_EQ(805u, GetString().size());
}
@@ -106,10 +110,15 @@
}
EXPECT_TRUE(FPDF_SaveAsCopy(document(), this, 0));
- EXPECT_THAT(GetString(), testing::StartsWith("%PDF-1.6\r\n"));
- EXPECT_THAT(GetString(), testing::HasSubstr("/Root "));
- EXPECT_THAT(GetString(), testing::HasSubstr("/Info "));
- EXPECT_EQ(8219u, GetString().size());
+ EXPECT_THAT(GetString(), StartsWith("%PDF-1.6\r\n"));
+ EXPECT_THAT(GetString(), HasSubstr("/Root "));
+ EXPECT_THAT(GetString(), HasSubstr("/Info "));
+ EXPECT_THAT(GetString(), HasSubstr("/Size 37"));
+ EXPECT_THAT(GetString(), HasSubstr("35 0 obj"));
+ EXPECT_THAT(GetString(), HasSubstr("36 0 obj"));
+ EXPECT_THAT(GetString(), Not(HasSubstr("37 0 obj")));
+ EXPECT_THAT(GetString(), Not(HasSubstr("38 0 obj")));
+ EXPECT_EQ(7908u, GetString().size());
// Make sure new document renders the same as the old one.
ASSERT_TRUE(OpenSavedDocument());
@@ -146,19 +155,17 @@
CloseSavedPage(saved_page);
CloseSavedDocument();
- EXPECT_THAT(GetString(), testing::StartsWith("%PDF-1.7\r\n"));
- EXPECT_THAT(GetString(), testing::HasSubstr("/Root "));
- // TODO(crbug.com/pdfium/1409): The PDF should not have any images, given it
- // is rendering blank. The file size should also be a lot smaller.
- EXPECT_THAT(GetString(), testing::HasSubstr("/Image"));
- EXPECT_LT(GetString().size(), 1300u);
+ EXPECT_THAT(GetString(), StartsWith("%PDF-1.7\r\n"));
+ EXPECT_THAT(GetString(), HasSubstr("/Root "));
+ EXPECT_THAT(GetString(), Not(HasSubstr("/Image")));
+ EXPECT_LT(GetString().size(), 600u);
}
#ifdef PDF_ENABLE_XFA
TEST_F(FPDFSaveEmbedderTest, SaveXFADoc) {
ASSERT_TRUE(OpenDocument("simple_xfa.pdf"));
EXPECT_TRUE(FPDF_SaveAsCopy(document(), this, 0));
- EXPECT_THAT(GetString(), testing::StartsWith("%PDF-1.7\r\n"));
+ EXPECT_THAT(GetString(), StartsWith("%PDF-1.7\r\n"));
ASSERT_TRUE(OpenSavedDocument());
// TODO(tsepez): check for XFA forms in document
CloseSavedDocument();
@@ -168,15 +175,14 @@
TEST_F(FPDFSaveEmbedderTest, BUG_342) {
ASSERT_TRUE(OpenDocument("hello_world.pdf"));
EXPECT_TRUE(FPDF_SaveAsCopy(document(), this, 0));
- EXPECT_THAT(GetString(), testing::HasSubstr("0000000000 65535 f\r\n"));
- EXPECT_THAT(GetString(),
- testing::Not(testing::HasSubstr("0000000000 65536 f\r\n")));
+ EXPECT_THAT(GetString(), HasSubstr("0000000000 65535 f\r\n"));
+ EXPECT_THAT(GetString(), Not(HasSubstr("0000000000 65536 f\r\n")));
}
TEST_F(FPDFSaveEmbedderTest, BUG_905142) {
ASSERT_TRUE(OpenDocument("bug_905142.pdf"));
EXPECT_TRUE(FPDF_SaveAsCopy(document(), this, 0));
- EXPECT_THAT(GetString(), testing::HasSubstr("/Length 0"));
+ EXPECT_THAT(GetString(), HasSubstr("/Length 0"));
}
// Should not trigger a DCHECK() failure in CFX_FileBufferArchive.
@@ -184,5 +190,5 @@
TEST_F(FPDFSaveEmbedderTest, Bug1328389) {
ASSERT_TRUE(OpenDocument("bug_1328389.pdf"));
EXPECT_TRUE(FPDF_SaveAsCopy(document(), this, 0));
- EXPECT_THAT(GetString(), testing::HasSubstr("/Foo/"));
+ EXPECT_THAT(GetString(), HasSubstr("/Foo/"));
}
diff --git a/testing/resources/hello_world_2_pages.in b/testing/resources/hello_world_2_pages.in
new file mode 100644
index 0000000..ec33354
--- /dev/null
+++ b/testing/resources/hello_world_2_pages.in
@@ -0,0 +1,67 @@
+{{header}}
+{{object 1 0}} <<
+ /Type /Catalog
+ /Pages 2 0 R
+>>
+endobj
+{{object 2 0}} <<
+ /Type /Pages
+ /Count 2
+ /Kids [3 0 R 4 0 R]
+ /MediaBox [0 0 200 200]
+>>
+endobj
+{{object 3 0}} <<
+ /Type /Page
+ /Parent 2 0 R
+ /Resources <<
+ /Font <<
+ /F1 5 0 R
+ /F2 6 0 R
+ >>
+ >>
+ /Contents 7 0 R
+>>
+endobj
+{{object 4 0}} <<
+ /Type /Page
+ /Parent 2 0 R
+ /Resources <<
+ /Font <<
+ /F1 5 0 R
+ /F2 6 0 R
+ >>
+ >>
+ /Contents 7 0 R
+>>
+endobj
+{{object 5 0}} <<
+ /Type /Font
+ /Subtype /Type1
+ /BaseFont /Times-Roman
+>>
+endobj
+{{object 6 0}} <<
+ /Type /Font
+ /Subtype /Type1
+ /BaseFont /Helvetica
+>>
+endobj
+{{object 7 0}} <<
+ {{streamlen}}
+>>
+stream
+BT
+20 50 Td
+/F1 12 Tf
+(Hello, world!) Tj
+0 50 Td
+/F2 16 Tf
+(Goodbye, world!) Tj
+ET
+endstream
+endobj
+{{xref}}
+{{trailer}}
+{{startxref}}
+%%EOF
diff --git a/testing/resources/hello_world_2_pages.pdf b/testing/resources/hello_world_2_pages.pdf
new file mode 100644
index 0000000..4942e3a
--- /dev/null
+++ b/testing/resources/hello_world_2_pages.pdf
@@ -0,0 +1,81 @@
+%PDF-1.7
+% ò¤ô
+1 0 obj <<
+ /Type /Catalog
+ /Pages 2 0 R
+>>
+endobj
+2 0 obj <<
+ /Type /Pages
+ /Count 2
+ /Kids [3 0 R 4 0 R]
+ /MediaBox [0 0 200 200]
+>>
+endobj
+3 0 obj <<
+ /Type /Page
+ /Parent 2 0 R
+ /Resources <<
+ /Font <<
+ /F1 5 0 R
+ /F2 6 0 R
+ >>
+ >>
+ /Contents 7 0 R
+>>
+endobj
+4 0 obj <<
+ /Type /Page
+ /Parent 2 0 R
+ /Resources <<
+ /Font <<
+ /F1 5 0 R
+ /F2 6 0 R
+ >>
+ >>
+ /Contents 7 0 R
+>>
+endobj
+5 0 obj <<
+ /Type /Font
+ /Subtype /Type1
+ /BaseFont /Times-Roman
+>>
+endobj
+6 0 obj <<
+ /Type /Font
+ /Subtype /Type1
+ /BaseFont /Helvetica
+>>
+endobj
+7 0 obj <<
+ /Length 83
+>>
+stream
+BT
+20 50 Td
+/F1 12 Tf
+(Hello, world!) Tj
+0 50 Td
+/F2 16 Tf
+(Goodbye, world!) Tj
+ET
+endstream
+endobj
+xref
+0 8
+0000000000 65535 f
+0000000015 00000 n
+0000000068 00000 n
+0000000163 00000 n
+0000000305 00000 n
+0000000447 00000 n
+0000000525 00000 n
+0000000601 00000 n
+trailer <<
+ /Root 1 0 R
+ /Size 8
+>>
+startxref
+735
+%%EOF
diff --git a/testing/resources/hello_world_2_pages_shared_resources_dict.in b/testing/resources/hello_world_2_pages_shared_resources_dict.in
new file mode 100644
index 0000000..9c30780
--- /dev/null
+++ b/testing/resources/hello_world_2_pages_shared_resources_dict.in
@@ -0,0 +1,64 @@
+{{header}}
+{{object 1 0}} <<
+ /Type /Catalog
+ /Pages 2 0 R
+>>
+endobj
+{{object 2 0}} <<
+ /Type /Pages
+ /Count 2
+ /Kids [3 0 R 4 0 R]
+ /MediaBox [0 0 200 200]
+>>
+endobj
+{{object 3 0}} <<
+ /Type /Page
+ /Parent 2 0 R
+ /Resources 5 0 R
+ /Contents 8 0 R
+>>
+endobj
+{{object 4 0}} <<
+ /Type /Page
+ /Parent 2 0 R
+ /Resources 5 0 R
+ /Contents 8 0 R
+>>
+endobj
+{{object 5 0}} <<
+ /Font <<
+ /F1 6 0 R
+ /F2 7 0 R
+ >>
+>>
+endobj
+{{object 6 0}} <<
+ /Type /Font
+ /Subtype /Type1
+ /BaseFont /Times-Roman
+>>
+endobj
+{{object 7 0}} <<
+ /Type /Font
+ /Subtype /Type1
+ /BaseFont /Helvetica
+>>
+endobj
+{{object 8 0}} <<
+ {{streamlen}}
+>>
+stream
+BT
+20 50 Td
+/F1 12 Tf
+(Hello, world!) Tj
+0 50 Td
+/F2 16 Tf
+(Goodbye, world!) Tj
+ET
+endstream
+endobj
+{{xref}}
+{{trailer}}
+{{startxref}}
+%%EOF
diff --git a/testing/resources/hello_world_2_pages_shared_resources_dict.pdf b/testing/resources/hello_world_2_pages_shared_resources_dict.pdf
new file mode 100644
index 0000000..06b2531
--- /dev/null
+++ b/testing/resources/hello_world_2_pages_shared_resources_dict.pdf
@@ -0,0 +1,79 @@
+%PDF-1.7
+% ò¤ô
+1 0 obj <<
+ /Type /Catalog
+ /Pages 2 0 R
+>>
+endobj
+2 0 obj <<
+ /Type /Pages
+ /Count 2
+ /Kids [3 0 R 4 0 R]
+ /MediaBox [0 0 200 200]
+>>
+endobj
+3 0 obj <<
+ /Type /Page
+ /Parent 2 0 R
+ /Resources 5 0 R
+ /Contents 8 0 R
+>>
+endobj
+4 0 obj <<
+ /Type /Page
+ /Parent 2 0 R
+ /Resources 5 0 R
+ /Contents 8 0 R
+>>
+endobj
+5 0 obj <<
+ /Font <<
+ /F1 6 0 R
+ /F2 7 0 R
+ >>
+>>
+endobj
+6 0 obj <<
+ /Type /Font
+ /Subtype /Type1
+ /BaseFont /Times-Roman
+>>
+endobj
+7 0 obj <<
+ /Type /Font
+ /Subtype /Type1
+ /BaseFont /Helvetica
+>>
+endobj
+8 0 obj <<
+ /Length 83
+>>
+stream
+BT
+20 50 Td
+/F1 12 Tf
+(Hello, world!) Tj
+0 50 Td
+/F2 16 Tf
+(Goodbye, world!) Tj
+ET
+endstream
+endobj
+xref
+0 9
+0000000000 65535 f
+0000000015 00000 n
+0000000068 00000 n
+0000000163 00000 n
+0000000251 00000 n
+0000000339 00000 n
+0000000404 00000 n
+0000000482 00000 n
+0000000558 00000 n
+trailer <<
+ /Root 1 0 R
+ /Size 9
+>>
+startxref
+692
+%%EOF
diff --git a/testing/resources/hello_world_2_pages_split_streams.in b/testing/resources/hello_world_2_pages_split_streams.in
new file mode 100644
index 0000000..ef68f5c
--- /dev/null
+++ b/testing/resources/hello_world_2_pages_split_streams.in
@@ -0,0 +1,75 @@
+{{header}}
+{{object 1 0}} <<
+ /Type /Catalog
+ /Pages 2 0 R
+>>
+endobj
+{{object 2 0}} <<
+ /Type /Pages
+ /Count 2
+ /Kids [3 0 R 4 0 R]
+ /MediaBox [0 0 200 200]
+>>
+endobj
+{{object 3 0}} <<
+ /Type /Page
+ /Parent 2 0 R
+ /Resources <<
+ /Font <<
+ /F1 5 0 R
+ /F2 6 0 R
+ >>
+ >>
+ /Contents 7 0 R
+>>
+endobj
+{{object 4 0}} <<
+ /Type /Page
+ /Parent 2 0 R
+ /Resources <<
+ /Font <<
+ /F1 5 0 R
+ /F2 6 0 R
+ >>
+ >>
+ /Contents 7 0 R
+>>
+endobj
+{{object 5 0}} <<
+ /Type /Font
+ /Subtype /Type1
+ /BaseFont /Times-Roman
+>>
+endobj
+{{object 6 0}} <<
+ /Type /Font
+ /Subtype /Type1
+ /BaseFont /Helvetica
+>>
+endobj
+{{object 7 0}} [8 0 R 9 0 R]
+{{object 8 0}} <<
+ {{streamlen}}
+>>
+stream
+BT
+20 50 Td
+/F1 12 Tf
+(Hello, world!) Tj
+endstream
+endobj
+{{object 9 0}} <<
+ {{streamlen}}
+>>
+stream
+BT
+20 100 Td
+/F2 16 Tf
+(Goodbye, world!) Tj
+ET
+endstream
+endobj
+{{xref}}
+{{trailer}}
+{{startxref}}
+%%EOF
diff --git a/testing/resources/hello_world_2_pages_split_streams.pdf b/testing/resources/hello_world_2_pages_split_streams.pdf
new file mode 100644
index 0000000..3ac829f
--- /dev/null
+++ b/testing/resources/hello_world_2_pages_split_streams.pdf
@@ -0,0 +1,91 @@
+%PDF-1.7
+% ò¤ô
+1 0 obj <<
+ /Type /Catalog
+ /Pages 2 0 R
+>>
+endobj
+2 0 obj <<
+ /Type /Pages
+ /Count 2
+ /Kids [3 0 R 4 0 R]
+ /MediaBox [0 0 200 200]
+>>
+endobj
+3 0 obj <<
+ /Type /Page
+ /Parent 2 0 R
+ /Resources <<
+ /Font <<
+ /F1 5 0 R
+ /F2 6 0 R
+ >>
+ >>
+ /Contents 7 0 R
+>>
+endobj
+4 0 obj <<
+ /Type /Page
+ /Parent 2 0 R
+ /Resources <<
+ /Font <<
+ /F1 5 0 R
+ /F2 6 0 R
+ >>
+ >>
+ /Contents 7 0 R
+>>
+endobj
+5 0 obj <<
+ /Type /Font
+ /Subtype /Type1
+ /BaseFont /Times-Roman
+>>
+endobj
+6 0 obj <<
+ /Type /Font
+ /Subtype /Type1
+ /BaseFont /Helvetica
+>>
+endobj
+7 0 obj [8 0 R 9 0 R]
+8 0 obj <<
+ /Length 41
+>>
+stream
+BT
+20 50 Td
+/F1 12 Tf
+(Hello, world!) Tj
+endstream
+endobj
+9 0 obj <<
+ /Length 47
+>>
+stream
+BT
+20 100 Td
+/F2 16 Tf
+(Goodbye, world!) Tj
+ET
+endstream
+endobj
+xref
+0 10
+0000000000 65535 f
+0000000015 00000 n
+0000000068 00000 n
+0000000163 00000 n
+0000000305 00000 n
+0000000447 00000 n
+0000000525 00000 n
+0000000601 00000 n
+0000000623 00000 n
+0000000715 00000 n
+trailer <<
+ /Root 1 0 R
+ /Size 10
+>>
+startxref
+813
+%%EOF