Add experimental FPDFPageObj_SetIsActive() API Introduce an experimental API to allow setting whether a page object is active. For documents being edited, this allows page objects to remain in memory yet be excluded during content generation and rendering. Bug: 377660088 Change-Id: I473a790c79df906a32bae7270780c63d3054c3ef Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/125630 Reviewed-by: Lei Zhang <thestig@chromium.org> Commit-Queue: Lei Zhang <thestig@chromium.org>
diff --git a/core/fpdfapi/edit/cpdf_pagecontentgenerator.cpp b/core/fpdfapi/edit/cpdf_pagecontentgenerator.cpp index a11a222..1639b5e 100644 --- a/core/fpdfapi/edit/cpdf_pagecontentgenerator.cpp +++ b/core/fpdfapi/edit/cpdf_pagecontentgenerator.cpp
@@ -136,6 +136,7 @@ CPDF_PageContentGenerator::CPDF_PageContentGenerator( CPDF_PageObjectHolder* pObjHolder) : m_pObjHolder(pObjHolder), m_pDocument(pObjHolder->GetDocument()) { + // Copy all page objects, even if they are inactive. for (const auto& pObj : *pObjHolder) { m_pageObjects.emplace_back(pObj.get()); } @@ -161,6 +162,9 @@ // Figure out which streams are dirty. std::set<int32_t> all_dirty_streams; for (auto& pPageObj : m_pageObjects) { + // Must include dirty page objects even if they are marked as inactive. + // Otherwise an inactive object will not be detected that its stream needs + // to be removed as part of regeneration. if (pPageObj->IsDirty()) all_dirty_streams.insert(pPageObj->GetContentStream()); } @@ -195,6 +199,10 @@ // Process the page objects, write into each dirty stream. for (auto& pPageObj : m_pageObjects) { + if (!pPageObj->IsActive()) { + continue; + } + int stream_index = pPageObj->GetContentStream(); auto it = streams.find(stream_index); if (it == streams.end()) @@ -301,6 +309,9 @@ ResourcesMap seen_resources; for (auto& page_object : m_pageObjects) { + if (!page_object->IsActive()) { + continue; + } RecordPageObjectResourceUsage(page_object, seen_resources); } if (!m_DefaultGraphicsName.IsEmpty()) { @@ -344,8 +355,10 @@ const CPDF_ContentMarks* content_marks = empty_content_marks.get(); for (auto& pPageObj : m_pageObjects) { - if (m_pObjHolder->IsPage() && !pPageObj->IsDirty()) + if (m_pObjHolder->IsPage() && + (!pPageObj->IsDirty() || !pPageObj->IsActive())) { continue; + } bDirty = true; content_marks = ProcessContentMarks(buf, pPageObj, content_marks); @@ -358,8 +371,13 @@ void CPDF_PageContentGenerator::UpdateStreamlessPageObjects( int new_content_stream_index) { for (auto& pPageObj : m_pageObjects) { - if (pPageObj->GetContentStream() == CPDF_PageObject::kNoContentStream) + if (!pPageObj->IsActive()) { + continue; + } + + if (pPageObj->GetContentStream() == CPDF_PageObject::kNoContentStream) { 
pPageObj->SetContentStream(new_content_stream_index); + } } }
diff --git a/core/fpdfapi/edit/cpdf_pagecontentmanager.cpp b/core/fpdfapi/edit/cpdf_pagecontentmanager.cpp index 790bf13..676469e 100644 --- a/core/fpdfapi/edit/cpdf_pagecontentmanager.cpp +++ b/core/fpdfapi/edit/cpdf_pagecontentmanager.cpp
@@ -176,6 +176,9 @@ // Since CPDF_PageContentManager is only instantiated in // CPDF_PageContentGenerator::GenerateContent(), which cleans up the dirty // streams first, this should always be true. + // This method does not bother to inspect IsActive() for page objects; it will + // remove any object that has been scheduled for removal, regardless of active + // status. DCHECK(!page_obj_holder_->HasDirtyStreams()); if (streams_to_remove_.empty()) {
diff --git a/core/fpdfapi/page/cpdf_contentparser.cpp b/core/fpdfapi/page/cpdf_contentparser.cpp index 20feada..60aec84 100644 --- a/core/fpdfapi/page/cpdf_contentparser.cpp +++ b/core/fpdfapi/page/cpdf_contentparser.cpp
@@ -229,6 +229,9 @@ } for (auto& pObj : *m_pPageObjectHolder) { + if (!pObj->IsActive()) { + continue; + } CPDF_ClipPath& clip_path = pObj->mutable_clip_path(); if (!clip_path.HasRef()) { continue;
diff --git a/core/fpdfapi/page/cpdf_form.cpp b/core/fpdfapi/page/cpdf_form.cpp index cbe92fd..526c148 100644 --- a/core/fpdfapi/page/cpdf_form.cpp +++ b/core/fpdfapi/page/cpdf_form.cpp
@@ -90,18 +90,22 @@
 }
 
 bool CPDF_Form::HasPageObjects() const {
-  return GetPageObjectCount() != 0;
+  return GetActivePageObjectCount() != 0;
 }
 
 CFX_FloatRect CPDF_Form::CalcBoundingBox() const {
-  if (GetPageObjectCount() == 0)
+  if (GetActivePageObjectCount() == 0) {
     return CFX_FloatRect();
+  }
 
   float left = 1000000.0f;
   float right = -1000000.0f;
   float bottom = 1000000.0f;
   float top = -1000000.0f;
   for (const auto& pObj : *this) {
+    if (!pObj->IsActive()) {
+      continue;
+    }
     const auto& rect = pObj->GetRect();
     left = std::min(left, rect.left);
     right = std::max(right, rect.right);
@@ -117,12 +121,27 @@
 
 std::optional<std::pair<RetainPtr<CFX_DIBitmap>, CFX_Matrix>>
 CPDF_Form::GetBitmapAndMatrixFromSoleImageOfForm() const {
-  if (GetPageObjectCount() != 1)
+  // Inactive objects are treated as absent, so locate the one and only active
+  // object instead of assuming it sits at the front of the list.
+  CPDF_PageObject* pSoleActiveObject = nullptr;
+  for (const auto& pObj : *this) {
+    if (!pObj->IsActive()) {
+      continue;
+    }
+    if (pSoleActiveObject) {
+      // More than one active object, so the form is not a sole image.
+      return std::nullopt;
+    }
+    pSoleActiveObject = pObj.get();
+  }
+  if (!pSoleActiveObject) {
     return std::nullopt;
+  }
 
-  CPDF_ImageObject* pImageObject = (*begin())->AsImage();
-  if (!pImageObject)
+  CPDF_ImageObject* pImageObject = pSoleActiveObject->AsImage();
+  if (!pImageObject) {
     return std::nullopt;
+  }
 
   return {{pImageObject->GetIndependentBitmap(), pImageObject->matrix()}};
 }
diff --git a/core/fpdfapi/page/cpdf_pageobject.cpp b/core/fpdfapi/page/cpdf_pageobject.cpp index 16d1342..744aea6 100644 --- a/core/fpdfapi/page/cpdf_pageobject.cpp +++ b/core/fpdfapi/page/cpdf_pageobject.cpp
@@ -90,6 +90,13 @@ m_bDirty = true; } +void CPDF_PageObject::SetIsActive(bool value) { + if (m_bIsActive != value) { + m_bIsActive = value; + m_bDirty = true; + } +} + void CPDF_PageObject::TransformClipPath(const CFX_Matrix& matrix) { CPDF_ClipPath& clip_path = mutable_clip_path(); if (!clip_path.HasRef()) {
diff --git a/core/fpdfapi/page/cpdf_pageobject.h b/core/fpdfapi/page/cpdf_pageobject.h index 4821321..000dd29 100644 --- a/core/fpdfapi/page/cpdf_pageobject.h +++ b/core/fpdfapi/page/cpdf_pageobject.h
@@ -62,6 +62,8 @@ void SetDirty(bool value) { m_bDirty = value; } bool IsDirty() const { return m_bDirty; } + void SetIsActive(bool value); + bool IsActive() const { return m_bIsActive; } void TransformClipPath(const CFX_Matrix& matrix); void SetOriginalRect(const CFX_FloatRect& rect) { m_OriginalRect = rect; } @@ -141,7 +143,12 @@ CFX_FloatRect m_Rect; CFX_FloatRect m_OriginalRect; CPDF_ContentMarks m_ContentMarks; + // Modifying `m_bIsActive` automatically sets `m_bDirty` to true, but + // otherwise `m_bDirty` and `m_bIsActive` are independent. A + // `CPDF_PageObject` can remain dirty until page object processing completes + // and marks it no longer dirty. bool m_bDirty = false; + bool m_bIsActive = true; int32_t m_ContentStream; // The resource name for this object. ByteString m_ResourceName;
diff --git a/core/fpdfapi/page/cpdf_pageobjectholder.cpp b/core/fpdfapi/page/cpdf_pageobjectholder.cpp index 173a9e0..fd5b231 100644 --- a/core/fpdfapi/page/cpdf_pageobjectholder.cpp +++ b/core/fpdfapi/page/cpdf_pageobjectholder.cpp
@@ -156,6 +156,16 @@ m_Transparency.SetIsolated(); } +size_t CPDF_PageObjectHolder::GetActivePageObjectCount() const { + size_t count = 0; + for (const auto& page_object : m_PageObjectList) { + if (page_object->IsActive()) { + ++count; + } + } + return count; +} + CPDF_PageObject* CPDF_PageObjectHolder::GetPageObjectByIndex( size_t index) const { return fxcrt::IndexInBounds(m_PageObjectList, index)
diff --git a/core/fpdfapi/page/cpdf_pageobjectholder.h b/core/fpdfapi/page/cpdf_pageobjectholder.h index 5a9b697..44cbbd2 100644 --- a/core/fpdfapi/page/cpdf_pageobjectholder.h +++ b/core/fpdfapi/page/cpdf_pageobjectholder.h
@@ -87,6 +87,7 @@ return m_pPageResources; } size_t GetPageObjectCount() const { return m_PageObjectList.size(); } + size_t GetActivePageObjectCount() const; CPDF_PageObject* GetPageObjectByIndex(size_t index) const; void AppendPageObject(std::unique_ptr<CPDF_PageObject> pPageObj);
diff --git a/core/fpdfapi/render/cpdf_progressiverenderer.cpp b/core/fpdfapi/render/cpdf_progressiverenderer.cpp index 3dfd7a1..71676da 100644 --- a/core/fpdfapi/render/cpdf_progressiverenderer.cpp +++ b/core/fpdfapi/render/cpdf_progressiverenderer.cpp
@@ -76,7 +76,7 @@ bool is_mask = false; while (iter != iterEnd) { CPDF_PageObject* pCurObj = iter->get(); - if (pCurObj->GetRect().left <= m_ClipRect.right && + if (pCurObj->IsActive() && pCurObj->GetRect().left <= m_ClipRect.right && pCurObj->GetRect().right >= m_ClipRect.left && pCurObj->GetRect().bottom <= m_ClipRect.top && pCurObj->GetRect().top >= m_ClipRect.bottom) {
diff --git a/core/fpdfapi/render/cpdf_renderstatus.cpp b/core/fpdfapi/render/cpdf_renderstatus.cpp index de97aa8..de4b2c1 100644 --- a/core/fpdfapi/render/cpdf_renderstatus.cpp +++ b/core/fpdfapi/render/cpdf_renderstatus.cpp
@@ -215,8 +215,9 @@ m_bStopped = true; return; } - if (!pCurObj) + if (!pCurObj || !pCurObj->IsActive()) { continue; + } if (pCurObj->GetRect().left > clip_rect.right || pCurObj->GetRect().right < clip_rect.left ||
diff --git a/core/fpdfapi/render/fpdf_progressive_render_embeddertest.cpp b/core/fpdfapi/render/fpdf_progressive_render_embeddertest.cpp index 0d6b92c..c80ca6a 100644 --- a/core/fpdfapi/render/fpdf_progressive_render_embeddertest.cpp +++ b/core/fpdfapi/render/fpdf_progressive_render_embeddertest.cpp
@@ -10,6 +10,7 @@ #include "core/fxcrt/check.h" #include "core/fxge/cfx_defaultrenderdevice.h" #include "core/fxge/dib/fx_dib.h" +#include "public/cpp/fpdf_scopers.h" #include "public/fpdf_progressive.h" #include "testing/embedder_test.h" #include "testing/embedder_test_constants.h" @@ -379,6 +380,51 @@ rectangles_checksum); } +TEST_F(FPDFProgressiveRenderEmbedderTest, RenderPathObjectUsability) { + // Test rendering of paths with one of the page objects active vs. inactive. + const char* all_rectangles_used_checksum = []() { + if (CFX_DefaultRenderDevice::UseSkiaRenderer()) { + return "b4e411a6b5ffa59a50efede2efece597"; + } + return "0a90de37f52127619c3dfb642b5fa2fe"; + }(); + const char* one_rectangle_inactive_checksum = []() { + if (CFX_DefaultRenderDevice::UseSkiaRenderer()) { + return "cf5bb4e61609162c03f4c8a6d9791230"; + } + return "0481e8936b35ac9484b51a0966ab4ab6"; + }(); + + ASSERT_TRUE(OpenDocument("rectangles.pdf")); + ScopedEmbedderTestPage page = LoadScopedPage(0); + ASSERT_TRUE(page); + + // Check rendering result before modifications. + { + ScopedFPDFBitmap bitmap = RenderPage(page.get()); + CompareBitmap(bitmap.get(), 200, 300, all_rectangles_used_checksum); + } + + ASSERT_EQ(FPDFPage_CountObjects(page.get()), 8); + FPDF_PAGEOBJECT page_obj = FPDFPage_GetObject(page.get(), 4); + ASSERT_TRUE(page_obj); + + // Check rendering result after a page object is made inactive. + // Contents do not need to be regenerated to observe an effect. + ASSERT_TRUE(FPDFPageObj_SetIsActive(page_obj, /*active=*/false)); + { + ScopedFPDFBitmap bitmap = RenderPage(page.get()); + CompareBitmap(bitmap.get(), 200, 300, one_rectangle_inactive_checksum); + } + + // Check rendering result after the same page object is active again. 
+ ASSERT_TRUE(FPDFPageObj_SetIsActive(page_obj, /*active=*/true)); + { + ScopedFPDFBitmap bitmap = RenderPage(page.get()); + CompareBitmap(bitmap.get(), 200, 300, all_rectangles_used_checksum); + } +} + TEST_F(FPDFProgressiveRenderEmbedderTest, RenderHighlightWithColorScheme) { // Test rendering of highlight with forced color scheme on. //
diff --git a/core/fpdftext/cpdf_textpage.cpp b/core/fpdftext/cpdf_textpage.cpp index 7edaceb..b3159c2 100644 --- a/core/fpdftext/cpdf_textpage.cpp +++ b/core/fpdftext/cpdf_textpage.cpp
@@ -605,8 +605,6 @@ CPDF_TextPage::TextOrientation CPDF_TextPage::FindTextlineFlowOrientation() const { - DCHECK_NE(m_pPage->GetPageObjectCount(), 0u); - const int32_t nPageWidth = static_cast<int32_t>(m_pPage->GetPageWidth()); const int32_t nPageHeight = static_cast<int32_t>(m_pPage->GetPageHeight()); if (nPageWidth <= 0 || nPageHeight <= 0) @@ -620,8 +618,9 @@ int32_t nStartV = nPageHeight; int32_t nEndV = 0; for (const auto& pPageObj : *m_pPage) { - if (!pPageObj->IsText()) + if (!pPageObj->IsActive() || !pPageObj->IsText()) { continue; + } int32_t minH = static_cast<int32_t>( std::clamp<float>(pPageObj->GetRect().left, 0.0f, nPageWidth)); @@ -683,12 +682,17 @@ } void CPDF_TextPage::ProcessObject() { - if (m_pPage->GetPageObjectCount() == 0) + if (m_pPage->GetActivePageObjectCount() == 0) { return; + } m_TextlineDir = FindTextlineFlowOrientation(); for (auto it = m_pPage->begin(); it != m_pPage->end(); ++it) { CPDF_PageObject* pObj = it->get(); + if (!pObj->IsActive()) { + continue; + } + if (pObj->IsText()) { ProcessTextObject(pObj->AsText(), CFX_Matrix(), m_pPage, it); } else if (pObj->IsForm()) { @@ -708,6 +712,10 @@ const CPDF_PageObjectHolder* pHolder = pFormObj->form(); for (auto it = pHolder->begin(); it != pHolder->end(); ++it) { CPDF_PageObject* pPageObj = it->get(); + if (!pPageObj->IsActive()) { + continue; + } + if (pPageObj->IsText()) { ProcessTextObject(pPageObj->AsText(), curFormMatrix, pHolder, it); } else if (pPageObj->IsForm()) {
diff --git a/fpdfsdk/fpdf_editpage.cpp b/fpdfsdk/fpdf_editpage.cpp index d41b12a..9dab7f1 100644 --- a/fpdfsdk/fpdf_editpage.cpp +++ b/fpdfsdk/fpdf_editpage.cpp
@@ -630,6 +630,21 @@
                   : FPDF_PAGEOBJ_UNKNOWN;
 }
 
+// Marks |page_object| as active or inactive via CPDF_PageObject::SetIsActive().
+// The signature carries FPDF_CALLCONV so that it matches the declaration in
+// public/fpdf_edit.h. Returns false if the handle does not resolve to a page
+// object.
+FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV
+FPDFPageObj_SetIsActive(FPDF_PAGEOBJECT page_object, FPDF_BOOL active) {
+  CPDF_PageObject* cpage_object = CPDFPageObjectFromFPDFPageObject(page_object);
+  if (!cpage_object) {
+    return false;
+  }
+
+  cpage_object->SetIsActive(active);
+  return true;
+}
+
 FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV FPDFPage_GenerateContent(FPDF_PAGE page) {
   CPDF_Page* pPage = CPDFPageFromFPDFPage(page);
   if (!IsPageObject(pPage))
diff --git a/fpdfsdk/fpdf_editpage_embeddertest.cpp b/fpdfsdk/fpdf_editpage_embeddertest.cpp index a62cb71..21ee8aa 100644 --- a/fpdfsdk/fpdf_editpage_embeddertest.cpp +++ b/fpdfsdk/fpdf_editpage_embeddertest.cpp
@@ -495,3 +495,72 @@ CloseSavedPage(page); CloseSavedDocument(); } + +TEST_F(FPDFEditPageEmbedderTest, PageObjectIsActive) { + const char* one_rectangle_inactive_checksum = []() { + if (CFX_DefaultRenderDevice::UseSkiaRenderer()) { + return "cf5bb4e61609162c03f4c8a6d9791230"; + } + return "0481e8936b35ac9484b51a0966ab4ab6"; + }(); + + ASSERT_TRUE(OpenDocument("rectangles.pdf")); + ScopedEmbedderTestPage page = LoadScopedPage(0); + ASSERT_TRUE(page); + const int page_width = static_cast<int>(FPDF_GetPageWidth(page.get())); + const int page_height = static_cast<int>(FPDF_GetPageHeight(page.get())); + + // Note the original count of page objects for the rectangles. + EXPECT_EQ(8, FPDFPage_CountObjects(page.get())); + + { + // Render the page as is. + ScopedFPDFBitmap bitmap = RenderLoadedPage(page.get()); + CompareBitmap(bitmap.get(), page_width, page_height, + pdfium::RectanglesChecksum()); + } + + { + // Save a copy, open the copy, and render it. + EXPECT_TRUE(FPDF_SaveAsCopy(document(), this, 0)); + ASSERT_TRUE(OpenSavedDocument()); + FPDF_PAGE saved_page = LoadSavedPage(0); + ASSERT_TRUE(saved_page); + + // Note that all page objects for the rectangles are present in the copy. + EXPECT_EQ(8, FPDFPage_CountObjects(saved_page)); + + ScopedFPDFBitmap bitmap = RenderSavedPage(saved_page); + CompareBitmap(bitmap.get(), page_width, page_height, + pdfium::RectanglesChecksum()); + + CloseSavedPage(saved_page); + CloseSavedDocument(); + } + + // Mark one of the page objects as inactive. It is still present in the page. + FPDF_PAGEOBJECT page_obj = FPDFPage_GetObject(page.get(), 4); + ASSERT_TRUE(page_obj); + ASSERT_TRUE(FPDFPageObj_SetIsActive(page_obj, /*active=*/false)); + EXPECT_TRUE(FPDFPage_GenerateContent(page.get())); + EXPECT_EQ(8, FPDFPage_CountObjects(page.get())); + + { + // Save a copy, open the copy, and render it. 
+ EXPECT_TRUE(FPDF_SaveAsCopy(document(), this, 0)); + ASSERT_TRUE(OpenSavedDocument()); + FPDF_PAGE saved_page = LoadSavedPage(0); + ASSERT_TRUE(saved_page); + + // Note that a rectangle is absent from the copy. + EXPECT_EQ(7, FPDFPage_CountObjects(saved_page)); + + // The absence of the inactive page object affects the rendered result. + ScopedFPDFBitmap bitmap = RenderSavedPage(saved_page); + CompareBitmap(bitmap.get(), page_width, page_height, + one_rectangle_inactive_checksum); + + CloseSavedPage(saved_page); + CloseSavedDocument(); + } +}
diff --git a/fpdfsdk/fpdf_flatten.cpp b/fpdfsdk/fpdf_flatten.cpp index 3547bf9..248c23a 100644 --- a/fpdfsdk/fpdf_flatten.cpp +++ b/fpdfsdk/fpdf_flatten.cpp
@@ -61,6 +61,10 @@ pPDFPage->ParseContent(); for (const auto& pPageObject : *pPDFPage) { + if (!pPageObject->IsActive()) { + continue; + } + const CFX_FloatRect& rc = pPageObject->GetRect(); if (IsValidRect(rc, pDict->GetRectFor(pdfium::page_object::kMediaBox))) pRectArray->push_back(rc);
diff --git a/fpdfsdk/fpdf_view_c_api_test.c b/fpdfsdk/fpdf_view_c_api_test.c index 27ad99a..1765faf 100644 --- a/fpdfsdk/fpdf_view_c_api_test.c +++ b/fpdfsdk/fpdf_view_c_api_test.c
@@ -229,6 +229,7 @@ CHK(FPDFPageObj_SetDashArray); CHK(FPDFPageObj_SetDashPhase); CHK(FPDFPageObj_SetFillColor); + CHK(FPDFPageObj_SetIsActive); CHK(FPDFPageObj_SetLineCap); CHK(FPDFPageObj_SetLineJoin); CHK(FPDFPageObj_SetMatrix);
diff --git a/fxjs/cjs_document.cpp b/fxjs/cjs_document.cpp index b5b5c1c..bb81a66 100644 --- a/fxjs/cjs_document.cpp +++ b/fxjs/cjs_document.cpp
@@ -1235,7 +1235,7 @@ int nWords = 0; WideString swRet; for (auto& pPageObj : *page) { - if (pPageObj->IsText()) { + if (pPageObj->IsActive() && pPageObj->IsText()) { CPDF_TextObject* pTextObj = pPageObj->AsText(); int nObjWords = pTextObj->CountWords(); if (nWords + nObjWords >= nWordNo) { @@ -1290,8 +1290,9 @@ int nWords = 0; for (auto& pPageObj : *page) { - if (pPageObj->IsText()) + if (pPageObj->IsActive() && pPageObj->IsText()) { nWords += pPageObj->AsText()->CountWords(); + } } return CJS_Result::Success(pRuntime->NewNumber(nWords)); }
diff --git a/public/fpdf_edit.h b/public/fpdf_edit.h index 6ab72cb..bcef484 100644 --- a/public/fpdf_edit.h +++ b/public/fpdf_edit.h
@@ -260,6 +260,22 @@ // error. FPDF_EXPORT int FPDF_CALLCONV FPDFPageObj_GetType(FPDF_PAGEOBJECT page_object); +// Experimental API. +// Sets if |page_object| is active within page. +// +// page_object - handle to a page object. +// active - a boolean specifying if the object is active. +// +// Returns TRUE on success. +// +// Page objects all start in the active state by default, and remain in that +// state unless this function is called. +// +// When |active| is false, this makes the |page_object| be treated as if it +// wasn't in the document even though it is still held internally. +FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV +FPDFPageObj_SetIsActive(FPDF_PAGEOBJECT page_object, FPDF_BOOL active); + // Transform |page_object| by the given matrix. // // page_object - handle to a page object.