Do not write out /Page objects that are not in the page tree

FPDFPage_Delete() may not work as expected when used in conjunction with
FPDF_SaveAsCopy(). The CPDF_Creator code that saves the PDF does not
understand the structure of the document's page tree. Therefore, it
only looks at references to determine whether to save referenced
objects, or to discard orphaned objects. This becomes a problem when
bookmarks or other objects has references to page objects, as the page
objects are not considered orphaned.

Fix this problem by augmenting the page deletion code in CPDF_Document.
After a page has been deleted, check to see if there are any references
to the page in the page tree. If it is no longer in the page tree,
replace it with an object of type null.

Since it is likely impossible to traverse the entire PDF's object tree
and correctly remove all the references to a page, use the null object
type to null out the page is the best way to effectively remove it,
while preventing references to it from dangling. Objects that become
orphaned as a result of this modification, such as the page content
stream, will be pruned by CPDF_Creator.

Bug: pdfium:2146
Change-Id: I474ff921e2239f27652f5308b56d1c1b74b51d77
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/117910
Reviewed-by: Thomas Sepez <tsepez@google.com>
Commit-Queue: Lei Zhang <thestig@chromium.org>
Reviewed-by: Tom Sepez <tsepez@chromium.org>
diff --git a/core/fpdfapi/parser/cpdf_document.cpp b/core/fpdfapi/parser/cpdf_document.cpp
index 2ce880b..3012690 100644
--- a/core/fpdfapi/parser/cpdf_document.cpp
+++ b/core/fpdfapi/parser/cpdf_document.cpp
@@ -15,6 +15,7 @@
 #include "core/fpdfapi/parser/cpdf_dictionary.h"
 #include "core/fpdfapi/parser/cpdf_linearized_header.h"
 #include "core/fpdfapi/parser/cpdf_name.h"
+#include "core/fpdfapi/parser/cpdf_null.h"
 #include "core/fpdfapi/parser/cpdf_number.h"
 #include "core/fpdfapi/parser/cpdf_parser.h"
 #include "core/fpdfapi/parser/cpdf_read_validator.h"
@@ -587,20 +588,54 @@
   return m_pParser ? m_pParser->GetIDArray() : nullptr;
 }
 
-void CPDF_Document::DeletePage(int iPage) {
+uint32_t CPDF_Document::DeletePage(int iPage) {
   RetainPtr<CPDF_Dictionary> pPages = GetMutablePagesDict();
-  if (!pPages)
-    return;
+  if (!pPages) {
+    return 0;
+  }
 
   int nPages = pPages->GetIntegerFor("Count");
-  if (iPage < 0 || iPage >= nPages)
-    return;
+  if (iPage < 0 || iPage >= nPages) {
+    return 0;
+  }
+
+  RetainPtr<const CPDF_Dictionary> page_dict = GetPageDictionary(iPage);
+  if (!page_dict) {
+    return 0;
+  }
 
   std::set<RetainPtr<CPDF_Dictionary>> stack = {pPages};
-  if (!InsertDeletePDFPage(std::move(pPages), iPage, nullptr, false, &stack))
-    return;
+  if (!InsertDeletePDFPage(std::move(pPages), iPage, nullptr, false, &stack)) {
+    return 0;
+  }
 
   m_PageList.erase(m_PageList.begin() + iPage);
+  return page_dict->GetObjNum();
+}
+
+void CPDF_Document::SetPageToNullObject(uint32_t page_obj_num) {
+  if (!page_obj_num || m_PageList.empty()) {
+    return;
+  }
+
+  // Load all pages so `m_PageList` has all the object numbers.
+  for (size_t i = 0; i < m_PageList.size(); ++i) {
+    GetPageDictionary(i);
+  }
+
+  if (pdfium::Contains(m_PageList, page_obj_num)) {
+    return;
+  }
+
+  // If `page_dict` is no longer in the page tree, replace it with an object of
+  // type null.
+  //
+  // Delete the object first from this container, so the conditional in the
+  // replacement call always evaluates to true.
+  DeleteIndirectObject(page_obj_num);
+  const bool replaced = ReplaceIndirectObjectIfHigherGeneration(
+      page_obj_num, pdfium::MakeRetain<CPDF_Null>());
+  CHECK(replaced);
 }
 
 void CPDF_Document::SetRootForTesting(RetainPtr<CPDF_Dictionary> root) {
diff --git a/core/fpdfapi/parser/cpdf_document.h b/core/fpdfapi/parser/cpdf_document.h
index 4e00e51..ac162a6 100644
--- a/core/fpdfapi/parser/cpdf_document.h
+++ b/core/fpdfapi/parser/cpdf_document.h
@@ -33,7 +33,7 @@
    public:
     virtual ~Extension() = default;
     virtual int GetPageCount() const = 0;
-    virtual void DeletePage(int page_index) = 0;
+    virtual uint32_t DeletePage(int page_index) = 0;
     virtual bool ContainsExtensionForm() const = 0;
     virtual bool ContainsExtensionFullForm() const = 0;
     virtual bool ContainsExtensionForegroundForm() const = 0;
@@ -99,7 +99,12 @@
   RetainPtr<CPDF_Dictionary> GetInfo();
   RetainPtr<const CPDF_Array> GetFileIdentifier() const;
 
-  void DeletePage(int iPage);
+  // Returns the object number for the deleted page, or 0 on failure.
+  uint32_t DeletePage(int iPage);
+  // `page_obj_num` is the return value from DeletePage(). If it is non-zero,
+  // and it is no longer used in the page tree, then replace the page object
+  // with a null object.
+  void SetPageToNullObject(uint32_t page_obj_num);
   bool MovePages(pdfium::span<const int> page_indices, int dest_page_index);
 
   int GetPageCount() const;
diff --git a/fpdfsdk/fpdf_doc_embeddertest.cpp b/fpdfsdk/fpdf_doc_embeddertest.cpp
index 3cde698..8c44346 100644
--- a/fpdfsdk/fpdf_doc_embeddertest.cpp
+++ b/fpdfsdk/fpdf_doc_embeddertest.cpp
@@ -747,9 +747,8 @@
   EXPECT_EQ(1, FPDF_GetPageCount(document()));
 
   ASSERT_TRUE(FPDF_SaveAsCopy(document(), this, 0));
-  // TODO(crbug.com/pdfium/2146): These should be 1.
-  EXPECT_EQ(2, CountPageEntries(GetString()));
-  EXPECT_EQ(2, CountStreamEntries(GetString()));
+  EXPECT_EQ(1, CountPageEntries(GetString()));
+  EXPECT_EQ(1, CountStreamEntries(GetString()));
 }
 
 TEST_F(FPDFDocEmbedderTest, DeletePageAndSaveWithCustomObject) {
@@ -761,8 +760,7 @@
   EXPECT_EQ(1, FPDF_GetPageCount(document()));
 
   ASSERT_TRUE(FPDF_SaveAsCopy(document(), this, 0));
-  // TODO(crbug.com/pdfium/2146): This should be 1.
-  EXPECT_EQ(2, CountPageEntries(GetString()));
+  EXPECT_EQ(1, CountPageEntries(GetString()));
   EXPECT_EQ(1, CountStreamEntries(GetString()));
 }
 
@@ -793,8 +791,7 @@
   EXPECT_EQ(1, FPDF_GetPageCount(document()));
 
   ASSERT_TRUE(FPDF_SaveAsCopy(document(), this, 0));
-  // TODO(crbug.com/pdfium/2146): This should be 1.
-  EXPECT_EQ(2, CountPageEntries(GetString()));
+  EXPECT_EQ(1, CountPageEntries(GetString()));
   EXPECT_EQ(1, CountStreamEntries(GetString()));
 }
 
diff --git a/fpdfsdk/fpdf_editpage.cpp b/fpdfsdk/fpdf_editpage.cpp
index e5c380a..ffc6a11 100644
--- a/fpdfsdk/fpdf_editpage.cpp
+++ b/fpdfsdk/fpdf_editpage.cpp
@@ -187,12 +187,9 @@
     return;
 
   CPDF_Document::Extension* pExtension = pDoc->GetExtension();
-  if (pExtension) {
-    pExtension->DeletePage(page_index);
-    return;
-  }
-
-  pDoc->DeletePage(page_index);
+  const uint32_t page_obj_num = pExtension ? pExtension->DeletePage(page_index)
+                                           : pDoc->DeletePage(page_index);
+  pDoc->SetPageToNullObject(page_obj_num);
 }
 
 FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV
diff --git a/fpdfsdk/fpdfxfa/cpdfxfa_context.cpp b/fpdfsdk/fpdfxfa/cpdfxfa_context.cpp
index ad94b24..7f4753d 100644
--- a/fpdfsdk/fpdfxfa/cpdfxfa_context.cpp
+++ b/fpdfsdk/fpdfxfa/cpdfxfa_context.cpp
@@ -279,11 +279,11 @@
   return nullptr;
 }
 
-void CPDFXFA_Context::DeletePage(int page_index) {
+uint32_t CPDFXFA_Context::DeletePage(int page_index) {
   // Delete from the document first because, if GetPage was never called for
   // this |page_index| then |m_XFAPageList| may have size < |page_index| even
   // if it's a valid page in the document.
-  m_pPDFDoc->DeletePage(page_index);
+  uint32_t page_obj_num = m_pPDFDoc->DeletePage(page_index);
 
   if (fxcrt::IndexInBounds(m_XFAPageList, page_index)) {
     m_XFAPageList.erase(m_XFAPageList.begin() + page_index);
@@ -293,6 +293,8 @@
       }
     }
   }
+
+  return page_obj_num;
 }
 
 bool CPDFXFA_Context::ContainsExtensionForm() const {
diff --git a/fpdfsdk/fpdfxfa/cpdfxfa_context.h b/fpdfsdk/fpdfxfa/cpdfxfa_context.h
index a8b563e..dd600c3 100644
--- a/fpdfsdk/fpdfxfa/cpdfxfa_context.h
+++ b/fpdfsdk/fpdfxfa/cpdfxfa_context.h
@@ -69,7 +69,7 @@
 
   // CPDF_Document::Extension:
   int GetPageCount() const override;
-  void DeletePage(int page_index) override;
+  uint32_t DeletePage(int page_index) override;
   bool ContainsExtensionForm() const override;
   bool ContainsExtensionFullForm() const override;
   bool ContainsExtensionForegroundForm() const override;