Rewrite content stream regeneration.

Loop through the dirty page objects and streams and regenerate all
streams that are dirty.

Bug: pdfium:1051
Change-Id: I837b5a7cd9542b7777e7c7ae7ac9cc75f69f30b5
Reviewed-on: https://pdfium-review.googlesource.com/34330
Commit-Queue: Henrique Nakashima <hnakashima@chromium.org>
Reviewed-by: dsinclair <dsinclair@chromium.org>
diff --git a/core/fpdfapi/edit/cpdf_pagecontentgenerator.cpp b/core/fpdfapi/edit/cpdf_pagecontentgenerator.cpp
index 19994fa..ba8d03d 100644
--- a/core/fpdfapi/edit/cpdf_pagecontentgenerator.cpp
+++ b/core/fpdfapi/edit/cpdf_pagecontentgenerator.cpp
@@ -6,6 +6,9 @@
 
 #include "core/fpdfapi/edit/cpdf_pagecontentgenerator.h"
 
+#include <map>
+#include <memory>
+#include <set>
 #include <tuple>
 #include <utility>
 
@@ -73,35 +76,69 @@
 
 std::map<int32_t, std::unique_ptr<std::ostringstream>>
 CPDF_PageContentGenerator::GenerateModifiedStreams() {
-  auto buf = pdfium::MakeUnique<std::ostringstream>();
+  // Make sure default graphics are created.
+  (void)GetOrCreateDefaultGraphics();
 
+  // Collect the indexes of streams holding dirty objects or marked dirty.
+  std::set<int32_t> all_dirty_streams;
+  for (auto& pPageObj : m_pageObjects) {
+    if (pPageObj->IsDirty())
+      all_dirty_streams.insert(pPageObj->GetContentStream());
+  }
+  const std::set<int32_t>* marked_dirty_streams =
+      m_pObjHolder->GetDirtyStreams();
+  all_dirty_streams.insert(marked_dirty_streams->begin(),
+                           marked_dirty_streams->end());
+
+  // Start each dirty stream's buffer with the shared graphics preamble.
   std::map<int32_t, std::unique_ptr<std::ostringstream>> streams;
-  if (GenerateStreamWithNewObjects(buf.get()))
-    streams[CPDF_PageObject::kNoContentStream] = std::move(buf);
+  std::map<int32_t, bool> stream_is_empty;
 
-  // TODO(pdfium:1051): Generate other streams and add to |streams|.
+  for (int32_t dirty_stream : all_dirty_streams) {
+    std::unique_ptr<std::ostringstream> buf =
+        pdfium::MakeUnique<std::ostringstream>();
+
+    // Set the default graphic state values
+    *buf << "q\n";
+    if (!m_pObjHolder->GetLastCTM().IsIdentity())
+      *buf << m_pObjHolder->GetLastCTM().GetInverse() << " cm\n";
+
+    ProcessDefaultGraphics(buf.get());
+
+    streams[dirty_stream] = std::move(buf);
+    stream_is_empty[dirty_stream] = true;
+  }
+
+  // Write each page object that belongs to a dirty stream into its buffer.
+  for (auto& pPageObj : m_pageObjects) {
+    int stream_index = pPageObj->GetContentStream();
+    auto it = streams.find(stream_index);
+    if (it == streams.end())
+      continue;
+
+    std::ostringstream* buf = it->second.get();
+    stream_is_empty[stream_index] = false;
+    ProcessPageObject(buf, pPageObj.Get());
+  }
+
+  // Finish dirty streams: blank the empty ones, close the rest with Q.
+  for (int32_t dirty_stream : all_dirty_streams) {
+    std::ostringstream* buf = streams[dirty_stream].get();
+    if (stream_is_empty[dirty_stream]) {
+      // Drop the preamble; an empty buffer means the stream should be deleted.
+      buf->str("");
+    } else {
+      // Return graphics to original state
+      *buf << "Q\n";
+    }
+  }
+
+  // Clear dirty streams in m_pObjHolder
+  m_pObjHolder->ClearDirtyStreams();
 
   return streams;
 }
 
-bool CPDF_PageContentGenerator::GenerateStreamWithNewObjects(
-    std::ostringstream* buf) {
-  // Set the default graphic state values
-  *buf << "q\n";
-  if (!m_pObjHolder->GetLastCTM().IsIdentity())
-    *buf << m_pObjHolder->GetLastCTM().GetInverse() << " cm\n";
-  ProcessDefaultGraphics(buf);
-
-  // Process the page objects
-  if (!ProcessPageObjects(buf))
-    return false;
-
-  // Return graphics to original state
-  *buf << "Q\n";
-
-  return true;
-}
-
 void CPDF_PageContentGenerator::UpdateContentStreams(
     std::map<int32_t, std::unique_ptr<std::ostringstream>>* new_stream_data) {
   // If no streams were regenerated or removed, nothing to do here.
@@ -124,6 +161,9 @@
         page_content_manager.GetStreamByIndex(stream_index);
     ASSERT(old_stream);
 
+    // TODO(pdfium:1051): Remove streams that are now empty. If |buf| is empty,
+    // remove |old_stream| instead of setting its data.
+
     old_stream->SetData(buf);
   }
 }
@@ -162,21 +202,28 @@
       continue;
 
     bDirty = true;
-    if (CPDF_ImageObject* pImageObject = pPageObj->AsImage())
-      ProcessImage(buf, pImageObject);
-    else if (CPDF_PathObject* pPathObj = pPageObj->AsPath())
-      ProcessPath(buf, pPathObj);
-    else if (CPDF_TextObject* pTextObj = pPageObj->AsText())
-      ProcessText(buf, pTextObj);
-    pPageObj->SetDirty(false);
+    ProcessPageObject(buf, pPageObj.Get());
   }
   return bDirty;
 }
 
 void CPDF_PageContentGenerator::UpdateStreamlessPageObjects(
     int new_content_stream_index) {
-  // TODO(pdfium:1051): Mark page objects that did not have a content stream
-  // with the new content stream index.
+  for (auto& pPageObj : m_pageObjects) {
+    if (pPageObj->GetContentStream() == CPDF_PageObject::kNoContentStream)
+      pPageObj->SetContentStream(new_content_stream_index);
+  }
+}
+
+void CPDF_PageContentGenerator::ProcessPageObject(std::ostringstream* buf,
+                                                  CPDF_PageObject* pPageObj) {
+  if (CPDF_ImageObject* pImageObject = pPageObj->AsImage())
+    ProcessImage(buf, pImageObject);
+  else if (CPDF_PathObject* pPathObj = pPageObj->AsPath())
+    ProcessPath(buf, pPathObj);
+  else if (CPDF_TextObject* pTextObj = pPageObj->AsText())
+    ProcessText(buf, pTextObj);
+  pPageObj->SetDirty(false);
 }
 
 void CPDF_PageContentGenerator::ProcessImage(std::ostringstream* buf,
diff --git a/core/fpdfapi/edit/cpdf_pagecontentgenerator.h b/core/fpdfapi/edit/cpdf_pagecontentgenerator.h
index 04adf1c..13b8431 100644
--- a/core/fpdfapi/edit/cpdf_pagecontentgenerator.h
+++ b/core/fpdfapi/edit/cpdf_pagecontentgenerator.h
@@ -35,6 +35,7 @@
  private:
   friend class CPDF_PageContentGeneratorTest;
 
+  void ProcessPageObject(std::ostringstream* buf, CPDF_PageObject* pPageObj);
   void ProcessPath(std::ostringstream* buf, CPDF_PathObject* pPathObj);
   void ProcessImage(std::ostringstream* buf, CPDF_ImageObject* pImageObj);
   void ProcessGraphics(std::ostringstream* buf, CPDF_PageObject* pPageObj);
@@ -49,9 +50,6 @@
   std::map<int32_t, std::unique_ptr<std::ostringstream>>
   GenerateModifiedStreams();
 
-  // Generate new stream data with all dirty page objects.
-  bool GenerateStreamWithNewObjects(std::ostringstream* buf);
-
   // Add buffer as a stream in page's 'Contents'
   void UpdateContentStreams(
       std::map<int32_t, std::unique_ptr<std::ostringstream>>* buf);
diff --git a/core/fpdfapi/page/cpdf_pageobject.h b/core/fpdfapi/page/cpdf_pageobject.h
index 39e7629..3fc35aa 100644
--- a/core/fpdfapi/page/cpdf_pageobject.h
+++ b/core/fpdfapi/page/cpdf_pageobject.h
@@ -65,10 +65,14 @@
   // Get what content stream the object was parsed from in its page. This number
   // is the index of the content stream in the "Contents" array, or 0 if there
   // is a single content stream. If the object is newly created,
-  // kNoContentStream is returned.
+  // |kNoContentStream| is returned.
+  //
   // If the object is spread among more than one content stream, this is the
   // index of the last stream.
   int32_t GetContentStream() const { return m_ContentStream; }
+  void SetContentStream(int32_t new_content_stream) {
+    m_ContentStream = new_content_stream;
+  }
 
   float m_Left;
   float m_Right;
diff --git a/fpdfsdk/fpdf_edit_embeddertest.cpp b/fpdfsdk/fpdf_edit_embeddertest.cpp
index 0a119b9..07879c5 100644
--- a/fpdfsdk/fpdf_edit_embeddertest.cpp
+++ b/fpdfsdk/fpdf_edit_embeddertest.cpp
@@ -428,8 +428,7 @@
   VerifySavedDocument(612, 792, kLastMD5);
 }
 
-// Fails due to pdfium:1051.
-TEST_F(FPDFEditEmbeddertest, DISABLED_SetText) {
+TEST_F(FPDFEditEmbeddertest, SetText) {
   // Load document with some text.
   EXPECT_TRUE(OpenDocument("hello_world.pdf"));
   FPDF_PAGE page = LoadPage(0);
@@ -626,8 +625,7 @@
   UnloadPage(page);
 }
 
-// Fails due to pdfium:1051.
-TEST_F(FPDFEditEmbeddertest, DISABLED_RemoveExistingPageObject) {
+TEST_F(FPDFEditEmbeddertest, RemoveExistingPageObject) {
   // Load document with some text.
   EXPECT_TRUE(OpenDocument("hello_world.pdf"));
   FPDF_PAGE page = LoadPage(0);
@@ -656,6 +654,100 @@
   CloseSavedDocument();
 }
 
+TEST_F(FPDFEditEmbeddertest, RemoveExistingPageObjectSplitStreamsNotLonely) {
+  // Load document with some text.
+  EXPECT_TRUE(OpenDocument("hello_world_split_streams.pdf"));
+  FPDF_PAGE page = LoadPage(0);
+  ASSERT_TRUE(page);
+
+  // Get the "Hello, world!" text object and remove it. There is another object
+  // in the same stream that says "Goodbye, world!"
+  ASSERT_EQ(3, FPDFPage_CountObjects(page));
+  FPDF_PAGEOBJECT page_object = FPDFPage_GetObject(page, 0);
+  ASSERT_TRUE(page_object);
+  EXPECT_TRUE(FPDFPage_RemoveObject(page, page_object));
+
+  // Verify the "Hello, world!" text is gone.
+  ASSERT_EQ(2, FPDFPage_CountObjects(page));
+#if _FX_PLATFORM_ == _FX_PLATFORM_APPLE_
+  const char kHelloRemovedMD5[] = "e07a62d412728fc4d6e3ff42f2dd0e11";
+#elif _FX_PLATFORM_ == _FX_PLATFORM_WINDOWS_
+  const char kHelloRemovedMD5[] = "de37b0bb7ff903c1068bae361844be50";
+#else
+  const char kHelloRemovedMD5[] = "95b92950647a2190e1230911e7a1a0e9";
+#endif
+  {
+    ScopedFPDFBitmap page_bitmap = RenderPageWithFlags(page, nullptr, 0);
+    CompareBitmap(page_bitmap.get(), 200, 200, kHelloRemovedMD5);
+  }
+
+  // Save the file
+  EXPECT_TRUE(FPDFPage_GenerateContent(page));
+  EXPECT_TRUE(FPDF_SaveAsCopy(document(), this, 0));
+  UnloadPage(page);
+  FPDFPageObj_Destroy(page_object);
+
+  // Re-open the file and check the page object count is still 2.
+  OpenSavedDocument();
+  FPDF_PAGE saved_page = LoadSavedPage(0);
+
+  EXPECT_EQ(2, FPDFPage_CountObjects(saved_page));
+  {
+    ScopedFPDFBitmap page_bitmap = RenderPageWithFlags(saved_page, nullptr, 0);
+    CompareBitmap(page_bitmap.get(), 200, 200, kHelloRemovedMD5);
+  }
+
+  CloseSavedPage(saved_page);
+  CloseSavedDocument();
+}
+
+TEST_F(FPDFEditEmbeddertest, RemoveExistingPageObjectSplitStreamsLonely) {
+  // Load document with some text.
+  EXPECT_TRUE(OpenDocument("hello_world_split_streams.pdf"));
+  FPDF_PAGE page = LoadPage(0);
+  ASSERT_TRUE(page);
+
+  // Get the "Greetings, world!" text object and remove it. This is the only
+  // object in the stream.
+  ASSERT_EQ(3, FPDFPage_CountObjects(page));
+  FPDF_PAGEOBJECT page_object = FPDFPage_GetObject(page, 2);
+  ASSERT_TRUE(page_object);
+  EXPECT_TRUE(FPDFPage_RemoveObject(page, page_object));
+
+  // Verify the "Greetings, world!" text is gone.
+  ASSERT_EQ(2, FPDFPage_CountObjects(page));
+#if _FX_PLATFORM_ == _FX_PLATFORM_APPLE_
+  const char kGreetingsRemovedMD5[] = "b90475ca64d1348c3bf5e2b77ad9187a";
+#elif _FX_PLATFORM_ == _FX_PLATFORM_WINDOWS_
+  const char kGreetingsRemovedMD5[] = "e5a6fa28298db07484cd922f3e210c88";
+#else
+  const char kGreetingsRemovedMD5[] = "2baa4c0e1758deba1b9c908e1fbd04ed";
+#endif
+  {
+    ScopedFPDFBitmap page_bitmap = RenderPageWithFlags(page, nullptr, 0);
+    CompareBitmap(page_bitmap.get(), 200, 200, kGreetingsRemovedMD5);
+  }
+
+  // Save the file
+  EXPECT_TRUE(FPDFPage_GenerateContent(page));
+  EXPECT_TRUE(FPDF_SaveAsCopy(document(), this, 0));
+  UnloadPage(page);
+  FPDFPageObj_Destroy(page_object);
+
+  // Re-open the file and check the page object count is still 2.
+  OpenSavedDocument();
+  FPDF_PAGE saved_page = LoadSavedPage(0);
+
+  EXPECT_EQ(2, FPDFPage_CountObjects(saved_page));
+  {
+    ScopedFPDFBitmap page_bitmap = RenderPageWithFlags(saved_page, nullptr, 0);
+    CompareBitmap(page_bitmap.get(), 200, 200, kGreetingsRemovedMD5);
+  }
+
+  CloseSavedPage(saved_page);
+  CloseSavedDocument();
+}
+
 // TODO(pdfium:1051): Extend this test to remove some elements and verify
 // saving works.
 TEST_F(FPDFEditEmbeddertest, GetContentStream) {
@@ -713,6 +805,39 @@
   CloseSavedDocument();
 }
 
+TEST_F(FPDFEditEmbeddertest, InsertPageObjectEditAndSave) {
+  // Load document with some text.
+  EXPECT_TRUE(OpenDocument("hello_world.pdf"));
+  FPDF_PAGE page = LoadPage(0);
+  ASSERT_TRUE(page);
+
+  // Add a red rectangle.
+  ASSERT_EQ(2, FPDFPage_CountObjects(page));
+  FPDF_PAGEOBJECT red_rect = FPDFPageObj_CreateNewRect(20, 100, 50, 50);
+  EXPECT_TRUE(FPDFPath_SetFillColor(red_rect, 255, 100, 100, 255));
+  EXPECT_TRUE(FPDFPath_SetDrawMode(red_rect, FPDF_FILLMODE_ALTERNATE, 0));
+  FPDFPage_InsertObject(page, red_rect);
+
+  // Verify the red rectangle was added.
+  ASSERT_EQ(3, FPDFPage_CountObjects(page));
+
+  // Generate content, then change the rectangle's fill color again.
+  EXPECT_TRUE(FPDFPage_GenerateContent(page));
+  EXPECT_TRUE(FPDFPath_SetFillColor(red_rect, 255, 0, 0, 255));
+
+  // Save the file
+  EXPECT_TRUE(FPDFPage_GenerateContent(page));
+  EXPECT_TRUE(FPDF_SaveAsCopy(document(), this, 0));
+  UnloadPage(page);
+
+  // Re-open the file and check the page object count is still 3.
+  OpenSavedDocument();
+  FPDF_PAGE saved_page = LoadSavedPage(0);
+  EXPECT_EQ(3, FPDFPage_CountObjects(saved_page));
+  CloseSavedPage(saved_page);
+  CloseSavedDocument();
+}
+
 TEST_F(FPDFEditEmbeddertest, AddAndRemovePaths) {
   // Start with a blank page.
   FPDF_PAGE page = FPDFPage_New(CreateNewDocument(), 0, 612, 792);
diff --git a/testing/resources/hello_world_split_streams.in b/testing/resources/hello_world_split_streams.in
new file mode 100644
index 0000000..c7a2935
--- /dev/null
+++ b/testing/resources/hello_world_split_streams.in
@@ -0,0 +1,63 @@
+{{header}}
+{{object 1 0}} <<
+  /Type /Catalog
+  /Pages 2 0 R
+>>
+endobj
+{{object 2 0}} <<
+  /Type /Pages
+  /MediaBox [ 0 0 200 200 ]
+  /Count 1
+  /Kids [ 3 0 R ]
+>>
+endobj
+{{object 3 0}} <<
+  /Type /Page
+  /Parent 2 0 R
+  /Resources <<
+    /Font <<
+      /F1 4 0 R
+      /F2 5 0 R
+    >>
+  >>
+  /Contents [6 0 R 7 0 R]
+>>
+endobj
+{{object 4 0}} <<
+  /Type /Font
+  /Subtype /Type1
+  /BaseFont /Times-Roman
+>>
+endobj
+{{object 5 0}} <<
+  /Type /Font
+  /Subtype /Type1
+  /BaseFont /Helvetica
+>>
+endobj
+{{object 6 0}} <<
+>>
+stream
+BT
+20 50 Td
+/F1 12 Tf
+(Hello, world!) Tj
+0 50 Td
+/F2 16 Tf
+(Goodbye, world!) Tj
+ET
+endstream
+endobj
+{{object 7 0}} <<
+>>
+stream
+BT
+20 50 Td
+/F1 12 Tf
+(Greetings, world!) Tj
+endstream
+endobj
+{{xref}}
+{{trailer}}
+{{startxref}}
+%%EOF
diff --git a/testing/resources/hello_world_split_streams.pdf b/testing/resources/hello_world_split_streams.pdf
new file mode 100644
index 0000000..969fccf
--- /dev/null
+++ b/testing/resources/hello_world_split_streams.pdf
@@ -0,0 +1,77 @@
+%PDF-1.7
+% ò¤ô
+1 0 obj <<
+  /Type /Catalog
+  /Pages 2 0 R
+>>
+endobj
+2 0 obj <<
+  /Type /Pages
+  /MediaBox [ 0 0 200 200 ]
+  /Count 1
+  /Kids [ 3 0 R ]
+>>
+endobj
+3 0 obj <<
+  /Type /Page
+  /Parent 2 0 R
+  /Resources <<
+    /Font <<
+      /F1 4 0 R
+      /F2 5 0 R
+    >>
+  >>
+  /Contents [6 0 R 7 0 R]
+>>
+endobj
+4 0 obj <<
+  /Type /Font
+  /Subtype /Type1
+  /BaseFont /Times-Roman
+>>
+endobj
+5 0 obj <<
+  /Type /Font
+  /Subtype /Type1
+  /BaseFont /Helvetica
+>>
+endobj
+6 0 obj <<
+>>
+stream
+BT
+20 50 Td
+/F1 12 Tf
+(Hello, world!) Tj
+0 50 Td
+/F2 16 Tf
+(Goodbye, world!) Tj
+ET
+endstream
+endobj
+7 0 obj <<
+>>
+stream
+BT
+20 50 Td
+/F1 12 Tf
+(Greetings, world!) Tj
+endstream
+endobj
+xref
+0 8
+0000000000 65535 f 
+0000000015 00000 n 
+0000000068 00000 n 
+0000000161 00000 n 
+0000000311 00000 n 
+0000000389 00000 n 
+0000000465 00000 n 
+0000000586 00000 n 
+trailer <<
+  /Root 1 0 R
+  /Size 8
+>>
+startxref
+669
+%%EOF