Write marked content operators when generating a stream.

The marked content operators are BMC, BDC and EMC. In the case of
BDC, it is preceded by a direct dict or a property name.

Bug: pdfium:1118
Change-Id: I3ee736ff7be3e7d7dde55ef581af3444a325e887
Reviewed-on: https://pdfium-review.googlesource.com/37470
Reviewed-by: Lei Zhang <thestig@chromium.org>
Commit-Queue: Henrique Nakashima <hnakashima@chromium.org>
diff --git a/BUILD.gn b/BUILD.gn
index 222784f..e571772 100644
--- a/BUILD.gn
+++ b/BUILD.gn
@@ -474,6 +474,8 @@
     "core/fpdfapi/edit/cpdf_pagecontentgenerator.h",
     "core/fpdfapi/edit/cpdf_pagecontentmanager.cpp",
     "core/fpdfapi/edit/cpdf_pagecontentmanager.h",
+    "core/fpdfapi/edit/cpdf_stringarchivestream.cpp",
+    "core/fpdfapi/edit/cpdf_stringarchivestream.h",
     "core/fpdfapi/font/cfx_cttgsubtable.cpp",
     "core/fpdfapi/font/cfx_cttgsubtable.h",
     "core/fpdfapi/font/cfx_stockfontarray.cpp",
diff --git a/core/fpdfapi/edit/cpdf_pagecontentgenerator.cpp b/core/fpdfapi/edit/cpdf_pagecontentgenerator.cpp
index 88f14b2..f6a941d 100644
--- a/core/fpdfapi/edit/cpdf_pagecontentgenerator.cpp
+++ b/core/fpdfapi/edit/cpdf_pagecontentgenerator.cpp
@@ -13,7 +13,9 @@
 #include <utility>
 
 #include "core/fpdfapi/edit/cpdf_pagecontentmanager.h"
+#include "core/fpdfapi/edit/cpdf_stringarchivestream.h"
 #include "core/fpdfapi/font/cpdf_font.h"
+#include "core/fpdfapi/page/cpdf_contentmark.h"
 #include "core/fpdfapi/page/cpdf_docpagedata.h"
 #include "core/fpdfapi/page/cpdf_image.h"
 #include "core/fpdfapi/page/cpdf_imageobject.h"
@@ -94,6 +96,9 @@
   // Start regenerating dirty streams.
   std::map<int32_t, std::unique_ptr<std::ostringstream>> streams;
   std::set<int32_t> empty_streams;
+  std::unique_ptr<const CPDF_ContentMark> empty_content_mark =
+      pdfium::MakeUnique<CPDF_ContentMark>();
+  std::map<int32_t, const CPDF_ContentMark*> current_content_mark;
 
   for (int32_t dirty_stream : all_dirty_streams) {
     std::unique_ptr<std::ostringstream> buf =
@@ -108,6 +113,7 @@
 
     streams[dirty_stream] = std::move(buf);
     empty_streams.insert(dirty_stream);
+    current_content_mark[dirty_stream] = empty_content_mark.get();
   }
 
   // Process the page objects, write into each dirty stream.
@@ -119,6 +125,8 @@
 
     std::ostringstream* buf = it->second.get();
     empty_streams.erase(stream_index);
+    current_content_mark[stream_index] = ProcessContentMarks(
+        buf, pPageObj.Get(), current_content_mark[stream_index]);
     ProcessPageObject(buf, pPageObj.Get());
   }
 
@@ -129,6 +137,8 @@
       // Clear to show that this stream needs to be deleted.
       buf->str("");
     } else {
+      FinishMarks(buf, current_content_mark[dirty_stream]);
+
       // Return graphics to original state
       *buf << "Q\n";
     }
@@ -201,13 +211,19 @@
 
 bool CPDF_PageContentGenerator::ProcessPageObjects(std::ostringstream* buf) {
   bool bDirty = false;
+  std::unique_ptr<const CPDF_ContentMark> empty_content_mark =
+      pdfium::MakeUnique<CPDF_ContentMark>();
+  const CPDF_ContentMark* content_mark = empty_content_mark.get();
+
   for (auto& pPageObj : m_pageObjects) {
     if (m_pObjHolder->IsPage() && !pPageObj->IsDirty())
       continue;
 
     bDirty = true;
+    content_mark = ProcessContentMarks(buf, pPageObj.Get(), content_mark);
     ProcessPageObject(buf, pPageObj.Get());
   }
+  FinishMarks(buf, content_mark);  // Emit EMC for marks still open.
   return bDirty;
 }
 
@@ -219,6 +235,61 @@
   }
 }
 
+const CPDF_ContentMark* CPDF_PageContentGenerator::ProcessContentMarks(
+    std::ostringstream* buf,
+    const CPDF_PageObject* pPageObj,
+    const CPDF_ContentMark* pPrev) {
+  // Emits the operators that take |buf| from the marks in |pPrev| to the
+  // marks of |pPageObj|, and returns the latter to serve as the next |pPrev|.
+  const CPDF_ContentMark* pNext = &pPageObj->m_ContentMark;
+  const size_t shared_count = pPrev->FindFirstDifference(pNext);
+
+  // Close every mark of |pPrev| past the shared prefix. EMC takes no operand
+  // naming the mark it closes, so one EMC per mark is enough and the order
+  // in which the marks are visited does not matter.
+  for (size_t left = pPrev->CountItems(); left > shared_count; --left)
+    *buf << "EMC\n";
+
+  // Open every mark of |pNext| past the shared prefix.
+  for (size_t i = shared_count; i < pNext->CountItems(); ++i) {
+    const CPDF_ContentMarkItem* item = pNext->GetItem(i);
+    // Every opening operator starts with the mark's tag.
+    *buf << "/" << item->GetName() << " ";
+    switch (item->GetParamType()) {
+      case CPDF_ContentMarkItem::None:
+        // No parameters: BMC (begin marked content).
+        *buf << "BMC\n";
+        continue;
+      case CPDF_ContentMarkItem::DirectDict: {
+        // Parameters written inline as a dictionary.
+        CPDF_StringArchiveStream archive_stream(buf);
+        item->GetParam()->WriteTo(&archive_stream);
+        *buf << " ";
+        break;
+      }
+      default:
+        // Parameters referenced by property name.
+        ASSERT(item->GetParamType() == CPDF_ContentMarkItem::PropertiesDict);
+        *buf << "/" << item->GetPropertyName() << " ";
+        break;
+    }
+
+    // BDC: begin marked content with a property list.
+    *buf << "BDC\n";
+  }
+  return pNext;
+}
+
+void CPDF_PageContentGenerator::FinishMarks(
+    std::ostringstream* buf,
+    const CPDF_ContentMark* pContentMark) {
+  // Writes one EMC per item in |pContentMark|. EMC takes no operand naming
+  // the mark it closes, so the order in which the items are visited has no
+  // effect on what is written to |buf|.
+  for (size_t open = pContentMark->CountItems(); open > 0; --open)
+    *buf << "EMC\n";
+}
+
 void CPDF_PageContentGenerator::ProcessPageObject(std::ostringstream* buf,
                                                   CPDF_PageObject* pPageObj) {
   if (CPDF_ImageObject* pImageObject = pPageObj->AsImage())
diff --git a/core/fpdfapi/edit/cpdf_pagecontentgenerator.h b/core/fpdfapi/edit/cpdf_pagecontentgenerator.h
index 13b8431..029a779 100644
--- a/core/fpdfapi/edit/cpdf_pagecontentgenerator.h
+++ b/core/fpdfapi/edit/cpdf_pagecontentgenerator.h
@@ -16,6 +16,7 @@
 #include "core/fxcrt/fx_system.h"
 #include "core/fxcrt/unowned_ptr.h"
 
+class CPDF_ContentMark;
 class CPDF_Document;
 class CPDF_ImageObject;
 class CPDF_Object;
@@ -44,6 +45,11 @@
   ByteString GetOrCreateDefaultGraphics() const;
   ByteString RealizeResource(const CPDF_Object* pResource,
                              const ByteString& bsType) const;
+  const CPDF_ContentMark* ProcessContentMarks(std::ostringstream* buf,
+                                              const CPDF_PageObject* pPageObj,
+                                              const CPDF_ContentMark* pPrev);
+  void FinishMarks(std::ostringstream* buf,
+                   const CPDF_ContentMark* pContentMark);
 
   // Returns a map from content stream index to new stream data. Unmodified
   // streams are not touched.
diff --git a/core/fpdfapi/edit/cpdf_stringarchivestream.cpp b/core/fpdfapi/edit/cpdf_stringarchivestream.cpp
new file mode 100644
index 0000000..328d6a2
--- /dev/null
+++ b/core/fpdfapi/edit/cpdf_stringarchivestream.cpp
@@ -0,0 +1,35 @@
+// Copyright 2018 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "core/fpdfapi/edit/cpdf_stringarchivestream.h"
+
+CPDF_StringArchiveStream::CPDF_StringArchiveStream(std::ostringstream* stream)
+    : stream_(stream) {}
+
+CPDF_StringArchiveStream::~CPDF_StringArchiveStream() = default;
+
+bool CPDF_StringArchiveStream::WriteByte(uint8_t byte) {
+  NOTREACHED();  // Not expected to be called on this stream.
+  return false;
+}
+
+bool CPDF_StringArchiveStream::WriteDWord(uint32_t i) {
+  NOTREACHED();  // Not expected to be called on this stream.
+  return false;
+}
+
+FX_FILESIZE CPDF_StringArchiveStream::CurrentOffset() const {
+  NOTREACHED();  // Not expected to be called on this stream.
+  return 0;  // Numeric zero for the FX_FILESIZE result, not a bool literal.
+}
+
+bool CPDF_StringArchiveStream::WriteBlock(const void* pData, size_t size) {
+  stream_->write(static_cast<const char*>(pData), size);
+  return true;  // Always reports success; the stream state is not checked.
+}
+
+bool CPDF_StringArchiveStream::WriteString(const ByteStringView& str) {
+  stream_->write(str.unterminated_c_str(), str.GetLength());
+  return true;  // Always reports success; the stream state is not checked.
+}
diff --git a/core/fpdfapi/edit/cpdf_stringarchivestream.h b/core/fpdfapi/edit/cpdf_stringarchivestream.h
new file mode 100644
index 0000000..bb5481e
--- /dev/null
+++ b/core/fpdfapi/edit/cpdf_stringarchivestream.h
@@ -0,0 +1,26 @@
+// Copyright 2018 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CORE_FPDFAPI_EDIT_CPDF_STRINGARCHIVESTREAM_H_
+#define CORE_FPDFAPI_EDIT_CPDF_STRINGARCHIVESTREAM_H_
+
+#include "core/fxcrt/fx_stream.h"
+
+class CPDF_StringArchiveStream : public IFX_ArchiveStream {
+ public:
+  explicit CPDF_StringArchiveStream(std::ostringstream* stream);
+  ~CPDF_StringArchiveStream() override;
+
+  // IFX_ArchiveStream
+  bool WriteByte(uint8_t byte) override;
+  bool WriteDWord(uint32_t i) override;
+  FX_FILESIZE CurrentOffset() const override;
+  bool WriteBlock(const void* pData, size_t size) override;
+  bool WriteString(const ByteStringView& str) override;
+
+ private:
+  std::ostringstream* stream_;  // Not owned. WriteBlock()/WriteString() sink.
+};
+
+#endif  // CORE_FPDFAPI_EDIT_CPDF_STRINGARCHIVESTREAM_H_
diff --git a/core/fpdfapi/page/cpdf_contentmark.cpp b/core/fpdfapi/page/cpdf_contentmark.cpp
index 29d1bba..1ff567d 100644
--- a/core/fpdfapi/page/cpdf_contentmark.cpp
+++ b/core/fpdfapi/page/cpdf_contentmark.cpp
@@ -6,6 +6,7 @@
 
 #include "core/fpdfapi/page/cpdf_contentmark.h"
 
+#include <algorithm>
 #include <utility>
 
 #include "core/fpdfapi/parser/cpdf_dictionary.h"
@@ -79,6 +80,20 @@
     m_pMarkData.Reset();
 }
 
+size_t CPDF_ContentMark::FindFirstDifference(
+    const CPDF_ContentMark* other) const {
+  // Both marks refer to the same mark data, so every item matches.
+  if (m_pMarkData == other->m_pMarkData)
+    return CountItems();
+
+  // Walk the common prefix and stop at the first item that differs.
+  const size_t common_len = std::min(CountItems(), other->CountItems());
+  size_t index = 0;
+  while (index < common_len && GetItem(index) == other->GetItem(index))
+    ++index;
+  return index;
+}
+
 CPDF_ContentMark::MarkData::MarkData() {}
 
 CPDF_ContentMark::MarkData::MarkData(const MarkData& src)
diff --git a/core/fpdfapi/page/cpdf_contentmark.h b/core/fpdfapi/page/cpdf_contentmark.h
index 3318033..8bbae52 100644
--- a/core/fpdfapi/page/cpdf_contentmark.h
+++ b/core/fpdfapi/page/cpdf_contentmark.h
@@ -35,6 +35,7 @@
                                  CPDF_Dictionary* pDict,
                                  const ByteString& property_name);
   void DeleteLastMark();
+  size_t FindFirstDifference(const CPDF_ContentMark* other) const;
 
  private:
   class MarkData : public Retainable {
diff --git a/fpdfsdk/fpdf_edit_embeddertest.cpp b/fpdfsdk/fpdf_edit_embeddertest.cpp
index e169c46..52cc9fe 100644
--- a/fpdfsdk/fpdf_edit_embeddertest.cpp
+++ b/fpdfsdk/fpdf_edit_embeddertest.cpp
@@ -694,6 +694,68 @@
   UnloadPage(page);
 }
 
+TEST_F(FPDFEditEmbeddertest, MaintainMarkedObjects) {
+  // Load document with some text. Nothing below is meaningful without it.
+  ASSERT_TRUE(OpenDocument("text_in_page_marked.pdf"));
+  FPDF_PAGE page = LoadPage(0);
+  ASSERT_TRUE(page);
+
+  // Iterate over all objects, counting the number of times each content mark
+  // name appears.
+  CheckMarkCounts(page, 1, 19, 8, 4, 9, 1);
+
+  // Remove first page object.
+  FPDF_PAGEOBJECT page_object = FPDFPage_GetObject(page, 0);
+  EXPECT_TRUE(FPDFPage_RemoveObject(page, page_object));
+  FPDFPageObj_Destroy(page_object);
+
+  CheckMarkCounts(page, 2, 18, 8, 3, 9, 1);
+
+  EXPECT_TRUE(FPDFPage_GenerateContent(page));
+  EXPECT_TRUE(FPDF_SaveAsCopy(document(), this, 0));
+
+  UnloadPage(page);
+
+  OpenSavedDocument();
+  FPDF_PAGE saved_page = LoadSavedPage(0);
+
+  CheckMarkCounts(saved_page, 2, 18, 8, 3, 9, 1);
+
+  CloseSavedPage(saved_page);
+  CloseSavedDocument();
+}
+
+TEST_F(FPDFEditEmbeddertest, MaintainIndirectMarkedObjects) {
+  // Load document with some text. Nothing below is meaningful without it.
+  ASSERT_TRUE(OpenDocument("text_in_page_marked_indirect.pdf"));
+  FPDF_PAGE page = LoadPage(0);
+  ASSERT_TRUE(page);
+
+  // Iterate over all objects, counting the number of times each content mark
+  // name appears.
+  CheckMarkCounts(page, 1, 19, 8, 4, 9, 1);
+
+  // Remove first page object.
+  FPDF_PAGEOBJECT page_object = FPDFPage_GetObject(page, 0);
+  EXPECT_TRUE(FPDFPage_RemoveObject(page, page_object));
+  FPDFPageObj_Destroy(page_object);
+
+  CheckMarkCounts(page, 2, 18, 8, 3, 9, 1);
+
+  EXPECT_TRUE(FPDFPage_GenerateContent(page));
+  EXPECT_TRUE(FPDF_SaveAsCopy(document(), this, 0));
+
+  UnloadPage(page);
+
+  OpenSavedDocument();
+  FPDF_PAGE saved_page = LoadSavedPage(0);
+
+  CheckMarkCounts(saved_page, 2, 18, 8, 3, 9, 1);
+
+  CloseSavedPage(saved_page);
+  CloseSavedDocument();
+}
+
 TEST_F(FPDFEditEmbeddertest, RemoveExistingPageObject) {
   // Load document with some text.
   EXPECT_TRUE(OpenDocument("hello_world.pdf"));
@@ -2160,10 +2222,29 @@
     CompareBitmap(page_bitmap.get(), 612, 792, md5);
   }
 
+  // Now save the result.
+  EXPECT_EQ(1, FPDFPage_CountObjects(page));
+  EXPECT_TRUE(FPDFPage_GenerateContent(page));
+  EXPECT_TRUE(FPDF_SaveAsCopy(document(), this, 0));
+
   FPDF_ClosePage(page);
 
-  // TODO(pdfium:1118): Save, then re-open the file and check the changes were
-  // kept in the saved .pdf.
+  // Re-open the file and check the changes were kept in the saved .pdf.
+  OpenSavedDocument();
+  FPDF_PAGE saved_page = LoadSavedPage(0);
+  EXPECT_EQ(1, FPDFPage_CountObjects(saved_page));
+
+  text_object = FPDFPage_GetObject(saved_page, 0);
+  EXPECT_TRUE(text_object);
+  EXPECT_EQ(1, FPDFPageObj_CountMarks(text_object));
+  mark = FPDFPageObj_GetMark(text_object, 0);
+  EXPECT_TRUE(mark);
+  EXPECT_GT(FPDFPageObjMark_GetName(mark, buffer, 256), 0u);
+  name = GetPlatformWString(reinterpret_cast<unsigned short*>(buffer));
+  EXPECT_EQ(L"TestMarkName", name);
+
+  CloseSavedPage(saved_page);
+  CloseSavedDocument();
 }
 
 TEST_F(FPDFEditEmbeddertest, ExtractImageBitmap) {