Refactor modification of Contents in CPDF_PageContentGenerator.

This is a pure refactor; there should be no change in behavior. It
prepares for the next CL, which will allow modification of content
streams.

Bug: pdfium:1051
Change-Id: I01ca3e897efe423e89df75e1f31cd67539cc3d08
Reviewed-on: https://pdfium-review.googlesource.com/34470
Commit-Queue: Henrique Nakashima <hnakashima@chromium.org>
Reviewed-by: dsinclair <dsinclair@chromium.org>
diff --git a/BUILD.gn b/BUILD.gn
index 008ac77..132f3c2 100644
--- a/BUILD.gn
+++ b/BUILD.gn
@@ -481,6 +481,8 @@
     "core/fpdfapi/edit/cpdf_flateencoder.h",
     "core/fpdfapi/edit/cpdf_pagecontentgenerator.cpp",
     "core/fpdfapi/edit/cpdf_pagecontentgenerator.h",
+    "core/fpdfapi/edit/cpdf_pagecontentmanager.cpp",
+    "core/fpdfapi/edit/cpdf_pagecontentmanager.h",
     "core/fpdfapi/font/cfx_cttgsubtable.cpp",
     "core/fpdfapi/font/cfx_cttgsubtable.h",
     "core/fpdfapi/font/cfx_stockfontarray.cpp",
diff --git a/core/fpdfapi/edit/cpdf_pagecontentgenerator.cpp b/core/fpdfapi/edit/cpdf_pagecontentgenerator.cpp
index 3365b5f..e3a8674 100644
--- a/core/fpdfapi/edit/cpdf_pagecontentgenerator.cpp
+++ b/core/fpdfapi/edit/cpdf_pagecontentgenerator.cpp
@@ -9,6 +9,7 @@
 #include <tuple>
 #include <utility>
 
+#include "core/fpdfapi/edit/cpdf_pagecontentmanager.h"
 #include "core/fpdfapi/font/cpdf_font.h"
 #include "core/fpdfapi/page/cpdf_docpagedata.h"
 #include "core/fpdfapi/page/cpdf_image.h"
@@ -64,60 +65,67 @@
 void CPDF_PageContentGenerator::GenerateContent() {
   ASSERT(m_pObjHolder->IsPage());
 
-  CPDF_Document* pDoc = m_pDocument.Get();
-  std::ostringstream buf;
+  std::map<int32_t, std::unique_ptr<std::ostringstream>> streams =
+      GenerateModifiedStreams();
 
+  UpdateContentStreams(&streams);
+}
+
+std::map<int32_t, std::unique_ptr<std::ostringstream>>
+CPDF_PageContentGenerator::GenerateModifiedStreams() {
+  auto buf = pdfium::MakeUnique<std::ostringstream>();
+
+  std::map<int32_t, std::unique_ptr<std::ostringstream>> streams;
+  if (GenerateStreamWithNewObjects(buf.get()))
+    streams[CPDF_PageObject::kNoContentStream] = std::move(buf);
+
+  // TODO(pdfium:1051): Generate other streams and add to |streams|.
+
+  return streams;
+}
+
+bool CPDF_PageContentGenerator::GenerateStreamWithNewObjects(
+    std::ostringstream* buf) {
   // Set the default graphic state values
-  buf << "q\n";
+  *buf << "q\n";
   if (!m_pObjHolder->GetLastCTM().IsIdentity())
-    buf << m_pObjHolder->GetLastCTM().GetInverse() << " cm\n";
-  ProcessDefaultGraphics(&buf);
+    *buf << m_pObjHolder->GetLastCTM().GetInverse() << " cm\n";
+  ProcessDefaultGraphics(buf);
 
   // Process the page objects
-  if (!ProcessPageObjects(&buf))
-    return;
+  if (!ProcessPageObjects(buf))
+    return false;
 
   // Return graphics to original state
-  buf << "Q\n";
+  *buf << "Q\n";
 
-  // Add buffer to a stream in page's 'Contents'
-  CPDF_Dictionary* pPageDict = m_pObjHolder->GetDict();
-  if (!pPageDict)
+  return true;
+}
+
+void CPDF_PageContentGenerator::UpdateContentStreams(
+    std::map<int32_t, std::unique_ptr<std::ostringstream>>* new_stream_data) {
+  // Nothing to do if no stream was regenerated or the page has no dictionary.
+  if (new_stream_data->empty() || !m_pObjHolder->GetDict())
     return;
 
-  CPDF_Object* pContent = pPageDict->GetObjectFor("Contents");
-  CPDF_Stream* pStream = pDoc->NewIndirect<CPDF_Stream>();
-  pStream->SetData(&buf);
-  if (pContent) {
-    CPDF_Array* pArray = ToArray(pContent);
-    if (pArray) {
-      pArray->Add(pStream->MakeReference(pDoc));
-      return;
+  CPDF_PageContentManager page_content_manager(m_pObjHolder.Get());
+
+  for (auto& pair : *new_stream_data) {
+    int32_t stream_index = pair.first;
+    std::ostringstream* buf = pair.second.get();
+
+    if (stream_index == CPDF_PageObject::kNoContentStream) {
+      int new_stream_index = page_content_manager.AddStream(buf);
+      UpdateStreamlessPageObjects(new_stream_index);
+      continue;
     }
-    CPDF_Reference* pReference = ToReference(pContent);
-    if (!pReference) {
-      pPageDict->SetFor("Contents", pStream->MakeReference(pDoc));
-      return;
-    }
-    CPDF_Object* pDirectObj = pReference->GetDirect();
-    if (!pDirectObj) {
-      pPageDict->SetFor("Contents", pStream->MakeReference(pDoc));
-      return;
-    }
-    CPDF_Array* pObjArray = pDirectObj->AsArray();
-    if (pObjArray) {
-      pObjArray->Add(pStream->MakeReference(pDoc));
-      return;
-    }
-    if (pDirectObj->IsStream()) {
-      CPDF_Array* pContentArray = pDoc->NewIndirect<CPDF_Array>();
-      pContentArray->Add(pDirectObj->MakeReference(pDoc));
-      pContentArray->Add(pStream->MakeReference(pDoc));
-      pPageDict->SetFor("Contents", pContentArray->MakeReference(pDoc));
-      return;
-    }
+
+    CPDF_Stream* old_stream =
+        page_content_manager.GetStreamByIndex(stream_index);
+    ASSERT(old_stream);
+
+    old_stream->SetData(buf);
   }
-  pPageDict->SetFor("Contents", pStream->MakeReference(pDoc));
 }
 
 ByteString CPDF_PageContentGenerator::RealizeResource(
@@ -165,6 +173,12 @@
   return bDirty;
 }
 
+void CPDF_PageContentGenerator::UpdateStreamlessPageObjects(
+    int new_content_stream_index) {
+  // TODO(pdfium:1051): Mark page objects that did not have a content stream
+  // with the new content stream index.
+}
+
 void CPDF_PageContentGenerator::ProcessImage(std::ostringstream* buf,
                                              CPDF_ImageObject* pImageObj) {
   if ((pImageObj->matrix().a == 0 && pImageObj->matrix().b == 0) ||
diff --git a/core/fpdfapi/edit/cpdf_pagecontentgenerator.h b/core/fpdfapi/edit/cpdf_pagecontentgenerator.h
index a6cf215..677d292 100644
--- a/core/fpdfapi/edit/cpdf_pagecontentgenerator.h
+++ b/core/fpdfapi/edit/cpdf_pagecontentgenerator.h
@@ -7,6 +7,8 @@
 #ifndef CORE_FPDFAPI_EDIT_CPDF_PAGECONTENTGENERATOR_H_
 #define CORE_FPDFAPI_EDIT_CPDF_PAGECONTENTGENERATOR_H_
 
+#include <map>
+#include <memory>
 #include <sstream>
 #include <vector>
 
@@ -41,6 +43,23 @@
   ByteString RealizeResource(const CPDF_Object* pResource,
                              const ByteString& bsType);
 
+  // Returns a map from content stream index to new stream data. Unmodified
+  // streams are not touched.
+  std::map<int32_t, std::unique_ptr<std::ostringstream>>
+  GenerateModifiedStreams();
+
+  // Generate new stream data with all dirty page objects.
+  bool GenerateStreamWithNewObjects(std::ostringstream* buf);
+
+  // Applies the regenerated stream data in the map to the page's 'Contents'.
+  void UpdateContentStreams(
+      std::map<int32_t, std::unique_ptr<std::ostringstream>>* new_stream_data);
+
+  // Set the stream index of all page objects with stream index ==
+  // |CPDF_PageObject::kNoContentStream|. These are new objects that had not
+  // been parsed from or written to any content stream yet.
+  void UpdateStreamlessPageObjects(int new_content_stream_index);
+
   UnownedPtr<CPDF_PageObjectHolder> const m_pObjHolder;
   UnownedPtr<CPDF_Document> const m_pDocument;
   std::vector<UnownedPtr<CPDF_PageObject>> m_pageObjects;
diff --git a/core/fpdfapi/edit/cpdf_pagecontentmanager.cpp b/core/fpdfapi/edit/cpdf_pagecontentmanager.cpp
new file mode 100644
index 0000000..e9ade27
--- /dev/null
+++ b/core/fpdfapi/edit/cpdf_pagecontentmanager.cpp
@@ -0,0 +1,87 @@
+// Copyright 2018 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "core/fpdfapi/edit/cpdf_pagecontentmanager.h"
+
+#include "core/fpdfapi/parser/cpdf_array.h"
+#include "core/fpdfapi/parser/cpdf_dictionary.h"
+#include "core/fpdfapi/parser/cpdf_document.h"
+#include "core/fpdfapi/parser/cpdf_reference.h"
+#include "core/fpdfapi/parser/cpdf_stream.h"
+
+CPDF_PageContentManager::CPDF_PageContentManager(
+    CPDF_PageObjectHolder* obj_holder)
+    : obj_holder_(obj_holder), doc_(obj_holder_->GetDocument()) {
+  CPDF_Dictionary* page_dict = obj_holder_->GetDict();
+  CPDF_Object* contents_obj = page_dict->GetObjectFor("Contents");
+  CPDF_Array* contents_array = ToArray(contents_obj);
+  if (contents_array) {
+    contents_array_ = contents_array;
+    return;
+  }
+
+  CPDF_Reference* contents_reference = ToReference(contents_obj);
+  if (contents_reference) {
+    CPDF_Object* indirect_obj = contents_reference->GetDirect();
+    if (!indirect_obj)
+      return;
+
+    contents_array = indirect_obj->AsArray();
+    if (contents_array)
+      contents_array_ = contents_array;
+    else if (indirect_obj->IsStream())
+      contents_stream_ = indirect_obj->AsStream();
+  }
+}
+
+CPDF_PageContentManager::~CPDF_PageContentManager() = default;
+
+CPDF_Stream* CPDF_PageContentManager::GetStreamByIndex(size_t stream_index) {
+  if (contents_stream_)
+    return stream_index == 0 ? contents_stream_.Get() : nullptr;
+
+  if (contents_array_) {
+    CPDF_Reference* stream_reference =
+        ToReference(contents_array_->GetObjectAt(stream_index));
+    if (!stream_reference)
+      return nullptr;
+
+    return ToStream(stream_reference->GetDirect());  // Null if dangling.
+  }
+
+  return nullptr;
+}
+
+size_t CPDF_PageContentManager::AddStream(std::ostringstream* buf) {
+  CPDF_Stream* new_stream = doc_->NewIndirect<CPDF_Stream>();
+  new_stream->SetData(buf);
+
+  // If there is one Content stream (not in an array), now there will be two, so
+  // create an array with the old and the new one. The new one's index is 1.
+  if (contents_stream_) {
+    CPDF_Array* new_contents_array = doc_->NewIndirect<CPDF_Array>();
+    new_contents_array->Add(contents_stream_->MakeReference(doc_.Get()));
+    new_contents_array->Add(new_stream->MakeReference(doc_.Get()));
+
+    CPDF_Dictionary* page_dict = obj_holder_->GetDict();
+    page_dict->SetFor("Contents",
+                      new_contents_array->MakeReference(doc_.Get()));
+    contents_array_ = new_contents_array;
+    contents_stream_ = nullptr;
+    return 1;
+  }
+
+  // If there is an array, just add the new stream to it, at the last position.
+  if (contents_array_) {
+    contents_array_->Add(new_stream->MakeReference(doc_.Get()));
+    return contents_array_->GetCount() - 1;
+  }
+
+  // There were no Contents, so add the new stream as the single Content stream.
+  // Its index is 0.
+  CPDF_Dictionary* page_dict = obj_holder_->GetDict();
+  page_dict->SetFor("Contents", new_stream->MakeReference(doc_.Get()));
+  contents_stream_ = new_stream;
+  return 0;
+}
diff --git a/core/fpdfapi/edit/cpdf_pagecontentmanager.h b/core/fpdfapi/edit/cpdf_pagecontentmanager.h
new file mode 100644
index 0000000..384405b
--- /dev/null
+++ b/core/fpdfapi/edit/cpdf_pagecontentmanager.h
@@ -0,0 +1,38 @@
+// Copyright 2018 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CORE_FPDFAPI_EDIT_CPDF_PAGECONTENTMANAGER_H_
+#define CORE_FPDFAPI_EDIT_CPDF_PAGECONTENTMANAGER_H_
+
+#include <sstream>
+
+#include "core/fxcrt/unowned_ptr.h"
+
+class CPDF_Array;
+class CPDF_Document;
+class CPDF_Object;
+class CPDF_Stream;
+class CPDF_PageObjectHolder;
+
+class CPDF_PageContentManager {
+ public:
+  explicit CPDF_PageContentManager(CPDF_PageObjectHolder* obj_holder);
+  ~CPDF_PageContentManager();
+
+  // Gets the Content stream at a given index. If Contents is a single stream
+  // rather than an array, it is considered to be at index 0.
+  CPDF_Stream* GetStreamByIndex(size_t stream_index);
+
+  // Adds a new Content stream. Its index in the array will be returned, or 0
+  // if Contents is not an array, but only a single stream.
+  size_t AddStream(std::ostringstream* buf);
+
+ private:
+  UnownedPtr<CPDF_PageObjectHolder> const obj_holder_;
+  UnownedPtr<CPDF_Document> const doc_;
+  UnownedPtr<CPDF_Array> contents_array_;
+  UnownedPtr<CPDF_Stream> contents_stream_;
+};
+
+#endif  // CORE_FPDFAPI_EDIT_CPDF_PAGECONTENTMANAGER_H_