Properly account for CTM changes

In the existing code, CPDF_PageObjectHolder only stored the final value
of the current transformation matrix. This is insufficient information
for PDF editing and saving, where a page can have multiple content
streams and only some of them have been modified and get regenerated.
Even then, CPDF_PageContentGenerator mostly works, but the way it writes
out the page contents is based on the limitation of only having this
single CTM value.

To properly account for CTM changes from stream to stream, update
CPDF_StreamContentParser to store the CTM at the end of streams, when
handling "cm" and "Q" operators. CPDF_PageObjectHolder can then take
this data as `m_AllCTMs`, and stop storing `m_LastCTM`. With this
additional information, CPDF_PageContentGenerator can compensate for the
CTM when handling the modified streams. It can do so at the stream
level, and individual objects no longer need to take the CTM into
account, because each stream will start with the CTM set to the identity
matrix. At the end of each stream, the CTM will be restored to the value
expected by the next stream.

With the above changes, `CPDF_PageObject::m_bUsesCTM` is no longer
necessary, as objects no longer use the CTM. Remove it and associated
code. The FPDFEditPathEmbedderTest.AddPathToRectanglesWithLeakyCTM test
case also works as expected, so update the test expectation and remove
the TODO comment.

Bug: pdfium:2142
Change-Id: I95fab8dd5a06da19743f24f9e89007b4bb3c34c9
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/117356
Reviewed-by: Tom Sepez <tsepez@chromium.org>
Commit-Queue: Lei Zhang <thestig@chromium.org>
Reviewed-by: Thomas Sepez <tsepez@google.com>
diff --git a/core/fpdfapi/edit/cpdf_pagecontentgenerator.cpp b/core/fpdfapi/edit/cpdf_pagecontentgenerator.cpp
index dc09c61..807d7e2 100644
--- a/core/fpdfapi/edit/cpdf_pagecontentgenerator.cpp
+++ b/core/fpdfapi/edit/cpdf_pagecontentgenerator.cpp
@@ -41,6 +41,7 @@
 #include "core/fpdfapi/parser/fpdf_parser_utility.h"
 #include "core/fpdfapi/parser/object_tree_traversal_util.h"
 #include "core/fxcrt/check.h"
+#include "core/fxcrt/check_op.h"
 #include "core/fxcrt/containers/contains.h"
 #include "core/fxcrt/notreached.h"
 #include "core/fxcrt/numerics/safe_conversions.h"
@@ -179,11 +180,13 @@
   for (int32_t dirty_stream : all_dirty_streams) {
     fxcrt::ostringstream buf;
 
-    // Set the default graphic state values
+    // Set the default graphic state values. Update CTM to be the identity
+    // matrix for the duration of this stream, if it is not already.
     buf << "q\n";
-    const CFX_Matrix& ctm = m_pObjHolder->GetLastCTM();
+    const CFX_Matrix ctm =
+        m_pObjHolder->GetCTMAtBeginningOfStream(dirty_stream);
     if (!ctm.IsIdentity()) {
-      WriteMatrix(buf, ctm) << " cm\n";
+      WriteMatrix(buf, ctm.GetInverse()) << " cm\n";
     }
 
     ProcessDefaultGraphics(&buf);
@@ -208,15 +211,46 @@
 
   // Finish dirty streams.
   for (int32_t dirty_stream : all_dirty_streams) {
+    CFX_Matrix prev_ctm;
+    CFX_Matrix ctm;
+    bool affects_ctm;
+    if (dirty_stream == 0) {
+      // For the first stream, `prev_ctm` is the identity matrix.
+      ctm = m_pObjHolder->GetCTMAtEndOfStream(dirty_stream);
+      affects_ctm = !ctm.IsIdentity();
+    } else if (dirty_stream > 0) {
+      prev_ctm = m_pObjHolder->GetCTMAtEndOfStream(dirty_stream - 1);
+      ctm = m_pObjHolder->GetCTMAtEndOfStream(dirty_stream);
+      affects_ctm = prev_ctm != ctm;
+    } else {
+      CHECK_EQ(CPDF_PageObject::kNoContentStream, dirty_stream);
+      // This is the last stream, so there is no subsequent stream that it can
+      // affect.
+      affects_ctm = false;
+    }
+
+    const bool is_empty = pdfium::Contains(empty_streams, dirty_stream);
+
     fxcrt::ostringstream* buf = &streams[dirty_stream];
-    if (pdfium::Contains(empty_streams, dirty_stream)) {
+    if (is_empty && !affects_ctm) {
       // Clear to show that this stream needs to be deleted.
       buf->str("");
-    } else {
-      FinishMarks(buf, current_content_marks[dirty_stream]);
+      continue;
+    }
 
-      // Return graphics to original state
-      *buf << "Q\n";
+    if (!is_empty) {
+      FinishMarks(buf, current_content_marks[dirty_stream]);
+    }
+
+    // Return graphics to original state.
+    *buf << "Q\n";
+
+    if (affects_ctm) {
+      // Update CTM so the next stream gets the expected value.
+      CFX_Matrix ctm_difference = prev_ctm.GetInverse() * ctm;
+      if (!ctm_difference.IsIdentity()) {
+        WriteMatrix(*buf, ctm_difference) << " cm\n";
+      }
     }
   }
 
@@ -406,7 +440,7 @@
 
 void CPDF_PageContentGenerator::ProcessImage(fxcrt::ostringstream* buf,
                                              CPDF_ImageObject* pImageObj) {
-  CFX_Matrix matrix = pImageObj->matrix();
+  const CFX_Matrix& matrix = pImageObj->matrix();
   if ((matrix.a == 0 && matrix.b == 0) || (matrix.c == 0 && matrix.d == 0)) {
     return;
   }
@@ -421,12 +455,6 @@
 
   *buf << "q ";
 
-  if (pImageObj->UsesCTM()) {
-    const CFX_Matrix& ctm = m_pObjHolder->GetLastCTM();
-    if (!ctm.IsIdentity()) {
-      matrix.Concat(ctm.GetInverse());
-    }
-  }
   if (!matrix.IsIdentity()) {
     WriteMatrix(*buf, matrix) << " cm ";
   }
@@ -448,7 +476,7 @@
 
 void CPDF_PageContentGenerator::ProcessForm(fxcrt::ostringstream* buf,
                                             CPDF_FormObject* pFormObj) {
-  CFX_Matrix matrix = pFormObj->form_matrix();
+  const CFX_Matrix& matrix = pFormObj->form_matrix();
   if ((matrix.a == 0 && matrix.b == 0) || (matrix.c == 0 && matrix.d == 0)) {
     return;
   }
@@ -462,12 +490,6 @@
 
   *buf << "q\n";
 
-  if (pFormObj->UsesCTM()) {
-    const CFX_Matrix& ctm = m_pObjHolder->GetLastCTM();
-    if (!ctm.IsIdentity()) {
-      matrix.Concat(ctm.GetInverse());
-    }
-  }
   if (!matrix.IsIdentity()) {
     WriteMatrix(*buf, matrix) << " cm ";
   }
@@ -530,13 +552,7 @@
                                             CPDF_PathObject* pPathObj) {
   ProcessGraphics(buf, pPathObj);
 
-  CFX_Matrix matrix = pPathObj->matrix();
-  if (pPathObj->UsesCTM()) {
-    const CFX_Matrix& ctm = m_pObjHolder->GetLastCTM();
-    if (!ctm.IsIdentity()) {
-      matrix.Concat(ctm.GetInverse());
-    }
-  }
+  const CFX_Matrix& matrix = pPathObj->matrix();
   if (!matrix.IsIdentity()) {
     WriteMatrix(*buf, matrix) << " cm ";
   }
@@ -690,13 +706,7 @@
   ProcessGraphics(buf, pTextObj);
   *buf << "BT ";
 
-  CFX_Matrix matrix = pTextObj->GetTextMatrix();
-  if (pTextObj->UsesCTM()) {
-    const CFX_Matrix& ctm = m_pObjHolder->GetLastCTM();
-    if (!ctm.IsIdentity()) {
-      matrix.Concat(ctm.GetInverse());
-    }
-  }
+  const CFX_Matrix& matrix = pTextObj->GetTextMatrix();
   if (!matrix.IsIdentity()) {
     WriteMatrix(*buf, matrix) << " Tm ";
   }
diff --git a/core/fpdfapi/page/cpdf_contentparser.cpp b/core/fpdfapi/page/cpdf_contentparser.cpp
index b4efb88..20feada 100644
--- a/core/fpdfapi/page/cpdf_contentparser.cpp
+++ b/core/fpdfapi/page/cpdf_contentparser.cpp
@@ -118,6 +118,10 @@
 
 CPDF_ContentParser::~CPDF_ContentParser() = default;
 
+CPDF_PageObjectHolder::CTMMap CPDF_ContentParser::TakeAllCTMs() {
+  return m_pParser ? m_pParser->TakeAllCTMs() : CPDF_PageObjectHolder::CTMMap();
+}
+
 // Returning |true| means that there is more content to be processed and
 // Continue() should be called again. Returning |false| means that we've
 // completed the parse and Continue() is complete.
diff --git a/core/fpdfapi/page/cpdf_contentparser.h b/core/fpdfapi/page/cpdf_contentparser.h
index c2ed273..7105a70 100644
--- a/core/fpdfapi/page/cpdf_contentparser.h
+++ b/core/fpdfapi/page/cpdf_contentparser.h
@@ -13,6 +13,7 @@
 #include <vector>
 
 #include "core/fpdfapi/page/cpdf_form.h"
+#include "core/fpdfapi/page/cpdf_pageobjectholder.h"
 #include "core/fpdfapi/page/cpdf_streamcontentparser.h"
 #include "core/fxcrt/fixed_size_data_vector.h"
 #include "core/fxcrt/raw_span.h"
@@ -40,9 +41,8 @@
                      CPDF_Form::RecursionState* recursion_state);
   ~CPDF_ContentParser();
 
-  const CPDF_AllStates* GetCurStates() const {
-    return m_pParser ? m_pParser->GetCurStates() : nullptr;
-  }
+  CPDF_PageObjectHolder::CTMMap TakeAllCTMs();
+
   // Returns whether to continue or not.
   bool Continue(PauseIndicatorIface* pPause);
 
diff --git a/core/fpdfapi/page/cpdf_pageobject.h b/core/fpdfapi/page/cpdf_pageobject.h
index 999c942..4821321 100644
--- a/core/fpdfapi/page/cpdf_pageobject.h
+++ b/core/fpdfapi/page/cpdf_pageobject.h
@@ -62,8 +62,6 @@
 
   void SetDirty(bool value) { m_bDirty = value; }
   bool IsDirty() const { return m_bDirty; }
-  void SetUsesCTM(bool uses_ctm) { m_bUsesCTM = uses_ctm; }
-  bool UsesCTM() const { return m_bUsesCTM; }
   void TransformClipPath(const CFX_Matrix& matrix);
 
   void SetOriginalRect(const CFX_FloatRect& rect) { m_OriginalRect = rect; }
@@ -144,11 +142,6 @@
   CFX_FloatRect m_OriginalRect;
   CPDF_ContentMarks m_ContentMarks;
   bool m_bDirty = false;
-  // If a page object is created by CPDF_StreamContentParser, then it uses the
-  // current transformation matrix. If it is created by a public API and
-  // inserted into a page, then it does not use the CTM. True by default since
-  // the majority of CPDF_PageObjects are created by CPDF_StreamContentParser.
-  bool m_bUsesCTM = true;
   int32_t m_ContentStream;
   // The resource name for this object.
   ByteString m_ResourceName;
diff --git a/core/fpdfapi/page/cpdf_pageobjectholder.cpp b/core/fpdfapi/page/cpdf_pageobjectholder.cpp
index f1756fb..eaecc8d 100644
--- a/core/fpdfapi/page/cpdf_pageobjectholder.cpp
+++ b/core/fpdfapi/page/cpdf_pageobjectholder.cpp
@@ -69,9 +69,7 @@
 
   m_ParseState = ParseState::kParsed;
   m_pDocument->IncrementParsedPageCount();
-  if (m_pParser->GetCurStates()) {
-    m_LastCTM = m_pParser->GetCurStates()->current_transformation_matrix();
-  }
+  m_AllCTMs = m_pParser->TakeAllCTMs();
 
   m_pParser.reset();
 }
@@ -114,6 +112,36 @@
   m_FontsMap[fd] = str;
 }
 
+CFX_Matrix CPDF_PageObjectHolder::GetCTMAtBeginningOfStream(int32_t stream) {
+  CHECK(stream >= 0 || stream == CPDF_PageObject::kNoContentStream);
+
+  if (stream == 0 || m_AllCTMs.empty()) {
+    return CFX_Matrix();
+  }
+
+  if (stream == CPDF_PageObject::kNoContentStream) {
+    return m_AllCTMs.rbegin()->second;
+  }
+
+  // For all other cases, CTM at beginning of `stream` is the same value as CTM
+  // at the end of the previous stream.
+  return GetCTMAtEndOfStream(stream - 1);
+}
+
+CFX_Matrix CPDF_PageObjectHolder::GetCTMAtEndOfStream(int32_t stream) {
+  // This code should never need to calculate the CTM for the end of
+  // `CPDF_PageObject::kNoContentStream`, which uses a negative sentinel value.
+  // All other streams have a non-negative index.
+  CHECK_GE(stream, 0);
+
+  if (m_AllCTMs.empty()) {
+    return CFX_Matrix();
+  }
+
+  const auto it = m_AllCTMs.lower_bound(stream);
+  return it != m_AllCTMs.end() ? it->second : m_AllCTMs.rbegin()->second;
+}
+
 void CPDF_PageObjectHolder::LoadTransparencyInfo() {
   RetainPtr<const CPDF_Dictionary> pGroup = m_pDict->GetDictFor("Group");
   if (!pGroup)
diff --git a/core/fpdfapi/page/cpdf_pageobjectholder.h b/core/fpdfapi/page/cpdf_pageobjectholder.h
index 6afe13b..5a9b697 100644
--- a/core/fpdfapi/page/cpdf_pageobjectholder.h
+++ b/core/fpdfapi/page/cpdf_pageobjectholder.h
@@ -52,6 +52,10 @@
  public:
   enum class ParseState : uint8_t { kNotParsed, kParsing, kParsed };
 
+  // Key: The stream index.
+  // Value: The current transformation matrix at the end of the stream.
+  using CTMMap = std::map<int32_t, CFX_Matrix>;
+
   using iterator = std::deque<std::unique_ptr<CPDF_PageObject>>::iterator;
   using const_iterator =
       std::deque<std::unique_ptr<CPDF_PageObject>>::const_iterator;
@@ -96,7 +100,6 @@
   iterator end() { return m_PageObjectList.end(); }
   const_iterator end() const { return m_PageObjectList.end(); }
 
-  const CFX_Matrix& GetLastCTM() const { return m_LastCTM; }
   const CFX_FloatRect& GetBBox() const { return m_BBox; }
 
   const CPDF_Transparency& GetTransparency() const { return m_Transparency; }
@@ -119,6 +122,12 @@
   std::optional<ByteString> FontsMapSearch(const FontData& fd);
   void FontsMapInsert(const FontData& fd, const ByteString& str);
 
+  // `stream` must be non-negative or `CPDF_PageObject::kNoContentStream`.
+  CFX_Matrix GetCTMAtBeginningOfStream(int32_t stream);
+
+  // `stream` must be non-negative.
+  CFX_Matrix GetCTMAtEndOfStream(int32_t stream);
+
  protected:
   void LoadTransparencyInfo();
 
@@ -137,7 +146,8 @@
   std::vector<CFX_FloatRect> m_MaskBoundingBoxes;
   std::unique_ptr<CPDF_ContentParser> m_pParser;
   std::deque<std::unique_ptr<CPDF_PageObject>> m_PageObjectList;
-  CFX_Matrix m_LastCTM;
+
+  CTMMap m_AllCTMs;
 
   // The indexes of Content streams that are dirty and need to be regenerated.
   std::set<int32_t> m_DirtyStreams;
diff --git a/core/fpdfapi/page/cpdf_streamcontentparser.cpp b/core/fpdfapi/page/cpdf_streamcontentparser.cpp
index b28424d..a24280c 100644
--- a/core/fpdfapi/page/cpdf_streamcontentparser.cpp
+++ b/core/fpdfapi/page/cpdf_streamcontentparser.cpp
@@ -404,6 +404,10 @@
 
   // Add the sentinel.
   m_ContentMarksStack.push(std::make_unique<CPDF_ContentMarks>());
+
+  // Initialize `m_AllCTMs`, as there is a CTM, even if the stream contains no
+  // cm operators.
+  m_AllCTMs[0] = m_pCurStates->current_transformation_matrix();
 }
 
 CPDF_StreamContentParser::~CPDF_StreamContentParser() {
@@ -685,6 +689,8 @@
 
 void CPDF_StreamContentParser::Handle_ConcatMatrix() {
   m_pCurStates->prepend_to_current_transformation_matrix(GetMatrix());
+  m_AllCTMs[GetCurrentStreamIndex()] =
+      m_pCurStates->current_transformation_matrix();
   OnChangeTextMatrix();
 }
 
@@ -987,10 +993,14 @@
 }
 
 void CPDF_StreamContentParser::Handle_RestoreGraphState() {
-  if (m_StateStack.empty())
+  if (m_StateStack.empty()) {
     return;
+  }
+
   *m_pCurStates = *m_StateStack.back();
   m_StateStack.pop_back();
+  m_AllCTMs[GetCurrentStreamIndex()] =
+      m_pCurStates->current_transformation_matrix();
 }
 
 void CPDF_StreamContentParser::Handle_Rectangle() {
@@ -1175,6 +1185,12 @@
   return pFont;
 }
 
+CPDF_PageObjectHolder::CTMMap CPDF_StreamContentParser::TakeAllCTMs() {
+  CPDF_PageObjectHolder::CTMMap all_ctms;
+  all_ctms.swap(m_AllCTMs);
+  return all_ctms;
+}
+
 RetainPtr<CPDF_ColorSpace> CPDF_StreamContentParser::FindColorSpace(
     const ByteString& name) {
   if (name == "Pattern")
diff --git a/core/fpdfapi/page/cpdf_streamcontentparser.h b/core/fpdfapi/page/cpdf_streamcontentparser.h
index 176e675..6f8d8b9 100644
--- a/core/fpdfapi/page/cpdf_streamcontentparser.h
+++ b/core/fpdfapi/page/cpdf_streamcontentparser.h
@@ -14,6 +14,7 @@
 
 #include "core/fpdfapi/page/cpdf_contentmarks.h"
 #include "core/fpdfapi/page/cpdf_form.h"
+#include "core/fpdfapi/page/cpdf_pageobjectholder.h"
 #include "core/fxcrt/bytestring.h"
 #include "core/fxcrt/fx_coordinates.h"
 #include "core/fxcrt/fx_number.h"
@@ -32,7 +33,6 @@
 class CPDF_ImageObject;
 class CPDF_Object;
 class CPDF_PageObject;
-class CPDF_PageObjectHolder;
 class CPDF_Pattern;
 class CPDF_ShadingPattern;
 class CPDF_Stream;
@@ -64,6 +64,7 @@
   bool IsColored() const { return m_bColored; }
   pdfium::span<const float> GetType3Data() const { return m_Type3Data; }
   RetainPtr<CPDF_Font> FindFont(const ByteString& name);
+  CPDF_PageObjectHolder::CTMMap TakeAllCTMs();
 
   static ByteStringView FindKeyAbbreviationForTesting(ByteStringView abbr);
   static ByteStringView FindValueAbbreviationForTesting(ByteStringView abbr);
@@ -242,6 +243,7 @@
   std::vector<std::unique_ptr<CPDF_AllStates>> m_StateStack;
   std::array<float, 6> m_Type3Data = {};
   std::array<ContentParam, kParamBufSize> m_ParamBuf;
+  CPDF_PageObjectHolder::CTMMap m_AllCTMs;
 
   // The merged stream offsets at which a content stream ends and another
   // begins.
diff --git a/fpdfsdk/fpdf_editpage.cpp b/fpdfsdk/fpdf_editpage.cpp
index 512bbf8..e5c380a 100644
--- a/fpdfsdk/fpdf_editpage.cpp
+++ b/fpdfsdk/fpdf_editpage.cpp
@@ -257,7 +257,6 @@
   if (!IsPageObject(pPage))
     return;
 
-  pPageObj->SetUsesCTM(false);
   pPageObj->SetDirty(true);
   pPage->AppendPageObject(std::move(pPageObjHolder));
   CalcBoundingBox(pPageObj);
diff --git a/fpdfsdk/fpdf_editpath_embeddertest.cpp b/fpdfsdk/fpdf_editpath_embeddertest.cpp
index df2bd57..e23d98c 100644
--- a/fpdfsdk/fpdf_editpath_embeddertest.cpp
+++ b/fpdfsdk/fpdf_editpath_embeddertest.cpp
@@ -269,8 +269,6 @@
 
   UnloadPage(page);
 
-  // TODO(crbug.com/pdfium/2142): This is the wrong checksum. It should be
-  // RectanglesAndTriangleChecksum().
   VerifySavedDocument(kExpectedRectangleWidth, kExpectedRectangleHeight,
-                      RectanglesChecksum());
+                      RectanglesAndTriangleChecksum());
 }