Remove CPDF_Parser::GetSyntax()

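Move the trailer-end scan (from fpdf_view.cpp) and the incremental-save
copy loop (from cpdf_creator.cpp) into CPDF_Parser as GetTrailerEnds()
and WriteToArchive(), and add a GetDocumentSize() forwarder, so that
CPDF_Parser need not expose an unowned reference to its syntax parser.
This code probably belonged in CPDF_Parser in the first place, since it
is mutating an object owned by the parser.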

Change-Id: I7354059a537660fdd7ad57ef5452f46d83153d4b
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/99311
Reviewed-by: Lei Zhang <thestig@chromium.org>
Commit-Queue: Tom Sepez <tsepez@chromium.org>
diff --git a/core/fpdfapi/edit/cpdf_creator.cpp b/core/fpdfapi/edit/cpdf_creator.cpp
index ab1dfeb..5bd35fe 100644
--- a/core/fpdfapi/edit/cpdf_creator.cpp
+++ b/core/fpdfapi/edit/cpdf_creator.cpp
@@ -21,7 +21,6 @@
 #include "core/fpdfapi/parser/cpdf_parser.h"
 #include "core/fpdfapi/parser/cpdf_security_handler.h"
 #include "core/fpdfapi/parser/cpdf_string.h"
-#include "core/fpdfapi/parser/cpdf_syntax_parser.h"
 #include "core/fpdfapi/parser/fpdf_parser_utility.h"
 #include "core/fxcrt/data_vector.h"
 #include "core/fxcrt/fx_extension.h"
@@ -235,26 +234,14 @@
       }
       m_iStage = Stage::kInitWriteObjs20;
     } else {
-      m_SavedOffset = m_pParser->GetSyntax()->GetDocumentSize();
+      m_SavedOffset = m_pParser->GetDocumentSize();
       m_iStage = Stage::kWriteIncremental15;
     }
   }
   if (m_iStage == Stage::kWriteIncremental15) {
     if (m_IsOriginal && m_SavedOffset > 0) {
-      static constexpr FX_FILESIZE kBufferSize = 4096;
-      DataVector<uint8_t> buffer(kBufferSize);
-      FX_FILESIZE src_size = m_SavedOffset;
-      m_pParser->GetSyntax()->SetPos(0);
-      while (src_size) {
-        const uint32_t block_size =
-            static_cast<uint32_t>(std::min(kBufferSize, src_size));
-        if (!m_pParser->GetSyntax()->ReadBlock(buffer.data(), block_size))
-          return Stage::kInvalid;
-        if (!m_Archive->WriteBlock(buffer.data(), block_size))
-          return Stage::kInvalid;
-
-        src_size -= block_size;
-      }
+      if (!m_pParser->WriteToArchive(m_Archive.get(), m_SavedOffset))
+        return Stage::kInvalid;
     }
     if (m_IsOriginal && m_pParser->GetLastXRefOffset() == 0) {
       for (uint32_t num = 0; num <= m_pParser->GetLastObjNum(); ++num) {
diff --git a/core/fpdfapi/edit/cpdf_creator.h b/core/fpdfapi/edit/cpdf_creator.h
index 60c4b6b..5cef585 100644
--- a/core/fpdfapi/edit/cpdf_creator.h
+++ b/core/fpdfapi/edit/cpdf_creator.h
@@ -73,7 +73,7 @@
   CPDF_CryptoHandler* GetCryptoHandler();
 
   UnownedPtr<CPDF_Document> const m_pDocument;
-  UnownedPtr<const CPDF_Parser> const m_pParser;
+  UnownedPtr<CPDF_Parser> const m_pParser;
   RetainPtr<const CPDF_Dictionary> m_pEncryptDict;
   RetainPtr<CPDF_Dictionary> m_pNewEncryptDict;
   RetainPtr<CPDF_SecurityHandler> m_pSecurityHandler;
diff --git a/core/fpdfapi/parser/cpdf_parser.cpp b/core/fpdfapi/parser/cpdf_parser.cpp
index 64a718f..336c0b0 100644
--- a/core/fpdfapi/parser/cpdf_parser.cpp
+++ b/core/fpdfapi/parser/cpdf_parser.cpp
@@ -995,6 +995,10 @@
   return result;
 }
 
+FX_FILESIZE CPDF_Parser::GetDocumentSize() const {
+  return m_pSyntax->GetDocumentSize();  // Delegates to the syntax parser.
+}
+
 uint32_t CPDF_Parser::GetFirstPageNo() const {
   return m_pLinearized ? m_pLinearized->GetFirstPageNo() : 0;
 }
@@ -1144,3 +1148,64 @@
     std::unique_ptr<CPDF_SyntaxParser> parser) {
   m_pSyntax = std::move(parser);
 }
+
+std::vector<unsigned int> CPDF_Parser::GetTrailerEnds() {
+  std::vector<unsigned int> trailer_ends;
+  m_pSyntax->SetTrailerEnds(&trailer_ends);  // Start recording trailer ends.
+
+  // Scan words from offset 0 until one that is not handled below appears.
+  m_pSyntax->SetPos(0);
+  while (true) {
+    CPDF_SyntaxParser::WordResult word_result = m_pSyntax->GetNextWord();
+    if (word_result.is_number) {
+      // The object number was read. The generation number must come next.
+      word_result = m_pSyntax->GetNextWord();
+      if (!word_result.is_number)
+        break;
+
+      word_result = m_pSyntax->GetNextWord();
+      if (word_result.word != "obj")
+        break;
+
+      m_pSyntax->GetObjectBody(nullptr);  // Result is discarded.
+
+      word_result = m_pSyntax->GetNextWord();
+      if (word_result.word != "endobj")
+        break;
+    } else if (word_result.word == "trailer") {
+      m_pSyntax->GetObjectBody(nullptr);  // Result is discarded.
+    } else if (word_result.word == "startxref") {
+      m_pSyntax->GetNextWord();  // Discard the word after "startxref".
+    } else if (word_result.word == "xref") {
+      while (true) {  // Skip words up to "startxref" or an empty word.
+        word_result = m_pSyntax->GetNextWord();
+        if (word_result.word.IsEmpty() || word_result.word == "startxref")
+          break;
+      }
+      m_pSyntax->GetNextWord();  // Discard one more word after the loop.
+    } else {
+      break;
+    }
+  }
+
+  // Stop recording before the local vector is returned.
+  m_pSyntax->SetTrailerEnds(nullptr);
+  return trailer_ends;
+}
+
+bool CPDF_Parser::WriteToArchive(IFX_ArchiveStream* archive,
+                                 FX_FILESIZE src_size) {
+  static constexpr FX_FILESIZE kBufferSize = 4096;  // Bytes per copy step.
+  DataVector<uint8_t> buffer(kBufferSize);
+  m_pSyntax->SetPos(0);  // Copy the first |src_size| bytes of the source.
+  while (src_size > 0) {  // Signed count: a negative value copies nothing.
+    const uint32_t block_size =
+        static_cast<uint32_t>(std::min(kBufferSize, src_size));
+    if (!m_pSyntax->ReadBlock(buffer.data(), block_size))
+      return false;  // Source read failed.
+    if (!archive->WriteBlock(buffer.data(), block_size))
+      return false;  // Archive write failed.
+    src_size -= block_size;
+  }
+  return true;
+}
diff --git a/core/fpdfapi/parser/cpdf_parser.h b/core/fpdfapi/parser/cpdf_parser.h
index 0d87606..4d2f7ab 100644
--- a/core/fpdfapi/parser/cpdf_parser.h
+++ b/core/fpdfapi/parser/cpdf_parser.h
@@ -31,6 +31,7 @@
 class CPDF_ReadValidator;
 class CPDF_SecurityHandler;
 class CPDF_SyntaxParser;
+class IFX_ArchiveStream;
 class IFX_SeekableReadStream;
 
 class CPDF_Parser {
@@ -109,6 +110,7 @@
   RetainPtr<CPDF_Object> ParseIndirectObjectAt(FX_FILESIZE pos,
                                                uint32_t objnum);
 
+  FX_FILESIZE GetDocumentSize() const;
   uint32_t GetFirstPageNo() const;
   const CPDF_LinearizedHeader* GetLinearizedHeader() const {
     return m_pLinearized.get();
@@ -120,7 +122,8 @@
 
   bool xref_table_rebuilt() const { return m_bXRefTableRebuilt; }
 
-  CPDF_SyntaxParser* GetSyntax() const { return m_pSyntax.get(); }
+  std::vector<unsigned int> GetTrailerEnds();
+  bool WriteToArchive(IFX_ArchiveStream* archive, FX_FILESIZE src_size);
 
   void SetLinearizedHeaderForTesting(
       std::unique_ptr<CPDF_LinearizedHeader> pLinearized);
diff --git a/fpdfsdk/fpdf_view.cpp b/fpdfsdk/fpdf_view.cpp
index 1627575..e365b67 100644
--- a/fpdfsdk/fpdf_view.cpp
+++ b/fpdfsdk/fpdf_view.cpp
@@ -22,7 +22,6 @@
 #include "core/fpdfapi/parser/cpdf_parser.h"
 #include "core/fpdfapi/parser/cpdf_stream.h"
 #include "core/fpdfapi/parser/cpdf_string.h"
-#include "core/fpdfapi/parser/cpdf_syntax_parser.h"
 #include "core/fpdfapi/parser/fpdf_parser_decode.h"
 #include "core/fpdfapi/render/cpdf_docrenderdata.h"
 #include "core/fpdfapi/render/cpdf_pagerendercache.h"
@@ -1278,48 +1277,7 @@
 
   // Start recording trailer ends.
   auto* parser = doc->GetParser();
-  CPDF_SyntaxParser* syntax = parser->GetSyntax();
-  std::vector<unsigned int> trailer_ends;
-  syntax->SetTrailerEnds(&trailer_ends);
-
-  // Traverse the document.
-  syntax->SetPos(0);
-  while (true) {
-    CPDF_SyntaxParser::WordResult word_result = syntax->GetNextWord();
-    if (word_result.is_number) {
-      // The object number was read. Read the generation number.
-      word_result = syntax->GetNextWord();
-      if (!word_result.is_number)
-        break;
-
-      word_result = syntax->GetNextWord();
-      if (word_result.word != "obj")
-        break;
-
-      syntax->GetObjectBody(nullptr);
-
-      word_result = syntax->GetNextWord();
-      if (word_result.word != "endobj")
-        break;
-    } else if (word_result.word == "trailer") {
-      syntax->GetObjectBody(nullptr);
-    } else if (word_result.word == "startxref") {
-      syntax->GetNextWord();
-    } else if (word_result.word == "xref") {
-      while (true) {
-        word_result = syntax->GetNextWord();
-        if (word_result.word.IsEmpty() || word_result.word == "startxref")
-          break;
-      }
-      syntax->GetNextWord();
-    } else {
-      break;
-    }
-  }
-
-  // Stop recording trailer ends.
-  syntax->SetTrailerEnds(nullptr);
-
+  std::vector<unsigned int> trailer_ends = parser->GetTrailerEnds();
   const unsigned long trailer_ends_len =
       fxcrt::CollectionSize<unsigned long>(trailer_ends);
   if (buffer && length >= trailer_ends_len) {