Unify CPDF_DataAvail::Check[Header/End].

Use CPDF_Parser instead of CPDF_SyntaxParser to parse the file header and tail.

Change-Id: I9ca22b927519a6ea280e249ef43721c230faa6ab
Bug:
Reviewed-on: https://pdfium-review.googlesource.com/13970
Commit-Queue: Art Snake <art-snake@yandex-team.ru>
Reviewed-by: dsinclair <dsinclair@chromium.org>
diff --git a/core/fpdfapi/parser/cpdf_data_avail.cpp b/core/fpdfapi/parser/cpdf_data_avail.cpp
index 2f79e56..6192788 100644
--- a/core/fpdfapi/parser/cpdf_data_avail.cpp
+++ b/core/fpdfapi/parser/cpdf_data_avail.cpp
@@ -201,7 +201,6 @@
 }
 
 bool CPDF_DataAvail::LoadAllXref() {
-  m_parser.m_pSyntax->InitParser(m_pFileRead, (uint32_t)m_dwHeaderOffset);
   if (!m_parser.LoadAllCrossRefV4(m_dwLastXRefOffset) &&
       !m_parser.LoadAllCrossRefV5(m_dwLastXRefOffset)) {
     m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
@@ -470,8 +469,6 @@
                                                              szHintLength))
     return false;
 
-  m_syntaxParser.InitParser(m_pFileRead, m_dwHeaderOffset);
-
   auto pHintTables = pdfium::MakeUnique<CPDF_HintTables>(GetValidator().Get(),
                                                          m_pLinearized.get());
   std::unique_ptr<CPDF_Object> pHintStream =
@@ -488,11 +485,11 @@
     FX_FILESIZE pos,
     uint32_t objnum,
     CPDF_IndirectObjectHolder* pObjList) {
-  const FX_FILESIZE SavedPos = m_syntaxParser.GetPos();
-  m_syntaxParser.SetPos(pos);
-  std::unique_ptr<CPDF_Object> result = m_syntaxParser.GetIndirectObject(
+  const FX_FILESIZE SavedPos = GetSyntaxParser()->GetPos();
+  GetSyntaxParser()->SetPos(pos);
+  std::unique_ptr<CPDF_Object> result = GetSyntaxParser()->GetIndirectObject(
       pObjList, CPDF_SyntaxParser::ParseType::kLoose);
-  m_syntaxParser.SetPos(SavedPos);
+  GetSyntaxParser()->SetPos(SavedPos);
   return (result && (!objnum || result->GetObjNum() == objnum))
              ? std::move(result)
              : nullptr;
@@ -525,10 +522,8 @@
   if (header_offset == kInvalidHeaderOffset)
     return DocAvailStatus::DataError;
 
-  m_dwHeaderOffset = header_offset;
-
-  m_syntaxParser.InitParserWithValidator(GetValidator(), header_offset);
-  m_pLinearized = CPDF_LinearizedHeader::Parse(&m_syntaxParser);
+  m_parser.m_pSyntax->InitParserWithValidator(GetValidator(), header_offset);
+  m_pLinearized = m_parser.ParseLinearizedHeader();
   if (GetValidator()->has_read_problems())
     return DocAvailStatus::DataNotAvailable;
 
@@ -537,41 +532,17 @@
 }
 
 bool CPDF_DataAvail::CheckEnd() {
-  const uint32_t req_pos =
-      (uint32_t)(m_dwFileLen > 1024 ? m_dwFileLen - 1024 : 0);
-  const uint32_t dwSize = (uint32_t)(m_dwFileLen - req_pos);
-  std::vector<uint8_t> buffer(dwSize);
-  {
-    const CPDF_ReadValidator::Session read_session(GetValidator().Get());
-    m_pFileRead->ReadBlock(buffer.data(), req_pos, dwSize);
-    if (GetValidator()->has_read_problems())
-      return false;
-  }
+  const CPDF_ReadValidator::Session read_session(GetValidator().Get());
+  const FX_FILESIZE last_xref_offset = m_parser.ParseStartXRef();
 
-  auto file = pdfium::MakeRetain<CFX_MemoryStream>(
-      buffer.data(), static_cast<size_t>(dwSize), false);
-  m_syntaxParser.InitParser(file, 0);
-  m_syntaxParser.SetPos(dwSize - 1);
-  if (!m_syntaxParser.BackwardsSearchToWord("startxref", dwSize)) {
-    m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
-    return true;
-  }
-  m_syntaxParser.GetNextWord(nullptr);
-
-  bool bNumber;
-  ByteString xrefpos_str = m_syntaxParser.GetNextWord(&bNumber);
-  if (!bNumber) {
-    m_docStatus = PDF_DATAAVAIL_ERROR;
+  if (GetValidator()->has_read_problems())
     return false;
-  }
-  m_dwXRefOffset = (FX_FILESIZE)FXSYS_atoi64(xrefpos_str.c_str());
-  if (!m_dwXRefOffset || m_dwXRefOffset > m_dwFileLen) {
-    m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
-    return true;
-  }
-  m_dwLastXRefOffset = m_dwXRefOffset;
-  SetStartOffset(m_dwXRefOffset);
-  m_docStatus = PDF_DATAAVAIL_CROSSREF;
+
+  m_dwLastXRefOffset = last_xref_offset;
+  m_dwXRefOffset = last_xref_offset;
+  SetStartOffset(last_xref_offset);
+  m_docStatus =
+      (last_xref_offset > 0) ? PDF_DATAAVAIL_CROSSREF : PDF_DATAAVAIL_ERROR;
   return true;
 }
 
@@ -719,12 +690,10 @@
 }
 
 bool CPDF_DataAvail::CheckTrailer() {
-  m_syntaxParser.InitParser(m_pFileRead, 0);
-
   const CPDF_ReadValidator::Session read_session(GetValidator().Get());
-  m_syntaxParser.SetPos(m_dwTrailerOffset);
+  GetSyntaxParser()->SetPos(m_dwTrailerOffset);
   const std::unique_ptr<CPDF_Object> pTrailer =
-      m_syntaxParser.GetObjectBody(nullptr);
+      GetSyntaxParser()->GetObjectBody(nullptr);
   if (!pTrailer) {
     if (!GetValidator()->has_read_problems())
       m_docStatus = PDF_DATAAVAIL_ERROR;
@@ -1175,6 +1144,11 @@
   return m_pFileRead;
 }
 
+CPDF_SyntaxParser* CPDF_DataAvail::GetSyntaxParser() const {
+  return m_pDocument ? m_pDocument->GetParser()->m_pSyntax.get()
+                     : m_parser.m_pSyntax.get();
+}
+
 int CPDF_DataAvail::GetPageCount() const {
   if (m_pLinearized)
     return m_pLinearized->GetPageCount();
@@ -1203,10 +1177,8 @@
   m_pDocument->SetPageObjNum(index, dwObjNum);
   // Page object already can be parsed in document.
   if (!m_pDocument->GetIndirectObject(dwObjNum)) {
-    m_syntaxParser.InitParser(
-        m_pFileRead, pdfium::base::checked_cast<uint32_t>(szPageStartPos));
     m_pDocument->ReplaceIndirectObjectIfHigherGeneration(
-        dwObjNum, ParseIndirectObjectAt(0, dwObjNum, m_pDocument));
+        dwObjNum, ParseIndirectObjectAt(szPageStartPos, dwObjNum, m_pDocument));
   }
   if (!ValidatePage(index))
     return nullptr;
diff --git a/core/fpdfapi/parser/cpdf_data_avail.h b/core/fpdfapi/parser/cpdf_data_avail.h
index 0481408..772d335 100644
--- a/core/fpdfapi/parser/cpdf_data_avail.h
+++ b/core/fpdfapi/parser/cpdf_data_avail.h
@@ -169,18 +169,17 @@
   bool IsFirstCheck(uint32_t dwPage);
   void ResetFirstCheck(uint32_t dwPage);
   bool ValidatePage(uint32_t dwPage);
+  CPDF_SyntaxParser* GetSyntaxParser() const;
 
   FileAvail* const m_pFileAvail;
   RetainPtr<CPDF_ReadValidator> m_pFileRead;
   CPDF_Parser m_parser;
-  CPDF_SyntaxParser m_syntaxParser;
   std::unique_ptr<CPDF_Object> m_pRoot;
   uint32_t m_dwRootObjNum;
   uint32_t m_dwInfoObjNum;
   std::unique_ptr<CPDF_LinearizedHeader> m_pLinearized;
   UnownedPtr<CPDF_Object> m_pTrailer;
   bool m_bDocAvail;
-  FX_FILESIZE m_dwHeaderOffset;
   FX_FILESIZE m_dwLastXRefOffset;
   FX_FILESIZE m_dwXRefOffset;
   FX_FILESIZE m_dwTrailerOffset;