Implement FPDF_DocumentHasValidCrossReferenceTable().

Make it possible to query whether an FPDF_DOCUMENT has been successfully
parsed without having to rebuild the cross reference table.

Change-Id: I0e31c666fed1536d928e162d17af6ecdecd68c1d
Reviewed-on: https://pdfium-review.googlesource.com/c/47553
Commit-Queue: Lei Zhang <thestig@chromium.org>
Reviewed-by: Tom Sepez <tsepez@chromium.org>
diff --git a/core/fpdfapi/parser/cpdf_document.cpp b/core/fpdfapi/parser/cpdf_document.cpp
index 880aea2..26a9b36 100644
--- a/core/fpdfapi/parser/cpdf_document.cpp
+++ b/core/fpdfapi/parser/cpdf_document.cpp
@@ -211,7 +211,7 @@
   if (!m_pParser)
     SetParser(pdfium::MakeUnique<CPDF_Parser>(this));
 
-  return m_pParser->StartParse(pFileAccess, password);
+  return HandleLoadResult(m_pParser->StartParse(pFileAccess, password));
 }
 
 CPDF_Parser::Error CPDF_Document::LoadLinearizedDoc(
@@ -220,7 +220,7 @@
   if (!m_pParser)
     SetParser(pdfium::MakeUnique<CPDF_Parser>(this));
 
-  return m_pParser->StartLinearizedParse(validator, password);
+  return HandleLoadResult(m_pParser->StartLinearizedParse(validator, password));
 }
 
 void CPDF_Document::LoadPages() {
@@ -313,6 +313,12 @@
   m_pParser = std::move(pParser);
 }
 
+CPDF_Parser::Error CPDF_Document::HandleLoadResult(CPDF_Parser::Error error) {
+  if (error == CPDF_Parser::SUCCESS)
+    m_bHasValidCrossReferenceTable = !m_pParser->xref_table_rebuilt();
+  return error;
+}
+
 const CPDF_Dictionary* CPDF_Document::GetPagesDict() const {
   const CPDF_Dictionary* pRoot = GetRoot();
   return pRoot ? pRoot->GetDictFor("Pages") : nullptr;
diff --git a/core/fpdfapi/parser/cpdf_document.h b/core/fpdfapi/parser/cpdf_document.h
index d611293..b30a948 100644
--- a/core/fpdfapi/parser/cpdf_document.h
+++ b/core/fpdfapi/parser/cpdf_document.h
@@ -109,6 +109,9 @@
   CPDF_Parser::Error LoadLinearizedDoc(
       const RetainPtr<CPDF_ReadValidator>& validator,
       const char* password);
+  bool has_valid_cross_reference_table() const {
+    return m_bHasValidCrossReferenceTable;
+  }
 
   void LoadPages();
   void CreateNewDoc();
@@ -160,6 +163,7 @@
   bool InsertNewPage(int iPage, CPDF_Dictionary* pPageDict);
   void ResetTraversal();
   void SetParser(std::unique_ptr<CPDF_Parser> pParser);
+  CPDF_Parser::Error HandleLoadResult(CPDF_Parser::Error error);
 
   std::unique_ptr<CPDF_Parser> m_pParser;
   UnownedPtr<CPDF_Dictionary> m_pRootDict;
@@ -171,9 +175,13 @@
   // of the child being processed within the dictionary's /Kids array.
   std::vector<std::pair<CPDF_Dictionary*, size_t>> m_pTreeTraversal;
 
+  // True if the CPDF_Parser succeeded without having to rebuild the cross
+  // reference table.
+  bool m_bHasValidCrossReferenceTable = false;
+
-  // Index of the next page that will be traversed from the page tree.
-  int m_iNextPageToTraverse = 0;
   bool m_bReachedMaxPageLevel = false;
+  // Index of the next page that will be traversed from the page tree.
+  int m_iNextPageToTraverse = 0;
   uint32_t m_ParsedPageCount = 0;
   std::unique_ptr<CPDF_DocPageData> m_pDocPage;
   std::unique_ptr<CPDF_DocRenderData> m_pDocRender;
diff --git a/core/fpdfapi/parser/cpdf_parser.cpp b/core/fpdfapi/parser/cpdf_parser.cpp
index 5ddae2d..ed4eeb1 100644
--- a/core/fpdfapi/parser/cpdf_parser.cpp
+++ b/core/fpdfapi/parser/cpdf_parser.cpp
@@ -160,11 +160,10 @@
 
 CPDF_Parser::Error CPDF_Parser::StartParseInternal() {
   ASSERT(!m_bHasParsed);
+  ASSERT(!m_bXRefTableRebuilt);
   m_bHasParsed = true;
   m_bXRefStream = false;
 
-  bool bXRefRebuilt = false;
-
   m_LastXRefOffset = ParseStartXRef();
   if (m_LastXRefOffset >= kPDFHeaderSize) {
     if (!LoadAllCrossRefV4(m_LastXRefOffset) &&
@@ -172,21 +171,21 @@
       if (!RebuildCrossRef())
         return FORMAT_ERROR;
 
-      bXRefRebuilt = true;
+      m_bXRefTableRebuilt = true;
       m_LastXRefOffset = 0;
     }
   } else {
     if (!RebuildCrossRef())
       return FORMAT_ERROR;
 
-    bXRefRebuilt = true;
+    m_bXRefTableRebuilt = true;
   }
   Error eRet = SetEncryptHandler();
   if (eRet != SUCCESS)
     return eRet;
 
   if (!GetRoot() || !m_pObjectsHolder->TryInit()) {
-    if (bXRefRebuilt)
+    if (m_bXRefTableRebuilt)
       return FORMAT_ERROR;
 
     ReleaseEncryptHandler();
@@ -961,6 +960,7 @@
     const RetainPtr<CPDF_ReadValidator>& validator,
     const char* password) {
   ASSERT(!m_bHasParsed);
+  ASSERT(!m_bXRefTableRebuilt);
   SetPassword(password);
   m_bXRefStream = false;
   m_LastXRefOffset = 0;
@@ -976,13 +976,12 @@
 
   m_LastXRefOffset = m_pLinearized->GetLastXRefOffset();
   FX_FILESIZE dwFirstXRefOffset = m_LastXRefOffset;
-  bool bXRefRebuilt = false;
   bool bLoadV4 = LoadCrossRefV4(dwFirstXRefOffset, false);
   if (!bLoadV4 && !LoadCrossRefV5(&dwFirstXRefOffset, true)) {
     if (!RebuildCrossRef())
       return FORMAT_ERROR;
 
-    bXRefRebuilt = true;
+    m_bXRefTableRebuilt = true;
     m_LastXRefOffset = 0;
   }
   if (bLoadV4) {
@@ -1001,7 +1000,7 @@
     return eRet;
 
   if (!GetRoot() || !m_pObjectsHolder->TryInit()) {
-    if (bXRefRebuilt)
+    if (m_bXRefTableRebuilt)
       return FORMAT_ERROR;
 
     ReleaseEncryptHandler();
diff --git a/core/fpdfapi/parser/cpdf_parser.h b/core/fpdfapi/parser/cpdf_parser.h
index f0d497b..74b6717 100644
--- a/core/fpdfapi/parser/cpdf_parser.h
+++ b/core/fpdfapi/parser/cpdf_parser.h
@@ -111,6 +111,8 @@
     return m_CrossRefTable.get();
   }
 
+  bool xref_table_rebuilt() const { return m_bXRefTableRebuilt; }
+
   CPDF_SyntaxParser* GetSyntax() const { return m_pSyntax.get(); }
 
   void SetLinearizedHeader(std::unique_ptr<CPDF_LinearizedHeader> pLinearized);
@@ -175,6 +177,7 @@
 
   bool m_bHasParsed = false;
   bool m_bXRefStream = false;
+  bool m_bXRefTableRebuilt = false;
   int m_FileVersion = 0;
   // m_CrossRefTable must be destroyed after m_pSecurityHandler due to the
   // ownership of the ID array data.
diff --git a/fpdfsdk/fpdf_view.cpp b/fpdfsdk/fpdf_view.cpp
index eb61c3e..f4c1515 100644
--- a/fpdfsdk/fpdf_view.cpp
+++ b/fpdfsdk/fpdf_view.cpp
@@ -289,8 +289,12 @@
   return true;
 }
 
-// jabdelmalek: changed return type from uint32_t to build on Linux (and match
-// header).
+FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV
+FPDF_DocumentHasValidCrossReferenceTable(FPDF_DOCUMENT document) {
+  CPDF_Document* pDoc = CPDFDocumentFromFPDFDocument(document);
+  return pDoc && pDoc->has_valid_cross_reference_table();
+}
+
 FPDF_EXPORT unsigned long FPDF_CALLCONV
 FPDF_GetDocPermissions(FPDF_DOCUMENT document) {
   CPDF_Document* pDoc = CPDFDocumentFromFPDFDocument(document);
diff --git a/fpdfsdk/fpdf_view_c_api_test.c b/fpdfsdk/fpdf_view_c_api_test.c
index 2f9a5e8..ad7599f 100644
--- a/fpdfsdk/fpdf_view_c_api_test.c
+++ b/fpdfsdk/fpdf_view_c_api_test.c
@@ -372,6 +372,7 @@
     CHK(FPDF_CountNamedDests);
     CHK(FPDF_DestroyLibrary);
     CHK(FPDF_DeviceToPage);
+    CHK(FPDF_DocumentHasValidCrossReferenceTable);
     CHK(FPDF_GetDocPermissions);
     CHK(FPDF_GetFileVersion);
     CHK(FPDF_GetLastError);
diff --git a/fpdfsdk/fpdf_view_embeddertest.cpp b/fpdfsdk/fpdf_view_embeddertest.cpp
index 1bab8c6..807c8fe 100644
--- a/fpdfsdk/fpdf_view_embeddertest.cpp
+++ b/fpdfsdk/fpdf_view_embeddertest.cpp
@@ -668,3 +668,15 @@
   FPDF_CloseDocument(doc);
   SetDelegate(nullptr);
 }
+
+TEST_F(FPDFViewEmbedderTest, DocumentHasValidCrossReferenceTable) {
+  ASSERT_TRUE(OpenDocument("hello_world.pdf"));
+  EXPECT_TRUE(FPDF_DocumentHasValidCrossReferenceTable(document()));
+}
+
+TEST_F(FPDFViewEmbedderTest, DocumentHasInvalidCrossReferenceTable) {
+  EXPECT_FALSE(FPDF_DocumentHasValidCrossReferenceTable(nullptr));
+
+  ASSERT_TRUE(OpenDocument("bug_664284.pdf"));
+  EXPECT_FALSE(FPDF_DocumentHasValidCrossReferenceTable(document()));
+}
diff --git a/public/fpdfview.h b/public/fpdfview.h
index 7d78186..05e94f9 100644
--- a/public/fpdfview.h
+++ b/public/fpdfview.h
@@ -432,7 +432,8 @@
   FPDF_RESULT (*Truncate)(FPDF_LPVOID clientData, FPDF_DWORD size);
 } FPDF_FILEHANDLER, *FPDF_LPFILEHANDLER;
 
-#endif
+#endif  // PDF_ENABLE_XFA
+
 // Function: FPDF_LoadCustomDocument
 //          Load PDF document from a custom access descriptor.
 // Parameters:
@@ -489,6 +490,21 @@
 //          function is not defined.
 FPDF_EXPORT unsigned long FPDF_CALLCONV FPDF_GetLastError();
 
+// Function: FPDF_DocumentHasValidCrossReferenceTable
+//          Whether the document's cross reference table is valid or not.
+//          Experimental API.
+// Parameters:
+//          document    -   Handle to a document. Returned by FPDF_LoadDocument.
+// Return value:
+//          True if the PDF parser did not encounter problems parsing the cross
+//          reference table. False if the parser could not parse the cross
+//          reference table and the table had to be rebuilt from other data
+//          within the document.
+// Comments:
+//          The return value can change over time as the PDF parser evolves.
+FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV
+FPDF_DocumentHasValidCrossReferenceTable(FPDF_DOCUMENT document);
+
 // Function: FPDF_GetDocPermission
 //          Get file permission flags of the document.
 // Parameters:
diff --git a/samples/pdfium_test.cc b/samples/pdfium_test.cc
index 6f70c34..8155792 100644
--- a/samples/pdfium_test.cc
+++ b/samples/pdfium_test.cc
@@ -744,6 +744,9 @@
     return;
   }
 
+  if (!FPDF_DocumentHasValidCrossReferenceTable(doc.get()))
+    fprintf(stderr, "Document has invalid cross reference table\n");
+
   (void)FPDF_GetDocPermissions(doc.get());
 
   if (options.show_metadata)