Revert of Traverse PDF page tree only once in CPDF_Document (patchset #4 id:60001 of https://codereview.chromium.org/2414423002/ )

Reason for revert:
Possible cause of crbug.com/657897 reverting to find out.

BUG=657897

Original issue's description:
> Traverse PDF page tree only once in CPDF_Document
>
> In our current implementation of CPDF_Document::GetPage, we traverse
> the PDF page tree until we find the index we are looking for. This is
> slow when we do calls GetPage(0), GetPage(1), ... since in this case
> the page tree will be traversed n times if there are n pages. This CL
> makes sure the page tree is only traversed once.
>
> Time to load the PDF from the bug below in chrome official build:
> Before this CL: 1 minute 40 seconds
> After this CL: 5 seconds
>
> BUG=chromium:638513
>
> Committed: https://pdfium.googlesource.com/pdfium/+/7c29e27dae139a205755c1a29b7f3ac8b36ec0da

TBR=thestig@chromium.org,tsepez@chromium.org,npm@chromium.org
# Not skipping CQ checks because original CL landed more than 1 days ago.
BUG=chromium:638513

Review-Url: https://chromiumcodereview.appspot.com/2430313006
diff --git a/core/fpdfapi/parser/cpdf_document.cpp b/core/fpdfapi/parser/cpdf_document.cpp
index ba20ff6..c5f64a7 100644
--- a/core/fpdfapi/parser/cpdf_document.cpp
+++ b/core/fpdfapi/parser/cpdf_document.cpp
@@ -8,7 +8,6 @@
 
 #include <memory>
 #include <set>
-#include <utility>
 #include <vector>
 
 #include "core/fpdfapi/cpdf_modulemgr.h"
@@ -241,6 +240,83 @@
   InsertWidthArrayImpl(widths, size, pWidthArray);
 }
 
+int InsertDeletePDFPage(CPDF_Document* pDoc,
+                        CPDF_Dictionary* pPages,
+                        int nPagesToGo,
+                        CPDF_Dictionary* pPage,
+                        FX_BOOL bInsert,
+                        std::set<CPDF_Dictionary*>* pVisited) {
+  CPDF_Array* pKidList = pPages->GetArrayFor("Kids");
+  if (!pKidList)
+    return -1;
+
+  for (size_t i = 0; i < pKidList->GetCount(); i++) {
+    CPDF_Dictionary* pKid = pKidList->GetDictAt(i);
+    if (pKid->GetStringFor("Type") == "Page") {
+      if (nPagesToGo == 0) {
+        if (bInsert) {
+          pKidList->InsertAt(i, new CPDF_Reference(pDoc, pPage->GetObjNum()));
+          pPage->SetReferenceFor("Parent", pDoc, pPages->GetObjNum());
+        } else {
+          pKidList->RemoveAt(i);
+        }
+        pPages->SetIntegerFor(
+            "Count", pPages->GetIntegerFor("Count") + (bInsert ? 1 : -1));
+        return 1;
+      }
+      nPagesToGo--;
+    } else {
+      int nPages = pKid->GetIntegerFor("Count");
+      if (nPagesToGo < nPages) {
+        if (pdfium::ContainsKey(*pVisited, pKid))
+          return -1;
+
+        pdfium::ScopedSetInsertion<CPDF_Dictionary*> insertion(pVisited, pKid);
+        if (InsertDeletePDFPage(pDoc, pKid, nPagesToGo, pPage, bInsert,
+                                pVisited) < 0) {
+          return -1;
+        }
+        pPages->SetIntegerFor(
+            "Count", pPages->GetIntegerFor("Count") + (bInsert ? 1 : -1));
+        return 1;
+      }
+      nPagesToGo -= nPages;
+    }
+  }
+  return 0;
+}
+
+int InsertNewPage(CPDF_Document* pDoc,
+                  int iPage,
+                  CPDF_Dictionary* pPageDict,
+                  CFX_ArrayTemplate<uint32_t>& pageList) {
+  CPDF_Dictionary* pRoot = pDoc->GetRoot();
+  CPDF_Dictionary* pPages = pRoot ? pRoot->GetDictFor("Pages") : nullptr;
+  if (!pPages)
+    return -1;
+
+  int nPages = pDoc->GetPageCount();
+  if (iPage < 0 || iPage > nPages)
+    return -1;
+
+  if (iPage == nPages) {
+    CPDF_Array* pPagesList = pPages->GetArrayFor("Kids");
+    if (!pPagesList) {
+      pPagesList = new CPDF_Array;
+      pPages->SetFor("Kids", pPagesList);
+    }
+    pPagesList->Add(new CPDF_Reference(pDoc, pPageDict->GetObjNum()));
+    pPages->SetIntegerFor("Count", nPages + 1);
+    pPageDict->SetReferenceFor("Parent", pDoc, pPages->GetObjNum());
+  } else {
+    std::set<CPDF_Dictionary*> stack = {pPages};
+    if (InsertDeletePDFPage(pDoc, pPages, iPage, pPageDict, TRUE, &stack) < 0)
+      return -1;
+  }
+  pageList.InsertAt(iPage, pPageDict->GetObjNum());
+  return iPage;
+}
+
 int CountPages(CPDF_Dictionary* pPages,
                std::set<CPDF_Dictionary*>* visited_pages) {
   int count = pPages->GetIntegerFor("Count");
@@ -337,7 +413,6 @@
       m_pParser(std::move(pParser)),
       m_pRootDict(nullptr),
       m_pInfoDict(nullptr),
-      m_iLastPageTraversed(-1),
       m_bLinearized(false),
       m_iFirstPageNo(0),
       m_dwFirstPageObjNum(0),
@@ -402,63 +477,40 @@
   m_PageList.SetSize(RetrievePageCount());
 }
 
-CPDF_Dictionary* CPDF_Document::TraversePDFPages(int iPage, int nPagesToGo) {
-  std::pair<CPDF_Dictionary*, int>* lastProc = &m_pTreeTraversal.top();
-  CPDF_Dictionary* pPages = lastProc->first;
+CPDF_Dictionary* CPDF_Document::FindPDFPage(CPDF_Dictionary* pPages,
+                                            int iPage,
+                                            int nPagesToGo,
+                                            int level) {
   CPDF_Array* pKidList = pPages->GetArrayFor("Kids");
-  if (!pKidList) {
-    m_pTreeTraversal.pop();
-    if (nPagesToGo != 1)
-      return nullptr;
-    m_PageList.SetAt(iPage, pPages->GetObjNum());
-    return pPages;
-  }
+  if (!pKidList)
+    return nPagesToGo == 0 ? pPages : nullptr;
 
-  if (m_pTreeTraversal.size() >= FX_MAX_PAGE_LEVEL) {
-    m_pTreeTraversal.pop();
+  if (level >= FX_MAX_PAGE_LEVEL)
     return nullptr;
-  }
 
-  bool shouldFinish = pPages->GetIntegerFor("Count") <= nPagesToGo;
-  CPDF_Dictionary* page = nullptr;
-  for (size_t i = lastProc->second + 1; i < pKidList->GetCount(); i++) {
+  for (size_t i = 0; i < pKidList->GetCount(); i++) {
     CPDF_Dictionary* pKid = pKidList->GetDictAt(i);
     if (!pKid) {
       nPagesToGo--;
-      lastProc->second++;
       continue;
     }
-    if (pKid == pPages) {
-      lastProc->second++;
+    if (pKid == pPages)
       continue;
-    }
     if (!pKid->KeyExist("Kids")) {
-      m_PageList.SetAt(iPage - nPagesToGo + 1, pKid->GetObjNum());
+      if (nPagesToGo == 0)
+        return pKid;
+
+      m_PageList.SetAt(iPage - nPagesToGo, pKid->GetObjNum());
       nPagesToGo--;
-      lastProc->second++;
-      if (nPagesToGo == 0) {
-        page = pKid;
-        break;
-      }
     } else {
       int nPages = pKid->GetIntegerFor("Count");
-      m_pTreeTraversal.push(std::make_pair(pKid, -1));
-      CPDF_Dictionary* pageKid = TraversePDFPages(iPage, nPagesToGo);
-      // If the stack top is current element, kid was completely processed.
-      if (lastProc == &m_pTreeTraversal.top())
-        lastProc->second++;
-      if (nPagesToGo <= nPages) {
-        page = pageKid;
-        break;
-      }
+      if (nPagesToGo < nPages)
+        return FindPDFPage(pKid, iPage, nPagesToGo, level + 1);
+
       nPagesToGo -= nPages;
     }
   }
-  if (shouldFinish ||
-      lastProc->second == static_cast<int>(pKidList->GetCount() - 1)) {
-    m_pTreeTraversal.pop();
-  }
-  return page;
+  return nullptr;
 }
 
 CPDF_Dictionary* CPDF_Document::GetPagesDict() const {
@@ -482,20 +534,21 @@
   }
 
   int objnum = m_PageList.GetAt(iPage);
-  if (!objnum || m_iLastPageTraversed < iPage) {
-    CPDF_Dictionary* pPages = GetPagesDict();
-    if (!pPages)
-      return nullptr;
-    if (m_pTreeTraversal.empty())
-      m_pTreeTraversal.push(std::make_pair(pPages, -1));
-    CPDF_Dictionary* page = TraversePDFPages(m_iLastPageTraversed + 1,
-                                             iPage - m_iLastPageTraversed);
-    m_iLastPageTraversed = iPage;
-    return page;
+  if (objnum) {
+    if (CPDF_Dictionary* pDict = ToDictionary(GetOrParseIndirectObject(objnum)))
+      return pDict;
   }
-  if (CPDF_Dictionary* pDict = ToDictionary(GetOrParseIndirectObject(objnum)))
-    return pDict;
-  return nullptr;
+
+  CPDF_Dictionary* pPages = GetPagesDict();
+  if (!pPages)
+    return nullptr;
+
+  CPDF_Dictionary* pPage = FindPDFPage(pPages, iPage, iPage, 0);
+  if (!pPage)
+    return nullptr;
+
+  m_PageList.SetAt(iPage, pPage->GetObjNum());
+  return pPage;
 }
 
 void CPDF_Document::SetPageObjNum(int iPage, uint32_t objNum) {
@@ -657,91 +710,13 @@
   CPDF_Dictionary* pDict = new CPDF_Dictionary(m_pByteStringPool);
   pDict->SetNameFor("Type", "Page");
   uint32_t dwObjNum = AddIndirectObject(pDict);
-  if (InsertNewPage(iPage, pDict, m_PageList) < 0) {
+  if (InsertNewPage(this, iPage, pDict, m_PageList) < 0) {
     ReleaseIndirectObject(dwObjNum);
     return nullptr;
   }
   return pDict;
 }
 
-int CPDF_Document::InsertDeletePDFPage(CPDF_Dictionary* pPages,
-                                       int nPagesToGo,
-                                       CPDF_Dictionary* pPage,
-                                       FX_BOOL bInsert,
-                                       std::set<CPDF_Dictionary*>* pVisited) {
-  CPDF_Array* pKidList = pPages->GetArrayFor("Kids");
-  if (!pKidList)
-    return -1;
-
-  for (size_t i = 0; i < pKidList->GetCount(); i++) {
-    CPDF_Dictionary* pKid = pKidList->GetDictAt(i);
-    if (pKid->GetStringFor("Type") == "Page") {
-      if (nPagesToGo == 0) {
-        if (bInsert) {
-          pKidList->InsertAt(i, new CPDF_Reference(this, pPage->GetObjNum()));
-          pPage->SetReferenceFor("Parent", this, pPages->GetObjNum());
-        } else {
-          pKidList->RemoveAt(i);
-        }
-        pPages->SetIntegerFor(
-            "Count", pPages->GetIntegerFor("Count") + (bInsert ? 1 : -1));
-        // Tree will change, so reset tree transversal variables
-        m_iLastPageTraversed = -1;
-        m_pTreeTraversal = std::stack<std::pair<CPDF_Dictionary*, int>>();
-        return 1;
-      }
-      nPagesToGo--;
-    } else {
-      int nPages = pKid->GetIntegerFor("Count");
-      if (nPagesToGo < nPages) {
-        if (pdfium::ContainsKey(*pVisited, pKid))
-          return -1;
-
-        pdfium::ScopedSetInsertion<CPDF_Dictionary*> insertion(pVisited, pKid);
-        if (InsertDeletePDFPage(pKid, nPagesToGo, pPage, bInsert, pVisited) <
-            0) {
-          return -1;
-        }
-        pPages->SetIntegerFor(
-            "Count", pPages->GetIntegerFor("Count") + (bInsert ? 1 : -1));
-        return 1;
-      }
-      nPagesToGo -= nPages;
-    }
-  }
-  return 0;
-}
-
-int CPDF_Document::InsertNewPage(int iPage,
-                                 CPDF_Dictionary* pPageDict,
-                                 CFX_ArrayTemplate<uint32_t>& pageList) {
-  CPDF_Dictionary* pRoot = GetRoot();
-  CPDF_Dictionary* pPages = pRoot ? pRoot->GetDictFor("Pages") : nullptr;
-  if (!pPages)
-    return -1;
-
-  int nPages = GetPageCount();
-  if (iPage < 0 || iPage > nPages)
-    return -1;
-
-  if (iPage == nPages) {
-    CPDF_Array* pPagesList = pPages->GetArrayFor("Kids");
-    if (!pPagesList) {
-      pPagesList = new CPDF_Array;
-      pPages->SetFor("Kids", pPagesList);
-    }
-    pPagesList->Add(new CPDF_Reference(this, pPageDict->GetObjNum()));
-    pPages->SetIntegerFor("Count", nPages + 1);
-    pPageDict->SetReferenceFor("Parent", this, pPages->GetObjNum());
-  } else {
-    std::set<CPDF_Dictionary*> stack = {pPages};
-    if (InsertDeletePDFPage(pPages, iPage, pPageDict, TRUE, &stack) < 0)
-      return -1;
-  }
-  pageList.InsertAt(iPage, pPageDict->GetObjNum());
-  return iPage;
-}
-
 void CPDF_Document::DeletePage(int iPage) {
   CPDF_Dictionary* pPages = GetPagesDict();
   if (!pPages)
@@ -752,7 +727,7 @@
     return;
 
   std::set<CPDF_Dictionary*> stack = {pPages};
-  if (InsertDeletePDFPage(pPages, iPage, nullptr, FALSE, &stack) < 0)
+  if (InsertDeletePDFPage(this, pPages, iPage, nullptr, FALSE, &stack) < 0)
     return;
 
   m_PageList.RemoveAt(iPage);
diff --git a/core/fpdfapi/parser/cpdf_document.h b/core/fpdfapi/parser/cpdf_document.h
index e845ea8..c557a56 100644
--- a/core/fpdfapi/parser/cpdf_document.h
+++ b/core/fpdfapi/parser/cpdf_document.h
@@ -9,7 +9,6 @@
 
 #include <functional>
 #include <memory>
-#include <stack>
 
 #include "core/fpdfapi/parser/cpdf_indirect_object_holder.h"
 #include "core/fpdfapi/parser/cpdf_object.h"
@@ -108,7 +107,10 @@
 
   // Retrieve page count information by getting count value from the tree nodes
   int RetrievePageCount() const;
-  CPDF_Dictionary* TraversePDFPages(int iPage, int nPagesToGo);
+  CPDF_Dictionary* FindPDFPage(CPDF_Dictionary* pPages,
+                               int iPage,
+                               int nPagesToGo,
+                               int level);
   int FindPageIndex(CPDF_Dictionary* pNode,
                     uint32_t& skip_count,
                     uint32_t objnum,
@@ -124,19 +126,10 @@
       FX_BOOL bVert,
       CFX_ByteString basefont,
       std::function<void(FX_WCHAR, FX_WCHAR, CPDF_Array*)> Insert);
-  int InsertDeletePDFPage(CPDF_Dictionary* pPages,
-                          int nPagesToGo,
-                          CPDF_Dictionary* pPage,
-                          FX_BOOL bInsert,
-                          std::set<CPDF_Dictionary*>* pVisited);
-  int InsertNewPage(int iPage,
-                    CPDF_Dictionary* pPageDict,
-                    CFX_ArrayTemplate<uint32_t>& pageList);
+
   std::unique_ptr<CPDF_Parser> m_pParser;
   CPDF_Dictionary* m_pRootDict;
   CPDF_Dictionary* m_pInfoDict;
-  std::stack<std::pair<CPDF_Dictionary*, int>> m_pTreeTraversal;
-  int m_iLastPageTraversed;
   bool m_bLinearized;
   int m_iFirstPageNo;
   uint32_t m_dwFirstPageObjNum;