Traverse PDF page tree only once in CPDF_Document

In our current implementation of CPDF_Document::GetPage, we traverse
the PDF page tree until we find the index we are looking for. This is
slow when we do calls GetPage(0), GetPage(1), ... since in this case
the page tree will be traversed n times if there are n pages. This CL
makes sure the page tree is only traversed once.

Time to load the PDF from the bug below in chrome official build:
Before this CL: 1 minute 40 seconds
After this CL: 5 seconds

BUG=chromium:638513

Review-Url: https://codereview.chromium.org/2414423002
diff --git a/core/fpdfapi/parser/cpdf_document.cpp b/core/fpdfapi/parser/cpdf_document.cpp
index c5f64a7..ba20ff6 100644
--- a/core/fpdfapi/parser/cpdf_document.cpp
+++ b/core/fpdfapi/parser/cpdf_document.cpp
@@ -8,6 +8,7 @@
 
 #include <memory>
 #include <set>
+#include <utility>
 #include <vector>
 
 #include "core/fpdfapi/cpdf_modulemgr.h"
@@ -240,83 +241,6 @@
   InsertWidthArrayImpl(widths, size, pWidthArray);
 }
 
-int InsertDeletePDFPage(CPDF_Document* pDoc,
-                        CPDF_Dictionary* pPages,
-                        int nPagesToGo,
-                        CPDF_Dictionary* pPage,
-                        FX_BOOL bInsert,
-                        std::set<CPDF_Dictionary*>* pVisited) {
-  CPDF_Array* pKidList = pPages->GetArrayFor("Kids");
-  if (!pKidList)
-    return -1;
-
-  for (size_t i = 0; i < pKidList->GetCount(); i++) {
-    CPDF_Dictionary* pKid = pKidList->GetDictAt(i);
-    if (pKid->GetStringFor("Type") == "Page") {
-      if (nPagesToGo == 0) {
-        if (bInsert) {
-          pKidList->InsertAt(i, new CPDF_Reference(pDoc, pPage->GetObjNum()));
-          pPage->SetReferenceFor("Parent", pDoc, pPages->GetObjNum());
-        } else {
-          pKidList->RemoveAt(i);
-        }
-        pPages->SetIntegerFor(
-            "Count", pPages->GetIntegerFor("Count") + (bInsert ? 1 : -1));
-        return 1;
-      }
-      nPagesToGo--;
-    } else {
-      int nPages = pKid->GetIntegerFor("Count");
-      if (nPagesToGo < nPages) {
-        if (pdfium::ContainsKey(*pVisited, pKid))
-          return -1;
-
-        pdfium::ScopedSetInsertion<CPDF_Dictionary*> insertion(pVisited, pKid);
-        if (InsertDeletePDFPage(pDoc, pKid, nPagesToGo, pPage, bInsert,
-                                pVisited) < 0) {
-          return -1;
-        }
-        pPages->SetIntegerFor(
-            "Count", pPages->GetIntegerFor("Count") + (bInsert ? 1 : -1));
-        return 1;
-      }
-      nPagesToGo -= nPages;
-    }
-  }
-  return 0;
-}
-
-int InsertNewPage(CPDF_Document* pDoc,
-                  int iPage,
-                  CPDF_Dictionary* pPageDict,
-                  CFX_ArrayTemplate<uint32_t>& pageList) {
-  CPDF_Dictionary* pRoot = pDoc->GetRoot();
-  CPDF_Dictionary* pPages = pRoot ? pRoot->GetDictFor("Pages") : nullptr;
-  if (!pPages)
-    return -1;
-
-  int nPages = pDoc->GetPageCount();
-  if (iPage < 0 || iPage > nPages)
-    return -1;
-
-  if (iPage == nPages) {
-    CPDF_Array* pPagesList = pPages->GetArrayFor("Kids");
-    if (!pPagesList) {
-      pPagesList = new CPDF_Array;
-      pPages->SetFor("Kids", pPagesList);
-    }
-    pPagesList->Add(new CPDF_Reference(pDoc, pPageDict->GetObjNum()));
-    pPages->SetIntegerFor("Count", nPages + 1);
-    pPageDict->SetReferenceFor("Parent", pDoc, pPages->GetObjNum());
-  } else {
-    std::set<CPDF_Dictionary*> stack = {pPages};
-    if (InsertDeletePDFPage(pDoc, pPages, iPage, pPageDict, TRUE, &stack) < 0)
-      return -1;
-  }
-  pageList.InsertAt(iPage, pPageDict->GetObjNum());
-  return iPage;
-}
-
 int CountPages(CPDF_Dictionary* pPages,
                std::set<CPDF_Dictionary*>* visited_pages) {
   int count = pPages->GetIntegerFor("Count");
@@ -413,6 +337,7 @@
       m_pParser(std::move(pParser)),
       m_pRootDict(nullptr),
       m_pInfoDict(nullptr),
+      m_iLastPageTraversed(-1),
       m_bLinearized(false),
       m_iFirstPageNo(0),
       m_dwFirstPageObjNum(0),
@@ -477,40 +402,63 @@
   m_PageList.SetSize(RetrievePageCount());
 }
 
-CPDF_Dictionary* CPDF_Document::FindPDFPage(CPDF_Dictionary* pPages,
-                                            int iPage,
-                                            int nPagesToGo,
-                                            int level) {
+CPDF_Dictionary* CPDF_Document::TraversePDFPages(int iPage, int nPagesToGo) {
+  std::pair<CPDF_Dictionary*, int>* lastProc = &m_pTreeTraversal.top();
+  CPDF_Dictionary* pPages = lastProc->first;
   CPDF_Array* pKidList = pPages->GetArrayFor("Kids");
-  if (!pKidList)
-    return nPagesToGo == 0 ? pPages : nullptr;
+  if (!pKidList) {
+    m_pTreeTraversal.pop();
+    if (nPagesToGo != 1)
+      return nullptr;
+    m_PageList.SetAt(iPage, pPages->GetObjNum());
+    return pPages;
+  }
 
-  if (level >= FX_MAX_PAGE_LEVEL)
+  if (m_pTreeTraversal.size() >= FX_MAX_PAGE_LEVEL) {
+    m_pTreeTraversal.pop();
     return nullptr;
+  }
 
-  for (size_t i = 0; i < pKidList->GetCount(); i++) {
+  bool shouldFinish = pPages->GetIntegerFor("Count") <= nPagesToGo;
+  CPDF_Dictionary* page = nullptr;
+  for (size_t i = lastProc->second + 1; i < pKidList->GetCount(); i++) {
     CPDF_Dictionary* pKid = pKidList->GetDictAt(i);
     if (!pKid) {
       nPagesToGo--;
+      lastProc->second++;
       continue;
     }
-    if (pKid == pPages)
+    if (pKid == pPages) {
+      lastProc->second++;
       continue;
+    }
     if (!pKid->KeyExist("Kids")) {
-      if (nPagesToGo == 0)
-        return pKid;
-
-      m_PageList.SetAt(iPage - nPagesToGo, pKid->GetObjNum());
+      m_PageList.SetAt(iPage - nPagesToGo + 1, pKid->GetObjNum());
       nPagesToGo--;
+      lastProc->second++;
+      if (nPagesToGo == 0) {
+        page = pKid;
+        break;
+      }
     } else {
       int nPages = pKid->GetIntegerFor("Count");
-      if (nPagesToGo < nPages)
-        return FindPDFPage(pKid, iPage, nPagesToGo, level + 1);
-
+      m_pTreeTraversal.push(std::make_pair(pKid, -1));
+      CPDF_Dictionary* pageKid = TraversePDFPages(iPage, nPagesToGo);
+      // If the stack top is current element, kid was completely processed.
+      if (lastProc == &m_pTreeTraversal.top())
+        lastProc->second++;
+      if (nPagesToGo <= nPages) {
+        page = pageKid;
+        break;
+      }
       nPagesToGo -= nPages;
     }
   }
-  return nullptr;
+  if (shouldFinish ||
+      lastProc->second == static_cast<int>(pKidList->GetCount() - 1)) {
+    m_pTreeTraversal.pop();
+  }
+  return page;
 }
 
 CPDF_Dictionary* CPDF_Document::GetPagesDict() const {
@@ -534,21 +482,20 @@
   }
 
   int objnum = m_PageList.GetAt(iPage);
-  if (objnum) {
-    if (CPDF_Dictionary* pDict = ToDictionary(GetOrParseIndirectObject(objnum)))
-      return pDict;
+  if (!objnum || m_iLastPageTraversed < iPage) {
+    CPDF_Dictionary* pPages = GetPagesDict();
+    if (!pPages)
+      return nullptr;
+    if (m_pTreeTraversal.empty())
+      m_pTreeTraversal.push(std::make_pair(pPages, -1));
+    CPDF_Dictionary* page = TraversePDFPages(m_iLastPageTraversed + 1,
+                                             iPage - m_iLastPageTraversed);
+    m_iLastPageTraversed = iPage;
+    return page;
   }
-
-  CPDF_Dictionary* pPages = GetPagesDict();
-  if (!pPages)
-    return nullptr;
-
-  CPDF_Dictionary* pPage = FindPDFPage(pPages, iPage, iPage, 0);
-  if (!pPage)
-    return nullptr;
-
-  m_PageList.SetAt(iPage, pPage->GetObjNum());
-  return pPage;
+  if (CPDF_Dictionary* pDict = ToDictionary(GetOrParseIndirectObject(objnum)))
+    return pDict;
+  return nullptr;
 }
 
 void CPDF_Document::SetPageObjNum(int iPage, uint32_t objNum) {
@@ -710,13 +657,91 @@
   CPDF_Dictionary* pDict = new CPDF_Dictionary(m_pByteStringPool);
   pDict->SetNameFor("Type", "Page");
   uint32_t dwObjNum = AddIndirectObject(pDict);
-  if (InsertNewPage(this, iPage, pDict, m_PageList) < 0) {
+  if (InsertNewPage(iPage, pDict, m_PageList) < 0) {
     ReleaseIndirectObject(dwObjNum);
     return nullptr;
   }
   return pDict;
 }
 
+int CPDF_Document::InsertDeletePDFPage(CPDF_Dictionary* pPages,
+                                       int nPagesToGo,
+                                       CPDF_Dictionary* pPage,
+                                       FX_BOOL bInsert,
+                                       std::set<CPDF_Dictionary*>* pVisited) {
+  CPDF_Array* pKidList = pPages->GetArrayFor("Kids");
+  if (!pKidList)
+    return -1;
+
+  for (size_t i = 0; i < pKidList->GetCount(); i++) {
+    CPDF_Dictionary* pKid = pKidList->GetDictAt(i);
+    if (pKid->GetStringFor("Type") == "Page") {
+      if (nPagesToGo == 0) {
+        if (bInsert) {
+          pKidList->InsertAt(i, new CPDF_Reference(this, pPage->GetObjNum()));
+          pPage->SetReferenceFor("Parent", this, pPages->GetObjNum());
+        } else {
+          pKidList->RemoveAt(i);
+        }
+        pPages->SetIntegerFor(
+            "Count", pPages->GetIntegerFor("Count") + (bInsert ? 1 : -1));
+        // Tree will change, so reset tree transversal variables
+        m_iLastPageTraversed = -1;
+        m_pTreeTraversal = std::stack<std::pair<CPDF_Dictionary*, int>>();
+        return 1;
+      }
+      nPagesToGo--;
+    } else {
+      int nPages = pKid->GetIntegerFor("Count");
+      if (nPagesToGo < nPages) {
+        if (pdfium::ContainsKey(*pVisited, pKid))
+          return -1;
+
+        pdfium::ScopedSetInsertion<CPDF_Dictionary*> insertion(pVisited, pKid);
+        if (InsertDeletePDFPage(pKid, nPagesToGo, pPage, bInsert, pVisited) <
+            0) {
+          return -1;
+        }
+        pPages->SetIntegerFor(
+            "Count", pPages->GetIntegerFor("Count") + (bInsert ? 1 : -1));
+        return 1;
+      }
+      nPagesToGo -= nPages;
+    }
+  }
+  return 0;
+}
+
+int CPDF_Document::InsertNewPage(int iPage,
+                                 CPDF_Dictionary* pPageDict,
+                                 CFX_ArrayTemplate<uint32_t>& pageList) {
+  CPDF_Dictionary* pRoot = GetRoot();
+  CPDF_Dictionary* pPages = pRoot ? pRoot->GetDictFor("Pages") : nullptr;
+  if (!pPages)
+    return -1;
+
+  int nPages = GetPageCount();
+  if (iPage < 0 || iPage > nPages)
+    return -1;
+
+  if (iPage == nPages) {
+    CPDF_Array* pPagesList = pPages->GetArrayFor("Kids");
+    if (!pPagesList) {
+      pPagesList = new CPDF_Array;
+      pPages->SetFor("Kids", pPagesList);
+    }
+    pPagesList->Add(new CPDF_Reference(this, pPageDict->GetObjNum()));
+    pPages->SetIntegerFor("Count", nPages + 1);
+    pPageDict->SetReferenceFor("Parent", this, pPages->GetObjNum());
+  } else {
+    std::set<CPDF_Dictionary*> stack = {pPages};
+    if (InsertDeletePDFPage(pPages, iPage, pPageDict, TRUE, &stack) < 0)
+      return -1;
+  }
+  pageList.InsertAt(iPage, pPageDict->GetObjNum());
+  return iPage;
+}
+
 void CPDF_Document::DeletePage(int iPage) {
   CPDF_Dictionary* pPages = GetPagesDict();
   if (!pPages)
@@ -727,7 +752,7 @@
     return;
 
   std::set<CPDF_Dictionary*> stack = {pPages};
-  if (InsertDeletePDFPage(this, pPages, iPage, nullptr, FALSE, &stack) < 0)
+  if (InsertDeletePDFPage(pPages, iPage, nullptr, FALSE, &stack) < 0)
     return;
 
   m_PageList.RemoveAt(iPage);
diff --git a/core/fpdfapi/parser/cpdf_document.h b/core/fpdfapi/parser/cpdf_document.h
index c557a56..e845ea8 100644
--- a/core/fpdfapi/parser/cpdf_document.h
+++ b/core/fpdfapi/parser/cpdf_document.h
@@ -9,6 +9,7 @@
 
 #include <functional>
 #include <memory>
+#include <stack>
 
 #include "core/fpdfapi/parser/cpdf_indirect_object_holder.h"
 #include "core/fpdfapi/parser/cpdf_object.h"
@@ -107,10 +108,7 @@
 
   // Retrieve page count information by getting count value from the tree nodes
   int RetrievePageCount() const;
-  CPDF_Dictionary* FindPDFPage(CPDF_Dictionary* pPages,
-                               int iPage,
-                               int nPagesToGo,
-                               int level);
+  CPDF_Dictionary* TraversePDFPages(int iPage, int nPagesToGo);
   int FindPageIndex(CPDF_Dictionary* pNode,
                     uint32_t& skip_count,
                     uint32_t objnum,
@@ -126,10 +124,19 @@
       FX_BOOL bVert,
       CFX_ByteString basefont,
       std::function<void(FX_WCHAR, FX_WCHAR, CPDF_Array*)> Insert);
-
+  int InsertDeletePDFPage(CPDF_Dictionary* pPages,
+                          int nPagesToGo,
+                          CPDF_Dictionary* pPage,
+                          FX_BOOL bInsert,
+                          std::set<CPDF_Dictionary*>* pVisited);
+  int InsertNewPage(int iPage,
+                    CPDF_Dictionary* pPageDict,
+                    CFX_ArrayTemplate<uint32_t>& pageList);
   std::unique_ptr<CPDF_Parser> m_pParser;
   CPDF_Dictionary* m_pRootDict;
   CPDF_Dictionary* m_pInfoDict;
+  std::stack<std::pair<CPDF_Dictionary*, int>> m_pTreeTraversal;
+  int m_iLastPageTraversed;
   bool m_bLinearized;
   int m_iFirstPageNo;
   uint32_t m_dwFirstPageObjNum;