Make CPDF_Document cache CPDF_Pages

We cache pages not by page number, which can bounce around as pages
are inserted or removed, but by the page dictionary's object and
generation numbers.

Since the page may be created under one function and used under
another, we can no longer take the shortcut of skipping the render
cache or skipping content parsing: every cached page is created with
a render cache and has its content parsed.

Change-Id: I9a325cda8b3141153544ac53e78a51a44e6b411a
Reviewed-on: https://pdfium-review.googlesource.com/32830
Commit-Queue: Tom Sepez <tsepez@chromium.org>
Reviewed-by: dsinclair <dsinclair@chromium.org>
diff --git a/core/fpdfapi/edit/cpdf_pagecontentgenerator_unittest.cpp b/core/fpdfapi/edit/cpdf_pagecontentgenerator_unittest.cpp
index 1244b12..859f5b3 100644
--- a/core/fpdfapi/edit/cpdf_pagecontentgenerator_unittest.cpp
+++ b/core/fpdfapi/edit/cpdf_pagecontentgenerator_unittest.cpp
@@ -129,7 +129,7 @@
   auto pDoc = pdfium::MakeUnique<CPDF_Document>(nullptr);
   pDoc->CreateNewDoc();
   CPDF_Dictionary* pPageDict = pDoc->CreateNewPage(0);
-  auto pTestPage = pdfium::MakeRetain<CPDF_Page>(pDoc.get(), pPageDict, false);
+  RetainPtr<CPDF_Page> pTestPage = pDoc->GetOrCreatePDFPage(pPageDict);
   CPDF_PageContentGenerator generator(pTestPage.Get());
   std::ostringstream buf;
   TestProcessPath(&generator, &buf, pPathObj.get());
@@ -168,7 +168,7 @@
   auto pDoc = pdfium::MakeUnique<CPDF_Document>(nullptr);
   pDoc->CreateNewDoc();
   CPDF_Dictionary* pPageDict = pDoc->CreateNewPage(0);
-  auto pTestPage = pdfium::MakeRetain<CPDF_Page>(pDoc.get(), pPageDict, false);
+  RetainPtr<CPDF_Page> pTestPage = pDoc->GetOrCreatePDFPage(pPageDict);
   CPDF_PageContentGenerator generator(pTestPage.Get());
   auto pTextObj = pdfium::MakeUnique<CPDF_TextObject>();
   CPDF_Font* pFont = CPDF_Font::GetStockFont(pDoc.get(), "Times-Roman");
@@ -231,7 +231,7 @@
   auto pDoc = pdfium::MakeUnique<CPDF_Document>(nullptr);
   pDoc->CreateNewDoc();
   CPDF_Dictionary* pPageDict = pDoc->CreateNewPage(0);
-  auto pTestPage = pdfium::MakeRetain<CPDF_Page>(pDoc.get(), pPageDict, false);
+  RetainPtr<CPDF_Page> pTestPage = pDoc->GetOrCreatePDFPage(pPageDict);
   CPDF_PageContentGenerator generator(pTestPage.Get());
 
   std::ostringstream buf;
diff --git a/core/fpdfapi/page/cpdf_page.cpp b/core/fpdfapi/page/cpdf_page.cpp
index 7c0a323..c5c8563 100644
--- a/core/fpdfapi/page/cpdf_page.cpp
+++ b/core/fpdfapi/page/cpdf_page.cpp
@@ -21,11 +21,11 @@
 
 CPDF_Page::CPDF_Page(CPDF_Document* pDocument,
                      CPDF_Dictionary* pPageDict,
-                     bool bPageCache)
+                     bool bUseRenderCache)
     : CPDF_PageObjectHolder(pDocument, pPageDict),
       m_PageSize(100, 100),
       m_pPDFDocument(pDocument) {
-  if (bPageCache)
+  if (bUseRenderCache)
     m_pPageRender = pdfium::MakeUnique<CPDF_PageRenderCache>(this);
   if (!pPageDict)
     return;
diff --git a/core/fpdfapi/page/cpdf_page.h b/core/fpdfapi/page/cpdf_page.h
index 0bb99b7..b94326e 100644
--- a/core/fpdfapi/page/cpdf_page.h
+++ b/core/fpdfapi/page/cpdf_page.h
@@ -12,6 +12,7 @@
 #include "core/fpdfapi/page/cpdf_pageobjectholder.h"
 #include "core/fxcrt/fx_coordinates.h"
 #include "core/fxcrt/fx_system.h"
+#include "core/fxcrt/observable.h"
 #include "core/fxcrt/retain_ptr.h"
 #include "core/fxcrt/unowned_ptr.h"
 #include "third_party/base/optional.h"
@@ -22,7 +23,9 @@
 class CPDF_PageRenderCache;
 class CPDF_PageRenderContext;
 
-class CPDF_Page : public Retainable, public CPDF_PageObjectHolder {
+class CPDF_Page : public Retainable,
+                  public Observable<CPDF_Page>,
+                  public CPDF_PageObjectHolder {
  public:
   class View {};  // Caller implements as desired, empty here due to layering.
   class Extension : public Retainable {};  // XFA page parent class, layering.
@@ -66,7 +69,7 @@
  private:
   CPDF_Page(CPDF_Document* pDocument,
             CPDF_Dictionary* pPageDict,
-            bool bPageCache);
+            bool bUseRenderCache);
   ~CPDF_Page() override;
 
   void StartParse();
diff --git a/core/fpdfapi/parser/cpdf_document.cpp b/core/fpdfapi/parser/cpdf_document.cpp
index 7968d1b..2e4baab 100644
--- a/core/fpdfapi/parser/cpdf_document.cpp
+++ b/core/fpdfapi/parser/cpdf_document.cpp
@@ -602,6 +602,19 @@
   return true;
 }
 
+RetainPtr<CPDF_Page> CPDF_Document::GetOrCreatePDFPage(
+    CPDF_Dictionary* pPageDict) {
+  // Cache is keyed by the dict's {objnum, gennum}. Look the slot up once;
+  // std::map references stay valid while other entries are inserted.
+  auto& entry = m_PageMap[{pPageDict->GetObjNum(), pPageDict->GetGenNum()}];
+  if (entry)
+    return RetainPtr<CPDF_Page>(entry.Get());
+  auto pPage = pdfium::MakeRetain<CPDF_Page>(this, pPageDict, true);
+  pPage->ParseContent();
+  entry.Reset(pPage.Get());  // Slot is an ObservedPtr, not a RetainPtr.
+  return pPage;
+}
+
 void CPDF_Document::DeletePage(int iPage) {
   CPDF_Dictionary* pPages = GetPagesDict();
   if (!pPages)
diff --git a/core/fpdfapi/parser/cpdf_document.h b/core/fpdfapi/parser/cpdf_document.h
index c2774b1..c240e77 100644
--- a/core/fpdfapi/parser/cpdf_document.h
+++ b/core/fpdfapi/parser/cpdf_document.h
@@ -8,6 +8,7 @@
 #define CORE_FPDFAPI_PARSER_CPDF_DOCUMENT_H_
 
 #include <functional>
+#include <map>
 #include <memory>
 #include <set>
 #include <utility>
@@ -64,6 +65,7 @@
   const CPDF_Dictionary* GetInfo() const { return m_pInfoDict.Get(); }
   CPDF_Dictionary* GetInfo() { return m_pInfoDict.Get(); }
 
+  RetainPtr<CPDF_Page> GetOrCreatePDFPage(CPDF_Dictionary* pPageDict);
   void DeletePage(int iPage);
   int GetPageCount() const;
   bool IsPageLoaded(int iPage) const;
@@ -167,7 +169,13 @@
   std::unique_ptr<CPDF_DocRenderData> m_pDocRender;
   std::unique_ptr<JBig2_DocumentContext> m_pCodecContext;
   std::unique_ptr<CPDF_LinkList> m_pLinksContext;
-  std::vector<uint32_t> m_PageList;  // Page number to page's dict objnum.
+
+  // Page number (index) to page's dict objnum.
+  std::vector<uint32_t> m_PageList;
+
+  // Dict {objnum, gennum} to page mapping.
+  std::map<std::pair<uint32_t, uint32_t>, CPDF_Page::ObservedPtr> m_PageMap;
+
   UnownedPtr<Extension> m_pExtension;
 };
 
diff --git a/fpdfsdk/fpdf_doc_embeddertest.cpp b/fpdfsdk/fpdf_doc_embeddertest.cpp
index c1f5e02..a9e4701 100644
--- a/fpdfsdk/fpdf_doc_embeddertest.cpp
+++ b/fpdfsdk/fpdf_doc_embeddertest.cpp
@@ -27,11 +27,7 @@
     ref.reset(FPDF_LoadPage(document(), 0));
     unique_pages.insert(ref.get());
   }
-#ifdef PDF_ENABLE_XFA
   EXPECT_EQ(1u, unique_pages.size());
-#else   // PDF_ENABLE_XFA
-  EXPECT_EQ(4u, unique_pages.size());
-#endif  // PDF_ENABLE_XFA
 }
 
 TEST_F(FPDFDocEmbeddertest, DestGetPageIndex) {
diff --git a/fpdfsdk/fpdf_editpage.cpp b/fpdfsdk/fpdf_editpage.cpp
index 39cf85f..9fb6c1f 100644
--- a/fpdfsdk/fpdf_editpage.cpp
+++ b/fpdfsdk/fpdf_editpage.cpp
@@ -196,8 +196,7 @@
   // Eventually, fallthru into non-XFA case once page type is consistent.
   return nullptr;
 #else  // PDF_ENABLE_XFA
-  auto pPage = pdfium::MakeRetain<CPDF_Page>(pDoc, pPageDict, true);
-  pPage->ParseContent();
+  RetainPtr<CPDF_Page> pPage = pDoc->GetOrCreatePDFPage(pPageDict);
   return FPDFPageFromUnderlying(pPage.Leak());  // Caller takes ownership.
 #endif  // PDF_ENABLE_XFA
 }
diff --git a/fpdfsdk/fpdf_flatten.cpp b/fpdfsdk/fpdf_flatten.cpp
index 720fa9f..2d81f12 100644
--- a/fpdfsdk/fpdf_flatten.cpp
+++ b/fpdfsdk/fpdf_flatten.cpp
@@ -47,9 +47,7 @@
 void GetContentsRect(CPDF_Document* pDoc,
                      CPDF_Dictionary* pDict,
                      std::vector<CFX_FloatRect>* pRectArray) {
-  auto pPDFPage = pdfium::MakeRetain<CPDF_Page>(pDoc, pDict, false);
-  pPDFPage->ParseContent();
-
+  RetainPtr<CPDF_Page> pPDFPage = pDoc->GetOrCreatePDFPage(pDict);
   for (const auto& pPageObject : *pPDFPage->GetPageObjectList()) {
     CFX_FloatRect rc;
     rc.left = pPageObject->m_Left;
diff --git a/fpdfsdk/fpdf_ppo.cpp b/fpdfsdk/fpdf_ppo.cpp
index b730424..4ea3334 100644
--- a/fpdfsdk/fpdf_ppo.cpp
+++ b/fpdfsdk/fpdf_ppo.cpp
@@ -618,7 +618,7 @@
       if (!pSrcPageDict)
         return false;
 
-      auto srcPage = pdfium::MakeRetain<CPDF_Page>(src(), pSrcPageDict, true);
+      RetainPtr<CPDF_Page> srcPage = src()->GetOrCreatePDFPage(pSrcPageDict);
       NupPageSettings settings =
           nupState.CalculateNewPagePosition(srcPage->GetPageSize());
       AddSubPage(pSrcPageDict, settings, &objectNumberMap, &pageXObjectMap,
diff --git a/fpdfsdk/fpdf_view.cpp b/fpdfsdk/fpdf_view.cpp
index af91175..9af0ca4 100644
--- a/fpdfsdk/fpdf_view.cpp
+++ b/fpdfsdk/fpdf_view.cpp
@@ -354,8 +354,7 @@
   if (!pDict)
     return nullptr;
 
-  auto pPage = pdfium::MakeRetain<CPDF_Page>(pDoc, pDict, true);
-  pPage->ParseContent();
+  RetainPtr<CPDF_Page> pPage = pDoc->GetOrCreatePDFPage(pDict);
   return FPDFPageFromUnderlying(pPage.Leak());
 #endif  // PDF_ENABLE_XFA
 }
@@ -962,7 +961,7 @@
   if (!pDict)
     return false;
 
-  auto page = pdfium::MakeRetain<CPDF_Page>(pDoc, pDict, true);
+  RetainPtr<CPDF_Page> page = pDoc->GetOrCreatePDFPage(pDict);
   *width = page->GetPageWidth();
   *height = page->GetPageHeight();
   return true;
diff --git a/fpdfsdk/fpdfxfa/cpdfxfa_page.cpp b/fpdfsdk/fpdfxfa/cpdfxfa_page.cpp
index 4d7e3bc..f412089 100644
--- a/fpdfsdk/fpdfxfa/cpdfxfa_page.cpp
+++ b/fpdfsdk/fpdfxfa/cpdfxfa_page.cpp
@@ -34,10 +34,9 @@
   if (!pDict)
     return false;
 
-  if (!m_pPDFPage || m_pPDFPage->GetFormDict() != pDict) {
-    m_pPDFPage = pdfium::MakeRetain<CPDF_Page>(pPDFDoc, pDict, true);
-    m_pPDFPage->ParseContent();
-  }
+  if (!m_pPDFPage || m_pPDFPage->GetFormDict() != pDict)
+    m_pPDFPage = pPDFDoc->GetOrCreatePDFPage(pDict);
+
   return true;
 }
 
@@ -80,9 +79,7 @@
   if (!m_pContext || m_iPageIndex < 0 || !pageDict)
     return false;
 
-  m_pPDFPage =
-      pdfium::MakeRetain<CPDF_Page>(m_pContext->GetPDFDoc(), pageDict, true);
-  m_pPDFPage->ParseContent();
+  m_pPDFPage = m_pContext->GetPDFDoc()->GetOrCreatePDFPage(pageDict);
   return true;
 }
 
diff --git a/fxjs/cjs_document.cpp b/fxjs/cjs_document.cpp
index 6657706..3a4ca27 100644
--- a/fxjs/cjs_document.cpp
+++ b/fxjs/cjs_document.cpp
@@ -1256,8 +1256,7 @@
   if (!pPageDict)
     return CJS_Return(false);
 
-  auto page = pdfium::MakeRetain<CPDF_Page>(pDocument, pPageDict, true);
-  page->ParseContent();
+  RetainPtr<CPDF_Page> page = pDocument->GetOrCreatePDFPage(pPageDict);
 
   int nWords = 0;
   WideString swRet;
@@ -1305,8 +1304,7 @@
   if (!pPageDict)
     return CJS_Return(false);
 
-  auto page = pdfium::MakeRetain<CPDF_Page>(pDocument, pPageDict, true);
-  page->ParseContent();
+  RetainPtr<CPDF_Page> page = pDocument->GetOrCreatePDFPage(pPageDict);
 
   int nWords = 0;
   for (auto& pPageObj : *page->GetPageObjectList()) {