Fix rendering issue for PDFs with incorrect first-page hint data.

For a linearized PDF, sometimes the hint table can contain incorrect
data. For example, the test file in crbug.com/1012237 has a hint table
which points the first page to the wrong object, so the wrong object
number is stored in |m_PageList[0]|. This results in fetching a
dictionary of a non-page-type object for the first page and rendering
the first page blank.

To fix the issue, this CL adds CPDF_Document::IsValidPageObject() to
check whether an object's type is "Page", and uses it to validate the
first-page object in CPDF_Document::LoadPages(). If the check fails,
the wrong object number is not stored in |m_PageList[0]|. Instead, the
page tree is traversed to update |m_PageList|.

Bug: chromium:1012237
Change-Id: I1e8745a48ed0cabe5478e9e8e1dda51fa0a8d53f
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/67490
Commit-Queue: Hui Yingst <nigi@chromium.org>
Reviewed-by: Lei Zhang <thestig@chromium.org>
diff --git a/core/fpdfapi/parser/cpdf_document.cpp b/core/fpdfapi/parser/cpdf_document.cpp
index 467f8a5..5e93c9a 100644
--- a/core/fpdfapi/parser/cpdf_document.cpp
+++ b/core/fpdfapi/parser/cpdf_document.cpp
@@ -111,6 +111,16 @@
 
 CPDF_Document::~CPDF_Document() = default;
 
+// static
+bool CPDF_Document::IsValidPageObject(const CPDF_Object* obj) {
+  const CPDF_Dictionary* dict = ToDictionary(obj);
+  if (!dict)
+    return false;
+
+  const CPDF_Name* name = ToName(dict->GetObjectFor("Type"));
+  return name && name->GetString() == "Page";
+}
+
 RetainPtr<CPDF_Object> CPDF_Document::ParseIndirectObject(uint32_t objnum) {
   return m_pParser ? m_pParser->ParseIndirectObject(objnum) : nullptr;
 }
@@ -152,10 +162,17 @@
     return;
   }
 
-  m_PageList.resize(linearized_header->GetPageCount());
-  ASSERT(linearized_header->GetFirstPageNo() < m_PageList.size());
-  m_PageList[linearized_header->GetFirstPageNo()] =
-      linearized_header->GetFirstPageObjNum();
+  uint32_t objnum = linearized_header->GetFirstPageObjNum();
+  if (!IsValidPageObject(GetOrParseIndirectObject(objnum))) {
+    m_PageList.resize(RetrievePageCount());
+    return;
+  }
+
+  uint32_t first_page_num = linearized_header->GetFirstPageNo();
+  uint32_t page_count = linearized_header->GetPageCount();
+  ASSERT(first_page_num < page_count);
+  m_PageList.resize(page_count);
+  m_PageList[first_page_num] = objnum;
 }
 
 CPDF_Dictionary* CPDF_Document::TraversePDFPages(int iPage,
diff --git a/core/fpdfapi/parser/cpdf_document.h b/core/fpdfapi/parser/cpdf_document.h
index abffb81..cf738f2 100644
--- a/core/fpdfapi/parser/cpdf_document.h
+++ b/core/fpdfapi/parser/cpdf_document.h
@@ -76,6 +76,8 @@
 
   static const int kPageMaxNum = 0xFFFFF;
 
+  static bool IsValidPageObject(const CPDF_Object* obj);
+
   CPDF_Document(std::unique_ptr<RenderDataIface> pRenderData,
                 std::unique_ptr<PageDataIface> pPageData);
   ~CPDF_Document() override;
diff --git a/core/fpdfapi/parser/cpdf_document_unittest.cpp b/core/fpdfapi/parser/cpdf_document_unittest.cpp
index c469743..c2a964e 100644
--- a/core/fpdfapi/parser/cpdf_document_unittest.cpp
+++ b/core/fpdfapi/parser/cpdf_document_unittest.cpp
@@ -224,6 +224,28 @@
   EXPECT_EQ(6, page->GetIntegerFor("PageNumbering"));
 }
 
+TEST_F(cpdf_document_test, IsValidPageObject) {
+  CPDF_TestDocumentForPages document;
+
+  auto dict_type_name_page = pdfium::MakeRetain<CPDF_Dictionary>();
+  dict_type_name_page->SetNewFor<CPDF_Name>("Type", "Page");
+  EXPECT_TRUE(CPDF_Document::IsValidPageObject(
+      document.AddIndirectObject(dict_type_name_page)));
+
+  auto dict_type_string_page = pdfium::MakeRetain<CPDF_Dictionary>();
+  dict_type_string_page->SetNewFor<CPDF_String>("Type", "Page", false);
+  EXPECT_FALSE(CPDF_Document::IsValidPageObject(
+      document.AddIndirectObject(dict_type_string_page)));
+
+  auto dict_type_name_font = pdfium::MakeRetain<CPDF_Dictionary>();
+  dict_type_name_font->SetNewFor<CPDF_Name>("Type", "Font");
+  EXPECT_FALSE(CPDF_Document::IsValidPageObject(
+      document.AddIndirectObject(dict_type_name_font)));
+
+  CPDF_Object* obj_no_type = document.NewIndirect<CPDF_Dictionary>();
+  EXPECT_FALSE(CPDF_Document::IsValidPageObject(obj_no_type));
+}
+
 TEST_F(cpdf_document_test, UseCachedPageObjNumIfHaveNotPagesDict) {
   // ObjNum can be added in CPDF_DataAvail::IsPageAvail(), and PagesDict may not
   // exist in this case, e.g. when hint table is used to page check in