Fix rendering issue for PDFs with incorrect first-page hint data.
For a linearized PDF, the hint table can sometimes contain incorrect
data. For example, the test file in crbug.com/1012237 has a hint table
that points the first page at the wrong object, which causes the wrong
object number to be stored in |m_PageList[0]|. This results in fetching
the dictionary of a non-page-type object for the first page, so the
first page is rendered blank.
To fix the issue, this CL adds CPDF_Document::IsValidPageObject() to
check whether an object's type is "Page", and uses this validation in
CPDF_Document::LoadPages(). If the check fails, the wrong object number
is not stored in |m_PageList[0]|. Instead, the page tree is traversed
to update |m_PageList|.
Bug: chromium:1012237
Change-Id: I1e8745a48ed0cabe5478e9e8e1dda51fa0a8d53f
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/67490
Commit-Queue: Hui Yingst <nigi@chromium.org>
Reviewed-by: Lei Zhang <thestig@chromium.org>
diff --git a/core/fpdfapi/parser/cpdf_document.cpp b/core/fpdfapi/parser/cpdf_document.cpp
index 467f8a5..5e93c9a 100644
--- a/core/fpdfapi/parser/cpdf_document.cpp
+++ b/core/fpdfapi/parser/cpdf_document.cpp
@@ -111,6 +111,16 @@
CPDF_Document::~CPDF_Document() = default;
+// static -- reports whether |obj| is a dictionary whose "Type" entry is the name "Page".
+bool CPDF_Document::IsValidPageObject(const CPDF_Object* obj) {
+ const CPDF_Dictionary* dict = ToDictionary(obj);  // presumably null unless |obj| is a dictionary -- confirm against ToDictionary()
+ if (!dict)
+ return false;  // No dictionary to inspect, so this cannot be a page object.
+
+ const CPDF_Name* name = ToName(dict->GetObjectFor("Type"));  // "Type" must hold a name object; ToName() presumably yields null otherwise
+ return name && name->GetString() == "Page";  // A null |name| or any name other than "Page" is rejected.
+}
+
RetainPtr<CPDF_Object> CPDF_Document::ParseIndirectObject(uint32_t objnum) {
return m_pParser ? m_pParser->ParseIndirectObject(objnum) : nullptr;
}
@@ -152,10 +162,17 @@
return;
}
- m_PageList.resize(linearized_header->GetPageCount());
- ASSERT(linearized_header->GetFirstPageNo() < m_PageList.size());
- m_PageList[linearized_header->GetFirstPageNo()] =
- linearized_header->GetFirstPageObjNum();
+ uint32_t objnum = linearized_header->GetFirstPageObjNum();
+ if (!IsValidPageObject(GetOrParseIndirectObject(objnum))) {
+ m_PageList.resize(RetrievePageCount());
+ return;
+ }
+
+ uint32_t first_page_num = linearized_header->GetFirstPageNo();
+ uint32_t page_count = linearized_header->GetPageCount();
+ ASSERT(first_page_num < page_count);
+ m_PageList.resize(page_count);
+ m_PageList[first_page_num] = objnum;
}
CPDF_Dictionary* CPDF_Document::TraversePDFPages(int iPage,
diff --git a/core/fpdfapi/parser/cpdf_document.h b/core/fpdfapi/parser/cpdf_document.h
index abffb81..cf738f2 100644
--- a/core/fpdfapi/parser/cpdf_document.h
+++ b/core/fpdfapi/parser/cpdf_document.h
@@ -76,6 +76,8 @@
static const int kPageMaxNum = 0xFFFFF;
+ static bool IsValidPageObject(const CPDF_Object* obj);
+
CPDF_Document(std::unique_ptr<RenderDataIface> pRenderData,
std::unique_ptr<PageDataIface> pPageData);
~CPDF_Document() override;
diff --git a/core/fpdfapi/parser/cpdf_document_unittest.cpp b/core/fpdfapi/parser/cpdf_document_unittest.cpp
index c469743..c2a964e 100644
--- a/core/fpdfapi/parser/cpdf_document_unittest.cpp
+++ b/core/fpdfapi/parser/cpdf_document_unittest.cpp
@@ -224,6 +224,28 @@
EXPECT_EQ(6, page->GetIntegerFor("PageNumbering"));
}
+TEST_F(cpdf_document_test, IsValidPageObject) {  // One accepted case and three rejected cases for IsValidPageObject().
+ CPDF_TestDocumentForPages document;  // Used here only as the holder for the indirect objects created below.
+
+ auto dict_type_name_page = pdfium::MakeRetain<CPDF_Dictionary>();
+ dict_type_name_page->SetNewFor<CPDF_Name>("Type", "Page");  // Case 1: "Type" is the name "Page".
+ EXPECT_TRUE(CPDF_Document::IsValidPageObject(
+ document.AddIndirectObject(dict_type_name_page)));  // The only case expected to be accepted.
+
+ auto dict_type_string_page = pdfium::MakeRetain<CPDF_Dictionary>();
+ dict_type_string_page->SetNewFor<CPDF_String>("Type", "Page", false);  // Case 2: same text, but stored as a string object instead of a name.
+ EXPECT_FALSE(CPDF_Document::IsValidPageObject(
+ document.AddIndirectObject(dict_type_string_page)));  // Rejected: the value is not a name object.
+
+ auto dict_type_name_font = pdfium::MakeRetain<CPDF_Dictionary>();
+ dict_type_name_font->SetNewFor<CPDF_Name>("Type", "Font");  // Case 3: "Type" is a name, but not "Page".
+ EXPECT_FALSE(CPDF_Document::IsValidPageObject(
+ document.AddIndirectObject(dict_type_name_font)));  // Rejected: wrong name value.
+
+ CPDF_Object* obj_no_type = document.NewIndirect<CPDF_Dictionary>();  // Case 4: a new dictionary on which no "Type" entry is ever set.
+ EXPECT_FALSE(CPDF_Document::IsValidPageObject(obj_no_type));  // Rejected: no "Type" entry to match.
+}
+
TEST_F(cpdf_document_test, UseCachedPageObjNumIfHaveNotPagesDict) {
// ObjNum can be added in CPDF_DataAvail::IsPageAvail(), and PagesDict may not
// exist in this case, e.g. when hint table is used to page check in