| // Copyright 2016 PDFium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
| |
| #include "core/fpdfapi/parser/cpdf_data_avail.h" |
| |
| #include <algorithm> |
| #include <memory> |
| #include <utility> |
| |
| #include "core/fpdfapi/parser/cpdf_array.h" |
| #include "core/fpdfapi/parser/cpdf_cross_ref_avail.h" |
| #include "core/fpdfapi/parser/cpdf_dictionary.h" |
| #include "core/fpdfapi/parser/cpdf_document.h" |
| #include "core/fpdfapi/parser/cpdf_hint_tables.h" |
| #include "core/fpdfapi/parser/cpdf_linearized_header.h" |
| #include "core/fpdfapi/parser/cpdf_name.h" |
| #include "core/fpdfapi/parser/cpdf_number.h" |
| #include "core/fpdfapi/parser/cpdf_page_object_avail.h" |
| #include "core/fpdfapi/parser/cpdf_read_validator.h" |
| #include "core/fpdfapi/parser/cpdf_reference.h" |
| #include "core/fpdfapi/parser/cpdf_stream.h" |
| #include "core/fpdfapi/parser/cpdf_syntax_parser.h" |
| #include "core/fpdfapi/parser/fpdf_parser_utility.h" |
| #include "core/fxcrt/fx_extension.h" |
| #include "core/fxcrt/fx_safe_types.h" |
| #include "third_party/base/compiler_specific.h" |
| #include "third_party/base/notreached.h" |
| #include "third_party/base/numerics/safe_conversions.h" |
| #include "third_party/base/stl_util.h" |
| |
| namespace { |
| |
| // static |
| CPDF_Object* GetResourceObject(CPDF_Dictionary* pDict) { |
| constexpr size_t kMaxHierarchyDepth = 64; |
| size_t depth = 0; |
| |
| CPDF_Dictionary* dictionary_to_check = pDict; |
| while (dictionary_to_check) { |
| CPDF_Object* result = dictionary_to_check->GetObjectFor("Resources"); |
| if (result) |
| return result; |
| CPDF_Object* parent = dictionary_to_check->GetObjectFor("Parent"); |
| dictionary_to_check = parent ? parent->GetDict() : nullptr; |
| |
| if (++depth > kMaxHierarchyDepth) { |
| // We have cycle in parents hierarchy. |
| return nullptr; |
| } |
| } |
| return nullptr; |
| } |
| |
| class HintsScope { |
| public: |
| HintsScope(RetainPtr<CPDF_ReadValidator> validator, |
| CPDF_DataAvail::DownloadHints* hints) |
| : validator_(std::move(validator)) { |
| ASSERT(validator_); |
| validator_->SetDownloadHints(hints); |
| } |
| |
| ~HintsScope() { validator_->SetDownloadHints(nullptr); } |
| |
| private: |
| RetainPtr<CPDF_ReadValidator> validator_; |
| }; |
| |
| } // namespace |
| |
// Out-of-line defaulted destructor for the FileAvail interface.
CPDF_DataAvail::FileAvail::~FileAvail() = default;
| |
// Out-of-line defaulted destructor for the DownloadHints interface.
CPDF_DataAvail::DownloadHints::~DownloadHints() = default;
| |
// Wraps |pFileRead| and |pFileAvail| in a CPDF_ReadValidator, caches the size
// reported by that validator as the file length, and records whether hint
// tables should be used.
// NOTE(review): |m_dwFileLen| is initialized from |m_pFileRead|, so this
// relies on |m_pFileRead| being declared before |m_dwFileLen| in the class --
// confirm against the header.
CPDF_DataAvail::CPDF_DataAvail(
    FileAvail* pFileAvail,
    const RetainPtr<IFX_SeekableReadStream>& pFileRead,
    bool bSupportHintTable)
    : m_pFileRead(
          pdfium::MakeRetain<CPDF_ReadValidator>(pFileRead, pFileAvail)),
      m_dwFileLen(m_pFileRead->GetSize()),
      m_bSupportHintTable(bSupportHintTable) {}
| |
// Releases the hint tables and, if a document is still attached, unregisters
// this object as one of its observers.
CPDF_DataAvail::~CPDF_DataAvail() {
  m_pHintTables.reset();
  if (m_pDocument)
    m_pDocument->RemoveObserver(this);
}
| |
// Observer callback (this object is registered on the document in
// ParseDocument()). Drops the document pointer together with the form
// checker, the pending pages array and the per-page / per-resource
// availability checkers.
void CPDF_DataAvail::OnObservableDestroyed() {
  m_pDocument = nullptr;
  m_pFormAvail.reset();
  m_PagesArray.clear();
  m_PagesObjAvail.clear();
  m_PagesResourcesAvail.clear();
}
| |
| CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::IsDocAvail( |
| DownloadHints* pHints) { |
| if (!m_dwFileLen) |
| return DataError; |
| |
| const HintsScope hints_scope(GetValidator(), pHints); |
| while (!m_bDocAvail) { |
| if (!CheckDocStatus()) |
| return DataNotAvailable; |
| } |
| |
| return DataAvailable; |
| } |
| |
| bool CPDF_DataAvail::CheckDocStatus() { |
| switch (m_docStatus) { |
| case PDF_DATAAVAIL_HEADER: |
| return CheckHeader(); |
| case PDF_DATAAVAIL_FIRSTPAGE: |
| return CheckFirstPage(); |
| case PDF_DATAAVAIL_HINTTABLE: |
| return CheckHintTables(); |
| case PDF_DATAAVAIL_LOADALLCROSSREF: |
| return CheckAndLoadAllXref(); |
| case PDF_DATAAVAIL_LOADALLFILE: |
| return LoadAllFile(); |
| case PDF_DATAAVAIL_ROOT: |
| return CheckRoot(); |
| case PDF_DATAAVAIL_INFO: |
| return CheckInfo(); |
| case PDF_DATAAVAIL_PAGETREE: |
| if (m_bTotalLoadPageTree) |
| return CheckPages(); |
| return LoadDocPages(); |
| case PDF_DATAAVAIL_PAGE: |
| if (m_bTotalLoadPageTree) |
| return CheckPage(); |
| m_docStatus = PDF_DATAAVAIL_PAGE_LATERLOAD; |
| return true; |
| case PDF_DATAAVAIL_ERROR: |
| return LoadAllFile(); |
| case PDF_DATAAVAIL_PAGE_LATERLOAD: |
| m_docStatus = PDF_DATAAVAIL_PAGE; |
| FALLTHROUGH; |
| default: |
| m_bDocAvail = true; |
| return true; |
| } |
| } |
| |
| bool CPDF_DataAvail::CheckPageStatus() { |
| switch (m_docStatus) { |
| case PDF_DATAAVAIL_PAGETREE: |
| return CheckPages(); |
| case PDF_DATAAVAIL_PAGE: |
| return CheckPage(); |
| case PDF_DATAAVAIL_ERROR: |
| return LoadAllFile(); |
| default: |
| m_bPagesTreeLoad = true; |
| m_bPagesLoad = true; |
| return true; |
| } |
| } |
| |
| bool CPDF_DataAvail::LoadAllFile() { |
| if (GetValidator()->CheckWholeFileAndRequestIfUnavailable()) { |
| m_docStatus = PDF_DATAAVAIL_DONE; |
| return true; |
| } |
| return false; |
| } |
| |
| bool CPDF_DataAvail::CheckAndLoadAllXref() { |
| if (!m_pCrossRefAvail) { |
| const CPDF_ReadValidator::Session read_session(GetValidator()); |
| const FX_FILESIZE last_xref_offset = m_parser.ParseStartXRef(); |
| if (GetValidator()->has_read_problems()) |
| return false; |
| |
| if (last_xref_offset <= 0) { |
| m_docStatus = PDF_DATAAVAIL_ERROR; |
| return false; |
| } |
| |
| m_pCrossRefAvail = std::make_unique<CPDF_CrossRefAvail>(GetSyntaxParser(), |
| last_xref_offset); |
| } |
| |
| switch (m_pCrossRefAvail->CheckAvail()) { |
| case DocAvailStatus::DataAvailable: |
| break; |
| case DocAvailStatus::DataNotAvailable: |
| return false; |
| case DocAvailStatus::DataError: |
| m_docStatus = PDF_DATAAVAIL_ERROR; |
| return false; |
| default: |
| NOTREACHED(); |
| return false; |
| } |
| |
| if (!m_parser.LoadAllCrossRefV4(m_pCrossRefAvail->last_crossref_offset()) && |
| !m_parser.LoadAllCrossRefV5(m_pCrossRefAvail->last_crossref_offset())) { |
| m_docStatus = PDF_DATAAVAIL_LOADALLFILE; |
| return false; |
| } |
| |
| m_docStatus = PDF_DATAAVAIL_ROOT; |
| return true; |
| } |
| |
| RetainPtr<CPDF_Object> CPDF_DataAvail::GetObject(uint32_t objnum, |
| bool* pExistInFile) { |
| CPDF_Parser* pParser = nullptr; |
| |
| if (pExistInFile) |
| *pExistInFile = true; |
| |
| pParser = m_pDocument ? m_pDocument->GetParser() : &m_parser; |
| |
| RetainPtr<CPDF_Object> pRet; |
| if (pParser) { |
| const CPDF_ReadValidator::Session read_session(GetValidator()); |
| pRet = pParser->ParseIndirectObject(objnum); |
| if (GetValidator()->has_read_problems()) |
| return nullptr; |
| } |
| |
| if (!pRet && pExistInFile) |
| *pExistInFile = false; |
| |
| return pRet; |
| } |
| |
| bool CPDF_DataAvail::CheckInfo() { |
| const uint32_t dwInfoObjNum = m_parser.GetInfoObjNum(); |
| if (dwInfoObjNum == CPDF_Object::kInvalidObjNum) { |
| m_docStatus = PDF_DATAAVAIL_PAGETREE; |
| return true; |
| } |
| |
| const CPDF_ReadValidator::Session read_session(GetValidator()); |
| m_parser.ParseIndirectObject(dwInfoObjNum); |
| if (GetValidator()->has_read_problems()) |
| return false; |
| |
| m_docStatus = PDF_DATAAVAIL_PAGETREE; |
| return true; |
| } |
| |
| bool CPDF_DataAvail::CheckRoot() { |
| const uint32_t dwRootObjNum = m_parser.GetRootObjNum(); |
| if (dwRootObjNum == CPDF_Object::kInvalidObjNum) { |
| m_docStatus = PDF_DATAAVAIL_ERROR; |
| return true; |
| } |
| |
| const CPDF_ReadValidator::Session read_session(GetValidator()); |
| m_pRoot = ToDictionary(m_parser.ParseIndirectObject(dwRootObjNum)); |
| if (GetValidator()->has_read_problems()) |
| return false; |
| |
| const CPDF_Reference* pRef = |
| ToReference(m_pRoot ? m_pRoot->GetObjectFor("Pages") : nullptr); |
| if (!pRef) { |
| m_docStatus = PDF_DATAAVAIL_ERROR; |
| return false; |
| } |
| |
| m_PagesObjNum = pRef->GetRefObjNum(); |
| m_docStatus = PDF_DATAAVAIL_INFO; |
| return true; |
| } |
| |
| bool CPDF_DataAvail::PreparePageItem() { |
| const CPDF_Dictionary* pRoot = m_pDocument->GetRoot(); |
| const CPDF_Reference* pRef = |
| ToReference(pRoot ? pRoot->GetObjectFor("Pages") : nullptr); |
| if (!pRef) { |
| m_docStatus = PDF_DATAAVAIL_ERROR; |
| return false; |
| } |
| |
| m_PagesObjNum = pRef->GetRefObjNum(); |
| m_docStatus = PDF_DATAAVAIL_PAGETREE; |
| return true; |
| } |
| |
| bool CPDF_DataAvail::IsFirstCheck(uint32_t dwPage) { |
| return m_pageMapCheckState.insert(dwPage).second; |
| } |
| |
// Removes |dwPage| from |m_pageMapCheckState| so that the next
// IsFirstCheck() call for that page returns true again.
void CPDF_DataAvail::ResetFirstCheck(uint32_t dwPage) {
  m_pageMapCheckState.erase(dwPage);
}
| |
| bool CPDF_DataAvail::CheckPage() { |
| std::vector<uint32_t> UnavailObjList; |
| for (uint32_t dwPageObjNum : m_PageObjList) { |
| bool bExists = false; |
| RetainPtr<CPDF_Object> pObj = GetObject(dwPageObjNum, &bExists); |
| if (!pObj) { |
| if (bExists) |
| UnavailObjList.push_back(dwPageObjNum); |
| continue; |
| } |
| CPDF_Array* pArray = ToArray(pObj.Get()); |
| if (pArray) { |
| CPDF_ArrayLocker locker(pArray); |
| for (const auto& pArrayObj : locker) { |
| if (CPDF_Reference* pRef = ToReference(pArrayObj.Get())) |
| UnavailObjList.push_back(pRef->GetRefObjNum()); |
| } |
| } |
| if (!pObj->IsDictionary()) |
| continue; |
| |
| ByteString type = pObj->GetDict()->GetNameFor("Type"); |
| if (type == "Pages") { |
| m_PagesArray.push_back(std::move(pObj)); |
| continue; |
| } |
| } |
| m_PageObjList.clear(); |
| if (!UnavailObjList.empty()) { |
| m_PageObjList = std::move(UnavailObjList); |
| return false; |
| } |
| size_t iPages = m_PagesArray.size(); |
| for (size_t i = 0; i < iPages; ++i) { |
| RetainPtr<CPDF_Object> pPages = std::move(m_PagesArray[i]); |
| if (pPages && !GetPageKids(pPages.Get())) { |
| m_PagesArray.clear(); |
| m_docStatus = PDF_DATAAVAIL_ERROR; |
| return false; |
| } |
| } |
| m_PagesArray.clear(); |
| if (m_PageObjList.empty()) |
| m_docStatus = PDF_DATAAVAIL_DONE; |
| |
| return true; |
| } |
| |
| bool CPDF_DataAvail::GetPageKids(CPDF_Object* pPages) { |
| CPDF_Dictionary* pDict = pPages->GetDict(); |
| CPDF_Object* pKids = pDict ? pDict->GetObjectFor("Kids") : nullptr; |
| if (!pKids) |
| return true; |
| |
| switch (pKids->GetType()) { |
| case CPDF_Object::kReference: |
| m_PageObjList.push_back(pKids->AsReference()->GetRefObjNum()); |
| break; |
| case CPDF_Object::kArray: { |
| CPDF_Array* pKidsArray = pKids->AsArray(); |
| for (size_t i = 0; i < pKidsArray->size(); ++i) { |
| if (CPDF_Reference* pRef = ToReference(pKidsArray->GetObjectAt(i))) |
| m_PageObjList.push_back(pRef->GetRefObjNum()); |
| } |
| break; |
| } |
| default: |
| m_docStatus = PDF_DATAAVAIL_ERROR; |
| return false; |
| } |
| return true; |
| } |
| |
| bool CPDF_DataAvail::CheckPages() { |
| bool bExists = false; |
| RetainPtr<CPDF_Object> pPages = GetObject(m_PagesObjNum, &bExists); |
| if (!bExists) { |
| m_docStatus = PDF_DATAAVAIL_LOADALLFILE; |
| return true; |
| } |
| |
| if (!pPages) { |
| if (m_docStatus == PDF_DATAAVAIL_ERROR) { |
| m_docStatus = PDF_DATAAVAIL_LOADALLFILE; |
| return true; |
| } |
| return false; |
| } |
| |
| if (!GetPageKids(pPages.Get())) { |
| m_docStatus = PDF_DATAAVAIL_ERROR; |
| return false; |
| } |
| |
| m_docStatus = PDF_DATAAVAIL_PAGE; |
| return true; |
| } |
| |
| bool CPDF_DataAvail::CheckHeader() { |
| switch (CheckHeaderAndLinearized()) { |
| case DocAvailStatus::DataAvailable: |
| m_docStatus = m_pLinearized ? PDF_DATAAVAIL_FIRSTPAGE |
| : PDF_DATAAVAIL_LOADALLCROSSREF; |
| return true; |
| case DocAvailStatus::DataNotAvailable: |
| return false; |
| case DocAvailStatus::DataError: |
| m_docStatus = PDF_DATAAVAIL_ERROR; |
| return true; |
| default: |
| NOTREACHED(); |
| return false; |
| } |
| } |
| |
| bool CPDF_DataAvail::CheckFirstPage() { |
| if (!m_pLinearized->GetFirstPageEndOffset() || |
| !m_pLinearized->GetFileSize() || |
| !m_pLinearized->GetMainXRefTableFirstEntryOffset()) { |
| m_docStatus = PDF_DATAAVAIL_ERROR; |
| return false; |
| } |
| |
| uint32_t dwEnd = m_pLinearized->GetFirstPageEndOffset(); |
| dwEnd += 512; |
| if ((FX_FILESIZE)dwEnd > m_dwFileLen) |
| dwEnd = (uint32_t)m_dwFileLen; |
| |
| const FX_FILESIZE start_pos = m_dwFileLen > 1024 ? 1024 : m_dwFileLen; |
| const size_t data_size = dwEnd > 1024 ? static_cast<size_t>(dwEnd - 1024) : 0; |
| if (!GetValidator()->CheckDataRangeAndRequestIfUnavailable(start_pos, |
| data_size)) |
| return false; |
| |
| m_docStatus = |
| m_bSupportHintTable ? PDF_DATAAVAIL_HINTTABLE : PDF_DATAAVAIL_DONE; |
| return true; |
| } |
| |
| bool CPDF_DataAvail::CheckHintTables() { |
| const CPDF_ReadValidator::Session read_session(GetValidator()); |
| m_pHintTables = |
| CPDF_HintTables::Parse(GetSyntaxParser(), m_pLinearized.get()); |
| |
| if (GetValidator()->read_error()) { |
| m_docStatus = PDF_DATAAVAIL_ERROR; |
| return true; |
| } |
| if (GetValidator()->has_unavailable_data()) |
| return false; |
| |
| m_docStatus = PDF_DATAAVAIL_DONE; |
| return true; |
| } |
| |
| RetainPtr<CPDF_Object> CPDF_DataAvail::ParseIndirectObjectAt( |
| FX_FILESIZE pos, |
| uint32_t objnum, |
| CPDF_IndirectObjectHolder* pObjList) const { |
| const FX_FILESIZE SavedPos = GetSyntaxParser()->GetPos(); |
| GetSyntaxParser()->SetPos(pos); |
| RetainPtr<CPDF_Object> result = GetSyntaxParser()->GetIndirectObject( |
| pObjList, CPDF_SyntaxParser::ParseType::kLoose); |
| GetSyntaxParser()->SetPos(SavedPos); |
| return (result && (!objnum || result->GetObjNum() == objnum)) |
| ? std::move(result) |
| : nullptr; |
| } |
| |
| CPDF_DataAvail::DocLinearizationStatus CPDF_DataAvail::IsLinearizedPDF() { |
| switch (CheckHeaderAndLinearized()) { |
| case DocAvailStatus::DataAvailable: |
| return m_pLinearized ? DocLinearizationStatus::Linearized |
| : DocLinearizationStatus::NotLinearized; |
| case DocAvailStatus::DataNotAvailable: |
| return DocLinearizationStatus::LinearizationUnknown; |
| case DocAvailStatus::DataError: |
| return DocLinearizationStatus::NotLinearized; |
| default: |
| NOTREACHED(); |
| return DocLinearizationStatus::LinearizationUnknown; |
| } |
| } |
| |
| CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckHeaderAndLinearized() { |
| if (m_bHeaderAvail) |
| return DocAvailStatus::DataAvailable; |
| |
| const CPDF_ReadValidator::Session read_session(GetValidator()); |
| const Optional<FX_FILESIZE> header_offset = GetHeaderOffset(GetValidator()); |
| if (GetValidator()->has_read_problems()) |
| return DocAvailStatus::DataNotAvailable; |
| |
| if (!header_offset) |
| return DocAvailStatus::DataError; |
| |
| m_parser.m_pSyntax = |
| std::make_unique<CPDF_SyntaxParser>(GetValidator(), *header_offset); |
| m_pLinearized = m_parser.ParseLinearizedHeader(); |
| if (GetValidator()->has_read_problems()) |
| return DocAvailStatus::DataNotAvailable; |
| |
| m_bHeaderAvail = true; |
| return DocAvailStatus::DataAvailable; |
| } |
| |
| bool CPDF_DataAvail::CheckPage(uint32_t dwPage) { |
| while (true) { |
| switch (m_docStatus) { |
| case PDF_DATAAVAIL_PAGETREE: |
| if (!LoadDocPages()) |
| return false; |
| break; |
| case PDF_DATAAVAIL_PAGE: |
| if (!LoadDocPage(dwPage)) |
| return false; |
| break; |
| case PDF_DATAAVAIL_ERROR: |
| return LoadAllFile(); |
| default: |
| m_bPagesTreeLoad = true; |
| m_bPagesLoad = true; |
| m_bCurPageDictLoadOK = true; |
| m_docStatus = PDF_DATAAVAIL_PAGE; |
| return true; |
| } |
| } |
| } |
| |
| bool CPDF_DataAvail::CheckArrayPageNode(uint32_t dwPageNo, |
| PageNode* pPageNode) { |
| bool bExists = false; |
| RetainPtr<CPDF_Object> pPages = GetObject(dwPageNo, &bExists); |
| if (!bExists) { |
| m_docStatus = PDF_DATAAVAIL_ERROR; |
| return false; |
| } |
| |
| if (!pPages) |
| return false; |
| |
| CPDF_Array* pArray = pPages->AsArray(); |
| if (!pArray) { |
| m_docStatus = PDF_DATAAVAIL_ERROR; |
| return false; |
| } |
| |
| pPageNode->m_type = PDF_PAGENODE_PAGES; |
| for (size_t i = 0; i < pArray->size(); ++i) { |
| CPDF_Reference* pKid = ToReference(pArray->GetObjectAt(i)); |
| if (!pKid) |
| continue; |
| |
| auto pNode = std::make_unique<PageNode>(); |
| pNode->m_dwPageNo = pKid->GetRefObjNum(); |
| pPageNode->m_ChildNodes.push_back(std::move(pNode)); |
| } |
| return true; |
| } |
| |
| bool CPDF_DataAvail::CheckUnknownPageNode(uint32_t dwPageNo, |
| PageNode* pPageNode) { |
| bool bExists = false; |
| RetainPtr<CPDF_Object> pPage = GetObject(dwPageNo, &bExists); |
| if (!bExists) { |
| m_docStatus = PDF_DATAAVAIL_ERROR; |
| return false; |
| } |
| |
| if (!pPage) |
| return false; |
| |
| if (pPage->IsArray()) { |
| pPageNode->m_dwPageNo = dwPageNo; |
| pPageNode->m_type = PDF_PAGENODE_ARRAY; |
| return true; |
| } |
| |
| if (!pPage->IsDictionary()) { |
| m_docStatus = PDF_DATAAVAIL_ERROR; |
| return false; |
| } |
| |
| pPageNode->m_dwPageNo = dwPageNo; |
| CPDF_Dictionary* pDict = pPage->GetDict(); |
| const ByteString type = pDict->GetNameFor("Type"); |
| if (type == "Page") { |
| pPageNode->m_type = PDF_PAGENODE_PAGE; |
| return true; |
| } |
| |
| if (type != "Pages") { |
| m_docStatus = PDF_DATAAVAIL_ERROR; |
| return false; |
| } |
| |
| pPageNode->m_type = PDF_PAGENODE_PAGES; |
| CPDF_Object* pKids = pDict->GetObjectFor("Kids"); |
| if (!pKids) { |
| m_docStatus = PDF_DATAAVAIL_PAGE; |
| return true; |
| } |
| |
| switch (pKids->GetType()) { |
| case CPDF_Object::kReference: { |
| CPDF_Reference* pKid = pKids->AsReference(); |
| auto pNode = std::make_unique<PageNode>(); |
| pNode->m_dwPageNo = pKid->GetRefObjNum(); |
| pPageNode->m_ChildNodes.push_back(std::move(pNode)); |
| break; |
| } |
| case CPDF_Object::kArray: { |
| CPDF_Array* pKidsArray = pKids->AsArray(); |
| for (size_t i = 0; i < pKidsArray->size(); ++i) { |
| CPDF_Reference* pKid = ToReference(pKidsArray->GetObjectAt(i)); |
| if (!pKid) |
| continue; |
| |
| auto pNode = std::make_unique<PageNode>(); |
| pNode->m_dwPageNo = pKid->GetRefObjNum(); |
| pPageNode->m_ChildNodes.push_back(std::move(pNode)); |
| } |
| break; |
| } |
| default: |
| break; |
| } |
| return true; |
| } |
| |
// Recursively walks the children of |pageNode| looking for page |iPage|.
// |iCount| is an in/out counter that is incremented for every PAGE node
// visited (LoadDocPage() seeds it with -1) and |level| is the current
// recursion depth.
// Returns false when the depth limit is reached, when a child cannot be
// classified or expanded yet, or when the node has no usable children (ERROR
// state). Sets the DONE state and returns true as soon as |iCount| equals
// |iPage|.
bool CPDF_DataAvail::CheckPageNode(const CPDF_DataAvail::PageNode& pageNode,
                                   int32_t iPage,
                                   int32_t& iCount,
                                   int level) {
  if (level >= kMaxPageRecursionDepth)
    return false;

  int32_t iSize = pdfium::CollectionSize<int32_t>(pageNode.m_ChildNodes);
  // NOTE(review): |iPage| is compared against the number of direct children
  // of this node -- confirm this is the intended bound.
  if (iSize <= 0 || iPage >= iSize) {
    m_docStatus = PDF_DATAAVAIL_ERROR;
    return false;
  }
  for (int32_t i = 0; i < iSize; ++i) {
    PageNode* pNode = pageNode.m_ChildNodes[i].get();
    if (!pNode)
      continue;

    if (pNode->m_type == PDF_PAGENODE_UNKNOWN) {
      // Updates the type for the unknown page node.
      if (!CheckUnknownPageNode(pNode->m_dwPageNo, pNode))
        return false;
    }
    if (pNode->m_type == PDF_PAGENODE_ARRAY) {
      // Updates a more specific type for the array page node.
      if (!CheckArrayPageNode(pNode->m_dwPageNo, pNode))
        return false;
    }
    switch (pNode->m_type) {
      case PDF_PAGENODE_PAGE:
        iCount++;
        // Tell the document which object holds the requested page.
        if (iPage == iCount && m_pDocument)
          m_pDocument->SetPageObjNum(iPage, pNode->m_dwPageNo);
        break;
      case PDF_PAGENODE_PAGES:
        if (!CheckPageNode(*pNode, iPage, iCount, level + 1))
          return false;
        break;
      case PDF_PAGENODE_UNKNOWN:
      case PDF_PAGENODE_ARRAY:
        // Already converted above, error if we get here.
        return false;
    }
    // The requested page has been reached; no need to visit more siblings.
    if (iPage == iCount) {
      m_docStatus = PDF_DATAAVAIL_DONE;
      return true;
    }
  }
  return true;
}
| |
| bool CPDF_DataAvail::LoadDocPage(uint32_t dwPage) { |
| int iPage = pdfium::base::checked_cast<int>(dwPage); |
| if (m_pDocument->GetPageCount() <= iPage || |
| m_pDocument->IsPageLoaded(iPage)) { |
| m_docStatus = PDF_DATAAVAIL_DONE; |
| return true; |
| } |
| if (m_PageNode.m_type == PDF_PAGENODE_PAGE) { |
| m_docStatus = iPage == 0 ? PDF_DATAAVAIL_DONE : PDF_DATAAVAIL_ERROR; |
| return true; |
| } |
| int32_t iCount = -1; |
| return CheckPageNode(m_PageNode, iPage, iCount, 0); |
| } |
| |
| bool CPDF_DataAvail::CheckPageCount() { |
| bool bExists = false; |
| RetainPtr<CPDF_Object> pPages = GetObject(m_PagesObjNum, &bExists); |
| if (!bExists) { |
| m_docStatus = PDF_DATAAVAIL_ERROR; |
| return false; |
| } |
| if (!pPages) |
| return false; |
| |
| CPDF_Dictionary* pPagesDict = pPages->GetDict(); |
| if (!pPagesDict) { |
| m_docStatus = PDF_DATAAVAIL_ERROR; |
| return false; |
| } |
| if (!pPagesDict->KeyExist("Kids")) |
| return true; |
| |
| return pPagesDict->GetIntegerFor("Count") > 0; |
| } |
| |
| bool CPDF_DataAvail::LoadDocPages() { |
| if (!CheckUnknownPageNode(m_PagesObjNum, &m_PageNode)) |
| return false; |
| |
| if (CheckPageCount()) { |
| m_docStatus = PDF_DATAAVAIL_PAGE; |
| return true; |
| } |
| |
| m_bTotalLoadPageTree = true; |
| return false; |
| } |
| |
| bool CPDF_DataAvail::LoadPages() { |
| while (!m_bPagesTreeLoad) { |
| if (!CheckPageStatus()) |
| return false; |
| } |
| |
| if (m_bPagesLoad) |
| return true; |
| |
| m_pDocument->LoadPages(); |
| return false; |
| } |
| |
// Ensures that the main cross-reference table of a linearized document is
// downloaded and loaded into the document's parser.
// The load is attempted once (guarded by |m_bMainXRefLoadTried|); later
// calls report the cached outcome in |m_bLinearedDataOK|.
// Returns DataError when required objects are missing, the trailer's "Prev"
// offset is invalid or the table fails to load, and DataNotAvailable while
// the data range has not been downloaded.
CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckLinearizedData() {
  if (m_bLinearedDataOK)
    return DataAvailable;
  ASSERT(m_pLinearized);
  if (!m_pLinearized->GetMainXRefTableFirstEntryOffset() || !m_pDocument ||
      !m_pDocument->GetParser() || !m_pDocument->GetParser()->GetTrailer()) {
    return DataError;
  }

  if (!m_bMainXRefLoadTried) {
    // "Prev" in the trailer gives the offset of the main xref table.
    const FX_SAFE_FILESIZE prev =
        m_pDocument->GetParser()->GetTrailer()->GetIntegerFor("Prev");
    const FX_FILESIZE main_xref_offset = prev.ValueOrDefault(-1);
    if (main_xref_offset < 0)
      return DataError;

    if (main_xref_offset == 0)
      return DataAvailable;

    // Everything from the main xref offset to the end of the file is needed.
    FX_SAFE_SIZE_T data_size = m_dwFileLen;
    data_size -= main_xref_offset;
    if (!data_size.IsValid())
      return DataError;

    if (!GetValidator()->CheckDataRangeAndRequestIfUnavailable(
            main_xref_offset, data_size.ValueOrDie()))
      return DataNotAvailable;

    CPDF_Parser::Error eRet =
        m_pDocument->GetParser()->LoadLinearizedMainXRefTable();
    // Mark the attempt as done regardless of its outcome.
    m_bMainXRefLoadTried = true;
    if (eRet != CPDF_Parser::SUCCESS)
      return DataError;

    if (!PreparePageItem())
      return DataNotAvailable;

    m_bMainXRefLoadedOK = true;
    m_bLinearedDataOK = true;
  }

  return m_bLinearedDataOK ? DataAvailable : DataNotAvailable;
}
| |
// Checks whether all data needed for page |dwPage| has been downloaded.
// |pHints| is installed on the validator while the check runs.
// Returns DataError when no document is attached or the page dictionary
// cannot be obtained, DataNotAvailable while data is missing, and
// DataAvailable otherwise. Pages that were fully checked are remembered in
// |m_pagesLoadState| so that later calls return immediately.
CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::IsPageAvail(
    uint32_t dwPage,
    DownloadHints* pHints) {
  if (!m_pDocument)
    return DataError;

  const int iPage = pdfium::base::checked_cast<int>(dwPage);
  if (iPage >= m_pDocument->GetPageCount()) {
    // This is an XFA page.
    return DataAvailable;
  }

  // The first check of a page restarts the page-dictionary loading.
  if (IsFirstCheck(dwPage)) {
    m_bCurPageDictLoadOK = false;
  }

  if (pdfium::Contains(m_pagesLoadState, dwPage))
    return DataAvailable;

  const HintsScope hints_scope(GetValidator(), pHints);
  if (m_pLinearized) {
    // The first page of a linearized file is checked on its own, without
    // requiring the main cross-reference table.
    if (dwPage == m_pLinearized->GetFirstPageNo()) {
      auto* pPageDict = m_pDocument->GetPageDictionary(iPage);
      if (!pPageDict)
        return DataError;

      auto page_num_obj = std::make_pair(
          dwPage, std::make_unique<CPDF_PageObjectAvail>(
                      GetValidator(), m_pDocument.Get(), pPageDict));

      // The element yielded by insert() is used, whether new or existing.
      CPDF_PageObjectAvail* page_obj_avail =
          m_PagesObjAvail.insert(std::move(page_num_obj)).first->second.get();
      // TODO(art-snake): Check resources.
      return page_obj_avail->CheckAvail();
    }

    DocAvailStatus nResult = CheckLinearizedData();
    if (nResult != DataAvailable)
      return nResult;

    // With hint tables the page can be located directly.
    if (m_pHintTables) {
      nResult = m_pHintTables->CheckPage(dwPage);
      if (nResult != DataAvailable)
        return nResult;
      if (GetPageDictionary(dwPage)) {
        m_pagesLoadState.insert(dwPage);
        return DataAvailable;
      }
    }

    // Without a usable main xref table, wait for the whole file and rebuild
    // the cross-reference data from it.
    if (!m_bMainXRefLoadedOK) {
      if (!LoadAllFile())
        return DataNotAvailable;
      m_pDocument->GetParser()->RebuildCrossRef();
      ResetFirstCheck(dwPage);
      return DataAvailable;
    }
    if (m_bTotalLoadPageTree) {
      if (!LoadPages())
        return DataNotAvailable;
    } else {
      if (!m_bCurPageDictLoadOK && !CheckPage(dwPage))
        return DataNotAvailable;
    }
  } else {
    if (!m_bTotalLoadPageTree && !m_bCurPageDictLoadOK && !CheckPage(dwPage)) {
      return DataNotAvailable;
    }
  }

  if (CheckAcroForm() == DocFormStatus::FormNotAvailable)
    return DataNotAvailable;

  auto* pPageDict = m_pDocument->GetPageDictionary(iPage);
  if (!pPageDict)
    return DataError;

  {
    // Check the objects reachable from the page dictionary.
    auto page_num_obj = std::make_pair(
        dwPage, std::make_unique<CPDF_PageObjectAvail>(
                    GetValidator(), m_pDocument.Get(), pPageDict));
    CPDF_PageObjectAvail* page_obj_avail =
        m_PagesObjAvail.insert(std::move(page_num_obj)).first->second.get();
    const DocAvailStatus status = page_obj_avail->CheckAvail();
    if (status != DocAvailStatus::DataAvailable)
      return status;
  }

  const DocAvailStatus resources_status = CheckResources(pPageDict);
  if (resources_status != DocAvailStatus::DataAvailable)
    return resources_status;

  // The page is complete: reset the per-page state and remember the result.
  m_bCurPageDictLoadOK = false;
  ResetFirstCheck(dwPage);
  m_pagesLoadState.insert(dwPage);
  return DataAvailable;
}
| |
| CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckResources( |
| CPDF_Dictionary* page) { |
| ASSERT(page); |
| const CPDF_ReadValidator::Session read_session(GetValidator()); |
| CPDF_Object* resources = GetResourceObject(page); |
| if (GetValidator()->has_read_problems()) |
| return DocAvailStatus::DataNotAvailable; |
| |
| if (!resources) |
| return DocAvailStatus::DataAvailable; |
| |
| CPDF_PageObjectAvail* resource_avail = |
| m_PagesResourcesAvail |
| .insert(std::make_pair( |
| resources, std::make_unique<CPDF_PageObjectAvail>( |
| GetValidator(), m_pDocument.Get(), resources))) |
| .first->second.get(); |
| return resource_avail->CheckAvail(); |
| } |
| |
// Returns the read validator that wraps the file stream passed to the
// constructor.
RetainPtr<CPDF_ReadValidator> CPDF_DataAvail::GetValidator() const {
  return m_pFileRead;
}
| |
| CPDF_SyntaxParser* CPDF_DataAvail::GetSyntaxParser() const { |
| return m_pDocument ? m_pDocument->GetParser()->m_pSyntax.get() |
| : m_parser.m_pSyntax.get(); |
| } |
| |
| int CPDF_DataAvail::GetPageCount() const { |
| if (m_pLinearized) |
| return m_pLinearized->GetPageCount(); |
| return m_pDocument ? m_pDocument->GetPageCount() : 0; |
| } |
| |
| CPDF_Dictionary* CPDF_DataAvail::GetPageDictionary(int index) const { |
| if (!m_pDocument || index < 0 || index >= GetPageCount()) |
| return nullptr; |
| CPDF_Dictionary* page = m_pDocument->GetPageDictionary(index); |
| if (page) |
| return page; |
| if (!m_pLinearized || !m_pHintTables) |
| return nullptr; |
| |
| if (index == static_cast<int>(m_pLinearized->GetFirstPageNo())) |
| return nullptr; |
| FX_FILESIZE szPageStartPos = 0; |
| FX_FILESIZE szPageLength = 0; |
| uint32_t dwObjNum = 0; |
| const bool bPagePosGot = m_pHintTables->GetPagePos(index, &szPageStartPos, |
| &szPageLength, &dwObjNum); |
| if (!bPagePosGot || !dwObjNum) |
| return nullptr; |
| // We should say to the document, which object is the page. |
| m_pDocument->SetPageObjNum(index, dwObjNum); |
| // Page object already can be parsed in document. |
| if (!m_pDocument->GetIndirectObject(dwObjNum)) { |
| m_pDocument->ReplaceIndirectObjectIfHigherGeneration( |
| dwObjNum, |
| ParseIndirectObjectAt(szPageStartPos, dwObjNum, m_pDocument.Get())); |
| } |
| if (!ValidatePage(index)) |
| return nullptr; |
| return m_pDocument->GetPageDictionary(index); |
| } |
| |
// Public entry point for the form availability check: installs |pHints| on
// the validator for the duration of the call and delegates to
// CheckAcroForm().
CPDF_DataAvail::DocFormStatus CPDF_DataAvail::IsFormAvail(
    DownloadHints* pHints) {
  const HintsScope hints_scope(GetValidator(), pHints);
  return CheckAcroForm();
}
| |
| CPDF_DataAvail::DocFormStatus CPDF_DataAvail::CheckAcroForm() { |
| if (!m_pDocument) |
| return FormAvailable; |
| |
| if (m_pLinearized) { |
| DocAvailStatus nDocStatus = CheckLinearizedData(); |
| if (nDocStatus == DataError) |
| return FormError; |
| if (nDocStatus == DataNotAvailable) |
| return FormNotAvailable; |
| } |
| |
| if (!m_pFormAvail) { |
| CPDF_Dictionary* pRoot = m_pDocument->GetRoot(); |
| if (!pRoot) |
| return FormAvailable; |
| |
| CPDF_Object* pAcroForm = pRoot->GetObjectFor("AcroForm"); |
| if (!pAcroForm) |
| return FormNotExist; |
| |
| m_pFormAvail = std::make_unique<CPDF_PageObjectAvail>( |
| GetValidator(), m_pDocument.Get(), pAcroForm); |
| } |
| switch (m_pFormAvail->CheckAvail()) { |
| case DocAvailStatus::DataError: |
| return DocFormStatus::FormError; |
| case DocAvailStatus::DataNotAvailable: |
| return DocFormStatus::FormNotAvailable; |
| case DocAvailStatus::DataAvailable: |
| return DocFormStatus::FormAvailable; |
| default: |
| NOTREACHED(); |
| } |
| return DocFormStatus::FormError; |
| } |
| |
| bool CPDF_DataAvail::ValidatePage(uint32_t dwPage) const { |
| int iPage = pdfium::base::checked_cast<int>(dwPage); |
| auto* pPageDict = m_pDocument->GetPageDictionary(iPage); |
| if (!pPageDict) |
| return false; |
| CPDF_PageObjectAvail obj_avail(GetValidator(), m_pDocument.Get(), pPageDict); |
| return obj_avail.CheckAvail() == DocAvailStatus::DataAvailable; |
| } |
| |
| std::pair<CPDF_Parser::Error, std::unique_ptr<CPDF_Document>> |
| CPDF_DataAvail::ParseDocument( |
| std::unique_ptr<CPDF_Document::RenderDataIface> pRenderData, |
| std::unique_ptr<CPDF_Document::PageDataIface> pPageData, |
| const char* password) { |
| if (m_pDocument) { |
| // We already returned parsed document. |
| return std::make_pair(CPDF_Parser::HANDLER_ERROR, nullptr); |
| } |
| auto document = std::make_unique<CPDF_Document>(std::move(pRenderData), |
| std::move(pPageData)); |
| document->AddObserver(this); |
| |
| CPDF_ReadValidator::Session read_session(GetValidator()); |
| CPDF_Parser::Error error = |
| document->LoadLinearizedDoc(GetValidator(), password); |
| |
| // Additional check, that all ok. |
| if (GetValidator()->has_read_problems()) { |
| NOTREACHED(); |
| return std::make_pair(CPDF_Parser::HANDLER_ERROR, nullptr); |
| } |
| |
| if (error != CPDF_Parser::SUCCESS) |
| return std::make_pair(error, nullptr); |
| |
| m_pDocument = document.get(); |
| return std::make_pair(CPDF_Parser::SUCCESS, std::move(document)); |
| } |
| |
// A new page node starts out unclassified (PDF_PAGENODE_UNKNOWN).
CPDF_DataAvail::PageNode::PageNode() : m_type(PDF_PAGENODE_UNKNOWN) {}
| |
// Out-of-line defaulted destructor.
CPDF_DataAvail::PageNode::~PageNode() = default;