blob: d1513a603b870c13a9085c8234de6e12567cf7c7 [file] [log] [blame] [edit]
// Copyright 2016 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
#include "core/fpdfapi/parser/cpdf_data_avail.h"
#include <algorithm>
#include <memory>
#include <utility>
#include "core/fpdfapi/parser/cpdf_array.h"
#include "core/fpdfapi/parser/cpdf_cross_ref_avail.h"
#include "core/fpdfapi/parser/cpdf_dictionary.h"
#include "core/fpdfapi/parser/cpdf_document.h"
#include "core/fpdfapi/parser/cpdf_hint_tables.h"
#include "core/fpdfapi/parser/cpdf_linearized_header.h"
#include "core/fpdfapi/parser/cpdf_name.h"
#include "core/fpdfapi/parser/cpdf_number.h"
#include "core/fpdfapi/parser/cpdf_page_object_avail.h"
#include "core/fpdfapi/parser/cpdf_read_validator.h"
#include "core/fpdfapi/parser/cpdf_reference.h"
#include "core/fpdfapi/parser/cpdf_stream.h"
#include "core/fpdfapi/parser/cpdf_syntax_parser.h"
#include "core/fpdfapi/parser/fpdf_parser_utility.h"
#include "core/fxcrt/fx_extension.h"
#include "core/fxcrt/fx_safe_types.h"
#include "third_party/base/compiler_specific.h"
#include "third_party/base/notreached.h"
#include "third_party/base/numerics/safe_conversions.h"
#include "third_party/base/stl_util.h"
namespace {
// Looks up the "Resources" entry for |pDict|, walking up the chain of "Parent"
// dictionaries until one of them provides it. Returns nullptr when no
// dictionary in the chain has the entry, or when the chain is longer than the
// depth limit below (which also guards against cyclic "Parent" links).
CPDF_Object* GetResourceObject(CPDF_Dictionary* pDict) {
  constexpr size_t kMaxHierarchyDepth = 64;
  CPDF_Dictionary* current = pDict;
  for (size_t levels_visited = 0;
       current && levels_visited <= kMaxHierarchyDepth; ++levels_visited) {
    if (CPDF_Object* resources = current->GetObjectFor("Resources"))
      return resources;

    // Nothing at this level; continue with the parent dictionary, if any.
    CPDF_Object* parent = current->GetObjectFor("Parent");
    current = parent ? parent->GetDict() : nullptr;
  }
  // Either the chain ended or it is too deep (probably a cycle).
  return nullptr;
}
// Scoped guard that installs |hints| on |validator| for as long as the guard
// is alive and resets the validator's hints to nullptr on destruction.
//
// The guard is deliberately non-copyable: a copy would share the validator and
// its destructor would clear the hints while the original guard is still in
// scope.
class HintsScope {
 public:
  // |validator| must be non-null. |hints| is passed to SetDownloadHints()
  // as is.
  HintsScope(RetainPtr<CPDF_ReadValidator> validator,
             CPDF_DataAvail::DownloadHints* hints)
      : validator_(std::move(validator)) {
    ASSERT(validator_);
    validator_->SetDownloadHints(hints);
  }
  HintsScope(const HintsScope&) = delete;
  HintsScope& operator=(const HintsScope&) = delete;

  ~HintsScope() { validator_->SetDownloadHints(nullptr); }

 private:
  RetainPtr<CPDF_ReadValidator> validator_;
};
} // namespace
// Out-of-line definition of the (defaulted) FileAvail destructor.
CPDF_DataAvail::FileAvail::~FileAvail() = default;
// Out-of-line definition of the (defaulted) DownloadHints destructor.
CPDF_DataAvail::DownloadHints::~DownloadHints() = default;
// Builds the availability checker for the stream |pFileRead|.
// |pFileRead| and |pFileAvail| are wrapped in a CPDF_ReadValidator that is
// stored in m_pFileRead; m_dwFileLen caches the size reported by that
// validator and |bSupportHintTable| is recorded in m_bSupportHintTable.
// NOTE(review): m_dwFileLen is initialized from m_pFileRead, so this relies on
// m_pFileRead being declared before m_dwFileLen in the class -- confirm in the
// header.
CPDF_DataAvail::CPDF_DataAvail(
FileAvail* pFileAvail,
const RetainPtr<IFX_SeekableReadStream>& pFileRead,
bool bSupportHintTable)
: m_pFileRead(
pdfium::MakeRetain<CPDF_ReadValidator>(pFileRead, pFileAvail)),
m_dwFileLen(m_pFileRead->GetSize()),
m_bSupportHintTable(bSupportHintTable) {}
// Releases the hint tables first, then unregisters this object as an observer
// of the document, if a document is still attached.
CPDF_DataAvail::~CPDF_DataAvail() {
  m_pHintTables.reset();
  if (!m_pDocument)
    return;

  m_pDocument->RemoveObserver(this);
}
// Observer callback: forgets the document pointer and drops the cached state
// kept alongside it (the form availability checker, the pending "Pages"
// objects, and the per-page object / resource availability containers).
// NOTE(review): presumably invoked when the observed CPDF_Document is
// destroyed (this object registers itself via AddObserver() in
// ParseDocument()) -- confirm against the observer interface.
void CPDF_DataAvail::OnObservableDestroyed() {
m_pDocument = nullptr;
m_pFormAvail.reset();
m_PagesArray.clear();
m_PagesObjAvail.clear();
m_PagesResourcesAvail.clear();
}
// Drives the document-level state machine until the document is reported
// available or a step cannot make progress.
// Returns DataError for an empty file, DataNotAvailable when a step returned
// false, and DataAvailable once m_bDocAvail is set.
// |pHints| is installed on the validator for the duration of the call.
CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::IsDocAvail(
    DownloadHints* pHints) {
  if (m_dwFileLen == 0)
    return DataError;

  const HintsScope hints_scope(GetValidator(), pHints);
  for (;;) {
    if (m_bDocAvail)
      return DataAvailable;
    if (!CheckDocStatus())
      return DataNotAvailable;
  }
}
// Runs the single step of the document-level state machine selected by
// m_docStatus. Returns the result of that step; for the terminal states it
// marks the document as available and returns true.
bool CPDF_DataAvail::CheckDocStatus() {
  switch (m_docStatus) {
    case PDF_DATAAVAIL_HEADER:
      return CheckHeader();
    case PDF_DATAAVAIL_FIRSTPAGE:
      return CheckFirstPage();
    case PDF_DATAAVAIL_HINTTABLE:
      return CheckHintTables();
    case PDF_DATAAVAIL_LOADALLCROSSREF:
      return CheckAndLoadAllXref();
    case PDF_DATAAVAIL_ROOT:
      return CheckRoot();
    case PDF_DATAAVAIL_INFO:
      return CheckInfo();
    case PDF_DATAAVAIL_PAGETREE:
      return m_bTotalLoadPageTree ? CheckPages() : LoadDocPages();
    case PDF_DATAAVAIL_PAGE:
      if (!m_bTotalLoadPageTree) {
        // Individual pages are checked later, on demand.
        m_docStatus = PDF_DATAAVAIL_PAGE_LATERLOAD;
        return true;
      }
      return CheckPage();
    case PDF_DATAAVAIL_LOADALLFILE:
    case PDF_DATAAVAIL_ERROR:
      // Both states fall back to requiring the whole file.
      return LoadAllFile();
    case PDF_DATAAVAIL_PAGE_LATERLOAD:
      m_docStatus = PDF_DATAAVAIL_PAGE;
      m_bDocAvail = true;
      return true;
    default:
      m_bDocAvail = true;
      return true;
  }
}
// Runs one step of the page-tree loading sequence selected by m_docStatus.
// For any state other than PAGETREE, PAGE and ERROR the page tree and the
// pages are flagged as loaded and true is returned.
bool CPDF_DataAvail::CheckPageStatus() {
  if (m_docStatus == PDF_DATAAVAIL_PAGETREE)
    return CheckPages();
  if (m_docStatus == PDF_DATAAVAIL_PAGE)
    return CheckPage();
  if (m_docStatus == PDF_DATAAVAIL_ERROR)
    return LoadAllFile();

  m_bPagesTreeLoad = true;
  m_bPagesLoad = true;
  return true;
}
// Asks the validator to check (and request, if needed) the whole file.
// Moves to PDF_DATAAVAIL_DONE and returns true when that check succeeds;
// returns false otherwise, leaving the state untouched.
bool CPDF_DataAvail::LoadAllFile() {
  if (!GetValidator()->CheckWholeFileAndRequestIfUnavailable())
    return false;

  m_docStatus = PDF_DATAAVAIL_DONE;
  return true;
}
// Makes sure all cross-reference data is available and loads it into
// m_parser.
// Returns true (state -> ROOT) on success. Returns false when data is still
// missing, when the cross-reference data is invalid (state -> ERROR), or when
// neither loader succeeds (state -> LOADALLFILE).
bool CPDF_DataAvail::CheckAndLoadAllXref() {
  if (!m_pCrossRefAvail) {
    // First call: locate the last cross-reference section.
    const CPDF_ReadValidator::Session read_session(GetValidator());
    const FX_FILESIZE last_xref_offset = m_parser.ParseStartXRef();
    if (GetValidator()->has_read_problems())
      return false;

    if (last_xref_offset <= 0) {
      m_docStatus = PDF_DATAAVAIL_ERROR;
      return false;
    }

    m_pCrossRefAvail = std::make_unique<CPDF_CrossRefAvail>(GetSyntaxParser(),
                                                            last_xref_offset);
  }

  switch (m_pCrossRefAvail->CheckAvail()) {
    case DocAvailStatus::DataAvailable:
      break;
    case DocAvailStatus::DataError:
      m_docStatus = PDF_DATAAVAIL_ERROR;
      return false;
    case DocAvailStatus::DataNotAvailable:
      return false;
    default:
      NOTREACHED();
      return false;
  }

  // Try the V4 loader first and fall back to the V5 loader.
  const bool cross_refs_loaded =
      m_parser.LoadAllCrossRefV4(m_pCrossRefAvail->last_crossref_offset()) ||
      m_parser.LoadAllCrossRefV5(m_pCrossRefAvail->last_crossref_offset());
  if (!cross_refs_loaded) {
    m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
    return false;
  }

  m_docStatus = PDF_DATAAVAIL_ROOT;
  return true;
}
// Parses indirect object |objnum| with the document's parser when a document
// is attached, otherwise with m_parser.
// Returns nullptr when the read hit problems (in that case |*pExistInFile|
// stays true) or when the parser produced no object (|*pExistInFile| is then
// set to false). |pExistInFile| may be null.
RetainPtr<CPDF_Object> CPDF_DataAvail::GetObject(uint32_t objnum,
                                                 bool* pExistInFile) {
  if (pExistInFile)
    *pExistInFile = true;

  CPDF_Parser* const parser =
      m_pDocument ? m_pDocument->GetParser() : &m_parser;

  RetainPtr<CPDF_Object> object;
  if (parser) {
    const CPDF_ReadValidator::Session read_session(GetValidator());
    object = parser->ParseIndirectObject(objnum);
    if (GetValidator()->has_read_problems())
      return nullptr;
  }

  if (pExistInFile && !object)
    *pExistInFile = false;
  return object;
}
// Parses the Info object, if the parser reports one, to verify that its data
// can be read. Returns false only when that read hit problems; otherwise
// advances the state to PAGETREE and returns true.
bool CPDF_DataAvail::CheckInfo() {
  const uint32_t info_objnum = m_parser.GetInfoObjNum();
  if (info_objnum != CPDF_Object::kInvalidObjNum) {
    const CPDF_ReadValidator::Session read_session(GetValidator());
    // The parsed object itself is not needed here.
    m_parser.ParseIndirectObject(info_objnum);
    if (GetValidator()->has_read_problems())
      return false;
  }

  m_docStatus = PDF_DATAAVAIL_PAGETREE;
  return true;
}
bool CPDF_DataAvail::CheckRoot() {
const uint32_t dwRootObjNum = m_parser.GetRootObjNum();
if (dwRootObjNum == CPDF_Object::kInvalidObjNum) {
m_docStatus = PDF_DATAAVAIL_ERROR;
return true;
}
const CPDF_ReadValidator::Session read_session(GetValidator());
m_pRoot = ToDictionary(m_parser.ParseIndirectObject(dwRootObjNum));
if (GetValidator()->has_read_problems())
return false;
const CPDF_Reference* pRef =
ToReference(m_pRoot ? m_pRoot->GetObjectFor("Pages") : nullptr);
if (!pRef) {
m_docStatus = PDF_DATAAVAIL_ERROR;
return false;
}
m_PagesObjNum = pRef->GetRefObjNum();
m_docStatus = PDF_DATAAVAIL_INFO;
return true;
}
bool CPDF_DataAvail::PreparePageItem() {
const CPDF_Dictionary* pRoot = m_pDocument->GetRoot();
const CPDF_Reference* pRef =
ToReference(pRoot ? pRoot->GetObjectFor("Pages") : nullptr);
if (!pRef) {
m_docStatus = PDF_DATAAVAIL_ERROR;
return false;
}
m_PagesObjNum = pRef->GetRefObjNum();
m_docStatus = PDF_DATAAVAIL_PAGETREE;
return true;
}
// Records |dwPage| in m_pageMapCheckState. Returns true when the page was not
// recorded yet, i.e. this is the first check since the last reset.
bool CPDF_DataAvail::IsFirstCheck(uint32_t dwPage) {
  const auto insert_result = m_pageMapCheckState.insert(dwPage);
  const bool newly_recorded = insert_result.second;
  return newly_recorded;
}
// Removes |dwPage| from m_pageMapCheckState so that the next IsFirstCheck()
// call for this page returns true again.
void CPDF_DataAvail::ResetFirstCheck(uint32_t dwPage) {
m_pageMapCheckState.erase(dwPage);
}
bool CPDF_DataAvail::CheckPage() {
std::vector<uint32_t> UnavailObjList;
for (uint32_t dwPageObjNum : m_PageObjList) {
bool bExists = false;
RetainPtr<CPDF_Object> pObj = GetObject(dwPageObjNum, &bExists);
if (!pObj) {
if (bExists)
UnavailObjList.push_back(dwPageObjNum);
continue;
}
CPDF_Array* pArray = ToArray(pObj.Get());
if (pArray) {
CPDF_ArrayLocker locker(pArray);
for (const auto& pArrayObj : locker) {
if (CPDF_Reference* pRef = ToReference(pArrayObj.Get()))
UnavailObjList.push_back(pRef->GetRefObjNum());
}
}
if (!pObj->IsDictionary())
continue;
ByteString type = pObj->GetDict()->GetNameFor("Type");
if (type == "Pages") {
m_PagesArray.push_back(std::move(pObj));
continue;
}
}
m_PageObjList.clear();
if (!UnavailObjList.empty()) {
m_PageObjList = std::move(UnavailObjList);
return false;
}
size_t iPages = m_PagesArray.size();
for (size_t i = 0; i < iPages; ++i) {
RetainPtr<CPDF_Object> pPages = std::move(m_PagesArray[i]);
if (pPages && !GetPageKids(pPages.Get())) {
m_PagesArray.clear();
m_docStatus = PDF_DATAAVAIL_ERROR;
return false;
}
}
m_PagesArray.clear();
if (m_PageObjList.empty())
m_docStatus = PDF_DATAAVAIL_DONE;
return true;
}
// Queues in m_PageObjList the object numbers referenced by the "Kids" entry of
// |pPages|. "Kids" may be a single reference or an array; non-reference array
// elements are skipped.
// Returns true when there is nothing to queue or queuing succeeded; returns
// false with state ERROR when "Kids" has any other type.
bool CPDF_DataAvail::GetPageKids(CPDF_Object* pPages) {
  CPDF_Dictionary* pages_dict = pPages->GetDict();
  if (!pages_dict)
    return true;

  CPDF_Object* kids = pages_dict->GetObjectFor("Kids");
  if (!kids)
    return true;

  switch (kids->GetType()) {
    case CPDF_Object::kReference:
      m_PageObjList.push_back(kids->AsReference()->GetRefObjNum());
      return true;
    case CPDF_Object::kArray: {
      CPDF_ArrayLocker locker(kids->AsArray());
      for (const auto& kid : locker) {
        if (CPDF_Reference* ref = ToReference(kid.Get()))
          m_PageObjList.push_back(ref->GetRefObjNum());
      }
      return true;
    }
    default:
      m_docStatus = PDF_DATAAVAIL_ERROR;
      return false;
  }
}
bool CPDF_DataAvail::CheckPages() {
bool bExists = false;
RetainPtr<CPDF_Object> pPages = GetObject(m_PagesObjNum, &bExists);
if (!bExists) {
m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
return true;
}
if (!pPages) {
if (m_docStatus == PDF_DATAAVAIL_ERROR) {
m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
return true;
}
return false;
}
if (!GetPageKids(pPages.Get())) {
m_docStatus = PDF_DATAAVAIL_ERROR;
return false;
}
m_docStatus = PDF_DATAAVAIL_PAGE;
return true;
}
// Checks the file header and picks the next state: FIRSTPAGE for a linearized
// file, LOADALLCROSSREF otherwise, ERROR when the header is invalid.
// Returns false only while the header data is not available yet.
bool CPDF_DataAvail::CheckHeader() {
  switch (CheckHeaderAndLinearized()) {
    case DocAvailStatus::DataNotAvailable:
      return false;
    case DocAvailStatus::DataError:
      m_docStatus = PDF_DATAAVAIL_ERROR;
      return true;
    case DocAvailStatus::DataAvailable:
      if (m_pLinearized)
        m_docStatus = PDF_DATAAVAIL_FIRSTPAGE;
      else
        m_docStatus = PDF_DATAAVAIL_LOADALLCROSSREF;
      return true;
    default:
      NOTREACHED();
      return false;
  }
}
bool CPDF_DataAvail::CheckFirstPage() {
if (!m_pLinearized->GetFirstPageEndOffset() ||
!m_pLinearized->GetFileSize() ||
!m_pLinearized->GetMainXRefTableFirstEntryOffset()) {
m_docStatus = PDF_DATAAVAIL_ERROR;
return false;
}
uint32_t dwEnd = m_pLinearized->GetFirstPageEndOffset();
dwEnd += 512;
if ((FX_FILESIZE)dwEnd > m_dwFileLen)
dwEnd = (uint32_t)m_dwFileLen;
const FX_FILESIZE start_pos = m_dwFileLen > 1024 ? 1024 : m_dwFileLen;
const size_t data_size = dwEnd > 1024 ? static_cast<size_t>(dwEnd - 1024) : 0;
if (!GetValidator()->CheckDataRangeAndRequestIfUnavailable(start_pos,
data_size))
return false;
m_docStatus =
m_bSupportHintTable ? PDF_DATAAVAIL_HINTTABLE : PDF_DATAAVAIL_DONE;
return true;
}
// Parses the hint tables of a linearized file into m_pHintTables.
// A read error moves the state to ERROR (returns true); missing data returns
// false so the step is retried; otherwise the state becomes DONE.
bool CPDF_DataAvail::CheckHintTables() {
  const CPDF_ReadValidator::Session read_session(GetValidator());
  m_pHintTables =
      CPDF_HintTables::Parse(GetSyntaxParser(), m_pLinearized.get());

  if (!GetValidator()->read_error()) {
    if (GetValidator()->has_unavailable_data())
      return false;

    m_docStatus = PDF_DATAAVAIL_DONE;
    return true;
  }

  m_docStatus = PDF_DATAAVAIL_ERROR;
  return true;
}
// Parses (loosely) the indirect object located at file offset |pos| and
// restores the syntax parser position afterwards.
// Returns the parsed object, or nullptr when nothing could be parsed or when
// |objnum| is non-zero and differs from the parsed object's number.
RetainPtr<CPDF_Object> CPDF_DataAvail::ParseIndirectObjectAt(
    FX_FILESIZE pos,
    uint32_t objnum,
    CPDF_IndirectObjectHolder* pObjList) const {
  CPDF_SyntaxParser* const syntax = GetSyntaxParser();
  const FX_FILESIZE saved_pos = syntax->GetPos();
  syntax->SetPos(pos);
  RetainPtr<CPDF_Object> parsed = syntax->GetIndirectObject(
      pObjList, CPDF_SyntaxParser::ParseType::kLoose);
  syntax->SetPos(saved_pos);

  if (!parsed)
    return nullptr;
  if (objnum && parsed->GetObjNum() != objnum)
    return nullptr;
  return parsed;
}
// Reports whether the file is linearized, based on the header check.
// Returns LinearizationUnknown while the header data is not available, and
// NotLinearized when the header is invalid or carries no linearization data.
CPDF_DataAvail::DocLinearizationStatus CPDF_DataAvail::IsLinearizedPDF() {
  switch (CheckHeaderAndLinearized()) {
    case DocAvailStatus::DataNotAvailable:
      return DocLinearizationStatus::LinearizationUnknown;
    case DocAvailStatus::DataError:
      return DocLinearizationStatus::NotLinearized;
    case DocAvailStatus::DataAvailable:
      if (m_pLinearized)
        return DocLinearizationStatus::Linearized;
      return DocLinearizationStatus::NotLinearized;
    default:
      NOTREACHED();
      return DocLinearizationStatus::LinearizationUnknown;
  }
}
// Locates the PDF header, creates the syntax parser for m_parser and parses
// the linearized header into m_pLinearized. The result is cached through
// m_bHeaderAvail, so later calls return DataAvailable immediately.
// Returns DataNotAvailable when a read hit problems and DataError when no
// header offset was found.
CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckHeaderAndLinearized() {
  if (m_bHeaderAvail)
    return DocAvailStatus::DataAvailable;

  const CPDF_ReadValidator::Session read_session(GetValidator());
  const Optional<FX_FILESIZE> header_offset = GetHeaderOffset(GetValidator());
  if (GetValidator()->has_read_problems())
    return DocAvailStatus::DataNotAvailable;
  if (!header_offset.has_value())
    return DocAvailStatus::DataError;

  m_parser.m_pSyntax = std::make_unique<CPDF_SyntaxParser>(
      GetValidator(), header_offset.value());
  m_pLinearized = m_parser.ParseLinearizedHeader();
  if (GetValidator()->has_read_problems())
    return DocAvailStatus::DataNotAvailable;

  m_bHeaderAvail = true;
  return DocAvailStatus::DataAvailable;
}
// Repeats the page-level steps for page |dwPage| until one of them cannot
// proceed (returns false) or the state leaves PAGETREE / PAGE.
// In the ERROR state the result of LoadAllFile() is returned. In any other
// state the page tree, the pages and the current page dictionary are flagged
// as loaded, the state is reset to PAGE and true is returned.
bool CPDF_DataAvail::CheckPage(uint32_t dwPage) {
  for (;;) {
    if (m_docStatus == PDF_DATAAVAIL_PAGETREE) {
      if (!LoadDocPages())
        return false;
      continue;
    }
    if (m_docStatus == PDF_DATAAVAIL_PAGE) {
      if (!LoadDocPage(dwPage))
        return false;
      continue;
    }
    if (m_docStatus == PDF_DATAAVAIL_ERROR)
      return LoadAllFile();

    m_bPagesTreeLoad = true;
    m_bPagesLoad = true;
    m_bCurPageDictLoadOK = true;
    m_docStatus = PDF_DATAAVAIL_PAGE;
    return true;
  }
}
bool CPDF_DataAvail::CheckArrayPageNode(uint32_t dwPageNo,
PageNode* pPageNode) {
bool bExists = false;
RetainPtr<CPDF_Object> pPages = GetObject(dwPageNo, &bExists);
if (!bExists) {
m_docStatus = PDF_DATAAVAIL_ERROR;
return false;
}
if (!pPages)
return false;
CPDF_Array* pArray = pPages->AsArray();
if (!pArray) {
m_docStatus = PDF_DATAAVAIL_ERROR;
return false;
}
pPageNode->m_type = PDF_PAGENODE_PAGES;
for (size_t i = 0; i < pArray->size(); ++i) {
CPDF_Reference* pKid = ToReference(pArray->GetObjectAt(i));
if (!pKid)
continue;
auto pNode = std::make_unique<PageNode>();
pNode->m_dwPageNo = pKid->GetRefObjNum();
pPageNode->m_ChildNodes.push_back(std::move(pNode));
}
return true;
}
bool CPDF_DataAvail::CheckUnknownPageNode(uint32_t dwPageNo,
PageNode* pPageNode) {
bool bExists = false;
RetainPtr<CPDF_Object> pPage = GetObject(dwPageNo, &bExists);
if (!bExists) {
m_docStatus = PDF_DATAAVAIL_ERROR;
return false;
}
if (!pPage)
return false;
if (pPage->IsArray()) {
pPageNode->m_dwPageNo = dwPageNo;
pPageNode->m_type = PDF_PAGENODE_ARRAY;
return true;
}
if (!pPage->IsDictionary()) {
m_docStatus = PDF_DATAAVAIL_ERROR;
return false;
}
pPageNode->m_dwPageNo = dwPageNo;
CPDF_Dictionary* pDict = pPage->GetDict();
const ByteString type = pDict->GetNameFor("Type");
if (type == "Page") {
pPageNode->m_type = PDF_PAGENODE_PAGE;
return true;
}
if (type != "Pages") {
m_docStatus = PDF_DATAAVAIL_ERROR;
return false;
}
pPageNode->m_type = PDF_PAGENODE_PAGES;
CPDF_Object* pKids = pDict->GetObjectFor("Kids");
if (!pKids) {
m_docStatus = PDF_DATAAVAIL_PAGE;
return true;
}
switch (pKids->GetType()) {
case CPDF_Object::kReference: {
CPDF_Reference* pKid = pKids->AsReference();
auto pNode = std::make_unique<PageNode>();
pNode->m_dwPageNo = pKid->GetRefObjNum();
pPageNode->m_ChildNodes.push_back(std::move(pNode));
break;
}
case CPDF_Object::kArray: {
CPDF_Array* pKidsArray = pKids->AsArray();
for (size_t i = 0; i < pKidsArray->size(); ++i) {
CPDF_Reference* pKid = ToReference(pKidsArray->GetObjectAt(i));
if (!pKid)
continue;
auto pNode = std::make_unique<PageNode>();
pNode->m_dwPageNo = pKid->GetRefObjNum();
pPageNode->m_ChildNodes.push_back(std::move(pNode));
}
break;
}
default:
break;
}
return true;
}
// Walks the children of |pageNode| depth-first, resolving unknown and array
// nodes on the way and counting leaf pages in |iCount|, until page index
// |iPage| is reached.
// When the page is found its object number is registered with the document
// (if one is attached), the state becomes DONE and true is returned.
// Returns false when the recursion limit is hit, when a child cannot be
// resolved yet, or (with state ERROR) when |pageNode| has no children or
// |iPage| is not below the child count. Returns true after visiting all
// children without reaching |iPage|.
bool CPDF_DataAvail::CheckPageNode(const CPDF_DataAvail::PageNode& pageNode,
                                   int32_t iPage,
                                   int32_t& iCount,
                                   int level) {
  if (level >= kMaxPageRecursionDepth)
    return false;

  const int32_t child_count =
      pdfium::CollectionSize<int32_t>(pageNode.m_ChildNodes);
  if (child_count <= 0 || iPage >= child_count) {
    m_docStatus = PDF_DATAAVAIL_ERROR;
    return false;
  }

  for (const auto& child : pageNode.m_ChildNodes) {
    PageNode* node = child.get();
    if (!node)
      continue;

    // Refine an unknown node first, then expand an array node into kids.
    if (node->m_type == PDF_PAGENODE_UNKNOWN &&
        !CheckUnknownPageNode(node->m_dwPageNo, node)) {
      return false;
    }
    if (node->m_type == PDF_PAGENODE_ARRAY &&
        !CheckArrayPageNode(node->m_dwPageNo, node)) {
      return false;
    }

    switch (node->m_type) {
      case PDF_PAGENODE_PAGE:
        ++iCount;
        if (m_pDocument && iPage == iCount)
          m_pDocument->SetPageObjNum(iPage, node->m_dwPageNo);
        break;
      case PDF_PAGENODE_PAGES:
        if (!CheckPageNode(*node, iPage, iCount, level + 1))
          return false;
        break;
      case PDF_PAGENODE_UNKNOWN:
      case PDF_PAGENODE_ARRAY:
        // Both kinds were converted above; reaching this is an error.
        return false;
    }

    if (iPage == iCount) {
      m_docStatus = PDF_DATAAVAIL_DONE;
      return true;
    }
  }
  return true;
}
// Locates page |dwPage| in the page tree rooted at m_PageNode.
// Pages beyond the document's page count and pages that are already loaded are
// treated as done. When the root node itself is a single page, only page 0 is
// valid (any other index moves the state to ERROR). Otherwise the result of
// the page tree walk is returned.
bool CPDF_DataAvail::LoadDocPage(uint32_t dwPage) {
  const int page_index = pdfium::base::checked_cast<int>(dwPage);
  if (page_index >= m_pDocument->GetPageCount() ||
      m_pDocument->IsPageLoaded(page_index)) {
    m_docStatus = PDF_DATAAVAIL_DONE;
    return true;
  }

  if (m_PageNode.m_type == PDF_PAGENODE_PAGE) {
    if (page_index == 0)
      m_docStatus = PDF_DATAAVAIL_DONE;
    else
      m_docStatus = PDF_DATAAVAIL_ERROR;
    return true;
  }

  // The walk increments the counter before comparing, so start below zero.
  int32_t pages_seen = -1;
  return CheckPageNode(m_PageNode, page_index, pages_seen, 0);
}
bool CPDF_DataAvail::CheckPageCount() {
bool bExists = false;
RetainPtr<CPDF_Object> pPages = GetObject(m_PagesObjNum, &bExists);
if (!bExists) {
m_docStatus = PDF_DATAAVAIL_ERROR;
return false;
}
if (!pPages)
return false;
CPDF_Dictionary* pPagesDict = pPages->GetDict();
if (!pPagesDict) {
m_docStatus = PDF_DATAAVAIL_ERROR;
return false;
}
if (!pPagesDict->KeyExist("Kids"))
return true;
return pPagesDict->GetIntegerFor("Count") > 0;
}
// Resolves the root page node and validates the page count.
// On success the state becomes PAGE and true is returned. When the root node
// cannot be resolved false is returned. When the page count check fails, the
// page tree is flagged to be loaded in full and false is returned.
bool CPDF_DataAvail::LoadDocPages() {
  if (!CheckUnknownPageNode(m_PagesObjNum, &m_PageNode))
    return false;

  if (!CheckPageCount()) {
    m_bTotalLoadPageTree = true;
    return false;
  }

  m_docStatus = PDF_DATAAVAIL_PAGE;
  return true;
}
// Runs the page-level steps until the page tree is flagged as loaded.
// Returns true once the pages are flagged as loaded as well. Otherwise asks
// the document to load its pages and returns false.
bool CPDF_DataAvail::LoadPages() {
  while (!m_bPagesTreeLoad) {
    if (!CheckPageStatus())
      return false;
  }

  if (!m_bPagesLoad) {
    m_pDocument->LoadPages();
    return false;
  }
  return true;
}
// For a linearized document, makes sure the main cross-reference table (the
// one the trailer's "Prev" entry points to) is available and loaded.
// The load is attempted only once; the outcome is cached in
// m_bMainXRefLoadTried / m_bMainXRefLoadedOK / m_bLinearedDataOK.
// Returns DataError when prerequisites are missing, "Prev" is invalid or the
// load fails, DataNotAvailable while data is missing, and DataAvailable
// otherwise (including when "Prev" is 0).
CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckLinearizedData() {
  if (m_bLinearedDataOK)
    return DataAvailable;

  ASSERT(m_pLinearized);
  if (!m_pLinearized->GetMainXRefTableFirstEntryOffset() || !m_pDocument)
    return DataError;

  CPDF_Parser* const parser = m_pDocument->GetParser();
  if (!parser || !parser->GetTrailer())
    return DataError;

  if (m_bMainXRefLoadTried)
    return m_bLinearedDataOK ? DataAvailable : DataNotAvailable;

  const FX_SAFE_FILESIZE prev = parser->GetTrailer()->GetIntegerFor("Prev");
  const FX_FILESIZE main_xref_offset = prev.ValueOrDefault(-1);
  if (main_xref_offset < 0)
    return DataError;
  if (main_xref_offset == 0)
    return DataAvailable;

  // Everything from the main cross-reference table to the end of the file
  // has to be present before the table can be loaded.
  FX_SAFE_SIZE_T data_size = m_dwFileLen;
  data_size -= main_xref_offset;
  if (!data_size.IsValid())
    return DataError;

  if (!GetValidator()->CheckDataRangeAndRequestIfUnavailable(
          main_xref_offset, data_size.ValueOrDie())) {
    return DataNotAvailable;
  }

  const CPDF_Parser::Error load_result =
      m_pDocument->GetParser()->LoadLinearizedMainXRefTable();
  m_bMainXRefLoadTried = true;
  if (load_result != CPDF_Parser::SUCCESS)
    return DataError;

  if (!PreparePageItem())
    return DataNotAvailable;

  m_bMainXRefLoadedOK = true;
  m_bLinearedDataOK = true;
  return DataAvailable;
}
// Checks whether everything needed for page |dwPage| is available, installing
// |pHints| on the validator while the check runs.
// Returns DataError when no document is attached or the page dictionary cannot
// be obtained, DataNotAvailable while data is missing, and DataAvailable once
// the page (and, except for the first page of a linearized file, its
// resources) passed the checks. Fully checked pages are remembered in
// m_pagesLoadState so later calls return DataAvailable immediately.
CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::IsPageAvail(
uint32_t dwPage,
DownloadHints* pHints) {
// m_pDocument is only set by a successful ParseDocument().
if (!m_pDocument)
return DataError;
const int iPage = pdfium::base::checked_cast<int>(dwPage);
if (iPage >= m_pDocument->GetPageCount()) {
// This is an XFA page.
return DataAvailable;
}
// First query for this page since the last reset: the page dictionary has to
// be located again.
if (IsFirstCheck(dwPage)) {
m_bCurPageDictLoadOK = false;
}
// The page already passed all checks in an earlier call.
if (pdfium::Contains(m_pagesLoadState, dwPage))
return DataAvailable;
const HintsScope hints_scope(GetValidator(), pHints);
if (m_pLinearized) {
// The first page of a linearized file only needs its page object checked.
if (dwPage == m_pLinearized->GetFirstPageNo()) {
auto* pPageDict = m_pDocument->GetPageDictionary(iPage);
if (!pPageDict)
return DataError;
auto page_num_obj = std::make_pair(
dwPage, std::make_unique<CPDF_PageObjectAvail>(
GetValidator(), m_pDocument.Get(), pPageDict));
// insert() keeps an already existing checker for this page; either way the
// checker stored in the container is used.
CPDF_PageObjectAvail* page_obj_avail =
m_PagesObjAvail.insert(std::move(page_num_obj)).first->second.get();
// TODO(art-snake): Check resources.
return page_obj_avail->CheckAvail();
}
// Any other page needs the main cross-reference table first.
DocAvailStatus nResult = CheckLinearizedData();
if (nResult != DataAvailable)
return nResult;
// With hint tables, the page can be checked and located directly.
if (m_pHintTables) {
nResult = m_pHintTables->CheckPage(dwPage);
if (nResult != DataAvailable)
return nResult;
if (GetPageDictionary(dwPage)) {
m_pagesLoadState.insert(dwPage);
return DataAvailable;
}
}
// The main cross-reference table was not loaded: fall back to the whole file
// and rebuild the cross-reference data from it.
if (!m_bMainXRefLoadedOK) {
if (!LoadAllFile())
return DataNotAvailable;
m_pDocument->GetParser()->RebuildCrossRef();
ResetFirstCheck(dwPage);
return DataAvailable;
}
if (m_bTotalLoadPageTree) {
if (!LoadPages())
return DataNotAvailable;
} else {
if (!m_bCurPageDictLoadOK && !CheckPage(dwPage))
return DataNotAvailable;
}
} else {
// Non-linearized file: locate the page through the page tree unless the
// whole tree is being loaded or the page dictionary was already found.
if (!m_bTotalLoadPageTree && !m_bCurPageDictLoadOK && !CheckPage(dwPage)) {
return DataNotAvailable;
}
}
if (CheckAcroForm() == DocFormStatus::FormNotAvailable)
return DataNotAvailable;
auto* pPageDict = m_pDocument->GetPageDictionary(iPage);
if (!pPageDict)
return DataError;
{
// Check the objects reachable from the page dictionary.
auto page_num_obj = std::make_pair(
dwPage, std::make_unique<CPDF_PageObjectAvail>(
GetValidator(), m_pDocument.Get(), pPageDict));
CPDF_PageObjectAvail* page_obj_avail =
m_PagesObjAvail.insert(std::move(page_num_obj)).first->second.get();
const DocAvailStatus status = page_obj_avail->CheckAvail();
if (status != DocAvailStatus::DataAvailable)
return status;
}
// Then the resources, which may come from a parent dictionary.
const DocAvailStatus resources_status = CheckResources(pPageDict);
if (resources_status != DocAvailStatus::DataAvailable)
return resources_status;
// Everything is available: reset the per-page bookkeeping and remember the
// page as loaded.
m_bCurPageDictLoadOK = false;
ResetFirstCheck(dwPage);
m_pagesLoadState.insert(dwPage);
return DataAvailable;
}
// Checks the availability of the resources used by |page|, looked up on the
// page itself or on one of its "Parent" dictionaries.
// Returns DataNotAvailable when the lookup hit read problems, DataAvailable
// when there are no resources, and otherwise the status reported by the
// availability checker kept for that resources object.
CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckResources(
    CPDF_Dictionary* page) {
  ASSERT(page);
  const CPDF_ReadValidator::Session read_session(GetValidator());
  CPDF_Object* resources = GetResourceObject(page);
  if (GetValidator()->has_read_problems())
    return DocAvailStatus::DataNotAvailable;
  if (!resources)
    return DocAvailStatus::DataAvailable;

  // insert() keeps an existing checker for |resources|, so repeated calls
  // continue with the same checker.
  auto candidate = std::make_pair(
      resources, std::make_unique<CPDF_PageObjectAvail>(
                     GetValidator(), m_pDocument.Get(), resources));
  const auto entry = m_PagesResourcesAvail.insert(std::move(candidate)).first;
  return entry->second->CheckAvail();
}
// Returns the read validator created in the constructor around the input
// stream (stored in m_pFileRead).
RetainPtr<CPDF_ReadValidator> CPDF_DataAvail::GetValidator() const {
return m_pFileRead;
}
// Returns the syntax parser currently in use: the one owned by the attached
// document's parser, or m_parser's own while no document is attached.
CPDF_SyntaxParser* CPDF_DataAvail::GetSyntaxParser() const {
  if (m_pDocument)
    return m_pDocument->GetParser()->m_pSyntax.get();
  return m_parser.m_pSyntax.get();
}
// Returns the page count from the linearized header when there is one,
// otherwise from the attached document, or 0 when neither is available.
int CPDF_DataAvail::GetPageCount() const {
  if (m_pLinearized)
    return m_pLinearized->GetPageCount();
  if (!m_pDocument)
    return 0;
  return m_pDocument->GetPageCount();
}
// Returns the dictionary of page |index|, or nullptr when it cannot be
// obtained.
// When the document does not know the page yet and the file is linearized with
// hint tables, the page object is located through the hint tables, registered
// with the document (and parsed into it if needed), and validated before its
// dictionary is returned. The first page of a linearized file is never
// resolved this way.
CPDF_Dictionary* CPDF_DataAvail::GetPageDictionary(int index) const {
  if (!m_pDocument || index < 0 || index >= GetPageCount())
    return nullptr;

  if (CPDF_Dictionary* known_page = m_pDocument->GetPageDictionary(index))
    return known_page;

  if (!m_pLinearized || !m_pHintTables ||
      index == static_cast<int>(m_pLinearized->GetFirstPageNo())) {
    return nullptr;
  }

  FX_FILESIZE page_start_pos = 0;
  FX_FILESIZE page_length = 0;
  uint32_t page_objnum = 0;
  if (!m_pHintTables->GetPagePos(index, &page_start_pos, &page_length,
                                 &page_objnum) ||
      !page_objnum) {
    return nullptr;
  }

  // Tell the document which object is the page.
  m_pDocument->SetPageObjNum(index, page_objnum);

  // The page object may already have been parsed by the document.
  if (!m_pDocument->GetIndirectObject(page_objnum)) {
    m_pDocument->ReplaceIndirectObjectIfHigherGeneration(
        page_objnum,
        ParseIndirectObjectAt(page_start_pos, page_objnum, m_pDocument.Get()));
  }

  return ValidatePage(index) ? m_pDocument->GetPageDictionary(index) : nullptr;
}
// Reports the availability of the document's AcroForm data. |pHints| is
// installed on the validator for the duration of the check; the actual work is
// done by CheckAcroForm().
CPDF_DataAvail::DocFormStatus CPDF_DataAvail::IsFormAvail(
DownloadHints* pHints) {
const HintsScope hints_scope(GetValidator(), pHints);
return CheckAcroForm();
}
// Checks whether the objects reachable from the root's "AcroForm" entry are
// available.
// Returns FormAvailable when no document is attached or the document has no
// root, FormNotExist when the root has no "AcroForm" entry, and otherwise the
// form status matching the availability checker's result. For linearized
// files the main cross-reference data is checked first.
CPDF_DataAvail::DocFormStatus CPDF_DataAvail::CheckAcroForm() {
  if (!m_pDocument)
    return FormAvailable;

  if (m_pLinearized) {
    const DocAvailStatus linearized_status = CheckLinearizedData();
    if (linearized_status == DataError)
      return FormError;
    if (linearized_status == DataNotAvailable)
      return FormNotAvailable;
  }

  if (!m_pFormAvail) {
    // Create the checker lazily; it is reused by later calls.
    CPDF_Dictionary* root = m_pDocument->GetRoot();
    if (!root)
      return FormAvailable;

    CPDF_Object* acro_form = root->GetObjectFor("AcroForm");
    if (!acro_form)
      return FormNotExist;

    m_pFormAvail = std::make_unique<CPDF_PageObjectAvail>(
        GetValidator(), m_pDocument.Get(), acro_form);
  }

  switch (m_pFormAvail->CheckAvail()) {
    case DocAvailStatus::DataAvailable:
      return DocFormStatus::FormAvailable;
    case DocAvailStatus::DataNotAvailable:
      return DocFormStatus::FormNotAvailable;
    case DocAvailStatus::DataError:
      return DocFormStatus::FormError;
    default:
      NOTREACHED();
  }
  return DocFormStatus::FormError;
}
bool CPDF_DataAvail::ValidatePage(uint32_t dwPage) const {
int iPage = pdfium::base::checked_cast<int>(dwPage);
auto* pPageDict = m_pDocument->GetPageDictionary(iPage);
if (!pPageDict)
return false;
CPDF_PageObjectAvail obj_avail(GetValidator(), m_pDocument.Get(), pPageDict);
return obj_avail.CheckAvail() == DocAvailStatus::DataAvailable;
}
// Creates a CPDF_Document from the data checked so far and loads it as a
// linearized document using |password|.
// Returns {SUCCESS, document} on success; this object then keeps a non-owning
// pointer to the document and observes it. Returns {HANDLER_ERROR, nullptr}
// when a document was already handed out or the load hit read problems, and
// {error, nullptr} when loading failed.
std::pair<CPDF_Parser::Error, std::unique_ptr<CPDF_Document>>
CPDF_DataAvail::ParseDocument(
    std::unique_ptr<CPDF_Document::RenderDataIface> pRenderData,
    std::unique_ptr<CPDF_Document::PageDataIface> pPageData,
    const char* password) {
  if (m_pDocument) {
    // A parsed document was already returned to the caller.
    return std::make_pair(CPDF_Parser::HANDLER_ERROR, nullptr);
  }

  auto new_document = std::make_unique<CPDF_Document>(std::move(pRenderData),
                                                      std::move(pPageData));
  new_document->AddObserver(this);

  CPDF_ReadValidator::Session read_session(GetValidator());
  const CPDF_Parser::Error load_error =
      new_document->LoadLinearizedDoc(GetValidator(), password);

  // Extra safety check: no read problems are expected at this point.
  if (GetValidator()->has_read_problems()) {
    NOTREACHED();
    return std::make_pair(CPDF_Parser::HANDLER_ERROR, nullptr);
  }

  if (load_error != CPDF_Parser::SUCCESS)
    return std::make_pair(load_error, nullptr);

  m_pDocument = new_document.get();
  return std::make_pair(CPDF_Parser::SUCCESS, std::move(new_document));
}
// A new page node starts with an unknown type; CheckUnknownPageNode() refines
// it once the underlying object has been read.
CPDF_DataAvail::PageNode::PageNode() : m_type(PDF_PAGENODE_UNKNOWN) {}
// Out-of-line definition of the (defaulted) PageNode destructor.
CPDF_DataAvail::PageNode::~PageNode() = default;