blob: 9a1f58ef63cc645dae8cb730c42257e7a5446396 [file] [log] [blame]
// Copyright 2016 The PDFium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
#include "core/fpdfapi/parser/cpdf_data_avail.h"
#include <algorithm>
#include <memory>
#include <utility>
#include "core/fpdfapi/parser/cpdf_array.h"
#include "core/fpdfapi/parser/cpdf_cross_ref_avail.h"
#include "core/fpdfapi/parser/cpdf_dictionary.h"
#include "core/fpdfapi/parser/cpdf_document.h"
#include "core/fpdfapi/parser/cpdf_hint_tables.h"
#include "core/fpdfapi/parser/cpdf_linearized_header.h"
#include "core/fpdfapi/parser/cpdf_name.h"
#include "core/fpdfapi/parser/cpdf_number.h"
#include "core/fpdfapi/parser/cpdf_page_object_avail.h"
#include "core/fpdfapi/parser/cpdf_read_validator.h"
#include "core/fpdfapi/parser/cpdf_reference.h"
#include "core/fpdfapi/parser/cpdf_stream.h"
#include "core/fpdfapi/parser/cpdf_syntax_parser.h"
#include "core/fpdfapi/parser/fpdf_parser_utility.h"
#include "core/fxcrt/autorestorer.h"
#include "core/fxcrt/check.h"
#include "core/fxcrt/containers/contains.h"
#include "core/fxcrt/fx_extension.h"
#include "core/fxcrt/fx_safe_types.h"
#include "core/fxcrt/notreached.h"
#include "core/fxcrt/numerics/safe_conversions.h"
#include "core/fxcrt/stl_util.h"
namespace {
// Searches |dict| and then its chain of "Parent" dictionaries for the nearest
// "Resources" entry. Returns that entry, or nullptr when the chain ends
// without one or when more than kMaxHierarchyDepth dictionaries were visited.
RetainPtr<CPDF_Object> GetResourceObject(RetainPtr<CPDF_Dictionary> dict) {
  static constexpr size_t kMaxHierarchyDepth = 64;
  for (size_t visited = 1; dict; ++visited) {
    if (RetainPtr<CPDF_Object> resources =
            dict->GetMutableObjectFor("Resources")) {
      return resources;
    }
    if (visited > kMaxHierarchyDepth) {
      // Too many ancestors: assume the parent chain contains a cycle.
      return nullptr;
    }
    // Climb one level; a missing or non-dictionary parent ends the walk.
    RetainPtr<CPDF_Object> parent = dict->GetMutableObjectFor("Parent");
    dict = parent ? parent->GetMutableDict() : nullptr;
  }
  return nullptr;
}
// Scoped helper that installs |hints| on |validator| for as long as the
// object lives and clears them (sets nullptr) again on destruction.
class HintsScope {
 public:
  // |validator| must be non-null (DCHECKed). |hints| is passed straight to
  // SetDownloadHints().
  HintsScope(RetainPtr<CPDF_ReadValidator> validator,
             CPDF_DataAvail::DownloadHints* hints)
      : validator_(std::move(validator)) {
    DCHECK(validator_);
    validator_->SetDownloadHints(hints);
  }

  // Not copyable: a copy would clear the hints when it is destroyed while the
  // original scope is still alive.
  HintsScope(const HintsScope&) = delete;
  HintsScope& operator=(const HintsScope&) = delete;

  ~HintsScope() { validator_->SetDownloadHints(nullptr); }

 private:
  RetainPtr<CPDF_ReadValidator> validator_;
};
} // namespace
// Out-of-line defaulted destructors for the nested FileAvail and
// DownloadHints types.
CPDF_DataAvail::FileAvail::~FileAvail() = default;
CPDF_DataAvail::DownloadHints::~DownloadHints() = default;
// Wraps |pFileRead| in a CPDF_ReadValidator constructed with |pFileAvail| and
// caches the size reported by that validator in file_len_.
// NOTE(review): file_len_ is initialized from file_read_, so file_read_ must
// be declared before file_len_ in the class -- confirm against the header.
CPDF_DataAvail::CPDF_DataAvail(FileAvail* pFileAvail,
RetainPtr<IFX_SeekableReadStream> pFileRead)
: file_read_(pdfium::MakeRetain<CPDF_ReadValidator>(std::move(pFileRead),
pFileAvail)),
file_len_(file_read_->GetSize()) {}
// Drops the hint tables and, if a document produced by ParseDocument() is
// still being tracked, stops observing it.
CPDF_DataAvail::~CPDF_DataAvail() {
  hint_tables_.reset();
  if (!document_) {
    return;
  }
  document_->RemoveObserver(this);
}
// Observer callback. Forgets the tracked document and discards the cached
// per-document availability state: the form checker, the collected page-tree
// nodes, and the per-page object / resource checkers.
void CPDF_DataAvail::OnObservableDestroyed() {
document_ = nullptr;
form_avail_.reset();
pages_array_.clear();
pages_obj_avail_.clear();
pages_resources_avail_.clear();
}
// Drives the document-level state machine until the document is marked
// available or a step reports that it cannot make progress yet.
// Returns kDataError for a zero-length file, kDataNotAvailable when a step
// fails, and kDataAvailable once doc_avail_ is set. |pHints| is installed on
// the validator for the duration of the call.
CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::IsDocAvail(
    DownloadHints* pHints) {
  if (file_len_ == 0) {
    return kDataError;
  }
  DCHECK(seen_page_obj_list_.empty());
  // Restores seen_page_obj_list_ to its current contents when this call ends.
  AutoRestorer<std::set<uint32_t>> seen_objects_restorer(&seen_page_obj_list_);
  const HintsScope hints_scope(GetValidator(), pHints);
  for (;;) {
    if (doc_avail_) {
      return kDataAvailable;
    }
    if (!CheckDocStatus()) {
      return kDataNotAvailable;
    }
  }
}
// Runs one step of the document-level state machine, selected by
// internal_status_. Returns the step's result; a false return makes
// IsDocAvail() report kDataNotAvailable.
bool CPDF_DataAvail::CheckDocStatus() {
  switch (internal_status_) {
    case InternalStatus::kHeader:
      return CheckHeader();
    case InternalStatus::kFirstPage:
      return CheckFirstPage();
    case InternalStatus::kHintTable:
      return CheckHintTables();
    case InternalStatus::kLoadAllCrossRef:
      return CheckAndLoadAllXref();
    case InternalStatus::kLoadAllFile:
    case InternalStatus::kError:
      // Both states request the whole file.
      return LoadAllFile();
    case InternalStatus::kRoot:
      return CheckRoot();
    case InternalStatus::kInfo:
      return CheckInfo();
    case InternalStatus::kPageTree:
      return total_load_page_tree_ ? CheckPages() : LoadDocPages();
    case InternalStatus::kPage:
      if (!total_load_page_tree_) {
        internal_status_ = InternalStatus::kPageLaterLoad;
        return true;
      }
      return CheckPage();
    case InternalStatus::kPageLaterLoad:
      // Go back to kPage and report the document as available.
      internal_status_ = InternalStatus::kPage;
      doc_avail_ = true;
      return true;
    default:
      doc_avail_ = true;
      return true;
  }
}
// Runs one step of page-tree loading, selected by internal_status_. For any
// state other than kPageTree, kPage or kError, marks both the page tree and
// the pages as loaded and returns true.
bool CPDF_DataAvail::CheckPageStatus() {
  if (internal_status_ == InternalStatus::kPageTree) {
    return CheckPages();
  }
  if (internal_status_ == InternalStatus::kPage) {
    return CheckPage();
  }
  if (internal_status_ == InternalStatus::kError) {
    return LoadAllFile();
  }
  pages_tree_load_ = true;
  pages_load_ = true;
  return true;
}
// Asks the validator to check the whole file and request it if unavailable.
// Moves to kDone and returns true when that check succeeds; otherwise returns
// false and leaves the state unchanged.
bool CPDF_DataAvail::LoadAllFile() {
  if (!GetValidator()->CheckWholeFileAndRequestIfUnavailable()) {
    return false;
  }
  internal_status_ = InternalStatus::kDone;
  return true;
}
bool CPDF_DataAvail::CheckAndLoadAllXref() {
if (!cross_ref_avail_) {
CPDF_ReadValidator::ScopedSession read_session(GetValidator());
const FX_FILESIZE last_xref_offset = parser_.ParseStartXRef();
if (GetValidator()->has_read_problems()) {
return false;
}
if (last_xref_offset <= 0) {
internal_status_ = InternalStatus::kError;
return false;
}
cross_ref_avail_ = std::make_unique<CPDF_CrossRefAvail>(GetSyntaxParser(),
last_xref_offset);
}
switch (cross_ref_avail_->CheckAvail()) {
case kDataAvailable:
break;
case kDataNotAvailable:
return false;
case kDataError:
internal_status_ = InternalStatus::kError;
return false;
}
if (!parser_.LoadAllCrossRefTablesAndStreams(
cross_ref_avail_->last_crossref_offset())) {
internal_status_ = InternalStatus::kLoadAllFile;
return false;
}
internal_status_ = InternalStatus::kRoot;
return true;
}
// Parses indirect object |objnum| with the document's parser when a document
// is tracked, otherwise with the internal parser.
// |*pExistInFile| is set to true only when the parser produced an object.
// Returns nullptr when there is no parser, nothing was parsed, or the
// validator recorded read problems during the parse.
RetainPtr<CPDF_Object> CPDF_DataAvail::GetObject(uint32_t objnum,
                                                 bool* pExistInFile) {
  *pExistInFile = false;
  CPDF_Parser* const active_parser =
      document_ ? document_->GetParser() : &parser_;
  if (!active_parser) {
    return nullptr;
  }
  CPDF_ReadValidator::ScopedSession read_session(GetValidator());
  RetainPtr<CPDF_Object> parsed = active_parser->ParseIndirectObject(objnum);
  if (!parsed) {
    return nullptr;
  }
  *pExistInFile = true;
  if (GetValidator()->has_read_problems()) {
    // The object exists but could not be read reliably.
    return nullptr;
  }
  return parsed;
}
// Makes sure the Info object, if the parser reports one, can be parsed
// without read problems. Moves to kPageTree and returns true on success or
// when there is no Info object; returns false on read problems.
bool CPDF_DataAvail::CheckInfo() {
  const uint32_t info_objnum = parser_.GetInfoObjNum();
  if (info_objnum != CPDF_Object::kInvalidObjNum) {
    CPDF_ReadValidator::ScopedSession read_session(GetValidator());
    // Only the read side effects matter here; the parsed object is unused.
    parser_.ParseIndirectObject(info_objnum);
    if (GetValidator()->has_read_problems()) {
      return false;
    }
  }
  internal_status_ = InternalStatus::kPageTree;
  return true;
}
bool CPDF_DataAvail::CheckRoot() {
const uint32_t dwRootObjNum = parser_.GetRootObjNum();
if (dwRootObjNum == CPDF_Object::kInvalidObjNum) {
internal_status_ = InternalStatus::kError;
return true;
}
CPDF_ReadValidator::ScopedSession read_session(GetValidator());
root_ = ToDictionary(parser_.ParseIndirectObject(dwRootObjNum));
if (GetValidator()->has_read_problems()) {
return false;
}
if (!root_) {
internal_status_ = InternalStatus::kError;
return false;
}
RetainPtr<const CPDF_Reference> pRef =
ToReference(root_->GetObjectFor("Pages"));
if (!pRef) {
internal_status_ = InternalStatus::kError;
return false;
}
pages_obj_num_ = pRef->GetRefObjNum();
internal_status_ = InternalStatus::kInfo;
return true;
}
bool CPDF_DataAvail::PreparePageItem() {
const CPDF_Dictionary* pRoot = document_->GetRoot();
if (!pRoot) {
internal_status_ = InternalStatus::kError;
return false;
}
RetainPtr<const CPDF_Reference> pRef =
ToReference(pRoot->GetObjectFor("Pages"));
if (!pRef) {
internal_status_ = InternalStatus::kError;
return false;
}
pages_obj_num_ = pRef->GetRefObjNum();
internal_status_ = InternalStatus::kPageTree;
return true;
}
// Records |dwPage| in page_map_check_state_. Returns true when the page was
// newly inserted, false when it was already present.
bool CPDF_DataAvail::IsFirstCheck(uint32_t dwPage) {
return page_map_check_state_.insert(dwPage).second;
}
// Removes |dwPage| from page_map_check_state_, so the next IsFirstCheck() for
// that page returns true again.
void CPDF_DataAvail::ResetFirstCheck(uint32_t dwPage) {
page_map_check_state_.erase(dwPage);
}
// Processes every object number currently queued in page_obj_list_.
// Returns false when some objects must be retried (page_obj_list_ is replaced
// by the retry list) or when GetPageKids() fails (state becomes kError).
// Returns true otherwise, moving to kDone if no further objects were queued.
bool CPDF_DataAvail::CheckPage() {
// Object numbers to retry on the next call.
std::vector<uint32_t> UnavailObjList;
for (uint32_t dwPageObjNum : page_obj_list_) {
bool bExists = false;
RetainPtr<CPDF_Object> pObj = GetObject(dwPageObjNum, &bExists);
if (!pObj) {
// Retry objects that exist but could not be read; objects that were not
// parsed at all are skipped.
if (bExists) {
UnavailObjList.push_back(dwPageObjNum);
}
continue;
}
switch (pObj->GetType()) {
case CPDF_Object::kArray: {
// Queue every reference held by the array for a later pass.
CPDF_ArrayLocker locker(pObj->AsArray());
for (const auto& pArrayObj : locker) {
const CPDF_Reference* pRef = ToReference(pArrayObj.Get());
if (pRef) {
UnavailObjList.push_back(pRef->GetRefObjNum());
}
}
break;
}
case CPDF_Object::kDictionary:
// Collect dictionaries whose "Type" is "Pages"; their kids are expanded
// below.
if (pObj->GetDict()->GetNameFor("Type") == "Pages") {
pages_array_.push_back(std::move(pObj));
}
break;
default:
break;
}
}
page_obj_list_.clear();
if (!UnavailObjList.empty()) {
page_obj_list_ = std::move(UnavailObjList);
return false;
}
// Expand the kids of each collected "Pages" node; GetPageKids() appends
// newly seen object numbers to page_obj_list_.
size_t iPages = pages_array_.size();
for (size_t i = 0; i < iPages; ++i) {
RetainPtr<CPDF_Object> pPages = std::move(pages_array_[i]);
if (pPages && !GetPageKids(pPages.Get())) {
pages_array_.clear();
internal_status_ = InternalStatus::kError;
return false;
}
}
pages_array_.clear();
// Nothing left to visit.
if (page_obj_list_.empty()) {
internal_status_ = InternalStatus::kDone;
}
return true;
}
// Queues the object numbers referenced by the "Kids" entry of |pPages| onto
// page_obj_list_, skipping numbers already in seen_page_obj_list_.
// Returns true when |pPages| has no dictionary or no "Kids" entry, or when
// "Kids" is a reference or an array. For any other "Kids" type, switches to
// kError and returns false.
bool CPDF_DataAvail::GetPageKids(CPDF_Object* pPages) {
  RetainPtr<const CPDF_Dictionary> pages_dict = pPages->GetDict();
  RetainPtr<const CPDF_Object> kids =
      pages_dict ? pages_dict->GetObjectFor("Kids") : nullptr;
  if (!kids) {
    return true;
  }

  // Queues |objnum| the first time it is encountered.
  const auto enqueue_if_new = [this](uint32_t objnum) {
    if (seen_page_obj_list_.insert(objnum).second) {
      page_obj_list_.push_back(objnum);
    }
  };

  switch (kids->GetType()) {
    case CPDF_Object::kReference:
      enqueue_if_new(kids->AsReference()->GetRefObjNum());
      return true;
    case CPDF_Object::kArray: {
      CPDF_ArrayLocker locker(kids->AsArray());
      for (const auto& entry : locker) {
        // Non-reference array elements are ignored.
        if (const CPDF_Reference* ref = ToReference(entry.Get())) {
          enqueue_if_new(ref->GetRefObjNum());
        }
      }
      return true;
    }
    default:
      internal_status_ = InternalStatus::kError;
      return false;
  }
}
bool CPDF_DataAvail::CheckPages() {
bool bExists = false;
RetainPtr<CPDF_Object> pPages = GetObject(pages_obj_num_, &bExists);
if (!bExists) {
internal_status_ = InternalStatus::kLoadAllFile;
return true;
}
if (!pPages) {
if (internal_status_ == InternalStatus::kError) {
internal_status_ = InternalStatus::kLoadAllFile;
return true;
}
return false;
}
if (!GetPageKids(pPages.Get())) {
internal_status_ = InternalStatus::kError;
return false;
}
internal_status_ = InternalStatus::kPage;
return true;
}
// Checks the header / linearization data and picks the next state:
// kFirstPage for a linearized file, kLoadAllCrossRef otherwise, kError on a
// data error. Returns false only while the header data is not yet available.
bool CPDF_DataAvail::CheckHeader() {
  switch (CheckHeaderAndLinearized()) {
    case kDataNotAvailable:
      return false;
    case kDataError:
      internal_status_ = InternalStatus::kError;
      return true;
    case kDataAvailable:
      if (linearized_) {
        internal_status_ = InternalStatus::kFirstPage;
      } else {
        internal_status_ = InternalStatus::kLoadAllCrossRef;
      }
      return true;
  }
}
bool CPDF_DataAvail::CheckFirstPage() {
if (!linearized_->GetFirstPageEndOffset() || !linearized_->GetFileSize() ||
!linearized_->GetMainXRefTableFirstEntryOffset()) {
internal_status_ = InternalStatus::kError;
return false;
}
uint32_t dwEnd = linearized_->GetFirstPageEndOffset();
dwEnd += 512;
if ((FX_FILESIZE)dwEnd > file_len_) {
dwEnd = (uint32_t)file_len_;
}
const FX_FILESIZE start_pos = file_len_ > 1024 ? 1024 : file_len_;
const size_t data_size = dwEnd > 1024 ? static_cast<size_t>(dwEnd - 1024) : 0;
if (!GetValidator()->CheckDataRangeAndRequestIfUnavailable(start_pos,
data_size)) {
return false;
}
internal_status_ = InternalStatus::kHintTable;
return true;
}
bool CPDF_DataAvail::CheckHintTables() {
CPDF_ReadValidator::ScopedSession read_session(GetValidator());
hint_tables_ = CPDF_HintTables::Parse(GetSyntaxParser(), linearized_.get());
if (GetValidator()->read_error()) {
internal_status_ = InternalStatus::kError;
return true;
}
if (GetValidator()->has_unavailable_data()) {
return false;
}
internal_status_ = InternalStatus::kDone;
return true;
}
// Parses the indirect object located at file offset |pos| in loose mode,
// restoring the syntax parser's position afterwards.
// Returns the parsed object, or nullptr when nothing was parsed or when
// |objnum| is non-zero and does not match the parsed object's number.
RetainPtr<CPDF_Object> CPDF_DataAvail::ParseIndirectObjectAt(
    FX_FILESIZE pos,
    uint32_t objnum,
    CPDF_IndirectObjectHolder* pObjList) const {
  CPDF_SyntaxParser* const syntax = GetSyntaxParser();
  const FX_FILESIZE original_pos = syntax->GetPos();
  syntax->SetPos(pos);
  RetainPtr<CPDF_Object> parsed = syntax->GetIndirectObject(
      pObjList, CPDF_SyntaxParser::ParseType::kLoose);
  syntax->SetPos(original_pos);

  if (!parsed) {
    return nullptr;
  }
  // An |objnum| of zero accepts whatever object was found at |pos|.
  if (objnum && parsed->GetObjNum() != objnum) {
    return nullptr;
  }
  return parsed;
}
// Reports whether the file is linearized. Returns kLinearizationUnknown while
// the header data is not available yet and kNotLinearized on a data error.
CPDF_DataAvail::DocLinearizationStatus CPDF_DataAvail::IsLinearizedPDF() {
  switch (CheckHeaderAndLinearized()) {
    case kDataNotAvailable:
      return kLinearizationUnknown;
    case kDataError:
      return kNotLinearized;
    case kDataAvailable:
      if (linearized_) {
        return kLinearized;
      }
      return kNotLinearized;
  }
}
// Locates the file header, creates the syntax parser starting at that offset,
// and parses the linearization header. The work is done once; later calls
// return kDataAvailable immediately.
// Returns kDataNotAvailable on read problems and kDataError when no header
// offset was found.
CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckHeaderAndLinearized() {
  if (header_avail_) {
    return kDataAvailable;
  }

  CPDF_ReadValidator::ScopedSession read_session(GetValidator());
  const std::optional<FX_FILESIZE> found_offset =
      GetHeaderOffset(GetValidator());
  if (GetValidator()->has_read_problems()) {
    return kDataNotAvailable;
  }
  if (!found_offset) {
    return kDataError;
  }

  parser_.syntax_ =
      std::make_unique<CPDF_SyntaxParser>(GetValidator(), *found_offset);
  linearized_ = parser_.ParseLinearizedHeader();
  if (GetValidator()->has_read_problems()) {
    return kDataNotAvailable;
  }
  header_avail_ = true;
  return kDataAvailable;
}
// Repeats page-tree / page loading steps for page |dwPage| until a step fails
// (returns false), the state is kError (returns the result of LoadAllFile()),
// or any other state is reached. In that last case the page-tree, pages and
// current-page flags are set, the state is reset to kPage, and true is
// returned.
bool CPDF_DataAvail::CheckPage(uint32_t dwPage) {
  for (;;) {
    if (internal_status_ == InternalStatus::kPageTree) {
      if (!LoadDocPages()) {
        return false;
      }
      continue;
    }
    if (internal_status_ == InternalStatus::kPage) {
      if (!LoadDocPage(dwPage)) {
        return false;
      }
      continue;
    }
    if (internal_status_ == InternalStatus::kError) {
      return LoadAllFile();
    }
    pages_tree_load_ = true;
    pages_load_ = true;
    cur_page_dict_load_ok_ = true;
    internal_status_ = InternalStatus::kPage;
    return true;
  }
}
bool CPDF_DataAvail::CheckArrayPageNode(uint32_t dwPageNo,
PageNode* pPageNode) {
bool bExists = false;
RetainPtr<CPDF_Object> pPages = GetObject(dwPageNo, &bExists);
if (!bExists) {
internal_status_ = InternalStatus::kError;
return false;
}
if (!pPages) {
return false;
}
const CPDF_Array* pArray = pPages->AsArray();
if (!pArray) {
internal_status_ = InternalStatus::kError;
return false;
}
pPageNode->type_ = PageNode::Type::kPages;
for (size_t i = 0; i < pArray->size(); ++i) {
RetainPtr<const CPDF_Reference> pKid = ToReference(pArray->GetObjectAt(i));
if (!pKid) {
continue;
}
auto pNode = std::make_unique<PageNode>();
pNode->page_no_ = pKid->GetRefObjNum();
pPageNode->child_nodes_.push_back(std::move(pNode));
}
return true;
}
bool CPDF_DataAvail::CheckUnknownPageNode(uint32_t dwPageNo,
PageNode* pPageNode) {
bool bExists = false;
RetainPtr<CPDF_Object> pPage = GetObject(dwPageNo, &bExists);
if (!bExists) {
internal_status_ = InternalStatus::kError;
return false;
}
if (!pPage) {
return false;
}
if (pPage->IsArray()) {
pPageNode->page_no_ = dwPageNo;
pPageNode->type_ = PageNode::Type::kArray;
return true;
}
if (!pPage->IsDictionary()) {
internal_status_ = InternalStatus::kError;
return false;
}
pPageNode->page_no_ = dwPageNo;
RetainPtr<CPDF_Dictionary> dict = pPage->GetMutableDict();
const ByteString type = dict->GetNameFor("Type");
if (type == "Page") {
pPageNode->type_ = PageNode::Type::kPage;
return true;
}
if (type != "Pages") {
internal_status_ = InternalStatus::kError;
return false;
}
pPageNode->type_ = PageNode::Type::kPages;
RetainPtr<CPDF_Object> pKids = dict->GetMutableObjectFor("Kids");
if (!pKids) {
internal_status_ = InternalStatus::kPage;
return true;
}
switch (pKids->GetType()) {
case CPDF_Object::kReference: {
const CPDF_Reference* pKid = pKids->AsReference();
auto pNode = std::make_unique<PageNode>();
pNode->page_no_ = pKid->GetRefObjNum();
pPageNode->child_nodes_.push_back(std::move(pNode));
break;
}
case CPDF_Object::kArray: {
const CPDF_Array* pKidsArray = pKids->AsArray();
for (size_t i = 0; i < pKidsArray->size(); ++i) {
RetainPtr<const CPDF_Reference> pKid =
ToReference(pKidsArray->GetObjectAt(i));
if (!pKid) {
continue;
}
auto pNode = std::make_unique<PageNode>();
pNode->page_no_ = pKid->GetRefObjNum();
pPageNode->child_nodes_.push_back(std::move(pNode));
}
break;
}
default:
break;
}
return true;
}
// Walks the children of |pageNode|, resolving unknown and array nodes and
// counting kPage nodes in |iCount|, until the counter reaches |iPage|.
// |iCount| is shared across the whole recursion; LoadDocPage() starts it at
// -1, so the first page found gets index 0. |level| is the recursion depth.
// Returns true when the walk finished without error (the state becomes kDone
// once the target page is reached). Returns false when the depth limit is
// hit, a child cannot be resolved, or the node is invalid.
bool CPDF_DataAvail::CheckPageNode(const CPDF_DataAvail::PageNode& pageNode,
int32_t iPage,
int32_t& iCount,
int level) {
// Bound the recursion depth. Note that internal_status_ is left unchanged.
if (level >= kMaxPageRecursionDepth) {
return false;
}
int32_t iSize = fxcrt::CollectionSize<int32_t>(pageNode.child_nodes_);
// NOTE(review): this compares the page index with this node's direct child
// count -- confirm that is the intended validity check.
if (iSize <= 0 || iPage >= iSize) {
internal_status_ = InternalStatus::kError;
return false;
}
for (int32_t i = 0; i < iSize; ++i) {
PageNode* pNode = pageNode.child_nodes_[i].get();
if (!pNode) {
continue;
}
if (pNode->type_ == PageNode::Type::kUnknown) {
// Updates the type for the unknown page node.
if (!CheckUnknownPageNode(pNode->page_no_, pNode)) {
return false;
}
}
if (pNode->type_ == PageNode::Type::kArray) {
// Updates a more specific type for the array page node.
if (!CheckArrayPageNode(pNode->page_no_, pNode)) {
return false;
}
}
switch (pNode->type_) {
case PageNode::Type::kPage:
iCount++;
// Tell the document which object number is page |iPage|.
if (iPage == iCount && document_) {
document_->SetPageObjNum(iPage, pNode->page_no_);
}
break;
case PageNode::Type::kPages:
if (!CheckPageNode(*pNode, iPage, iCount, level + 1)) {
return false;
}
break;
case PageNode::Type::kUnknown:
case PageNode::Type::kArray:
// Already converted above, error if we get here.
return false;
}
// Stop as soon as the requested page has been counted, either directly or
// inside the nested call above.
if (iPage == iCount) {
internal_status_ = InternalStatus::kDone;
return true;
}
}
return true;
}
bool CPDF_DataAvail::LoadDocPage(uint32_t dwPage) {
int iPage = pdfium::checked_cast<int>(dwPage);
if (document_->GetPageCount() <= iPage || document_->IsPageLoaded(iPage)) {
internal_status_ = InternalStatus::kDone;
return true;
}
if (page_node_.type_ == PageNode::Type::kPage) {
internal_status_ =
iPage == 0 ? InternalStatus::kDone : InternalStatus::kError;
return true;
}
int32_t iCount = -1;
return CheckPageNode(page_node_, iPage, iCount, 0);
}
bool CPDF_DataAvail::CheckPageCount() {
bool bExists = false;
RetainPtr<CPDF_Object> pPages = GetObject(pages_obj_num_, &bExists);
if (!bExists) {
internal_status_ = InternalStatus::kError;
return false;
}
if (!pPages) {
return false;
}
RetainPtr<const CPDF_Dictionary> pPagesDict = pPages->GetDict();
if (!pPagesDict) {
internal_status_ = InternalStatus::kError;
return false;
}
if (!pPagesDict->KeyExist("Kids")) {
return true;
}
return pPagesDict->GetIntegerFor("Count") > 0;
}
// Resolves the root page-tree node into page_node_ and checks the page count.
// Moves to kPage and returns true when both succeed. When the page-count
// check fails, sets total_load_page_tree_ and returns false.
bool CPDF_DataAvail::LoadDocPages() {
  if (!CheckUnknownPageNode(pages_obj_num_, &page_node_)) {
    return false;
  }
  if (!CheckPageCount()) {
    total_load_page_tree_ = true;
    return false;
  }
  internal_status_ = InternalStatus::kPage;
  return true;
}
// Steps page-tree loading until pages_tree_load_ is set, returning false as
// soon as a step fails. Then returns true if the pages are already marked as
// loaded; otherwise asks the document to load its pages and returns false.
bool CPDF_DataAvail::LoadPages() {
  while (!pages_tree_load_) {
    const bool step_ok = CheckPageStatus();
    if (!step_ok) {
      return false;
    }
  }
  if (!pages_load_) {
    document_->LoadPages();
    return false;
  }
  return true;
}
// Ensures the main cross-reference table of a linearized document is loaded.
// The load is attempted at most once (main_xref_load_tried_); success is
// cached in lineared_data_ok_.
// Returns kDataAvailable when the data is loaded or the trailer's "Prev" is
// 0, kDataNotAvailable when the needed range is missing or a previous attempt
// did not succeed, and kDataError on invalid input or a failed load.
CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckLinearizedData() {
if (lineared_data_ok_) {
return kDataAvailable;
}
DCHECK(linearized_);
if (!linearized_->GetMainXRefTableFirstEntryOffset() || !document_ ||
!document_->GetParser() || !document_->GetParser()->GetTrailer()) {
return kDataError;
}
if (!main_xref_load_tried_) {
// The trailer's "Prev" entry supplies the main cross-reference offset.
const FX_SAFE_FILESIZE prev =
document_->GetParser()->GetTrailer()->GetIntegerFor("Prev");
const FX_FILESIZE main_xref_offset = prev.ValueOrDefault(-1);
if (main_xref_offset < 0) {
return kDataError;
}
if (main_xref_offset == 0) {
return kDataAvailable;
}
// Request everything from the main cross-reference offset to end of file.
FX_SAFE_SIZE_T data_size = file_len_;
data_size -= main_xref_offset;
if (!data_size.IsValid()) {
return kDataError;
}
if (!GetValidator()->CheckDataRangeAndRequestIfUnavailable(
main_xref_offset, data_size.ValueOrDie())) {
return kDataNotAvailable;
}
CPDF_Parser::Error eRet =
document_->GetParser()->LoadLinearizedMainXRefTable();
// Mark the attempt as made whether or not the load succeeded.
main_xref_load_tried_ = true;
if (eRet != CPDF_Parser::SUCCESS) {
return kDataError;
}
if (!PreparePageItem()) {
return kDataNotAvailable;
}
main_xref_loaded_ok_ = true;
lineared_data_ok_ = true;
}
return lineared_data_ok_ ? kDataAvailable : kDataNotAvailable;
}
// Reports whether the data needed for page |dwPage| is available. |pHints| is
// installed on the validator for the duration of the check.
// Returns kDataError when no document is tracked or a page dictionary cannot
// be obtained, kDataNotAvailable while data is still missing, and
// kDataAvailable otherwise. Pages that pass the full check are remembered in
// pages_load_state_ and answered from there on later calls.
CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::IsPageAvail(
uint32_t dwPage,
DownloadHints* pHints) {
if (!document_) {
return kDataError;
}
const int iPage = pdfium::checked_cast<int>(dwPage);
if (iPage >= document_->GetPageCount()) {
// This is XFA page.
return kDataAvailable;
}
// First check of this page: forget the cached "page dictionary loaded" flag.
if (IsFirstCheck(dwPage)) {
cur_page_dict_load_ok_ = false;
}
if (pdfium::Contains(pages_load_state_, dwPage)) {
return kDataAvailable;
}
const HintsScope hints_scope(GetValidator(), pHints);
if (linearized_) {
// The first page of a linearized file is checked directly from its page
// dictionary.
if (dwPage == linearized_->GetFirstPageNo()) {
RetainPtr<const CPDF_Dictionary> pPageDict =
document_->GetPageDictionary(iPage);
if (!pPageDict) {
return kDataError;
}
// insert() keeps an existing checker for this page if there is one.
auto page_num_obj =
std::make_pair(dwPage, std::make_unique<CPDF_PageObjectAvail>(
GetValidator(), document_, pPageDict));
CPDF_PageObjectAvail* page_obj_avail =
pages_obj_avail_.insert(std::move(page_num_obj)).first->second.get();
// TODO(art-snake): Check resources.
return page_obj_avail->CheckAvail();
}
DocAvailStatus nResult = CheckLinearizedData();
if (nResult != kDataAvailable) {
return nResult;
}
if (hint_tables_) {
nResult = hint_tables_->CheckPage(dwPage);
if (nResult != kDataAvailable) {
return nResult;
}
if (GetPageDictionary(dwPage)) {
pages_load_state_.insert(dwPage);
return kDataAvailable;
}
}
// Without a loaded main cross-reference table, request the whole file and
// rebuild the cross-reference data.
if (!main_xref_loaded_ok_) {
if (!LoadAllFile()) {
return kDataNotAvailable;
}
document_->GetParser()->RebuildCrossRef();
ResetFirstCheck(dwPage);
return kDataAvailable;
}
if (total_load_page_tree_) {
if (!LoadPages()) {
return kDataNotAvailable;
}
} else {
if (!cur_page_dict_load_ok_ && !CheckPage(dwPage)) {
return kDataNotAvailable;
}
}
} else {
if (!total_load_page_tree_ && !cur_page_dict_load_ok_ &&
!CheckPage(dwPage)) {
return kDataNotAvailable;
}
}
if (CheckAcroForm() == kFormNotAvailable) {
return kDataNotAvailable;
}
RetainPtr<CPDF_Dictionary> pPageDict =
document_->GetMutablePageDictionary(iPage);
if (!pPageDict) {
return kDataError;
}
{
// Check the objects reachable from the page dictionary.
auto page_num_obj =
std::make_pair(dwPage, std::make_unique<CPDF_PageObjectAvail>(
GetValidator(), document_, pPageDict));
CPDF_PageObjectAvail* page_obj_avail =
pages_obj_avail_.insert(std::move(page_num_obj)).first->second.get();
const DocAvailStatus status = page_obj_avail->CheckAvail();
if (status != kDataAvailable) {
return status;
}
}
const DocAvailStatus resources_status = CheckResources(std::move(pPageDict));
if (resources_status != kDataAvailable) {
return resources_status;
}
// Fully available: reset the per-page flags and remember the page.
cur_page_dict_load_ok_ = false;
ResetFirstCheck(dwPage);
pages_load_state_.insert(dwPage);
return kDataAvailable;
}
// Checks availability of the resources that apply to |page|: the nearest
// "Resources" entry found on the page dictionary or one of its parents.
// Returns kDataNotAvailable when locating the resources hit read problems,
// kDataAvailable when there is no resources object, and otherwise the result
// of the cached availability checker for that object.
CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckResources(
    RetainPtr<CPDF_Dictionary> page) {
  DCHECK(page);
  CPDF_ReadValidator::ScopedSession read_session(GetValidator());
  RetainPtr<CPDF_Object> resources = GetResourceObject(std::move(page));
  if (GetValidator()->has_read_problems()) {
    return kDataNotAvailable;
  }
  if (!resources) {
    return kDataAvailable;
  }

  // Reuse the checker cached for this resources object; only allocate a new
  // one on the first request rather than building a throwaway on every call.
  auto it = pages_resources_avail_.find(resources);
  if (it == pages_resources_avail_.end()) {
    it = pages_resources_avail_
             .emplace(resources, std::make_unique<CPDF_PageObjectAvail>(
                                     GetValidator(), document_, resources))
             .first;
  }
  return it->second->CheckAvail();
}
// Returns the read validator created in the constructor around the input
// stream.
RetainPtr<CPDF_ReadValidator> CPDF_DataAvail::GetValidator() const {
return file_read_;
}
// Returns the syntax parser of the tracked document's parser when a document
// exists, otherwise the syntax parser of the internal parser.
CPDF_SyntaxParser* CPDF_DataAvail::GetSyntaxParser() const {
  if (document_) {
    return document_->GetParser()->syntax_.get();
  }
  return parser_.syntax_.get();
}
// Returns the page count from the linearization header when the file is
// linearized, otherwise from the tracked document, or 0 when there is no
// document.
int CPDF_DataAvail::GetPageCount() const {
  if (linearized_) {
    return linearized_->GetPageCount();
  }
  if (!document_) {
    return 0;
  }
  return document_->GetPageCount();
}
// Returns the dictionary of page |index|, or nullptr when it cannot be
// obtained. If the document does not already know the page, and the file is
// linearized with hint tables, the page object is located through the hint
// tables, parsed at that position, registered with the document and
// validated. The first page of a linearized file is never resolved this way.
RetainPtr<const CPDF_Dictionary> CPDF_DataAvail::GetPageDictionary(
    int index) const {
  if (!document_ || index < 0 || index >= GetPageCount()) {
    return nullptr;
  }
  if (RetainPtr<const CPDF_Dictionary> known_page =
          document_->GetPageDictionary(index)) {
    return known_page;
  }
  if (!linearized_ || !hint_tables_ ||
      index == static_cast<int>(linearized_->GetFirstPageNo())) {
    return nullptr;
  }

  FX_FILESIZE page_start = 0;
  FX_FILESIZE page_length = 0;
  uint32_t page_objnum = 0;
  const bool located = hint_tables_->GetPagePos(index, &page_start,
                                                &page_length, &page_objnum);
  if (!located || !page_objnum) {
    return nullptr;
  }

  // Tell the document which object is this page.
  document_->SetPageObjNum(index, page_objnum);
  // Only parse from the hinted position if the document has not already
  // parsed the page object.
  if (!document_->GetIndirectObject(page_objnum)) {
    document_->ReplaceIndirectObjectIfHigherGeneration(
        page_objnum, ParseIndirectObjectAt(page_start, page_objnum, document_));
  }
  return ValidatePage(index) ? document_->GetPageDictionary(index) : nullptr;
}
// Reports AcroForm availability. |pHints| is installed on the validator for
// the duration of the check; the result is that of CheckAcroForm().
CPDF_DataAvail::DocFormStatus CPDF_DataAvail::IsFormAvail(
DownloadHints* pHints) {
const HintsScope hints_scope(GetValidator(), pHints);
return CheckAcroForm();
}
// Checks whether the document's "AcroForm" object and what it references are
// available. Returns kFormAvailable when there is no document or no root,
// kFormNotExist when the root has no "AcroForm" entry, and otherwise maps the
// availability checker's result to the matching form status. For linearized
// files the linearized data must be available first.
CPDF_DataAvail::DocFormStatus CPDF_DataAvail::CheckAcroForm() {
  if (!document_) {
    return kFormAvailable;
  }

  if (linearized_) {
    switch (CheckLinearizedData()) {
      case kDataError:
        return kFormError;
      case kDataNotAvailable:
        return kFormNotAvailable;
      case kDataAvailable:
        break;
    }
  }

  if (!form_avail_) {
    // Create the form checker on first use.
    const CPDF_Dictionary* const doc_root = document_->GetRoot();
    if (!doc_root) {
      return kFormAvailable;
    }
    RetainPtr<const CPDF_Object> acro_form =
        doc_root->GetObjectFor("AcroForm");
    if (!acro_form) {
      return kFormNotExist;
    }
    form_avail_ = std::make_unique<CPDF_PageObjectAvail>(
        GetValidator(), document_, std::move(acro_form));
  }

  switch (form_avail_->CheckAvail()) {
    case kDataAvailable:
      return kFormAvailable;
    case kDataNotAvailable:
      return kFormNotAvailable;
    case kDataError:
      return kFormError;
  }
}
bool CPDF_DataAvail::ValidatePage(uint32_t dwPage) const {
int iPage = pdfium::checked_cast<int>(dwPage);
RetainPtr<const CPDF_Dictionary> pPageDict =
document_->GetPageDictionary(iPage);
if (!pPageDict) {
return false;
}
CPDF_PageObjectAvail obj_avail(GetValidator(), document_,
std::move(pPageDict));
return obj_avail.CheckAvail() == kDataAvailable;
}
// Creates a CPDF_Document, registers this object as its observer, and loads
// it as a linearized document through the validator using |password|.
// Returns {SUCCESS, document} on success and starts tracking the document.
// Returns {HANDLER_ERROR, nullptr} when a document was already returned
// earlier or when the load hit read problems, and {error, nullptr} when the
// load itself fails.
std::pair<CPDF_Parser::Error, std::unique_ptr<CPDF_Document>>
CPDF_DataAvail::ParseDocument(
    std::unique_ptr<CPDF_Document::RenderDataIface> pRenderData,
    std::unique_ptr<CPDF_Document::PageDataIface> pPageData,
    const ByteString& password) {
  if (document_) {
    // A parsed document has already been handed out.
    return {CPDF_Parser::HANDLER_ERROR, nullptr};
  }

  auto new_document = std::make_unique<CPDF_Document>(std::move(pRenderData),
                                                      std::move(pPageData));
  new_document->AddObserver(this);

  CPDF_ReadValidator::ScopedSession read_session(GetValidator());
  const CPDF_Parser::Error load_result =
      new_document->LoadLinearizedDoc(GetValidator(), password);

  // Extra safety check that no read problems occurred during the load.
  if (GetValidator()->has_read_problems()) {
    // TODO(crbug.com/42271016): Figure out if this should be a CHECK() or the
    // DCHECK() removed.
    DCHECK(false);
    return {CPDF_Parser::HANDLER_ERROR, nullptr};
  }
  if (load_result != CPDF_Parser::SUCCESS) {
    return {load_result, nullptr};
  }

  document_ = new_document.get();
  return {CPDF_Parser::SUCCESS, std::move(new_document)};
}
// Out-of-line defaulted constructor and destructor for PageNode.
CPDF_DataAvail::PageNode::PageNode() = default;
CPDF_DataAvail::PageNode::~PageNode() = default;