blob: 6489b1ed980fd6fbe147226fc78dd54a7b7d6c6d [file] [log] [blame]
// Copyright 2014 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
#include "core/include/fpdfapi/fpdf_parser.h"
#include <algorithm>
#include <memory>
#include <set>
#include <utility>
#include <vector>
#include "core/include/fpdfapi/fpdf_module.h"
#include "core/include/fpdfapi/fpdf_page.h"
#include "core/include/fxcrt/fx_ext.h"
#include "core/include/fxcrt/fx_safe_types.h"
#include "core/src/fpdfapi/fpdf_page/pageint.h"
#include "core/src/fpdfapi/fpdf_parser/parser_int.h"
#include "third_party/base/stl_util.h"
namespace {
// Upper bound accepted for a trailer's /Size value before the object map is
// shrunk to it. Theoretical limits are higher, but this may be large enough
// in practice.
const int32_t kMaxXRefSize = 1048576;
// Upper bound (exclusive) on the starting object number of a classic xref
// subsection. Theoretical limits are higher, but this may be large enough in
// practice.
const FX_DWORD kMaxObjectNumber = 1048576;
// Plain record describing a tag to search for. It is not referenced in the
// visible part of this file; the field meanings below are inferred from their
// names only -- TODO confirm against the code that uses it.
struct SearchTagRecord {
// Presumably the tag text being searched for.
const char* m_pTag;
// Presumably the length of |m_pTag| in bytes.
FX_DWORD m_Len;
// Presumably the current match offset within the tag.
FX_DWORD m_Offset;
};
// Scans |pFile| for the 4-byte "%PDF" signature, trying every start offset
// from 0 to 1024 inclusive.
//
// Returns the offset at which the signature starts, or -1 when |pFile| is
// null, a read fails, or no signature is found in the scanned range.
int32_t GetHeaderOffset(IFX_FileRead* pFile) {
  // The caller treats a null file as possible, so do not dereference it.
  if (!pFile)
    return -1;

  const size_t kBufSize = 4;
  const int32_t kMaxOffset = 1024;
  uint8_t buf[kBufSize];
  for (int32_t offset = 0; offset <= kMaxOffset; ++offset) {
    if (!pFile->ReadBlock(buf, offset, kBufSize))
      return -1;
    // Compare byte-wise instead of reinterpreting |buf| as a 32-bit integer;
    // this avoids a type-punned read and is independent of host byte order.
    if (buf[0] == '%' && buf[1] == 'P' && buf[2] == 'D' && buf[3] == 'F')
      return offset;
  }
  return -1;
}
// Returns the integer value stored directly under |key| in |pDict|, or 0 when
// the entry is absent or is not a number object.
int32_t GetDirectInteger(CPDF_Dictionary* pDict, const CFX_ByteStringC& key) {
  if (CPDF_Number* pNumber = ToNumber(pDict->GetElement(key)))
    return pNumber->GetInteger();
  return 0;
}
// Decodes the first |n| bytes at |p| as a big-endian unsigned integer.
// Returns 0 when |n| is not positive; high-order bytes beyond the width of
// FX_DWORD are shifted out.
FX_DWORD GetVarInt(const uint8_t* p, int32_t n) {
  FX_DWORD value = 0;
  int32_t idx = 0;
  while (idx < n) {
    value = (value << 8) | p[idx];
    ++idx;
  }
  return value;
}
// Returns the integer stored under "N" in the dictionary of |pObjStream|.
int32_t GetStreamNCount(CPDF_StreamAcc* pObjStream) {
  const int32_t nCount = pObjStream->GetDict()->GetIntegerBy("N");
  return nCount;
}
// Returns the integer stored under "First" in the dictionary of |pObjStream|.
int32_t GetStreamFirst(CPDF_StreamAcc* pObjStream) {
  const int32_t nFirst = pObjStream->GetDict()->GetIntegerBy("First");
  return nFirst;
}
// Returns true when |num_bits| is a valid (non-overflowed) value and
// |hStream| still has at least that many bits remaining.
bool CanReadFromBitStream(const CFX_BitStream* hStream,
                          const FX_SAFE_DWORD& num_bits) {
  if (!num_bits.IsValid())
    return false;
  return hStream->BitsRemaining() >= num_bits.ValueOrDie();
}
} // namespace
// TODO(thestig): Using unique_ptr with ReleaseDeleter is still not ideal.
// Come up with, or wait for, something better.
using ScopedFileStream =
std::unique_ptr<IFX_FileStream, ReleaseDeleter<IFX_FileStream>>;
bool IsSignatureDict(const CPDF_Dictionary* pDict) {
CPDF_Object* pType = pDict->GetElementValue("Type");
if (!pType)
pType = pDict->GetElementValue("FT");
return pType && pType->GetString() == "Sig";
}
// Constructs a parser with no document, trailer, encryption dictionary or
// linearization dictionary attached. The parser starts out flagged as owning
// its file reader.
// NOTE(review): m_bVersionUpdated, m_bXRefStream and m_LastXRefOffset are not
// initialized here; they are first assigned in CloseParser()/StartParse().
// Confirm whether the class declaration gives them default values.
CPDF_Parser::CPDF_Parser()
: m_pDocument(nullptr),
m_bOwnFileRead(true),
m_FileVersion(0),
m_pTrailer(nullptr),
m_pEncryptDict(nullptr),
m_pLinearized(nullptr),
m_dwFirstPageNo(0),
m_dwXrefStartObjNum(0) {}
// Releases everything the parser holds by delegating to CloseParser().
CPDF_Parser::~CPDF_Parser() {
CloseParser();
}
// Returns the largest object number present in the object map, or 0 when the
// map is empty.
FX_DWORD CPDF_Parser::GetLastObjNum() const {
  if (m_ObjectInfo.empty())
    return 0;
  return m_ObjectInfo.rbegin()->first;
}
// Returns true when the object map is non-empty and |objnum| does not exceed
// the largest object number stored in it.
bool CPDF_Parser::IsValidObjectNumber(FX_DWORD objnum) const {
  if (m_ObjectInfo.empty())
    return false;
  const FX_DWORD dwLast = m_ObjectInfo.rbegin()->first;
  return objnum <= dwLast;
}
// Returns the recorded position for |objnum|, or 0 when the object map has no
// entry for it.
FX_FILESIZE CPDF_Parser::GetObjectPositionOrZero(FX_DWORD objnum) const {
  auto found = m_ObjectInfo.find(objnum);
  if (found == m_ObjectInfo.end())
    return 0;
  return found->second.pos;
}
// Returns the recorded type for |objnum|, or 0 when the object map has no
// entry for it. Asserts that |objnum| is within the valid range.
uint8_t CPDF_Parser::GetObjectType(FX_DWORD objnum) const {
  ASSERT(IsValidObjectNumber(objnum));
  auto found = m_ObjectInfo.find(objnum);
  if (found == m_ObjectInfo.end())
    return 0;
  return found->second.type;
}
// Returns the recorded generation number for |objnum|, or 0 when the object
// map has no entry for it. Asserts that |objnum| is within the valid range.
uint16_t CPDF_Parser::GetObjectGenNum(FX_DWORD objnum) const {
  ASSERT(IsValidObjectNumber(objnum));
  auto found = m_ObjectInfo.find(objnum);
  if (found == m_ObjectInfo.end())
    return 0;
  return found->second.gennum;
}
// Returns true when the recorded type of |objnum| is 0 or 255.
bool CPDF_Parser::IsObjectFreeOrNull(FX_DWORD objnum) const {
  switch (GetObjectType(objnum)) {
    case 0:
    case 255:
      return true;
    default:
      return false;
  }
}
// Stores |pDict| as the current encryption dictionary. The pointer is only
// recorded here; nothing is released or cloned.
void CPDF_Parser::SetEncryptDictionary(CPDF_Dictionary* pDict) {
m_pEncryptDict = pDict;
}
// Trims the object map so it holds no object number >= |objnum|.
// When |objnum| is 0 the map is emptied. Otherwise an entry for |objnum| - 1
// is guaranteed to exist afterwards (created with position 0 if missing), so
// the largest key in the map becomes |objnum| - 1.
void CPDF_Parser::ShrinkObjectMap(FX_DWORD objnum) {
  if (objnum == 0) {
    m_ObjectInfo.clear();
    return;
  }

  m_ObjectInfo.erase(m_ObjectInfo.lower_bound(objnum), m_ObjectInfo.end());

  const FX_DWORD dwLast = objnum - 1;
  if (m_ObjectInfo.find(dwLast) == m_ObjectInfo.end())
    m_ObjectInfo[dwLast].pos = 0;
}
void CPDF_Parser::CloseParser() {
m_bVersionUpdated = FALSE;
delete m_pDocument;
m_pDocument = nullptr;
if (m_pTrailer) {
m_pTrailer->Release();
m_pTrailer = nullptr;
}
ReleaseEncryptHandler();
SetEncryptDictionary(nullptr);
if (m_bOwnFileRead && m_Syntax.m_pFileAccess) {
m_Syntax.m_pFileAccess->Release();
m_Syntax.m_pFileAccess = nullptr;
}
m_ObjectStreamMap.clear();
m_ObjCache.clear();
m_SortedOffset.clear();
m_ObjectInfo.clear();
int32_t iLen = m_Trailers.GetSize();
for (int32_t i = 0; i < iLen; ++i) {
if (CPDF_Dictionary* trailer = m_Trailers.GetAt(i))
trailer->Release();
}
m_Trailers.RemoveAll();
if (m_pLinearized) {
m_pLinearized->Release();
m_pLinearized = nullptr;
}
}
// Opens |pFileAccess| as a PDF: locates the header, reads the version digits,
// loads (or rebuilds) the cross-reference data, installs the encryption
// handler and loads the document.
//
// Returns SUCCESS on success, FORMAT_ERROR when the file cannot be
// interpreted (including a null |pFileAccess|), or whatever error
// SetEncryptHandler() reports.
//
// When the header is not found, |pFileAccess| is released here. After
// InitParser() the reader is held by |m_Syntax| and released by
// CloseParser().
CPDF_Parser::Error CPDF_Parser::StartParse(IFX_FileRead* pFileAccess) {
  CloseParser();
  m_bXRefStream = FALSE;
  m_LastXRefOffset = 0;
  m_bOwnFileRead = true;

  // Reject a null reader up front; previously it was passed on to
  // GetHeaderOffset() and dereferenced before the null check below could run.
  if (!pFileAccess)
    return FORMAT_ERROR;

  int32_t offset = GetHeaderOffset(pFileAccess);
  if (offset == -1) {
    pFileAccess->Release();
    return FORMAT_ERROR;
  }
  m_Syntax.InitParser(pFileAccess, offset);

  // Header looks like "%PDF-M.m": the characters at positions 5 and 7 are the
  // major and minor version digits.
  uint8_t ch;
  if (!m_Syntax.GetCharAt(5, ch))
    return FORMAT_ERROR;
  if (std::isdigit(ch))
    m_FileVersion = FXSYS_toDecimalDigit(ch) * 10;
  if (!m_Syntax.GetCharAt(7, ch))
    return FORMAT_ERROR;
  if (std::isdigit(ch))
    m_FileVersion += FXSYS_toDecimalDigit(ch);

  if (m_Syntax.m_FileLen < m_Syntax.m_HeaderOffset + 9)
    return FORMAT_ERROR;

  // Search backwards from near the end of the file for "startxref".
  m_Syntax.RestorePos(m_Syntax.m_FileLen - m_Syntax.m_HeaderOffset - 9);
  m_pDocument = new CPDF_Document(this);

  FX_BOOL bXRefRebuilt = FALSE;
  if (m_Syntax.SearchWord("startxref", TRUE, FALSE, 4096)) {
    m_SortedOffset.insert(m_Syntax.SavePos());
    m_Syntax.GetKeyword();
    bool bNumber;
    CFX_ByteString xrefpos_str = m_Syntax.GetNextWord(&bNumber);
    if (!bNumber)
      return FORMAT_ERROR;
    m_LastXRefOffset = (FX_FILESIZE)FXSYS_atoi64(xrefpos_str);
    // Try a classic xref table first, then an xref stream, then fall back to
    // scanning the whole file.
    if (!LoadAllCrossRefV4(m_LastXRefOffset) &&
        !LoadAllCrossRefV5(m_LastXRefOffset)) {
      if (!RebuildCrossRef())
        return FORMAT_ERROR;
      bXRefRebuilt = TRUE;
      m_LastXRefOffset = 0;
    }
  } else {
    if (!RebuildCrossRef())
      return FORMAT_ERROR;
    bXRefRebuilt = TRUE;
  }

  Error eRet = SetEncryptHandler();
  if (eRet != SUCCESS)
    return eRet;

  m_pDocument->LoadDoc();
  if (!m_pDocument->GetRoot() || m_pDocument->GetPageCount() == 0) {
    // The loaded xref data did not yield a usable document. If it had already
    // been rebuilt there is nothing more to try.
    if (bXRefRebuilt)
      return FORMAT_ERROR;
    ReleaseEncryptHandler();
    if (!RebuildCrossRef())
      return FORMAT_ERROR;
    eRet = SetEncryptHandler();
    if (eRet != SUCCESS)
      return eRet;
    m_pDocument->LoadDoc();
    if (!m_pDocument->GetRoot())
      return FORMAT_ERROR;
  }

  if (GetRootObjNum() == 0) {
    // The trailer's Root is missing or is not a reference: rebuild once more.
    ReleaseEncryptHandler();
    if (!RebuildCrossRef() || GetRootObjNum() == 0)
      return FORMAT_ERROR;
    eRet = SetEncryptHandler();
    if (eRet != SUCCESS)
      return eRet;
  }

  // When metadata is not encrypted, remember its object number in the syntax
  // parser.
  if (m_pSecurityHandler && !m_pSecurityHandler->IsMetadataEncrypted()) {
    CPDF_Reference* pMetadata =
        ToReference(m_pDocument->GetRoot()->GetElement("Metadata"));
    if (pMetadata)
      m_Syntax.m_MetadataObjnum = pMetadata->GetRefObjNum();
  }
  return SUCCESS;
}
// Re-creates the security and crypto handlers from the trailer's "Encrypt"
// entry.
//
// Returns FORMAT_ERROR when there is no trailer, SUCCESS when the document is
// not encrypted or the handlers were installed, HANDLER_ERROR when the filter
// is not "Standard" or the crypto handler fails to initialize, and
// PASSWORD_ERROR when the standard security handler fails to initialize.
CPDF_Parser::Error CPDF_Parser::SetEncryptHandler() {
  ReleaseEncryptHandler();
  SetEncryptDictionary(nullptr);
  if (!m_pTrailer)
    return FORMAT_ERROR;

  // "Encrypt" may be an inline dictionary or a reference to one.
  if (CPDF_Object* pEncryptObj = m_pTrailer->GetElement("Encrypt")) {
    if (CPDF_Dictionary* pInlineDict = pEncryptObj->AsDictionary()) {
      SetEncryptDictionary(pInlineDict);
    } else if (CPDF_Reference* pRef = pEncryptObj->AsReference()) {
      CPDF_Object* pTarget =
          m_pDocument->GetIndirectObject(pRef->GetRefObjNum());
      if (pTarget)
        SetEncryptDictionary(pTarget->GetDict());
    }
  }

  if (!m_pEncryptDict)
    return SUCCESS;

  // Only the "Standard" filter is supported.
  CFX_ByteString filter = m_pEncryptDict->GetStringBy("Filter");
  if (!(filter == "Standard"))
    return HANDLER_ERROR;

  std::unique_ptr<IPDF_SecurityHandler> pSecurityHandler(
      new CPDF_StandardSecurityHandler);
  if (!pSecurityHandler->OnInit(this, m_pEncryptDict))
    return PASSWORD_ERROR;
  m_pSecurityHandler = std::move(pSecurityHandler);

  std::unique_ptr<CPDF_CryptoHandler> pCryptoHandler(
      m_pSecurityHandler->CreateCryptoHandler());
  if (!pCryptoHandler->Init(m_pEncryptDict, m_pSecurityHandler.get()))
    return HANDLER_ERROR;

  m_Syntax.SetEncrypt(std::move(pCryptoHandler));
  return SUCCESS;
}
// Drops the crypto handler held by the syntax parser, then the security
// handler held by this parser.
void CPDF_Parser::ReleaseEncryptHandler() {
m_Syntax.m_pCryptoHandler.reset();
m_pSecurityHandler.reset();
}
// Returns the position of |objnum|:
//  - type 1: the position recorded for the object itself;
//  - type 2: the position recorded for the object whose number is stored in
//    this entry's position field;
//  - anything else, or an out-of-range |objnum|: 0.
FX_FILESIZE CPDF_Parser::GetObjectOffset(FX_DWORD objnum) const {
  if (!IsValidObjectNumber(objnum))
    return 0;

  switch (GetObjectType(objnum)) {
    case 1:
      return GetObjectPositionOrZero(objnum);
    case 2: {
      FX_FILESIZE containerNum = GetObjectPositionOrZero(objnum);
      return GetObjectPositionOrZero(containerNum);
    }
    default:
      return 0;
  }
}
// Loads the classic cross-reference table at |xrefpos| and every older table
// reachable through the trailers' "Prev" entries.
//
// The first pass walks the chain with bSkip == TRUE, collecting each table's
// offset, its "XRefStm" value and its trailer. The second pass loads the
// tables for real, from the last one reached in the chain back to the one at
// the original |xrefpos|.
//
// Returns FALSE when the first table or any trailer cannot be loaded, when
// the "Prev" chain is circular, or when any table fails to load in the second
// pass.
FX_BOOL CPDF_Parser::LoadAllCrossRefV4(FX_FILESIZE xrefpos) {
  if (!LoadCrossRefV4(xrefpos, 0, TRUE))
    return FALSE;

  m_pTrailer = LoadTrailerV4();
  if (!m_pTrailer)
    return FALSE;

  int32_t xrefsize = GetDirectInteger(m_pTrailer, "Size");
  if (xrefsize > 0 && xrefsize <= kMaxXRefSize)
    ShrinkObjectMap(xrefsize);

  // Both lists are kept in discovery order (starting table first) and are
  // walked backwards below. Appending keeps this linear; inserting at the
  // front of a vector on every step was quadratic.
  std::vector<FX_FILESIZE> CrossRefList;
  std::vector<FX_FILESIZE> XRefStreamList;
  std::set<FX_FILESIZE> seen_xrefpos;

  CrossRefList.push_back(xrefpos);
  XRefStreamList.push_back(GetDirectInteger(m_pTrailer, "XRefStm"));
  seen_xrefpos.insert(xrefpos);

  // When |m_pTrailer| doesn't have Prev entry or Prev entry value is not
  // numerical, GetDirectInteger() returns 0. Loading will end.
  xrefpos = GetDirectInteger(m_pTrailer, "Prev");
  while (xrefpos) {
    // Check for circular references.
    if (pdfium::ContainsKey(seen_xrefpos, xrefpos))
      return FALSE;
    seen_xrefpos.insert(xrefpos);

    CrossRefList.push_back(xrefpos);
    // The result is not checked here, as in the original code; a bad table
    // is reported when the trailer load or the second pass fails.
    LoadCrossRefV4(xrefpos, 0, TRUE);

    std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict(
        LoadTrailerV4());
    if (!pDict)
      return FALSE;

    xrefpos = GetDirectInteger(pDict.get(), "Prev");
    XRefStreamList.push_back(pDict->GetIntegerBy("XRefStm"));
    m_Trailers.Add(pDict.release());
  }

  // Load from the end of the chain back to the starting table.
  for (size_t i = CrossRefList.size(); i > 0; --i) {
    if (!LoadCrossRefV4(CrossRefList[i - 1], XRefStreamList[i - 1], FALSE))
      return FALSE;
  }
  return TRUE;
}
// Loads the cross-reference table at |xrefpos| via LoadLinearizedCrossRefV4()
// (which reads |dwObjCount| entries), then follows the trailers' "Prev"
// entries and loads tables from the resulting list.
//
// Returns FALSE when the first table or any trailer cannot be loaded, when
// the trailer's "Size" is 0, when the "Prev" chain is circular, or when a
// table fails to load in the final loop.
FX_BOOL CPDF_Parser::LoadLinearizedAllCrossRefV4(FX_FILESIZE xrefpos,
FX_DWORD dwObjCount) {
if (!LoadLinearizedCrossRefV4(xrefpos, dwObjCount)) {
return FALSE;
}
m_pTrailer = LoadTrailerV4();
if (!m_pTrailer) {
return FALSE;
}
int32_t xrefsize = GetDirectInteger(m_pTrailer, "Size");
if (xrefsize == 0) {
return FALSE;
}
// Offsets of the tables in the chain and their matching "XRefStm" values.
// New entries are inserted at the front, so index 0 ends up holding the last
// table reached through "Prev".
std::vector<FX_FILESIZE> CrossRefList;
std::vector<FX_FILESIZE> XRefStreamList;
std::set<FX_FILESIZE> seen_xrefpos;
CrossRefList.push_back(xrefpos);
XRefStreamList.push_back(GetDirectInteger(m_pTrailer, "XRefStm"));
seen_xrefpos.insert(xrefpos);
// GetDirectInteger() returns 0 when "Prev" is absent or not a number, which
// ends the loop.
xrefpos = GetDirectInteger(m_pTrailer, "Prev");
while (xrefpos) {
// Check for circular references.
if (pdfium::ContainsKey(seen_xrefpos, xrefpos))
return FALSE;
seen_xrefpos.insert(xrefpos);
// SLOW ...
CrossRefList.insert(CrossRefList.begin(), xrefpos);
// First pass with bSkip == TRUE; the return value is not checked.
LoadCrossRefV4(xrefpos, 0, TRUE);
std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict(
LoadTrailerV4());
if (!pDict) {
return FALSE;
}
xrefpos = GetDirectInteger(pDict.get(), "Prev");
// SLOW ...
XRefStreamList.insert(XRefStreamList.begin(),
pDict->GetIntegerBy("XRefStm"));
// Ownership of the trailer moves to |m_Trailers|.
m_Trailers.Add(pDict.release());
}
// NOTE(review): this loop starts at index 1. Because entries are inserted at
// the front, the element skipped is the last table reached through "Prev",
// not the table already loaded by LoadLinearizedCrossRefV4() above (unless
// there is no "Prev" at all). Confirm this is the intended behavior.
for (size_t i = 1; i < CrossRefList.size(); ++i)
if (!LoadCrossRefV4(CrossRefList[i], XRefStreamList[i], FALSE)) {
return FALSE;
}
return TRUE;
}
// Reads |dwObjCount| fixed-width (20-byte) cross-reference entries starting
// at file offset |pos| and records them for object numbers 0..dwObjCount-1.
//
// Entries whose byte 17 is 'f' are recorded as type 0 with position 0. All
// other entries are recorded as type 1 with the parsed offset and generation
// number. Returns FALSE when a read would pass the end of the file, when a
// read fails, or when an entry whose offset parses as 0 is not made of ten
// digits.
FX_BOOL CPDF_Parser::LoadLinearizedCrossRefV4(FX_FILESIZE pos,
                                              FX_DWORD dwObjCount) {
  FX_FILESIZE dwStartPos = pos - m_Syntax.m_HeaderOffset;
  m_Syntax.RestorePos(dwStartPos);
  m_SortedOffset.insert(pos);

  FX_DWORD start_objnum = 0;
  FX_DWORD count = dwObjCount;
  FX_FILESIZE SavedPos = m_Syntax.SavePos();

  // Entries are read in blocks of up to 1024 records. The extra byte keeps
  // the buffer NUL-terminated for the string-to-number helpers.
  const int32_t recordsize = 20;
  std::vector<char> buf(1024 * recordsize + 1);
  buf[1024 * recordsize] = '\0';

  int32_t nBlocks = count / 1024 + 1;
  for (int32_t block = 0; block < nBlocks; block++) {
    int32_t block_size = block == nBlocks - 1 ? count % 1024 : 1024;
    FX_DWORD dwReadSize = block_size * recordsize;
    if ((FX_FILESIZE)(dwStartPos + dwReadSize) > m_Syntax.m_FileLen)
      return FALSE;

    // The final block is empty when |count| is a multiple of 1024. Skip it
    // rather than issue a zero-length read, which a reader may reject.
    if (block_size == 0)
      continue;

    if (!m_Syntax.ReadBlock(reinterpret_cast<uint8_t*>(buf.data()),
                            dwReadSize)) {
      return FALSE;
    }

    for (int32_t i = 0; i < block_size; i++) {
      FX_DWORD objnum = start_objnum + block * 1024 + i;
      char* pEntry = &buf[i * recordsize];
      if (pEntry[17] == 'f') {
        m_ObjectInfo[objnum].pos = 0;
        m_ObjectInfo[objnum].type = 0;
        continue;
      }

      // Parse as 64-bit, consistent with LoadCrossRefV4(), so offsets beyond
      // the int32 range are not mangled when FX_FILESIZE is wide enough.
      FX_FILESIZE offset = (FX_FILESIZE)FXSYS_atoi64(pEntry);
      if (offset == 0) {
        // A zero result is only accepted when the field is ten digits.
        for (int32_t c = 0; c < 10; c++) {
          if (!std::isdigit(pEntry[c]))
            return FALSE;
        }
      }
      m_ObjectInfo[objnum].pos = offset;

      int32_t version = FXSYS_atoi(pEntry + 11);
      if (version >= 1)
        m_bVersionUpdated = TRUE;
      m_ObjectInfo[objnum].gennum = version;

      if (m_ObjectInfo[objnum].pos < m_Syntax.m_FileLen)
        m_SortedOffset.insert(m_ObjectInfo[objnum].pos);
      m_ObjectInfo[objnum].type = 1;
    }
  }
  m_Syntax.RestorePos(SavedPos + count * recordsize);
  return TRUE;
}
// Parses the classic cross-reference table located at |pos|.
//
// |streampos|, when non-zero, is recorded as a known offset and loaded as a
// cross-reference stream after the table. When |bSkip| is TRUE the
// subsections are walked but their entries are not recorded.
//
// Returns false when the "xref" keyword is missing, a subsection header is
// empty or has a start number >= kMaxObjectNumber, a block of entries cannot
// be read, an entry whose offset parses as 0 is not made of ten digits, or
// the trailing stream fails to load.
bool CPDF_Parser::LoadCrossRefV4(FX_FILESIZE pos,
                                 FX_FILESIZE streampos,
                                 FX_BOOL bSkip) {
  m_Syntax.RestorePos(pos);
  if (m_Syntax.GetKeyword() != "xref")
    return false;

  m_SortedOffset.insert(pos);
  if (streampos)
    m_SortedOffset.insert(streampos);

  while (1) {
    FX_FILESIZE SavedPos = m_Syntax.SavePos();
    bool bIsNumber;
    CFX_ByteString word = m_Syntax.GetNextWord(&bIsNumber);
    if (word.IsEmpty())
      return false;

    // A non-numeric word ends the subsections; rewind so it can be re-read.
    if (!bIsNumber) {
      m_Syntax.RestorePos(SavedPos);
      break;
    }

    FX_DWORD start_objnum = FXSYS_atoi(word);
    if (start_objnum >= kMaxObjectNumber)
      return false;

    FX_DWORD count = m_Syntax.GetDirectNum();
    m_Syntax.ToNextWord();
    SavedPos = m_Syntax.SavePos();
    const int32_t recordsize = 20;
    m_dwXrefStartObjNum = start_objnum;

    if (!bSkip) {
      // Entries are read in blocks of up to 1024 records. The extra byte
      // keeps the buffer NUL-terminated for the string-to-number helpers.
      std::vector<char> buf(1024 * recordsize + 1);
      buf[1024 * recordsize] = '\0';
      int32_t nBlocks = count / 1024 + 1;
      for (int32_t block = 0; block < nBlocks; block++) {
        int32_t block_size = block == nBlocks - 1 ? count % 1024 : 1024;
        // The final block is empty when |count| is a multiple of 1024.
        if (block_size == 0)
          continue;

        // A failed read used to be ignored, leaving the previous block's
        // bytes in |buf| to be parsed as if they were new entries.
        if (!m_Syntax.ReadBlock(reinterpret_cast<uint8_t*>(buf.data()),
                                block_size * recordsize)) {
          return false;
        }

        for (int32_t i = 0; i < block_size; i++) {
          FX_DWORD objnum = start_objnum + block * 1024 + i;
          char* pEntry = &buf[i * recordsize];
          if (pEntry[17] == 'f') {
            m_ObjectInfo[objnum].pos = 0;
            m_ObjectInfo[objnum].type = 0;
            continue;
          }

          FX_FILESIZE offset = (FX_FILESIZE)FXSYS_atoi64(pEntry);
          if (offset == 0) {
            // A zero result is only accepted when the field is ten digits.
            for (int32_t c = 0; c < 10; c++) {
              if (!std::isdigit(pEntry[c]))
                return false;
            }
          }
          m_ObjectInfo[objnum].pos = offset;

          int32_t version = FXSYS_atoi(pEntry + 11);
          if (version >= 1)
            m_bVersionUpdated = TRUE;
          m_ObjectInfo[objnum].gennum = version;

          if (m_ObjectInfo[objnum].pos < m_Syntax.m_FileLen)
            m_SortedOffset.insert(m_ObjectInfo[objnum].pos);
          m_ObjectInfo[objnum].type = 1;
        }
      }
    }
    // Continue after this subsection's entries whether or not they were read.
    m_Syntax.RestorePos(SavedPos + count * recordsize);
  }
  return !streampos || LoadCrossRefV5(&streampos, FALSE);
}
// Loads the cross-reference stream at |xrefpos| as the main one, then every
// stream reachable through the "Prev" offsets that LoadCrossRefV5() writes
// back into |xrefpos|. Returns FALSE on any load failure or when a "Prev"
// offset repeats. On success the object-stream map is cleared and the parser
// is flagged as using xref streams.
FX_BOOL CPDF_Parser::LoadAllCrossRefV5(FX_FILESIZE xrefpos) {
  if (!LoadCrossRefV5(&xrefpos, TRUE))
    return FALSE;

  std::set<FX_FILESIZE> visited;
  while (xrefpos != 0) {
    visited.insert(xrefpos);
    if (!LoadCrossRefV5(&xrefpos, FALSE))
      return FALSE;
    // A "Prev" offset that was already visited means the chain is circular.
    if (visited.find(xrefpos) != visited.end())
      return FALSE;
  }

  m_ObjectStreamMap.clear();
  m_bXRefStream = TRUE;
  return TRUE;
}
// Rebuilds the object map and trailer by scanning the whole file, without
// relying on any cross-reference table.
//
// The file is read in 4096-byte chunks and fed, byte by byte, into a state
// machine (|status|) that recognizes "<objnum> <gennum> obj" headers and the
// "trailer" and "xref" keywords. For each object header found the object is
// parsed and its position recorded; for each trailer found the trailer
// dictionary is adopted or merged into |m_pTrailer|.
//
// Returns TRUE when a trailer was found and at least one object was recorded.
FX_BOOL CPDF_Parser::RebuildCrossRef() {
// Discard previously loaded cross-reference data and trailer.
m_ObjectInfo.clear();
m_SortedOffset.clear();
if (m_pTrailer) {
m_pTrailer->Release();
m_pTrailer = NULL;
}
// |status| is the current state of the scanner; |inside_index| counts how
// many characters of the keyword being matched have been seen so far (it is
// also reused as a flag in state 11).
int32_t status = 0;
int32_t inside_index = 0;
FX_DWORD objnum = 0;
FX_DWORD gennum = 0;
// Nesting depth of parentheses while inside a literal string (state 10).
int32_t depth = 0;
const FX_DWORD kBufferSize = 4096;
std::vector<uint8_t> buffer(kBufferSize);
// |pos| is the absolute file offset of the start of the current chunk.
FX_FILESIZE pos = m_Syntax.m_HeaderOffset;
// Absolute offsets where the two most recent numbers started.
FX_FILESIZE start_pos = 0;
FX_FILESIZE start_pos1 = 0;
// Absolute offsets of the last object header, "xref" and "trailer" seen.
FX_FILESIZE last_obj = -1;
FX_FILESIZE last_xref = -1;
FX_FILESIZE last_trailer = -1;
while (pos < m_Syntax.m_FileLen) {
const FX_FILESIZE saved_pos = pos;
// Set when a parsed object extends past the current chunk; |pos| is then
// moved to the end of that object and the chunk is abandoned.
bool bOverFlow = false;
FX_DWORD size = std::min((FX_DWORD)(m_Syntax.m_FileLen - pos), kBufferSize);
if (!m_Syntax.m_pFileAccess->ReadBlock(buffer.data(), pos, size))
break;
for (FX_DWORD i = 0; i < size; i++) {
uint8_t byte = buffer[i];
// Throughout the switch, "--i" makes the loop look at the same byte again
// in the new state.
switch (status) {
// State 0: between tokens; decide what kind of token starts here.
case 0:
if (PDFCharIsWhitespace(byte))
status = 1;
if (std::isdigit(byte)) {
--i;
status = 1;
}
if (byte == '%') {
inside_index = 0;
status = 9;
}
if (byte == '(') {
status = 10;
depth = 1;
}
if (byte == '<') {
inside_index = 1;
status = 11;
}
if (byte == '\\')
status = 13;
if (byte == 't') {
status = 7;
inside_index = 1;
}
break;
// State 1: at a token boundary; a digit starts an object number, 't' may
// start "trailer", 'x' may start "xref".
case 1:
if (PDFCharIsWhitespace(byte)) {
break;
} else if (std::isdigit(byte)) {
start_pos = pos + i;
status = 2;
objnum = FXSYS_toDecimalDigit(byte);
} else if (byte == 't') {
status = 7;
inside_index = 1;
} else if (byte == 'x') {
status = 8;
inside_index = 1;
} else {
--i;
status = 0;
}
break;
// State 2: accumulating the digits of the object number.
case 2:
if (std::isdigit(byte)) {
objnum = objnum * 10 + FXSYS_toDecimalDigit(byte);
break;
} else if (PDFCharIsWhitespace(byte)) {
status = 3;
} else {
--i;
status = 14;
inside_index = 0;
}
break;
// State 3: after the object number; expecting the generation number.
case 3:
if (std::isdigit(byte)) {
start_pos1 = pos + i;
status = 4;
gennum = FXSYS_toDecimalDigit(byte);
} else if (PDFCharIsWhitespace(byte)) {
break;
} else if (byte == 't') {
status = 7;
inside_index = 1;
} else {
--i;
status = 0;
}
break;
// State 4: accumulating the digits of the generation number.
case 4:
if (std::isdigit(byte)) {
gennum = gennum * 10 + FXSYS_toDecimalDigit(byte);
break;
} else if (PDFCharIsWhitespace(byte)) {
status = 5;
} else {
--i;
status = 0;
}
break;
// State 5: after two numbers; expecting the 'o' of "obj". A third number
// shifts the window: the second number becomes the object number.
case 5:
if (byte == 'o') {
status = 6;
inside_index = 1;
} else if (PDFCharIsWhitespace(byte)) {
break;
} else if (std::isdigit(byte)) {
objnum = gennum;
gennum = FXSYS_toDecimalDigit(byte);
start_pos = start_pos1;
start_pos1 = pos + i;
status = 4;
} else if (byte == 't') {
status = 7;
inside_index = 1;
} else {
--i;
status = 0;
}
break;
// State 6: matching the rest of "obj" ('b', 'j', then a terminator).
case 6:
switch (inside_index) {
case 1:
if (byte != 'b') {
--i;
status = 0;
} else {
inside_index++;
}
break;
case 2:
if (byte != 'j') {
--i;
status = 0;
} else {
inside_index++;
}
break;
case 3:
if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) {
// Ignore implausibly large object numbers.
if (objnum > 0x1000000) {
status = 0;
break;
}
// Positions stored in the tables are relative to the header offset.
FX_FILESIZE obj_pos = start_pos - m_Syntax.m_HeaderOffset;
m_SortedOffset.insert(obj_pos);
last_obj = start_pos;
FX_FILESIZE obj_end = 0;
CPDF_Object* pObject = ParseIndirectObjectAtByStrict(
m_pDocument, obj_pos, objnum, &obj_end);
// A cross-reference stream ("Type" == "XRef" with a "Size") whose "Root"
// has a "Pages" entry supplies the trailer dictionary.
if (CPDF_Stream* pStream = ToStream(pObject)) {
if (CPDF_Dictionary* pDict = pStream->GetDict()) {
if ((pDict->KeyExist("Type")) &&
(pDict->GetStringBy("Type") == "XRef" &&
pDict->KeyExist("Size"))) {
CPDF_Object* pRoot = pDict->GetElement("Root");
if (pRoot && pRoot->GetDict() &&
pRoot->GetDict()->GetElement("Pages")) {
if (m_pTrailer)
m_pTrailer->Release();
m_pTrailer = ToDictionary(pDict->Clone());
}
}
}
}
// Work out how many bytes of the object body follow the "obj" keyword so
// the scan can skip over them.
FX_FILESIZE offset = 0;
m_Syntax.RestorePos(obj_pos);
offset = m_Syntax.FindTag("obj", 0);
if (offset == -1) {
offset = 0;
} else {
offset += 3;
}
FX_FILESIZE nLen = obj_end - obj_pos - offset;
if ((FX_DWORD)nLen > size - i) {
// The object ends beyond this chunk: restart reading at its end.
pos = obj_end + m_Syntax.m_HeaderOffset;
bOverFlow = true;
} else {
i += (FX_DWORD)nLen;
}
// An object number that already has a non-zero position is only
// overwritten when this occurrence parsed successfully.
if (!m_ObjectInfo.empty() && IsValidObjectNumber(objnum) &&
m_ObjectInfo[objnum].pos) {
if (pObject) {
FX_DWORD oldgen = GetObjectGenNum(objnum);
m_ObjectInfo[objnum].pos = obj_pos;
m_ObjectInfo[objnum].gennum = gennum;
if (oldgen != gennum) {
m_bVersionUpdated = TRUE;
}
}
} else {
m_ObjectInfo[objnum].pos = obj_pos;
m_ObjectInfo[objnum].type = 1;
m_ObjectInfo[objnum].gennum = gennum;
}
if (pObject) {
pObject->Release();
}
}
--i;
status = 0;
break;
}
break;
// State 7: matching "trailer"; once all seven letters and a terminator have
// been seen, the object that follows is parsed as a trailer.
case 7:
if (inside_index == 7) {
if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) {
last_trailer = pos + i - 7;
m_Syntax.RestorePos(pos + i - m_Syntax.m_HeaderOffset);
CPDF_Object* pObj = m_Syntax.GetObject(m_pDocument, 0, 0, true);
if (pObj) {
if (!pObj->IsDictionary() && !pObj->AsStream()) {
pObj->Release();
} else {
CPDF_Stream* pStream = pObj->AsStream();
if (CPDF_Dictionary* pTrailer =
pStream ? pStream->GetDict() : pObj->AsDictionary()) {
if (m_pTrailer) {
// A trailer already exists: merge this one into it when it has no
// "Root", or when its "Root" refers to an object with a known position.
CPDF_Object* pRoot = pTrailer->GetElement("Root");
CPDF_Reference* pRef = ToReference(pRoot);
if (!pRoot ||
(pRef && IsValidObjectNumber(pRef->GetRefObjNum()) &&
m_ObjectInfo[pRef->GetRefObjNum()].pos != 0)) {
auto it = pTrailer->begin();
while (it != pTrailer->end()) {
const CFX_ByteString& key = it->first;
CPDF_Object* pElement = it->second;
++it;
FX_DWORD dwObjNum =
pElement ? pElement->GetObjNum() : 0;
// Entries with an object number are stored as references; others are
// cloned.
// NOTE(review): the else branch dereferences |pElement|, which the line
// above allows to be null -- confirm a dictionary can never hold null.
if (dwObjNum) {
m_pTrailer->SetAtReference(key, m_pDocument,
dwObjNum);
} else {
m_pTrailer->SetAt(key, pElement->Clone());
}
}
pObj->Release();
} else {
pObj->Release();
}
} else {
// First trailer found: adopt it (cloning the dictionary when it belongs
// to a stream).
if (pObj->IsStream()) {
m_pTrailer = ToDictionary(pTrailer->Clone());
pObj->Release();
} else {
m_pTrailer = pTrailer;
}
// Pick up the offset following "startxref", if present, then restore the
// syntax parser position.
FX_FILESIZE dwSavePos = m_Syntax.SavePos();
CFX_ByteString strWord = m_Syntax.GetKeyword();
if (!strWord.Compare("startxref")) {
bool bNumber;
CFX_ByteString bsOffset =
m_Syntax.GetNextWord(&bNumber);
if (bNumber) {
m_LastXRefOffset = FXSYS_atoi(bsOffset);
}
}
m_Syntax.RestorePos(dwSavePos);
}
} else {
pObj->Release();
}
}
}
}
--i;
status = 0;
} else if (byte == "trailer"[inside_index]) {
inside_index++;
} else {
--i;
status = 0;
}
break;
// State 8: matching "xref"; records where the keyword started.
case 8:
if (inside_index == 4) {
last_xref = pos + i - 4;
status = 1;
} else if (byte == "xref"[inside_index]) {
inside_index++;
} else {
--i;
status = 0;
}
break;
// State 9: inside a '%' comment; ends at end of line.
case 9:
if (byte == '\r' || byte == '\n') {
status = 0;
}
break;
// State 10: inside a parenthesized string; tracks nesting depth.
case 10:
if (byte == ')') {
if (depth > 0) {
depth--;
}
} else if (byte == '(') {
depth++;
}
if (!depth) {
status = 0;
}
break;
// State 11: after '<'; ends on '>' or on a second '<' immediately after the
// first.
case 11:
if (byte == '>' || (byte == '<' && inside_index == 1))
status = 0;
inside_index = 0;
break;
// State 13: after a backslash; skip until whitespace or a delimiter.
case 13:
if (PDFCharIsDelimiter(byte) || PDFCharIsWhitespace(byte)) {
--i;
status = 0;
}
break;
// State 14: a number was followed directly by a non-whitespace byte; skip
// the rest of the token, stopping early once "endobj" has been matched.
case 14:
if (PDFCharIsWhitespace(byte)) {
status = 0;
} else if (byte == '%' || byte == '(' || byte == '<' ||
byte == '\\') {
status = 0;
--i;
} else if (inside_index == 6) {
status = 0;
--i;
} else if (byte == "endobj"[inside_index]) {
inside_index++;
}
break;
}
if (bOverFlow) {
// |pos| already points past the parsed object; do not add |size| to it.
size = 0;
break;
}
}
pos += size;
// If the position has not changed at all in a loop iteration, then break
// out to prevent infinite looping.
if (pos == saved_pos)
break;
}
// Record an end marker in the sorted offsets: the last "xref" if it follows
// the last object, otherwise the last "trailer" or the end of the file.
if (last_xref != -1 && last_xref > last_obj) {
last_trailer = last_xref;
} else if (last_trailer == -1 || last_xref < last_obj) {
last_trailer = m_Syntax.m_FileLen;
}
m_SortedOffset.insert(last_trailer - m_Syntax.m_HeaderOffset);
return m_pTrailer && !m_ObjectInfo.empty();
}
// Loads the cross-reference stream located at |*pos|.
//
// On return |*pos| holds the stream's "Prev" value (0 when absent), so the
// caller can follow the chain. When |bMainXRef| is TRUE the stream dictionary
// becomes |m_pTrailer|, the object map is shrunk to the stream's "Size" and
// every existing entry's type is reset to 0 so that entries from this stream
// can be recorded; otherwise the dictionary is appended to |m_Trailers|.
//
// Returns FALSE when the object at |*pos| cannot be parsed or inserted into
// the document, is not a stream, has a negative "Size", lacks a usable "W"
// array, or contains a type-2 entry that refers to an invalid object number.
FX_BOOL CPDF_Parser::LoadCrossRefV5(FX_FILESIZE* pos, FX_BOOL bMainXRef) {
  CPDF_Object* pObject = ParseIndirectObjectAt(m_pDocument, *pos, 0);
  if (!pObject)
    return FALSE;

  if (m_pDocument) {
    FX_BOOL bInserted = FALSE;
    CPDF_Dictionary* pDict = m_pDocument->GetRoot();
    if (!pDict || pDict->GetObjNum() != pObject->m_ObjNum) {
      bInserted = m_pDocument->InsertIndirectObject(pObject->m_ObjNum, pObject);
    } else {
      // The object carries the same number as the document root and is not
      // inserted.
      if (pObject->IsStream())
        pObject->Release();
    }
    if (!bInserted)
      return FALSE;
  }

  CPDF_Stream* pStream = pObject->AsStream();
  if (!pStream)
    return FALSE;

  *pos = pStream->GetDict()->GetIntegerBy("Prev");
  int32_t size = pStream->GetDict()->GetIntegerBy("Size");
  if (size < 0) {
    pStream->Release();
    return FALSE;
  }

  if (bMainXRef) {
    m_pTrailer = ToDictionary(pStream->GetDict()->Clone());
    ShrinkObjectMap(size);
    // Iterate by reference: iterating by value modified copies of the map
    // entries, so the reset never reached the map.
    for (auto& it : m_ObjectInfo)
      it.second.type = 0;
  } else {
    m_Trailers.Add(ToDictionary(pStream->GetDict()->Clone()));
  }

  // "Index" holds (first object number, count) pairs; when it is absent or
  // holds no usable pair, a single subsection (0, Size) is used.
  std::vector<std::pair<int32_t, int32_t> > arrIndex;
  CPDF_Array* pArray = pStream->GetDict()->GetArrayBy("Index");
  if (pArray) {
    FX_DWORD nPairSize = pArray->GetCount() / 2;
    for (FX_DWORD i = 0; i < nPairSize; i++) {
      CPDF_Object* pStartNumObj = pArray->GetElement(i * 2);
      CPDF_Object* pCountObj = pArray->GetElement(i * 2 + 1);
      if (ToNumber(pStartNumObj) && ToNumber(pCountObj)) {
        int nStartNum = pStartNumObj->GetInteger();
        int nCount = pCountObj->GetInteger();
        if (nStartNum >= 0 && nCount > 0)
          arrIndex.push_back(std::make_pair(nStartNum, nCount));
      }
    }
  }
  if (arrIndex.size() == 0)
    arrIndex.push_back(std::make_pair(0, size));

  // "W" gives the byte width of each field of an entry; at least three
  // fields are required and their sum must not overflow.
  pArray = pStream->GetDict()->GetArrayBy("W");
  if (!pArray) {
    pStream->Release();
    return FALSE;
  }
  CFX_DWordArray WidthArray;
  FX_SAFE_DWORD dwAccWidth = 0;
  for (FX_DWORD i = 0; i < pArray->GetCount(); i++) {
    WidthArray.Add(pArray->GetIntegerAt(i));
    dwAccWidth += WidthArray[i];
  }
  if (!dwAccWidth.IsValid() || WidthArray.GetSize() < 3) {
    pStream->Release();
    return FALSE;
  }
  FX_DWORD totalWidth = dwAccWidth.ValueOrDie();

  CPDF_StreamAcc acc;
  acc.LoadAllData(pStream);
  const uint8_t* pData = acc.GetData();
  FX_DWORD dwTotalSize = acc.GetSize();

  // |segindex| is the number of entries consumed by earlier subsections.
  FX_DWORD segindex = 0;
  for (FX_DWORD i = 0; i < arrIndex.size(); i++) {
    int32_t startnum = arrIndex[i].first;
    if (startnum < 0)
      continue;

    m_dwXrefStartObjNum =
        pdfium::base::checked_cast<FX_DWORD, int32_t>(startnum);
    FX_DWORD count =
        pdfium::base::checked_cast<FX_DWORD, int32_t>(arrIndex[i].second);

    // Skip subsections whose entries would run past the decoded data.
    FX_SAFE_DWORD dwCaculatedSize = segindex;
    dwCaculatedSize += count;
    dwCaculatedSize *= totalWidth;
    if (!dwCaculatedSize.IsValid() ||
        dwCaculatedSize.ValueOrDie() > dwTotalSize) {
      continue;
    }
    const uint8_t* segstart = pData + segindex * totalWidth;

    // Skip subsections that describe objects beyond the current object map.
    FX_SAFE_DWORD dwMaxObjNum = startnum;
    dwMaxObjNum += count;
    FX_DWORD dwV5Size = m_ObjectInfo.empty() ? 0 : GetLastObjNum() + 1;
    if (!dwMaxObjNum.IsValid() || dwMaxObjNum.ValueOrDie() > dwV5Size)
      continue;

    for (FX_DWORD j = 0; j < count; j++) {
      // The type field defaults to 1 when its width is 0.
      int32_t type = 1;
      const uint8_t* entrystart = segstart + j * totalWidth;
      if (WidthArray[0])
        type = GetVarInt(entrystart, WidthArray[0]);

      // Type 255 marks an object previously flagged as an object-stream
      // container (see the type-2 branch below); only its position is filled.
      if (GetObjectType(startnum + j) == 255) {
        FX_FILESIZE offset =
            GetVarInt(entrystart + WidthArray[0], WidthArray[1]);
        m_ObjectInfo[startnum + j].pos = offset;
        m_SortedOffset.insert(offset);
        continue;
      }

      // Entries that already have a non-zero type are left untouched.
      if (GetObjectType(startnum + j))
        continue;

      m_ObjectInfo[startnum + j].type = type;
      if (type == 0) {
        m_ObjectInfo[startnum + j].pos = 0;
      } else {
        FX_FILESIZE offset =
            GetVarInt(entrystart + WidthArray[0], WidthArray[1]);
        m_ObjectInfo[startnum + j].pos = offset;
        if (type == 1) {
          m_SortedOffset.insert(offset);
        } else {
          // For other types the second field is an object number; it must be
          // valid, and that object is flagged with type 255.
          if (offset < 0 || !IsValidObjectNumber(offset)) {
            pStream->Release();
            return FALSE;
          }
          m_ObjectInfo[offset].type = 255;
        }
      }
    }
    segindex += count;
  }
  pStream->Release();
  return TRUE;
}
// Returns the trailer's "ID" entry as an array, or nullptr when there is no
// trailer, no "ID" entry, or the entry is not an array. When "ID" is a
// reference, the referenced object is parsed and stored back into the trailer
// under "ID" before being returned.
CPDF_Array* CPDF_Parser::GetIDArray() {
  if (!m_pTrailer)
    return nullptr;

  CPDF_Object* pID = m_pTrailer->GetElement("ID");
  if (!pID)
    return nullptr;

  CPDF_Reference* pRef = pID->AsReference();
  if (pRef) {
    pID = ParseIndirectObject(nullptr, pRef->GetRefObjNum());
    m_pTrailer->SetAt("ID", pID);
  }
  return ToArray(pID);
}
// Returns the object number referenced by the trailer's "Root" entry, or 0
// when there is no trailer or "Root" is not a reference.
FX_DWORD CPDF_Parser::GetRootObjNum() {
  CPDF_Object* pRoot = nullptr;
  if (m_pTrailer)
    pRoot = m_pTrailer->GetElement("Root");

  CPDF_Reference* pRef = ToReference(pRoot);
  if (!pRef)
    return 0;
  return pRef->GetRefObjNum();
}
// Returns the object number referenced by the trailer's "Info" entry, or 0
// when there is no trailer or "Info" is not a reference.
FX_DWORD CPDF_Parser::GetInfoObjNum() {
  CPDF_Object* pInfo = nullptr;
  if (m_pTrailer)
    pInfo = m_pTrailer->GetElement("Info");

  CPDF_Reference* pRef = ToReference(pInfo);
  if (!pRef)
    return 0;
  return pRef->GetRefObjNum();
}
// Sets |bForm| according to whether the raw bytes of object |objnum| match
// the "/Form" + "stream" multi-word search.
//
// Returns FALSE only when the object's position is the last known offset, so
// its extent cannot be determined. Returns TRUE in all other cases, with
// |bForm| left FALSE when the object number is invalid, the type is 0 or 2,
// or the position is not a known offset.
FX_BOOL CPDF_Parser::IsFormStream(FX_DWORD objnum, FX_BOOL& bForm) {
  bForm = FALSE;
  if (!IsValidObjectNumber(objnum))
    return TRUE;

  const uint8_t type = GetObjectType(objnum);
  if (type == 0 || type == 2)
    return TRUE;

  FX_FILESIZE pos = m_ObjectInfo[objnum].pos;
  auto next_it = m_SortedOffset.find(pos);
  if (next_it == m_SortedOffset.end())
    return TRUE;
  ++next_it;
  if (next_it == m_SortedOffset.end())
    return FALSE;

  // The object spans from its own offset to the next known offset.
  FX_FILESIZE span = *next_it - pos;
  FX_FILESIZE SavedPos = m_Syntax.SavePos();
  m_Syntax.RestorePos(pos);

  // The two words to search for, separated by an embedded NUL.
  const char kFormStream[] = "/Form\0stream";
  const CFX_ByteStringC kFormStreamStr(kFormStream, sizeof(kFormStream) - 1);
  bForm = m_Syntax.SearchMultiWord(kFormStreamStr, TRUE, span) == 0;

  m_Syntax.RestorePos(SavedPos);
  return TRUE;
}
// Parses and returns object |objnum|, or nullptr when it cannot be produced.
//
// Objects of type 1 or 255 are parsed at their recorded file position.
// Objects of type 2 are parsed out of the stream whose object number is
// stored in their position field; the (object number -> offset) table of each
// such stream is cached in |m_ObjCache| on first use. Re-entrant requests for
// an object that is already being parsed return nullptr.
CPDF_Object* CPDF_Parser::ParseIndirectObject(
    CPDF_IndirectObjectHolder* pObjList,
    FX_DWORD objnum) {
  if (!IsValidObjectNumber(objnum))
    return nullptr;

  // Prevent circular parsing the same object.
  if (pdfium::ContainsKey(m_ParsingObjNums, objnum))
    return nullptr;
  ScopedSetInsertion<FX_DWORD> local_insert(&m_ParsingObjNums, objnum);

  const uint8_t type = GetObjectType(objnum);
  if (type == 1 || type == 255) {
    FX_FILESIZE filePos = m_ObjectInfo[objnum].pos;
    if (filePos <= 0)
      return nullptr;
    return ParseIndirectObjectAt(pObjList, filePos, objnum);
  }
  if (type != 2)
    return nullptr;

  CPDF_StreamAcc* pObjStream = GetObjectStream(m_ObjectInfo[objnum].pos);
  if (!pObjStream)
    return nullptr;

  ScopedFileStream file(FX_CreateMemoryStream(
      (uint8_t*)pObjStream->GetData(), (size_t)pObjStream->GetSize(), FALSE));
  CPDF_SyntaxParser syntax;
  syntax.InitParser(file.get(), 0);
  const int32_t firstOffset = GetStreamFirst(pObjStream);

  // Read object numbers from |pObjStream| into a cache.
  const bool bAlreadyCached = pdfium::ContainsKey(m_ObjCache, pObjStream);
  auto& offsetTable = m_ObjCache[pObjStream];
  if (!bAlreadyCached) {
    int32_t remaining = GetStreamNCount(pObjStream);
    while (remaining > 0) {
      FX_DWORD thisnum = syntax.GetDirectNum();
      FX_DWORD thisoff = syntax.GetDirectNum();
      offsetTable[thisnum] = thisoff;
      --remaining;
    }
  }

  const auto entry = offsetTable.find(objnum);
  if (entry == offsetTable.end())
    return nullptr;

  syntax.RestorePos(firstOffset + entry->second);
  return syntax.GetObject(pObjList, 0, 0, true);
}
// Returns the loaded data accessor for stream object |objnum|, creating and
// caching it in |m_ObjectStreamMap| on first request. Returns nullptr when
// there is no document or the object is not a stream.
CPDF_StreamAcc* CPDF_Parser::GetObjectStream(FX_DWORD objnum) {
  auto cached = m_ObjectStreamMap.find(objnum);
  if (cached != m_ObjectStreamMap.end())
    return cached->second.get();

  if (!m_pDocument)
    return nullptr;

  const CPDF_Stream* pStream = ToStream(m_pDocument->GetIndirectObject(objnum));
  if (!pStream)
    return nullptr;

  // Load the data before publishing the accessor in the map.
  CPDF_StreamAcc* pLoaded = new CPDF_StreamAcc;
  pLoaded->LoadAllData(pStream);
  m_ObjectStreamMap[objnum].reset(pLoaded);
  return pLoaded;
}
// Returns the distance from the position of |objnum| to the next known
// offset, or 0 when it cannot be determined. For a type-2 object the
// measurement is made on the object whose number is stored in its position
// field.
FX_FILESIZE CPDF_Parser::GetObjectSize(FX_DWORD objnum) const {
  if (!IsValidObjectNumber(objnum))
    return 0;

  if (GetObjectType(objnum) == 2)
    objnum = GetObjectPositionOrZero(objnum);

  const uint8_t type = GetObjectType(objnum);
  if (!(type == 1 || type == 255))
    return 0;

  FX_FILESIZE start = GetObjectPositionOrZero(objnum);
  if (start == 0)
    return 0;

  auto cursor = m_SortedOffset.find(start);
  if (cursor == m_SortedOffset.end())
    return 0;
  ++cursor;
  if (cursor == m_SortedOffset.end())
    return 0;
  return *cursor - start;
}
// Copies the raw bytes of object |objnum| into a newly allocated buffer.
//
// On success |pBuffer| points to a buffer allocated with FX_Alloc and |size|
// is its length. On any failure |pBuffer| is NULL and |size| is 0.
//
// For type-2 objects the bytes are taken from the decoded data of the
// containing object stream. For type-1 objects they are read from the file,
// from the object's position up to the next known offset (if an "xref" or an
// object header starts there) or else up to the end of the next "endobj".
void CPDF_Parser::GetIndirectBinary(FX_DWORD objnum,
                                    uint8_t*& pBuffer,
                                    FX_DWORD& size) {
  pBuffer = NULL;
  size = 0;
  if (!IsValidObjectNumber(objnum))
    return;

  if (GetObjectType(objnum) == 2) {
    CPDF_StreamAcc* pObjStream = GetObjectStream(m_ObjectInfo[objnum].pos);
    if (!pObjStream)
      return;

    int32_t offset = GetStreamFirst(pObjStream);
    // A negative "First" cannot describe a position inside the stream.
    if (offset < 0)
      return;

    const uint8_t* pData = pObjStream->GetData();
    FX_DWORD totalsize = pObjStream->GetSize();
    ScopedFileStream file(
        FX_CreateMemoryStream((uint8_t*)pData, (size_t)totalsize, FALSE));
    CPDF_SyntaxParser syntax;
    syntax.InitParser(file.get(), 0);

    for (int i = GetStreamNCount(pObjStream); i > 0; --i) {
      FX_DWORD thisnum = syntax.GetDirectNum();
      FX_DWORD thisoff = syntax.GetDirectNum();
      if (thisnum != objnum)
        continue;

      // The offsets come from the file and are untrusted. Compute the byte
      // range in 64 bits and validate it before allocating or copying, so
      // that bad values cannot underflow the size or read past |pData|.
      const uint64_t start =
          static_cast<uint64_t>(thisoff) + static_cast<uint64_t>(offset);
      uint64_t end = totalsize;
      if (i != 1) {
        syntax.GetDirectNum();  // Skip nextnum.
        FX_DWORD nextoff = syntax.GetDirectNum();
        end = static_cast<uint64_t>(nextoff) + static_cast<uint64_t>(offset);
      }
      if (start >= end || end > totalsize)
        return;

      size = static_cast<FX_DWORD>(end - start);
      pBuffer = FX_Alloc(uint8_t, size);
      FXSYS_memcpy(pBuffer, pData + start, size);
      return;
    }
    return;
  }

  if (GetObjectType(objnum) != 1)
    return;

  FX_FILESIZE pos = m_ObjectInfo[objnum].pos;
  if (pos == 0)
    return;

  FX_FILESIZE SavedPos = m_Syntax.SavePos();
  m_Syntax.RestorePos(pos);

  // Verify that "<objnum> <gennum> obj" really starts at |pos|.
  bool bIsNumber;
  CFX_ByteString word = m_Syntax.GetNextWord(&bIsNumber);
  if (!bIsNumber) {
    m_Syntax.RestorePos(SavedPos);
    return;
  }
  FX_DWORD parser_objnum = FXSYS_atoi(word);
  if (parser_objnum && parser_objnum != objnum) {
    m_Syntax.RestorePos(SavedPos);
    return;
  }
  word = m_Syntax.GetNextWord(&bIsNumber);
  if (!bIsNumber) {
    m_Syntax.RestorePos(SavedPos);
    return;
  }
  if (m_Syntax.GetKeyword() != "obj") {
    m_Syntax.RestorePos(SavedPos);
    return;
  }

  auto it = m_SortedOffset.find(pos);
  if (it == m_SortedOffset.end() || ++it == m_SortedOffset.end()) {
    m_Syntax.RestorePos(SavedPos);
    return;
  }

  // The next known offset is trusted only if an "xref" keyword or another
  // object header starts there.
  FX_FILESIZE nextoff = *it;
  FX_BOOL bNextOffValid = FALSE;
  if (nextoff != pos) {
    m_Syntax.RestorePos(nextoff);
    word = m_Syntax.GetNextWord(&bIsNumber);
    if (word == "xref") {
      bNextOffValid = TRUE;
    } else if (bIsNumber) {
      word = m_Syntax.GetNextWord(&bIsNumber);
      if (bIsNumber && m_Syntax.GetKeyword() == "obj")
        bNextOffValid = TRUE;
    }
  }

  if (!bNextOffValid) {
    // Fall back to scanning forward for "endobj" (or the end of the file).
    m_Syntax.RestorePos(pos);
    while (1) {
      if (m_Syntax.GetKeyword() == "endobj")
        break;
      if (m_Syntax.SavePos() == m_Syntax.m_FileLen)
        break;
    }
    nextoff = m_Syntax.SavePos();
  }

  // Never allocate or read a zero or negative span.
  if (nextoff <= pos) {
    m_Syntax.RestorePos(SavedPos);
    return;
  }

  size = (FX_DWORD)(nextoff - pos);
  pBuffer = FX_Alloc(uint8_t, size);
  m_Syntax.RestorePos(pos);
  m_Syntax.ReadBlock(pBuffer, size);
  m_Syntax.RestorePos(SavedPos);
}
// Parses the indirect object whose "<objnum> <gennum> obj" header starts at
// |pos| and returns it, or NULL if the header is malformed or does not match.
//
// When |objnum| is non-zero the number in the header must equal it. When it is
// zero any number is accepted and is stored on the returned object. The syntax
// parser's position is always put back to where it was on entry.
CPDF_Object* CPDF_Parser::ParseIndirectObjectAt(
    CPDF_IndirectObjectHolder* pObjList,
    FX_FILESIZE pos,
    FX_DWORD objnum) {
  const FX_FILESIZE entry_pos = m_Syntax.SavePos();
  m_Syntax.RestorePos(pos);

  CPDF_Object* pParsed = NULL;
  FX_DWORD header_objnum = 0;
  FX_DWORD header_gennum = 0;

  bool is_number;
  CFX_ByteString token = m_Syntax.GetNextWord(&is_number);
  if (is_number) {
    // Start offset of the object-number token; computed but not consumed
    // anywhere in this function.
    FX_FILESIZE objOffset = m_Syntax.SavePos();
    objOffset -= token.GetLength();

    header_objnum = FXSYS_atoi(token);
    if (!objnum || header_objnum == objnum) {
      token = m_Syntax.GetNextWord(&is_number);
      if (is_number) {
        header_gennum = FXSYS_atoi(token);
        if (m_Syntax.GetKeyword() == "obj") {
          pParsed = m_Syntax.GetObject(pObjList, objnum, header_gennum, true);
          // Peek at the trailing keyword. The position is restored below, so
          // nothing read here is kept.
          m_Syntax.SavePos();
          if (m_Syntax.GetKeyword() == "endobj") {
            m_Syntax.SavePos();
          }
        }
      }
    }
  }

  m_Syntax.RestorePos(entry_pos);
  if (pParsed) {
    if (!objnum)
      pParsed->m_ObjNum = header_objnum;
    pParsed->m_GenNum = header_gennum;
  }
  return pParsed;
}
// Strict counterpart of ParseIndirectObjectAt(): parses the indirect object
// whose header starts at |pos| using CPDF_SyntaxParser::GetObjectByStrict().
//
// A non-zero |objnum| must match the number in the header. If |pResultPos| is
// non-null and the header was valid, it receives the parser position reached
// after the object body (even when the body itself failed to parse). The
// syntax parser's position is restored before returning.
CPDF_Object* CPDF_Parser::ParseIndirectObjectAtByStrict(
    CPDF_IndirectObjectHolder* pObjList,
    FX_FILESIZE pos,
    FX_DWORD objnum,
    FX_FILESIZE* pResultPos) {
  const FX_FILESIZE entry_pos = m_Syntax.SavePos();
  m_Syntax.RestorePos(pos);

  CPDF_Object* pParsed = NULL;
  bool is_number;
  CFX_ByteString token = m_Syntax.GetNextWord(&is_number);
  if (is_number) {
    const FX_DWORD header_objnum = FXSYS_atoi(token);
    if (!objnum || header_objnum == objnum) {
      token = m_Syntax.GetNextWord(&is_number);
      if (is_number) {
        const FX_DWORD header_gennum = FXSYS_atoi(token);
        if (m_Syntax.GetKeyword() == "obj") {
          pParsed =
              m_Syntax.GetObjectByStrict(pObjList, objnum, header_gennum);
          if (pResultPos) {
            *pResultPos = m_Syntax.m_Pos;
          }
        }
      }
    }
  }

  m_Syntax.RestorePos(entry_pos);
  return pParsed;
}
// Reads a "trailer" keyword followed by an object at the current parser
// position. Returns the object if it is a dictionary (ownership passes to the
// caller); otherwise the parsed object is released and nullptr is returned.
CPDF_Dictionary* CPDF_Parser::LoadTrailerV4() {
  CFX_ByteString keyword = m_Syntax.GetKeyword();
  if (keyword != "trailer")
    return nullptr;

  std::unique_ptr<CPDF_Object, ReleaseDeleter<CPDF_Object>> holder(
      m_Syntax.GetObject(m_pDocument, 0, 0, true));
  CPDF_Object* pCandidate = holder.get();
  if (!ToDictionary(pCandidate))
    return nullptr;

  // It is a dictionary: stop the holder from releasing it and hand it out.
  holder.release();
  return pCandidate->AsDictionary();
}
// Returns the permission bits reported by the security handler, or all bits
// set when there is no security handler.
//
// For an encryption dictionary whose Filter is "Standard", the two lowest
// bits are cleared and the bits in 0xFFFFF0C0 are forced on. If
// |bCheckRevision| is set and the dictionary's R entry is 2, bits 8-11 are
// then cleared again.
FX_DWORD CPDF_Parser::GetPermissions(FX_BOOL bCheckRevision) {
  if (!m_pSecurityHandler)
    return (FX_DWORD)-1;

  FX_DWORD dwResult = m_pSecurityHandler->GetPermissions();
  const bool bStandardFilter =
      m_pEncryptDict && m_pEncryptDict->GetStringBy("Filter") == "Standard";
  if (!bStandardFilter)
    return dwResult;

  dwResult = (dwResult & 0xFFFFFFFC) | 0xFFFFF0C0;
  if (bCheckRevision && m_pEncryptDict->GetIntegerBy("R") == 2)
    dwResult &= 0xFFFFF0FF;
  return dwResult;
}
// Initializes the syntax parser on |pFileAccess| (header at |offset|) and
// checks whether the first indirect object after the header is a
// linearization dictionary.
//
// Returns TRUE only when that object is a dictionary with a "Linearized"
// entry whose "L" value equals the file size. In that case m_pLinearized holds
// the parsed object, and m_dwFirstPageNo / m_LastXRefOffset are taken from the
// numeric "P" / "T" entries when present. On every path that rejects an
// already parsed object, the object is released and m_pLinearized is reset.
FX_BOOL CPDF_Parser::IsLinearizedFile(IFX_FileRead* pFileAccess,
                                      FX_DWORD offset) {
  m_Syntax.InitParser(pFileAccess, offset);
  m_Syntax.RestorePos(m_Syntax.m_HeaderOffset + 9);
  FX_FILESIZE SavedPos = m_Syntax.SavePos();

  // Expect "<objnum> <gennum> obj".
  bool bIsNumber;
  CFX_ByteString word = m_Syntax.GetNextWord(&bIsNumber);
  if (!bIsNumber) {
    return FALSE;
  }
  FX_DWORD objnum = FXSYS_atoi(word);
  word = m_Syntax.GetNextWord(&bIsNumber);
  if (!bIsNumber) {
    return FALSE;
  }
  FX_DWORD gennum = FXSYS_atoi(word);
  if (m_Syntax.GetKeyword() != "obj") {
    m_Syntax.RestorePos(SavedPos);
    return FALSE;
  }

  m_pLinearized = m_Syntax.GetObject(nullptr, objnum, gennum, true);
  if (!m_pLinearized) {
    return FALSE;
  }

  CPDF_Dictionary* pDict = m_pLinearized->GetDict();
  if (pDict && pDict->GetElement("Linearized")) {
    // Consume the word that follows the dictionary.
    m_Syntax.GetNextWord(nullptr);

    CPDF_Object* pLen = pDict->GetElement("L");
    if (!pLen || pLen->GetInteger() != (int)pFileAccess->GetSize()) {
      // Missing or mismatching file length: not usable as a linearized file.
      // Drop the object so no stale linearization dictionary stays attached
      // after reporting FALSE, as on the other rejection paths.
      m_pLinearized->Release();
      m_pLinearized = NULL;
      return FALSE;
    }

    if (CPDF_Number* pNo = ToNumber(pDict->GetElement("P")))
      m_dwFirstPageNo = pNo->GetInteger();
    if (CPDF_Number* pTable = ToNumber(pDict->GetElement("T")))
      m_LastXRefOffset = pTable->GetInteger();
    return TRUE;
  }

  m_pLinearized->Release();
  m_pLinearized = NULL;
  return FALSE;
}
// Starts parsing |pFileAccess|, taking the linearized path when possible.
//
// Returns FORMAT_ERROR if GetHeaderOffset() finds no header. Files that
// IsLinearizedFile() rejects are handed to StartParse(). For linearized files
// a document is created, the cross-reference data at the current parser
// position is loaded (V4 table first, then V5 stream, then a full rebuild),
// the encryption handler is set up and the document is loaded through
// LoadAsynDoc(). Any error from SetEncryptHandler() is returned unchanged.
CPDF_Parser::Error CPDF_Parser::StartAsyncParse(IFX_FileRead* pFileAccess) {
CloseParser();
m_bXRefStream = FALSE;
m_LastXRefOffset = 0;
m_bOwnFileRead = true;
int32_t offset = GetHeaderOffset(pFileAccess);
if (offset == -1) {
return FORMAT_ERROR;
}
if (!IsLinearizedFile(pFileAccess, offset)) {
// IsLinearizedFile() attached |pFileAccess| to m_Syntax; detach it before
// StartParse() starts over with the same file.
m_Syntax.m_pFileAccess = nullptr;
return StartParse(pFileAccess);
}
m_pDocument = new CPDF_Document(this);
// The parser now sits just past the linearization dictionary; the first-page
// cross-reference data is read from here.
FX_FILESIZE dwFirstXRefOffset = m_Syntax.SavePos();
FX_BOOL bXRefRebuilt = FALSE;
FX_BOOL bLoadV4 = FALSE;
if (!(bLoadV4 = LoadCrossRefV4(dwFirstXRefOffset, 0, FALSE)) &&
!LoadCrossRefV5(&dwFirstXRefOffset, TRUE)) {
// Neither table form could be loaded: rebuild by scanning.
if (!RebuildCrossRef()) {
return FORMAT_ERROR;
}
bXRefRebuilt = TRUE;
m_LastXRefOffset = 0;
}
if (bLoadV4) {
m_pTrailer = LoadTrailerV4();
// NOTE(review): a V4 table without a readable trailer returns SUCCESS here,
// before the encryption handler and document are set up - confirm intended.
if (!m_pTrailer) {
return SUCCESS;
}
int32_t xrefsize = GetDirectInteger(m_pTrailer, "Size");
if (xrefsize > 0)
ShrinkObjectMap(xrefsize);
}
Error eRet = SetEncryptHandler();
if (eRet != SUCCESS) {
return eRet;
}
m_pDocument->LoadAsynDoc(m_pLinearized->GetDict());
if (!m_pDocument->GetRoot() || m_pDocument->GetPageCount() == 0) {
// No usable root or no pages: retry once with a rebuilt cross-reference
// table, unless the table in use is already a rebuilt one.
if (bXRefRebuilt) {
return FORMAT_ERROR;
}
ReleaseEncryptHandler();
if (!RebuildCrossRef()) {
return FORMAT_ERROR;
}
eRet = SetEncryptHandler();
if (eRet != SUCCESS) {
return eRet;
}
m_pDocument->LoadAsynDoc(m_pLinearized->GetDict());
if (!m_pDocument->GetRoot()) {
return FORMAT_ERROR;
}
}
if (GetRootObjNum() == 0) {
// The trailer names no root object: rebuild and re-establish encryption.
ReleaseEncryptHandler();
if (!RebuildCrossRef() || GetRootObjNum() == 0)
return FORMAT_ERROR;
eRet = SetEncryptHandler();
if (eRet != SUCCESS) {
return eRet;
}
}
if (m_pSecurityHandler && m_pSecurityHandler->IsMetadataEncrypted()) {
// Record the object number referenced by the root's "Metadata" entry;
// ReadStream() compares stream object numbers against it when picking a
// crypto handler.
if (CPDF_Reference* pMetadata =
ToReference(m_pDocument->GetRoot()->GetElement("Metadata")))
m_Syntax.m_MetadataObjnum = pMetadata->GetRefObjNum();
}
return SUCCESS;
}
// Loads the cross-reference stream at |xrefpos| and then every further stream
// that LoadCrossRefV5() chains to through its in/out position argument, until
// that position becomes 0.
//
// Returns FALSE if any load fails or if a position repeats (a cycle). On
// success the object-stream map is cleared and m_bXRefStream is set.
FX_BOOL CPDF_Parser::LoadLinearizedAllCrossRefV5(FX_FILESIZE xrefpos) {
  if (!LoadCrossRefV5(&xrefpos, FALSE))
    return FALSE;

  std::set<FX_FILESIZE> visited;
  while (xrefpos) {
    // insert() reports whether the position is new; a repeat means the chain
    // loops back on itself.
    if (!visited.insert(xrefpos).second)
      return FALSE;
    if (!LoadCrossRefV5(&xrefpos, FALSE))
      return FALSE;
  }

  m_ObjectStreamMap.clear();
  m_bXRefStream = TRUE;
  return TRUE;
}
// Loads the cross-reference data located at m_LastXRefOffset, trying the V4
// table form first and the V5 stream form second.
//
// The current trailer is released, the object-stream map and object cache are
// cleared, and m_Syntax.m_MetadataObjnum is zeroed for the duration of the
// load and restored afterwards. Returns FORMAT_ERROR (and zeroes
// m_LastXRefOffset) when both forms fail.
CPDF_Parser::Error CPDF_Parser::LoadLinearizedMainXRefTable() {
FX_DWORD dwSaveMetadataObjnum = m_Syntax.m_MetadataObjnum;
m_Syntax.m_MetadataObjnum = 0;
if (m_pTrailer) {
m_pTrailer->Release();
m_pTrailer = NULL;
}
m_Syntax.RestorePos(m_LastXRefOffset - m_Syntax.m_HeaderOffset);
// Count leading whitespace at the recorded offset so the offset can be moved
// past it.
uint8_t ch = 0;
FX_DWORD dwCount = 0;
m_Syntax.GetNextChar(ch);
while (PDFCharIsWhitespace(ch)) {
++dwCount;
// NOTE(review): this condition is true whenever the read position is still
// inside the file, so the loop appears to stop after the first whitespace
// byte. Confirm whether "<=" (stop at end of file) was intended.
if (m_Syntax.m_FileLen >=
(FX_FILESIZE)(m_Syntax.SavePos() + m_Syntax.m_HeaderOffset)) {
break;
}
m_Syntax.GetNextChar(ch);
}
m_LastXRefOffset += dwCount;
m_ObjectStreamMap.clear();
m_ObjCache.clear();
if (!LoadLinearizedAllCrossRefV4(m_LastXRefOffset, m_dwXrefStartObjNum) &&
!LoadLinearizedAllCrossRefV5(m_LastXRefOffset)) {
m_LastXRefOffset = 0;
m_Syntax.m_MetadataObjnum = dwSaveMetadataObjnum;
return FORMAT_ERROR;
}
m_Syntax.m_MetadataObjnum = dwSaveMetadataObjnum;
return SUCCESS;
}
// static
// Current nesting depth of GetObject() / GetObjectByStrict() calls. Each call
// increments it on entry, restores it on exit, and gives up once it exceeds
// kParserMaxRecursionDepth.
int CPDF_SyntaxParser::s_CurrentRecursionDepth = 0;
// Creates a parser that is not yet attached to any file. The read-ahead
// buffer is allocated later by InitParser(). Position and length members start
// at zero so that a read attempted before InitParser() fails the end-of-file
// check in GetNextChar() instead of acting on indeterminate values.
CPDF_SyntaxParser::CPDF_SyntaxParser() {
  m_pFileAccess = nullptr;
  m_pFileBuf = nullptr;
  m_BufSize = CPDF_ModuleMgr::kFileBufSize;
  m_BufOffset = 0;
  m_HeaderOffset = 0;
  m_FileLen = 0;
  m_Pos = 0;
  m_WordSize = 0;
  m_MetadataObjnum = 0;
}
// Frees the read-ahead buffer allocated by InitParser(), if any.
CPDF_SyntaxParser::~CPDF_SyntaxParser() {
FX_Free(m_pFileBuf);
}
// Reads the byte at header-relative position |pos| into |ch| without moving
// the parser: m_Pos is set to |pos| for the read and put back by the
// auto-restorer when the function returns. Returns what GetNextChar() returns.
FX_BOOL CPDF_SyntaxParser::GetCharAt(FX_FILESIZE pos, uint8_t& ch) {
CFX_AutoRestorer<FX_FILESIZE> save_pos(&m_Pos);
m_Pos = pos;
return GetNextChar(ch);
}
// Reads the byte at the current position into |ch| and advances m_Pos by one.
// Returns FALSE, leaving |ch| and m_Pos untouched, when the position is at or
// past the end of the file or when refilling the buffer fails.
FX_BOOL CPDF_SyntaxParser::GetNextChar(uint8_t& ch) {
// m_Pos is relative to the header; |pos| is the absolute file offset.
FX_FILESIZE pos = m_Pos + m_HeaderOffset;
if (pos >= m_FileLen) {
return FALSE;
}
// Refill when |pos| lies outside the buffered window. Note that
// pos == m_BufOffset also takes this branch.
if (m_BufOffset >= pos || (FX_FILESIZE)(m_BufOffset + m_BufSize) <= pos) {
// Read forward starting at |pos|, with the size clamped to the file length.
FX_FILESIZE read_pos = pos;
FX_DWORD read_size = m_BufSize;
if ((FX_FILESIZE)read_size > m_FileLen) {
read_size = (FX_DWORD)m_FileLen;
}
// If the window would run past the end of the file, slide it back so that
// it ends exactly at the end of the file.
if ((FX_FILESIZE)(read_pos + read_size) > m_FileLen) {
if (m_FileLen < (FX_FILESIZE)read_size) {
read_pos = 0;
read_size = (FX_DWORD)m_FileLen;
} else {
read_pos = m_FileLen - read_size;
}
}
if (!m_pFileAccess->ReadBlock(m_pFileBuf, read_pos, read_size)) {
return FALSE;
}
m_BufOffset = read_pos;
}
ch = m_pFileBuf[pos - m_BufOffset];
m_Pos++;
return TRUE;
}
// Reads the byte at header-relative position |pos| into |ch| without changing
// m_Pos. When the buffer has to be refilled, the window is placed so that it
// ends at |pos|, which suits callers that walk backwards through the file.
// Returns FALSE when |pos| is at or past the end of the file or the read fails.
FX_BOOL CPDF_SyntaxParser::GetCharAtBackward(FX_FILESIZE pos, uint8_t& ch) {
// Convert to an absolute file offset.
pos += m_HeaderOffset;
if (pos >= m_FileLen) {
return FALSE;
}
// Refill when |pos| lies outside the buffered window. Note that
// pos == m_BufOffset also takes this branch.
if (m_BufOffset >= pos || (FX_FILESIZE)(m_BufOffset + m_BufSize) <= pos) {
// Position the window so that |pos| is its last byte, clamped at offset 0.
FX_FILESIZE read_pos;
if (pos < (FX_FILESIZE)m_BufSize) {
read_pos = 0;
} else {
read_pos = pos - m_BufSize + 1;
}
FX_DWORD read_size = m_BufSize;
// Keep the window inside the file: shrink it for files smaller than the
// buffer, otherwise slide it back to end at the end of the file.
if ((FX_FILESIZE)(read_pos + read_size) > m_FileLen) {
if (m_FileLen < (FX_FILESIZE)read_size) {
read_pos = 0;
read_size = (FX_DWORD)m_FileLen;
} else {
read_pos = m_FileLen - read_size;
}
}
if (!m_pFileAccess->ReadBlock(m_pFileBuf, read_pos, read_size)) {
return FALSE;
}
m_BufOffset = read_pos;
}
ch = m_pFileBuf[pos - m_BufOffset];
return TRUE;
}
// Reads |size| bytes at the current position straight from the file into
// |pBuf|, bypassing the read-ahead buffer. On success the position advances by
// |size| and TRUE is returned; on failure the position is unchanged.
FX_BOOL CPDF_SyntaxParser::ReadBlock(uint8_t* pBuf, FX_DWORD size) {
  if (m_pFileAccess->ReadBlock(pBuf, m_Pos + m_HeaderOffset, size)) {
    m_Pos += size;
    return TRUE;
  }
  return FALSE;
}
// Reads the next token at the current position into m_WordBuffer and sets
// m_WordSize to its length (0 if the input ran out before a token started).
//
// Leading whitespace and '%' comments are skipped. A delimiter starts a
// delimiter token: '/' plus the name characters that follow, "<<", ">>", or
// the single delimiter byte. Any other byte starts a regular word that runs up
// to the next delimiter or whitespace, which is left unread.
//
// If |bIsNumber| is non-null it is set to true on entry and to false once a
// delimiter or a non-numeric byte is seen, so it is still true when no token
// could be read at all.
void CPDF_SyntaxParser::GetNextWordInternal(bool* bIsNumber) {
m_WordSize = 0;
if (bIsNumber)
*bIsNumber = true;
uint8_t ch;
if (!GetNextChar(ch)) {
return;
}
// Skip any mix of whitespace runs and comments (from '%' to a line ending).
while (1) {
while (PDFCharIsWhitespace(ch)) {
if (!GetNextChar(ch))
return;
}
if (ch != '%')
break;
while (1) {
if (!GetNextChar(ch))
return;
if (PDFCharIsLineEnding(ch))
break;
}
}
if (PDFCharIsDelimiter(ch)) {
if (bIsNumber)
*bIsNumber = false;
m_WordBuffer[m_WordSize++] = ch;
if (ch == '/') {
// Name token: collect bytes until one is neither "other" nor numeric, then
// un-read that byte. Bytes beyond the buffer capacity are consumed but not
// stored.
while (1) {
if (!GetNextChar(ch))
return;
if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) {
m_Pos--;
return;
}
if (m_WordSize < sizeof(m_WordBuffer) - 1)
m_WordBuffer[m_WordSize++] = ch;
}
} else if (ch == '<') {
// "<<" is one token; a single '<' leaves the following byte unread.
if (!GetNextChar(ch))
return;
if (ch == '<')
m_WordBuffer[m_WordSize++] = ch;
else
m_Pos--;
} else if (ch == '>') {
// ">>" is one token; a single '>' leaves the following byte unread.
if (!GetNextChar(ch))
return;
if (ch == '>')
m_WordBuffer[m_WordSize++] = ch;
else
m_Pos--;
}
return;
}
// Regular word: store bytes (up to the buffer capacity) until a delimiter or
// whitespace byte is met, which is un-read.
while (1) {
if (m_WordSize < sizeof(m_WordBuffer) - 1)
m_WordBuffer[m_WordSize++] = ch;
if (!PDFCharIsNumeric(ch))
if (bIsNumber)
*bIsNumber = false;
if (!GetNextChar(ch))
return;
if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
m_Pos--;
break;
}
}
}
// Reads the body of a literal string; the opening '(' has already been
// consumed by the caller. Returns the decoded bytes up to the matching ')',
// or whatever was decoded so far if the input ends first.
//
// |status| is the decoder state:
//   0 - ordinary text (tracks nested parentheses in |parlevel|)
//   1 - just after a backslash
//   2 - one octal digit of an escape read
//   3 - two octal digits of an escape read
//   4 - just after backslash + CR (an optional LF may follow)
CFX_ByteString CPDF_SyntaxParser::ReadString() {
uint8_t ch;
if (!GetNextChar(ch)) {
return CFX_ByteString();
}
CFX_ByteTextBuf buf;
int32_t parlevel = 0;
int32_t status = 0, iEscCode = 0;
while (1) {
switch (status) {
case 0:
if (ch == ')') {
// An unbalanced ')' ends the string; balanced ones are kept as data.
if (parlevel == 0) {
return buf.GetByteString();
}
parlevel--;
buf.AppendChar(')');
} else if (ch == '(') {
parlevel++;
buf.AppendChar('(');
} else if (ch == '\\') {
status = 1;
} else {
buf.AppendChar(ch);
}
break;
case 1:
if (ch >= '0' && ch <= '7') {
iEscCode = FXSYS_toDecimalDigit(ch);
status = 2;
break;
}
if (ch == 'n') {
buf.AppendChar('\n');
} else if (ch == 'r') {
buf.AppendChar('\r');
} else if (ch == 't') {
buf.AppendChar('\t');
} else if (ch == 'b') {
buf.AppendChar('\b');
} else if (ch == 'f') {
buf.AppendChar('\f');
} else if (ch == '\r') {
// Backslash + CR is a line continuation; check for a following LF.
status = 4;
break;
} else if (ch == '\n') {
// Backslash + LF is a line continuation: nothing is appended.
} else {
// Any other escaped byte stands for itself.
buf.AppendChar(ch);
}
status = 0;
break;
case 2:
if (ch >= '0' && ch <= '7') {
iEscCode = iEscCode * 8 + FXSYS_toDecimalDigit(ch);
status = 3;
} else {
// Escape ended after one digit: emit it and re-process |ch| in state 0
// ("continue" skips the read at the bottom of the loop).
buf.AppendChar(iEscCode);
status = 0;
continue;
}
break;
case 3:
if (ch >= '0' && ch <= '7') {
iEscCode = iEscCode * 8 + FXSYS_toDecimalDigit(ch);
buf.AppendChar(iEscCode);
status = 0;
} else {
// Escape ended after two digits: emit it and re-process |ch| in state 0.
buf.AppendChar(iEscCode);
status = 0;
continue;
}
break;
case 4:
status = 0;
// Swallow the LF of a CRLF continuation; anything else is re-processed as
// ordinary text.
if (ch != '\n') {
continue;
}
break;
}
if (!GetNextChar(ch)) {
break;
}
}
// Reached only when the input ran out before the closing ')'.
GetNextChar(ch);
return buf.GetByteString();
}
// Reads the body of a hexadecimal string; the opening '<' has already been
// consumed by the caller. Hex digits are paired into bytes, every other byte
// is ignored, and reading stops at '>' or when the input ends. A trailing
// unpaired digit is emitted as the high nibble of a final byte.
CFX_ByteString CPDF_SyntaxParser::ReadHexString() {
  uint8_t cur;
  if (!GetNextChar(cur))
    return CFX_ByteString();

  CFX_BinaryBuf decoded;
  bool bExpectHighNibble = true;
  uint8_t pending = 0;
  do {
    if (cur == '>')
      break;

    if (std::isxdigit(cur)) {
      const int nibble = FXSYS_toHexDigit(cur);
      if (bExpectHighNibble) {
        pending = nibble * 16;
      } else {
        pending += nibble;
        decoded.AppendByte((uint8_t)pending);
      }
      bExpectHighNibble = !bExpectHighNibble;
    }
  } while (GetNextChar(cur));

  // Odd number of digits: flush the half-built byte.
  if (!bExpectHighNibble)
    decoded.AppendByte((uint8_t)pending);
  return decoded.GetByteString();
}
// Advances the position to just past the next end-of-line marker (LF, CR, or
// CRLF), or to the end of the data if there is none.
void CPDF_SyntaxParser::ToNextLine() {
  uint8_t ch;
  while (GetNextChar(ch)) {
    if (ch == '\n')
      break;

    if (ch == '\r') {
      // Swallow the LF of a CRLF pair. Only step back when a byte was really
      // read: if the CR was the last byte, the failed read did not advance the
      // position, and stepping back would land on the CR again.
      if (GetNextChar(ch) && ch != '\n')
        --m_Pos;
      break;
    }
  }
}
// Skips whitespace and '%' comments so that the position ends up on the first
// byte of the next token. If the input runs out first, the position is left at
// the end of the data.
void CPDF_SyntaxParser::ToNextWord() {
  uint8_t cur;
  if (!GetNextChar(cur))
    return;

  for (;;) {
    while (PDFCharIsWhitespace(cur)) {
      if (!GetNextChar(cur))
        return;
    }

    if (cur != '%') {
      // |cur| starts the next token: un-read it and stop.
      --m_Pos;
      return;
    }

    // Comment: discard everything up to and including the line ending.
    do {
      if (!GetNextChar(cur))
        return;
    } while (!PDFCharIsLineEnding(cur));
  }
}
// Reads the next token with GetNextWordInternal() and returns a copy of it as
// a byte string (empty if no token could be read). |bIsNumber| is forwarded
// unchanged and may be null.
CFX_ByteString CPDF_SyntaxParser::GetNextWord(bool* bIsNumber) {
GetNextWordInternal(bIsNumber);
return CFX_ByteString((const FX_CHAR*)m_WordBuffer, m_WordSize);
}
// Returns the next token without reporting whether it is numeric.
CFX_ByteString CPDF_SyntaxParser::GetKeyword() {
return GetNextWord(nullptr);
}
// Parses one object at the current position and returns it, or nullptr when
// no object starts here (end of input, an unrecognized token such as "]" or
// "endobj", or the recursion limit was exceeded).
//
// |pObjList| becomes the holder of any reference objects created. |objnum| and
// |gennum| identify the enclosing indirect object and are passed to the crypto
// handler and to ReadStream(). |bDecrypt| controls whether a string parsed
// directly by this call is decrypted; nested array elements and dictionary
// values are always parsed with decryption enabled.
CPDF_Object* CPDF_SyntaxParser::GetObject(CPDF_IndirectObjectHolder* pObjList,
FX_DWORD objnum,
FX_DWORD gennum,
FX_BOOL bDecrypt) {
// The restorer puts the shared depth counter back on every exit path.
CFX_AutoRestorer<int> restorer(&s_CurrentRecursionDepth);
if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth) {
return nullptr;
}
FX_FILESIZE SavedPos = m_Pos;
bool bIsNumber;
CFX_ByteString word = GetNextWord(&bIsNumber);
if (word.GetLength() == 0) {
return nullptr;
}
if (bIsNumber) {
// Look ahead for "<num> <num> R" (an indirect reference); otherwise rewind
// to just after the first number and return it as a plain number.
FX_FILESIZE SavedPos = m_Pos;
CFX_ByteString nextword = GetNextWord(&bIsNumber);
if (bIsNumber) {
CFX_ByteString nextword2 = GetNextWord(nullptr);
if (nextword2 == "R") {
// This local shadows the |objnum| parameter: it is the referenced object's
// number, not the enclosing object's.
FX_DWORD objnum = FXSYS_atoi(word);
return new CPDF_Reference(pObjList, objnum);
}
}
m_Pos = SavedPos;
return new CPDF_Number(word);
}
if (word == "true" || word == "false") {
return new CPDF_Boolean(word == "true");
}
if (word == "null") {
return new CPDF_Null;
}
if (word == "(") {
CFX_ByteString str = ReadString();
if (m_pCryptoHandler && bDecrypt) {
m_pCryptoHandler->Decrypt(objnum, gennum, str);
}
return new CPDF_String(str, FALSE);
}
if (word == "<") {
CFX_ByteString str = ReadHexString();
if (m_pCryptoHandler && bDecrypt) {
m_pCryptoHandler->Decrypt(objnum, gennum, str);
}
return new CPDF_String(str, TRUE);
}
if (word == "[") {
// Collect elements until a nested call yields nullptr. The terminating
// token is not checked to be "]" here (GetObjectByStrict() does check).
CPDF_Array* pArray = new CPDF_Array;
while (CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, true)) {
pArray->Add(pObj);
}
return pArray;
}
if (word[0] == '/') {
// Name: decode the token without its leading '/'.
return new CPDF_Name(
PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1)));
}
if (word == "<<") {
int32_t nKeys = 0;
// Position of the value of a "/Contents" key, if one is seen.
FX_FILESIZE dwSignValuePos = 0;
std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict(
new CPDF_Dictionary);
while (1) {
CFX_ByteString key = GetNextWord(nullptr);
// Input ended inside the dictionary.
if (key.IsEmpty())
return nullptr;
FX_FILESIZE SavedPos = m_Pos - key.GetLength();
if (key == ">>")
break;
// A missing ">>": stop at "endobj" and leave it unread for the caller.
if (key == "endobj") {
m_Pos = SavedPos;
break;
}
// Tokens that are not names are skipped.
if (key[0] != '/')
continue;
++nKeys;
key = PDF_NameDecode(key);
if (key.IsEmpty())
continue;
if (key == "/Contents")
dwSignValuePos = m_Pos;
CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, true);
// A key whose value cannot be parsed is dropped.
if (!pObj)
continue;
CFX_ByteStringC keyNoSlash(key.c_str() + 1, key.GetLength() - 1);
pDict->SetAt(keyNoSlash, pObj);
}
// Only when this is a signature dictionary and has contents, we reset the
// contents to the un-decrypted form.
if (IsSignatureDict(pDict.get()) && dwSignValuePos) {
CFX_AutoRestorer<FX_FILESIZE> save_pos(&m_Pos);
m_Pos = dwSignValuePos;
pDict->SetAt("Contents", GetObject(pObjList, objnum, gennum, false));
}
// A dictionary followed by "stream" is the header of a stream object.
FX_FILESIZE SavedPos = m_Pos;
CFX_ByteString nextword = GetNextWord(nullptr);
if (nextword != "stream") {
m_Pos = SavedPos;
return pDict.release();
}
return ReadStream(pDict.release(), objnum, gennum);
}
// A stray ">>" is left unread; it belongs to an enclosing dictionary.
if (word == ">>") {
m_Pos = SavedPos;
}
return nullptr;
}
// Stricter variant of GetObject(): parses one object at the current position
// and returns it, or nullptr on failure.
//
// Differences visible here: strings are decrypted whenever a crypto handler
// is set, an array is rejected unless the token that ended it starts with ']',
// and a dictionary is rejected as soon as one of its values fails to parse.
// Nested array elements and dictionary values are parsed with the non-strict
// GetObject().
CPDF_Object* CPDF_SyntaxParser::GetObjectByStrict(
CPDF_IndirectObjectHolder* pObjList,
FX_DWORD objnum,
FX_DWORD gennum) {
// The restorer puts the shared depth counter back on every exit path.
CFX_AutoRestorer<int> restorer(&s_CurrentRecursionDepth);
if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth) {
return nullptr;
}
FX_FILESIZE SavedPos = m_Pos;
bool bIsNumber;
CFX_ByteString word = GetNextWord(&bIsNumber);
if (word.GetLength() == 0) {
return nullptr;
}
if (bIsNumber) {
// Look ahead for "<num> <num> R" (an indirect reference); otherwise rewind
// to just after the first number and return it as a plain number.
FX_FILESIZE SavedPos = m_Pos;
CFX_ByteString nextword = GetNextWord(&bIsNumber);
if (bIsNumber) {
CFX_ByteString nextword2 = GetNextWord(nullptr);
if (nextword2 == "R") {
return new CPDF_Reference(pObjList, FXSYS_atoi(word));
}
}
m_Pos = SavedPos;
return new CPDF_Number(word);
}
if (word == "true" || word == "false") {
return new CPDF_Boolean(word == "true");
}
if (word == "null") {
return new CPDF_Null;
}
if (word == "(") {
CFX_ByteString str = ReadString();
if (m_pCryptoHandler)
m_pCryptoHandler->Decrypt(objnum, gennum, str);
return new CPDF_String(str, FALSE);
}
if (word == "<") {
CFX_ByteString str = ReadHexString();
if (m_pCryptoHandler)
m_pCryptoHandler->Decrypt(objnum, gennum, str);
return new CPDF_String(str, TRUE);
}
if (word == "[") {
std::unique_ptr<CPDF_Array, ReleaseDeleter<CPDF_Array>> pArray(
new CPDF_Array);
while (CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, true)) {
pArray->Add(pObj);
}
// m_WordBuffer still holds the token on which the last GetObject() call
// gave up; accept the array only if that token begins with ']'.
return m_WordBuffer[0] == ']' ? pArray.release() : nullptr;
}
if (word[0] == '/') {
// Name: decode the token without its leading '/'.
return new CPDF_Name(
PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1)));
}
if (word == "<<") {
std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict(
new CPDF_Dictionary);
while (1) {
FX_FILESIZE SavedPos = m_Pos;
CFX_ByteString key = GetNextWord(nullptr);
// Input ended inside the dictionary.
if (key.IsEmpty())
return nullptr;
if (key == ">>")
break;
// A missing ">>": stop at "endobj" and leave it unread for the caller.
if (key == "endobj") {
m_Pos = SavedPos;
break;
}
// Tokens that are not names are skipped.
if (key[0] != '/')
continue;
key = PDF_NameDecode(key);
std::unique_ptr<CPDF_Object, ReleaseDeleter<CPDF_Object>> obj(
GetObject(pObjList, objnum, gennum, true));
if (!obj) {
// Unparsable value: consume input up to the next CR or LF (or the end of
// the data), then fail the whole dictionary.
uint8_t ch;
while (GetNextChar(ch) && ch != 0x0A && ch != 0x0D) {
}
return nullptr;
}
// A key consisting of just "/" is parsed but not stored.
if (key.GetLength() > 1) {
pDict->SetAt(CFX_ByteStringC(key.c_str() + 1, key.GetLength() - 1),
obj.release());
}
}
// A dictionary followed by "stream" is the header of a stream object.
FX_FILESIZE SavedPos = m_Pos;
CFX_ByteString nextword = GetNextWord(nullptr);
if (nextword != "stream") {
m_Pos = SavedPos;
return pDict.release();
}
return ReadStream(pDict.release(), objnum, gennum);
}
// A stray ">>" is left unread; it belongs to an enclosing dictionary.
if (word == ">>") {
m_Pos = SavedPos;
}
return nullptr;
}
// Reports how many bytes of end-of-line marker start at |pos|: 2 for CRLF,
// 1 for a lone CR or LF, 0 otherwise. The parser position is not changed.
// Bytes that cannot be read count as 0 and therefore never match.
unsigned int CPDF_SyntaxParser::ReadEOLMarkers(FX_FILESIZE pos) {
  unsigned char first = 0;
  unsigned char second = 0;
  GetCharAt(pos, first);
  GetCharAt(pos + 1, second);

  if (first == '\r' && second == '\n')
    return 2;
  if (first == '\r' || first == '\n')
    return 1;
  return 0;
}
// Reads the data of a stream whose dictionary |pDict| and "stream" keyword
// have just been parsed, and returns a new CPDF_Stream built from |pDict| and
// the data. On failure |pDict| is released and nullptr is returned.
//
// |objnum| / |gennum| identify the stream object and are passed to the crypto
// handler. When no crypto handler applies, the declared Length is verified
// against the position of "endstream" and recomputed by keyword search if it
// is missing or wrong.
CPDF_Stream* CPDF_SyntaxParser::ReadStream(CPDF_Dictionary* pDict,
FX_DWORD objnum,
FX_DWORD gennum) {
CPDF_Object* pLenObj = pDict->GetElement("Length");
FX_FILESIZE len = -1;
CPDF_Reference* pLenObjRef = ToReference(pLenObj);
// Use Length only when it is not a reference, or when it is a reference that
// has an object list and points at an object other than this stream itself.
bool differingObjNum = !pLenObjRef || (pLenObjRef->GetObjList() &&
pLenObjRef->GetRefObjNum() != objnum);
if (pLenObj && differingObjNum)
len = pLenObj->GetInteger();
// Locate the start of stream.
ToNextLine();
FX_FILESIZE streamStartPos = m_Pos;
const CFX_ByteStringC kEndStreamStr("endstream");
const CFX_ByteStringC kEndObjStr("endobj");
// The object whose number equals m_MetadataObjnum is read without a crypto
// handler.
CPDF_CryptoHandler* pCryptoHandler =
objnum == (FX_DWORD)m_MetadataObjnum ? nullptr : m_pCryptoHandler.get();
if (!pCryptoHandler) {
FX_BOOL bSearchForKeyword = TRUE;
if (len >= 0) {
// Jump |len| bytes ahead (only if that stays inside the file), skip an
// end-of-line marker, and see whether "endstream" follows.
pdfium::base::CheckedNumeric<FX_FILESIZE> pos = m_Pos;
pos += len;
if (pos.IsValid() && pos.ValueOrDie() < m_FileLen) {
m_Pos = pos.ValueOrDie();
}
m_Pos += ReadEOLMarkers(m_Pos);
// Clear the part of the word buffer that is compared below.
FXSYS_memset(m_WordBuffer, 0, kEndStreamStr.GetLength() + 1);
GetNextWordInternal(nullptr);
// Earlier version of PDF specification doesn't require EOL marker before
// 'endstream' keyword. If keyword 'endstream' follows the bytes in
// specified length, it signals the end of stream.
if (FXSYS_memcmp(m_WordBuffer, kEndStreamStr.GetPtr(),
kEndStreamStr.GetLength()) == 0) {
bSearchForKeyword = FALSE;
}
}
if (bSearchForKeyword) {
// If len is not available, len needs to be calculated
// by searching the keywords "endstream" or "endobj".
m_Pos = streamStartPos;
FX_FILESIZE endStreamOffset = 0;
while (endStreamOffset >= 0) {
endStreamOffset = FindTag(kEndStreamStr, 0);
if (endStreamOffset < 0) {
// Can't find any "endstream".
break;
}
if (IsWholeWord(m_Pos - kEndStreamStr.GetLength(), m_FileLen,
kEndStreamStr, TRUE)) {
// Stop searching when the keyword "endstream" is found.
endStreamOffset = m_Pos - streamStartPos - kEndStreamStr.GetLength();
break;
}
}
m_Pos = streamStartPos;
FX_FILESIZE endObjOffset = 0;
while (endObjOffset >= 0) {
endObjOffset = FindTag(kEndObjStr, 0);
if (endObjOffset < 0) {
// Can't find any "endobj".
break;
}
if (IsWholeWord(m_Pos - kEndObjStr.GetLength(), m_FileLen, kEndObjStr,
TRUE)) {
// Stop searching when the keyword "endobj" is found.
endObjOffset = m_Pos - streamStartPos - kEndObjStr.GetLength();
break;
}
}
if (endStreamOffset < 0 && endObjOffset < 0) {
// Can't find "endstream" or "endobj".
pDict->Release();
return nullptr;
}
// Use whichever keyword comes first as the end of the stream data.
if (endStreamOffset < 0 && endObjOffset >= 0) {
// Correct the position of end stream.
endStreamOffset = endObjOffset;
} else if (endStreamOffset >= 0 && endObjOffset < 0) {
// Correct the position of end obj.
endObjOffset = endStreamOffset;
} else if (endStreamOffset > endObjOffset) {
endStreamOffset = endObjOffset;
}
len = endStreamOffset;
// Do not count an end-of-line marker directly before the keyword as data:
// try CRLF first, then a single CR or LF.
int numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 2);
if (numMarkers == 2) {
len -= 2;
} else {
numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 1);
if (numMarkers == 1) {
len -= 1;
}
}
if (len < 0) {
pDict->Release();
return nullptr;
}
// Store the corrected length in the dictionary.
pDict->SetAtInteger("Length", len);
}
m_Pos = streamStartPos;
}
// Still reachable with a negative length when a crypto handler is in use and
// no usable Length was found.
if (len < 0) {
pDict->Release();
return nullptr;
}
uint8_t* pData = nullptr;
if (len > 0) {
// NOTE(review): on the crypto path |len| comes straight from the dictionary
// and is not checked against the file size before allocating, and the
// ReadBlock() result is ignored - confirm this is acceptable.
pData = FX_Alloc(uint8_t, len);
ReadBlock(pData, len);
if (pCryptoHandler) {
// Decrypt into a separate buffer and take over that buffer.
CFX_BinaryBuf dest_buf;
dest_buf.EstimateSize(pCryptoHandler->DecryptGetSize(len));
void* context = pCryptoHandler->DecryptStart(objnum, gennum);
pCryptoHandler->DecryptStream(context, pData, len, dest_buf);
pCryptoHandler->DecryptFinish(context, dest_buf);
FX_Free(pData);
pData = dest_buf.GetBuffer();
len = dest_buf.GetSize();
dest_buf.DetachBuffer();
}
}
CPDF_Stream* pStream = new CPDF_Stream(pData, len, pDict);
// Read the word after the data. If it is "endobj" followed by an
// end-of-line marker, rewind so that it stays unread; otherwise the word
// remains consumed.
streamStartPos = m_Pos;
FXSYS_memset(m_WordBuffer, 0, kEndObjStr.GetLength() + 1);
GetNextWordInternal(nullptr);
int numMarkers = ReadEOLMarkers(m_Pos);
if (m_WordSize == kEndObjStr.GetLength() && numMarkers != 0 &&
FXSYS_memcmp(m_WordBuffer, kEndObjStr.GetPtr(), kEndObjStr.GetLength()) ==
0) {
m_Pos = streamStartPos;
}
return pStream;
}
// Attaches the parser to |pFileAccess|, whose header begins at byte
// |HeaderOffset|, and resets the read position to 0. Any previous read-ahead
// buffer is replaced by a fresh one, which is pre-filled from the start of the
// file. The result of that initial read is not checked.
void CPDF_SyntaxParser::InitParser(IFX_FileRead* pFileAccess,
                                   FX_DWORD HeaderOffset) {
  FX_Free(m_pFileBuf);
  m_pFileBuf = FX_Alloc(uint8_t, m_BufSize);

  m_pFileAccess = pFileAccess;
  m_HeaderOffset = HeaderOffset;
  m_FileLen = pFileAccess->GetSize();
  m_Pos = 0;
  m_BufOffset = 0;

  // Fill at most one buffer, or the whole file when it is smaller.
  const size_t prefetch_size =
      (size_t)((FX_FILESIZE)m_BufSize > m_FileLen ? m_FileLen : m_BufSize);
  pFileAccess->ReadBlock(m_pFileBuf, 0, prefetch_size);
}
// Reads the next token and returns its integer value, or 0 when the token is
// not numeric. The token is consumed in both cases.
int32_t CPDF_SyntaxParser::GetDirectNum() {
  bool bNumeric;
  GetNextWordInternal(&bNumeric);
  if (bNumeric) {
    // NUL-terminate the token in place so it can be parsed as a C string.
    m_WordBuffer[m_WordSize] = 0;
    return FXSYS_atoi(reinterpret_cast<const FX_CHAR*>(m_WordBuffer));
  }
  return 0;
}
// Reports whether the occurrence of |tag| starting at |startpos| stands alone
// as a word, i.e. is not glued to a neighbouring numeric or "other" byte (or,
// when |checkKeyword| is set, to a delimiter).
//
// A side is only examined when the tag's own edge byte on that side is a
// regular character. The right side is additionally examined only when
// startpos + tag length <= |limit|.
// NOTE(review): callers in this file pass both absolute positions and
// relative search limits (including 0) as |limit| - confirm which is meant.
bool CPDF_SyntaxParser::IsWholeWord(FX_FILESIZE startpos,
                                    FX_FILESIZE limit,
                                    const CFX_ByteStringC& tag,
                                    FX_BOOL checkKeyword) {
  const FX_DWORD taglen = tag.GetLength();
  const bool bTagStartsRegular =
      !PDFCharIsDelimiter(tag[0]) && !PDFCharIsWhitespace(tag[0]);
  const bool bTagEndsRegular = !PDFCharIsDelimiter(tag[taglen - 1]) &&
                               !PDFCharIsWhitespace(tag[taglen - 1]);

  uint8_t neighbour;

  // Byte immediately after the tag.
  if (bTagEndsRegular && startpos + (int32_t)taglen <= limit &&
      GetCharAt(startpos + (int32_t)taglen, neighbour) &&
      (PDFCharIsNumeric(neighbour) || PDFCharIsOther(neighbour) ||
       (checkKeyword && PDFCharIsDelimiter(neighbour)))) {
    return false;
  }

  // Byte immediately before the tag.
  if (bTagStartsRegular && startpos > 0 &&
      GetCharAt(startpos - 1, neighbour) &&
      (PDFCharIsNumeric(neighbour) || PDFCharIsOther(neighbour) ||
       (checkKeyword && PDFCharIsDelimiter(neighbour)))) {
    return false;
  }

  return true;
}
// Searches for |tag| starting at the current position, forwards or backwards
// depending on |bForward|. On success m_Pos is set to the first byte of the
// match and TRUE is returned; otherwise m_Pos is unchanged and FALSE is
// returned.
//
// |limit|, when non-zero, is the maximum distance from the starting position
// that is examined. With |bWholeWord| set, a match must also pass
// IsWholeWord().
FX_BOOL CPDF_SyntaxParser::SearchWord(const CFX_ByteStringC& tag,
FX_BOOL bWholeWord,
FX_BOOL bForward,
FX_FILESIZE limit) {
int32_t taglen = tag.GetLength();
if (taglen == 0) {
return FALSE;
}
FX_FILESIZE pos = m_Pos;
// |offset| is the index in |tag| of the next byte to match: it counts up from
// 0 when searching forwards and down from the last byte when searching
// backwards.
int32_t offset = 0;
if (!bForward) {
offset = taglen - 1;
}
const uint8_t* tag_data = tag.GetPtr();
uint8_t byte;
while (1) {
if (bForward) {
if (limit) {
if (pos >= m_Pos + limit) {
return FALSE;
}
}
if (!GetCharAt(pos, byte)) {
return FALSE;
}
} else {
if (limit) {
if (pos <= m_Pos - limit) {
return FALSE;
}
}
if (!GetCharAtBackward(pos, byte)) {
return FALSE;
}
}
if (byte == tag_data[offset]) {
// Partial match: move on to the next tag byte unless the tag is complete.
if (bForward) {
offset++;
if (offset < taglen) {
pos++;
continue;
}
} else {
offset--;
if (offset >= 0) {
pos--;
continue;
}
}
// Whole tag matched; |pos| is its last byte (forward) or first (backward).
FX_FILESIZE startpos = bForward ? pos - taglen + 1 : pos;
if (!bWholeWord || IsWholeWord(startpos, limit, tag, FALSE)) {
m_Pos = startpos;
return TRUE;
}
}
// Mismatch, or a match rejected by the whole-word test: restart matching,
// counting the current byte as the first matched byte if it equals the
// tag's leading (forward) or trailing (backward) byte.
// NOTE(review): for a one-byte tag rejected by the whole-word test this
// leaves |offset| outside the tag (1 forward, -1 backward) - confirm.
if (bForward) {
offset = byte == tag_data[0] ? 1 : 0;
pos++;
} else {
offset = byte == tag_data[taglen - 1] ? taglen - 2 : taglen - 1;
pos--;
}
if (pos < 0) {
return FALSE;
}
}
return FALSE;
}
int32_t CPDF_SyntaxParser::SearchMultiWord(const CFX_ByteStringC& tags,
FX_BOOL bWholeWord,
FX_FILESIZE limit) {
int32_t ntags = 1;
for (int i = 0; i < tags.GetLength(); ++i) {
if (tags[i] == 0) {
++ntags;
}
}
std::vector<SearchTagRecord> patterns(ntags);
FX_DWORD start = 0;
FX_DWORD itag = 0;
FX_DWORD max_len = 0;
for (int i = 0; i <= tags.GetLength(); ++i) {
if (tags[i] == 0) {
FX_DWORD len = i - start;
max_len = std::max(len, max_len);
patterns[itag].m_pTag = tags.GetCStr() + start;
patterns[itag].m_Len = len;
patterns[itag].m_Offset = 0;
start = i + 1;
++itag;
}
}
const FX_FILESIZE pos_limit = m_Pos + limit;
for (FX_FILESIZE pos = m_Pos; !limit || pos < pos_limit; ++pos) {
uint8_t byte;
if (!GetCharAt(pos, byte))
break;
for (int i = 0; i < ntags; ++i) {
SearchTagRecord& pat = patterns[i];
if (pat.m_pTag[pat.m_Offset] != byte) {
pat.m_Offset = (pat.m_pTag[0] == byte) ? 1 : 0;
continue;
}
++pat.m_Offset;
if (pat.m_Offset != pat.m_Len)
continue;
if (!bWholeWord ||
IsWholeWord(pos - pat.m_Len, limit,
CFX_ByteStringC(pat.m_pTag, pat.m_Len), FALSE)) {
return i;
}
pat.m_Offset = (pat.m_pTag[0] == byte) ? 1 : 0;
}
}
return -1;
}
// Scans forward from the current position for |tag|. On success the position
// is left just past the tag and the distance from the starting position to
// the first byte of the tag is returned. Returns -1 if the input ends, or if
// the position reaches start + |limit| before a match completes; the position
// then stays wherever the scan stopped.
FX_FILESIZE CPDF_SyntaxParser::FindTag(const CFX_ByteStringC& tag,
                                       FX_FILESIZE limit) {
  const int32_t taglen = tag.GetLength();
  const FX_FILESIZE origin = m_Pos;
  // Absolute stop position. A value of 0 disables the check below.
  const FX_FILESIZE stop_pos = limit + origin;

  int32_t matched = 0;
  uint8_t cur;
  while (GetNextChar(cur)) {
    if (cur != tag[matched]) {
      // Restart, counting this byte if it equals the tag's first byte.
      matched = cur == tag[0] ? 1 : 0;
    } else if (++matched == taglen) {
      return m_Pos - origin - taglen;
    }

    if (stop_pos && m_Pos == stop_pos)
      return -1;
  }
  return -1;
}
// Takes ownership of |pCryptoHandler| and installs it as the handler used by
// GetObject(), GetObjectByStrict() and ReadStream(). The previous handler, if
// any, is replaced; passing an empty pointer removes it.
void CPDF_SyntaxParser::SetEncrypt(
std::unique_ptr<CPDF_CryptoHandler> pCryptoHandler) {
m_pCryptoHandler = std::move(pCryptoHandler);
}
class CPDF_DataAvail final : public IPDF_DataAvail {
public:
CPDF_DataAvail(IFX_FileAvail* pFileAvail,
IFX_FileRead* pFileRead,
FX_BOOL bSupportHintTable);
~CPDF_DataAvail() override;
// IPDF_DataAvail:
DocAvailStatus IsDocAvail(IFX_DownloadHints* pHints) override;
void SetDocument(CPDF_Document* pDoc) override;
DocAvailStatus IsPageAvail(int iPage, IFX_DownloadHints* pHints) override;
DocFormStatus IsFormAvail(IFX_DownloadHints* pHints) override;
DocLinearizationStatus IsLinearizedPDF() override;
FX_BOOL IsLinearized() override { return m_bLinearized; }
void GetLinearizedMainXRefInfo(FX_FILESIZE* pPos, FX_DWORD* pSize) override;
int GetPageCount() const;
CPDF_Dictionary* GetPage(int index);