// blob: 97e0d6f5e36a596a660772e5a988518f3bbb2acd [file] [log] [blame]
// Copyright 2014 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
#include "../../../include/fpdfapi/fpdf_parser.h"
#include "../../../include/fpdfapi/fpdf_module.h"
#include "../../../include/fpdfapi/fpdf_page.h"
#include "../../../../third_party/numerics/safe_math.h"
#include "../fpdf_page/pageint.h"
#include <utility>
#include <vector>
#define _PARSER_OBJECT_LEVLE_ 64
extern const FX_LPCSTR _PDF_CharType;
// Reports whether |pDict| describes a signature: the value of its "Type"
// entry -- or, when "Type" is missing, of its "FT" entry -- must be the
// string "Sig". Returns FALSE when neither entry exists.
FX_BOOL IsSignatureDict(const CPDF_Dictionary* pDict)
{
    CPDF_Object* pTypeObj = pDict->GetElementValue(FX_BSTRC("Type"));
    if (pTypeObj == NULL) {
        // Fall back to the field-type key.
        pTypeObj = pDict->GetElementValue(FX_BSTRC("FT"));
    }
    if (pTypeObj == NULL) {
        return FALSE;
    }
    return (pTypeObj->GetString() == FX_BSTRC("Sig")) ? TRUE : FALSE;
}
// Three-way comparator for FX_DWORD values, suitable for bsearch/qsort.
// Returns a negative value, zero or a positive value when *p1 is less than,
// equal to or greater than *p2.
//
// The values are compared directly instead of being subtracted: the
// difference of two unsigned 32-bit values does not fit a signed 32-bit
// result, so subtraction reports the wrong ordering whenever the operands are
// more than INT_MAX apart.
static FX_INT32 _CompareDWord(const void* p1, const void* p2)
{
    const FX_DWORD lhs = *(const FX_DWORD*)p1;
    const FX_DWORD rhs = *(const FX_DWORD*)p2;
    if (lhs < rhs) {
        return -1;
    }
    if (lhs > rhs) {
        return 1;
    }
    return 0;
}
static int _CompareFileSize(const void* p1, const void* p2)
{
FX_FILESIZE ret = (*(FX_FILESIZE*)p1) - (*(FX_FILESIZE*)p2);
if (ret > 0) {
return 1;
}
if (ret < 0) {
return -1;
}
return 0;
}
// Constructs an idle parser: no document, trailer, encryption state or
// linearization data, and the parser owns any file it is later given.
CPDF_Parser::CPDF_Parser()
{
    m_pDocument = NULL;
    m_pTrailer = NULL;
    m_pEncryptDict = NULL;
    m_pSecurityHandler = NULL;
    m_pLinearized = NULL;
    m_dwFirstPageNo = 0;
    m_dwXrefStartObjNum = 0;
    m_bOwnFileRead = TRUE;
    m_FileVersion = 0;
    m_bForceUseSecurityHandler = FALSE;
    // These members are otherwise only assigned by CloseParser()/StartParse();
    // give them defined values so they can be read before the first parse.
    m_bVersionUpdated = FALSE;
    m_bXRefStream = FALSE;
    m_LastXRefOffset = 0;
}
// Tears the parser down completely; this is not a re-parse, so the document
// is destroyed as well.
CPDF_Parser::~CPDF_Parser()
{
    const FX_BOOL bReParse = FALSE;
    CloseParser(bReParse);
}
// Returns the highest object number covered by the cross-reference array
// (its size minus one), or 0 when the array is empty.
FX_DWORD CPDF_Parser::GetLastObjNum()
{
    const FX_DWORD dwEntries = m_CrossRef.GetSize();
    if (dwEntries == 0) {
        return 0;
    }
    return dwEntries - 1;
}
// Stores |pDict| as the current encryption dictionary; pass NULL to clear it.
// Only the pointer is recorded here.
void CPDF_Parser::SetEncryptDictionary(CPDF_Dictionary* pDict)
{
    this->m_pEncryptDict = pDict;
}
// Releases everything produced by a previous parse: trailer(s), encryption
// handlers, cached object streams, cross-reference tables, linearization
// dictionary and (when owned) the file access object.
// |bReParse| - when TRUE the existing document object is kept.
void CPDF_Parser::CloseParser(FX_BOOL bReParse)
{
    m_bVersionUpdated = FALSE;

    // The document survives only when the caller is about to parse again.
    if (!bReParse && m_pDocument) {
        delete m_pDocument;
        m_pDocument = NULL;
    }

    if (m_pTrailer) {
        m_pTrailer->Release();
        m_pTrailer = NULL;
    }

    ReleaseEncryptHandler();
    SetEncryptDictionary(NULL);

    // Only release the file when this parser owns it.
    if (m_Syntax.m_pFileAccess && m_bOwnFileRead) {
        m_Syntax.m_pFileAccess->Release();
        m_Syntax.m_pFileAccess = NULL;
    }

    // Destroy every cached object-stream accessor, then empty the map.
    for (FX_POSITION mapPos = m_ObjectStreamMap.GetStartPosition(); mapPos; ) {
        FX_LPVOID key;
        CPDF_StreamAcc* pCachedAcc;
        m_ObjectStreamMap.GetNextAssoc(mapPos, key, (void*&)pCachedAcc);
        delete pCachedAcc;
    }
    m_ObjectStreamMap.RemoveAll();

    m_SortedOffset.RemoveAll();
    m_CrossRef.RemoveAll();
    m_V5Type.RemoveAll();
    m_ObjVersion.RemoveAll();

    // Release the trailers collected from earlier cross-reference sections.
    const FX_INT32 nTrailers = m_Trailers.GetSize();
    FX_INT32 idx = 0;
    while (idx < nTrailers) {
        CPDF_Dictionary* pOldTrailer = m_Trailers.GetAt(idx);
        if (pOldTrailer) {
            pOldTrailer->Release();
        }
        ++idx;
    }
    m_Trailers.RemoveAll();

    if (m_pLinearized) {
        m_pLinearized->Release();
        m_pLinearized = NULL;
    }
}
// Locates the "%PDF" header marker within the first bytes of |pFile|.
// Offsets 0 through 1024 (inclusive) are probed. Returns the offset of the
// marker, or -1 when |pFile| is NULL, a read fails, or no marker is found.
//
// The marker is compared byte by byte; the four bytes '%','P','D','F' are the
// in-memory layout of the former FXDWORD_FROM_LSBFIRST(0x46445025) tag, and
// comparing bytes avoids reading the byte buffer through a FX_DWORD pointer
// (unaligned / type-punned access).
static FX_INT32 GetHeaderOffset(IFX_FileRead* pFile)
{
    if (!pFile) {
        return -1;
    }
    const FX_INT32 kMaxHeaderOffset = 1024;
    FX_BYTE buf[4];
    for (FX_INT32 offset = 0; offset <= kMaxHeaderOffset; ++offset) {
        if (!pFile->ReadBlock(buf, offset, 4)) {
            return -1;
        }
        if (buf[0] == '%' && buf[1] == 'P' && buf[2] == 'D' && buf[3] == 'F') {
            return offset;
        }
    }
    return -1;
}
// Opens |filename| (byte-string path) for reading and parses it.
// Returns PDFPARSE_ERROR_FILE when the file reader cannot be created,
// otherwise the result of the IFX_FileRead overload.
FX_DWORD CPDF_Parser::StartParse(FX_LPCSTR filename, FX_BOOL bReParse)
{
    if (IFX_FileRead* pReader = FX_CreateFileRead(filename)) {
        return StartParse(pReader, bReParse);
    }
    return PDFPARSE_ERROR_FILE;
}
// Opens |filename| (wide-string path) for reading and parses it.
// Returns PDFPARSE_ERROR_FILE when the file reader cannot be created,
// otherwise the result of the IFX_FileRead overload.
FX_DWORD CPDF_Parser::StartParse(FX_LPCWSTR filename, FX_BOOL bReParse)
{
    if (IFX_FileRead* pReader = FX_CreateFileRead(filename)) {
        return StartParse(pReader, bReParse);
    }
    return PDFPARSE_ERROR_FILE;
}
CPDF_SecurityHandler* FPDF_CreateStandardSecurityHandler();
CPDF_SecurityHandler* FPDF_CreatePubKeyHandler(void*);
// Parses the PDF file exposed by |pFileAccess|.
//
// |pFileAccess|  - file to parse; a NULL pointer yields PDFPARSE_ERROR_FILE.
// |bReParse|     - when TRUE the existing document object is reused instead
//                  of creating a new CPDF_Document.
// |bOwnFileRead| - when TRUE the parser takes ownership of |pFileAccess| and
//                  releases it (here on a missing header, otherwise in
//                  CloseParser()).
//
// Returns PDFPARSE_ERROR_SUCCESS, or PDFPARSE_ERROR_FILE /
// PDFPARSE_ERROR_FORMAT / an error code from SetEncryptHandler().
//
// Steps: locate the "%PDF" header, read the version digits, find "startxref"
// near the end of the file, load the cross-reference data (table first, then
// stream), and fall back to RebuildCrossRef() whenever the stored data is
// missing or does not lead to a usable document catalog.
FX_DWORD CPDF_Parser::StartParse(IFX_FileRead* pFileAccess, FX_BOOL bReParse, FX_BOOL bOwnFileRead)
{
    CloseParser(bReParse);
    m_bXRefStream = FALSE;
    m_LastXRefOffset = 0;
    m_bOwnFileRead = bOwnFileRead;
    // Guard against a NULL file before it is dereferenced while searching for
    // the header; reported with the same code the filename overloads use.
    if (!pFileAccess) {
        return PDFPARSE_ERROR_FILE;
    }
    FX_INT32 offset = GetHeaderOffset(pFileAccess);
    if (offset == -1) {
        if (bOwnFileRead) {
            pFileAccess->Release();
        }
        return PDFPARSE_ERROR_FORMAT;
    }
    m_Syntax.InitParser(pFileAccess, offset);
    // The characters at positions 5 and 7 are read as the major and minor
    // version digits; the version is stored as major * 10 + minor.
    FX_BYTE ch;
    if (!m_Syntax.GetCharAt(5, ch)) {
        return PDFPARSE_ERROR_FORMAT;
    }
    if (ch >= '0' && ch <= '9') {
        m_FileVersion = (ch - '0') * 10;
    }
    if (!m_Syntax.GetCharAt(7, ch)) {
        return PDFPARSE_ERROR_FORMAT;
    }
    if (ch >= '0' && ch <= '9') {
        m_FileVersion += ch - '0';
    }
    if (m_Syntax.m_FileLen < m_Syntax.m_HeaderOffset + 9) {
        return PDFPARSE_ERROR_FORMAT;
    }
    // Start the backwards search for "startxref" 9 bytes before the end.
    m_Syntax.RestorePos(m_Syntax.m_FileLen - m_Syntax.m_HeaderOffset - 9);
    if (!bReParse) {
        m_pDocument = FX_NEW CPDF_Document(this);
    }
    FX_BOOL bXRefRebuilt = FALSE;
    if (m_Syntax.SearchWord(FX_BSTRC("startxref"), TRUE, FALSE, 4096)) {
        FX_FILESIZE startxref_offset = m_Syntax.SavePos();
        FX_LPVOID pResult = FXSYS_bsearch(&startxref_offset, m_SortedOffset.GetData(), m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), _CompareFileSize);
        if (pResult == NULL) {
            m_SortedOffset.Add(startxref_offset);
        }
        m_Syntax.GetKeyword();
        FX_BOOL bNumber;
        CFX_ByteString xrefpos_str = m_Syntax.GetNextWord(bNumber);
        if (!bNumber) {
            return PDFPARSE_ERROR_FORMAT;
        }
        m_LastXRefOffset = (FX_FILESIZE)FXSYS_atoi64(xrefpos_str);
        // Try a classic xref table first, then an xref stream, and finally
        // rebuild the table by scanning the file.
        if (!LoadAllCrossRefV4(m_LastXRefOffset) && !LoadAllCrossRefV5(m_LastXRefOffset)) {
            if (!RebuildCrossRef()) {
                return PDFPARSE_ERROR_FORMAT;
            }
            bXRefRebuilt = TRUE;
            m_LastXRefOffset = 0;
        }
    } else {
        if (!RebuildCrossRef()) {
            return PDFPARSE_ERROR_FORMAT;
        }
        bXRefRebuilt = TRUE;
    }
    FX_DWORD dwRet = SetEncryptHandler();
    if (dwRet != PDFPARSE_ERROR_SUCCESS) {
        return dwRet;
    }
    m_pDocument->LoadDoc();
    if (m_pDocument->GetRoot() == NULL || m_pDocument->GetPageCount() == 0) {
        // The stored cross-reference data did not yield a usable document;
        // retry once with a rebuilt table unless that was already done.
        if (bXRefRebuilt) {
            return PDFPARSE_ERROR_FORMAT;
        }
        ReleaseEncryptHandler();
        if (!RebuildCrossRef()) {
            return PDFPARSE_ERROR_FORMAT;
        }
        dwRet = SetEncryptHandler();
        if (dwRet != PDFPARSE_ERROR_SUCCESS) {
            return dwRet;
        }
        m_pDocument->LoadDoc();
        if (m_pDocument->GetRoot() == NULL) {
            return PDFPARSE_ERROR_FORMAT;
        }
    }
    FXSYS_qsort(m_SortedOffset.GetData(), m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), _CompareFileSize);
    FX_DWORD RootObjNum = GetRootObjNum();
    if (RootObjNum == 0) {
        ReleaseEncryptHandler();
        RebuildCrossRef();
        RootObjNum = GetRootObjNum();
        if (RootObjNum == 0) {
            return PDFPARSE_ERROR_FORMAT;
        }
        dwRet = SetEncryptHandler();
        if (dwRet != PDFPARSE_ERROR_SUCCESS) {
            return dwRet;
        }
    }
    if (m_pSecurityHandler && !m_pSecurityHandler->IsMetadataEncrypted()) {
        // Record the Metadata object number for the syntax parser. The type
        // is verified before the object is treated as a reference.
        CPDF_Object* pMetadata = m_pDocument->GetRoot()->GetElement(FX_BSTRC("Metadata"));
        if (pMetadata && pMetadata->GetType() == PDFOBJ_REFERENCE) {
            m_Syntax.m_MetadataObjnum = ((CPDF_Reference*)pMetadata)->GetRefObjNum();
        }
    }
    return PDFPARSE_ERROR_SUCCESS;
}
// (Re)creates the security and crypto handlers from the trailer's "Encrypt"
// entry.
//
// Returns PDFPARSE_ERROR_SUCCESS when the file is not encrypted or the
// handlers were set up; PDFPARSE_ERROR_FORMAT when there is no trailer;
// PDFPARSE_ERROR_HANDLER when no usable security/crypto handler exists; and
// PDFPARSE_ERROR_PASSWORD when the standard handler rejects the document.
//
// Compared with the previous revision, a NULL result from
// CreateCryptoHandler() and a NULL document are handled instead of being
// dereferenced.
FX_DWORD CPDF_Parser::SetEncryptHandler()
{
    ReleaseEncryptHandler();
    SetEncryptDictionary(NULL);
    if (m_pTrailer == NULL) {
        return PDFPARSE_ERROR_FORMAT;
    }
    // "Encrypt" may be a direct dictionary or a reference to one.
    CPDF_Object* pEncryptObj = m_pTrailer->GetElement(FX_BSTRC("Encrypt"));
    if (pEncryptObj) {
        if (pEncryptObj->GetType() == PDFOBJ_DICTIONARY) {
            SetEncryptDictionary((CPDF_Dictionary*)pEncryptObj);
        } else if (pEncryptObj->GetType() == PDFOBJ_REFERENCE) {
            pEncryptObj = m_pDocument
                          ? m_pDocument->GetIndirectObject(((CPDF_Reference*)pEncryptObj)->GetRefObjNum())
                          : NULL;
            if (pEncryptObj) {
                SetEncryptDictionary(pEncryptObj->GetDict());
            }
        }
    }
    if (m_bForceUseSecurityHandler) {
        // A handler was installed externally; it must exist and initialise.
        if (m_pSecurityHandler == NULL) {
            return PDFPARSE_ERROR_HANDLER;
        }
        if (!m_pSecurityHandler->OnInit(this, m_pEncryptDict)) {
            return PDFPARSE_ERROR_HANDLER;
        }
        CPDF_CryptoHandler* pCryptoHandler = m_pSecurityHandler->CreateCryptoHandler();
        if (pCryptoHandler == NULL) {
            return PDFPARSE_ERROR_HANDLER;
        }
        if (!pCryptoHandler->Init(m_pEncryptDict, m_pSecurityHandler)) {
            delete pCryptoHandler;
            pCryptoHandler = NULL;
            return PDFPARSE_ERROR_HANDLER;
        }
        m_Syntax.SetEncrypt(pCryptoHandler);
    } else if (m_pEncryptDict) {
        CFX_ByteString filter = m_pEncryptDict->GetString(FX_BSTRC("Filter"));
        CPDF_SecurityHandler* pSecurityHandler = NULL;
        FX_DWORD err = PDFPARSE_ERROR_HANDLER;
        // Only the "Standard" filter is supported here; an OnInit() failure
        // of that handler is reported as a password error.
        if (filter == FX_BSTRC("Standard")) {
            pSecurityHandler = FPDF_CreateStandardSecurityHandler();
            err = PDFPARSE_ERROR_PASSWORD;
        }
        if (pSecurityHandler == NULL) {
            return PDFPARSE_ERROR_HANDLER;
        }
        if (!pSecurityHandler->OnInit(this, m_pEncryptDict)) {
            delete pSecurityHandler;
            pSecurityHandler = NULL;
            return err;
        }
        // From here on the handler is owned by the parser and is released by
        // ReleaseEncryptHandler().
        m_pSecurityHandler = pSecurityHandler;
        CPDF_CryptoHandler* pCryptoHandler = pSecurityHandler->CreateCryptoHandler();
        if (pCryptoHandler == NULL) {
            return PDFPARSE_ERROR_HANDLER;
        }
        if (!pCryptoHandler->Init(m_pEncryptDict, m_pSecurityHandler)) {
            delete pCryptoHandler;
            pCryptoHandler = NULL;
            return PDFPARSE_ERROR_HANDLER;
        }
        m_Syntax.SetEncrypt(pCryptoHandler);
    }
    return PDFPARSE_ERROR_SUCCESS;
}
// Destroys the syntax parser's crypto handler and, unless a security handler
// was forced by the caller (m_bForceUseSecurityHandler), the security handler
// as well.
void CPDF_Parser::ReleaseEncryptHandler()
{
    if (m_Syntax.m_pCryptoHandler != NULL) {
        delete m_Syntax.m_pCryptoHandler;
        m_Syntax.m_pCryptoHandler = NULL;
    }

    // A forced handler is left in place.
    if (m_bForceUseSecurityHandler) {
        return;
    }
    if (m_pSecurityHandler != NULL) {
        delete m_pSecurityHandler;
        m_pSecurityHandler = NULL;
    }
}
// Returns the file offset recorded for object |objnum|.
//  - entry type 1: the offset stored for the object itself;
//  - entry type 2: the offset stored for the object whose number is kept in
//    this object's cross-reference slot (the containing object stream);
//  - anything else, or an out-of-range number: 0.
//
// The redirected object number originates from file data, so it is
// range-checked before being used as an index.
FX_FILESIZE CPDF_Parser::GetObjectOffset(FX_DWORD objnum)
{
    const FX_DWORD dwCount = (FX_DWORD)m_CrossRef.GetSize();
    if (objnum >= dwCount) {
        return 0;
    }
    if (m_V5Type[objnum] == 1) {
        return m_CrossRef[objnum];
    }
    if (m_V5Type[objnum] == 2) {
        const FX_FILESIZE streamObjNum = m_CrossRef[objnum];
        if (streamObjNum < 0 || streamObjNum >= (FX_FILESIZE)dwCount) {
            return 0;
        }
        return m_CrossRef[(FX_INT32)streamObjNum];
    }
    return 0;
}
// Returns the integer value of |key| in |pDict| when the entry is stored
// directly as a number object; returns 0 when the key is absent or the entry
// has any other type (references are not followed).
static FX_INT32 GetDirectInteger(CPDF_Dictionary* pDict, FX_BSTR key)
{
    CPDF_Object* pEntry = pDict->GetElement(key);
    if (pEntry != NULL && pEntry->GetType() == PDFOBJ_NUMBER) {
        return ((CPDF_Number*)pEntry)->GetInteger();
    }
    return 0;
}
// Checks that the entry |key| of |pDict|, if present, has object type
// |iType|. A missing entry counts as acceptable (TRUE).
static FX_BOOL CheckDirectType(CPDF_Dictionary* pDict, FX_BSTR key, FX_INT32 iType)
{
    CPDF_Object* pEntry = pDict->GetElement(key);
    if (pEntry == NULL) {
        return TRUE;
    }
    const FX_BOOL bMatches = (pEntry->GetType() == iType);
    return bMatches;
}
// Loads the classic cross-reference table at |xrefpos| together with every
// earlier table reachable through the trailers' "Prev" entries.
//
// The first pass only walks the chain (skipping entry data) to collect the
// table positions and their "XRefStm" values, oldest first; the second pass
// loads the entries in that order. Returns FALSE when a table or trailer
// cannot be read, when "Size" is outside (0, 1 << 20], when "Prev" is not a
// direct number, or when the "Prev" chain revisits a position.
//
// The chain check covers every position already visited, not only the
// current one, so a cycle such as A -> B -> A no longer loops forever.
FX_BOOL CPDF_Parser::LoadAllCrossRefV4(FX_FILESIZE xrefpos)
{
    if (!LoadCrossRefV4(xrefpos, 0, TRUE, FALSE)) {
        return FALSE;
    }
    m_pTrailer = LoadTrailerV4();
    if (m_pTrailer == NULL) {
        return FALSE;
    }
    FX_INT32 xrefsize = GetDirectInteger(m_pTrailer, FX_BSTRC("Size"));
    if (xrefsize <= 0 || xrefsize > (1 << 20)) {
        return FALSE;
    }
    m_CrossRef.SetSize(xrefsize);
    m_V5Type.SetSize(xrefsize);
    CFX_FileSizeArray CrossRefList, XRefStreamList;
    CrossRefList.Add(xrefpos);
    XRefStreamList.Add(GetDirectInteger(m_pTrailer, FX_BSTRC("XRefStm")));
    if (!CheckDirectType(m_pTrailer, FX_BSTRC("Prev"), PDFOBJ_NUMBER)) {
        return FALSE;
    }
    FX_FILESIZE newxrefpos = GetDirectInteger(m_pTrailer, FX_BSTRC("Prev"));
    if (newxrefpos == xrefpos) {
        return FALSE;
    }
    xrefpos = newxrefpos;
    while (xrefpos) {
        CrossRefList.InsertAt(0, xrefpos);
        // The result is intentionally not checked here (as before); a bad
        // table surfaces through the trailer load or the second pass.
        LoadCrossRefV4(xrefpos, 0, TRUE, FALSE);
        CPDF_Dictionary* pDict = LoadTrailerV4();
        if (pDict == NULL) {
            return FALSE;
        }
        if (!CheckDirectType(pDict, FX_BSTRC("Prev"), PDFOBJ_NUMBER)) {
            pDict->Release();
            return FALSE;
        }
        newxrefpos = GetDirectInteger(pDict, FX_BSTRC("Prev"));
        // Reject circular "Prev" chains. CrossRefList already contains the
        // current position, so direct self-references are covered too. A
        // value of 0 simply terminates the chain.
        if (newxrefpos != 0) {
            for (FX_INT32 k = 0; k < CrossRefList.GetSize(); k ++) {
                if (CrossRefList[k] == newxrefpos) {
                    pDict->Release();
                    return FALSE;
                }
            }
        }
        xrefpos = newxrefpos;
        XRefStreamList.InsertAt(0, pDict->GetInteger(FX_BSTRC("XRefStm")));
        m_Trailers.Add(pDict);
    }
    // Second pass: load the entries, oldest section first.
    for (FX_INT32 i = 0; i < CrossRefList.GetSize(); i ++) {
        if (!LoadCrossRefV4(CrossRefList[i], XRefStreamList[i], FALSE, i == 0)) {
            return FALSE;
        }
    }
    return TRUE;
}
// Loads the cross-reference table of a linearized file at |xrefpos|
// (|dwObjCount| entries) and then every earlier table reachable through the
// trailers' "Prev" entries.
//
// Returns FALSE when the first table or a trailer cannot be read, when the
// trailer's "Size" is 0, when a later table fails to load, or when the
// "Prev" chain revisits a position (which previously looped forever).
FX_BOOL CPDF_Parser::LoadLinearizedAllCrossRefV4(FX_FILESIZE xrefpos, FX_DWORD dwObjCount)
{
    if (!LoadLinearizedCrossRefV4(xrefpos, dwObjCount)) {
        return FALSE;
    }
    m_pTrailer = LoadTrailerV4();
    if (m_pTrailer == NULL) {
        return FALSE;
    }
    FX_INT32 xrefsize = GetDirectInteger(m_pTrailer, FX_BSTRC("Size"));
    if (xrefsize == 0) {
        return FALSE;
    }
    CFX_FileSizeArray CrossRefList, XRefStreamList;
    CrossRefList.Add(xrefpos);
    XRefStreamList.Add(GetDirectInteger(m_pTrailer, FX_BSTRC("XRefStm")));
    xrefpos = GetDirectInteger(m_pTrailer, FX_BSTRC("Prev"));
    while (xrefpos) {
        // Reject circular "Prev" chains before visiting the position again.
        for (FX_INT32 k = 0; k < CrossRefList.GetSize(); k ++) {
            if (CrossRefList[k] == xrefpos) {
                return FALSE;
            }
        }
        CrossRefList.InsertAt(0, xrefpos);
        LoadCrossRefV4(xrefpos, 0, TRUE, FALSE);
        CPDF_Dictionary* pDict = LoadTrailerV4();
        if (pDict == NULL) {
            return FALSE;
        }
        xrefpos = GetDirectInteger(pDict, FX_BSTRC("Prev"));
        XRefStreamList.InsertAt(0, pDict->GetInteger(FX_BSTRC("XRefStm")));
        m_Trailers.Add(pDict);
    }
    // Index 0 is deliberately skipped, exactly as before; the loop starts at
    // 1, so the "first section" flag (i == 0) is never set here.
    for (FX_INT32 i = 1; i < CrossRefList.GetSize(); i ++) {
        if (!LoadCrossRefV4(CrossRefList[i], XRefStreamList[i], FALSE, i == 0)) {
            return FALSE;
        }
    }
    return TRUE;
}
// Reads |dwObjCount| fixed-width (20 byte) cross-reference entries starting
// at file position |pos| (header offset included) and records them for
// objects 0 .. dwObjCount - 1.
//
// Entries are read in blocks of up to 1024 records. An entry whose byte 17 is
// 'f' is stored as free (offset 0, type 0); any other entry is stored with
// its offset, generation number and type 1. Returns FALSE when a block lies
// beyond the end of the file, a read fails, or an entry's offset field is not
// numeric.
//
// Changes from the previous revision:
//  - offsets are parsed with FXSYS_atoi64 into FX_FILESIZE, matching
//    LoadCrossRefV4, instead of being truncated to 32 bits;
//  - a zero-length trailing block (count is a multiple of 1024) no longer
//    issues a read, whose result would depend on the file implementation.
FX_BOOL CPDF_Parser::LoadLinearizedCrossRefV4(FX_FILESIZE pos, FX_DWORD dwObjCount)
{
    FX_FILESIZE dwStartPos = pos - m_Syntax.m_HeaderOffset;
    m_Syntax.RestorePos(dwStartPos);
    FX_LPVOID pResult = FXSYS_bsearch(&pos, m_SortedOffset.GetData(), m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), _CompareFileSize);
    if (pResult == NULL) {
        m_SortedOffset.Add(pos);
    }
    FX_DWORD start_objnum = 0;
    FX_DWORD count = dwObjCount;
    FX_FILESIZE SavedPos = m_Syntax.SavePos();
    FX_INT32 recordsize = 20;
    char* pBuf = FX_Alloc(char, 1024 * recordsize + 1);
    pBuf[1024 * recordsize] = '\0';
    FX_INT32 nBlocks = count / 1024 + 1;
    for (FX_INT32 block = 0; block < nBlocks; block ++) {
        // Every block holds 1024 records except the last, which holds the
        // remainder (possibly none).
        FX_INT32 block_size = block == nBlocks - 1 ? count % 1024 : 1024;
        FX_DWORD dwReadSize = block_size * recordsize;
        if ((FX_FILESIZE)(dwStartPos + dwReadSize) > m_Syntax.m_FileLen) {
            FX_Free(pBuf);
            return FALSE;
        }
        if (dwReadSize > 0 && !m_Syntax.ReadBlock((FX_LPBYTE)pBuf, dwReadSize)) {
            FX_Free(pBuf);
            return FALSE;
        }
        for (FX_INT32 i = 0; i < block_size; i ++) {
            FX_DWORD objnum = start_objnum + block * 1024 + i;
            char* pEntry = pBuf + i * recordsize;
            if (pEntry[17] == 'f') {
                m_CrossRef.SetAtGrow(objnum, 0);
                m_V5Type.SetAtGrow(objnum, 0);
            } else {
                FX_FILESIZE offset = (FX_FILESIZE)FXSYS_atoi64(pEntry);
                if (offset == 0) {
                    // A parsed value of 0 is only accepted when the 10-digit
                    // offset field really consists of digits.
                    for (FX_INT32 c = 0; c < 10; c ++) {
                        if (pEntry[c] < '0' || pEntry[c] > '9') {
                            FX_Free(pBuf);
                            return FALSE;
                        }
                    }
                }
                m_CrossRef.SetAtGrow(objnum, offset);
                FX_INT32 version = FXSYS_atoi(pEntry + 11);
                if (version >= 1) {
                    m_bVersionUpdated = TRUE;
                }
                m_ObjVersion.SetAtGrow(objnum, version);
                if (m_CrossRef[objnum] < m_Syntax.m_FileLen) {
                    FX_LPVOID pResult = FXSYS_bsearch(&m_CrossRef[objnum], m_SortedOffset.GetData(), m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), _CompareFileSize);
                    if (pResult == NULL) {
                        m_SortedOffset.Add(m_CrossRef[objnum]);
                    }
                }
                m_V5Type.SetAtGrow(objnum, 1);
            }
        }
    }
    FX_Free(pBuf);
    // Leave the parser positioned just past the table.
    m_Syntax.RestorePos(SavedPos + count * recordsize);
    return TRUE;
}
// Parses the classic cross-reference table that starts with the keyword
// "xref" at |pos|.
//
// |streampos| - when non-zero, position of a cross-reference stream that is
//               loaded (via LoadCrossRefV5) after the table.
// |bSkip|     - when TRUE the entry data is skipped; only the subsection
//               headers are consumed so the parser ends up after the table.
// |bFirst|    - when TRUE the first free entry encountered in each subsection
//               is stored as object 0.
//
// Returns FALSE when the keyword is missing, a subsection header is empty or
// starts at an object number >= 1 << 20, an entry's offset field is not
// numeric, an entry block cannot be read, or the stream fails to load.
//
// Change from the previous revision: the result of ReadBlock() is checked.
// A failed read used to be ignored, so entries were parsed from whatever the
// buffer happened to contain (uninitialized memory for the first block).
// Zero-length blocks are not read at all.
FX_BOOL CPDF_Parser::LoadCrossRefV4(FX_FILESIZE pos, FX_FILESIZE streampos, FX_BOOL bSkip, FX_BOOL bFirst)
{
    m_Syntax.RestorePos(pos);
    if (m_Syntax.GetKeyword() != FX_BSTRC("xref")) {
        return FALSE;
    }
    FX_LPVOID pResult = FXSYS_bsearch(&pos, m_SortedOffset.GetData(), m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), _CompareFileSize);
    if (pResult == NULL) {
        m_SortedOffset.Add(pos);
    }
    if (streampos) {
        FX_LPVOID pResult = FXSYS_bsearch(&streampos, m_SortedOffset.GetData(), m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), _CompareFileSize);
        if (pResult == NULL) {
            m_SortedOffset.Add(streampos);
        }
    }
    // One iteration per subsection ("start count" followed by the entries).
    while (1) {
        FX_FILESIZE SavedPos = m_Syntax.SavePos();
        FX_BOOL bIsNumber;
        CFX_ByteString word = m_Syntax.GetNextWord(bIsNumber);
        if (word.IsEmpty()) {
            return FALSE;
        }
        if (!bIsNumber) {
            // Not a subsection header: the table is finished.
            m_Syntax.RestorePos(SavedPos);
            break;
        }
        FX_DWORD start_objnum = FXSYS_atoi(word);
        if (start_objnum >= (1 << 20)) {
            return FALSE;
        }
        FX_DWORD count = m_Syntax.GetDirectNum();
        m_Syntax.ToNextWord();
        SavedPos = m_Syntax.SavePos();
        FX_BOOL bFirstItem = FALSE;
        FX_INT32 recordsize = 20;
        if (bFirst) {
            bFirstItem = TRUE;
        }
        m_dwXrefStartObjNum = start_objnum;
        if (!bSkip) {
            char* pBuf = FX_Alloc(char, 1024 * recordsize + 1);
            pBuf[1024 * recordsize] = '\0';
            FX_INT32 nBlocks = count / 1024 + 1;
            FX_BOOL bFirstBlock = TRUE;
            for (FX_INT32 block = 0; block < nBlocks; block ++) {
                FX_INT32 block_size = block == nBlocks - 1 ? count % 1024 : 1024;
                FX_DWORD dwReadSize = block_size * recordsize;
                if (dwReadSize > 0 && !m_Syntax.ReadBlock((FX_LPBYTE)pBuf, dwReadSize)) {
                    FX_Free(pBuf);
                    return FALSE;
                }
                for (FX_INT32 i = 0; i < block_size; i ++) {
                    FX_DWORD objnum = start_objnum + block * 1024 + i;
                    char* pEntry = pBuf + i * recordsize;
                    if (pEntry[17] == 'f') {
                        if (bFirstItem) {
                            objnum = 0;
                            bFirstItem = FALSE;
                        }
                        if (bFirstBlock) {
                            // A "0000000000 65535 f" entry at the very start
                            // of a subsection that does not begin at 0 is
                            // treated as object 0, and the remaining entries
                            // are shifted down by one.
                            FX_FILESIZE offset = (FX_FILESIZE)FXSYS_atoi64(pEntry);
                            FX_INT32 version = FXSYS_atoi(pEntry + 11);
                            if (offset == 0 && version == 65535 && start_objnum != 0) {
                                start_objnum--;
                                objnum = 0;
                            }
                        }
                        m_CrossRef.SetAtGrow(objnum, 0);
                        m_V5Type.SetAtGrow(objnum, 0);
                    } else {
                        FX_FILESIZE offset = (FX_FILESIZE)FXSYS_atoi64(pEntry);
                        if (offset == 0) {
                            // A parsed value of 0 is only accepted when the
                            // offset field really consists of digits.
                            for (FX_INT32 c = 0; c < 10; c ++) {
                                if (pEntry[c] < '0' || pEntry[c] > '9') {
                                    FX_Free(pBuf);
                                    return FALSE;
                                }
                            }
                        }
                        m_CrossRef.SetAtGrow(objnum, offset);
                        FX_INT32 version = FXSYS_atoi(pEntry + 11);
                        if (version >= 1) {
                            m_bVersionUpdated = TRUE;
                        }
                        m_ObjVersion.SetAtGrow(objnum, version);
                        if (m_CrossRef[objnum] < m_Syntax.m_FileLen) {
                            FX_LPVOID pResult = FXSYS_bsearch(&m_CrossRef[objnum], m_SortedOffset.GetData(), m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), _CompareFileSize);
                            if (pResult == NULL) {
                                m_SortedOffset.Add(m_CrossRef[objnum]);
                            }
                        }
                        m_V5Type.SetAtGrow(objnum, 1);
                    }
                    if (bFirstBlock) {
                        bFirstBlock = FALSE;
                    }
                }
            }
            FX_Free(pBuf);
        }
        // Continue after this subsection's entries.
        m_Syntax.RestorePos(SavedPos + count * recordsize);
    }
    if (streampos) {
        if (!LoadCrossRefV5(streampos, streampos, FALSE)) {
            return FALSE;
        }
    }
    return TRUE;
}
// Loads the cross-reference stream at |xrefpos| as the main one, then follows
// the "Prev" chain (LoadCrossRefV5 writes the previous position back into
// |xrefpos|) until it ends. Returns FALSE when any stream fails to load or
// when the chain revisits a position; previously a circular chain never
// terminated.
FX_BOOL CPDF_Parser::LoadAllCrossRefV5(FX_FILESIZE xrefpos)
{
    // Positions already loaded, used to detect circular "Prev" chains.
    std::vector<FX_FILESIZE> seen_xrefpos;
    seen_xrefpos.push_back(xrefpos);
    if (!LoadCrossRefV5(xrefpos, xrefpos, TRUE)) {
        return FALSE;
    }
    while (xrefpos) {
        for (size_t k = 0; k < seen_xrefpos.size(); ++k) {
            if (seen_xrefpos[k] == xrefpos) {
                return FALSE;
            }
        }
        seen_xrefpos.push_back(xrefpos);
        if (!LoadCrossRefV5(xrefpos, xrefpos, FALSE)) {
            return FALSE;
        }
    }
    m_ObjectStreamMap.InitHashTable(101, FALSE);
    m_bXRefStream = TRUE;
    return TRUE;
}
// Rebuilds the cross-reference data by scanning the whole file instead of
// trusting stored xref tables/streams.
//
// All existing cross-reference arrays and the current trailer are discarded.
// The file is then read in chunks of up to 4096 bytes and fed, byte by byte,
// through a small state machine (|status|) that recognises "<num> <gen> obj"
// headers and the keywords "trailer" and "xref":
//   0  - idle, classifying the next byte
//   1  - after whitespace, a number/keyword may start
//   2  - reading the digits of an object number
//   3  - whitespace after the object number, waiting for the generation
//   4  - reading the digits of the generation number
//   5  - whitespace after the generation number, waiting for "obj"
//   6  - matching the rest of "obj" (|inside_index| = chars matched)
//   7  - matching "trailer"
//   8  - matching "xref"
//   9  - inside a '%' comment, until end of line
//   10 - inside a '(' ... ')' string, tracking nesting |depth|
//   11 - after '<' (either "<<" or a '<' ... '>' run)
//   12 - after "<<"
//   13 - after a backslash, until a delimiter/whitespace
//   14 - number directly followed by a non-space, matching "endobj"
// Every object found is recorded in m_CrossRef / m_V5Type / m_ObjVersion and
// its offset is added to m_SortedOffset. Always returns TRUE.
FX_BOOL CPDF_Parser::RebuildCrossRef()
{
m_CrossRef.RemoveAll();
m_V5Type.RemoveAll();
m_SortedOffset.RemoveAll();
m_ObjVersion.RemoveAll();
if (m_pTrailer) {
m_pTrailer->Release();
m_pTrailer = NULL;
}
FX_INT32 status = 0;
FX_INT32 inside_index = 0;
FX_DWORD objnum, gennum;
FX_INT32 depth = 0;
FX_LPBYTE buffer = FX_Alloc(FX_BYTE, 4096);
FX_FILESIZE pos = m_Syntax.m_HeaderOffset;
// start_pos / start_pos1: file positions where the object number and the
// generation number candidates began.
FX_FILESIZE start_pos, start_pos1;
// Absolute positions of the last object header, "xref" and "trailer" seen.
FX_FILESIZE last_obj = -1, last_xref = -1, last_trailer = -1;
FX_BOOL bInUpdate = FALSE;
while (pos < m_Syntax.m_FileLen) {
// Set when a parsed object extends past the current chunk; the scan then
// restarts at the object's end (see |pos| assignment below).
FX_BOOL bOverFlow = FALSE;
FX_DWORD size = (FX_DWORD)(m_Syntax.m_FileLen - pos);
if (size > 4096) {
size = 4096;
}
if (!m_Syntax.m_pFileAccess->ReadBlock(buffer, pos, size)) {
break;
}
for (FX_DWORD i = 0; i < size; i ++) {
FX_BYTE byte = buffer[i];
// Note: "--i" in the states below makes the loop look at the same byte
// again in the new state.
switch (status) {
// State 0: idle.
case 0:
if (_PDF_CharType[byte] == 'W') {
status = 1;
}
if (byte <= '9' && byte >= '0') {
--i;
status = 1;
}
if (byte == '%') {
inside_index = 0;
status = 9;
}
if (byte == '(') {
status = 10;
depth = 1;
}
if (byte == '<') {
inside_index = 1;
status = 11;
}
if (byte == '\\') {
status = 13;
}
if (byte == 't') {
status = 7;
inside_index = 1;
}
break;
// State 1: a token may start here (object number, "trailer" or "xref").
case 1:
if (_PDF_CharType[byte] == 'W') {
break;
} else if (byte <= '9' && byte >= '0') {
start_pos = pos + i;
status = 2;
objnum = byte - '0';
} else if (byte == 't') {
status = 7;
inside_index = 1;
} else if (byte == 'x') {
status = 8;
inside_index = 1;
} else {
--i;
status = 0;
}
break;
// State 2: accumulating the object number.
case 2:
if (byte <= '9' && byte >= '0') {
objnum = objnum * 10 + byte - '0';
break;
} else if (_PDF_CharType[byte] == 'W') {
status = 3;
} else {
--i;
status = 14;
inside_index = 0;
}
break;
// State 3: waiting for the first digit of the generation number.
case 3:
if (byte <= '9' && byte >= '0') {
start_pos1 = pos + i;
status = 4;
gennum = byte - '0';
} else if (_PDF_CharType[byte] == 'W') {
break;
} else if (byte == 't') {
status = 7;
inside_index = 1;
} else {
--i;
status = 0;
}
break;
// State 4: accumulating the generation number.
case 4:
if (byte <= '9' && byte >= '0') {
gennum = gennum * 10 + byte - '0';
break;
} else if (_PDF_CharType[byte] == 'W') {
status = 5;
} else {
--i;
status = 0;
}
break;
// State 5: two numbers seen; expect "obj". If a third number follows, the
// previous generation number becomes the object number candidate.
case 5:
if (byte == 'o') {
status = 6;
inside_index = 1;
} else if (_PDF_CharType[byte] == 'W') {
break;
} else if (byte <= '9' && byte >= '0') {
objnum = gennum;
gennum = byte - '0';
start_pos = start_pos1;
start_pos1 = pos + i;
status = 4;
} else if (byte == 't') {
status = 7;
inside_index = 1;
} else {
--i;
status = 0;
}
break;
// State 6: matching "obj"; inside_index counts the characters matched.
case 6:
switch (inside_index) {
case 1:
if (byte != 'b') {
--i;
status = 0;
} else {
inside_index ++;
}
break;
case 2:
if (byte != 'j') {
--i;
status = 0;
} else {
inside_index ++;
}
break;
case 3:
// "obj" must be followed by whitespace or a delimiter to count.
if (_PDF_CharType[byte] == 'W' || _PDF_CharType[byte] == 'D') {
// Object numbers above 0x1000000 are ignored.
if (objnum > 0x1000000) {
status = 0;
break;
}
FX_FILESIZE obj_pos = start_pos - m_Syntax.m_HeaderOffset;
last_obj = start_pos;
FX_LPVOID pResult = FXSYS_bsearch(&obj_pos, m_SortedOffset.GetData(), m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), _CompareFileSize);
if (pResult == NULL) {
m_SortedOffset.Add(obj_pos);
}
FX_FILESIZE obj_end = 0;
CPDF_Object *pObject = ParseIndirectObjectAtByStrict(m_pDocument, obj_pos, objnum, NULL, &obj_end);
if (pObject) {
int iType = pObject->GetType();
if (iType == PDFOBJ_STREAM) {
CPDF_Stream* pStream = (CPDF_Stream*)pObject;
CPDF_Dictionary* pDict = pStream->GetDict();
if (pDict) {
if (pDict->KeyExist(FX_BSTRC("Type"))) {
CFX_ByteString bsValue = pDict->GetString(FX_BSTRC("Type"));
// A cross-reference stream whose Root resolves to a dictionary with a
// "Pages" entry replaces the current trailer (as a clone of its dict).
if (bsValue == FX_BSTRC("XRef") && pDict->KeyExist(FX_BSTRC("Size"))) {
CPDF_Object* pRoot = pDict->GetElement(FX_BSTRC("Root"));
if (pRoot && pRoot->GetDict() && pRoot->GetDict()->GetElement(FX_BSTRC("Pages"))) {
if (m_pTrailer) {
m_pTrailer->Release();
}
m_pTrailer = (CPDF_Dictionary*)pDict->Clone();
}
}
}
}
}
}
FX_FILESIZE offset = 0;
m_Syntax.RestorePos(obj_pos);
offset = m_Syntax.FindTag(FX_BSTRC("obj"), 0);
if (offset == -1) {
offset = 0;
} else {
offset += 3;
}
// Skip the bytes of the object that was just parsed: either advance |i|
// within this chunk or restart the chunk loop at the object's end.
FX_FILESIZE nLen = obj_end - obj_pos - offset;
if ((FX_DWORD)nLen > size - i) {
pos = obj_end + m_Syntax.m_HeaderOffset;
bOverFlow = TRUE;
} else {
i += (FX_DWORD)nLen;
}
// An already recorded (non-zero) entry is only overwritten when this
// occurrence parsed successfully; otherwise a new entry is created.
if (m_CrossRef.GetSize() > (FX_INT32)objnum && m_CrossRef[objnum]) {
if (pObject) {
FX_DWORD oldgen = m_ObjVersion.GetAt(objnum);
m_CrossRef[objnum] = obj_pos;
m_ObjVersion.SetAt(objnum, (FX_SHORT)gennum);
if (oldgen != gennum) {
m_bVersionUpdated = TRUE;
}
}
} else {
m_CrossRef.SetAtGrow(objnum, obj_pos);
m_V5Type.SetAtGrow(objnum, 1);
m_ObjVersion.SetAtGrow(objnum, (FX_SHORT)gennum);
}
if (pObject) {
pObject->Release();
}
}
--i;
status = 0;
break;
}
break;
// State 7: matching "trailer". Once all 7 characters matched and a
// whitespace/delimiter follows, the object after the keyword is parsed.
case 7:
if (inside_index == 7) {
if (_PDF_CharType[byte] == 'W' || _PDF_CharType[byte] == 'D') {
last_trailer = pos + i - 7;
m_Syntax.RestorePos(pos + i - m_Syntax.m_HeaderOffset);
CPDF_Object* pObj = m_Syntax.GetObject(m_pDocument, 0, 0, 0);
if (pObj) {
if (pObj->GetType() != PDFOBJ_DICTIONARY && pObj->GetType() != PDFOBJ_STREAM) {
pObj->Release();
} else {
CPDF_Dictionary* pTrailer = NULL;
if (pObj->GetType() == PDFOBJ_STREAM) {
pTrailer = ((CPDF_Stream*)pObj)->GetDict();
} else {
pTrailer = (CPDF_Dictionary*)pObj;
}
if (pTrailer) {
if (m_pTrailer) {
// A trailer already exists: merge this one's entries into it, but only
// when it has no Root or its Root reference points at an object that
// has a non-zero cross-reference entry.
CPDF_Object* pRoot = pTrailer->GetElement(FX_BSTRC("Root"));
if (pRoot == NULL || (pRoot->GetType() == PDFOBJ_REFERENCE &&
(FX_DWORD)m_CrossRef.GetSize() > ((CPDF_Reference*)pRoot)->GetRefObjNum() &&
m_CrossRef.GetAt(((CPDF_Reference*)pRoot)->GetRefObjNum()) != 0)) {
// Note: |pos| and |pObj| below shadow the outer variables inside the
// while loop only.
FX_POSITION pos = pTrailer->GetStartPos();
while (pos) {
CFX_ByteString key;
CPDF_Object* pObj = pTrailer->GetNextElement(pos, key);
m_pTrailer->SetAt(key, pObj->Clone(), m_pDocument);
}
pObj->Release();
} else {
pObj->Release();
}
} else {
// First trailer found: keep the dictionary itself, or a clone of the
// stream's dictionary.
if (pObj->GetType() == PDFOBJ_STREAM) {
m_pTrailer = (CPDF_Dictionary*)pTrailer->Clone();
pObj->Release();
} else {
m_pTrailer = pTrailer;
}
// Pick up the offset following a "startxref" keyword, if one comes next.
FX_FILESIZE dwSavePos = m_Syntax.SavePos();
CFX_ByteString strWord = m_Syntax.GetKeyword();
if (!strWord.Compare(FX_BSTRC("startxref"))) {
FX_BOOL bNumber = FALSE;
CFX_ByteString bsOffset = m_Syntax.GetNextWord(bNumber);
if (bNumber) {
m_LastXRefOffset = FXSYS_atoi(bsOffset);
}
}
m_Syntax.RestorePos(dwSavePos);
}
} else {
pObj->Release();
}
bInUpdate = TRUE;
}
}
}
--i;
status = 0;
} else if (byte == "trailer"[inside_index]) {
inside_index ++;
} else {
--i;
status = 0;
}
break;
// State 8: matching "xref"; remembers where the keyword started.
case 8:
if (inside_index == 4) {
last_xref = pos + i - 4;
status = 1;
} else if (byte == "xref"[inside_index]) {
inside_index ++;
} else {
--i;
status = 0;
}
break;
// State 9: comment, ends at CR or LF.
case 9:
if (byte == '\r' || byte == '\n') {
status = 0;
}
break;
// State 10: parenthesised string; leaves the state when nesting reaches 0.
case 10:
if (byte == ')') {
if (depth > 0) {
depth--;
}
} else if (byte == '(') {
depth++;
}
if (!depth) {
status = 0;
}
break;
// State 11: after '<'. A second '<' immediately after means "<<";
// otherwise stay here until '>'.
case 11:
if (byte == '<' && inside_index == 1) {
status = 12;
} else if (byte == '>') {
status = 0;
}
inside_index = 0;
break;
// State 12: after "<<"; re-examine the byte in the idle state.
case 12:
--i;
status = 0;
break;
// State 13: after a backslash; skip until a delimiter or whitespace.
case 13:
if (_PDF_CharType[byte] == 'D' || _PDF_CharType[byte] == 'W') {
--i;
status = 0;
}
break;
// State 14: a number was directly followed by a non-whitespace byte;
// consume a possible "endobj" before returning to idle.
case 14:
if (_PDF_CharType[byte] == 'W') {
status = 0;
} else if (byte == '%' || byte == '(' || byte == '<' || byte == '\\') {
status = 0;
--i;
} else if (inside_index == 6) {
status = 0;
--i;
} else if (byte == "endobj"[inside_index]) {
inside_index++;
}
break;
}
if (bOverFlow) {
// |pos| was already moved to the end of the parsed object; add nothing.
size = 0;
break;
}
}
pos += size;
}
// Choose the position recorded as the end of the object area: the last
// "xref" when it follows the last object, otherwise the last "trailer", or
// the end of the file when no usable marker was found.
if (last_xref != -1 && last_xref > last_obj) {
last_trailer = last_xref;
} else if (last_trailer == -1 || last_xref < last_obj) {
last_trailer = m_Syntax.m_FileLen;
}
FX_FILESIZE offset = last_trailer - m_Syntax.m_HeaderOffset;
FX_LPVOID pResult = FXSYS_bsearch(&offset, m_SortedOffset.GetData(), m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), _CompareFileSize);
if (pResult == NULL) {
m_SortedOffset.Add(offset);
}
FX_Free(buffer);
return TRUE;
}
// Decodes the first |n| bytes at |p| as a big-endian (most significant byte
// first) unsigned integer. Returns 0 when |n| is not positive.
static FX_DWORD _GetVarInt(FX_LPCBYTE p, FX_INT32 n)
{
    FX_DWORD value = 0;
    FX_INT32 idx = 0;
    while (idx < n) {
        value = value * 256 + p[idx];
        ++idx;
    }
    return value;
}
// Loads one cross-reference stream located at |pos|.
//
// |prev|      - receives the stream dictionary's "Prev" value (0 if absent).
// |bMainXRef| - TRUE for the newest stream: its dictionary becomes the
//               trailer and the cross-reference arrays are sized from "Size".
//               Otherwise the dictionary is appended to m_Trailers.
//
// Each subsection listed in "Index" (pairs of first object number / count) is
// decoded using the field widths from "W". Entries already set by a newer
// stream are left alone, except those flagged 255 (object streams referenced
// by a type-2 entry), whose offset is filled in.
//
// Returns FALSE when the object at |pos| is missing or not a stream, "Size"
// is negative, "W" is missing/invalid, or a type-2 entry names an object
// stream outside the table.
//
// Changes from the previous revision:
//  - when "Index" is absent the default subsection [0 Size] is used; the
//    fallback used to be applied only when an "Index" array existed but was
//    unusable, so streams without "Index" contributed no entries;
//  - subsections with a negative count are skipped instead of reaching
//    checked_cast;
//  - the non-stream error path releases the parsed object like the other
//    error paths do.
FX_BOOL CPDF_Parser::LoadCrossRefV5(FX_FILESIZE pos, FX_FILESIZE& prev, FX_BOOL bMainXRef)
{
    CPDF_Stream* pStream = (CPDF_Stream*)ParseIndirectObjectAt(m_pDocument, pos, 0, NULL);
    if (!pStream) {
        return FALSE;
    }
    if (m_pDocument) {
        m_pDocument->InsertIndirectObject(pStream->m_ObjNum, pStream);
    }
    if (pStream->GetType() != PDFOBJ_STREAM) {
        pStream->Release();
        return FALSE;
    }
    prev = pStream->GetDict()->GetInteger(FX_BSTRC("Prev"));
    FX_INT32 size = pStream->GetDict()->GetInteger(FX_BSTRC("Size"));
    if (size < 0) {
        pStream->Release();
        return FALSE;
    }
    if (bMainXRef) {
        m_pTrailer = (CPDF_Dictionary*)pStream->GetDict()->Clone();
        m_CrossRef.SetSize(size);
        if (m_V5Type.SetSize(size)) {
            FXSYS_memset32(m_V5Type.GetData(), 0, size);
        }
    } else {
        m_Trailers.Add((CPDF_Dictionary*)pStream->GetDict()->Clone());
    }
    // Collect the (first object number, count) pairs from "Index".
    std::vector<std::pair<FX_INT32, FX_INT32> > arrIndex;
    CPDF_Array* pArray = pStream->GetDict()->GetArray(FX_BSTRC("Index"));
    if (pArray) {
        FX_DWORD nPairSize = pArray->GetCount() / 2;
        for (FX_DWORD i = 0; i < nPairSize; i++) {
            CPDF_Object* pStartNumObj = pArray->GetElement(i * 2);
            CPDF_Object* pCountObj = pArray->GetElement(i * 2 + 1);
            if (pStartNumObj && pStartNumObj->GetType() == PDFOBJ_NUMBER
                    && pCountObj && pCountObj->GetType() == PDFOBJ_NUMBER) {
                arrIndex.push_back(std::make_pair(pStartNumObj->GetInteger(), pCountObj->GetInteger()));
            }
        }
    }
    // Default subsection when "Index" is absent or holds no usable pair.
    if (arrIndex.empty()) {
        arrIndex.push_back(std::make_pair(0, size));
    }
    FX_DWORD nSegs = (FX_DWORD)arrIndex.size();
    pArray = pStream->GetDict()->GetArray(FX_BSTRC("W"));
    if (pArray == NULL) {
        pStream->Release();
        return FALSE;
    }
    // Field widths; their checked sum is the size of one entry.
    CFX_DWordArray WidthArray;
    FX_SAFE_DWORD dwAccWidth = 0;
    for (FX_DWORD i = 0; i < pArray->GetCount(); i ++) {
        WidthArray.Add(pArray->GetInteger(i));
        dwAccWidth += WidthArray[i];
    }
    if (!dwAccWidth.IsValid() || WidthArray.GetSize() < 3) {
        pStream->Release();
        return FALSE;
    }
    FX_DWORD totalWidth = dwAccWidth.ValueOrDie();
    CPDF_StreamAcc acc;
    acc.LoadAllData(pStream);
    FX_LPCBYTE pData = acc.GetData();
    FX_DWORD dwTotalSize = acc.GetSize();
    // Number of entries consumed so far by the accepted subsections.
    FX_DWORD segindex = 0;
    for (FX_DWORD i = 0; i < nSegs; i ++) {
        FX_INT32 startnum = arrIndex[i].first;
        if (startnum < 0 || arrIndex[i].second < 0) {
            continue;
        }
        m_dwXrefStartObjNum = base::checked_cast<FX_DWORD, FX_INT32> (startnum);
        FX_DWORD count = base::checked_cast<FX_DWORD, FX_INT32> (arrIndex[i].second);
        // Skip subsections that would read past the decoded stream data.
        FX_SAFE_DWORD dwCaculatedSize = segindex;
        dwCaculatedSize += count;
        dwCaculatedSize *= totalWidth;
        if (!dwCaculatedSize.IsValid() || dwCaculatedSize.ValueOrDie() > dwTotalSize) {
            continue;
        }
        FX_LPCBYTE segstart = pData + segindex * totalWidth;
        // Skip subsections that would address objects beyond the table.
        FX_SAFE_DWORD dwMaxObjNum = startnum;
        dwMaxObjNum += count;
        FX_DWORD dwV5Size = base::checked_cast<FX_DWORD, FX_INT32> (m_V5Type.GetSize());
        if (!dwMaxObjNum.IsValid() || dwMaxObjNum.ValueOrDie() > dwV5Size) {
            continue;
        }
        for (FX_DWORD j = 0; j < count; j ++) {
            // The type field defaults to 1 when its width is 0.
            FX_INT32 type = 1;
            FX_LPCBYTE entrystart = segstart + j * totalWidth;
            if (WidthArray[0]) {
                type = _GetVarInt(entrystart, WidthArray[0]);
            }
            if (m_V5Type[startnum + j] == 255) {
                // Slot was flagged as an object stream by a type-2 entry:
                // only its offset is filled in.
                FX_FILESIZE offset = _GetVarInt(entrystart + WidthArray[0], WidthArray[1]);
                m_CrossRef[startnum + j] = offset;
                FX_LPVOID pResult = FXSYS_bsearch(&offset, m_SortedOffset.GetData(), m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), _CompareFileSize);
                if (pResult == NULL) {
                    m_SortedOffset.Add(offset);
                }
                continue;
            }
            if (m_V5Type[startnum + j]) {
                // Already defined by a stream loaded earlier.
                continue;
            }
            m_V5Type[startnum + j] = type;
            if (type == 0) {
                m_CrossRef[startnum + j] = 0;
            } else {
                FX_FILESIZE offset = _GetVarInt(entrystart + WidthArray[0], WidthArray[1]);
                m_CrossRef[startnum + j] = offset;
                if (type == 1) {
                    FX_LPVOID pResult = FXSYS_bsearch(&offset, m_SortedOffset.GetData(), m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), _CompareFileSize);
                    if (pResult == NULL) {
                        m_SortedOffset.Add(offset);
                    }
                } else {
                    // For other types the second field is an object number;
                    // flag that object as an object stream.
                    if (offset < 0 || offset >= m_V5Type.GetSize()) {
                        pStream->Release();
                        return FALSE;
                    }
                    m_V5Type[offset] = 255;
                }
            }
        }
        segindex += count;
    }
    pStream->Release();
    return TRUE;
}
// Returns the trailer's "ID" array, or NULL when there is no trailer, no
// "ID" entry, or the entry is not an array. When the entry is a reference,
// the referenced object is parsed and stored back into the trailer under
// "ID" before the type check.
CPDF_Array* CPDF_Parser::GetIDArray()
{
    CPDF_Object* pIDObj = NULL;
    if (m_pTrailer) {
        pIDObj = m_pTrailer->GetElement(FX_BSTRC("ID"));
    }
    if (!pIDObj) {
        return NULL;
    }
    if (pIDObj->GetType() == PDFOBJ_REFERENCE) {
        const FX_DWORD dwRefNum = ((CPDF_Reference*)pIDObj)->GetRefObjNum();
        pIDObj = ParseIndirectObject(NULL, dwRefNum);
        m_pTrailer->SetAt(FX_BSTRC("ID"), pIDObj);
    }
    if (pIDObj && pIDObj->GetType() == PDFOBJ_ARRAY) {
        return (CPDF_Array*)pIDObj;
    }
    return NULL;
}
// Returns the object number referenced by the trailer's "Root" entry, or 0
// when there is no trailer, no such entry, or the entry is not a reference.
FX_DWORD CPDF_Parser::GetRootObjNum()
{
    if (m_pTrailer == NULL) {
        return 0;
    }
    CPDF_Object* pRootRef = m_pTrailer->GetElement(FX_BSTRC("Root"));
    if (pRootRef && pRootRef->GetType() == PDFOBJ_REFERENCE) {
        return ((CPDF_Reference*) pRootRef)->GetRefObjNum();
    }
    return 0;
}
// Returns the object number referenced by the trailer's "Info" entry, or 0
// when there is no trailer, no such entry, or the entry is not a reference.
FX_DWORD CPDF_Parser::GetInfoObjNum()
{
    if (m_pTrailer == NULL) {
        return 0;
    }
    CPDF_Object* pInfoRef = m_pTrailer->GetElement(FX_BSTRC("Info"));
    if (pInfoRef && pInfoRef->GetType() == PDFOBJ_REFERENCE) {
        return ((CPDF_Reference*) pInfoRef)->GetRefObjNum();
    }
    return 0;
}
// Determines, without parsing the object, whether the raw bytes of object
// |objnum| contain the words "/Form" and "stream".
//
// |bForm| is set to the answer (FALSE by default). The return value is FALSE
// only when the object's offset is the last entry of m_SortedOffset, i.e.
// its extent cannot be computed; in every other case -- including unknown,
// free or compressed objects -- TRUE is returned.
FX_BOOL CPDF_Parser::IsFormStream(FX_DWORD objnum, FX_BOOL& bForm)
{
    bForm = FALSE;
    if (objnum >= (FX_DWORD)m_CrossRef.GetSize()) {
        return TRUE;
    }
    // Free (0) and compressed (2) entries have no bytes of their own to scan.
    if (m_V5Type[objnum] == 0 || m_V5Type[objnum] == 2) {
        return TRUE;
    }

    FX_FILESIZE objOffset = m_CrossRef[objnum];
    FX_FILESIZE* pFound = (FX_FILESIZE*)FXSYS_bsearch(&objOffset, m_SortedOffset.GetData(), m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), _CompareFileSize);
    if (pFound == NULL) {
        return TRUE;
    }
    FX_FILESIZE* pFirst = (FX_FILESIZE*)m_SortedOffset.GetData();
    if (pFound - pFirst == m_SortedOffset.GetSize() - 1) {
        return FALSE;
    }

    // The object extends up to the next recorded offset.
    FX_FILESIZE extent = pFound[1] - objOffset;
    FX_FILESIZE restorePos = m_Syntax.SavePos();
    m_Syntax.RestorePos(objOffset);
    bForm = m_Syntax.SearchMultiWord(FX_BSTRC("/Form\0stream"), TRUE, extent) == 0;
    m_Syntax.RestorePos(restorePos);
    return TRUE;
}
// Parses indirect object |objnum| and returns it, or NULL when the number is
// out of range, the entry is free, or the object cannot be located.
//
//  - Entry types 1 and 255: the object is parsed at its recorded file offset.
//  - Entry type 2: the object lives in an object stream. The stream's header
//    holds "N" pairs of (object number, offset); the pairs are scanned for
//    |objnum| and the object is parsed at "First" + offset.
//
// The header scan runs only while the remaining pair count is positive; a
// negative "N" (taken from file data) previously kept the loop running.
CPDF_Object* CPDF_Parser::ParseIndirectObject(CPDF_IndirectObjects* pObjList, FX_DWORD objnum, PARSE_CONTEXT* pContext)
{
    if (objnum >= (FX_DWORD)m_CrossRef.GetSize()) {
        return NULL;
    }
    if (m_V5Type[objnum] == 1 || m_V5Type[objnum] == 255) {
        FX_FILESIZE pos = m_CrossRef[objnum];
        if (pos <= 0) {
            return NULL;
        }
        return ParseIndirectObjectAt(pObjList, pos, objnum, pContext);
    }
    if (m_V5Type[objnum] == 2) {
        // For compressed objects the cross-reference slot holds the number of
        // the containing object stream.
        CPDF_StreamAcc* pObjStream = GetObjectStream((FX_DWORD)m_CrossRef[objnum]);
        if (pObjStream == NULL) {
            return NULL;
        }
        FX_INT32 n = pObjStream->GetDict()->GetInteger(FX_BSTRC("N"));
        FX_INT32 offset = pObjStream->GetDict()->GetInteger(FX_BSTRC("First"));
        CPDF_SyntaxParser syntax;
        CFX_SmartPointer<IFX_FileStream> file(FX_CreateMemoryStream((FX_LPBYTE)pObjStream->GetData(), (size_t)pObjStream->GetSize(), FALSE));
        syntax.InitParser((IFX_FileStream*)file, 0);
        CPDF_Object* pRet = NULL;
        while (n > 0) {
            FX_DWORD thisnum = syntax.GetDirectNum();
            FX_DWORD thisoff = syntax.GetDirectNum();
            if (thisnum == objnum) {
                syntax.RestorePos(offset + thisoff);
                pRet = syntax.GetObject(pObjList, 0, 0, 0, pContext);
                break;
            }
            n --;
        }
        return pRet;
    }
    return NULL;
}
// Returns the data accessor for object stream |objnum|, creating and caching
// it in m_ObjectStreamMap on first use. Returns NULL when there is no
// document or the object is missing or not a stream.
CPDF_StreamAcc* CPDF_Parser::GetObjectStream(FX_DWORD objnum)
{
    void* const mapKey = (void*)(FX_UINTPTR)objnum;

    CPDF_StreamAcc* pAcc = NULL;
    if (m_ObjectStreamMap.Lookup(mapKey, (void*&)pAcc)) {
        return pAcc;
    }

    const CPDF_Stream* pSrcStream = NULL;
    if (m_pDocument) {
        pSrcStream = (CPDF_Stream*)m_pDocument->GetIndirectObject(objnum);
    }
    if (!pSrcStream || pSrcStream->GetType() != PDFOBJ_STREAM) {
        return NULL;
    }

    // Load all data of the stream once and remember the accessor.
    pAcc = FX_NEW CPDF_StreamAcc;
    pAcc->LoadAllData(pSrcStream);
    m_ObjectStreamMap.SetAt(mapKey, pAcc);
    return pAcc;
}
// Returns the distance from the start of object |objnum| to the next known
// offset in m_SortedOffset. For an object stored in an object stream (type 2)
// the size of the containing stream object is reported instead.
//
// Returns 0 when the object number is out of range, the entry has no offset,
// the offset is not present in m_SortedOffset, or it is the last offset.
FX_FILESIZE CPDF_Parser::GetObjectSize(FX_DWORD objnum)
{
    if (objnum >= (FX_DWORD)m_CrossRef.GetSize()) {
        return 0;
    }
    if (m_V5Type[objnum] == 2) {
        // For type-2 entries m_CrossRef holds the number of the containing
        // object stream. That number comes from the file, so it has to be
        // range-checked again before it is used as an index.
        objnum = (FX_DWORD)m_CrossRef[objnum];
        if (objnum >= (FX_DWORD)m_CrossRef.GetSize()) {
            return 0;
        }
    }
    if (m_V5Type[objnum] == 1 || m_V5Type[objnum] == 255) {
        FX_FILESIZE offset = m_CrossRef[objnum];
        if (offset == 0) {
            return 0;
        }
        FX_LPVOID pResult = FXSYS_bsearch(&offset, m_SortedOffset.GetData(), m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), _CompareFileSize);
        if (pResult == NULL) {
            return 0;
        }
        // The last sorted offset has no successor to measure against.
        if ((FX_FILESIZE*)pResult - (FX_FILESIZE*)m_SortedOffset.GetData() == m_SortedOffset.GetSize() - 1) {
            return 0;
        }
        return ((FX_FILESIZE*)pResult)[1] - offset;
    }
    return 0;
}
// Copies the raw bytes of indirect object |objnum| into a newly allocated
// buffer.
//
// On return |pBuffer| is NULL and |size| is 0 when nothing could be
// extracted; otherwise |pBuffer| points to |size| bytes obtained from
// FX_Alloc (presumably released by the caller with FX_Free - confirm against
// callers).
//
// Type-2 entries are sliced out of the decoded object stream using the
// (objnum, offset) header pairs. Type-1 entries are read from the file,
// ending at the next known object offset when that offset looks like a real
// object/xref start, or just after the next "endobj" keyword otherwise.
void CPDF_Parser::GetIndirectBinary(FX_DWORD objnum, FX_LPBYTE& pBuffer, FX_DWORD& size)
{
    pBuffer = NULL;
    size = 0;
    if (objnum >= (FX_DWORD)m_CrossRef.GetSize()) {
        return;
    }
    if (m_V5Type[objnum] == 2) {
        CPDF_StreamAcc* pObjStream = GetObjectStream((FX_DWORD)m_CrossRef[objnum]);
        if (pObjStream == NULL) {
            return;
        }
        FX_INT32 n = pObjStream->GetDict()->GetInteger(FX_BSTRC("N"));
        FX_INT32 offset = pObjStream->GetDict()->GetInteger(FX_BSTRC("First"));
        CPDF_SyntaxParser syntax;
        FX_LPCBYTE pData = pObjStream->GetData();
        FX_DWORD totalsize = pObjStream->GetSize();
        CFX_SmartPointer<IFX_FileStream> file(FX_CreateMemoryStream((FX_LPBYTE)pData, (size_t)totalsize, FALSE));
        syntax.InitParser((IFX_FileStream*)file, 0);
        // "N" is signed and file-controlled; only iterate for positive counts.
        while (n > 0) {
            FX_DWORD thisnum = syntax.GetDirectNum();
            FX_DWORD thisoff = syntax.GetDirectNum();
            if (thisnum == objnum) {
                // All offsets below come from the file. Validate them against
                // the decoded stream size so the copy can never read outside
                // pData or compute a wrapped-around length.
                FX_DWORD start = thisoff + (FX_DWORD)offset;
                if (start > totalsize) {
                    return;
                }
                FX_DWORD length;
                if (n == 1) {
                    // Last entry: runs to the end of the stream data.
                    length = totalsize - start;
                } else {
                    syntax.GetDirectNum(); // Skip nextnum.
                    FX_DWORD nextoff = syntax.GetDirectNum();
                    if (nextoff < thisoff) {
                        return;
                    }
                    length = nextoff - thisoff;
                    if (length > totalsize - start) {
                        return;
                    }
                }
                pBuffer = FX_Alloc(FX_BYTE, length);
                if (pBuffer == NULL) {
                    return;
                }
                FXSYS_memcpy32(pBuffer, pData + start, length);
                size = length;
                return;
            }
            n --;
        }
        return;
    }
    if (m_V5Type[objnum] == 1) {
        FX_FILESIZE pos = m_CrossRef[objnum];
        if (pos == 0) {
            return;
        }
        FX_FILESIZE SavedPos = m_Syntax.SavePos();
        m_Syntax.RestorePos(pos);
        // Verify the "<objnum> <gennum> obj" header before trusting |pos|.
        FX_BOOL bIsNumber;
        CFX_ByteString word = m_Syntax.GetNextWord(bIsNumber);
        if (!bIsNumber) {
            m_Syntax.RestorePos(SavedPos);
            return;
        }
        FX_DWORD real_objnum = FXSYS_atoi(word);
        if (real_objnum && real_objnum != objnum) {
            m_Syntax.RestorePos(SavedPos);
            return;
        }
        word = m_Syntax.GetNextWord(bIsNumber);
        if (!bIsNumber) {
            m_Syntax.RestorePos(SavedPos);
            return;
        }
        if (m_Syntax.GetKeyword() != FX_BSTRC("obj")) {
            m_Syntax.RestorePos(SavedPos);
            return;
        }
        FX_LPVOID pResult = FXSYS_bsearch(&pos, m_SortedOffset.GetData(), m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), _CompareFileSize);
        if (pResult == NULL) {
            m_Syntax.RestorePos(SavedPos);
            return;
        }
        // Only look at the following sorted offset when one exists. When
        // |pos| is the last entry, leave nextoff == pos so the "endobj" scan
        // below determines the end instead of reading past the array.
        FX_FILESIZE nextoff = pos;
        if ((FX_FILESIZE*)pResult - (FX_FILESIZE*)m_SortedOffset.GetData() != m_SortedOffset.GetSize() - 1) {
            nextoff = ((FX_FILESIZE*)pResult)[1];
        }
        FX_BOOL bNextOffValid = FALSE;
        if (nextoff != pos) {
            // The next offset is trusted only if an xref table or another
            // "<num> <num> obj" header actually starts there.
            m_Syntax.RestorePos(nextoff);
            word = m_Syntax.GetNextWord(bIsNumber);
            if (word == FX_BSTRC("xref")) {
                bNextOffValid = TRUE;
            } else if (bIsNumber) {
                word = m_Syntax.GetNextWord(bIsNumber);
                if (bIsNumber && m_Syntax.GetKeyword() == FX_BSTRC("obj")) {
                    bNextOffValid = TRUE;
                }
            }
        }
        if (!bNextOffValid) {
            m_Syntax.RestorePos(pos);
            while (1) {
                if (m_Syntax.GetKeyword() == FX_BSTRC("endobj")) {
                    break;
                }
                // The syntax parser reports end of data once
                // m_Pos + m_HeaderOffset reaches m_FileLen, so the header
                // offset must be included here or the scan never terminates
                // for files with leading bytes before the header.
                if ((FX_FILESIZE)(m_Syntax.SavePos() + m_Syntax.m_HeaderOffset) >= m_Syntax.m_FileLen) {
                    break;
                }
            }
            nextoff = m_Syntax.SavePos();
        }
        size = (FX_DWORD)(nextoff - pos);
        pBuffer = FX_Alloc(FX_BYTE, size);
        m_Syntax.RestorePos(pos);
        // Never hand back a buffer whose contents were not actually read.
        if (pBuffer == NULL || !m_Syntax.ReadBlock(pBuffer, size)) {
            if (pBuffer) {
                FX_Free(pBuffer);
            }
            pBuffer = NULL;
            size = 0;
        }
        m_Syntax.RestorePos(SavedPos);
    }
}
// Parses the indirect object whose "<objnum> <gennum> obj" header starts at
// |pos|. When |objnum| is non-zero the number found in the file must match
// it; when it is zero, the numbers read from the header are stored on the
// returned object. The parser position is restored before returning.
// Returns NULL if the header is malformed or the body cannot be parsed.
CPDF_Object* CPDF_Parser::ParseIndirectObjectAt(CPDF_IndirectObjects* pObjList, FX_FILESIZE pos, FX_DWORD objnum,
        PARSE_CONTEXT* pContext)
{
    const FX_FILESIZE posOnEntry = m_Syntax.SavePos();
    m_Syntax.RestorePos(pos);
    CPDF_Object* pParsed = NULL;
    FX_DWORD headerObjNum = 0;
    FX_DWORD headerGenNum = 0;
    FX_BOOL bNumeric;
    // Single-pass block: any header mismatch breaks out to the common
    // position-restoring exit below.
    do {
        CFX_ByteString token = m_Syntax.GetNextWord(bNumeric);
        if (!bNumeric) {
            break;
        }
        headerObjNum = FXSYS_atoi(token);
        if (objnum && headerObjNum != objnum) {
            break;
        }
        token = m_Syntax.GetNextWord(bNumeric);
        if (!bNumeric) {
            break;
        }
        headerGenNum = FXSYS_atoi(token);
        if (m_Syntax.GetKeyword() != FX_BSTRC("obj")) {
            break;
        }
        pParsed = m_Syntax.GetObject(pObjList, objnum, headerGenNum, 0, pContext);
        // One more keyword is read after the body (normally "endobj"); its
        // value is not used and the position is restored right after.
        m_Syntax.GetKeyword();
    } while (0);
    m_Syntax.RestorePos(posOnEntry);
    if (pParsed && !objnum) {
        pParsed->m_ObjNum = headerObjNum;
        pParsed->m_GenNum = headerGenNum;
    }
    return pParsed;
}
// Strict counterpart of ParseIndirectObjectAt: validates the
// "<objnum> <gennum> obj" header at |pos| and parses the body with
// GetObjectByStrict. If |pResultPos| is non-NULL and the header was valid, it
// receives the parser position reached after the body. The parser position is
// restored before returning. Returns NULL on a malformed header or body.
CPDF_Object* CPDF_Parser::ParseIndirectObjectAtByStrict(CPDF_IndirectObjects* pObjList, FX_FILESIZE pos, FX_DWORD objnum,
        struct PARSE_CONTEXT* pContext, FX_FILESIZE *pResultPos)
{
    const FX_FILESIZE posOnEntry = m_Syntax.SavePos();
    m_Syntax.RestorePos(pos);
    CPDF_Object* pParsed = NULL;
    FX_BOOL bNumeric;
    // Single-pass block so every failure shares the restoring exit below.
    do {
        CFX_ByteString token = m_Syntax.GetNextWord(bNumeric);
        if (!bNumeric) {
            break;
        }
        const FX_DWORD headerObjNum = FXSYS_atoi(token);
        if (objnum && headerObjNum != objnum) {
            break;
        }
        token = m_Syntax.GetNextWord(bNumeric);
        if (!bNumeric) {
            break;
        }
        const FX_DWORD headerGenNum = FXSYS_atoi(token);
        if (m_Syntax.GetKeyword() != FX_BSTRC("obj")) {
            break;
        }
        pParsed = m_Syntax.GetObjectByStrict(pObjList, objnum, headerGenNum, 0, pContext);
        if (pResultPos) {
            *pResultPos = m_Syntax.m_Pos;
        }
    } while (0);
    m_Syntax.RestorePos(posOnEntry);
    return pParsed;
}
// Reads a "trailer" keyword followed by an object at the current parser
// position. Returns the object when it is a dictionary; otherwise releases
// whatever was parsed and returns NULL.
CPDF_Dictionary* CPDF_Parser::LoadTrailerV4()
{
    if (m_Syntax.GetKeyword() != FX_BSTRC("trailer")) {
        return NULL;
    }
    CPDF_Object* pParsed = m_Syntax.GetObject(m_pDocument, 0, 0, 0);
    if (!pParsed) {
        return NULL;
    }
    if (pParsed->GetType() == PDFOBJ_DICTIONARY) {
        return (CPDF_Dictionary*)pParsed;
    }
    // Something other than a dictionary followed the keyword.
    pParsed->Release();
    return NULL;
}
// Returns the permission bits reported by the security handler, or all bits
// set when no handler is installed. For the "Standard" filter the two lowest
// bits are cleared and the 0xFFFFF0C0 bits are forced on; when
// |bCheckRevision| is set and the encryption revision "R" is 2, bits 8-11 are
// cleared afterwards.
FX_DWORD CPDF_Parser::GetPermissions(FX_BOOL bCheckRevision)
{
    if (!m_pSecurityHandler) {
        return (FX_DWORD) - 1;
    }
    FX_DWORD perms = m_pSecurityHandler->GetPermissions();
    if (!m_pEncryptDict) {
        return perms;
    }
    if (m_pEncryptDict->GetString(FX_BSTRC("Filter")) != FX_BSTRC("Standard")) {
        return perms;
    }
    perms = (perms & 0xFFFFFFFC) | 0xFFFFF0C0;
    if (bCheckRevision && m_pEncryptDict->GetInteger(FX_BSTRC("R")) == 2) {
        perms &= 0xFFFFF0FF;
    }
    return perms;
}
// Without a security handler the caller is treated as owner; otherwise the
// handler decides.
FX_BOOL CPDF_Parser::IsOwner()
{
    if (m_pSecurityHandler == NULL) {
        return TRUE;
    }
    return m_pSecurityHandler->IsOwner();
}
// Installs |pSecurityHandler| on the parser. A previously installed handler
// is deleted unless it had been installed as forced. When |bForced| is FALSE
// a crypto handler is created from the new security handler and attached to
// the syntax parser; when TRUE no crypto handler is set up here.
void CPDF_Parser::SetSecurityHandler(CPDF_SecurityHandler* pSecurityHandler, FX_BOOL bForced)
{
    ASSERT(m_pSecurityHandler == NULL);
    if (m_pSecurityHandler) {
        if (!m_bForceUseSecurityHandler) {
            delete m_pSecurityHandler;
            m_pSecurityHandler = NULL;
        }
    }
    m_pSecurityHandler = pSecurityHandler;
    m_bForceUseSecurityHandler = bForced;
    if (!m_bForceUseSecurityHandler) {
        m_Syntax.m_pCryptoHandler = pSecurityHandler->CreateCryptoHandler();
        m_Syntax.m_pCryptoHandler->Init(NULL, pSecurityHandler);
    }
}
// Checks whether the first object after the file header is a linearization
// dictionary whose "L" entry equals the size of |pFileAccess|.
//
// On success m_pLinearized keeps the parsed object, m_dwFirstPageNo is taken
// from a numeric "P" entry and m_LastXRefOffset from a numeric "T" entry.
// Returns FALSE otherwise; the parsed object is released when it has no
// "Linearized" or no "L" entry.
FX_BOOL CPDF_Parser::IsLinearizedFile(IFX_FileRead* pFileAccess, FX_DWORD offset)
{
    m_Syntax.InitParser(pFileAccess, offset);
    m_Syntax.RestorePos(m_Syntax.m_HeaderOffset + 9);
    const FX_FILESIZE posAfterHeader = m_Syntax.SavePos();

    // Expect "<objnum> <gennum> obj" right after the header line.
    FX_BOOL bNumeric;
    CFX_ByteString token = m_Syntax.GetNextWord(bNumeric);
    if (!bNumeric) {
        return FALSE;
    }
    const FX_DWORD firstObjNum = FXSYS_atoi(token);
    token = m_Syntax.GetNextWord(bNumeric);
    if (!bNumeric) {
        return FALSE;
    }
    const FX_DWORD firstGenNum = FXSYS_atoi(token);
    if (m_Syntax.GetKeyword() != FX_BSTRC("obj")) {
        m_Syntax.RestorePos(posAfterHeader);
        return FALSE;
    }

    m_pLinearized = m_Syntax.GetObject(NULL, firstObjNum, firstGenNum, 0);
    if (!m_pLinearized) {
        return FALSE;
    }

    CPDF_Dictionary* pLinDict = m_pLinearized->GetDict();
    CPDF_Object* pLen = NULL;
    if (pLinDict && pLinDict->GetElement(FX_BSTRC("Linearized"))) {
        // Consume the word following the object before inspecting the dictionary.
        m_Syntax.GetNextWord(bNumeric);
        pLen = pLinDict->GetElement(FX_BSTRC("L"));
    }
    if (!pLen) {
        // Either not a linearization dictionary or the length entry is missing.
        m_pLinearized->Release();
        m_pLinearized = NULL;
        return FALSE;
    }
    if (pLen->GetInteger() != (int)pFileAccess->GetSize()) {
        return FALSE;
    }

    CPDF_Object* pFirstPage = pLinDict->GetElement(FX_BSTRC("P"));
    if (pFirstPage && pFirstPage->GetType() == PDFOBJ_NUMBER) {
        m_dwFirstPageNo = pFirstPage->GetInteger();
    }
    CPDF_Object* pMainXRef = pLinDict->GetElement(FX_BSTRC("T"));
    if (pMainXRef && pMainXRef->GetType() == PDFOBJ_NUMBER) {
        m_LastXRefOffset = pMainXRef->GetInteger();
    }
    return TRUE;
}
// Starts parsing |pFileAccess| as a linearized file.
//
// The parser is reset first. If the file has no recognizable header,
// PDFPARSE_ERROR_FORMAT is returned. If the file is not linearized, parsing
// is delegated to StartParse(). Otherwise the first-page cross-reference
// section located right after the linearization dictionary is loaded (V4
// table first, then V5 stream, then a full rebuild as a last resort), the
// encryption handler is set up and the document is loaded from the
// linearization dictionary.
//
// Returns a PDFPARSE_ERROR_* code.
FX_DWORD CPDF_Parser::StartAsynParse(IFX_FileRead* pFileAccess, FX_BOOL bReParse, FX_BOOL bOwnFileRead)
{
CloseParser(bReParse);
m_bXRefStream = FALSE;
m_LastXRefOffset = 0;
m_bOwnFileRead = bOwnFileRead;
FX_INT32 offset = GetHeaderOffset(pFileAccess);
if (offset == -1) {
return PDFPARSE_ERROR_FORMAT;
}
if (!IsLinearizedFile(pFileAccess, offset)) {
// Not linearized: detach the file from the syntax parser and fall back to
// the regular parse path.
m_Syntax.m_pFileAccess = NULL;
return StartParse(pFileAccess, bReParse, bOwnFileRead);
}
if (!bReParse) {
m_pDocument = FX_NEW CPDF_Document(this);
}
// IsLinearizedFile() left the parser just past the linearization object,
// which is where the first cross-reference section is expected.
FX_FILESIZE dwFirstXRefOffset = m_Syntax.SavePos();
FX_BOOL bXRefRebuilt = FALSE;
FX_BOOL bLoadV4 = FALSE;
if (!(bLoadV4 = LoadCrossRefV4(dwFirstXRefOffset, 0, FALSE, FALSE)) && !LoadCrossRefV5(dwFirstXRefOffset, dwFirstXRefOffset, TRUE)) {
if (!RebuildCrossRef()) {
return PDFPARSE_ERROR_FORMAT;
}
bXRefRebuilt = TRUE;
m_LastXRefOffset = 0;
}
if (bLoadV4) {
m_pTrailer = LoadTrailerV4();
// NOTE(review): the two "return FALSE" statements below yield 0 from a
// function whose other exits return PDFPARSE_ERROR_* codes. If
// PDFPARSE_ERROR_SUCCESS is 0 these failures are reported as success -
// confirm whether that is intended.
if (m_pTrailer == NULL) {
return FALSE;
}
FX_INT32 xrefsize = GetDirectInteger(m_pTrailer, FX_BSTRC("Size"));
if (xrefsize == 0) {
return FALSE;
}
// Resize both per-object tables to the size declared by the trailer.
m_CrossRef.SetSize(xrefsize);
m_V5Type.SetSize(xrefsize);
}
FX_DWORD dwRet = SetEncryptHandler();
if (dwRet != PDFPARSE_ERROR_SUCCESS) {
return dwRet;
}
m_pDocument->LoadAsynDoc(m_pLinearized->GetDict());
if (m_pDocument->GetRoot() == NULL || m_pDocument->GetPageCount() == 0) {
// The loaded cross-reference data did not produce a usable document.
// Rebuild it once (unless that was already done) and try again.
if (bXRefRebuilt) {
return PDFPARSE_ERROR_FORMAT;
}
ReleaseEncryptHandler();
if (!RebuildCrossRef()) {
return PDFPARSE_ERROR_FORMAT;
}
dwRet = SetEncryptHandler();
if (dwRet != PDFPARSE_ERROR_SUCCESS) {
return dwRet;
}
m_pDocument->LoadAsynDoc(m_pLinearized->GetDict());
if (m_pDocument->GetRoot() == NULL) {
return PDFPARSE_ERROR_FORMAT;
}
}
// Sort the collected offsets; the size lookups elsewhere in this class
// binary-search m_SortedOffset.
FXSYS_qsort(m_SortedOffset.GetData(), m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), _CompareFileSize);
FX_DWORD RootObjNum = GetRootObjNum();
if (RootObjNum == 0) {
// No root object number available: rebuild the cross-reference data and
// look again before giving up.
ReleaseEncryptHandler();
RebuildCrossRef();
RootObjNum = GetRootObjNum();
if (RootObjNum == 0) {
return PDFPARSE_ERROR_FORMAT;
}
dwRet = SetEncryptHandler();
if (dwRet != PDFPARSE_ERROR_SUCCESS) {
return dwRet;
}
}
if (m_pSecurityHandler && m_pSecurityHandler->IsMetadataEncrypted()) {
// Record the object number of the root's "Metadata" reference on the
// syntax parser.
CPDF_Object* pMetadata = m_pDocument->GetRoot()->GetElement(FX_BSTRC("Metadata"));
if (pMetadata && pMetadata->GetType() == PDFOBJ_REFERENCE) {
m_Syntax.m_MetadataObjnum = ((CPDF_Reference*) pMetadata)->GetRefObjNum();
}
}
return PDFPARSE_ERROR_SUCCESS;
}
// Loads the cross-reference stream at |xrefpos| and then every earlier
// section it chains to, until the chain ends with a zero offset.
//
// LoadCrossRefV5() is handed |xrefpos| as its second argument as well and is
// relied on to replace it with the offset of the previous section
// (presumably via a reference parameter - the loop below depends on it).
//
// Returns FALSE if any section fails to load or if the chain revisits an
// offset that was already loaded; TRUE otherwise.
FX_BOOL CPDF_Parser::LoadLinearizedAllCrossRefV5(FX_FILESIZE xrefpos)
{
    // Offsets already loaded. A damaged or malicious file can make the chain
    // point back at an earlier section, which would otherwise loop forever.
    // Chains are short in practice, so a linear scan is sufficient.
    std::vector<FX_FILESIZE> visited;
    visited.push_back(xrefpos);
    if (!LoadCrossRefV5(xrefpos, xrefpos, FALSE)) {
        return FALSE;
    }
    while (xrefpos) {
        for (size_t i = 0; i < visited.size(); ++i) {
            if (visited[i] == xrefpos) {
                return FALSE;
            }
        }
        visited.push_back(xrefpos);
        if (!LoadCrossRefV5(xrefpos, xrefpos, FALSE)) {
            return FALSE;
        }
    }
    m_ObjectStreamMap.InitHashTable(101, FALSE);
    m_bXRefStream = TRUE;
    return TRUE;
}
// Loads the main cross-reference data of a linearized file, starting at
// m_LastXRefOffset.
//
// The current trailer and all cached object streams are discarded first.
// Leading whitespace at the recorded offset is skipped and m_LastXRefOffset
// is advanced accordingly. The table is then loaded as a V4 table or, if
// that fails, as a V5 stream chain. m_Syntax.m_MetadataObjnum is cleared for
// the duration of the load and restored afterwards.
//
// Returns PDFPARSE_ERROR_SUCCESS, or PDFPARSE_ERROR_FORMAT (with
// m_LastXRefOffset reset to 0) when neither format could be loaded.
FX_DWORD CPDF_Parser::LoadLinearizedMainXRefTable()
{
    FX_DWORD dwSaveMetadataObjnum = m_Syntax.m_MetadataObjnum;
    m_Syntax.m_MetadataObjnum = 0;
    if (m_pTrailer) {
        m_pTrailer->Release();
        m_pTrailer = NULL;
    }
    m_Syntax.RestorePos(m_LastXRefOffset - m_Syntax.m_HeaderOffset);
    // Count the whitespace bytes in front of the table. GetNextChar() returns
    // FALSE once the end of the file is reached, which ends the scan. The
    // previous end-of-file comparison was inverted and stopped after the
    // first whitespace byte.
    FX_BYTE ch = 0;
    FX_DWORD dwCount = 0;
    while (m_Syntax.GetNextChar(ch) && _PDF_CharType[ch] == 'W') {
        ++dwCount;
    }
    m_LastXRefOffset += dwCount;
    // Drop every cached object stream accessor before reloading.
    FX_POSITION pos = m_ObjectStreamMap.GetStartPosition();
    while (pos) {
        FX_LPVOID objnum;
        CPDF_StreamAcc* pStream;
        m_ObjectStreamMap.GetNextAssoc(pos, objnum, (void*&)pStream);
        delete pStream;
    }
    m_ObjectStreamMap.RemoveAll();
    if (!LoadLinearizedAllCrossRefV4(m_LastXRefOffset, m_dwXrefStartObjNum) && !LoadLinearizedAllCrossRefV5(m_LastXRefOffset)) {
        m_LastXRefOffset = 0;
        m_Syntax.m_MetadataObjnum = dwSaveMetadataObjnum;
        return PDFPARSE_ERROR_FORMAT;
    }
    FXSYS_qsort(m_SortedOffset.GetData(), m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), _CompareFileSize);
    m_Syntax.m_MetadataObjnum = dwSaveMetadataObjnum;
    return PDFPARSE_ERROR_SUCCESS;
}
// Creates a syntax parser with no file, crypto handler or read buffer
// attached. The buffer size is taken from the module manager.
CPDF_SyntaxParser::CPDF_SyntaxParser()
{
    // Pointers start out unset.
    m_pFileAccess = NULL;
    m_pCryptoHandler = NULL;
    m_pFileBuf = NULL;

    // Counters and bookkeeping.
    m_MetadataObjnum = 0;
    m_dwWordPos = 0;
    m_BufSize = CPDF_ModuleMgr::Get()->m_FileBufSize;

#if defined(_FPDFAPI_MINI_)
    m_bFileStream = TRUE;
#else
    m_bFileStream = FALSE;
#endif
}
// Frees the read buffer if one was allocated.
CPDF_SyntaxParser::~CPDF_SyntaxParser()
{
    if (!m_pFileBuf) {
        return;
    }
    FX_Free(m_pFileBuf);
}
// Reads the byte at |pos| into |ch| without changing the current position.
// Returns the result of the underlying GetNextChar() call.
FX_BOOL CPDF_SyntaxParser::GetCharAt(FX_FILESIZE pos, FX_BYTE& ch)
{
    const FX_FILESIZE posOnEntry = m_Pos;
    m_Pos = pos;
    const FX_BOOL bRead = GetNextChar(ch);
    m_Pos = posOnEntry;
    return bRead;
}
// Reads the byte at the current position into |ch| and advances by one.
// Returns FALSE at end of file or when the backing file cannot be read.
//
// Bytes are served from m_pFileBuf, which holds up to m_BufSize bytes of the
// file starting at m_BufOffset. The buffer is refilled when the requested
// position is at or before m_BufOffset, or at or past the end of the window.
FX_BOOL CPDF_SyntaxParser::GetNextChar(FX_BYTE& ch)
{
    const FX_FILESIZE filePos = m_Pos + m_HeaderOffset;
    if (filePos >= m_FileLen) {
        return FALSE;
    }
    const FX_BOOL bBuffered = m_BufOffset < filePos && filePos < (FX_FILESIZE)(m_BufOffset + m_BufSize);
    if (!bBuffered) {
        // Start the new window at the requested byte, never asking for more
        // than the file holds.
        FX_DWORD chunkSize = m_BufSize;
        if ((FX_FILESIZE)chunkSize > m_FileLen) {
            chunkSize = (FX_DWORD)m_FileLen;
        }
        FX_FILESIZE chunkStart = filePos;
        if ((FX_FILESIZE)(chunkStart + chunkSize) > m_FileLen) {
            // The window would run past the end: slide it back so that it
            // ends exactly at the end of the file.
            if (m_FileLen < (FX_FILESIZE)chunkSize) {
                chunkStart = 0;
                chunkSize = (FX_DWORD)m_FileLen;
            } else {
                chunkStart = m_FileLen - chunkSize;
            }
        }
        if (!m_pFileAccess->ReadBlock(m_pFileBuf, chunkStart, chunkSize)) {
            return FALSE;
        }
        m_BufOffset = chunkStart;
    }
    ch = m_pFileBuf[filePos - m_BufOffset];
    m_Pos ++;
    return TRUE;
}
// Reads the byte at |pos| into |ch| without touching the current position.
// When the buffer has to be refilled, the new window is placed so that it
// ends at the requested byte, which suits callers scanning towards the start
// of the file. Returns FALSE at end of file or on a read failure.
FX_BOOL CPDF_SyntaxParser::GetCharAtBackward(FX_FILESIZE pos, FX_BYTE& ch)
{
    const FX_FILESIZE filePos = pos + m_HeaderOffset;
    if (filePos >= m_FileLen) {
        return FALSE;
    }
    const FX_BOOL bBuffered = m_BufOffset < filePos && filePos < (FX_FILESIZE)(m_BufOffset + m_BufSize);
    if (!bBuffered) {
        // Window of m_BufSize bytes whose last byte is the requested one,
        // clamped at the start of the file.
        FX_FILESIZE chunkStart;
        if (filePos < (FX_FILESIZE)m_BufSize) {
            chunkStart = 0;
        } else {
            chunkStart = filePos - m_BufSize + 1;
        }
        FX_DWORD chunkSize = m_BufSize;
        if ((FX_FILESIZE)(chunkStart + chunkSize) > m_FileLen) {
            if (m_FileLen < (FX_FILESIZE)chunkSize) {
                chunkStart = 0;
                chunkSize = (FX_DWORD)m_FileLen;
            } else {
                chunkStart = m_FileLen - chunkSize;
            }
        }
        if (!m_pFileAccess->ReadBlock(m_pFileBuf, chunkStart, chunkSize)) {
            return FALSE;
        }
        m_BufOffset = chunkStart;
    }
    ch = m_pFileBuf[filePos - m_BufOffset];
    return TRUE;
}
// Reads |size| bytes at the current position directly from the file into
// |pBuf|, bypassing the character buffer. The position advances only when
// the read succeeds.
FX_BOOL CPDF_SyntaxParser::ReadBlock(FX_LPBYTE pBuf, FX_DWORD size)
{
    const FX_FILESIZE fileOffset = m_Pos + m_HeaderOffset;
    if (m_pFileAccess->ReadBlock(pBuf, fileOffset, size)) {
        m_Pos += size;
        return TRUE;
    }
    return FALSE;
}
#define MAX_WORD_BUFFER 256
void CPDF_SyntaxParser::GetNextWord()
{
m_WordSize = 0;
m_bIsNumber = TRUE;
FX_BYTE ch;
if (!GetNextChar(ch)) {
return;
}
FX_BYTE type = _PDF_CharType[ch];
while (1) {
while (type == 'W') {
if (!GetNextChar(ch)) {
return;
}
type = _PDF_CharType[ch];
}
if (ch != '%') {
break;
}
while (1) {
if (!GetNextChar(ch)) {
return;
}
if (ch == '\r' || ch == '\n') {
break;
}
}
type = _PDF_CharType[ch];
}
if (type == 'D') {
m_bIsNumber = FALSE;
m_WordBuffer[m_WordSize++] = ch;
if (ch == '/') {
while (1) {
if (!GetNextChar(ch)) {
return;
}
type = _PDF_CharType[ch];
if (type != 'R' && type != 'N') {
m_Pos --;
return;
}
if (m_WordSize < MAX_WORD_BUFFER) {
m_WordBuffer[m_WordSize++] = ch;
}
}
} else if (ch == '<') {
if (!GetNextChar(ch)) {
return;
}
if (ch == '<') {
m_WordBuffer[m_WordSize++] = ch;
} else {
m_Pos --;
}
} else if (ch == '>') {
if (!GetNextChar(ch)) {
return;
}
if (ch == '>') {
m_WordBuffer[m_WordSize++] = ch;
} else {
m_Pos --;
}
}
return;
}
while (1) {
if (m_WordSize < MAX_WORD_BUFFER) {
m_WordBuffer[m_WordSize++] = ch;
}
if (type != 'N') {
m_bIsNumber = FALSE;
}
if (!GetNextChar(ch)) {
return;
}
type = _PDF_CharType[ch];
if (type == 'D' || type == 'W') {
m_Pos --;
break;
}
}
}
// Reads a literal string whose opening '(' has already been consumed and
// returns its decoded bytes.
//
// Nested unescaped parentheses are kept and balanced. Backslash escapes are
// decoded: \n \r \t \b \f, one to three octal digits, and a backslash
// followed by a line ending (CR, LF or CRLF), which produces nothing. Any
// other escaped character is taken literally. If the file ends before the
// closing ')', the bytes collected so far are returned.
CFX_ByteString CPDF_SyntaxParser::ReadString()
{
    FX_BYTE ch;
    if (!GetNextChar(ch)) {
        return CFX_ByteString();
    }
    enum {
        kPlain = 0,      // ordinary characters
        kEscape = 1,     // just saw a backslash
        kOctal2 = 2,     // one octal digit read
        kOctal3 = 3,     // two octal digits read
        kEscapedCR = 4   // backslash followed by CR
    };
    CFX_ByteTextBuf text;
    FX_INT32 depth = 0;
    FX_INT32 state = kPlain;
    FX_INT32 octalValue = 0;
    for (;;) {
        // Set when the current character must be processed again in the new
        // state instead of fetching the next one.
        FX_BOOL bReprocess = FALSE;
        switch (state) {
            case kPlain:
                if (ch == ')') {
                    if (depth == 0) {
                        return text.GetByteString();
                    }
                    depth --;
                    text.AppendChar(')');
                } else if (ch == '(') {
                    depth ++;
                    text.AppendChar('(');
                } else if (ch == '\\') {
                    state = kEscape;
                } else {
                    text.AppendChar(ch);
                }
                break;
            case kEscape:
                if (ch >= '0' && ch <= '7') {
                    octalValue = ch - '0';
                    state = kOctal2;
                } else if (ch == '\r') {
                    state = kEscapedCR;
                } else {
                    switch (ch) {
                        case 'n':
                            text.AppendChar('\n');
                            break;
                        case 'r':
                            text.AppendChar('\r');
                            break;
                        case 't':
                            text.AppendChar('\t');
                            break;
                        case 'b':
                            text.AppendChar('\b');
                            break;
                        case 'f':
                            text.AppendChar('\f');
                            break;
                        case '\n':
                            // Line continuation: contributes nothing.
                            break;
                        default:
                            text.AppendChar(ch);
                            break;
                    }
                    state = kPlain;
                }
                break;
            case kOctal2:
                if (ch >= '0' && ch <= '7') {
                    octalValue = octalValue * 8 + ch - '0';
                    state = kOctal3;
                } else {
                    text.AppendChar(octalValue);
                    state = kPlain;
                    bReprocess = TRUE;
                }
                break;
            case kOctal3:
                if (ch >= '0' && ch <= '7') {
                    octalValue = octalValue * 8 + ch - '0';
                } else {
                    bReprocess = TRUE;
                }
                text.AppendChar(octalValue);
                state = kPlain;
                break;
            case kEscapedCR:
                // Swallow the LF of an escaped CRLF; anything else is a
                // normal character.
                state = kPlain;
                if (ch != '\n') {
                    bReprocess = TRUE;
                }
                break;
        }
        if (bReprocess) {
            continue;
        }
        if (!GetNextChar(ch)) {
            break;
        }
    }
    GetNextChar(ch);
    return text.GetByteString();
}
// Reads a hexadecimal string whose opening '<' has already been consumed,
// up to the closing '>' or the end of the file. Characters that are not hex
// digits are ignored. Digits are paired into bytes, high nibble first; a
// trailing unpaired digit yields a byte with a zero low nibble.
CFX_ByteString CPDF_SyntaxParser::ReadHexString()
{
    FX_BYTE ch;
    if (!GetNextChar(ch)) {
        return CFX_ByteString();
    }
    CFX_BinaryBuf bytes;
    FX_BOOL bExpectHigh = TRUE;
    FX_BYTE pending = 0;
    while (ch != '>') {
        // Value of the hex digit, or -1 for any other character.
        int nibble = -1;
        if (ch >= '0' && ch <= '9') {
            nibble = ch - '0';
        } else if (ch >= 'A' && ch <= 'F') {
            nibble = ch - 'A' + 10;
        } else if (ch >= 'a' && ch <= 'f') {
            nibble = ch - 'a' + 10;
        }
        if (nibble >= 0) {
            if (bExpectHigh) {
                pending = (FX_BYTE)(nibble * 16);
            } else {
                pending += nibble;
                bytes.AppendByte((FX_BYTE)pending);
            }
            bExpectHigh = !bExpectHigh;
        }
        if (!GetNextChar(ch)) {
            break;
        }
    }
    if (!bExpectHigh) {
        // Odd number of digits: emit the half-filled byte.
        bytes.AppendByte((FX_BYTE)pending);
    }
    return bytes.GetByteString();
}
// Advances the position to just past the next end-of-line marker (LF, CR or
// CRLF). Stops at end of file if no marker is found.
void CPDF_SyntaxParser::ToNextLine()
{
    FX_BYTE ch;
    while (GetNextChar(ch)) {
        if (ch == '\n') {
            return;
        }
        if (ch == '\r') {
            // Swallow the LF of a CRLF pair. Only step back when a character
            // was actually read: at end of file GetNextChar() fails without
            // moving the position, and stepping back then would land on the
            // CR that was just consumed.
            if (GetNextChar(ch) && ch != '\n') {
                m_Pos --;
            }
            return;
        }
    }
}
void CPDF_SyntaxParser::ToNextWord()
{
FX_BYTE ch;
if (!GetNextChar(ch)) {
return;
}
FX_BYTE type = _PDF_CharType[ch];
while (1) {
while (type == 'W') {
m_dwWordPos = m_Pos;
if (!GetNextChar(ch)) {
return;
}
type = _PDF_CharType[ch];
}
if (ch != '%') {
break;
}
while (1) {
if (!GetNextChar(ch)) {
return;
}
if (ch == '\r' || ch == '\n') {
break;
}
}
type = _PDF_CharType[ch];
}
m_Pos --;
}
// Reads the next token and returns it as a string; |bIsNumber| receives the
// numeric flag computed by the tokenizer.
CFX_ByteString CPDF_SyntaxParser::GetNextWord(FX_BOOL& bIsNumber)
{
    GetNextWord();
    CFX_ByteString token((FX_LPCSTR)m_WordBuffer, m_WordSize);
    bIsNumber = m_bIsNumber;
    return token;
}
// Reads the next token and returns it as a string, ignoring whether it is
// numeric.
CFX_ByteString CPDF_SyntaxParser::GetKeyword()
{
    GetNextWord();
    CFX_ByteString token((FX_LPCSTR)m_WordBuffer, m_WordSize);
    return token;
}
// Parses one object starting at the current position (lenient variant).
//
// |pObjList|  object list handed to any reference objects created.
// |objnum|,
// |gennum|    numbers of the enclosing indirect object; passed to the crypto
//             handler when decrypting strings and to ReadStream().
// |level|     current nesting depth; parsing is refused beyond
//             _PARSER_OBJECT_LEVLE_.
// |pContext|  optional. With PDFPARSE_TYPEONLY set, no object is built and
//             the PDFOBJ_* type code is returned cast to a pointer. For
//             dictionaries m_DictStart / m_DictEnd are recorded, and with
//             PDFPARSE_NOSTREAM the dictionary is returned without looking
//             for a following stream.
// |bDecrypt|  when FALSE, strings are returned without decryption.
//
// Returns the parsed object, or NULL when nothing parsable was found (end of
// data, nesting too deep, unknown keyword, a stray ">>", or a failed stream).
// Arrays end at the first element that fails to parse; dictionary entries
// whose value fails to parse are skipped.
CPDF_Object* CPDF_SyntaxParser::GetObject(CPDF_IndirectObjects* pObjList, FX_DWORD objnum, FX_DWORD gennum, FX_INT32 level, PARSE_CONTEXT* pContext, FX_BOOL bDecrypt)
{
    if (level > _PARSER_OBJECT_LEVLE_) {
        return NULL;
    }
    FX_FILESIZE SavedPos = m_Pos;
    FX_BOOL bTypeOnly = pContext && (pContext->m_Flags & PDFPARSE_TYPEONLY);
    FX_BOOL bIsNumber;
    CFX_ByteString word = GetNextWord(bIsNumber);
    if (word.GetLength() == 0) {
        if (bTypeOnly) {
            return (CPDF_Object*)PDFOBJ_INVALID;
        }
        return NULL;
    }
    if (bIsNumber) {
        // Either a plain number or the start of an "<num> <gen> R" reference.
        // Look ahead two tokens and rewind if it is not a reference.
        FX_FILESIZE posAfterNumber = m_Pos;
        CFX_ByteString nextword = GetNextWord(bIsNumber);
        if (bIsNumber) {
            CFX_ByteString nextword2 = GetNextWord(bIsNumber);
            if (nextword2 == FX_BSTRC("R")) {
                FX_DWORD refObjNum = FXSYS_atoi(word);
                if (bTypeOnly) {
                    return (CPDF_Object*)PDFOBJ_REFERENCE;
                }
                return CPDF_Reference::Create(pObjList, refObjNum);
            }
        }
        m_Pos = posAfterNumber;
        if (bTypeOnly) {
            return (CPDF_Object*)PDFOBJ_NUMBER;
        }
        return CPDF_Number::Create(word);
    }
    if (word == FX_BSTRC("true") || word == FX_BSTRC("false")) {
        if (bTypeOnly) {
            return (CPDF_Object*)PDFOBJ_BOOLEAN;
        }
        return CPDF_Boolean::Create(word == FX_BSTRC("true"));
    }
    if (word == FX_BSTRC("null")) {
        if (bTypeOnly) {
            return (CPDF_Object*)PDFOBJ_NULL;
        }
        return CPDF_Null::Create();
    }
    if (word == FX_BSTRC("(")) {
        if (bTypeOnly) {
            return (CPDF_Object*)PDFOBJ_STRING;
        }
        CFX_ByteString str = ReadString();
        if (m_pCryptoHandler && bDecrypt) {
            m_pCryptoHandler->Decrypt(objnum, gennum, str);
        }
        return CPDF_String::Create(str, FALSE);
    }
    if (word == FX_BSTRC("<")) {
        if (bTypeOnly) {
            return (CPDF_Object*)PDFOBJ_STRING;
        }
        CFX_ByteString str = ReadHexString();
        if (m_pCryptoHandler && bDecrypt) {
            m_pCryptoHandler->Decrypt(objnum, gennum, str);
        }
        return CPDF_String::Create(str, TRUE);
    }
    if (word == FX_BSTRC("[")) {
        if (bTypeOnly) {
            return (CPDF_Object*)PDFOBJ_ARRAY;
        }
        CPDF_Array* pArray = CPDF_Array::Create();
        // Collect elements until one fails to parse (normally at "]").
        while (1) {
            CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, level + 1);
            if (pObj == NULL) {
                return pArray;
            }
            pArray->Add(pObj);
        }
    }
    if (word[0] == '/') {
        if (bTypeOnly) {
            return (CPDF_Object*)PDFOBJ_NAME;
        }
        return CPDF_Name::Create(PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1)));
    }
    if (word == FX_BSTRC("<<")) {
        if (bTypeOnly) {
            return (CPDF_Object*)PDFOBJ_DICTIONARY;
        }
        if (pContext) {
            pContext->m_DictStart = SavedPos;
        }
        CPDF_Dictionary* pDict = CPDF_Dictionary::Create();
        FX_INT32 nKeys = 0;
        // Position of the value of a "/Contents" key, or 0 if none was seen.
        FX_FILESIZE dwSignValuePos = 0;
        while (1) {
            FX_BOOL bKeyIsNumber;
            CFX_ByteString key = GetNextWord(bKeyIsNumber);
            if (key.IsEmpty()) {
                // Ran out of data inside the dictionary.
                if (pDict)
                    pDict->Release();
                return NULL;
            }
            FX_FILESIZE keyStart = m_Pos - key.GetLength();
            if (key == FX_BSTRC(">>")) {
                break;
            }
            if (key == FX_BSTRC("endobj")) {
                // Unterminated dictionary: stop and leave "endobj" unread.
                m_Pos = keyStart;
                break;
            }
            if (key[0] != '/') {
                continue;
            }
            nKeys ++;
            key = PDF_NameDecode(key);
            if (key == FX_BSTRC("/Contents")) {
                dwSignValuePos = m_Pos;
            }
            CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, level + 1);
            if (pObj == NULL) {
                continue;
            }
            // The key is stored without its leading '/'.
            CFX_ByteStringC keyName(((FX_LPCSTR)key) + 1, key.GetLength() - 1);
            if (key.GetLength() == 1 || nKeys < 32) {
                pDict->SetAt(keyName, pObj);
            } else {
                pDict->AddValue(keyName, pObj);
            }
        }
        // For a signature dictionary the "Contents" value is parsed again
        // with decryption disabled. This is done only when a "/Contents" key
        // was actually seen: otherwise dwSignValuePos is still 0 and the
        // parser would seek to the start of the data and store whatever it
        // finds there as the signature contents.
        if (IsSignatureDict(pDict) && dwSignValuePos) {
            FX_FILESIZE dwSavePos = m_Pos;
            m_Pos = dwSignValuePos;
            CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, level + 1, NULL, FALSE);
            pDict->SetAt(FX_BSTRC("Contents"), pObj);
            m_Pos = dwSavePos;
        }
        if (pContext) {
            pContext->m_DictEnd = m_Pos;
            if (pContext->m_Flags & PDFPARSE_NOSTREAM) {
                return pDict;
            }
        }
        // A dictionary followed by "stream" is the header of a stream object.
        FX_FILESIZE posAfterDict = m_Pos;
        FX_BOOL bNextIsNumber;
        CFX_ByteString nextword = GetNextWord(bNextIsNumber);
        if (nextword == FX_BSTRC("stream")) {
            CPDF_Stream* pStream = ReadStream(pDict, pContext, objnum, gennum);
            if (pStream) {
                return pStream;
            }
            if (pDict)
                pDict->Release();
            return NULL;
        }
        m_Pos = posAfterDict;
        return pDict;
    }
    if (word == FX_BSTRC(">>")) {
        // Leave a stray dictionary terminator for the caller to see.
        m_Pos = SavedPos;
        return NULL;
    }
    if (bTypeOnly) {
        return (CPDF_Object*)PDFOBJ_INVALID;
    }
    return NULL;
}
// Parses one object starting at the current position, rejecting some
// malformed input that GetObject() tolerates:
//  - an array is accepted only if the token that ended it starts with ']';
//  - a dictionary entry whose value fails to parse makes the whole
//    dictionary fail (the rest of the current line is skipped first).
//
// With PDFPARSE_TYPEONLY set in |pContext|, no object is built and the
// PDFOBJ_* type code is returned cast to a pointer. Strings are always
// decrypted when a crypto handler is present.
//
// Returns the parsed object, or NULL on failure or when nesting exceeds
// _PARSER_OBJECT_LEVLE_.
CPDF_Object* CPDF_SyntaxParser::GetObjectByStrict(CPDF_IndirectObjects* pObjList, FX_DWORD objnum, FX_DWORD gennum,
FX_INT32 level, struct PARSE_CONTEXT* pContext)
{
if (level > _PARSER_OBJECT_LEVLE_) {
return NULL;
}
FX_FILESIZE SavedPos = m_Pos;
FX_BOOL bTypeOnly = pContext && (pContext->m_Flags & PDFPARSE_TYPEONLY);
FX_BOOL bIsNumber;
CFX_ByteString word = GetNextWord(bIsNumber);
if (word.GetLength() == 0) {
if (bTypeOnly) {
return (CPDF_Object*)PDFOBJ_INVALID;
}
return NULL;
}
if (bIsNumber) {
// A number may be the start of "<num> <gen> R". Look ahead up to two tokens
// and rewind to just after the first number if it is not a reference.
// (This SavedPos shadows the outer one.)
FX_FILESIZE SavedPos = m_Pos;
CFX_ByteString nextword = GetNextWord(bIsNumber);
if (bIsNumber) {
CFX_ByteString nextword2 = GetNextWord(bIsNumber);
if (nextword2 == FX_BSTRC("R")) {
// This objnum shadows the parameter: it is the referenced object's number.
FX_DWORD objnum = FXSYS_atoi(word);
if (bTypeOnly) {
return (CPDF_Object*)PDFOBJ_REFERENCE;
}
return CPDF_Reference::Create(pObjList, objnum);
} else {
m_Pos = SavedPos;
if (bTypeOnly) {
return (CPDF_Object*)PDFOBJ_NUMBER;
}
return CPDF_Number::Create(word);
}
} else {
m_Pos = SavedPos;
if (bTypeOnly) {
return (CPDF_Object*)PDFOBJ_NUMBER;
}
return CPDF_Number::Create(word);
}
}
if (word == FX_BSTRC("true") || word == FX_BSTRC("false")) {
if (bTypeOnly) {
return (CPDF_Object*)PDFOBJ_BOOLEAN;
}
return CPDF_Boolean::Create(word == FX_BSTRC("true"));
}
if (word == FX_BSTRC("null")) {
if (bTypeOnly) {
return (CPDF_Object*)PDFOBJ_NULL;
}
return CPDF_Null::Create();
}
if (word == FX_BSTRC("(")) {
if (bTypeOnly) {
return (CPDF_Object*)PDFOBJ_STRING;
}
CFX_ByteString str = ReadString();
if (m_pCryptoHandler) {
m_pCryptoHandler->Decrypt(objnum, gennum, str);
}
return CPDF_String::Create(str, FALSE);
}
if (word == FX_BSTRC("<")) {
if (bTypeOnly) {
return (CPDF_Object*)PDFOBJ_STRING;
}
CFX_ByteString str = ReadHexString();
if (m_pCryptoHandler) {
m_pCryptoHandler->Decrypt(objnum, gennum, str);
}
return CPDF_String::Create(str, TRUE);
}
if (word == FX_BSTRC("[")) {
if (bTypeOnly) {
return (CPDF_Object*)PDFOBJ_ARRAY;
}
CPDF_Array* pArray = CPDF_Array::Create();
while (1) {
// NOTE(review): elements are parsed with the lenient GetObject(), not with
// GetObjectByStrict() - confirm this is intended.
CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, level + 1);
if (pObj == NULL) {
// The element parse stopped; the array is well-formed only if the last
// token read into m_WordBuffer starts with ']'.
// NOTE(review): m_WordSize is not checked here, so after an empty read
// m_WordBuffer[0] may hold a character from an earlier token - verify.
if (m_WordBuffer[0] == ']') {
return pArray;
}
if (pArray)
pArray->Release();
return NULL;
}
pArray->Add(pObj);
}
}
if (word[0] == '/') {
if (bTypeOnly) {
return (CPDF_Object*)PDFOBJ_NAME;
}
// The name is built from the token without its leading '/'.
return CPDF_Name::Create(PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1)));
}
if (word == FX_BSTRC("<<")) {
if (bTypeOnly) {
return (CPDF_Object*)PDFOBJ_DICTIONARY;
}
if (pContext) {
pContext->m_DictStart = SavedPos;
}
CPDF_Dictionary* pDict = CPDF_Dictionary::Create();
while (1) {
FX_BOOL bIsNumber;
// Position before the key, so an "endobj" can be left unread below.
FX_FILESIZE SavedPos = m_Pos;
CFX_ByteString key = GetNextWord(bIsNumber);
if (key.IsEmpty()) {
// Ran out of data inside the dictionary.
if (pDict)
pDict->Release();
return NULL;
}
if (key == FX_BSTRC(">>")) {
break;
}
if (key == FX_BSTRC("endobj")) {
// Unterminated dictionary: stop here and rewind to before "endobj".
m_Pos = SavedPos;
break;
}
// Tokens that are not names are skipped.
if (key[0] != '/') {
continue;
}
key = PDF_NameDecode(key);
CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, level + 1);
if (pObj == NULL) {
// A value that cannot be parsed invalidates the whole dictionary. Skip
// the remainder of the current line before reporting failure.
if (pDict)
pDict->Release();
FX_BYTE ch;
while (1) {
if (!GetNextChar(ch)) {
break;
}
if (ch == 0x0A || ch == 0x0D) {
break;
}
}
return NULL;
}
// The key is stored without its leading '/'.
if (key.GetLength() == 1) {
pDict->SetAt(CFX_ByteStringC(((FX_LPCSTR)key) + 1, key.GetLength() - 1), pObj);
} else {
pDict->AddValue(CFX_ByteStringC(((FX_LPCSTR)key) + 1, key.GetLength() - 1), pObj);
}
}
if (pContext) {
pContext->m_DictEnd = m_Pos;
if (pContext->m_Flags & PDFPARSE_NOSTREAM) {
return pDict;
}
}
// A dictionary followed by the "stream" keyword is a stream header;
// otherwise rewind and return the dictionary itself.
FX_FILESIZE SavedPos = m_Pos;
FX_BOOL bIsNumber;
CFX_ByteString nextword = GetNextWord(bIsNumber);
if (nextword == FX_BSTRC("stream")) {
CPDF_Stream* pStream = ReadStream(pDict, pContext, objnum, gennum);
if (pStream) {
return pStream;
}
if (pDict)
pDict->Release();
return NULL;
} else {
m_Pos = SavedPos;
return pDict;
}
}
if (word == FX_BSTRC(">>")) {
// Leave a stray dictionary terminator unread for the caller.
m_Pos = SavedPos;
return NULL;
}
if (bTypeOnly) {
return (CPDF_Object*)PDFOBJ_INVALID;
}
return NULL;
}
CPDF_Stream* CPDF_SyntaxParser::ReadStream(CPDF_Dictionary* pDict, PARSE_CONTEXT* pContext,
FX_DWORD objnum, FX_DWORD gennum)
{
CPDF_Object* pLenObj = pDict->GetElement(FX_BSTRC("Length"));
FX_FILESIZE len = 0;
if (pLenObj && ((pLenObj->GetType() != PDFOBJ_REFERENCE) ||
((((CPDF_Reference*)pLenObj)->GetObjList() != NULL) &&
((CPDF_Reference*)pLenObj)->GetRefObjNum() != objnum))) {
len = pLenObj->GetInteger();
}
ToNextLine();
FX_FILESIZE StreamStartPos = m_Pos;
if (pContext) {
pContext->m_DataStart = m_Pos;
}
base::CheckedNumeric<FX_FILESIZE> pos = m_Pos;
pos += len;
if (pos.IsValid() && pos.ValueOrDie() < m_FileLen) {
m_Pos = pos