// Copyright 2014 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
#include "../../../include/fpdfapi/fpdf_parser.h" | |
// Byte classification table used by the parser in this file, indexed by the
// unsigned byte value (0-255). Each entry is one of:
//   'W' - skipped as whitespace between tokens
//   'D' - delimiter: ends the current token and forms a token by itself
//   'N' - byte that may appear in a numeric token
//   'R' - regular byte (everything else)
extern const FX_LPCSTR _PDF_CharType =
    "WRRRRRRRRWWRWWRRRRRRRRRRRRRRRRRR"   // 0x00 - 0x1F
    "WRRRRDRRDDRNRNNDNNNNNNNNNNRRDRDR"   // 0x20 - 0x3F
    "RRRRRRRRRRRRRRRRRRRRRRRRRRRDRDRR"   // 0x40 - 0x5F
    "RRRRRRRRRRRRRRRRRRRRRRRRRRRDRDRR"   // 0x60 - 0x7F
    "WRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR"   // 0x80 - 0x9F
    "RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR"   // 0xA0 - 0xBF
    "RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR"   // 0xC0 - 0xDF
    "RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRW";  // 0xE0 - 0xFF
// Fallback definition for platforms whose headers do not provide MAX_PATH.
#if !defined(MAX_PATH)
#define MAX_PATH 4096
#endif
// Creates a parser over the caller-supplied byte range [pData, pData + dwSize).
// Only the pointer is stored; the cursor starts at offset 0.
CPDF_SimpleParser::CPDF_SimpleParser(FX_LPCBYTE pData, FX_DWORD dwSize)
    : m_pData(pData)
    , m_dwSize(dwSize)
    , m_dwCurPos(0)
{
}
// Creates a parser over the bytes referenced by |str|. Only the pointer and
// length are stored; the cursor starts at offset 0.
CPDF_SimpleParser::CPDF_SimpleParser(FX_BSTR str)
    : m_pData(str)
    , m_dwSize(str.GetLength())
    , m_dwCurPos(0)
{
}
// Reads the next lexical token from the buffer, starting at the cursor.
//
// Bytes classified 'W' in _PDF_CharType and '%' comments (which run up to the
// next CR or LF) are skipped first. On return:
//   pStart - first byte of the token, or NULL when nothing but whitespace and
//            comments remained;
//   dwSize - token length in bytes (0 when no token was found);
//   type   - PDFWORD_EOF, PDFWORD_NAME, PDFWORD_DELIMITER, PDFWORD_NUMBER or
//            PDFWORD_TEXT.
// The cursor is left on the first byte following the token.
//
// A name ("/...") that extends to the very last byte of the buffer is
// reported as a PDFWORD_NAME of the proper length; previously it was dropped
// and reported as PDFWORD_EOF with a zero size.
void CPDF_SimpleParser::ParseWord(FX_LPCBYTE& pStart, FX_DWORD& dwSize, int& type)
{
    pStart = NULL;
    dwSize = 0;
    type = PDFWORD_EOF;
    FX_BYTE ch;
    char chartype;
    // Skip any mix of whitespace runs and comments preceding the token.
    while (1) {
        if (m_dwSize <= m_dwCurPos) {
            return;
        }
        ch = m_pData[m_dwCurPos++];
        chartype = _PDF_CharType[ch];
        while (chartype == 'W') {
            if (m_dwSize <= m_dwCurPos) {
                return;
            }
            ch = m_pData[m_dwCurPos++];
            chartype = _PDF_CharType[ch];
        }
        if (ch != '%') {
            break;
        }
        // Comment: discard everything up to and including the line break,
        // then go around again to skip whatever follows it.
        while (1) {
            if (m_dwSize <= m_dwCurPos) {
                return;
            }
            ch = m_pData[m_dwCurPos++];
            if (ch == '\r' || ch == '\n') {
                break;
            }
        }
    }
    // |ch| is the first byte of the token and has already been consumed.
    FX_DWORD start_pos = m_dwCurPos - 1;
    pStart = m_pData + start_pos;
    if (chartype == 'D') {
        if (ch == '/') {
            // Name: the solidus followed by every regular/numeric byte. Running
            // out of data simply ends the name.
            type = PDFWORD_NAME;
            while (m_dwCurPos < m_dwSize) {
                chartype = _PDF_CharType[m_pData[m_dwCurPos]];
                if (chartype != 'R' && chartype != 'N') {
                    break;
                }
                m_dwCurPos ++;
            }
            dwSize = m_dwCurPos - start_pos;
            return;
        }
        type = PDFWORD_DELIMITER;
        dwSize = 1;
        // "<<" and ">>" are two-byte delimiters; a lone '<' or '>' stays one byte.
        if (ch == '<' || ch == '>') {
            if (m_dwCurPos < m_dwSize && m_pData[m_dwCurPos] == ch) {
                m_dwCurPos ++;
                dwSize = 2;
            }
        }
        return;
    }
    // Number or text: extends until the next delimiter or whitespace byte. The
    // token stays a number only while every byte in it is classified 'N'.
    type = PDFWORD_NUMBER;
    dwSize = 1;
    while (1) {
        if (chartype != 'N') {
            type = PDFWORD_TEXT;
        }
        if (m_dwSize <= m_dwCurPos) {
            return;
        }
        ch = m_pData[m_dwCurPos++];
        chartype = _PDF_CharType[ch];
        if (chartype == 'D' || chartype == 'W') {
            // Terminator belongs to the next token; un-read it.
            m_dwCurPos --;
            break;
        }
        dwSize ++;
    }
}
// Returns the next word as a view into the parser's buffer (no copy is made).
//
// Tokens come from ParseWord(), with two extensions: a lone '<' is extended
// through the next '>' (hex string), and a lone '(' is extended through its
// matching ')' (literal string, honouring nesting and backslash escapes). If
// the closing byte is missing, the word runs to the end of the data. An empty
// result means no further word is available.
CFX_ByteStringC CPDF_SimpleParser::GetWord()
{
    FX_LPCBYTE pWord;
    FX_DWORD nWordLen;
    int nWordType;
    ParseWord(pWord, nWordLen, nWordType);

    // Anything other than a single '<' or '(' is returned exactly as parsed.
    // pWord is only dereferenced when a one-byte token was produced.
    if (nWordLen != 1 || (pWord[0] != '<' && pWord[0] != '(')) {
        return CFX_ByteStringC(pWord, nWordLen);
    }

    if (pWord[0] == '<') {
        // Hex string: advance to the closing '>'.
        while (m_dwCurPos < m_dwSize && m_pData[m_dwCurPos] != '>') {
            m_dwCurPos ++;
        }
    } else {
        // Literal string: track parenthesis depth until it returns to zero.
        int nDepth = 1;
        while (m_dwCurPos < m_dwSize) {
            const FX_BYTE cur = m_pData[m_dwCurPos];
            if (cur == ')') {
                nDepth --;
                if (nDepth == 0) {
                    break;
                }
            } else if (cur == '\\') {
                // Step over the backslash; the escaped byte itself is skipped
                // by the increment at the bottom of the loop.
                m_dwCurPos ++;
                if (m_dwCurPos >= m_dwSize) {
                    break;
                }
            } else if (cur == '(') {
                nDepth ++;
            }
            m_dwCurPos ++;
        }
    }
    // Include the closing byte in the word when one was found.
    if (m_dwCurPos < m_dwSize) {
        m_dwCurPos ++;
    }
    return CFX_ByteStringC(pWord, (FX_STRSIZE)(m_dwCurPos - (pWord - m_pData)));
}
// Scans forward from the cursor for the first byte-exact occurrence of
// |token|.
//
// Returns TRUE and moves the cursor just past the match when found. Returns
// FALSE otherwise; in that case the cursor is left unchanged if |token| cannot
// fit in the remaining data, or at the last candidate start position
// (m_dwSize - token length) after an unsuccessful scan, as before.
//
// Compared with the earlier implementation this
//   - never computes m_dwSize - token_len when that would wrap around (token
//     longer than the buffer), which used to cause reads past the buffer;
//   - examines the final candidate position, so a token ending exactly at the
//     end of the data is found;
//   - no longer reports success when the cursor is already too close to the
//     end for the token to fit.
FX_BOOL CPDF_SimpleParser::SearchToken(FX_BSTR token)
{
    const int token_len = token.GetLength();
    if (token_len < 0 || m_dwCurPos > m_dwSize ||
            (FX_DWORD)token_len > m_dwSize - m_dwCurPos) {
        return FALSE;
    }
    // Safe: token_len <= m_dwSize - m_dwCurPos <= m_dwSize.
    const FX_DWORD last_start = m_dwSize - (FX_DWORD)token_len;
    FX_DWORD pos = m_dwCurPos;
    while (1) {
        if (FXSYS_memcmp32(m_pData + pos, token, token_len) == 0) {
            m_dwCurPos = pos + token_len;
            return TRUE;
        }
        if (pos == last_start) {
            break;
        }
        pos ++;
    }
    m_dwCurPos = last_start;
    return FALSE;
}
// Consumes words until one equal to |token| has been read.
// Returns TRUE once that word is consumed, or FALSE if GetWord() yields an
// empty word first (no more words available).
FX_BOOL CPDF_SimpleParser::SkipWord(FX_BSTR token)
{
    FX_BOOL bFound = FALSE;
    for (;;) {
        CFX_ByteStringC word = GetWord();
        if (word.IsEmpty()) {
            break;
        }
        if (word == token) {
            bFound = TRUE;
            break;
        }
    }
    return bFound;
}
// Locates the span between |start_token| and the next |end_token|.
//
// When |start_token| is non-empty, words are skipped until it is found and
// |start_pos| receives the cursor position right after it; when it is empty,
// |start_pos| is not written. |end_pos| receives the cursor position recorded
// just before the word equal to |end_token| was read. Returns FALSE if the
// words run out before either token is found.
FX_BOOL CPDF_SimpleParser::FindTagPair(FX_BSTR start_token, FX_BSTR end_token,
                                       FX_DWORD& start_pos, FX_DWORD& end_pos)
{
    const FX_BOOL bHasStartToken = !start_token.IsEmpty();
    if (bHasStartToken) {
        if (!SkipWord(start_token)) {
            return FALSE;
        }
        start_pos = m_dwCurPos;
    }
    FX_BOOL bMatched = FALSE;
    for (;;) {
        // Remember where this word begins in case it is the end token.
        end_pos = m_dwCurPos;
        CFX_ByteStringC word = GetWord();
        if (word.IsEmpty()) {
            break;
        }
        if (word == end_token) {
            bMatched = TRUE;
            break;
        }
    }
    return bMatched;
}
// Searches forward for a word equal to |token| that is preceded by at least
// |nParams| words, and rewinds the cursor to the start of the first of those
// |nParams| words.
//
// The positions of the most recent nParams + 1 words are kept in a ring
// buffer; when the token is seen, the oldest stored position is the one
// nParams words before it. Occurrences of |token| seen before enough words
// have been read are ignored and the search continues.
//
// Returns TRUE with the cursor rewound on success. Returns FALSE if the words
// run out first, if |nParams| is negative, or if the position buffer cannot be
// allocated.
FX_BOOL CPDF_SimpleParser::FindTagParam(FX_BSTR token, int nParams)
{
    // A negative count would yield an empty (or negative-sized) buffer that
    // the loop below writes into.
    if (nParams < 0) {
        return FALSE;
    }
    nParams ++;
    FX_DWORD* pBuf = FX_Alloc(FX_DWORD, nParams);
    if (pBuf == NULL) {
        return FALSE;
    }
    FX_BOOL bFound = FALSE;
    int buf_index = 0;  // next slot to write == oldest entry once the ring is full
    int buf_count = 0;  // number of valid entries, capped at nParams
    while (1) {
        pBuf[buf_index++] = m_dwCurPos;
        if (buf_index == nParams) {
            buf_index = 0;
        }
        if (buf_count < nParams) {
            buf_count ++;
        }
        CFX_ByteStringC word = GetWord();
        if (word.IsEmpty()) {
            break;
        }
        if (word == token && buf_count >= nParams) {
            m_dwCurPos = pBuf[buf_index];
            bFound = TRUE;
            break;
        }
    }
    FX_Free(pBuf);
    return bFound;
}
// Converts one hexadecimal digit character (0-9, a-f, A-F) to its value.
// Any other character yields 0.
static int _hex2dec(char ch)
{
    int value = 0;
    if ('0' <= ch && ch <= '9') {
        value = ch - '0';
    } else if ('a' <= ch && ch <= 'f') {
        value = 10 + (ch - 'a');
    } else if ('A' <= ch && ch <= 'F') {
        value = 10 + (ch - 'A');
    }
    return value;
}
// Decodes "#XX" escapes in a name: each '#' that is followed by at least two
// more bytes is replaced by the byte whose value those two hex digits spell
// (non-hex characters count as 0). A '#' with fewer than two bytes after it is
// copied through unchanged. Input without any '#' is returned as-is.
CFX_ByteString PDF_NameDecode(FX_BSTR bstr)
{
    const int nSrcLen = bstr.GetLength();
    FX_LPCSTR pIn = bstr.GetCStr();
    if (FXSYS_memchr(pIn, '#', nSrcLen) == NULL) {
        return bstr;
    }
    CFX_ByteString decoded;
    // Decoding never lengthens the data, so the source length is sufficient.
    FX_LPSTR pOutBegin = decoded.GetBuffer(nSrcLen);
    FX_LPSTR pOut = pOutBegin;
    int i = 0;
    while (i < nSrcLen) {
        if (pIn[i] == '#' && i + 2 < nSrcLen) {
            const int hi = _hex2dec(pIn[i + 1]);
            const int lo = _hex2dec(pIn[i + 2]);
            *pOut ++ = hi * 16 + lo;
            i += 3;
        } else {
            *pOut ++ = pIn[i];
            i ++;
        }
    }
    decoded.ReleaseBuffer((FX_STRSIZE)(pOut - pOutBegin));
    return decoded;
}
// CFX_ByteString overload: returns |orig| itself when it contains no '#', and
// otherwise forwards to the FX_BSTR overload to do the decoding.
CFX_ByteString PDF_NameDecode(const CFX_ByteString& orig)
{
    if (FXSYS_memchr((FX_LPCSTR)orig, '#', orig.GetLength()) != NULL) {
        return PDF_NameDecode(CFX_ByteStringC(orig));
    }
    return orig;
}
CFX_ByteString PDF_NameEncode(const CFX_ByteString& orig) | |
{ | |
FX_LPBYTE src_buf = (FX_LPBYTE)(FX_LPCSTR)orig; | |
int src_len = orig.GetLength(); | |
int dest_len = 0; | |
int i; | |
for (i = 0; i < src_len; i ++) { | |
FX_BYTE ch = src_buf[i]; | |
if (ch >= 0x80 || _PDF_CharType[ch] == 'W' || ch == '#' || | |
_PDF_CharType[ch] == 'D') { | |
dest_len += 3; | |
} else { | |
dest_len ++; | |
} | |
} | |
if (dest_len == src_len) { | |
return orig; | |
} | |
CFX_ByteString res; | |
FX_LPSTR dest_buf = res.GetBuffer(dest_len); | |
dest_len = 0; | |
for (i = 0; i < src_len; i ++) { | |
FX_BYTE ch = src_buf[i]; | |
if (ch >= 0x80 || _PDF_CharType[ch] == 'W' || ch == '#' || | |
_PDF_CharType[ch] == 'D') { | |
dest_buf[dest_len++] = '#'; | |
dest_buf[dest_len++] = "0123456789ABCDEF"[ch / 16]; | |
dest_buf[dest_len++] = "0123456789ABCDEF"[ch % 16]; | |
} else { | |
dest_buf[dest_len++] = ch; | |
} | |
} | |
dest_buf[dest_len] = 0; | |
res.ReleaseBuffer(); | |
return res; | |
} | |
// Appends a textual serialization of |pObj| to |buf| and returns |buf|.
//
// A NULL pointer and a null object are both written as " null". Array
// elements and dictionary values that report a non-zero object number are
// written as references ("<num> 0 R") instead of being expanded in place;
// everything else is serialized recursively. For streams, the dictionary is
// followed by the data obtained from CPDF_StreamAcc::LoadAllData(p, TRUE),
// wrapped in "stream" / "endstream".
CFX_ByteTextBuf& operator << (CFX_ByteTextBuf& buf, const CPDF_Object* pObj)
{
    if (pObj == NULL) {
        buf << FX_BSTRC(" null");
        return buf;
    }
    switch (pObj->GetType()) {
        case PDFOBJ_NULL:
            buf << FX_BSTRC(" null");
            return buf;

        case PDFOBJ_BOOLEAN:
        case PDFOBJ_NUMBER:
            buf << " " << pObj->GetString();
            return buf;

        case PDFOBJ_STRING: {
            CFX_ByteString value = pObj->GetString();
            FX_BOOL bAsHex = ((CPDF_String*)pObj)->IsHex();
            buf << PDF_EncodeString(value, bAsHex);
            return buf;
        }

        case PDFOBJ_NAME: {
            CFX_ByteString name = pObj->GetString();
            buf << FX_BSTRC("/") << PDF_NameEncode(name);
            return buf;
        }

        case PDFOBJ_REFERENCE: {
            CPDF_Reference* pRef = (CPDF_Reference*)pObj;
            buf << " " << pRef->GetRefObjNum() << FX_BSTRC(" 0 R ");
            return buf;
        }

        case PDFOBJ_ARRAY: {
            CPDF_Array* pArray = (CPDF_Array*)pObj;
            buf << FX_BSTRC("[");
            FX_DWORD idx = 0;
            while (idx < pArray->GetCount()) {
                CPDF_Object* pItem = pArray->GetElement(idx);
                if (pItem->GetObjNum()) {
                    // Numbered object: emit a reference rather than its content.
                    buf << " " << pItem->GetObjNum() << FX_BSTRC(" 0 R");
                } else {
                    buf << pItem;
                }
                idx ++;
            }
            buf << FX_BSTRC("]");
            return buf;
        }

        case PDFOBJ_DICTIONARY: {
            CPDF_Dictionary* pDict = (CPDF_Dictionary*)pObj;
            buf << FX_BSTRC("<<");
            for (FX_POSITION pos = pDict->GetStartPos(); pos; ) {
                CFX_ByteString key;
                CPDF_Object* pEntry = pDict->GetNextElement(pos, key);
                buf << FX_BSTRC("/") << PDF_NameEncode(key);
                if (pEntry->GetObjNum()) {
                    // Numbered object: emit a reference rather than its content.
                    buf << " " << pEntry->GetObjNum() << FX_BSTRC(" 0 R ");
                } else {
                    buf << pEntry;
                }
            }
            buf << FX_BSTRC(">>");
            return buf;
        }

        case PDFOBJ_STREAM: {
            CPDF_Stream* pStream = (CPDF_Stream*)pObj;
            buf << pStream->GetDict() << FX_BSTRC("stream\r\n");
            CPDF_StreamAcc acc;
            acc.LoadAllData(pStream, TRUE);
            buf.AppendBlock(acc.GetData(), acc.GetSize());
            buf << FX_BSTRC("\r\nendstream");
            return buf;
        }

        default:
            // Unknown object type: nothing is written.
            ASSERT(FALSE);
            break;
    }
    return buf;
}
// Clamps |f| to the closed interval [0, 1]: values below 0 become 0, values
// above 1 become 1, and anything else is returned unchanged.
FX_FLOAT PDF_ClipFloat(FX_FLOAT f)
{
    if (f > 1.0f) {
        return 1.0f;
    }
    return (f < 0) ? 0.0f : f;
}
// Looks up the value stored under key |num| in the number-tree node |pNode|
// and, recursively, in its children.
//
//   - If the node has a "Limits" array and |num| lies outside
//     [Limits[0], Limits[1]], the node is skipped.
//   - If the node has a "Nums" array, it is read as (key, value) pairs; the
//     scan stops at the first key greater than |num| (so it relies on the
//     keys being in ascending order) and "Kids" is not consulted.
//   - Otherwise each dictionary in "Kids" is searched in order.
//
// |nLevel| is the current recursion depth; callers outside this function use
// the default of 0. The search gives up below kMaxNumberTreeDepth levels so
// that a tree whose "Kids" refer back to an ancestor (for example in a
// malformed file) cannot recurse without bound. A NULL |pNode| is treated as
// an empty tree.
//
// Returns the matching value, or NULL when |num| is not found.
static CPDF_Object* SearchNumberNode(CPDF_Dictionary* pNode, int num, int nLevel = 0)
{
    // Defensive bound chosen here; not a limit defined by the file format.
    static const int kMaxNumberTreeDepth = 64;
    if (pNode == NULL || nLevel > kMaxNumberTreeDepth) {
        return NULL;
    }
    CPDF_Array* pLimits = pNode->GetArray("Limits");
    if (pLimits && (num < pLimits->GetInteger(0) || num > pLimits->GetInteger(1))) {
        return NULL;
    }
    CPDF_Array* pNumbers = pNode->GetArray("Nums");
    if (pNumbers) {
        FX_DWORD dwCount = pNumbers->GetCount() / 2;
        for (FX_DWORD i = 0; i < dwCount; i ++) {
            int index = pNumbers->GetInteger(i * 2);
            if (num == index) {
                return pNumbers->GetElementValue(i * 2 + 1);
            }
            if (index > num) {
                break;
            }
        }
        return NULL;
    }
    CPDF_Array* pKids = pNode->GetArray("Kids");
    if (pKids == NULL) {
        return NULL;
    }
    for (FX_DWORD i = 0; i < pKids->GetCount(); i ++) {
        CPDF_Dictionary* pKid = pKids->GetDict(i);
        if (pKid == NULL) {
            continue;
        }
        CPDF_Object* pFound = SearchNumberNode(pKid, num, nLevel + 1);
        if (pFound) {
            return pFound;
        }
    }
    return NULL;
}
// Returns the value stored under key |num| in this number tree, searching
// from the root node, or NULL when there is no such key.
CPDF_Object* CPDF_NumberTree::LookupValue(int num)
{
    CPDF_Object* pValue = SearchNumberNode(m_pRoot, num);
    return pValue;
}