blob: cc3d29c9ed8557b861bbb17cf7a82a8b15bfea52 [file] [log] [blame]
// Copyright 2014 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
#include "../../../include/fpdfapi/fpdf_parser.h"
// Character classification table indexed by byte value (0x00-0xFF). Each
// entry is one of four letters:
//   'W' - whitespace: NUL, TAB, LF, FF, CR and SPACE (bytes 0x80 and 0xFF are
//         also marked 'W' in this table)
//   'D' - delimiter: % ( ) / < > [ ] { }
//   'N' - numeric: + - . and the digits 0-9
//   'R' - regular: every other byte
// Each string literal below covers 32 consecutive byte values.
extern const FX_LPCSTR _PDF_CharType =
"WRRRRRRRRWWRWWRRRRRRRRRRRRRRRRRR"  // 0x00-0x1F
"WRRRRDRRDDRNRNNDNNNNNNNNNNRRDRDR"  // 0x20-0x3F
"RRRRRRRRRRRRRRRRRRRRRRRRRRRDRDRR"  // 0x40-0x5F
"RRRRRRRRRRRRRRRRRRRRRRRRRRRDRDRR"  // 0x60-0x7F
"WRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR"  // 0x80-0x9F
"RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR"  // 0xA0-0xBF
"RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR"  // 0xC0-0xDF
"RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRW"; // 0xE0-0xFF
// Fallback definition of MAX_PATH, used only when no previously included
// header has already defined it.
#ifndef MAX_PATH
#define MAX_PATH 4096
#endif
// Creates a parser over the |dwSize| bytes starting at |pData|. Only the
// pointer is stored (the bytes are not copied), and parsing starts at
// offset 0.
CPDF_SimpleParser::CPDF_SimpleParser(FX_LPCBYTE pData, FX_DWORD dwSize)
    : m_pData(pData)
    , m_dwSize(dwSize)
    , m_dwCurPos(0)
{
}
// Creates a parser over the bytes referenced by |str|. Only the pointer and
// length are taken from |str| (the bytes are not copied), and parsing starts
// at offset 0.
CPDF_SimpleParser::CPDF_SimpleParser(FX_BSTR str)
    : m_pData(str)
    , m_dwSize(str.GetLength())
    , m_dwCurPos(0)
{
}
// Reads the next token starting at the current position and advances the
// position past it.
//
// Outputs:
//   pStart - first byte of the token, or NULL if the data ran out while
//            skipping whitespace/comments.
//   dwSize - token length in bytes (0 when nothing was produced).
//   type   - PDFWORD_EOF, PDFWORD_NAME, PDFWORD_DELIMITER, PDFWORD_NUMBER or
//            PDFWORD_TEXT.
//
// Note: a name ('/...') that runs up to the very end of the data is reported
// with dwSize == 0 and type == PDFWORD_EOF, because the name branch only
// fills in the outputs once it sees a terminating byte.
void CPDF_SimpleParser::ParseWord(FX_LPCBYTE& pStart, FX_DWORD& dwSize, int& type)
{
// Default outputs describe "end of data"; every early return below that
// happens before a token is completed leaves them in this state.
pStart = NULL;
dwSize = 0;
type = PDFWORD_EOF;
FX_BYTE ch;
char chartype;
// Skip any mix of whitespace and '%' comments preceding the token.
while (1) {
if (m_dwSize <= m_dwCurPos) {
return;
}
ch = m_pData[m_dwCurPos++];
chartype = _PDF_CharType[ch];
while (chartype == 'W') {
if (m_dwSize <= m_dwCurPos) {
return;
}
ch = m_pData[m_dwCurPos++];
chartype = _PDF_CharType[ch];
}
// Anything other than '%' is the first byte of a real token.
if (ch != '%') {
break;
}
// Comment: discard bytes up to and including the first CR or LF.
while (1) {
if (m_dwSize <= m_dwCurPos) {
return;
}
ch = m_pData[m_dwCurPos++];
if (ch == '\r' || ch == '\n') {
break;
}
}
chartype = _PDF_CharType[ch];
}
// m_dwCurPos is already one past the token's first byte.
FX_DWORD start_pos = m_dwCurPos - 1;
pStart = m_pData + start_pos;
if (chartype == 'D') {
if (ch == '/') {
// Name: '/' followed by regular ('R') and numeric ('N') bytes.
while (1) {
if (m_dwSize <= m_dwCurPos) {
// Data ended before a terminator: dwSize/type keep their EOF defaults.
return;
}
ch = m_pData[m_dwCurPos++];
chartype = _PDF_CharType[ch];
if (chartype != 'R' && chartype != 'N') {
// Un-read the terminating byte; it belongs to the next token.
m_dwCurPos --;
dwSize = m_dwCurPos - start_pos;
type = PDFWORD_NAME;
return;
}
}
} else {
// Any other delimiter is a one-byte token, except "<<" and ">>" which are
// returned as two-byte tokens.
type = PDFWORD_DELIMITER;
dwSize = 1;
if (ch == '<') {
if (m_dwSize <= m_dwCurPos) {
return;
}
ch = m_pData[m_dwCurPos++];
if (ch == '<') {
dwSize = 2;
} else {
m_dwCurPos --;
}
} else if (ch == '>') {
if (m_dwSize <= m_dwCurPos) {
return;
}
ch = m_pData[m_dwCurPos++];
if (ch == '>') {
dwSize = 2;
} else {
m_dwCurPos --;
}
}
}
return;
}
// Regular token: extends until the next delimiter or whitespace byte (or the
// end of the data). It stays PDFWORD_NUMBER only if every byte is numeric.
type = PDFWORD_NUMBER;
dwSize = 1;
while (1) {
if (chartype != 'N') {
type = PDFWORD_TEXT;
}
if (m_dwSize <= m_dwCurPos) {
return;
}
ch = m_pData[m_dwCurPos++];
chartype = _PDF_CharType[ch];
if (chartype == 'D' || chartype == 'W') {
// Un-read the terminating byte; it belongs to the next token.
m_dwCurPos --;
break;
}
dwSize ++;
}
}
// Returns the next word as a view into the parsed data and advances the
// current position past it. An empty result means no further word could be
// read.
//
// On top of ParseWord(), two single-byte delimiters are extended into whole
// string tokens:
//   '<' - everything up to and including the next '>' (or the end of data).
//   '(' - everything up to and including the matching ')' (or the end of
//         data), honouring nested parentheses and backslash escapes.
CFX_ByteStringC CPDF_SimpleParser::GetWord()
{
    FX_LPCBYTE pWord;
    FX_DWORD nWordLen;
    int nWordType;
    ParseWord(pWord, nWordLen, nWordType);

    // pWord may be NULL when nothing was parsed, so it is only dereferenced
    // after the length check.
    const FX_BOOL bSingleByte = (nWordLen == 1);

    if (bSingleByte && pWord[0] == '<') {
        for (; m_dwCurPos < m_dwSize; m_dwCurPos ++) {
            if (m_pData[m_dwCurPos] == '>') {
                break;
            }
        }
        // Include the closing '>' when one was found.
        if (m_dwCurPos < m_dwSize) {
            m_dwCurPos ++;
        }
        return CFX_ByteStringC(pWord, (FX_STRSIZE)(m_dwCurPos - (pWord - m_pData)));
    }

    if (bSingleByte && pWord[0] == '(') {
        int depth = 1;
        while (m_dwCurPos < m_dwSize) {
            const FX_BYTE cur = m_pData[m_dwCurPos];
            if (cur == ')') {
                depth --;
                if (depth == 0) {
                    break;
                }
            } else if (cur == '\\') {
                // Step over the backslash; the escaped byte itself is skipped
                // by the increment at the bottom of the loop.
                m_dwCurPos ++;
            } else if (cur == '(') {
                depth ++;
            }
            // A trailing backslash may have moved the position to the end.
            if (m_dwCurPos >= m_dwSize) {
                break;
            }
            m_dwCurPos ++;
        }
        // Include the closing ')' when one was found.
        if (m_dwCurPos < m_dwSize) {
            m_dwCurPos ++;
        }
        return CFX_ByteStringC(pWord, (FX_STRSIZE)(m_dwCurPos - (pWord - m_pData)));
    }

    return CFX_ByteStringC(pWord, nWordLen);
}
// Scans forward from the current position for the byte sequence |token|.
//
// Returns TRUE and leaves the current position just past the match when the
// token is found. Returns FALSE otherwise; in that case the position is left
// where the scan stopped (past the last possible start offset), or unchanged
// if no start offset was left to try.
//
// Differences from the previous implementation:
//   - A token longer than the data no longer makes the unsigned bound
//     "m_dwSize - token_len" wrap around and read past the buffer.
//   - A token located at the very end of the data is now found (the old loop
//     bound stopped one offset short).
//   - Starting beyond the last possible start offset now reports FALSE
//     instead of returning TRUE and moving the position past the data.
FX_BOOL CPDF_SimpleParser::SearchToken(FX_BSTR token)
{
    const FX_DWORD token_len = (FX_DWORD)token.GetLength();
    if (token_len > m_dwSize) {
        return FALSE;
    }
    // Last offset at which a full token still fits inside the data.
    const FX_DWORD last_start = m_dwSize - token_len;
    while (m_dwCurPos <= last_start) {
        if (FXSYS_memcmp32(m_pData + m_dwCurPos, token, token_len) == 0) {
            m_dwCurPos += token_len;
            return TRUE;
        }
        m_dwCurPos ++;
    }
    return FALSE;
}
// Consumes words until one equal to |token| has been read.
// Returns TRUE with the position just past that word, or FALSE if GetWord()
// yields an empty word first.
FX_BOOL CPDF_SimpleParser::SkipWord(FX_BSTR token)
{
    for (;;) {
        CFX_ByteStringC current = GetWord();
        if (current.IsEmpty()) {
            break;
        }
        if (current == token) {
            return TRUE;
        }
    }
    return FALSE;
}
// Locates the region between |start_token| and |end_token|.
//
// If |start_token| is non-empty, words are skipped until it is found and
// |start_pos| receives the position right after it; if it is empty,
// |start_pos| is not written. Words are then read until |end_token| is seen;
// |end_pos| receives the position that was current just before that word was
// read.
//
// Returns FALSE if either token cannot be found before the words run out.
// |end_pos| may have been modified even when FALSE is returned.
FX_BOOL CPDF_SimpleParser::FindTagPair(FX_BSTR start_token, FX_BSTR end_token,
                                       FX_DWORD& start_pos, FX_DWORD& end_pos)
{
    const FX_BOOL bHasStartToken = !start_token.IsEmpty();
    if (bHasStartToken && !SkipWord(start_token)) {
        return FALSE;
    }
    if (bHasStartToken) {
        start_pos = m_dwCurPos;
    }
    for (;;) {
        // Remember where the upcoming word begins (including any leading
        // whitespace) in case it turns out to be the end token.
        end_pos = m_dwCurPos;
        CFX_ByteStringC current = GetWord();
        if (current.IsEmpty()) {
            break;
        }
        if (current == end_token) {
            return TRUE;
        }
    }
    return FALSE;
}
// Searches forward for the word |token| and, when it is preceded by at least
// |nParams| words read during this call, rewinds the current position to just
// before the first of those |nParams| words.
//
// Returns TRUE on success. Returns FALSE if the words run out first, if
// |nParams| is negative, or if the scratch buffer cannot be allocated.
// An occurrence of |token| with fewer than |nParams| preceding words is
// skipped and the search continues.
FX_BOOL CPDF_SimpleParser::FindTagParam(FX_BSTR token, int nParams)
{
    // A negative count would yield a zero/negative slot count below and an
    // out-of-bounds write on the first store.
    if (nParams < 0) {
        return FALSE;
    }
    // One slot per parameter plus one for the token itself.
    const int nSlots = nParams + 1;
    FX_DWORD* pBuf = FX_Alloc(FX_DWORD, nSlots);
    if (pBuf == NULL) {
        return FALSE;
    }
    // pBuf is a ring buffer holding the start positions of the most recent
    // words. Once full, |next_slot| always refers to the oldest entry.
    int next_slot = 0;
    int filled = 0;
    while (1) {
        pBuf[next_slot++] = m_dwCurPos;
        if (next_slot == nSlots) {
            next_slot = 0;
        }
        if (filled < nSlots) {
            filled ++;
        }
        CFX_ByteStringC word = GetWord();
        if (word.IsEmpty()) {
            FX_Free(pBuf);
            return FALSE;
        }
        if (word == token) {
            if (filled < nSlots) {
                // Not enough words in front of this occurrence; keep looking.
                continue;
            }
            m_dwCurPos = pBuf[next_slot];
            FX_Free(pBuf);
            return TRUE;
        }
    }
    return FALSE;
}
// Converts one hexadecimal digit character ('0'-'9', 'a'-'f', 'A'-'F') to its
// numeric value 0-15. Any other character yields 0.
static int _hex2dec(char ch)
{
    if ('0' <= ch && ch <= '9') {
        return ch - '0';
    }
    // Fold upper-case hex letters onto their lower-case counterparts so a
    // single range test handles both.
    const char folded = ('A' <= ch && ch <= 'F') ? (char)(ch - 'A' + 'a') : ch;
    if ('a' <= folded && folded <= 'f') {
        return folded - 'a' + 10;
    }
    return 0;
}
// Decodes "#xx" escapes in a PDF name: each '#' that is followed by at least
// two more bytes is replaced by the single byte whose value is given by those
// two hex digits (non-hex characters count as 0). A '#' with fewer than two
// bytes after it is copied through unchanged. Input without any '#' is
// returned as-is.
CFX_ByteString PDF_NameDecode(FX_BSTR bstr)
{
    const int nLen = bstr.GetLength();
    FX_LPCSTR pIn = bstr.GetCStr();
    // Fast path: nothing to decode.
    if (!FXSYS_memchr(pIn, '#', nLen)) {
        return bstr;
    }
    CFX_ByteString decoded;
    // The decoded form is never longer than the input.
    FX_LPSTR const pOutBegin = decoded.GetBuffer(nLen);
    FX_LPSTR pOut = pOutBegin;
    int i = 0;
    while (i < nLen) {
        if (pIn[i] != '#' || i >= nLen - 2) {
            *pOut ++ = pIn[i ++];
            continue;
        }
        *pOut ++ = _hex2dec(pIn[i + 1]) * 16 + _hex2dec(pIn[i + 2]);
        i += 3;
    }
    decoded.ReleaseBuffer((FX_STRSIZE)(pOut - pOutBegin));
    return decoded;
}
// CFX_ByteString overload of PDF_NameDecode(). When |orig| contains no '#'
// it is returned directly; otherwise decoding is delegated to the
// CFX_ByteStringC overload.
CFX_ByteString PDF_NameDecode(const CFX_ByteString& orig)
{
    if (FXSYS_memchr((FX_LPCSTR)orig, '#', orig.GetLength()) != NULL) {
        return PDF_NameDecode(CFX_ByteStringC(orig));
    }
    return orig;
}
CFX_ByteString PDF_NameEncode(const CFX_ByteString& orig)
{
FX_LPBYTE src_buf = (FX_LPBYTE)(FX_LPCSTR)orig;
int src_len = orig.GetLength();
int dest_len = 0;
int i;
for (i = 0; i < src_len; i ++) {
FX_BYTE ch = src_buf[i];
if (ch >= 0x80 || _PDF_CharType[ch] == 'W' || ch == '#' ||
_PDF_CharType[ch] == 'D') {
dest_len += 3;
} else {
dest_len ++;
}
}
if (dest_len == src_len) {
return orig;
}
CFX_ByteString res;
FX_LPSTR dest_buf = res.GetBuffer(dest_len);
dest_len = 0;
for (i = 0; i < src_len; i ++) {
FX_BYTE ch = src_buf[i];
if (ch >= 0x80 || _PDF_CharType[ch] == 'W' || ch == '#' ||
_PDF_CharType[ch] == 'D') {
dest_buf[dest_len++] = '#';
dest_buf[dest_len++] = "0123456789ABCDEF"[ch / 16];
dest_buf[dest_len++] = "0123456789ABCDEF"[ch % 16];
} else {
dest_buf[dest_len++] = ch;
}
}
dest_buf[dest_len] = 0;
res.ReleaseBuffer();
return res;
}
// Appends the textual PDF representation of |pObj| to |buf| and returns |buf|.
//
//   - A NULL pointer and PDFOBJ_NULL are both written as " null".
//   - Booleans and numbers are written as a space followed by GetString().
//   - Strings go through PDF_EncodeString(); names are written as '/' plus
//     PDF_NameEncode().
//   - References, and array/dictionary members that have a non-zero object
//     number, are written as "<objnum> 0 R" (the generation is always
//     written as 0).
//   - Arrays and dictionaries recurse into members that have no object
//     number.
//   - Streams are written as their dictionary followed by the data returned
//     by CPDF_StreamAcc::LoadAllData(p, TRUE) between "stream" / "endstream".
CFX_ByteTextBuf& operator << (CFX_ByteTextBuf& buf, const CPDF_Object* pObj)
{
    if (!pObj) {
        buf << FX_BSTRC(" null");
        return buf;
    }
    switch (pObj->GetType()) {
        case PDFOBJ_NULL:
            buf << FX_BSTRC(" null");
            break;
        case PDFOBJ_BOOLEAN:
        case PDFOBJ_NUMBER:
            buf << " ";
            buf << pObj->GetString();
            break;
        case PDFOBJ_STRING: {
            CFX_ByteString text = pObj->GetString();
            FX_BOOL bAsHex = ((CPDF_String*)pObj)->IsHex();
            buf << PDF_EncodeString(text, bAsHex);
            break;
        }
        case PDFOBJ_NAME: {
            CFX_ByteString name = pObj->GetString();
            buf << FX_BSTRC("/");
            buf << PDF_NameEncode(name);
            break;
        }
        case PDFOBJ_REFERENCE: {
            CPDF_Reference* pRef = (CPDF_Reference*)pObj;
            buf << " ";
            buf << pRef->GetRefObjNum();
            buf << FX_BSTRC(" 0 R ");
            break;
        }
        case PDFOBJ_ARRAY: {
            CPDF_Array* pArray = (CPDF_Array*)pObj;
            buf << FX_BSTRC("[");
            FX_DWORD index = 0;
            while (index < pArray->GetCount()) {
                CPDF_Object* pItem = pArray->GetElement(index);
                index ++;
                if (pItem->GetObjNum() == 0) {
                    // Direct object: serialize it in place.
                    buf << pItem;
                } else {
                    buf << " ";
                    buf << pItem->GetObjNum();
                    buf << FX_BSTRC(" 0 R");
                }
            }
            buf << FX_BSTRC("]");
            break;
        }
        case PDFOBJ_DICTIONARY: {
            CPDF_Dictionary* pDict = (CPDF_Dictionary*)pObj;
            buf << FX_BSTRC("<<");
            for (FX_POSITION pos = pDict->GetStartPos(); pos; ) {
                CFX_ByteString key;
                CPDF_Object* pEntry = pDict->GetNextElement(pos, key);
                buf << FX_BSTRC("/");
                buf << PDF_NameEncode(key);
                if (pEntry->GetObjNum() == 0) {
                    // Direct object: serialize it in place.
                    buf << pEntry;
                } else {
                    buf << " ";
                    buf << pEntry->GetObjNum();
                    buf << FX_BSTRC(" 0 R ");
                }
            }
            buf << FX_BSTRC(">>");
            break;
        }
        case PDFOBJ_STREAM: {
            CPDF_Stream* pStream = (CPDF_Stream*)pObj;
            buf << pStream->GetDict();
            buf << FX_BSTRC("stream\r\n");
            CPDF_StreamAcc acc;
            acc.LoadAllData(pStream, TRUE);
            buf.AppendBlock(acc.GetData(), acc.GetSize());
            buf << FX_BSTRC("\r\nendstream");
            break;
        }
        default:
            // Unknown object type.
            ASSERT(FALSE);
            break;
    }
    return buf;
}
// Clamps |f| to the closed range [0, 1]: values below 0 become 0, values
// above 1 become 1, and everything else is returned unchanged.
FX_FLOAT PDF_ClipFloat(FX_FLOAT f)
{
    return (f < 0) ? 0 : ((f > 1.0f) ? 1.0f : f);
}
// Looks up |num| in the number-tree node |pNode| and returns the associated
// value, or NULL when there is none.
//
//   - If the node has a "Limits" array and |num| lies outside
//     [Limits[0], Limits[1]], the node is rejected immediately.
//   - If the node has a "Nums" array it is treated as a leaf: the array is
//     read as (key, value) pairs and scanned in order, stopping early once a
//     key greater than |num| is seen.
//   - Otherwise each dictionary in "Kids" is searched recursively.
//
// |nLevel| is the current recursion depth; callers normally omit it. The
// depth is capped because the tree comes from the (untrusted) document and a
// cyclic or extremely deep "Kids" chain would otherwise exhaust the stack.
// Nodes nested deeper than the cap are treated as not containing |num|.
static CPDF_Object* SearchNumberNode(CPDF_Dictionary* pNode, int num, int nLevel = 0)
{
    static const int kMaxNumberTreeDepth = 32;
    if (pNode == NULL || nLevel > kMaxNumberTreeDepth) {
        return NULL;
    }
    CPDF_Array* pLimits = pNode->GetArray("Limits");
    if (pLimits && (num < pLimits->GetInteger(0) || num > pLimits->GetInteger(1))) {
        return NULL;
    }
    CPDF_Array* pNumbers = pNode->GetArray("Nums");
    if (pNumbers) {
        // Entries come in (key, value) pairs; a trailing unpaired entry is
        // ignored by the integer division.
        FX_DWORD dwCount = pNumbers->GetCount() / 2;
        for (FX_DWORD i = 0; i < dwCount; i ++) {
            int index = pNumbers->GetInteger(i * 2);
            if (num == index) {
                return pNumbers->GetElementValue(i * 2 + 1);
            }
            if (index > num) {
                // Stop at the first key larger than the one being searched.
                break;
            }
        }
        return NULL;
    }
    CPDF_Array* pKids = pNode->GetArray("Kids");
    if (pKids == NULL) {
        return NULL;
    }
    for (FX_DWORD i = 0; i < pKids->GetCount(); i ++) {
        CPDF_Dictionary* pKid = pKids->GetDict(i);
        if (pKid == NULL) {
            continue;
        }
        CPDF_Object* pFound = SearchNumberNode(pKid, num, nLevel + 1);
        if (pFound) {
            return pFound;
        }
    }
    return NULL;
}
// Returns the value stored under the key |num| in this number tree, searching
// from the root node, or NULL if there is no such key.
CPDF_Object* CPDF_NumberTree::LookupValue(int num)
{
    CPDF_Object* pValue = SearchNumberNode(m_pRoot, num);
    return pValue;
}