| // Copyright 2014 PDFium Authors. All rights reserved. | |
| // Use of this source code is governed by a BSD-style license that can be | |
| // found in the LICENSE file. | |
| // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | |
| #include "../../../include/fpdfapi/fpdf_parser.h" | |
// Byte classification table used by the tokenizer in this file. It is indexed
// by the byte value (0-255); each row below covers 32 consecutive values.
//   'W' - whitespace: 0x00, 0x09, 0x0A, 0x0C, 0x0D, 0x20, and also 0x80, 0xFF
//   'N' - numeric:    '0'-'9', '+', '-', '.'
//   'D' - delimiter:  '%', '(', ')', '/', '<', '>', '[', ']', '{', '}'
//   'R' - regular:    every other byte
| extern const FX_LPCSTR _PDF_CharType = | |
| "WRRRRRRRRWWRWWRRRRRRRRRRRRRRRRRR" | |
| "WRRRRDRRDDRNRNNDNNNNNNNNNNRRDRDR" | |
| "RRRRRRRRRRRRRRRRRRRRRRRRRRRDRDRR" | |
| "RRRRRRRRRRRRRRRRRRRRRRRRRRRDRDRR" | |
| "WRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR" | |
| "RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR" | |
| "RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR" | |
| "RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRW"; | |
// Fallback definition, used only when no previously included header has
// already defined MAX_PATH.
| #ifndef MAX_PATH | |
| #define MAX_PATH 4096 | |
| #endif | |
| CPDF_SimpleParser::CPDF_SimpleParser(FX_LPCBYTE pData, FX_DWORD dwSize) | |
| { | |
| m_pData = pData; | |
| m_dwSize = dwSize; | |
| m_dwCurPos = 0; | |
| } | |
| CPDF_SimpleParser::CPDF_SimpleParser(FX_BSTR str) | |
| { | |
| m_pData = str; | |
| m_dwSize = str.GetLength(); | |
| m_dwCurPos = 0; | |
| } | |
| void CPDF_SimpleParser::ParseWord(FX_LPCBYTE& pStart, FX_DWORD& dwSize, int& type) | |
| { | |
| pStart = NULL; | |
| dwSize = 0; | |
| type = PDFWORD_EOF; | |
| FX_BYTE ch; | |
| char chartype; | |
| while (1) { | |
| if (m_dwSize <= m_dwCurPos) { | |
| return; | |
| } | |
| ch = m_pData[m_dwCurPos++]; | |
| chartype = _PDF_CharType[ch]; | |
| while (chartype == 'W') { | |
| if (m_dwSize <= m_dwCurPos) { | |
| return; | |
| } | |
| ch = m_pData[m_dwCurPos++]; | |
| chartype = _PDF_CharType[ch]; | |
| } | |
| if (ch != '%') { | |
| break; | |
| } | |
| while (1) { | |
| if (m_dwSize <= m_dwCurPos) { | |
| return; | |
| } | |
| ch = m_pData[m_dwCurPos++]; | |
| if (ch == '\r' || ch == '\n') { | |
| break; | |
| } | |
| } | |
| chartype = _PDF_CharType[ch]; | |
| } | |
| FX_DWORD start_pos = m_dwCurPos - 1; | |
| pStart = m_pData + start_pos; | |
| if (chartype == 'D') { | |
| if (ch == '/') { | |
| while (1) { | |
| if (m_dwSize <= m_dwCurPos) { | |
| return; | |
| } | |
| ch = m_pData[m_dwCurPos++]; | |
| chartype = _PDF_CharType[ch]; | |
| if (chartype != 'R' && chartype != 'N') { | |
| m_dwCurPos --; | |
| dwSize = m_dwCurPos - start_pos; | |
| type = PDFWORD_NAME; | |
| return; | |
| } | |
| } | |
| } else { | |
| type = PDFWORD_DELIMITER; | |
| dwSize = 1; | |
| if (ch == '<') { | |
| if (m_dwSize <= m_dwCurPos) { | |
| return; | |
| } | |
| ch = m_pData[m_dwCurPos++]; | |
| if (ch == '<') { | |
| dwSize = 2; | |
| } else { | |
| m_dwCurPos --; | |
| } | |
| } else if (ch == '>') { | |
| if (m_dwSize <= m_dwCurPos) { | |
| return; | |
| } | |
| ch = m_pData[m_dwCurPos++]; | |
| if (ch == '>') { | |
| dwSize = 2; | |
| } else { | |
| m_dwCurPos --; | |
| } | |
| } | |
| } | |
| return; | |
| } | |
| type = PDFWORD_NUMBER; | |
| dwSize = 1; | |
| while (1) { | |
| if (chartype != 'N') { | |
| type = PDFWORD_TEXT; | |
| } | |
| if (m_dwSize <= m_dwCurPos) { | |
| return; | |
| } | |
| ch = m_pData[m_dwCurPos++]; | |
| chartype = _PDF_CharType[ch]; | |
| if (chartype == 'D' || chartype == 'W') { | |
| m_dwCurPos --; | |
| break; | |
| } | |
| dwSize ++; | |
| } | |
| } | |
| CFX_ByteStringC CPDF_SimpleParser::GetWord() | |
| { | |
| FX_LPCBYTE pStart; | |
| FX_DWORD dwSize; | |
| int type; | |
| ParseWord(pStart, dwSize, type); | |
| if (dwSize == 1 && pStart[0] == '<') { | |
| while (m_dwCurPos < m_dwSize && m_pData[m_dwCurPos] != '>') { | |
| m_dwCurPos ++; | |
| } | |
| if (m_dwCurPos < m_dwSize) { | |
| m_dwCurPos ++; | |
| } | |
| return CFX_ByteStringC(pStart, (FX_STRSIZE)(m_dwCurPos - (pStart - m_pData))); | |
| } else if (dwSize == 1 && pStart[0] == '(') { | |
| int level = 1; | |
| while (m_dwCurPos < m_dwSize) { | |
| if (m_pData[m_dwCurPos] == ')') { | |
| level --; | |
| if (level == 0) { | |
| break; | |
| } | |
| } | |
| if (m_pData[m_dwCurPos] == '\\') { | |
| if (m_dwSize <= m_dwCurPos) { | |
| break; | |
| } | |
| m_dwCurPos ++; | |
| } else if (m_pData[m_dwCurPos] == '(') { | |
| level ++; | |
| } | |
| if (m_dwSize <= m_dwCurPos) { | |
| break; | |
| } | |
| m_dwCurPos ++; | |
| } | |
| if (m_dwCurPos < m_dwSize) { | |
| m_dwCurPos ++; | |
| } | |
| return CFX_ByteStringC(pStart, (FX_STRSIZE)(m_dwCurPos - (pStart - m_pData))); | |
| } | |
| return CFX_ByteStringC(pStart, dwSize); | |
| } | |
| FX_BOOL CPDF_SimpleParser::SearchToken(FX_BSTR token) | |
| { | |
| int token_len = token.GetLength(); | |
| while (m_dwCurPos < m_dwSize - token_len) { | |
| if (FXSYS_memcmp32(m_pData + m_dwCurPos, token, token_len) == 0) { | |
| break; | |
| } | |
| m_dwCurPos ++; | |
| } | |
| if (m_dwCurPos == m_dwSize - token_len) { | |
| return FALSE; | |
| } | |
| m_dwCurPos += token_len; | |
| return TRUE; | |
| } | |
| FX_BOOL CPDF_SimpleParser::SkipWord(FX_BSTR token) | |
| { | |
| while (1) { | |
| CFX_ByteStringC word = GetWord(); | |
| if (word.IsEmpty()) { | |
| return FALSE; | |
| } | |
| if (word == token) { | |
| return TRUE; | |
| } | |
| } | |
| return FALSE; | |
| } | |
| FX_BOOL CPDF_SimpleParser::FindTagPair(FX_BSTR start_token, FX_BSTR end_token, | |
| FX_DWORD& start_pos, FX_DWORD& end_pos) | |
| { | |
| if (!start_token.IsEmpty()) { | |
| if (!SkipWord(start_token)) { | |
| return FALSE; | |
| } | |
| start_pos = m_dwCurPos; | |
| } | |
| while (1) { | |
| end_pos = m_dwCurPos; | |
| CFX_ByteStringC word = GetWord(); | |
| if (word.IsEmpty()) { | |
| return FALSE; | |
| } | |
| if (word == end_token) { | |
| return TRUE; | |
| } | |
| } | |
| return FALSE; | |
| } | |
| FX_BOOL CPDF_SimpleParser::FindTagParam(FX_BSTR token, int nParams) | |
| { | |
| nParams ++; | |
| FX_DWORD* pBuf = FX_Alloc(FX_DWORD, nParams); | |
| int buf_index = 0; | |
| int buf_count = 0; | |
| while (1) { | |
| pBuf[buf_index++] = m_dwCurPos; | |
| if (buf_index == nParams) { | |
| buf_index = 0; | |
| } | |
| buf_count ++; | |
| if (buf_count > nParams) { | |
| buf_count = nParams; | |
| } | |
| CFX_ByteStringC word = GetWord(); | |
| if (word.IsEmpty()) { | |
| FX_Free(pBuf); | |
| return FALSE; | |
| } | |
| if (word == token) { | |
| if (buf_count < nParams) { | |
| continue; | |
| } | |
| m_dwCurPos = pBuf[buf_index]; | |
| FX_Free(pBuf); | |
| return TRUE; | |
| } | |
| } | |
| return FALSE; | |
| } | |
// Converts one hexadecimal digit character ('0'-'9', 'a'-'f', 'A'-'F') to its
// numeric value. Any other character yields 0.
static int _hex2dec(char ch)
{
    int value = 0;
    if ('0' <= ch && ch <= '9') {
        value = ch - '0';
    } else if ('a' <= ch && ch <= 'f') {
        value = 10 + (ch - 'a');
    } else if ('A' <= ch && ch <= 'F') {
        value = 10 + (ch - 'A');
    }
    return value;
}
| CFX_ByteString PDF_NameDecode(FX_BSTR bstr) | |
| { | |
| int size = bstr.GetLength(); | |
| FX_LPCSTR pSrc = bstr.GetCStr(); | |
| if (FXSYS_memchr(pSrc, '#', size) == NULL) { | |
| return bstr; | |
| } | |
| CFX_ByteString result; | |
| FX_LPSTR pDestStart = result.GetBuffer(size); | |
| FX_LPSTR pDest = pDestStart; | |
| for (int i = 0; i < size; i ++) { | |
| if (pSrc[i] == '#' && i < size - 2) { | |
| *pDest ++ = _hex2dec(pSrc[i + 1]) * 16 + _hex2dec(pSrc[i + 2]); | |
| i += 2; | |
| } else { | |
| *pDest ++ = pSrc[i]; | |
| } | |
| } | |
| result.ReleaseBuffer((FX_STRSIZE)(pDest - pDestStart)); | |
| return result; | |
| } | |
| CFX_ByteString PDF_NameDecode(const CFX_ByteString& orig) | |
| { | |
| if (FXSYS_memchr((FX_LPCSTR)orig, '#', orig.GetLength()) == NULL) { | |
| return orig; | |
| } | |
| return PDF_NameDecode(CFX_ByteStringC(orig)); | |
| } | |
| CFX_ByteString PDF_NameEncode(const CFX_ByteString& orig) | |
| { | |
| FX_LPBYTE src_buf = (FX_LPBYTE)(FX_LPCSTR)orig; | |
| int src_len = orig.GetLength(); | |
| int dest_len = 0; | |
| int i; | |
| for (i = 0; i < src_len; i ++) { | |
| FX_BYTE ch = src_buf[i]; | |
| if (ch >= 0x80 || _PDF_CharType[ch] == 'W' || ch == '#' || | |
| _PDF_CharType[ch] == 'D') { | |
| dest_len += 3; | |
| } else { | |
| dest_len ++; | |
| } | |
| } | |
| if (dest_len == src_len) { | |
| return orig; | |
| } | |
| CFX_ByteString res; | |
| FX_LPSTR dest_buf = res.GetBuffer(dest_len); | |
| dest_len = 0; | |
| for (i = 0; i < src_len; i ++) { | |
| FX_BYTE ch = src_buf[i]; | |
| if (ch >= 0x80 || _PDF_CharType[ch] == 'W' || ch == '#' || | |
| _PDF_CharType[ch] == 'D') { | |
| dest_buf[dest_len++] = '#'; | |
| dest_buf[dest_len++] = "0123456789ABCDEF"[ch / 16]; | |
| dest_buf[dest_len++] = "0123456789ABCDEF"[ch % 16]; | |
| } else { | |
| dest_buf[dest_len++] = ch; | |
| } | |
| } | |
| dest_buf[dest_len] = 0; | |
| res.ReleaseBuffer(); | |
| return res; | |
| } | |
| CFX_ByteTextBuf& operator << (CFX_ByteTextBuf& buf, const CPDF_Object* pObj) | |
| { | |
| if (pObj == NULL) { | |
| buf << FX_BSTRC(" null"); | |
| return buf; | |
| } | |
| switch (pObj->GetType()) { | |
| case PDFOBJ_NULL: | |
| buf << FX_BSTRC(" null"); | |
| break; | |
| case PDFOBJ_BOOLEAN: | |
| case PDFOBJ_NUMBER: | |
| buf << " " << pObj->GetString(); | |
| break; | |
| case PDFOBJ_STRING: { | |
| CFX_ByteString str = pObj->GetString(); | |
| FX_BOOL bHex = ((CPDF_String*)pObj)->IsHex(); | |
| buf << PDF_EncodeString(str, bHex); | |
| break; | |
| } | |
| case PDFOBJ_NAME: { | |
| CFX_ByteString str = pObj->GetString(); | |
| buf << FX_BSTRC("/") << PDF_NameEncode(str); | |
| break; | |
| } | |
| case PDFOBJ_REFERENCE: { | |
| CPDF_Reference* p = (CPDF_Reference*)pObj; | |
| buf << " " << p->GetRefObjNum() << FX_BSTRC(" 0 R "); | |
| break; | |
| } | |
| case PDFOBJ_ARRAY: { | |
| CPDF_Array* p = (CPDF_Array*)pObj; | |
| buf << FX_BSTRC("["); | |
| for (FX_DWORD i = 0; i < p->GetCount(); i ++) { | |
| CPDF_Object* pElement = p->GetElement(i); | |
| if (pElement->GetObjNum()) { | |
| buf << " " << pElement->GetObjNum() << FX_BSTRC(" 0 R"); | |
| } else { | |
| buf << pElement; | |
| } | |
| } | |
| buf << FX_BSTRC("]"); | |
| break; | |
| } | |
| case PDFOBJ_DICTIONARY: { | |
| CPDF_Dictionary* p = (CPDF_Dictionary*)pObj; | |
| buf << FX_BSTRC("<<"); | |
| FX_POSITION pos = p->GetStartPos(); | |
| while (pos) { | |
| CFX_ByteString key; | |
| CPDF_Object* pValue = p->GetNextElement(pos, key); | |
| buf << FX_BSTRC("/") << PDF_NameEncode(key); | |
| if (pValue->GetObjNum()) { | |
| buf << " " << pValue->GetObjNum() << FX_BSTRC(" 0 R "); | |
| } else { | |
| buf << pValue; | |
| } | |
| } | |
| buf << FX_BSTRC(">>"); | |
| break; | |
| } | |
| case PDFOBJ_STREAM: { | |
| CPDF_Stream* p = (CPDF_Stream*)pObj; | |
| buf << p->GetDict() << FX_BSTRC("stream\r\n"); | |
| CPDF_StreamAcc acc; | |
| acc.LoadAllData(p, TRUE); | |
| buf.AppendBlock(acc.GetData(), acc.GetSize()); | |
| buf << FX_BSTRC("\r\nendstream"); | |
| break; | |
| } | |
| default: | |
| ASSERT(FALSE); | |
| break; | |
| } | |
| return buf; | |
| } | |
| FX_FLOAT PDF_ClipFloat(FX_FLOAT f) | |
| { | |
| if (f < 0) { | |
| return 0; | |
| } | |
| if (f > 1.0f) { | |
| return 1.0f; | |
| } | |
| return f; | |
| } | |
| static CPDF_Object* SearchNumberNode(CPDF_Dictionary* pNode, int num) | |
| { | |
| CPDF_Array* pLimits = pNode->GetArray("Limits"); | |
| if (pLimits && (num < pLimits->GetInteger(0) || num > pLimits->GetInteger(1))) { | |
| return NULL; | |
| } | |
| CPDF_Array* pNumbers = pNode->GetArray("Nums"); | |
| if (pNumbers) { | |
| FX_DWORD dwCount = pNumbers->GetCount() / 2; | |
| for (FX_DWORD i = 0; i < dwCount; i ++) { | |
| int index = pNumbers->GetInteger(i * 2); | |
| if (num == index) { | |
| return pNumbers->GetElementValue(i * 2 + 1); | |
| } | |
| if (index > num) { | |
| break; | |
| } | |
| } | |
| return NULL; | |
| } | |
| CPDF_Array* pKids = pNode->GetArray("Kids"); | |
| if (pKids == NULL) { | |
| return NULL; | |
| } | |
| for (FX_DWORD i = 0; i < pKids->GetCount(); i ++) { | |
| CPDF_Dictionary* pKid = pKids->GetDict(i); | |
| if (pKid == NULL) { | |
| continue; | |
| } | |
| CPDF_Object* pFound = SearchNumberNode(pKid, num); | |
| if (pFound) { | |
| return pFound; | |
| } | |
| } | |
| return NULL; | |
| } | |
| CPDF_Object* CPDF_NumberTree::LookupValue(int num) | |
| { | |
| return SearchNumberNode(m_pRoot, num); | |
| } |