// blob: 6b4a3622575c5888108994082fb5c308b6a39309 [file] [log] [blame]
// Copyright 2016 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
#include "core/fpdfapi/page/pageint.h"
#include <limits.h>
#include <utility>
#include "core/fpdfapi/cpdf_modulemgr.h"
#include "core/fpdfapi/page/cpdf_docpagedata.h"
#include "core/fpdfapi/parser/cpdf_array.h"
#include "core/fpdfapi/parser/cpdf_boolean.h"
#include "core/fpdfapi/parser/cpdf_dictionary.h"
#include "core/fpdfapi/parser/cpdf_document.h"
#include "core/fpdfapi/parser/cpdf_name.h"
#include "core/fpdfapi/parser/cpdf_null.h"
#include "core/fpdfapi/parser/cpdf_number.h"
#include "core/fpdfapi/parser/cpdf_stream.h"
#include "core/fpdfapi/parser/cpdf_string.h"
#include "core/fpdfapi/parser/fpdf_parser_decode.h"
#include "core/fpdfapi/parser/fpdf_parser_utility.h"
#include "core/fxcodec/fx_codec.h"
#include "core/fxcrt/fx_ext.h"
// Forward declaration of the fax decoder factory; its definition is not part
// of this file. PDF_DecodeInlineStream() below calls it for the
// "CCITTFaxDecode" / "CCF" filters and hands the returned decoder to
// DecodeAllScanlines(), which deletes it.
CCodec_ScanlineDecoder* FPDFAPI_CreateFaxDecoder(
const uint8_t* src_buf,
uint32_t src_size,
int width,
int height,
const CPDF_Dictionary* pParams);
namespace {
// Nesting limit enforced by ReadNextObject().
constexpr uint32_t kMaxNestedArrayLevel = 512;

// Upper bound on the number of bytes the word readers store in m_WordBuffer
// for a single token; longer tokens are truncated.
constexpr uint32_t kMaxWordBuffer = 256;

// ReadString() and ReadHexString() truncate their results to this length.
constexpr FX_STRSIZE kMaxStringLength = 32767;
// Decodes every scanline of |pDecoder| into a newly allocated buffer.
//
// The decoder is always destroyed before this function returns (a null
// |pDecoder| is tolerated). On success |dest_buf| receives an FX_Alloc2D()
// allocation of |dest_size| == pitch * height bytes and the return value is
// the decoder's GetSrcOffset(). If the decoder stops producing scanlines
// early, the remaining rows are left as allocated.
//
// Returns FX_INVALID_OFFSET, leaving |dest_buf| and |dest_size| untouched,
// when |pDecoder| is null or when it reports dimensions that are non-positive
// or whose byte size would exceed 1 << 30.
uint32_t DecodeAllScanlines(CCodec_ScanlineDecoder* pDecoder,
                            uint8_t*& dest_buf,
                            uint32_t& dest_size) {
  if (!pDecoder)
    return FX_INVALID_OFFSET;

  // Scoped owner so that every exit path below releases the decoder.
  auto pOwned = pdfium::WrapUnique(pDecoder);

  const int ncomps = pOwned->CountComps();
  const int bpc = pOwned->GetBPC();
  const int width = pOwned->GetWidth();
  const int height = pOwned->GetHeight();
  if (ncomps <= 0 || bpc <= 0 || width <= 0 || height <= 0)
    return FX_INVALID_OFFSET;

  // Build the row size one factor at a time; a plain width * ncomps * bpc
  // could overflow a signed int for hostile dimensions.
  if (width > INT_MAX / ncomps)
    return FX_INVALID_OFFSET;
  const int samples_per_row = width * ncomps;
  if (samples_per_row > (INT_MAX - 7) / bpc)
    return FX_INVALID_OFFSET;
  const int pitch = (samples_per_row * bpc + 7) / 8;
  if (pitch > (1 << 30) / height)
    return FX_INVALID_OFFSET;

  dest_buf = FX_Alloc2D(uint8_t, pitch, height);
  dest_size = pitch * height;  // Cannot overflow: bounded by 1 << 30 above.
  for (int row = 0; row < height; row++) {
    const uint8_t* pLine = pOwned->GetScanline(row);
    if (!pLine)
      break;
    FXSYS_memcpy(dest_buf + row * pitch, pLine, pitch);
  }
  return pOwned->GetSrcOffset();
}
// Runs the single filter named by |decoder| over the |limit| bytes at
// |src_buf|, writing the decoded data to |dest_buf| / |dest_size|.
//
// Both the full filter names and their inline-image abbreviations are
// recognised. |width| and |height| are forwarded to the scanline based
// decoders (fax and DCT); |pParam| is the filter's parameter dictionary and
// may be null. For FlateDecode the incoming value of |dest_size| is passed
// along as the size estimate.
//
// The return value is whatever the selected decoder reports; the caller in
// this file treats it as the number of source bytes consumed. For an
// unrecognised filter name the outputs are cleared and (uint32_t)-1 is
// returned.
uint32_t PDF_DecodeInlineStream(const uint8_t* src_buf,
                                uint32_t limit,
                                int width,
                                int height,
                                CFX_ByteString& decoder,
                                CPDF_Dictionary* pParam,
                                uint8_t*& dest_buf,
                                uint32_t& dest_size) {
  // True when |decoder| equals the full filter name or its abbreviation.
  const auto is_filter = [&decoder](const char* full_name,
                                    const char* abbreviation) {
    return decoder == full_name || decoder == abbreviation;
  };

  if (is_filter("CCITTFaxDecode", "CCF")) {
    return DecodeAllScanlines(
        FPDFAPI_CreateFaxDecoder(src_buf, limit, width, height, pParam),
        dest_buf, dest_size);
  }

  if (is_filter("ASCII85Decode", "A85"))
    return A85Decode(src_buf, limit, dest_buf, dest_size);

  if (is_filter("ASCIIHexDecode", "AHx"))
    return HexDecode(src_buf, limit, dest_buf, dest_size);

  if (is_filter("FlateDecode", "Fl")) {
    return FPDFAPI_FlateOrLZWDecode(false, src_buf, limit, pParam, dest_size,
                                    dest_buf, dest_size);
  }

  if (is_filter("LZWDecode", "LZW")) {
    return FPDFAPI_FlateOrLZWDecode(true, src_buf, limit, pParam, 0, dest_buf,
                                    dest_size);
  }

  if (is_filter("DCTDecode", "DCT")) {
    // The colour transform is on unless the parameters switch it off.
    const bool bColorTransform =
        !pParam || pParam->GetIntegerFor("ColorTransform", 1);
    return DecodeAllScanlines(
        CPDF_ModuleMgr::Get()->GetJpegModule()->CreateDecoder(
            src_buf, limit, width, height, 0, bColorTransform),
        dest_buf, dest_size);
  }

  if (is_filter("RunLengthDecode", "RL"))
    return RunLengthDecode(src_buf, limit, dest_buf, dest_size);

  // Unknown filter: report failure with empty outputs.
  dest_size = 0;
  dest_buf = nullptr;
  return static_cast<uint32_t>(-1);
}
} // namespace
// Creates a parser positioned at the start of the |dwSize| bytes at |pData|,
// without a byte string pool. Only the pointer is stored; the data itself is
// not copied.
CPDF_StreamParser::CPDF_StreamParser(const uint8_t* pData, uint32_t dwSize)
: m_pBuf(pData),
m_Size(dwSize),
m_Pos(0),
m_pLastObj(nullptr),
m_pPool(nullptr) {}
// Creates a parser positioned at the start of the |dwSize| bytes at |pData|.
// Only the pointer is stored; the data itself is not copied. |pPool| is
// passed on to the name, string and dictionary objects that ReadNextObject()
// creates.
CPDF_StreamParser::CPDF_StreamParser(
const uint8_t* pData,
uint32_t dwSize,
const CFX_WeakPtr<CFX_ByteStringPool>& pPool)
: m_pBuf(pData),
m_Size(dwSize),
m_Pos(0),
m_pLastObj(nullptr),
m_pPool(pPool) {}
// Destroys the object left over from the last ParseNextElement() call, if it
// was never claimed through GetObject().
CPDF_StreamParser::~CPDF_StreamParser() {
delete m_pLastObj;
}
// Reads the data of an inline image starting at the current position and
// wraps it in a newly allocated CPDF_Stream.
//
// |pDict| is the inline image dictionary. Its "Filter", "DecodeParms",
// "Width", "Height" and "BitsPerComponent" entries are consulted, and its
// "Length" entry is set to the number of bytes taken from the buffer. Only
// the first filter of a filter array is considered.
// |pCSObj| is the image's color space object, or null. When it is non-null it
// is loaded through |pDoc| to learn the component count; 3 components are
// assumed if loading fails.
//
// Without a filter, the expected image size (clamped to the remaining input)
// is copied verbatim. With a filter, the data is decoded once only to learn
// how many source bytes it occupies; the parser then scans forward to the
// "EI" keyword and copies the still-encoded bytes up to that point.
//
// Returns nullptr when no input remains, when the size computation would
// overflow, or when the decoder reports an offset that is negative as an int
// or lies beyond the remaining input.
// NOTE(review): the returned stream is handed the copied data and |pDict|;
// presumably it takes ownership of both - confirm against CPDF_Stream.
CPDF_Stream* CPDF_StreamParser::ReadInlineStream(CPDF_Document* pDoc,
                                                 CPDF_Dictionary* pDict,
                                                 CPDF_Object* pCSObj) {
  if (m_Pos == m_Size)
    return nullptr;

  // Skip the single whitespace byte that may precede the image data.
  if (PDFCharIsWhitespace(m_pBuf[m_Pos]))
    m_Pos++;

  CFX_ByteString Decoder;
  CPDF_Dictionary* pParam = nullptr;
  CPDF_Object* pFilter = pDict->GetDirectObjectFor("Filter");
  if (pFilter) {
    if (CPDF_Array* pArray = pFilter->AsArray()) {
      Decoder = pArray->GetStringAt(0);
      CPDF_Array* pParams = pDict->GetArrayFor("DecodeParms");
      if (pParams)
        pParam = pParams->GetDictAt(0);
    } else {
      Decoder = pFilter->GetString();
      pParam = pDict->GetDictFor("DecodeParms");
    }
  }

  // Negative dictionary values turn into huge unsigned numbers here and are
  // rejected by the overflow checks below.
  uint32_t width = pDict->GetIntegerFor("Width");
  uint32_t height = pDict->GetIntegerFor("Height");
  uint32_t OrigSize = 0;
  if (pCSObj) {
    uint32_t bpc = pDict->GetIntegerFor("BitsPerComponent");
    uint32_t nComponents = 1;
    CPDF_ColorSpace* pCS = pDoc->LoadColorSpace(pCSObj);
    if (pCS) {
      nComponents = pCS->CountComponents();
      pDoc->GetPageData()->ReleaseColorSpace(pCSObj);
    } else {
      nComponents = 3;
    }
    // pitch = ceil(width * bpc * nComponents / 8), checked at every step.
    uint32_t pitch = width;
    if (bpc && pitch > INT_MAX / bpc)
      return nullptr;
    pitch *= bpc;
    if (nComponents && pitch > INT_MAX / nComponents)
      return nullptr;
    pitch *= nComponents;
    if (pitch > INT_MAX - 7)
      return nullptr;
    pitch += 7;
    pitch /= 8;
    OrigSize = pitch;
  } else {
    // No color space: one bit per pixel.
    if (width > INT_MAX - 7)
      return nullptr;
    OrigSize = ((width + 7) / 8);
  }
  if (height && OrigSize > INT_MAX / height)
    return nullptr;
  OrigSize *= height;

  uint8_t* pData = nullptr;
  uint32_t dwStreamSize;
  if (Decoder.IsEmpty()) {
    // Unfiltered data: take the expected size, but never more than remains.
    if (OrigSize > m_Size - m_Pos)
      OrigSize = m_Size - m_Pos;
    pData = FX_Alloc(uint8_t, OrigSize);
    FXSYS_memcpy(pData, m_pBuf + m_Pos, OrigSize);
    dwStreamSize = OrigSize;
    m_Pos += OrigSize;
  } else {
    // Decode only to find out how much source the image occupies; the decoded
    // bytes themselves are discarded.
    uint32_t dwDestSize = OrigSize;
    dwStreamSize =
        PDF_DecodeInlineStream(m_pBuf + m_Pos, m_Size - m_Pos, width, height,
                               Decoder, pParam, pData, dwDestSize);
    FX_Free(pData);
    if (static_cast<int>(dwStreamSize) < 0)
      return nullptr;

    // The reported offset must lie inside the remaining input; otherwise
    // m_Pos would run past m_Size and the copy below would read beyond the
    // end of |m_pBuf|.
    if (dwStreamSize > m_Size - m_Pos)
      return nullptr;

    uint32_t dwSavePos = m_Pos;
    m_Pos += dwStreamSize;
    // Extend the stream up to (not including) the "EI" keyword.
    while (1) {
      uint32_t dwPrevPos = m_Pos;
      CPDF_StreamParser::SyntaxType type = ParseNextElement();
      if (type == CPDF_StreamParser::EndOfData)
        break;

      if (type != CPDF_StreamParser::Keyword) {
        dwStreamSize += m_Pos - dwPrevPos;
        continue;
      }
      if (GetWordSize() == 2 && GetWordBuf()[0] == 'E' &&
          GetWordBuf()[1] == 'I') {
        // Leave "EI" unread.
        m_Pos = dwPrevPos;
        break;
      }
      dwStreamSize += m_Pos - dwPrevPos;
    }
    m_Pos = dwSavePos;
    pData = FX_Alloc(uint8_t, dwStreamSize);
    FXSYS_memcpy(pData, m_pBuf + m_Pos, dwStreamSize);
    m_Pos += dwStreamSize;
  }
  pDict->SetNewFor<CPDF_Number>("Length", (int)dwStreamSize);
  return new CPDF_Stream(pData, dwStreamSize, pDict);
}
// Parses the next element of the content stream and reports its kind.
//
// Any object left over from the previous call is deleted first. Leading
// whitespace and '%' comments are skipped. Results:
//   EndOfData - the input ran out before a token started.
//   Others    - the token started with a delimiter other than '/', in which
//               case the object comes from ReadNextObject() and may be null,
//               or the word was "true", "false" or "null". Fetch the object
//               with GetObject().
//   Number    - every character of the word is numeric.
//   Name      - the word starts with '/'.
//   Keyword   - anything else.
// For Number, Name and Keyword the zero-terminated word is left in
// m_WordBuffer with its length in m_WordSize.
CPDF_StreamParser::SyntaxType CPDF_StreamParser::ParseNextElement() {
  delete m_pLastObj;
  m_pLastObj = nullptr;
  m_WordSize = 0;
  bool bIsNumber = true;
  if (!PositionIsInBounds())
    return EndOfData;

  int ch = m_pBuf[m_Pos++];
  // Skip any mix of whitespace runs and comments.
  while (1) {
    while (PDFCharIsWhitespace(ch)) {
      if (!PositionIsInBounds())
        return EndOfData;
      ch = m_pBuf[m_Pos++];
    }
    if (ch != '%')
      break;

    // A comment extends to the next line ending.
    while (1) {
      if (!PositionIsInBounds())
        return EndOfData;
      ch = m_pBuf[m_Pos++];
      if (PDFCharIsLineEnding(ch))
        break;
    }
  }

  if (PDFCharIsDelimiter(ch) && ch != '/') {
    // Strings, arrays and dictionaries: un-read the delimiter and let the
    // object reader handle it.
    m_Pos--;
    m_pLastObj = ReadNextObject(false, 0);
    return Others;
  }

  while (1) {
    // Store at most kMaxWordBuffer - 1 bytes so that the terminator written
    // after the loop always has a slot. This assumes m_WordBuffer holds
    // kMaxWordBuffer bytes (it is declared outside this file) - TODO confirm.
    // Longer words are truncated but still consumed from the input.
    if (m_WordSize < kMaxWordBuffer - 1)
      m_WordBuffer[m_WordSize++] = ch;

    if (!PDFCharIsNumeric(ch))
      bIsNumber = false;

    if (!PositionIsInBounds())
      break;

    ch = m_pBuf[m_Pos++];
    if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
      // The terminating character belongs to the next token.
      m_Pos--;
      break;
    }
  }

  m_WordBuffer[m_WordSize] = 0;
  if (bIsNumber)
    return Number;

  if (m_WordBuffer[0] == '/')
    return Name;

  if (m_WordSize == 4) {
    if (memcmp(m_WordBuffer, "true", 4) == 0) {
      m_pLastObj = new CPDF_Boolean(true);
      return Others;
    }
    if (memcmp(m_WordBuffer, "null", 4) == 0) {
      m_pLastObj = new CPDF_Null;
      return Others;
    }
  } else if (m_WordSize == 5) {
    if (memcmp(m_WordBuffer, "false", 5) == 0) {
      m_pLastObj = new CPDF_Boolean(false);
      return Others;
    }
  }
  return Keyword;
}
// Hands the object produced by the most recent ParseNextElement() call to the
// caller and drops the parser's pointer to it, so the parser will no longer
// delete it. Returns nullptr when there is no pending object.
CPDF_Object* CPDF_StreamParser::GetObject() {
  CPDF_Object* const pReleased = m_pLastObj;
  m_pLastObj = nullptr;
  return pReleased;
}
// Reads one object (number, name, string, hex string, dictionary, array,
// boolean or null) starting at the current position and returns it as a
// newly allocated object, or nullptr if no object could be read.
//
// |bAllowNestedArray| controls whether an array may appear inside another
// array. |dwInArrayLevel| is the nesting depth of the object being read.
// Arrays and dictionaries both pass depth + 1 to their children, and a
// container nested deeper than kMaxNestedArrayLevel is rejected. Dictionaries
// take part in the count so that hostile input made of deeply nested "<<"
// cannot recurse without bound. Dictionary values are always read with nested
// arrays allowed.
//
// On a nullptr return the word that could not be handled is left in
// m_WordBuffer / m_WordSize; the array loop below relies on this to detect
// the closing ']'.
CPDF_Object* CPDF_StreamParser::ReadNextObject(bool bAllowNestedArray,
                                               uint32_t dwInArrayLevel) {
  bool bIsNumber;
  GetNextWord(bIsNumber);
  if (!m_WordSize)
    return nullptr;

  if (bIsNumber) {
    // GetNextWord() may fill all kMaxWordBuffer bytes, so only terminate the
    // word when a slot is left. This assumes m_WordBuffer holds
    // kMaxWordBuffer bytes (it is declared outside this file) - TODO confirm.
    // The number itself is built from an explicit length.
    if (m_WordSize < kMaxWordBuffer)
      m_WordBuffer[m_WordSize] = 0;
    return new CPDF_Number(CFX_ByteStringC(m_WordBuffer, m_WordSize));
  }

  const int first_char = m_WordBuffer[0];
  if (first_char == '/') {
    CFX_ByteString name =
        PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1));
    return new CPDF_Name(m_pPool, name);
  }

  if (first_char == '(') {
    CFX_ByteString str = ReadString();
    return new CPDF_String(m_pPool, str, false);
  }

  if (first_char == '<') {
    // A lone '<' starts a hex string; "<<" starts a dictionary.
    if (m_WordSize == 1)
      return new CPDF_String(m_pPool, ReadHexString(), true);

    if (dwInArrayLevel > kMaxNestedArrayLevel)
      return nullptr;

    // Scoped ownership: the early returns below free the partial dictionary.
    auto pDict = pdfium::WrapUnique(new CPDF_Dictionary(m_pPool));
    while (1) {
      GetNextWord(bIsNumber);
      if (m_WordSize == 2 && m_WordBuffer[0] == '>')
        break;

      // Every entry has to start with a name key.
      if (!m_WordSize || m_WordBuffer[0] != '/')
        return nullptr;

      CFX_ByteString key =
          PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1));
      auto pObj = pdfium::WrapUnique(ReadNextObject(true, dwInArrayLevel + 1));
      if (!pObj)
        return nullptr;

      // Entries with an empty key are parsed but dropped.
      if (!key.IsEmpty())
        pDict->SetFor(key, std::move(pObj));
    }
    return pDict.release();
  }

  if (first_char == '[') {
    if ((!bAllowNestedArray && dwInArrayLevel) ||
        dwInArrayLevel > kMaxNestedArrayLevel) {
      return nullptr;
    }

    CPDF_Array* pArray = new CPDF_Array;
    while (1) {
      CPDF_Object* pObj = ReadNextObject(bAllowNestedArray, dwInArrayLevel + 1);
      if (pObj) {
        pArray->Add(pdfium::WrapUnique(pObj));
        continue;
      }
      // No object: stop at end of input or at the closing bracket; any other
      // unrecognised word is skipped.
      if (!m_WordSize || m_WordBuffer[0] == ']')
        break;
    }
    return pArray;
  }

  if (m_WordSize == 5 && !memcmp(m_WordBuffer, "false", 5))
    return new CPDF_Boolean(false);

  if (m_WordSize == 4) {
    if (memcmp(m_WordBuffer, "true", 4) == 0)
      return new CPDF_Boolean(true);
    if (memcmp(m_WordBuffer, "null", 4) == 0)
      return new CPDF_Null;
  }
  return nullptr;
}
// TODO(npm): the following methods are almost identical in cpdf_syntaxparser
//
// Reads the next word into m_WordBuffer / m_WordSize, skipping leading
// whitespace and '%' comments. m_WordSize stays 0 when the input runs out
// before a word starts. |bIsNumber| ends up true only when the word consists
// solely of numeric characters (it is also true when no word was read).
//
// A word is one of:
//   - a name: '/' followed by its regular and numeric characters;
//   - "<<" or ">>", or a single other delimiter character;
//   - a run of characters up to the next delimiter or whitespace.
// At most kMaxWordBuffer bytes are stored; the rest of an over-long word is
// consumed but dropped. The buffer is not zero-terminated here.
void CPDF_StreamParser::GetNextWord(bool& bIsNumber) {
  m_WordSize = 0;
  bIsNumber = true;
  if (!PositionIsInBounds())
    return;

  int ch = m_pBuf[m_Pos++];

  // Skip any mix of whitespace runs and comments.
  for (;;) {
    for (; PDFCharIsWhitespace(ch); ch = m_pBuf[m_Pos++]) {
      if (!PositionIsInBounds())
        return;
    }
    if (ch != '%')
      break;

    // A comment extends to the next line ending.
    do {
      if (!PositionIsInBounds())
        return;
      ch = m_pBuf[m_Pos++];
    } while (!PDFCharIsLineEnding(ch));
  }

  if (!PDFCharIsDelimiter(ch)) {
    // Regular word: collect until a delimiter, whitespace or end of input.
    for (;;) {
      if (m_WordSize < kMaxWordBuffer)
        m_WordBuffer[m_WordSize++] = ch;
      if (!PDFCharIsNumeric(ch))
        bIsNumber = false;
      if (!PositionIsInBounds())
        return;
      ch = m_pBuf[m_Pos++];
      if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
        // The terminating character belongs to the next word.
        m_Pos--;
        return;
      }
    }
  }

  // The word starts with a delimiter, which is always stored.
  bIsNumber = false;
  m_WordBuffer[m_WordSize++] = ch;

  if (ch == '/') {
    // Name: take every following regular or numeric character.
    while (PositionIsInBounds()) {
      ch = m_pBuf[m_Pos++];
      if (!(PDFCharIsOther(ch) || PDFCharIsNumeric(ch))) {
        m_Pos--;
        return;
      }
      if (m_WordSize < kMaxWordBuffer)
        m_WordBuffer[m_WordSize++] = ch;
    }
    return;
  }

  // Only '<' and '>' can form a two-character word, by being doubled.
  if (ch != '<' && ch != '>')
    return;
  if (!PositionIsInBounds())
    return;

  const int bracket = ch;
  ch = m_pBuf[m_Pos++];
  if (ch == bracket)
    m_WordBuffer[m_WordSize++] = ch;
  else
    m_Pos--;
}
// Reads the body of a literal string and returns its decoded bytes.
//
// ReadNextObject() calls this after the opening '(' has been consumed.
// Reading stops at the matching ')' or at the end of the input; nested,
// unescaped parentheses are kept in the result. Backslash escapes are
// decoded: \n \r \t \b \f, one to three octal digits, and a backslash followed
// by a line ending, which produces nothing. Any other escaped character is
// taken literally. The result is truncated to kMaxStringLength bytes.
CFX_ByteString CPDF_StreamParser::ReadString() {
if (!PositionIsInBounds())
return CFX_ByteString();
uint8_t ch = m_pBuf[m_Pos++];
CFX_ByteTextBuf buf;
// Number of currently open nested '(' inside the string.
int parlevel = 0;
// Parser state:
//   0 - ordinary characters
//   1 - just after a backslash
//   2 - after the first octal digit of an escape
//   3 - after the second octal digit of an escape
//   4 - after a backslash followed by '\r'
int status = 0;
// Value accumulated so far for an octal escape.
int iEscCode = 0;
// Each pass handles |ch| in the current state. A `break` out of the switch
// moves on to the next input byte; a `continue` re-handles the same byte in
// the new state.
while (1) {
switch (status) {
case 0:
if (ch == ')') {
if (parlevel == 0) {
// Matching close parenthesis: the string is complete.
if (buf.GetLength() > kMaxStringLength) {
return CFX_ByteString(buf.GetBuffer(), kMaxStringLength);
}
return buf.MakeString();
}
parlevel--;
buf.AppendChar(')');
} else if (ch == '(') {
parlevel++;
buf.AppendChar('(');
} else if (ch == '\\') {
status = 1;
} else {
buf.AppendChar((char)ch);
}
break;
case 1:
if (ch >= '0' && ch <= '7') {
iEscCode = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));
status = 2;
break;
}
if (ch == 'n') {
buf.AppendChar('\n');
} else if (ch == 'r') {
buf.AppendChar('\r');
} else if (ch == 't') {
buf.AppendChar('\t');
} else if (ch == 'b') {
buf.AppendChar('\b');
} else if (ch == 'f') {
buf.AppendChar('\f');
} else if (ch == '\r') {
// Backslash + CR: an LF may follow and belongs to the same line ending.
status = 4;
break;
} else if (ch == '\n') {
// Backslash + LF: line continuation, nothing is appended.
} else {
buf.AppendChar(ch);
}
status = 0;
break;
case 2:
if (ch >= '0' && ch <= '7') {
iEscCode =
iEscCode * 8 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));
status = 3;
} else {
// The escape ended after one digit; |ch| is handled again in state 0.
buf.AppendChar(iEscCode);
status = 0;
continue;
}
break;
case 3:
if (ch >= '0' && ch <= '7') {
iEscCode =
iEscCode * 8 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));
buf.AppendChar(iEscCode);
status = 0;
} else {
// The escape ended after two digits; |ch| is handled again in state 0.
buf.AppendChar(iEscCode);
status = 0;
continue;
}
break;
case 4:
status = 0;
// Swallow the LF of a CR LF pair; anything else is handled in state 0.
if (ch != '\n') {
continue;
}
break;
}
if (!PositionIsInBounds())
break;
ch = m_pBuf[m_Pos++];
}
// Reached only when the input ended before the closing ')'.
// NOTE(review): the loop above is only left when PositionIsInBounds() is
// false and nothing changes the position afterwards, so this condition
// cannot be true here.
if (PositionIsInBounds())
++m_Pos;
if (buf.GetLength() > kMaxStringLength) {
return CFX_ByteString(buf.GetBuffer(), kMaxStringLength);
}
return buf.MakeString();
}
// Reads a hexadecimal string up to and including its closing '>' (or up to
// the end of the input) and returns the decoded bytes.
//
// Characters that are not hex digits are skipped. Digits are paired into
// bytes, high nibble first; an unpaired final digit is treated as the high
// nibble of a byte whose low nibble is zero. The result is truncated to
// kMaxStringLength bytes.
CFX_ByteString CPDF_StreamParser::ReadHexString() {
  if (!PositionIsInBounds())
    return CFX_ByteString();

  CFX_ByteTextBuf decoded;
  bool bHaveHighNibble = false;
  int byte_value = 0;
  while (PositionIsInBounds()) {
    const int ch = m_pBuf[m_Pos++];
    if (ch == '>')
      break;
    if (!std::isxdigit(ch))
      continue;

    const int nibble = FXSYS_toHexDigit(ch);
    if (bHaveHighNibble) {
      // Second digit of the pair completes the byte.
      byte_value += nibble;
      decoded.AppendByte(static_cast<uint8_t>(byte_value));
    } else {
      byte_value = nibble * 16;
    }
    bHaveHighNibble = !bHaveHighNibble;
  }

  // Flush a dangling high nibble.
  if (bHaveHighNibble)
    decoded.AppendChar(static_cast<char>(byte_value));

  if (decoded.GetLength() > kMaxStringLength)
    return CFX_ByteString(decoded.GetBuffer(), kMaxStringLength);

  return decoded.MakeString();
}
// True while at least one unread byte remains in the buffer.
bool CPDF_StreamParser::PositionIsInBounds() const {
  return m_Size > m_Pos;
}