| // Copyright 2016 PDFium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
| |
| #include "core/fpdfapi/page/cpdf_streamparser.h" |
| |
| #include <algorithm> |
| #include <memory> |
| #include <sstream> |
| #include <utility> |
| |
| #include "constants/stream_dict_common.h" |
| #include "core/fpdfapi/page/cpdf_docpagedata.h" |
| #include "core/fpdfapi/parser/cpdf_array.h" |
| #include "core/fpdfapi/parser/cpdf_boolean.h" |
| #include "core/fpdfapi/parser/cpdf_dictionary.h" |
| #include "core/fpdfapi/parser/cpdf_name.h" |
| #include "core/fpdfapi/parser/cpdf_null.h" |
| #include "core/fpdfapi/parser/cpdf_number.h" |
| #include "core/fpdfapi/parser/cpdf_stream.h" |
| #include "core/fpdfapi/parser/cpdf_string.h" |
| #include "core/fpdfapi/parser/fpdf_parser_decode.h" |
| #include "core/fpdfapi/parser/fpdf_parser_utility.h" |
| #include "core/fxcodec/fx_codec.h" |
| #include "core/fxcodec/jpeg/jpegmodule.h" |
| #include "core/fxcodec/scanlinedecoder.h" |
| #include "core/fxcrt/fx_extension.h" |
| #include "core/fxcrt/fx_memory_wrappers.h" |
| #include "core/fxcrt/fx_safe_types.h" |
| |
| namespace { |
| |
// Deepest object nesting accepted by the parser before it gives up.
constexpr uint32_t kMaxNestedParsingLevel = 512;
// Upper bound, in bytes, on the length of a string returned by the parser.
constexpr size_t kMaxStringLength = 32767;

// Spellings of the literal keywords that are recognized while parsing.
constexpr char kTrue[] = "true";
constexpr char kFalse[] = "false";
constexpr char kNull[] = "null";
| |
| uint32_t DecodeAllScanlines(std::unique_ptr<ScanlineDecoder> pDecoder) { |
| if (!pDecoder) |
| return FX_INVALID_OFFSET; |
| |
| int ncomps = pDecoder->CountComps(); |
| int bpc = pDecoder->GetBPC(); |
| int width = pDecoder->GetWidth(); |
| int height = pDecoder->GetHeight(); |
| if (width <= 0 || height <= 0) |
| return FX_INVALID_OFFSET; |
| |
| FX_SAFE_UINT32 size = fxcodec::CalculatePitch8(bpc, ncomps, width); |
| size *= height; |
| if (size.ValueOrDefault(0) == 0) |
| return FX_INVALID_OFFSET; |
| |
| for (int row = 0; row < height; ++row) { |
| if (!pDecoder->GetScanline(row)) |
| break; |
| } |
| return pDecoder->GetSrcOffset(); |
| } |
| |
| uint32_t DecodeInlineStream(pdfium::span<const uint8_t> src_span, |
| int width, |
| int height, |
| const ByteString& decoder, |
| const CPDF_Dictionary* pParam, |
| uint32_t orig_size) { |
| // |decoder| should not be an abbreviation. |
| ASSERT(decoder != "A85"); |
| ASSERT(decoder != "AHx"); |
| ASSERT(decoder != "CCF"); |
| ASSERT(decoder != "DCT"); |
| ASSERT(decoder != "Fl"); |
| ASSERT(decoder != "LZW"); |
| ASSERT(decoder != "RL"); |
| |
| std::unique_ptr<uint8_t, FxFreeDeleter> ignored_result; |
| uint32_t ignored_size; |
| if (decoder == "FlateDecode") { |
| return FlateOrLZWDecode(false, src_span, pParam, orig_size, &ignored_result, |
| &ignored_size); |
| } |
| if (decoder == "LZWDecode") { |
| return FlateOrLZWDecode(true, src_span, pParam, 0, &ignored_result, |
| &ignored_size); |
| } |
| if (decoder == "DCTDecode") { |
| std::unique_ptr<ScanlineDecoder> pDecoder = JpegModule::CreateDecoder( |
| src_span, width, height, 0, |
| !pParam || pParam->GetIntegerFor("ColorTransform", 1)); |
| return DecodeAllScanlines(std::move(pDecoder)); |
| } |
| if (decoder == "CCITTFaxDecode") { |
| std::unique_ptr<ScanlineDecoder> pDecoder = |
| CreateFaxDecoder(src_span, width, height, pParam); |
| return DecodeAllScanlines(std::move(pDecoder)); |
| } |
| |
| if (decoder == "ASCII85Decode") |
| return A85Decode(src_span, &ignored_result, &ignored_size); |
| if (decoder == "ASCIIHexDecode") |
| return HexDecode(src_span, &ignored_result, &ignored_size); |
| if (decoder == "RunLengthDecode") |
| return RunLengthDecode(src_span, &ignored_result, &ignored_size); |
| |
| return FX_INVALID_OFFSET; |
| } |
| |
| } // namespace |
| |
| CPDF_StreamParser::CPDF_StreamParser(pdfium::span<const uint8_t> span) |
| : m_pBuf(span) {} |
| |
| CPDF_StreamParser::CPDF_StreamParser(pdfium::span<const uint8_t> span, |
| const WeakPtr<ByteStringPool>& pPool) |
| : m_pPool(pPool), m_pBuf(span) {} |
| |
| CPDF_StreamParser::~CPDF_StreamParser() = default; |
| |
| RetainPtr<CPDF_Stream> CPDF_StreamParser::ReadInlineStream( |
| CPDF_Document* pDoc, |
| RetainPtr<CPDF_Dictionary> pDict, |
| const CPDF_Object* pCSObj) { |
| if (m_Pos < m_pBuf.size() && PDFCharIsWhitespace(m_pBuf[m_Pos])) |
| m_Pos++; |
| |
| if (m_Pos == m_pBuf.size()) |
| return nullptr; |
| |
| ByteString decoder; |
| const CPDF_Dictionary* pParam = nullptr; |
| CPDF_Object* pFilter = pDict->GetDirectObjectFor("Filter"); |
| if (pFilter) { |
| const CPDF_Array* pArray = pFilter->AsArray(); |
| if (pArray) { |
| decoder = pArray->GetStringAt(0); |
| const CPDF_Array* pParams = |
| pDict->GetArrayFor(pdfium::stream::kDecodeParms); |
| if (pParams) |
| pParam = pParams->GetDictAt(0); |
| } else { |
| decoder = pFilter->GetString(); |
| pParam = pDict->GetDictFor(pdfium::stream::kDecodeParms); |
| } |
| } |
| uint32_t width = pDict->GetIntegerFor("Width"); |
| uint32_t height = pDict->GetIntegerFor("Height"); |
| uint32_t bpc = 1; |
| uint32_t nComponents = 1; |
| if (pCSObj) { |
| RetainPtr<CPDF_ColorSpace> pCS = |
| CPDF_DocPageData::FromDocument(pDoc)->GetColorSpace(pCSObj, nullptr); |
| nComponents = pCS ? pCS->CountComponents() : 3; |
| bpc = pDict->GetIntegerFor("BitsPerComponent"); |
| } |
| FX_SAFE_UINT32 size = fxcodec::CalculatePitch8(bpc, nComponents, width); |
| size *= height; |
| if (!size.IsValid()) |
| return nullptr; |
| |
| uint32_t dwOrigSize = size.ValueOrDie(); |
| std::unique_ptr<uint8_t, FxFreeDeleter> pData; |
| uint32_t dwStreamSize; |
| if (decoder.IsEmpty()) { |
| dwOrigSize = std::min<uint32_t>(dwOrigSize, m_pBuf.size() - m_Pos); |
| pData.reset(FX_AllocUninit(uint8_t, dwOrigSize)); |
| auto copy_span = m_pBuf.subspan(m_Pos, dwOrigSize); |
| memcpy(pData.get(), copy_span.data(), copy_span.size()); |
| dwStreamSize = dwOrigSize; |
| m_Pos += dwOrigSize; |
| } else { |
| dwStreamSize = DecodeInlineStream(m_pBuf.subspan(m_Pos), width, height, |
| decoder, pParam, dwOrigSize); |
| if (!pdfium::base::IsValueInRangeForNumericType<int>(dwStreamSize)) |
| return nullptr; |
| |
| uint32_t dwSavePos = m_Pos; |
| m_Pos += dwStreamSize; |
| while (1) { |
| uint32_t dwPrevPos = m_Pos; |
| CPDF_StreamParser::SyntaxType type = ParseNextElement(); |
| if (type == CPDF_StreamParser::EndOfData) |
| break; |
| |
| if (type != CPDF_StreamParser::Keyword) { |
| dwStreamSize += m_Pos - dwPrevPos; |
| continue; |
| } |
| if (GetWord() == "EI") { |
| m_Pos = dwPrevPos; |
| break; |
| } |
| dwStreamSize += m_Pos - dwPrevPos; |
| } |
| m_Pos = dwSavePos; |
| pData.reset(FX_AllocUninit(uint8_t, dwStreamSize)); |
| auto copy_span = m_pBuf.subspan(m_Pos, dwStreamSize); |
| memcpy(pData.get(), copy_span.data(), copy_span.size()); |
| m_Pos += dwStreamSize; |
| } |
| pDict->SetNewFor<CPDF_Number>("Length", static_cast<int>(dwStreamSize)); |
| return pdfium::MakeRetain<CPDF_Stream>(std::move(pData), dwStreamSize, |
| std::move(pDict)); |
| } |
| |
| CPDF_StreamParser::SyntaxType CPDF_StreamParser::ParseNextElement() { |
| m_pLastObj.Reset(); |
| m_WordSize = 0; |
| if (!PositionIsInBounds()) |
| return EndOfData; |
| |
| uint8_t ch = m_pBuf[m_Pos++]; |
| while (1) { |
| while (PDFCharIsWhitespace(ch)) { |
| if (!PositionIsInBounds()) |
| return EndOfData; |
| |
| ch = m_pBuf[m_Pos++]; |
| } |
| |
| if (ch != '%') |
| break; |
| |
| while (1) { |
| if (!PositionIsInBounds()) |
| return EndOfData; |
| |
| ch = m_pBuf[m_Pos++]; |
| if (PDFCharIsLineEnding(ch)) |
| break; |
| } |
| } |
| |
| if (PDFCharIsDelimiter(ch) && ch != '/') { |
| m_Pos--; |
| m_pLastObj = ReadNextObject(false, false, 0); |
| return Others; |
| } |
| |
| bool bIsNumber = true; |
| while (1) { |
| if (m_WordSize < kMaxWordLength) |
| m_WordBuffer[m_WordSize++] = ch; |
| |
| if (!PDFCharIsNumeric(ch)) |
| bIsNumber = false; |
| |
| if (!PositionIsInBounds()) |
| break; |
| |
| ch = m_pBuf[m_Pos++]; |
| |
| if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) { |
| m_Pos--; |
| break; |
| } |
| } |
| |
| m_WordBuffer[m_WordSize] = 0; |
| if (bIsNumber) |
| return Number; |
| |
| if (m_WordBuffer[0] == '/') |
| return Name; |
| |
| if (m_WordSize == 4) { |
| if (WordBufferMatches(kTrue)) { |
| m_pLastObj = pdfium::MakeRetain<CPDF_Boolean>(true); |
| return Others; |
| } |
| if (WordBufferMatches(kNull)) { |
| m_pLastObj = pdfium::MakeRetain<CPDF_Null>(); |
| return Others; |
| } |
| } else if (m_WordSize == 5) { |
| if (WordBufferMatches(kFalse)) { |
| m_pLastObj = pdfium::MakeRetain<CPDF_Boolean>(false); |
| return Others; |
| } |
| } |
| return Keyword; |
| } |
| |
| RetainPtr<CPDF_Object> CPDF_StreamParser::ReadNextObject( |
| bool bAllowNestedArray, |
| bool bInArray, |
| uint32_t dwRecursionLevel) { |
| bool bIsNumber; |
| // Must get the next word before returning to avoid infinite loops. |
| GetNextWord(bIsNumber); |
| if (!m_WordSize || dwRecursionLevel > kMaxNestedParsingLevel) |
| return nullptr; |
| |
| if (bIsNumber) { |
| m_WordBuffer[m_WordSize] = 0; |
| return pdfium::MakeRetain<CPDF_Number>( |
| ByteStringView(m_WordBuffer, m_WordSize)); |
| } |
| |
| int first_char = m_WordBuffer[0]; |
| if (first_char == '/') { |
| ByteString name = |
| PDF_NameDecode(ByteStringView(m_WordBuffer + 1, m_WordSize - 1)); |
| return pdfium::MakeRetain<CPDF_Name>(m_pPool, name); |
| } |
| |
| if (first_char == '(') { |
| ByteString str = ReadString(); |
| return pdfium::MakeRetain<CPDF_String>(m_pPool, str, false); |
| } |
| |
| if (first_char == '<') { |
| if (m_WordSize == 1) |
| return pdfium::MakeRetain<CPDF_String>(m_pPool, ReadHexString(), true); |
| |
| auto pDict = pdfium::MakeRetain<CPDF_Dictionary>(m_pPool); |
| while (1) { |
| GetNextWord(bIsNumber); |
| if (m_WordSize == 2 && m_WordBuffer[0] == '>') |
| break; |
| |
| if (!m_WordSize || m_WordBuffer[0] != '/') |
| return nullptr; |
| |
| ByteString key = |
| PDF_NameDecode(ByteStringView(m_WordBuffer + 1, m_WordSize - 1)); |
| RetainPtr<CPDF_Object> pObj = |
| ReadNextObject(true, bInArray, dwRecursionLevel + 1); |
| if (!pObj) |
| return nullptr; |
| |
| if (!key.IsEmpty()) |
| pDict->SetFor(key, std::move(pObj)); |
| } |
| return pDict; |
| } |
| |
| if (first_char == '[') { |
| if ((!bAllowNestedArray && bInArray)) |
| return nullptr; |
| |
| auto pArray = pdfium::MakeRetain<CPDF_Array>(); |
| while (1) { |
| RetainPtr<CPDF_Object> pObj = |
| ReadNextObject(bAllowNestedArray, true, dwRecursionLevel + 1); |
| if (pObj) { |
| pArray->Append(std::move(pObj)); |
| continue; |
| } |
| if (!m_WordSize || m_WordBuffer[0] == ']') |
| break; |
| } |
| return pArray; |
| } |
| |
| if (WordBufferMatches(kFalse)) |
| return pdfium::MakeRetain<CPDF_Boolean>(false); |
| if (WordBufferMatches(kTrue)) |
| return pdfium::MakeRetain<CPDF_Boolean>(true); |
| if (WordBufferMatches(kNull)) |
| return pdfium::MakeRetain<CPDF_Null>(); |
| return nullptr; |
| } |
| |
| // TODO(npm): the following methods are almost identical in cpdf_syntaxparser |
| void CPDF_StreamParser::GetNextWord(bool& bIsNumber) { |
| m_WordSize = 0; |
| bIsNumber = true; |
| if (!PositionIsInBounds()) |
| return; |
| |
| uint8_t ch = m_pBuf[m_Pos++]; |
| while (1) { |
| while (PDFCharIsWhitespace(ch)) { |
| if (!PositionIsInBounds()) { |
| return; |
| } |
| ch = m_pBuf[m_Pos++]; |
| } |
| |
| if (ch != '%') |
| break; |
| |
| while (1) { |
| if (!PositionIsInBounds()) |
| return; |
| ch = m_pBuf[m_Pos++]; |
| if (PDFCharIsLineEnding(ch)) |
| break; |
| } |
| } |
| |
| if (PDFCharIsDelimiter(ch)) { |
| bIsNumber = false; |
| m_WordBuffer[m_WordSize++] = ch; |
| if (ch == '/') { |
| while (1) { |
| if (!PositionIsInBounds()) |
| return; |
| ch = m_pBuf[m_Pos++]; |
| if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) { |
| m_Pos--; |
| return; |
| } |
| if (m_WordSize < kMaxWordLength) |
| m_WordBuffer[m_WordSize++] = ch; |
| } |
| } else if (ch == '<') { |
| if (!PositionIsInBounds()) |
| return; |
| ch = m_pBuf[m_Pos++]; |
| if (ch == '<') |
| m_WordBuffer[m_WordSize++] = ch; |
| else |
| m_Pos--; |
| } else if (ch == '>') { |
| if (!PositionIsInBounds()) |
| return; |
| ch = m_pBuf[m_Pos++]; |
| if (ch == '>') |
| m_WordBuffer[m_WordSize++] = ch; |
| else |
| m_Pos--; |
| } |
| return; |
| } |
| |
| while (1) { |
| if (m_WordSize < kMaxWordLength) |
| m_WordBuffer[m_WordSize++] = ch; |
| if (!PDFCharIsNumeric(ch)) |
| bIsNumber = false; |
| if (!PositionIsInBounds()) |
| return; |
| |
| ch = m_pBuf[m_Pos++]; |
| if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) { |
| m_Pos--; |
| break; |
| } |
| } |
| } |
| |
| ByteString CPDF_StreamParser::ReadString() { |
| if (!PositionIsInBounds()) |
| return ByteString(); |
| |
| uint8_t ch = m_pBuf[m_Pos++]; |
| std::ostringstream buf; |
| int parlevel = 0; |
| int status = 0; |
| int iEscCode = 0; |
| while (1) { |
| switch (status) { |
| case 0: |
| if (ch == ')') { |
| if (parlevel == 0) { |
| if (buf.tellp() <= 0) |
| return ByteString(); |
| |
| return ByteString( |
| buf.str().c_str(), |
| std::min(static_cast<size_t>(buf.tellp()), kMaxStringLength)); |
| } |
| parlevel--; |
| buf << ')'; |
| } else if (ch == '(') { |
| parlevel++; |
| buf << '('; |
| } else if (ch == '\\') { |
| status = 1; |
| } else { |
| buf << static_cast<char>(ch); |
| } |
| break; |
| case 1: |
| if (FXSYS_IsOctalDigit(ch)) { |
| iEscCode = FXSYS_DecimalCharToInt(static_cast<char>(ch)); |
| status = 2; |
| break; |
| } |
| if (ch == '\r') { |
| status = 4; |
| break; |
| } |
| if (ch == '\n') { |
| // Do nothing. |
| } else if (ch == 'n') { |
| buf << '\n'; |
| } else if (ch == 'r') { |
| buf << '\r'; |
| } else if (ch == 't') { |
| buf << '\t'; |
| } else if (ch == 'b') { |
| buf << '\b'; |
| } else if (ch == 'f') { |
| buf << '\f'; |
| } else { |
| buf << static_cast<char>(ch); |
| } |
| status = 0; |
| break; |
| case 2: |
| if (FXSYS_IsOctalDigit(ch)) { |
| iEscCode = |
| iEscCode * 8 + FXSYS_DecimalCharToInt(static_cast<char>(ch)); |
| status = 3; |
| } else { |
| buf << static_cast<char>(iEscCode); |
| status = 0; |
| continue; |
| } |
| break; |
| case 3: |
| if (FXSYS_IsOctalDigit(ch)) { |
| iEscCode = |
| iEscCode * 8 + FXSYS_DecimalCharToInt(static_cast<char>(ch)); |
| buf << static_cast<char>(iEscCode); |
| status = 0; |
| } else { |
| buf << static_cast<char>(iEscCode); |
| status = 0; |
| continue; |
| } |
| break; |
| case 4: |
| status = 0; |
| if (ch != '\n') |
| continue; |
| break; |
| } |
| if (!PositionIsInBounds()) |
| break; |
| |
| ch = m_pBuf[m_Pos++]; |
| } |
| if (PositionIsInBounds()) |
| ++m_Pos; |
| |
| if (buf.tellp() <= 0) |
| return ByteString(); |
| |
| return ByteString( |
| buf.str().c_str(), |
| std::min(static_cast<size_t>(buf.tellp()), kMaxStringLength)); |
| } |
| |
| ByteString CPDF_StreamParser::ReadHexString() { |
| if (!PositionIsInBounds()) |
| return ByteString(); |
| |
| std::ostringstream buf; |
| bool bFirst = true; |
| int code = 0; |
| while (PositionIsInBounds()) { |
| uint8_t ch = m_pBuf[m_Pos++]; |
| if (ch == '>') |
| break; |
| |
| if (!std::isxdigit(ch)) |
| continue; |
| |
| int val = FXSYS_HexCharToInt(ch); |
| if (bFirst) { |
| code = val * 16; |
| } else { |
| code += val; |
| buf << static_cast<uint8_t>(code); |
| } |
| bFirst = !bFirst; |
| } |
| if (!bFirst) |
| buf << static_cast<char>(code); |
| |
| if (buf.tellp() <= 0) |
| return ByteString(); |
| |
| return ByteString( |
| buf.str().c_str(), |
| std::min(static_cast<size_t>(buf.tellp()), kMaxStringLength)); |
| } |
| |
| bool CPDF_StreamParser::PositionIsInBounds() const { |
| return m_Pos < m_pBuf.size(); |
| } |
| |
| bool CPDF_StreamParser::WordBufferMatches(const char* pWord) const { |
| const size_t iLength = strlen(pWord); |
| return m_WordSize == iLength && memcmp(m_WordBuffer, pWord, iLength) == 0; |
| } |