|  | // Copyright 2016 The PDFium Authors | 
|  | // Use of this source code is governed by a BSD-style license that can be | 
|  | // found in the LICENSE file. | 
|  |  | 
|  | // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 
|  |  | 
|  | #include "core/fpdfapi/parser/cpdf_syntax_parser.h" | 
|  |  | 
|  | #include <ctype.h> | 
|  |  | 
|  | #include <algorithm> | 
|  | #include <utility> | 
|  |  | 
|  | #include "core/fpdfapi/parser/cpdf_array.h" | 
|  | #include "core/fpdfapi/parser/cpdf_boolean.h" | 
|  | #include "core/fpdfapi/parser/cpdf_crypto_handler.h" | 
|  | #include "core/fpdfapi/parser/cpdf_dictionary.h" | 
|  | #include "core/fpdfapi/parser/cpdf_name.h" | 
|  | #include "core/fpdfapi/parser/cpdf_null.h" | 
|  | #include "core/fpdfapi/parser/cpdf_number.h" | 
|  | #include "core/fpdfapi/parser/cpdf_read_validator.h" | 
|  | #include "core/fpdfapi/parser/cpdf_reference.h" | 
|  | #include "core/fpdfapi/parser/cpdf_stream.h" | 
|  | #include "core/fpdfapi/parser/cpdf_string.h" | 
|  | #include "core/fpdfapi/parser/fpdf_parser_utility.h" | 
|  | #include "core/fxcrt/autorestorer.h" | 
|  | #include "core/fxcrt/cfx_read_only_vector_stream.h" | 
|  | #include "core/fxcrt/fixed_uninit_data_vector.h" | 
|  | #include "core/fxcrt/fx_extension.h" | 
|  | #include "core/fxcrt/fx_safe_types.h" | 
|  | #include "third_party/base/check.h" | 
|  | #include "third_party/base/check_op.h" | 
|  | #include "third_party/base/numerics/safe_math.h" | 
|  |  | 
|  | namespace { | 
|  |  | 
// States of the literal-string scanner used by CPDF_SyntaxParser::ReadString().
enum class ReadStatus {
  // Ordinary characters; parentheses are balanced in this state.
  kNormal,
  // The previous character was a backslash.
  kBackslash,
  // One octal digit of an escape has been read.
  kOctal,
  // Two octal digits of an escape have been read.
  kFinishOctal,
  // A backslash followed by a carriage return has been read.
  kCarriageReturn,
};
|  |  | 
|  | class ReadableSubStream final : public IFX_SeekableReadStream { | 
|  | public: | 
|  | ReadableSubStream(RetainPtr<IFX_SeekableReadStream> pFileRead, | 
|  | FX_FILESIZE part_offset, | 
|  | FX_FILESIZE part_size) | 
|  | : m_pFileRead(std::move(pFileRead)), | 
|  | m_PartOffset(part_offset), | 
|  | m_PartSize(part_size) {} | 
|  |  | 
|  | ~ReadableSubStream() override = default; | 
|  |  | 
|  | // IFX_SeekableReadStream overrides: | 
|  | bool ReadBlockAtOffset(pdfium::span<uint8_t> buffer, | 
|  | FX_FILESIZE offset) override { | 
|  | FX_SAFE_FILESIZE safe_end = offset; | 
|  | safe_end += buffer.size(); | 
|  | // Check that requested range is valid, to prevent calling of ReadBlock | 
|  | // of original m_pFileRead with incorrect params. | 
|  | if (!safe_end.IsValid() || safe_end.ValueOrDie() > m_PartSize) | 
|  | return false; | 
|  |  | 
|  | return m_pFileRead->ReadBlockAtOffset(buffer, m_PartOffset + offset); | 
|  | } | 
|  |  | 
|  | FX_FILESIZE GetSize() override { return m_PartSize; } | 
|  |  | 
|  | private: | 
|  | RetainPtr<IFX_SeekableReadStream> m_pFileRead; | 
|  | FX_FILESIZE m_PartOffset; | 
|  | FX_FILESIZE m_PartSize; | 
|  | }; | 
|  |  | 
|  | }  // namespace | 
|  |  | 
|  | // static | 
|  | int CPDF_SyntaxParser::s_CurrentRecursionDepth = 0; | 
|  |  | 
|  | // static | 
|  | std::unique_ptr<CPDF_SyntaxParser> CPDF_SyntaxParser::CreateForTesting( | 
|  | RetainPtr<IFX_SeekableReadStream> pFileAccess, | 
|  | FX_FILESIZE HeaderOffset) { | 
|  | return std::make_unique<CPDF_SyntaxParser>( | 
|  | pdfium::MakeRetain<CPDF_ReadValidator>(std::move(pFileAccess), nullptr), | 
|  | HeaderOffset); | 
|  | } | 
|  |  | 
|  | CPDF_SyntaxParser::CPDF_SyntaxParser( | 
|  | RetainPtr<IFX_SeekableReadStream> pFileAccess) | 
|  | : CPDF_SyntaxParser( | 
|  | pdfium::MakeRetain<CPDF_ReadValidator>(std::move(pFileAccess), | 
|  | nullptr), | 
|  | 0) {} | 
|  |  | 
|  | CPDF_SyntaxParser::CPDF_SyntaxParser(RetainPtr<CPDF_ReadValidator> validator, | 
|  | FX_FILESIZE HeaderOffset) | 
|  | : m_pFileAccess(std::move(validator)), | 
|  | m_HeaderOffset(HeaderOffset), | 
|  | m_FileLen(m_pFileAccess->GetSize()) { | 
|  | DCHECK(m_HeaderOffset <= m_FileLen); | 
|  | } | 
|  |  | 
|  | CPDF_SyntaxParser::~CPDF_SyntaxParser() = default; | 
|  |  | 
|  | bool CPDF_SyntaxParser::GetCharAt(FX_FILESIZE pos, uint8_t& ch) { | 
|  | AutoRestorer<FX_FILESIZE> save_pos(&m_Pos); | 
|  | m_Pos = pos; | 
|  | return GetNextChar(ch); | 
|  | } | 
|  |  | 
|  | bool CPDF_SyntaxParser::ReadBlockAt(FX_FILESIZE read_pos) { | 
|  | if (read_pos >= m_FileLen) | 
|  | return false; | 
|  | size_t read_size = m_ReadBufferSize; | 
|  | FX_SAFE_FILESIZE safe_end = read_pos; | 
|  | safe_end += read_size; | 
|  | if (!safe_end.IsValid() || safe_end.ValueOrDie() > m_FileLen) | 
|  | read_size = m_FileLen - read_pos; | 
|  |  | 
|  | m_pFileBuf.resize(read_size); | 
|  | if (!m_pFileAccess->ReadBlockAtOffset(m_pFileBuf, read_pos)) { | 
|  | m_pFileBuf.clear(); | 
|  | return false; | 
|  | } | 
|  |  | 
|  | m_BufOffset = read_pos; | 
|  | return true; | 
|  | } | 
|  |  | 
|  | bool CPDF_SyntaxParser::GetNextChar(uint8_t& ch) { | 
|  | FX_FILESIZE pos = m_Pos + m_HeaderOffset; | 
|  | if (pos >= m_FileLen) | 
|  | return false; | 
|  |  | 
|  | if (!IsPositionRead(pos) && !ReadBlockAt(pos)) | 
|  | return false; | 
|  |  | 
|  | ch = m_pFileBuf[pos - m_BufOffset]; | 
|  | m_Pos++; | 
|  | return true; | 
|  | } | 
|  |  | 
|  | FX_FILESIZE CPDF_SyntaxParser::GetDocumentSize() const { | 
|  | return m_FileLen - m_HeaderOffset; | 
|  | } | 
|  |  | 
|  | bool CPDF_SyntaxParser::GetCharAtBackward(FX_FILESIZE pos, uint8_t* ch) { | 
|  | pos += m_HeaderOffset; | 
|  | if (pos >= m_FileLen) | 
|  | return false; | 
|  |  | 
|  | if (!IsPositionRead(pos)) { | 
|  | FX_FILESIZE block_start = 0; | 
|  | if (pos >= CPDF_Stream::kFileBufSize) | 
|  | block_start = pos - CPDF_Stream::kFileBufSize + 1; | 
|  | if (!ReadBlockAt(block_start) || !IsPositionRead(pos)) | 
|  | return false; | 
|  | } | 
|  | *ch = m_pFileBuf[pos - m_BufOffset]; | 
|  | return true; | 
|  | } | 
|  |  | 
|  | bool CPDF_SyntaxParser::ReadBlock(pdfium::span<uint8_t> buffer) { | 
|  | if (!m_pFileAccess->ReadBlockAtOffset(buffer, m_Pos + m_HeaderOffset)) | 
|  | return false; | 
|  | m_Pos += buffer.size(); | 
|  | return true; | 
|  | } | 
|  |  | 
|  | CPDF_SyntaxParser::WordType CPDF_SyntaxParser::GetNextWordInternal() { | 
|  | m_WordSize = 0; | 
|  | WordType word_type = WordType::kNumber; | 
|  |  | 
|  | ToNextWord(); | 
|  | uint8_t ch; | 
|  | if (!GetNextChar(ch)) | 
|  | return word_type; | 
|  |  | 
|  | if (PDFCharIsDelimiter(ch)) { | 
|  | word_type = WordType::kWord; | 
|  |  | 
|  | m_WordBuffer[m_WordSize++] = ch; | 
|  | if (ch == '/') { | 
|  | while (true) { | 
|  | if (!GetNextChar(ch)) | 
|  | return word_type; | 
|  |  | 
|  | if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) { | 
|  | m_Pos--; | 
|  | return word_type; | 
|  | } | 
|  |  | 
|  | if (m_WordSize < sizeof(m_WordBuffer) - 1) | 
|  | m_WordBuffer[m_WordSize++] = ch; | 
|  | } | 
|  | } else if (ch == '<') { | 
|  | if (!GetNextChar(ch)) | 
|  | return word_type; | 
|  |  | 
|  | if (ch == '<') | 
|  | m_WordBuffer[m_WordSize++] = ch; | 
|  | else | 
|  | m_Pos--; | 
|  | } else if (ch == '>') { | 
|  | if (!GetNextChar(ch)) | 
|  | return word_type; | 
|  |  | 
|  | if (ch == '>') | 
|  | m_WordBuffer[m_WordSize++] = ch; | 
|  | else | 
|  | m_Pos--; | 
|  | } | 
|  | return word_type; | 
|  | } | 
|  |  | 
|  | while (true) { | 
|  | if (m_WordSize < sizeof(m_WordBuffer) - 1) | 
|  | m_WordBuffer[m_WordSize++] = ch; | 
|  |  | 
|  | if (!PDFCharIsNumeric(ch)) | 
|  | word_type = WordType::kWord; | 
|  |  | 
|  | if (!GetNextChar(ch)) | 
|  | return word_type; | 
|  |  | 
|  | if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) { | 
|  | m_Pos--; | 
|  | break; | 
|  | } | 
|  | } | 
|  | return word_type; | 
|  | } | 
|  |  | 
|  | ByteString CPDF_SyntaxParser::ReadString() { | 
|  | uint8_t ch; | 
|  | if (!GetNextChar(ch)) | 
|  | return ByteString(); | 
|  |  | 
|  | ByteString buf; | 
|  | int32_t parlevel = 0; | 
|  | ReadStatus status = ReadStatus::kNormal; | 
|  | int32_t iEscCode = 0; | 
|  | while (true) { | 
|  | switch (status) { | 
|  | case ReadStatus::kNormal: | 
|  | if (ch == ')') { | 
|  | if (parlevel == 0) | 
|  | return ByteString(buf); | 
|  | parlevel--; | 
|  | } else if (ch == '(') { | 
|  | parlevel++; | 
|  | } | 
|  | if (ch == '\\') | 
|  | status = ReadStatus::kBackslash; | 
|  | else | 
|  | buf += static_cast<char>(ch); | 
|  | break; | 
|  | case ReadStatus::kBackslash: | 
|  | if (FXSYS_IsOctalDigit(ch)) { | 
|  | iEscCode = FXSYS_DecimalCharToInt(static_cast<wchar_t>(ch)); | 
|  | status = ReadStatus::kOctal; | 
|  | break; | 
|  | } | 
|  | if (ch == '\r') { | 
|  | status = ReadStatus::kCarriageReturn; | 
|  | break; | 
|  | } | 
|  | if (ch == 'n') { | 
|  | buf += '\n'; | 
|  | } else if (ch == 'r') { | 
|  | buf += '\r'; | 
|  | } else if (ch == 't') { | 
|  | buf += '\t'; | 
|  | } else if (ch == 'b') { | 
|  | buf += '\b'; | 
|  | } else if (ch == 'f') { | 
|  | buf += '\f'; | 
|  | } else if (ch != '\n') { | 
|  | buf += static_cast<char>(ch); | 
|  | } | 
|  | status = ReadStatus::kNormal; | 
|  | break; | 
|  | case ReadStatus::kOctal: | 
|  | if (FXSYS_IsOctalDigit(ch)) { | 
|  | iEscCode = | 
|  | iEscCode * 8 + FXSYS_DecimalCharToInt(static_cast<wchar_t>(ch)); | 
|  | status = ReadStatus::kFinishOctal; | 
|  | } else { | 
|  | buf += static_cast<char>(iEscCode); | 
|  | status = ReadStatus::kNormal; | 
|  | continue; | 
|  | } | 
|  | break; | 
|  | case ReadStatus::kFinishOctal: | 
|  | status = ReadStatus::kNormal; | 
|  | if (FXSYS_IsOctalDigit(ch)) { | 
|  | iEscCode = | 
|  | iEscCode * 8 + FXSYS_DecimalCharToInt(static_cast<wchar_t>(ch)); | 
|  | buf += static_cast<char>(iEscCode); | 
|  | } else { | 
|  | buf += static_cast<char>(iEscCode); | 
|  | continue; | 
|  | } | 
|  | break; | 
|  | case ReadStatus::kCarriageReturn: | 
|  | status = ReadStatus::kNormal; | 
|  | if (ch != '\n') | 
|  | continue; | 
|  | break; | 
|  | } | 
|  |  | 
|  | if (!GetNextChar(ch)) | 
|  | break; | 
|  | } | 
|  |  | 
|  | GetNextChar(ch); | 
|  | return buf; | 
|  | } | 
|  |  | 
|  | ByteString CPDF_SyntaxParser::ReadHexString() { | 
|  | uint8_t ch; | 
|  | if (!GetNextChar(ch)) | 
|  | return ByteString(); | 
|  |  | 
|  | ByteString buf; | 
|  | bool bFirst = true; | 
|  | uint8_t code = 0; | 
|  | while (true) { | 
|  | if (ch == '>') | 
|  | break; | 
|  |  | 
|  | if (isxdigit(ch)) { | 
|  | int val = FXSYS_HexCharToInt(ch); | 
|  | if (bFirst) { | 
|  | code = val * 16; | 
|  | } else { | 
|  | code += val; | 
|  | buf += static_cast<char>(code); | 
|  | } | 
|  | bFirst = !bFirst; | 
|  | } | 
|  |  | 
|  | if (!GetNextChar(ch)) | 
|  | break; | 
|  | } | 
|  | if (!bFirst) | 
|  | buf += static_cast<char>(code); | 
|  |  | 
|  | return buf; | 
|  | } | 
|  |  | 
|  | void CPDF_SyntaxParser::ToNextLine() { | 
|  | uint8_t ch; | 
|  | while (GetNextChar(ch)) { | 
|  | if (ch == '\n') | 
|  | break; | 
|  |  | 
|  | if (ch == '\r') { | 
|  | GetNextChar(ch); | 
|  | if (ch != '\n') | 
|  | --m_Pos; | 
|  | break; | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | void CPDF_SyntaxParser::ToNextWord() { | 
|  | if (m_TrailerEnds) { | 
|  | RecordingToNextWord(); | 
|  | return; | 
|  | } | 
|  |  | 
|  | uint8_t ch; | 
|  | if (!GetNextChar(ch)) | 
|  | return; | 
|  |  | 
|  | while (true) { | 
|  | while (PDFCharIsWhitespace(ch)) { | 
|  | if (!GetNextChar(ch)) | 
|  | return; | 
|  | } | 
|  |  | 
|  | if (ch != '%') | 
|  | break; | 
|  |  | 
|  | while (true) { | 
|  | if (!GetNextChar(ch)) | 
|  | return; | 
|  | if (PDFCharIsLineEnding(ch)) | 
|  | break; | 
|  | } | 
|  | } | 
|  | m_Pos--; | 
|  | } | 
|  |  | 
// States of the scanner in RecordingToNextWord(), which looks for a comment
// spelling "%EOF" followed by a line ending.
enum class EofState {
  // At the start of a line; only whitespace seen so far.
  kInitial = 0,
  // The first non-whitespace byte was not '%'.
  kNonPercent,
  // Inside a run of one or more '%'.
  kPercent,
  // Seen "%E".
  kE,
  // Seen "%EO".
  kO,
  // Seen "%EOF".
  kF,
  // Inside a comment that is not an EOF marker.
  kInvalid,
};
|  |  | 
|  | void CPDF_SyntaxParser::RecordingToNextWord() { | 
|  | DCHECK(m_TrailerEnds); | 
|  |  | 
|  | EofState eof_state = EofState::kInitial; | 
|  | // Find the first character which is neither whitespace, nor part of a | 
|  | // comment. | 
|  | while (true) { | 
|  | uint8_t ch; | 
|  | if (!GetNextChar(ch)) | 
|  | return; | 
|  | switch (eof_state) { | 
|  | case EofState::kInitial: | 
|  | if (!PDFCharIsWhitespace(ch)) | 
|  | eof_state = ch == '%' ? EofState::kPercent : EofState::kNonPercent; | 
|  | break; | 
|  | case EofState::kNonPercent: | 
|  | break; | 
|  | case EofState::kPercent: | 
|  | if (ch == 'E') | 
|  | eof_state = EofState::kE; | 
|  | else if (ch != '%') | 
|  | eof_state = EofState::kInvalid; | 
|  | break; | 
|  | case EofState::kE: | 
|  | eof_state = ch == 'O' ? EofState::kO : EofState::kInvalid; | 
|  | break; | 
|  | case EofState::kO: | 
|  | eof_state = ch == 'F' ? EofState::kF : EofState::kInvalid; | 
|  | break; | 
|  | case EofState::kF: | 
|  | if (ch == '\r') { | 
|  | // See if \r has to be combined with a \n that follows it | 
|  | // immediately. | 
|  | if (GetNextChar(ch) && ch != '\n') { | 
|  | ch = '\r'; | 
|  | m_Pos--; | 
|  | } | 
|  | } | 
|  | // If we now have a \r, that's not followed by a \n, so both are OK. | 
|  | if (ch == '\r' || ch == '\n') | 
|  | m_TrailerEnds->push_back(m_Pos); | 
|  | eof_state = EofState::kInvalid; | 
|  | break; | 
|  | case EofState::kInvalid: | 
|  | break; | 
|  | } | 
|  | if (PDFCharIsLineEnding(ch)) | 
|  | eof_state = EofState::kInitial; | 
|  | if (eof_state == EofState::kNonPercent) | 
|  | break; | 
|  | } | 
|  | m_Pos--; | 
|  | } | 
|  |  | 
|  | CPDF_SyntaxParser::WordResult CPDF_SyntaxParser::GetNextWord() { | 
|  | CPDF_ReadValidator::ScopedSession read_session(GetValidator()); | 
|  | WordType word_type = GetNextWordInternal(); | 
|  | ByteString word; | 
|  | if (!GetValidator()->has_read_problems()) | 
|  | word = ByteString(m_WordBuffer, m_WordSize); | 
|  | return {word, word_type == WordType::kNumber}; | 
|  | } | 
|  |  | 
|  | ByteString CPDF_SyntaxParser::PeekNextWord() { | 
|  | AutoRestorer<FX_FILESIZE> save_pos(&m_Pos); | 
|  | return GetNextWord().word; | 
|  | } | 
|  |  | 
|  | ByteString CPDF_SyntaxParser::GetKeyword() { | 
|  | return GetNextWord().word; | 
|  | } | 
|  |  | 
|  | void CPDF_SyntaxParser::SetPos(FX_FILESIZE pos) { | 
|  | DCHECK_GE(pos, 0); | 
|  | m_Pos = std::min(pos, m_FileLen); | 
|  | } | 
|  |  | 
|  | RetainPtr<CPDF_Object> CPDF_SyntaxParser::GetObjectBody( | 
|  | CPDF_IndirectObjectHolder* pObjList) { | 
|  | CPDF_ReadValidator::ScopedSession read_session(GetValidator()); | 
|  | auto result = GetObjectBodyInternal(pObjList, ParseType::kLoose); | 
|  | if (GetValidator()->has_read_problems()) | 
|  | return nullptr; | 
|  | return result; | 
|  | } | 
|  |  | 
|  | RetainPtr<CPDF_Object> CPDF_SyntaxParser::GetObjectBodyInternal( | 
|  | CPDF_IndirectObjectHolder* pObjList, | 
|  | ParseType parse_type) { | 
|  | AutoRestorer<int> depth_restorer(&s_CurrentRecursionDepth); | 
|  | if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth) | 
|  | return nullptr; | 
|  |  | 
|  | FX_FILESIZE SavedObjPos = m_Pos; | 
|  | WordResult word_result = GetNextWord(); | 
|  | const ByteString& word = word_result.word; | 
|  | if (word.IsEmpty()) | 
|  | return nullptr; | 
|  |  | 
|  | if (word_result.is_number) { | 
|  | AutoRestorer<FX_FILESIZE> pos_restorer(&m_Pos); | 
|  | WordResult nextword = GetNextWord(); | 
|  | if (!nextword.is_number) | 
|  | return pdfium::MakeRetain<CPDF_Number>(word.AsStringView()); | 
|  |  | 
|  | WordResult nextword2 = GetNextWord(); | 
|  | if (nextword2.word != "R") | 
|  | return pdfium::MakeRetain<CPDF_Number>(word.AsStringView()); | 
|  |  | 
|  | pos_restorer.AbandonRestoration(); | 
|  | uint32_t refnum = FXSYS_atoui(word.c_str()); | 
|  | if (refnum == CPDF_Object::kInvalidObjNum) | 
|  | return nullptr; | 
|  |  | 
|  | return pdfium::MakeRetain<CPDF_Reference>(pObjList, refnum); | 
|  | } | 
|  |  | 
|  | if (word == "true" || word == "false") | 
|  | return pdfium::MakeRetain<CPDF_Boolean>(word == "true"); | 
|  |  | 
|  | if (word == "null") | 
|  | return pdfium::MakeRetain<CPDF_Null>(); | 
|  |  | 
|  | if (word == "(") { | 
|  | ByteString str = ReadString(); | 
|  | return pdfium::MakeRetain<CPDF_String>(m_pPool, str, false); | 
|  | } | 
|  | if (word == "<") { | 
|  | ByteString str = ReadHexString(); | 
|  | return pdfium::MakeRetain<CPDF_String>(m_pPool, str, true); | 
|  | } | 
|  | if (word == "[") { | 
|  | auto pArray = pdfium::MakeRetain<CPDF_Array>(); | 
|  | while (RetainPtr<CPDF_Object> pObj = | 
|  | GetObjectBodyInternal(pObjList, ParseType::kLoose)) { | 
|  | pArray->Append(std::move(pObj)); | 
|  | } | 
|  | return (parse_type == ParseType::kLoose || m_WordBuffer[0] == ']') | 
|  | ? std::move(pArray) | 
|  | : nullptr; | 
|  | } | 
|  | if (word[0] == '/') { | 
|  | return pdfium::MakeRetain<CPDF_Name>( | 
|  | m_pPool, | 
|  | PDF_NameDecode(ByteStringView(m_WordBuffer + 1, m_WordSize - 1))); | 
|  | } | 
|  | if (word == "<<") { | 
|  | RetainPtr<CPDF_Dictionary> pDict = | 
|  | pdfium::MakeRetain<CPDF_Dictionary>(m_pPool); | 
|  | while (true) { | 
|  | WordResult inner_word_result = GetNextWord(); | 
|  | const ByteString& inner_word = inner_word_result.word; | 
|  | if (inner_word.IsEmpty()) | 
|  | return nullptr; | 
|  |  | 
|  | FX_FILESIZE SavedPos = m_Pos - inner_word.GetLength(); | 
|  | if (inner_word == ">>") | 
|  | break; | 
|  |  | 
|  | if (inner_word == "endobj") { | 
|  | m_Pos = SavedPos; | 
|  | break; | 
|  | } | 
|  | if (inner_word[0] != '/') | 
|  | continue; | 
|  |  | 
|  | ByteString key = PDF_NameDecode(inner_word.AsStringView()); | 
|  | if (key.IsEmpty() && parse_type == ParseType::kLoose) | 
|  | continue; | 
|  |  | 
|  | RetainPtr<CPDF_Object> pObj = | 
|  | GetObjectBodyInternal(pObjList, ParseType::kLoose); | 
|  | if (!pObj) { | 
|  | if (parse_type == ParseType::kLoose) | 
|  | continue; | 
|  |  | 
|  | ToNextLine(); | 
|  | return nullptr; | 
|  | } | 
|  |  | 
|  | // `key` has to be "/X" at the minimum. | 
|  | if (key.GetLength() > 1) { | 
|  | pDict->SetFor(key.Substr(1), std::move(pObj)); | 
|  | } | 
|  | } | 
|  |  | 
|  | AutoRestorer<FX_FILESIZE> pos_restorer(&m_Pos); | 
|  | if (GetNextWord().word != "stream") | 
|  | return pDict; | 
|  | pos_restorer.AbandonRestoration(); | 
|  | return ReadStream(std::move(pDict)); | 
|  | } | 
|  | if (word == ">>") | 
|  | m_Pos = SavedObjPos; | 
|  |  | 
|  | return nullptr; | 
|  | } | 
|  |  | 
|  | RetainPtr<CPDF_Object> CPDF_SyntaxParser::GetIndirectObject( | 
|  | CPDF_IndirectObjectHolder* pObjList, | 
|  | ParseType parse_type) { | 
|  | CPDF_ReadValidator::ScopedSession read_session(GetValidator()); | 
|  | const FX_FILESIZE saved_pos = GetPos(); | 
|  |  | 
|  | WordResult objnum_word_result = GetNextWord(); | 
|  | if (!objnum_word_result.is_number || objnum_word_result.word.IsEmpty()) { | 
|  | SetPos(saved_pos); | 
|  | return nullptr; | 
|  | } | 
|  | const uint32_t parser_objnum = FXSYS_atoui(objnum_word_result.word.c_str()); | 
|  |  | 
|  | WordResult gennum_word_result = GetNextWord(); | 
|  | const ByteString& gennum_word = gennum_word_result.word; | 
|  | if (!gennum_word_result.is_number || gennum_word.IsEmpty()) { | 
|  | SetPos(saved_pos); | 
|  | return nullptr; | 
|  | } | 
|  | const uint32_t parser_gennum = FXSYS_atoui(gennum_word.c_str()); | 
|  |  | 
|  | if (GetKeyword() != "obj") { | 
|  | SetPos(saved_pos); | 
|  | return nullptr; | 
|  | } | 
|  |  | 
|  | RetainPtr<CPDF_Object> pObj = GetObjectBodyInternal(pObjList, parse_type); | 
|  | if (pObj) { | 
|  | pObj->SetObjNum(parser_objnum); | 
|  | pObj->SetGenNum(parser_gennum); | 
|  | } | 
|  |  | 
|  | return GetValidator()->has_read_problems() ? nullptr : std::move(pObj); | 
|  | } | 
|  |  | 
|  | unsigned int CPDF_SyntaxParser::ReadEOLMarkers(FX_FILESIZE pos) { | 
|  | unsigned char byte1 = 0; | 
|  | unsigned char byte2 = 0; | 
|  |  | 
|  | GetCharAt(pos, byte1); | 
|  | GetCharAt(pos + 1, byte2); | 
|  |  | 
|  | if (byte1 == '\r' && byte2 == '\n') | 
|  | return 2; | 
|  |  | 
|  | if (byte1 == '\r' || byte1 == '\n') | 
|  | return 1; | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | FX_FILESIZE CPDF_SyntaxParser::FindWordPos(ByteStringView word) { | 
|  | AutoRestorer<FX_FILESIZE> pos_restorer(&m_Pos); | 
|  | FX_FILESIZE end_offset = FindTag(word); | 
|  | while (end_offset >= 0) { | 
|  | // Stop searching when word is found. | 
|  | if (IsWholeWord(GetPos() - word.GetLength(), m_FileLen, word, true)) | 
|  | return GetPos() - word.GetLength(); | 
|  |  | 
|  | end_offset = FindTag(word); | 
|  | } | 
|  | return -1; | 
|  | } | 
|  |  | 
|  | FX_FILESIZE CPDF_SyntaxParser::FindStreamEndPos() { | 
|  | const ByteStringView kEndStreamStr("endstream"); | 
|  | const ByteStringView kEndObjStr("endobj"); | 
|  |  | 
|  | FX_FILESIZE endStreamWordOffset = FindWordPos(kEndStreamStr); | 
|  | FX_FILESIZE endObjWordOffset = FindWordPos(kEndObjStr); | 
|  |  | 
|  | // Can't find "endstream" or "endobj". | 
|  | if (endStreamWordOffset < 0 && endObjWordOffset < 0) { | 
|  | return -1; | 
|  | } | 
|  |  | 
|  | if (endStreamWordOffset < 0 && endObjWordOffset >= 0) { | 
|  | // Correct the position of end stream. | 
|  | endStreamWordOffset = endObjWordOffset; | 
|  | } else if (endStreamWordOffset >= 0 && endObjWordOffset < 0) { | 
|  | // Correct the position of end obj. | 
|  | endObjWordOffset = endStreamWordOffset; | 
|  | } else if (endStreamWordOffset > endObjWordOffset) { | 
|  | endStreamWordOffset = endObjWordOffset; | 
|  | } | 
|  |  | 
|  | int numMarkers = ReadEOLMarkers(endStreamWordOffset - 2); | 
|  | if (numMarkers == 2) { | 
|  | endStreamWordOffset -= 2; | 
|  | } else { | 
|  | numMarkers = ReadEOLMarkers(endStreamWordOffset - 1); | 
|  | if (numMarkers == 1) { | 
|  | endStreamWordOffset -= 1; | 
|  | } | 
|  | } | 
|  | if (endStreamWordOffset < GetPos()) { | 
|  | return -1; | 
|  | } | 
|  | return endStreamWordOffset; | 
|  | } | 
|  |  | 
|  | RetainPtr<CPDF_Stream> CPDF_SyntaxParser::ReadStream( | 
|  | RetainPtr<CPDF_Dictionary> pDict) { | 
|  | RetainPtr<const CPDF_Number> pLenObj = | 
|  | ToNumber(pDict->GetDirectObjectFor("Length")); | 
|  | FX_FILESIZE len = pLenObj ? pLenObj->GetInteger() : -1; | 
|  |  | 
|  | // Locate the start of stream. | 
|  | ToNextLine(); | 
|  | const FX_FILESIZE streamStartPos = GetPos(); | 
|  |  | 
|  | if (len > 0) { | 
|  | FX_SAFE_FILESIZE pos = GetPos(); | 
|  | pos += len; | 
|  | if (!pos.IsValid() || pos.ValueOrDie() >= m_FileLen) | 
|  | len = -1; | 
|  | } | 
|  |  | 
|  | RetainPtr<IFX_SeekableReadStream> substream; | 
|  | if (len > 0) { | 
|  | // Check data availability first to allow the Validator to request data | 
|  | // smoothly, without jumps. | 
|  | if (!GetValidator()->CheckDataRangeAndRequestIfUnavailable( | 
|  | m_HeaderOffset + GetPos(), len)) { | 
|  | return nullptr; | 
|  | } | 
|  |  | 
|  | substream = pdfium::MakeRetain<ReadableSubStream>( | 
|  | GetValidator(), m_HeaderOffset + GetPos(), len); | 
|  | SetPos(GetPos() + len); | 
|  | } | 
|  |  | 
|  | const ByteStringView kEndStreamStr("endstream"); | 
|  | const ByteStringView kEndObjStr("endobj"); | 
|  |  | 
|  | // Note, we allow zero length streams as we need to pass them through when we | 
|  | // are importing pages into a new document. | 
|  | if (len >= 0) { | 
|  | CPDF_ReadValidator::ScopedSession read_session(GetValidator()); | 
|  | m_Pos += ReadEOLMarkers(GetPos()); | 
|  | memset(m_WordBuffer, 0, kEndStreamStr.GetLength() + 1); | 
|  | GetNextWordInternal(); | 
|  | if (GetValidator()->has_read_problems()) | 
|  | return nullptr; | 
|  |  | 
|  | // Earlier version of PDF specification doesn't require EOL marker before | 
|  | // 'endstream' keyword. If keyword 'endstream' follows the bytes in | 
|  | // specified length, it signals the end of stream. | 
|  | if (memcmp(m_WordBuffer, kEndStreamStr.raw_str(), | 
|  | kEndStreamStr.GetLength()) != 0) { | 
|  | substream.Reset(); | 
|  | len = -1; | 
|  | SetPos(streamStartPos); | 
|  | } | 
|  | } | 
|  |  | 
|  | if (len < 0) { | 
|  | // If len is not available or incorrect, len needs to be calculated | 
|  | // by searching the keywords "endstream" or "endobj". | 
|  | const FX_FILESIZE streamEndPos = FindStreamEndPos(); | 
|  | if (streamEndPos < 0) | 
|  | return nullptr; | 
|  |  | 
|  | len = streamEndPos - streamStartPos; | 
|  | DCHECK_GE(len, 0); | 
|  | if (len > 0) { | 
|  | SetPos(streamStartPos); | 
|  | // Check data availability first to allow the Validator to request data | 
|  | // smoothly, without jumps. | 
|  | if (!GetValidator()->CheckDataRangeAndRequestIfUnavailable( | 
|  | m_HeaderOffset + GetPos(), len)) { | 
|  | return nullptr; | 
|  | } | 
|  |  | 
|  | substream = pdfium::MakeRetain<ReadableSubStream>( | 
|  | GetValidator(), m_HeaderOffset + GetPos(), len); | 
|  | SetPos(GetPos() + len); | 
|  | } | 
|  | } | 
|  |  | 
|  | RetainPtr<CPDF_Stream> pStream; | 
|  | if (substream) { | 
|  | // It is unclear from CPDF_SyntaxParser's perspective what object | 
|  | // `substream` is ultimately holding references to. To avoid unexpectedly | 
|  | // changing object lifetimes by handing `substream` to `pStream`, make a | 
|  | // copy of the data here. | 
|  | FixedUninitDataVector<uint8_t> data(substream->GetSize()); | 
|  | bool did_read = substream->ReadBlockAtOffset(data.writable_span(), 0); | 
|  | CHECK(did_read); | 
|  | auto data_as_stream = | 
|  | pdfium::MakeRetain<CFX_ReadOnlyVectorStream>(std::move(data)); | 
|  |  | 
|  | pStream = pdfium::MakeRetain<CPDF_Stream>(); | 
|  | pStream->InitStreamFromFile(std::move(data_as_stream), std::move(pDict)); | 
|  | } else { | 
|  | DCHECK(!len); | 
|  | pStream = pdfium::MakeRetain<CPDF_Stream>(std::move(pDict)); | 
|  | } | 
|  | const FX_FILESIZE end_stream_offset = GetPos(); | 
|  | memset(m_WordBuffer, 0, kEndObjStr.GetLength() + 1); | 
|  | GetNextWordInternal(); | 
|  |  | 
|  | // Allow whitespace after endstream and before a newline. | 
|  | unsigned char ch = 0; | 
|  | while (GetNextChar(ch)) { | 
|  | if (!PDFCharIsWhitespace(ch) || PDFCharIsLineEnding(ch)) | 
|  | break; | 
|  | } | 
|  | SetPos(GetPos() - 1); | 
|  |  | 
|  | int numMarkers = ReadEOLMarkers(GetPos()); | 
|  | if (m_WordSize == static_cast<unsigned int>(kEndObjStr.GetLength()) && | 
|  | numMarkers != 0 && | 
|  | memcmp(m_WordBuffer, kEndObjStr.raw_str(), kEndObjStr.GetLength()) == 0) { | 
|  | SetPos(end_stream_offset); | 
|  | } | 
|  | return pStream; | 
|  | } | 
|  |  | 
|  | uint32_t CPDF_SyntaxParser::GetDirectNum() { | 
|  | if (GetNextWordInternal() != WordType::kNumber) | 
|  | return 0; | 
|  |  | 
|  | m_WordBuffer[m_WordSize] = 0; | 
|  | return FXSYS_atoui(reinterpret_cast<const char*>(m_WordBuffer)); | 
|  | } | 
|  |  | 
|  | RetainPtr<CPDF_ReadValidator> CPDF_SyntaxParser::GetValidator() const { | 
|  | return m_pFileAccess; | 
|  | } | 
|  |  | 
|  | bool CPDF_SyntaxParser::IsWholeWord(FX_FILESIZE startpos, | 
|  | FX_FILESIZE limit, | 
|  | ByteStringView tag, | 
|  | bool checkKeyword) { | 
|  | const uint32_t taglen = tag.GetLength(); | 
|  |  | 
|  | bool bCheckLeft = !PDFCharIsDelimiter(tag[0]) && !PDFCharIsWhitespace(tag[0]); | 
|  | bool bCheckRight = !PDFCharIsDelimiter(tag[taglen - 1]) && | 
|  | !PDFCharIsWhitespace(tag[taglen - 1]); | 
|  |  | 
|  | uint8_t ch; | 
|  | if (bCheckRight && startpos + static_cast<int32_t>(taglen) <= limit && | 
|  | GetCharAt(startpos + static_cast<int32_t>(taglen), ch)) { | 
|  | if (PDFCharIsNumeric(ch) || PDFCharIsOther(ch) || | 
|  | (checkKeyword && PDFCharIsDelimiter(ch))) { | 
|  | return false; | 
|  | } | 
|  | } | 
|  |  | 
|  | if (bCheckLeft && startpos > 0 && GetCharAt(startpos - 1, ch)) { | 
|  | if (PDFCharIsNumeric(ch) || PDFCharIsOther(ch) || | 
|  | (checkKeyword && PDFCharIsDelimiter(ch))) { | 
|  | return false; | 
|  | } | 
|  | } | 
|  | return true; | 
|  | } | 
|  |  | 
|  | bool CPDF_SyntaxParser::BackwardsSearchToWord(ByteStringView word, | 
|  | FX_FILESIZE limit) { | 
|  | int32_t taglen = word.GetLength(); | 
|  | if (taglen == 0) | 
|  | return false; | 
|  |  | 
|  | FX_FILESIZE pos = m_Pos; | 
|  | int32_t offset = taglen - 1; | 
|  | while (true) { | 
|  | if (limit && pos <= m_Pos - limit) | 
|  | return false; | 
|  |  | 
|  | uint8_t byte; | 
|  | if (!GetCharAtBackward(pos, &byte)) | 
|  | return false; | 
|  |  | 
|  | if (byte == word[offset]) { | 
|  | offset--; | 
|  | if (offset >= 0) { | 
|  | pos--; | 
|  | continue; | 
|  | } | 
|  | if (IsWholeWord(pos, limit, word, false)) { | 
|  | m_Pos = pos; | 
|  | return true; | 
|  | } | 
|  | } | 
|  | offset = byte == word[taglen - 1] ? taglen - 2 : taglen - 1; | 
|  | pos--; | 
|  | if (pos < 0) | 
|  | return false; | 
|  | } | 
|  | } | 
|  |  | 
|  | FX_FILESIZE CPDF_SyntaxParser::FindTag(ByteStringView tag) { | 
|  | const FX_FILESIZE startpos = GetPos(); | 
|  | const int32_t taglen = tag.GetLength(); | 
|  | DCHECK_GT(taglen, 0); | 
|  |  | 
|  | int32_t match = 0; | 
|  | while (true) { | 
|  | uint8_t ch; | 
|  | if (!GetNextChar(ch)) | 
|  | return -1; | 
|  |  | 
|  | if (ch == tag[match]) { | 
|  | match++; | 
|  | if (match == taglen) | 
|  | return GetPos() - startpos - taglen; | 
|  | } else { | 
|  | match = ch == tag[0] ? 1 : 0; | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | bool CPDF_SyntaxParser::IsPositionRead(FX_FILESIZE pos) const { | 
|  | return m_BufOffset <= pos && | 
|  | pos < static_cast<FX_FILESIZE>(m_BufOffset + m_pFileBuf.size()); | 
|  | } |