blob: 52400c152eaefe2dfd86715c66b1744d49db07fd [file] [log] [blame]
// Copyright 2016 The PDFium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
#include "core/fpdfapi/parser/cpdf_syntax_parser.h"
#include <ctype.h>
#include <algorithm>
#include <utility>
#include "core/fpdfapi/parser/cpdf_array.h"
#include "core/fpdfapi/parser/cpdf_boolean.h"
#include "core/fpdfapi/parser/cpdf_crypto_handler.h"
#include "core/fpdfapi/parser/cpdf_dictionary.h"
#include "core/fpdfapi/parser/cpdf_name.h"
#include "core/fpdfapi/parser/cpdf_null.h"
#include "core/fpdfapi/parser/cpdf_number.h"
#include "core/fpdfapi/parser/cpdf_read_validator.h"
#include "core/fpdfapi/parser/cpdf_reference.h"
#include "core/fpdfapi/parser/cpdf_stream.h"
#include "core/fpdfapi/parser/cpdf_string.h"
#include "core/fpdfapi/parser/fpdf_parser_utility.h"
#include "core/fxcrt/autorestorer.h"
#include "core/fxcrt/cfx_read_only_vector_stream.h"
#include "core/fxcrt/check.h"
#include "core/fxcrt/check_op.h"
#include "core/fxcrt/fixed_size_data_vector.h"
#include "core/fxcrt/fx_extension.h"
#include "core/fxcrt/fx_safe_types.h"
namespace {
// States of the escape-sequence decoder used by
// CPDF_SyntaxParser::ReadString().
enum class ReadStatus {
  // Not inside an escape sequence.
  kNormal,
  // The previous character was a backslash.
  kBackslash,
  // One octal digit of a "\ddd" escape has been read.
  kOctal,
  // Two octal digits of a "\ddd" escape have been read.
  kFinishOctal,
  // A backslash followed by '\r' has been read.
  kCarriageReturn,
};
class ReadableSubStream final : public IFX_SeekableReadStream {
public:
ReadableSubStream(RetainPtr<IFX_SeekableReadStream> pFileRead,
FX_FILESIZE part_offset,
FX_FILESIZE part_size)
: m_pFileRead(std::move(pFileRead)),
m_PartOffset(part_offset),
m_PartSize(part_size) {}
~ReadableSubStream() override = default;
// IFX_SeekableReadStream overrides:
bool ReadBlockAtOffset(pdfium::span<uint8_t> buffer,
FX_FILESIZE offset) override {
FX_SAFE_FILESIZE safe_end = offset;
safe_end += buffer.size();
// Check that requested range is valid, to prevent calling of ReadBlock
// of original m_pFileRead with incorrect params.
if (!safe_end.IsValid() || safe_end.ValueOrDie() > m_PartSize)
return false;
return m_pFileRead->ReadBlockAtOffset(buffer, m_PartOffset + offset);
}
FX_FILESIZE GetSize() override { return m_PartSize; }
private:
RetainPtr<IFX_SeekableReadStream> m_pFileRead;
FX_FILESIZE m_PartOffset;
FX_FILESIZE m_PartSize;
};
} // namespace
// static
// Number of GetObjectBodyInternal() calls currently on the stack. Each call
// increments it and gives up once it exceeds kParserMaxRecursionDepth.
int CPDF_SyntaxParser::s_CurrentRecursionDepth = 0;
// static
// Builds a parser over `pFileAccess` for tests. The stream is wrapped in a
// CPDF_ReadValidator created with a null second argument, and `HeaderOffset`
// is forwarded unchanged.
std::unique_ptr<CPDF_SyntaxParser> CPDF_SyntaxParser::CreateForTesting(
    RetainPtr<IFX_SeekableReadStream> pFileAccess,
    FX_FILESIZE HeaderOffset) {
  auto validator =
      pdfium::MakeRetain<CPDF_ReadValidator>(std::move(pFileAccess), nullptr);
  return std::make_unique<CPDF_SyntaxParser>(std::move(validator),
                                             HeaderOffset);
}
// Convenience constructor: wraps `pFileAccess` in a CPDF_ReadValidator
// (created with a null second argument) and delegates to the validator-based
// constructor with a header offset of 0.
CPDF_SyntaxParser::CPDF_SyntaxParser(
RetainPtr<IFX_SeekableReadStream> pFileAccess)
: CPDF_SyntaxParser(
pdfium::MakeRetain<CPDF_ReadValidator>(std::move(pFileAccess),
nullptr),
0) {}
// Creates a parser reading through `validator`. `HeaderOffset` is added to
// every parser position to obtain the position in the underlying file; the
// file length is sampled once, here, from the validator.
CPDF_SyntaxParser::CPDF_SyntaxParser(RetainPtr<CPDF_ReadValidator> validator,
                                     FX_FILESIZE HeaderOffset)
    : m_pFileAccess(std::move(validator)),
      m_HeaderOffset(HeaderOffset),
      m_FileLen(m_pFileAccess->GetSize()) {
  // The header cannot start past the end of the file.
  DCHECK(m_FileLen >= m_HeaderOffset);
}
// Defaulted destructor, defined out of line in this translation unit.
CPDF_SyntaxParser::~CPDF_SyntaxParser() = default;
// Reads the byte at parser position `pos` into `ch` without moving the
// cursor: m_Pos is set to `pos` for the read and restored on return.
// Returns false when no byte could be read there.
bool CPDF_SyntaxParser::GetCharAt(FX_FILESIZE pos, uint8_t& ch) {
  AutoRestorer<FX_FILESIZE> cursor_restorer(&m_Pos);
  m_Pos = pos;
  const bool got_char = GetNextChar(ch);
  return got_char;
}
// Refills the read buffer with up to m_ReadBufferSize bytes starting at file
// position `read_pos` (clamped to the end of the file). On success the buffer
// covers [read_pos, read_pos + size); on failure the buffer is emptied.
bool CPDF_SyntaxParser::ReadBlockAt(FX_FILESIZE read_pos) {
  if (read_pos >= m_FileLen)
    return false;

  size_t bytes_to_read = m_ReadBufferSize;
  FX_SAFE_FILESIZE block_end = read_pos;
  block_end += bytes_to_read;
  const bool fits_in_file =
      block_end.IsValid() && block_end.ValueOrDie() <= m_FileLen;
  if (!fits_in_file) {
    // Shorten the final block so that it stops at the end of the file.
    bytes_to_read = m_FileLen - read_pos;
  }

  m_pFileBuf.resize(bytes_to_read);
  if (m_pFileAccess->ReadBlockAtOffset(m_pFileBuf, read_pos)) {
    m_BufOffset = read_pos;
    return true;
  }

  m_pFileBuf.clear();
  return false;
}
bool CPDF_SyntaxParser::GetNextChar(uint8_t& ch) {
FX_FILESIZE pos = m_Pos + m_HeaderOffset;
if (pos >= m_FileLen)
return false;
if (!IsPositionRead(pos) && !ReadBlockAt(pos))
return false;
ch = m_pFileBuf[pos - m_BufOffset];
m_Pos++;
return true;
}
// Returns the number of bytes from the header offset to the end of the file,
// i.e. the file length with the bytes preceding the header excluded.
FX_FILESIZE CPDF_SyntaxParser::GetDocumentSize() const {
return m_FileLen - m_HeaderOffset;
}
// Reads the byte at parser position `pos` into `*ch` without moving the
// cursor. When the buffer has to be refilled, the block is positioned so that
// it ends near `pos`, which suits callers that walk backwards.
bool CPDF_SyntaxParser::GetCharAtBackward(FX_FILESIZE pos, uint8_t* ch) {
  const FX_FILESIZE file_pos = pos + m_HeaderOffset;
  if (file_pos >= m_FileLen)
    return false;

  if (!IsPositionRead(file_pos)) {
    // Start the block CPDF_Stream::kFileBufSize - 1 bytes before `file_pos`,
    // or at the beginning of the file when that would be negative.
    FX_FILESIZE block_start = 0;
    if (file_pos >= CPDF_Stream::kFileBufSize)
      block_start = file_pos - CPDF_Stream::kFileBufSize + 1;

    if (!ReadBlockAt(block_start))
      return false;
    if (!IsPositionRead(file_pos))
      return false;
  }

  *ch = m_pFileBuf[file_pos - m_BufOffset];
  return true;
}
// Fills `buffer` with bytes starting at the cursor, bypassing the internal
// read buffer. On success the cursor advances by buffer.size(); on failure it
// is left where it was.
bool CPDF_SyntaxParser::ReadBlock(pdfium::span<uint8_t> buffer) {
  const FX_FILESIZE file_pos = m_Pos + m_HeaderOffset;
  if (m_pFileAccess->ReadBlockAtOffset(buffer, file_pos)) {
    m_Pos += buffer.size();
    return true;
  }
  return false;
}
// Reads the next token into m_WordBuffer / m_WordSize, after ToNextWord() has
// skipped whatever precedes it, and leaves the cursor just past the token.
// Returns WordType::kNumber when every character of the token is numeric and
// WordType::kWord otherwise. When no character can be read at all, returns
// WordType::kNumber with m_WordSize == 0, so callers must check for an empty
// word.
CPDF_SyntaxParser::WordType CPDF_SyntaxParser::GetNextWordInternal() {
m_WordSize = 0;
WordType word_type = WordType::kNumber;
ToNextWord();
uint8_t ch;
if (!GetNextChar(ch))
return word_type;
// Tokens that start with a delimiter are never numbers.
if (PDFCharIsDelimiter(ch)) {
word_type = WordType::kWord;
m_WordBuffer[m_WordSize++] = ch;
if (ch == '/') {
// Name: keep collecting while the characters are "other" or numeric.
while (true) {
if (!GetNextChar(ch))
return word_type;
if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) {
// Un-read the character that terminated the name.
m_Pos--;
return word_type;
}
// Characters beyond the buffer capacity are consumed but not stored.
if (m_WordSize < sizeof(m_WordBuffer) - 1)
m_WordBuffer[m_WordSize++] = ch;
}
} else if (ch == '<') {
// Either the two-character token "<<" or a lone '<'.
if (!GetNextChar(ch))
return word_type;
if (ch == '<')
m_WordBuffer[m_WordSize++] = ch;
else
m_Pos--;
} else if (ch == '>') {
// Either the two-character token ">>" or a lone '>'.
if (!GetNextChar(ch))
return word_type;
if (ch == '>')
m_WordBuffer[m_WordSize++] = ch;
else
m_Pos--;
}
// Every other delimiter is a one-character token.
return word_type;
}
// Regular token: collect until a delimiter, whitespace, or end of input.
while (true) {
// Characters beyond the buffer capacity are consumed but not stored.
if (m_WordSize < sizeof(m_WordBuffer) - 1)
m_WordBuffer[m_WordSize++] = ch;
// A single non-numeric character downgrades the token to a plain word.
if (!PDFCharIsNumeric(ch))
word_type = WordType::kWord;
if (!GetNextChar(ch))
return word_type;
if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
// Un-read the terminating character so the next call sees it.
m_Pos--;
break;
}
}
return word_type;
}
// Reads a literal string starting at the cursor and returns its decoded
// bytes. GetObjectBodyInternal() calls this right after consuming the "("
// token, so the first character read here is the first one inside the
// parentheses.
// Decoding is a state machine over ReadStatus: balanced nested parentheses
// are kept in the output; backslash escapes (\n \r \t \b \f, one to three
// octal digits, and backslash followed by a line ending) are translated.
// If the input ends before the closing ')', the bytes decoded so far are
// returned.
// NOTE(review): an octal escape that is still pending when the input ends is
// not appended to the result.
ByteString CPDF_SyntaxParser::ReadString() {
uint8_t ch;
if (!GetNextChar(ch))
return ByteString();
ByteString buf;
// Nesting depth of unescaped '(' seen inside the string.
int32_t parlevel = 0;
ReadStatus status = ReadStatus::kNormal;
// Value accumulated so far for an octal escape.
int32_t iEscCode = 0;
while (true) {
switch (status) {
case ReadStatus::kNormal:
if (ch == ')') {
// The ')' matching the opening '(' ends the string.
if (parlevel == 0)
return ByteString(buf);
parlevel--;
} else if (ch == '(') {
parlevel++;
}
// Nested parentheses fall through and are copied like any other byte.
if (ch == '\\')
status = ReadStatus::kBackslash;
else
buf += static_cast<char>(ch);
break;
case ReadStatus::kBackslash:
if (FXSYS_IsOctalDigit(ch)) {
iEscCode = FXSYS_DecimalCharToInt(static_cast<wchar_t>(ch));
status = ReadStatus::kOctal;
break;
}
if (ch == '\r') {
status = ReadStatus::kCarriageReturn;
break;
}
if (ch == 'n') {
buf += '\n';
} else if (ch == 'r') {
buf += '\r';
} else if (ch == 't') {
buf += '\t';
} else if (ch == 'b') {
buf += '\b';
} else if (ch == 'f') {
buf += '\f';
} else if (ch != '\n') {
// Any other escaped character stands for itself; an escaped '\n'
// produces no output.
buf += static_cast<char>(ch);
}
status = ReadStatus::kNormal;
break;
case ReadStatus::kOctal:
if (FXSYS_IsOctalDigit(ch)) {
iEscCode =
iEscCode * 8 + FXSYS_DecimalCharToInt(static_cast<wchar_t>(ch));
status = ReadStatus::kFinishOctal;
} else {
// One-digit escape: emit it, then re-process `ch` in the normal state
// without reading a new character.
buf += static_cast<char>(iEscCode);
status = ReadStatus::kNormal;
continue;
}
break;
case ReadStatus::kFinishOctal:
status = ReadStatus::kNormal;
if (FXSYS_IsOctalDigit(ch)) {
// Third and last digit; the value is truncated to a char.
iEscCode =
iEscCode * 8 + FXSYS_DecimalCharToInt(static_cast<wchar_t>(ch));
buf += static_cast<char>(iEscCode);
} else {
// Two-digit escape: emit it, then re-process `ch` in the normal state.
buf += static_cast<char>(iEscCode);
continue;
}
break;
case ReadStatus::kCarriageReturn:
// Backslash-CR: swallow a directly following '\n'; anything else is
// re-processed in the normal state.
status = ReadStatus::kNormal;
if (ch != '\n')
continue;
break;
}
if (!GetNextChar(ch))
break;
}
// Only reached after GetNextChar() failed above (input ended inside the
// string).
GetNextChar(ch);
return buf;
}
// Reads hexadecimal digits up to the closing '>' (or the end of input) and
// returns the bytes they encode. Characters that are not hex digits are
// skipped. When the number of digits is odd, the last digit is used as the
// high nibble of a final byte whose low nibble is zero.
ByteString CPDF_SyntaxParser::ReadHexString() {
  uint8_t ch;
  if (!GetNextChar(ch))
    return ByteString();

  ByteString result;
  bool have_high_nibble = false;
  uint8_t pending = 0;
  do {
    if (ch == '>')
      break;
    if (!isxdigit(ch))
      continue;  // Skips to the loop condition, which reads the next byte.

    const int digit = FXSYS_HexCharToInt(ch);
    if (have_high_nibble) {
      pending += digit;
      result += static_cast<char>(pending);
    } else {
      pending = digit * 16;
    }
    have_high_nibble = !have_high_nibble;
  } while (GetNextChar(ch));

  // Flush a dangling high nibble.
  if (have_high_nibble)
    result += static_cast<char>(pending);
  return result;
}
// Advances the cursor to just past the next line ending ("\n", "\r" or
// "\r\n"), or to the end of input when there is none.
void CPDF_SyntaxParser::ToNextLine() {
  uint8_t ch;
  while (GetNextChar(ch)) {
    if (ch == '\n')
      break;
    if (ch == '\r') {
      // Consume a '\n' that directly follows the '\r'. Rewind only when a
      // byte was actually read and turned out not to be '\n'; if the '\r' is
      // the last byte of the input, nothing was consumed and rewinding would
      // put the cursor back onto the '\r'.
      if (GetNextChar(ch) && ch != '\n')
        --m_Pos;
      break;
    }
  }
}
// Moves the cursor to the first character that is neither whitespace nor
// part of a '%' comment. When m_TrailerEnds is set the work is delegated to
// RecordingToNextWord(). If the input ends first, the cursor is left at the
// end.
void CPDF_SyntaxParser::ToNextWord() {
  if (m_TrailerEnds) {
    RecordingToNextWord();
    return;
  }

  uint8_t ch;
  if (!GetNextChar(ch))
    return;

  for (;;) {
    // Skip a run of whitespace.
    while (PDFCharIsWhitespace(ch)) {
      if (!GetNextChar(ch))
        return;
    }

    if (ch != '%') {
      // Start of a token: un-read it and stop.
      --m_Pos;
      return;
    }

    // Comment: discard everything up to and including a line-ending byte.
    do {
      if (!GetNextChar(ch))
        return;
    } while (!PDFCharIsLineEnding(ch));
  }
}
// States of the scanner in CPDF_SyntaxParser::RecordingToNextWord(), which
// recognizes the sequence % -> E -> O -> F -> line ending.
enum class EofState {
  // At the start of the scan or just after a line ending.
  kInitial = 0,
  // The first non-whitespace character was not '%'.
  kNonPercent,
  // Inside a comment, only '%' characters seen so far.
  kPercent,
  // "%E" seen.
  kE,
  // "%EO" seen.
  kO,
  // "%EOF" seen.
  kF,
  // Inside a comment that cannot be an EOF marker.
  kInvalid,
};
// Variant of ToNextWord() used while m_TrailerEnds is set. Like ToNextWord()
// it moves the cursor to the first character that is neither whitespace nor
// part of a '%' comment. In addition, for every comment of the form "%EOF"
// (one or more leading '%') that is directly followed by a line ending, it
// appends the position just past that line ending to m_TrailerEnds.
void CPDF_SyntaxParser::RecordingToNextWord() {
DCHECK(m_TrailerEnds);
EofState eof_state = EofState::kInitial;
// Find the first character which is neither whitespace, nor part of a
// comment.
while (true) {
uint8_t ch;
if (!GetNextChar(ch))
return;
switch (eof_state) {
case EofState::kInitial:
// Whitespace keeps the state; anything else either starts a comment or
// is the character being looked for.
if (!PDFCharIsWhitespace(ch))
eof_state = ch == '%' ? EofState::kPercent : EofState::kNonPercent;
break;
case EofState::kNonPercent:
break;
case EofState::kPercent:
// Additional '%' characters are tolerated before the 'E'.
if (ch == 'E')
eof_state = EofState::kE;
else if (ch != '%')
eof_state = EofState::kInvalid;
break;
case EofState::kE:
eof_state = ch == 'O' ? EofState::kO : EofState::kInvalid;
break;
case EofState::kO:
eof_state = ch == 'F' ? EofState::kF : EofState::kInvalid;
break;
case EofState::kF:
if (ch == '\r') {
// See if \r has to be combined with a \n that follows it
// immediately.
if (GetNextChar(ch) && ch != '\n') {
ch = '\r';
m_Pos--;
}
}
// If we now have a \r, that's not followed by a \n, so both are OK.
// m_Pos is now just past the line ending that follows "%EOF".
if (ch == '\r' || ch == '\n')
m_TrailerEnds->push_back(m_Pos);
eof_state = EofState::kInvalid;
break;
case EofState::kInvalid:
break;
}
// A line ending terminates any comment and restarts the scan.
if (PDFCharIsLineEnding(ch))
eof_state = EofState::kInitial;
if (eof_state == EofState::kNonPercent)
break;
}
// Un-read the first character of the next token.
m_Pos--;
}
// Reads the next token inside a validator session and returns it with a flag
// telling whether it is a number. When the validator reports read problems
// the returned word is empty, but the flag still reflects what the tokenizer
// saw.
CPDF_SyntaxParser::WordResult CPDF_SyntaxParser::GetNextWord() {
  CPDF_ReadValidator::ScopedSession read_session(GetValidator());
  const bool is_number = GetNextWordInternal() == WordType::kNumber;
  if (GetValidator()->has_read_problems())
    return {ByteString(), is_number};
  return {ByteString(m_WordBuffer.data(), m_WordSize), is_number};
}
// Returns the next token without consuming it: the cursor is restored to its
// previous value on return.
ByteString CPDF_SyntaxParser::PeekNextWord() {
  AutoRestorer<FX_FILESIZE> cursor_restorer(&m_Pos);
  WordResult peeked = GetNextWord();
  return std::move(peeked.word);
}
// Consumes the next token and returns its text, discarding the is-number
// flag. The result is empty when GetNextWord() yields an empty word.
ByteString CPDF_SyntaxParser::GetKeyword() {
return GetNextWord().word;
}
// Moves the cursor to `pos`, clamped so that it never exceeds m_FileLen.
// `pos` must not be negative.
void CPDF_SyntaxParser::SetPos(FX_FILESIZE pos) {
  DCHECK_GE(pos, 0);
  m_Pos = pos < m_FileLen ? pos : m_FileLen;
}
// Parses one object at the cursor in loose mode, inside a validator session.
// Returns nullptr when nothing could be parsed or when the validator reports
// read problems.
RetainPtr<CPDF_Object> CPDF_SyntaxParser::GetObjectBody(
    CPDF_IndirectObjectHolder* pObjList) {
  CPDF_ReadValidator::ScopedSession read_session(GetValidator());
  RetainPtr<CPDF_Object> parsed =
      GetObjectBodyInternal(pObjList, ParseType::kLoose);
  return GetValidator()->has_read_problems() ? nullptr : parsed;
}
// Parses one object at the cursor and returns it: a number, an "N G R"
// reference, a boolean, null, a literal or hex string, an array, a name, a
// dictionary, or a dictionary followed by stream data.
// Returns nullptr when no object could be parsed, when a reference carries
// CPDF_Object::kInvalidObjNum, or when the nesting depth exceeds
// kParserMaxRecursionDepth.
// `pObjList` is handed to every CPDF_Reference that is created. `parse_type`
// selects how malformed arrays and dictionaries at this level are treated;
// nested objects are always parsed with ParseType::kLoose.
RetainPtr<CPDF_Object> CPDF_SyntaxParser::GetObjectBodyInternal(
CPDF_IndirectObjectHolder* pObjList,
ParseType parse_type) {
// The depth counter is restored to its previous value on every return path.
AutoRestorer<int> depth_restorer(&s_CurrentRecursionDepth);
if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth)
return nullptr;
FX_FILESIZE SavedObjPos = m_Pos;
WordResult word_result = GetNextWord();
const ByteString& word = word_result.word;
if (word.IsEmpty())
return nullptr;
if (word_result.is_number) {
// Look ahead for "<number> R". Unless that pattern is found, the cursor
// is restored to just past the first number.
AutoRestorer<FX_FILESIZE> pos_restorer(&m_Pos);
WordResult nextword = GetNextWord();
if (!nextword.is_number)
return pdfium::MakeRetain<CPDF_Number>(word.AsStringView());
WordResult nextword2 = GetNextWord();
if (nextword2.word != "R")
return pdfium::MakeRetain<CPDF_Number>(word.AsStringView());
// It is a reference: keep the cursor past the "R".
pos_restorer.AbandonRestoration();
uint32_t refnum = FXSYS_atoui(word.c_str());
if (refnum == CPDF_Object::kInvalidObjNum)
return nullptr;
return pdfium::MakeRetain<CPDF_Reference>(pObjList, refnum);
}
if (word == "true" || word == "false")
return pdfium::MakeRetain<CPDF_Boolean>(word == "true");
if (word == "null")
return pdfium::MakeRetain<CPDF_Null>();
if (word == "(") {
ByteString str = ReadString();
return pdfium::MakeRetain<CPDF_String>(m_pPool, str, false);
}
if (word == "<") {
ByteString str = ReadHexString();
return pdfium::MakeRetain<CPDF_String>(m_pPool, str, true);
}
if (word == "[") {
// Collect elements until one fails to parse.
auto pArray = pdfium::MakeRetain<CPDF_Array>();
while (RetainPtr<CPDF_Object> pObj =
GetObjectBodyInternal(pObjList, ParseType::kLoose)) {
// `pObj` cannot be a stream, per ISO 32000-1:2008 section 7.3.8.1.
if (!pObj->IsStream()) {
pArray->Append(std::move(pObj));
}
}
// Outside loose mode the array is accepted only if the token that ended
// the loop starts with ']'.
return (parse_type == ParseType::kLoose || m_WordBuffer[0] == ']')
? std::move(pArray)
: nullptr;
}
if (word[0] == '/') {
// Name: decode the token text that follows the leading '/'.
auto word_span = pdfium::make_span(m_WordBuffer).first(m_WordSize);
return pdfium::MakeRetain<CPDF_Name>(
m_pPool, PDF_NameDecode(ByteStringView(word_span).Substr(1)));
}
if (word == "<<") {
RetainPtr<CPDF_Dictionary> pDict =
pdfium::MakeRetain<CPDF_Dictionary>(m_pPool);
while (true) {
WordResult inner_word_result = GetNextWord();
const ByteString& inner_word = inner_word_result.word;
if (inner_word.IsEmpty())
return nullptr;
// Position at which `inner_word` started.
FX_FILESIZE SavedPos = m_Pos - inner_word.GetLength();
if (inner_word == ">>")
break;
if (inner_word == "endobj") {
// The dictionary was never closed: stop here and leave "endobj"
// unconsumed.
m_Pos = SavedPos;
break;
}
// Tokens that are not names cannot be keys; skip them.
if (inner_word[0] != '/')
continue;
ByteString key = PDF_NameDecode(inner_word.AsStringView());
if (key.IsEmpty() && parse_type == ParseType::kLoose)
continue;
RetainPtr<CPDF_Object> pObj =
GetObjectBodyInternal(pObjList, ParseType::kLoose);
if (!pObj) {
// Loose mode skips a key whose value cannot be parsed; otherwise the
// whole dictionary is rejected.
if (parse_type == ParseType::kLoose)
continue;
ToNextLine();
return nullptr;
}
// `key` has to be "/X" at the minimum.
// `pObj` cannot be a stream, per ISO 32000-1:2008 section 7.3.8.1.
if (key.GetLength() > 1 && !pObj->IsStream()) {
pDict->SetFor(key.Substr(1), std::move(pObj));
}
}
// A dictionary followed by the "stream" keyword is a stream object;
// otherwise the cursor goes back to just past the dictionary.
AutoRestorer<FX_FILESIZE> pos_restorer(&m_Pos);
if (GetNextWord().word != "stream")
return pDict;
pos_restorer.AbandonRestoration();
return ReadStream(std::move(pDict));
}
// An unexpected ">>" is un-read, presumably so that an enclosing dictionary
// parse can consume it. Any other unrecognized token stays consumed.
if (word == ">>")
m_Pos = SavedObjPos;
return nullptr;
}
// Parses an indirect object of the form "<objnum> <gennum> obj <body>" at the
// cursor. If the header is malformed the cursor is put back where it was and
// nullptr is returned. Otherwise the body is parsed with `parse_type`, tagged
// with the object and generation numbers, and returned; nullptr is returned
// when the body cannot be parsed or the validator reports read problems.
RetainPtr<CPDF_Object> CPDF_SyntaxParser::GetIndirectObject(
    CPDF_IndirectObjectHolder* pObjList,
    ParseType parse_type) {
  CPDF_ReadValidator::ScopedSession read_session(GetValidator());
  const FX_FILESIZE saved_pos = GetPos();
  const auto rewind_and_fail = [this,
                                saved_pos]() -> RetainPtr<CPDF_Object> {
    SetPos(saved_pos);
    return nullptr;
  };

  const WordResult objnum = GetNextWord();
  if (objnum.word.IsEmpty() || !objnum.is_number)
    return rewind_and_fail();
  const uint32_t parser_objnum = FXSYS_atoui(objnum.word.c_str());

  const WordResult gennum = GetNextWord();
  if (gennum.word.IsEmpty() || !gennum.is_number)
    return rewind_and_fail();
  const uint32_t parser_gennum = FXSYS_atoui(gennum.word.c_str());

  if (GetKeyword() != "obj")
    return rewind_and_fail();

  RetainPtr<CPDF_Object> body = GetObjectBodyInternal(pObjList, parse_type);
  if (body) {
    body->SetObjNum(parser_objnum);
    body->SetGenNum(parser_gennum);
  }
  if (GetValidator()->has_read_problems())
    return nullptr;
  return body;
}
// Returns the length of the end-of-line marker found at `pos`: 2 for "\r\n",
// 1 for a lone '\r' or '\n', and 0 when there is none. The cursor is not
// moved. Bytes that cannot be read count as 0.
unsigned int CPDF_SyntaxParser::ReadEOLMarkers(FX_FILESIZE pos) {
  unsigned char first = 0;
  unsigned char second = 0;
  GetCharAt(pos, first);
  GetCharAt(pos + 1, second);

  if (first == '\n')
    return 1;
  if (first != '\r')
    return 0;
  return second == '\n' ? 2 : 1;
}
// Searches forward from the cursor for `word` appearing as a whole keyword
// and returns the position at which it starts, or -1 when it is not found.
// The cursor is restored on return.
FX_FILESIZE CPDF_SyntaxParser::FindWordPos(ByteStringView word) {
  AutoRestorer<FX_FILESIZE> cursor_restorer(&m_Pos);
  while (FindTag(word) >= 0) {
    // FindTag() leaves the cursor just past the match.
    const FX_FILESIZE word_start = GetPos() - word.GetLength();
    if (IsWholeWord(word_start, m_FileLen, word, true))
      return word_start;
  }
  return -1;
}
// Locates the end of the stream data that starts at the cursor by searching
// for the keywords "endstream" and "endobj" and taking whichever comes first.
// An end-of-line marker directly in front of that keyword is excluded from
// the data. Returns -1 when neither keyword is found or when the computed end
// lies before the cursor. The cursor is not moved.
FX_FILESIZE CPDF_SyntaxParser::FindStreamEndPos() {
  const ByteStringView endstream_keyword("endstream");
  const ByteStringView endobj_keyword("endobj");
  const FX_FILESIZE endstream_pos = FindWordPos(endstream_keyword);
  const FX_FILESIZE endobj_pos = FindWordPos(endobj_keyword);

  // Can't find "endstream" or "endobj".
  if (endstream_pos < 0 && endobj_pos < 0)
    return -1;

  // Use the keyword that was found, or the earlier one when both were.
  FX_FILESIZE end_pos;
  if (endstream_pos < 0) {
    end_pos = endobj_pos;
  } else if (endobj_pos < 0) {
    end_pos = endstream_pos;
  } else {
    end_pos = std::min(endstream_pos, endobj_pos);
  }

  // Drop a preceding "\r\n", or failing that a single '\r' or '\n'.
  if (ReadEOLMarkers(end_pos - 2) == 2) {
    end_pos -= 2;
  } else if (ReadEOLMarkers(end_pos - 1) == 1) {
    end_pos -= 1;
  }

  if (end_pos < GetPos())
    return -1;
  return end_pos;
}
// Reads the stream data that follows the "stream" keyword and returns a
// CPDF_Stream holding a copy of that data together with `pDict`.
// The length comes from the dictionary's direct "Length" entry when that
// value is plausible and is followed by "endstream"; otherwise the data is
// delimited by searching for "endstream" / "endobj".
// Returns nullptr when required data is unavailable, when the validator
// reports read problems, or when no end of the stream can be located.
RetainPtr<CPDF_Stream> CPDF_SyntaxParser::ReadStream(
RetainPtr<CPDF_Dictionary> pDict) {
RetainPtr<const CPDF_Number> pLenObj =
ToNumber(pDict->GetDirectObjectFor("Length"));
// -1 means "length unknown".
FX_FILESIZE len = pLenObj ? pLenObj->GetInteger() : -1;
// Locate the start of stream.
ToNextLine();
const FX_FILESIZE streamStartPos = GetPos();
if (len > 0) {
// Distrust a length that overflows or reaches the end of the file.
FX_SAFE_FILESIZE pos = GetPos();
pos += len;
if (!pos.IsValid() || pos.ValueOrDie() >= m_FileLen)
len = -1;
}
RetainPtr<IFX_SeekableReadStream> substream;
if (len > 0) {
// Check data availability first to allow the Validator to request data
// smoothly, without jumps.
if (!GetValidator()->CheckDataRangeAndRequestIfUnavailable(
m_HeaderOffset + GetPos(), len)) {
return nullptr;
}
substream = pdfium::MakeRetain<ReadableSubStream>(
GetValidator(), m_HeaderOffset + GetPos(), len);
SetPos(GetPos() + len);
}
const ByteStringView kEndStreamStr("endstream");
const ByteStringView kEndObjStr("endobj");
// Note, we allow zero length streams as we need to pass them through when we
// are importing pages into a new document.
if (len >= 0) {
CPDF_ReadValidator::ScopedSession read_session(GetValidator());
// Skip an end-of-line marker between the data and the keyword.
m_Pos += ReadEOLMarkers(GetPos());
// Zero the part of the word buffer that is compared below, so that leftovers
// from an earlier token cannot produce a false match.
memset(m_WordBuffer.data(), 0, kEndStreamStr.GetLength() + 1);
GetNextWordInternal();
if (GetValidator()->has_read_problems())
return nullptr;
// Earlier version of PDF specification doesn't require EOL marker before
// 'endstream' keyword. If keyword 'endstream' follows the bytes in
// specified length, it signals the end of stream.
if (memcmp(m_WordBuffer.data(), kEndStreamStr.unterminated_unsigned_str(),
kEndStreamStr.GetLength()) != 0) {
// The declared length is wrong: discard it and fall back to searching.
substream.Reset();
len = -1;
SetPos(streamStartPos);
}
}
if (len < 0) {
// If len is not available or incorrect, len needs to be calculated
// by searching the keywords "endstream" or "endobj".
const FX_FILESIZE streamEndPos = FindStreamEndPos();
if (streamEndPos < 0)
return nullptr;
len = streamEndPos - streamStartPos;
DCHECK_GE(len, 0);
if (len > 0) {
SetPos(streamStartPos);
// Check data availability first to allow the Validator to request data
// smoothly, without jumps.
if (!GetValidator()->CheckDataRangeAndRequestIfUnavailable(
m_HeaderOffset + GetPos(), len)) {
return nullptr;
}
substream = pdfium::MakeRetain<ReadableSubStream>(
GetValidator(), m_HeaderOffset + GetPos(), len);
SetPos(GetPos() + len);
}
}
RetainPtr<CPDF_Stream> stream;
if (substream) {
// It is unclear from CPDF_SyntaxParser's perspective what object
// `substream` is ultimately holding references to. To avoid unexpectedly
// changing object lifetimes by handing `substream` to `stream`, make a
// copy of the data here.
auto data = FixedSizeDataVector<uint8_t>::Uninit(substream->GetSize());
bool did_read = substream->ReadBlockAtOffset(data.span(), 0);
CHECK(did_read);
auto data_as_stream =
pdfium::MakeRetain<CFX_ReadOnlyVectorStream>(std::move(data));
stream = pdfium::MakeRetain<CPDF_Stream>(std::move(data_as_stream),
std::move(pDict));
} else {
// No substream is created only for a zero-length stream.
DCHECK(!len);
stream = pdfium::MakeRetain<CPDF_Stream>(std::move(pDict));
}
// Remember where the token after the data starts, then read it.
const FX_FILESIZE end_stream_offset = GetPos();
memset(m_WordBuffer.data(), 0, kEndObjStr.GetLength() + 1);
GetNextWordInternal();
// Allow whitespace after endstream and before a newline.
unsigned char ch = 0;
while (GetNextChar(ch)) {
if (!PDFCharIsWhitespace(ch) || PDFCharIsLineEnding(ch))
break;
}
// Step back one position (onto the byte that ended the loop above, when one
// was read).
SetPos(GetPos() - 1);
int numMarkers = ReadEOLMarkers(GetPos());
// If the token just read was "endobj" followed by an end-of-line marker,
// rewind to in front of it so that it is left unconsumed.
if (m_WordSize == static_cast<unsigned int>(kEndObjStr.GetLength()) &&
numMarkers != 0 &&
memcmp(m_WordBuffer.data(), kEndObjStr.unterminated_unsigned_str(),
kEndObjStr.GetLength()) == 0) {
SetPos(end_stream_offset);
}
return stream;
}
// Consumes the next token and returns its value as an unsigned integer.
// Returns 0 when the token is not a number.
uint32_t CPDF_SyntaxParser::GetDirectNum() {
  const bool is_number = GetNextWordInternal() == WordType::kNumber;
  if (!is_number)
    return 0;

  // NUL-terminate the token in place so it can be parsed as a C string.
  m_WordBuffer[m_WordSize] = 0;
  const auto chars = pdfium::as_chars(pdfium::make_span(m_WordBuffer));
  return FXSYS_atoui(chars.data());
}
// Returns a retained reference to the validator through which this parser
// performs all of its reads.
RetainPtr<CPDF_ReadValidator> CPDF_SyntaxParser::GetValidator() const {
return m_pFileAccess;
}
// Tells whether the occurrence of `tag` at `startpos` stands on its own,
// i.e. is not glued to a neighbouring character that would make it part of a
// longer token. A side is examined only when the tag's character on that side
// is neither a delimiter nor whitespace. The right side is examined only when
// `startpos` plus the tag length does not exceed `limit`, the left side only
// when `startpos` is positive. With `checkKeyword` set, an adjacent delimiter
// also disqualifies the occurrence. `tag` must not be empty.
bool CPDF_SyntaxParser::IsWholeWord(FX_FILESIZE startpos,
                                    FX_FILESIZE limit,
                                    ByteStringView tag,
                                    bool checkKeyword) {
  const auto is_regular = [](uint8_t c) {
    return !PDFCharIsDelimiter(c) && !PDFCharIsWhitespace(c);
  };
  const auto joins_word = [checkKeyword](uint8_t c) {
    return PDFCharIsNumeric(c) || PDFCharIsOther(c) ||
           (checkKeyword && PDFCharIsDelimiter(c));
  };

  const uint32_t tag_length = tag.GetLength();
  const bool check_left = is_regular(tag[0]);
  const bool check_right = is_regular(tag[tag_length - 1]);

  uint8_t neighbour;
  if (check_right) {
    const FX_FILESIZE after_tag = startpos + static_cast<int32_t>(tag_length);
    if (after_tag <= limit && GetCharAt(after_tag, neighbour) &&
        joins_word(neighbour)) {
      return false;
    }
  }
  if (check_left && startpos > 0 && GetCharAt(startpos - 1, neighbour) &&
      joins_word(neighbour)) {
    return false;
  }
  return true;
}
// Searches backwards from the cursor for `word` appearing as a whole word.
// On success the cursor is moved to the first character of the match and
// true is returned; otherwise the cursor is left unchanged and false is
// returned. A non-zero `limit` bounds how far back (in bytes from the
// cursor) the search may go. An empty `word` is never found.
// NOTE(review): `limit` is a backward distance here, but IsWholeWord()
// compares its second argument against a position - confirm this is intended.
bool CPDF_SyntaxParser::BackwardsSearchToWord(ByteStringView word,
                                              FX_FILESIZE limit) {
  int32_t taglen = word.GetLength();
  if (taglen == 0)
    return false;

  FX_FILESIZE pos = m_Pos;
  // Index of the character of `word` expected at `pos`; matching proceeds
  // from the last character of the word towards the first.
  int32_t offset = taglen - 1;
  while (true) {
    // Never look in front of the start of the document.
    if (pos < 0)
      return false;
    if (limit && pos <= m_Pos - limit)
      return false;

    uint8_t byte;
    if (!GetCharAtBackward(pos, &byte))
      return false;

    if (byte == word[offset]) {
      offset--;
      if (offset >= 0) {
        pos--;
        continue;
      }
      if (IsWholeWord(pos, limit, word, false)) {
        m_Pos = pos;
        return true;
      }
    }

    // Restart the match. If this byte equals the last character of the word
    // it already counts as the first step of a new attempt. A one-character
    // word has no earlier character to continue with, so it always restarts
    // at index 0 rather than at the invalid index -1.
    offset = (taglen > 1 && byte == word[taglen - 1]) ? taglen - 2
                                                      : taglen - 1;
    pos--;
  }
}
// Scans forward from the cursor for the first occurrence of `tag`.
// On success the cursor is left just past the match and the return value is
// the offset of the match relative to the position the search started from.
// Returns -1, with the cursor at the end of the readable input, when the tag
// does not occur. `tag` must not be empty.
FX_FILESIZE CPDF_SyntaxParser::FindTag(ByteStringView tag) {
  const FX_FILESIZE startpos = GetPos();
  const int32_t taglen = tag.GetLength();
  DCHECK_GT(taglen, 0);

  // Number of leading characters of `tag` matched by the most recent input.
  int32_t match = 0;
  while (true) {
    uint8_t ch;
    if (!GetNextChar(ch))
      return -1;

    if (ch == tag[match]) {
      match++;
      if (match == taglen)
        return GetPos() - startpos - taglen;
      continue;
    }

    // Mismatch. The input seen so far ends with tag[0..match) followed by
    // `ch`. Fall back to the longest prefix of `tag` that is a suffix of that
    // text, so that overlapping occurrences are not missed (for example
    // "endstream" inside "endstrendstream"). Tags are short, so the fallback
    // is computed directly instead of from a precomputed failure table.
    int32_t fallback = match;
    while (fallback > 0) {
      bool is_suffix = tag[fallback - 1] == ch;
      const int32_t shift = match - fallback + 1;
      for (int32_t i = 0; is_suffix && i < fallback - 1; ++i)
        is_suffix = tag[i] == tag[shift + i];
      if (is_suffix)
        break;
      --fallback;
    }
    match = fallback;
  }
}
// Tells whether file position `pos` lies inside the range currently held in
// the read buffer, [m_BufOffset, m_BufOffset + m_pFileBuf.size()).
bool CPDF_SyntaxParser::IsPositionRead(FX_FILESIZE pos) const {
  if (pos < m_BufOffset)
    return false;
  const auto buffer_end =
      static_cast<FX_FILESIZE>(m_BufOffset + m_pFileBuf.size());
  return pos < buffer_end;
}