// core/fpdfapi/page/cpdf_streamparser.cpp - pdfium - Git at Google

 // Copyright 2016 PDFium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com

 #include "core/fpdfapi/page/cpdf_streamparser.h"

 #include <algorithm>
 #include <memory>
 #include <sstream>
 #include <utility>

 #include "constants/stream_dict_common.h"
 #include "core/fpdfapi/page/cpdf_docpagedata.h"
 #include "core/fpdfapi/parser/cpdf_array.h"
 #include "core/fpdfapi/parser/cpdf_boolean.h"
 #include "core/fpdfapi/parser/cpdf_dictionary.h"
 #include "core/fpdfapi/parser/cpdf_name.h"
 #include "core/fpdfapi/parser/cpdf_null.h"
 #include "core/fpdfapi/parser/cpdf_number.h"
 #include "core/fpdfapi/parser/cpdf_stream.h"
 #include "core/fpdfapi/parser/cpdf_string.h"
 #include "core/fpdfapi/parser/fpdf_parser_decode.h"
 #include "core/fpdfapi/parser/fpdf_parser_utility.h"
 #include "core/fxcodec/fx_codec.h"
 #include "core/fxcodec/jpeg/jpegmodule.h"
 #include "core/fxcodec/scanlinedecoder.h"
 #include "core/fxcrt/fx_extension.h"
 #include "core/fxcrt/fx_memory_wrappers.h"
 #include "core/fxcrt/fx_safe_types.h"
 #include "third_party/base/check.h"

 namespace {

 // Deepest nesting ReadNextObject() accepts before it stops building objects.
 constexpr uint32_t kMaxNestedParsingLevel = 512;
 // Cap applied to the length of strings returned by ReadString() and
 // ReadHexString().
 constexpr size_t kMaxStringLength = 32767;

 // Spellings of the literal words that the parser turns into objects.
 constexpr char kTrue[] = "true";
 constexpr char kFalse[] = "false";
 constexpr char kNull[] = "null";

 uint32_t DecodeAllScanlines(std::unique_ptr<ScanlineDecoder> pDecoder) {
   if (!pDecoder)
     return FX_INVALID_OFFSET;

   int ncomps = pDecoder->CountComps();
   int bpc = pDecoder->GetBPC();
   int width = pDecoder->GetWidth();
   int height = pDecoder->GetHeight();
   if (width <= 0 || height <= 0)
     return FX_INVALID_OFFSET;

   FX_SAFE_UINT32 size = fxcodec::CalculatePitch8(bpc, ncomps, width);
   size *= height;
   if (size.ValueOrDefault(0) == 0)
     return FX_INVALID_OFFSET;

   for (int row = 0; row < height; ++row) {
     if (!pDecoder->GetScanline(row))
       break;
   }
   return pDecoder->GetSrcOffset();
 }

 // Runs the filter named |decoder| over |src_span| and returns the value that
 // the chosen decoding routine reports. The decoded bytes themselves are thrown
 // away. |decoder| must be a full filter name, not an abbreviation. |width| and
 // |height| are forwarded to the scanline-based decoders, |pParam| (may be
 // null) to every decoder that accepts parameters, and |orig_size| only to the
 // Flate path. Returns FX_INVALID_OFFSET for an unrecognized filter name.
 uint32_t DecodeInlineStream(pdfium::span<const uint8_t> src_span,
                             int width,
                             int height,
                             const ByteString& decoder,
                             const CPDF_Dictionary* pParam,
                             uint32_t orig_size) {
   // Abbreviated names are expected to have been expanded already.
   DCHECK(decoder != "A85");
   DCHECK(decoder != "AHx");
   DCHECK(decoder != "CCF");
   DCHECK(decoder != "DCT");
   DCHECK(decoder != "Fl");
   DCHECK(decoder != "LZW");
   DCHECK(decoder != "RL");

   // Out-parameters required by the decode routines; their contents are unused.
   std::unique_ptr<uint8_t, FxFreeDeleter> discarded_data;
   uint32_t discarded_size;

   const bool is_lzw = decoder == "LZWDecode";
   if (is_lzw || decoder == "FlateDecode") {
     // LZW is given 0 where Flate is given |orig_size|.
     return FlateOrLZWDecode(is_lzw, src_span, pParam, is_lzw ? 0 : orig_size,
                             &discarded_data, &discarded_size);
   }

   const bool is_dct = decoder == "DCTDecode";
   if (is_dct || decoder == "CCITTFaxDecode") {
     std::unique_ptr<ScanlineDecoder> pDecoder;
     if (is_dct) {
       // Without parameters the colour transform defaults to enabled.
       const bool color_transform =
           !pParam || pParam->GetIntegerFor("ColorTransform", 1);
       pDecoder = JpegModule::CreateDecoder(src_span, width, height, 0,
                                            color_transform);
     } else {
       pDecoder = CreateFaxDecoder(src_span, width, height, pParam);
     }
     return DecodeAllScanlines(std::move(pDecoder));
   }

   if (decoder == "ASCII85Decode")
     return A85Decode(src_span, &discarded_data, &discarded_size);
   if (decoder == "ASCIIHexDecode")
     return HexDecode(src_span, &discarded_data, &discarded_size);
   if (decoder == "RunLengthDecode")
     return RunLengthDecode(src_span, &discarded_data, &discarded_size);

   return FX_INVALID_OFFSET;
 }

 }  // namespace

 // Creates a parser that reads from |span|. No ByteStringPool is supplied;
 // m_pBuf is the only member initialized explicitly here.
 CPDF_StreamParser::CPDF_StreamParser(pdfium::span<const uint8_t> span)
     : m_pBuf(span) {}

 // Creates a parser that reads from |span| and stores |pPool| in m_pPool.
 // ReadNextObject() hands m_pPool to the names, strings and dictionaries it
 // creates.
 CPDF_StreamParser::CPDF_StreamParser(pdfium::span<const uint8_t> span,
                                      const WeakPtr<ByteStringPool>& pPool)
     : m_pPool(pPool), m_pBuf(span) {}

 // Nothing to release by hand; the compiler-generated destructor is used.
 CPDF_StreamParser::~CPDF_StreamParser() = default;

 // Builds a CPDF_Stream from the inline image data found at the current
 // position. |pDict| is the image dictionary: "Filter", the decode parameters,
 // "Width", "Height" and (when |pCSObj| is set) "BitsPerComponent" are read
 // from it, and "Length" is written to it before it is moved into the stream.
 // |pCSObj| is resolved through |pDoc| to obtain the component count.
 //
 // The bytes stored in the stream are copied verbatim from the buffer. A
 // filter is only run to find out how many source bytes belong to the image.
 // Returns nullptr when no data is left, when the expected image size is
 // invalid, or when the reported length does not fit in an int.
 RetainPtr<CPDF_Stream> CPDF_StreamParser::ReadInlineStream(
     CPDF_Document* pDoc,
     RetainPtr<CPDF_Dictionary> pDict,
     const CPDF_Object* pCSObj) {
   // At most one whitespace byte is skipped in front of the data.
   if (m_Pos < m_pBuf.size() && PDFCharIsWhitespace(m_pBuf[m_Pos]))
     ++m_Pos;
   if (m_Pos == m_pBuf.size())
     return nullptr;

   // Only the first filter, and its matching parameters, are considered.
   ByteString filter_name;
   const CPDF_Dictionary* pParam = nullptr;
   if (CPDF_Object* pFilter = pDict->GetDirectObjectFor("Filter")) {
     if (const CPDF_Array* pFilterArray = pFilter->AsArray()) {
       filter_name = pFilterArray->GetStringAt(0);
       if (const CPDF_Array* pParamArray =
               pDict->GetArrayFor(pdfium::stream::kDecodeParms)) {
         pParam = pParamArray->GetDictAt(0);
       }
     } else {
       filter_name = pFilter->GetString();
       pParam = pDict->GetDictFor(pdfium::stream::kDecodeParms);
     }
   }

   const uint32_t width = pDict->GetIntegerFor("Width");
   const uint32_t height = pDict->GetIntegerFor("Height");
   uint32_t bpc = 1;
   uint32_t nComponents = 1;
   if (pCSObj) {
     RetainPtr<CPDF_ColorSpace> pCS =
         CPDF_DocPageData::FromDocument(pDoc)->GetColorSpace(pCSObj, nullptr);
     // Three components are assumed when the colour space cannot be loaded.
     nComponents = pCS ? pCS->CountComponents() : 3;
     bpc = pDict->GetIntegerFor("BitsPerComponent");
   }
   FX_SAFE_UINT32 size = fxcodec::CalculatePitch8(bpc, nComponents, width);
   size *= height;
   if (!size.IsValid())
     return nullptr;

   const uint32_t expected_size = size.ValueOrDie();
   const uint32_t data_start = m_Pos;
   uint32_t stream_size;
   if (filter_name.IsEmpty()) {
     // Unfiltered data: take the expected size, limited to what is left.
     stream_size = std::min<uint32_t>(expected_size, m_pBuf.size() - m_Pos);
   } else {
     stream_size = DecodeInlineStream(m_pBuf.subspan(m_Pos), width, height,
                                      filter_name, pParam, expected_size);
     if (!pdfium::base::IsValueInRangeForNumericType<int>(stream_size))
       return nullptr;

     // Extend the length over whatever sits between the end reported by the
     // decoder and the "EI" keyword (or the end of the buffer).
     m_Pos += stream_size;
     for (;;) {
       const uint32_t element_start = m_Pos;
       const CPDF_StreamParser::SyntaxType type = ParseNextElement();
       if (type == CPDF_StreamParser::EndOfData)
         break;
       if (type == CPDF_StreamParser::Keyword && GetWord() == "EI")
         break;
       stream_size += m_Pos - element_start;
     }
     m_Pos = data_start;
   }

   // Copy the raw bytes and leave the position just past them.
   std::unique_ptr<uint8_t, FxFreeDeleter> pData(
       FX_AllocUninit(uint8_t, stream_size));
   auto copy_span = m_pBuf.subspan(m_Pos, stream_size);
   memcpy(pData.get(), copy_span.data(), copy_span.size());
   m_Pos += stream_size;

   pDict->SetNewFor<CPDF_Number>("Length", static_cast<int>(stream_size));
   return pdfium::MakeRetain<CPDF_Stream>(std::move(pData), stream_size,
                                          std::move(pDict));
 }

 // Reads the next element from the buffer and classifies it. Whitespace and
 // '%' comments in front of it are skipped. Returns:
 //   EndOfData - nothing but whitespace/comments was left;
 //   Number    - every byte of the word is numeric (word is in m_WordBuffer);
 //   Name      - the word starts with '/';
 //   Others    - an object was parsed into m_pLastObj (which may be null if
 //               ReadNextObject() failed), or the word was true/false/null;
 //   Keyword   - any other word.
 CPDF_StreamParser::SyntaxType CPDF_StreamParser::ParseNextElement() {
   m_pLastObj.Reset();
   m_WordSize = 0;
   if (!PositionIsInBounds())
     return EndOfData;

   uint8_t ch = m_pBuf[m_Pos++];
   for (;;) {
     while (PDFCharIsWhitespace(ch)) {
       if (!PositionIsInBounds())
         return EndOfData;
       ch = m_pBuf[m_Pos++];
     }
     if (ch != '%')
       break;

     // Inside a comment: consume bytes through the line-ending byte.
     do {
       if (!PositionIsInBounds())
         return EndOfData;
       ch = m_pBuf[m_Pos++];
     } while (!PDFCharIsLineEnding(ch));
   }

   // Any delimiter except '/' is handed to ReadNextObject(), which needs to
   // see the delimiter itself, hence the step back.
   if (ch != '/' && PDFCharIsDelimiter(ch)) {
     --m_Pos;
     m_pLastObj = ReadNextObject(false, false, 0);
     return Others;
   }

   // Collect the word. Bytes beyond kMaxWordLength are consumed but dropped.
   bool is_number = true;
   for (;;) {
     if (m_WordSize < kMaxWordLength)
       m_WordBuffer[m_WordSize++] = ch;
     if (!PDFCharIsNumeric(ch))
       is_number = false;

     // A delimiter or whitespace byte ends the word and stays unread.
     if (!PositionIsInBounds())
       break;
     const uint8_t next = m_pBuf[m_Pos];
     if (PDFCharIsDelimiter(next) || PDFCharIsWhitespace(next))
       break;
     ch = next;
     ++m_Pos;
   }
   m_WordBuffer[m_WordSize] = 0;

   if (is_number)
     return Number;
   if (m_WordBuffer[0] == '/')
     return Name;

   const bool is_true = WordBufferMatches(kTrue);
   if (is_true || WordBufferMatches(kFalse)) {
     m_pLastObj = pdfium::MakeRetain<CPDF_Boolean>(is_true);
     return Others;
   }
   if (WordBufferMatches(kNull)) {
     m_pLastObj = pdfium::MakeRetain<CPDF_Null>();
     return Others;
   }
   return Keyword;
 }

 // Parses one object starting at the current position: a number, name,
 // literal or hex string, dictionary, array, boolean or null.
 // |bInArray| says whether the caller is currently inside an array, and
 // |bAllowNestedArray| whether a further '[' is acceptable in that situation.
 // |dwRecursionLevel| is the current nesting depth.
 // Returns nullptr when no word is left, when the depth exceeds
 // kMaxNestedParsingLevel, or when the word does not start a known object.
 RetainPtr<CPDF_Object> CPDF_StreamParser::ReadNextObject(
     bool bAllowNestedArray,
     bool bInArray,
     uint32_t dwRecursionLevel) {
   // The word is fetched before any bail-out so that every call consumes
   // input; otherwise the loops below could spin forever.
   bool is_number;
   GetNextWord(is_number);
   if (dwRecursionLevel > kMaxNestedParsingLevel || !m_WordSize)
     return nullptr;

   if (is_number) {
     m_WordBuffer[m_WordSize] = 0;
     return pdfium::MakeRetain<CPDF_Number>(
         ByteStringView(m_WordBuffer, m_WordSize));
   }

   switch (m_WordBuffer[0]) {
     case '/': {
       ByteString decoded_name =
           PDF_NameDecode(ByteStringView(m_WordBuffer + 1, m_WordSize - 1));
       return pdfium::MakeRetain<CPDF_Name>(m_pPool, decoded_name);
     }

     case '(': {
       ByteString literal = ReadString();
       return pdfium::MakeRetain<CPDF_String>(m_pPool, literal, false);
     }

     case '<': {
       // A lone '<' opens a hex string; "<<" opens a dictionary.
       if (m_WordSize == 1) {
         return pdfium::MakeRetain<CPDF_String>(m_pPool, ReadHexString(),
                                                true);
       }

       auto pDict = pdfium::MakeRetain<CPDF_Dictionary>(m_pPool);
       for (;;) {
         GetNextWord(is_number);
         if (m_WordSize == 2 && m_WordBuffer[0] == '>')
           break;

         // Every key has to be a name.
         if (!m_WordSize || m_WordBuffer[0] != '/')
           return nullptr;

         ByteString key =
             PDF_NameDecode(ByteStringView(m_WordBuffer + 1, m_WordSize - 1));
         RetainPtr<CPDF_Object> pValue =
             ReadNextObject(true, bInArray, dwRecursionLevel + 1);
         if (!pValue)
           return nullptr;

         // Values under an empty key are parsed but not stored.
         if (!key.IsEmpty())
           pDict->SetFor(key, std::move(pValue));
       }
       return pDict;
     }

     case '[': {
       if (bInArray && !bAllowNestedArray)
         return nullptr;

       auto pArray = pdfium::MakeRetain<CPDF_Array>();
       for (;;) {
         RetainPtr<CPDF_Object> pElement =
             ReadNextObject(bAllowNestedArray, true, dwRecursionLevel + 1);
         if (!pElement) {
           // Stop at ']' or at the end of input; skip anything else that
           // failed to parse.
           if (!m_WordSize || m_WordBuffer[0] == ']')
             break;
           continue;
         }
         pArray->Append(std::move(pElement));
       }
       return pArray;
     }

     default:
       break;
   }

   if (WordBufferMatches(kFalse))
     return pdfium::MakeRetain<CPDF_Boolean>(false);
   if (WordBufferMatches(kTrue))
     return pdfium::MakeRetain<CPDF_Boolean>(true);
   if (WordBufferMatches(kNull))
     return pdfium::MakeRetain<CPDF_Null>();
   return nullptr;
 }

 // TODO(npm): the following methods are almost identical in cpdf_syntaxparser
 //
 // Reads the next word into m_WordBuffer / m_WordSize after skipping
 // whitespace and '%' comments. m_WordSize stays 0 when the input runs out
 // first. A word is one of:
 //   - '/' followed by the bytes of a name,
 //   - "<<" or ">>",
 //   - any other single delimiter byte,
 //   - a run of bytes up to the next delimiter or whitespace byte.
 // |bIsNumber| is set to true only when the word is a run made up entirely of
 // numeric bytes. Bytes beyond kMaxWordLength are consumed but not stored.
 void CPDF_StreamParser::GetNextWord(bool& bIsNumber) {
   m_WordSize = 0;
   bIsNumber = true;
   if (!PositionIsInBounds())
     return;

   uint8_t ch = m_pBuf[m_Pos++];
   for (;;) {
     while (PDFCharIsWhitespace(ch)) {
       if (!PositionIsInBounds())
         return;
       ch = m_pBuf[m_Pos++];
     }
     if (ch != '%')
       break;

     // Inside a comment: consume bytes through the line-ending byte.
     do {
       if (!PositionIsInBounds())
         return;
       ch = m_pBuf[m_Pos++];
     } while (!PDFCharIsLineEnding(ch));
   }

   if (PDFCharIsDelimiter(ch)) {
     bIsNumber = false;
     m_WordBuffer[m_WordSize++] = ch;
     if (ch == '/') {
       // Name: keep taking bytes while they are "other" or numeric.
       while (PositionIsInBounds()) {
         const uint8_t next = m_pBuf[m_Pos];
         if (!PDFCharIsOther(next) && !PDFCharIsNumeric(next))
           break;
         ++m_Pos;
         if (m_WordSize < kMaxWordLength)
           m_WordBuffer[m_WordSize++] = next;
       }
     } else if (ch == '<' || ch == '>') {
       // A doubled angle bracket forms a single two-byte word.
       if (PositionIsInBounds() && m_pBuf[m_Pos] == ch) {
         m_WordBuffer[m_WordSize++] = ch;
         ++m_Pos;
       }
     }
     return;
   }

   for (;;) {
     if (m_WordSize < kMaxWordLength)
       m_WordBuffer[m_WordSize++] = ch;
     if (!PDFCharIsNumeric(ch))
       bIsNumber = false;

     // A delimiter or whitespace byte ends the word and stays unread.
     if (!PositionIsInBounds())
       return;
     const uint8_t next = m_pBuf[m_Pos];
     if (PDFCharIsDelimiter(next) || PDFCharIsWhitespace(next))
       return;
     ch = next;
     ++m_Pos;
   }
 }

 // Reads the body of a literal string; the opening '(' has already been
 // consumed. Nested, balanced parentheses are kept in the result. Backslash
 // escapes are decoded: \n \r \t \b \f, one to three octal digits, and a
 // backslash followed by a line ending (dropped). Any other escaped byte is
 // copied as is. Reading stops at the matching ')' or at the end of the
 // buffer. The result is truncated to kMaxStringLength bytes.
 ByteString CPDF_StreamParser::ReadString() {
   if (!PositionIsInBounds())
     return ByteString();

   enum class State {
     kText,            // Ordinary string content.
     kBackslash,       // Just saw '\'.
     kOctalSecond,     // One octal digit read; a second may follow.
     kOctalThird,      // Two octal digits read; a third may follow.
     kAfterEscapedCR,  // Saw '\' + CR; a following LF is swallowed too.
   };

   std::ostringstream out;
   // Packages whatever has been collected so far.
   auto make_result = [&out]() {
     if (out.tellp() <= 0)
       return ByteString();
     return ByteString(
         out.str().c_str(),
         std::min(static_cast<size_t>(out.tellp()), kMaxStringLength));
   };

   State state = State::kText;
   int paren_depth = 0;
   int octal_value = 0;
   uint8_t ch = m_pBuf[m_Pos++];
   for (;;) {
     // A `continue` below re-examines the same byte in the new state; falling
     // out of the switch advances to the next byte.
     switch (state) {
       case State::kText:
         if (ch == ')') {
           if (paren_depth == 0)
             return make_result();
           --paren_depth;
           out << ')';
         } else if (ch == '(') {
           ++paren_depth;
           out << '(';
         } else if (ch == '\\') {
           state = State::kBackslash;
         } else {
           out << static_cast<char>(ch);
         }
         break;

       case State::kBackslash:
         if (FXSYS_IsOctalDigit(ch)) {
           octal_value = FXSYS_DecimalCharToInt(static_cast<char>(ch));
           state = State::kOctalSecond;
           break;
         }
         if (ch == '\r') {
           state = State::kAfterEscapedCR;
           break;
         }
         switch (ch) {
           case '\n':
             // Escaped line feed: contributes nothing.
             break;
           case 'n':
             out << '\n';
             break;
           case 'r':
             out << '\r';
             break;
           case 't':
             out << '\t';
             break;
           case 'b':
             out << '\b';
             break;
           case 'f':
             out << '\f';
             break;
           default:
             out << static_cast<char>(ch);
             break;
         }
         state = State::kText;
         break;

       case State::kOctalSecond:
         if (!FXSYS_IsOctalDigit(ch)) {
           out << static_cast<char>(octal_value);
           state = State::kText;
           continue;
         }
         octal_value =
             octal_value * 8 + FXSYS_DecimalCharToInt(static_cast<char>(ch));
         state = State::kOctalThird;
         break;

       case State::kOctalThird:
         state = State::kText;
         if (!FXSYS_IsOctalDigit(ch)) {
           out << static_cast<char>(octal_value);
           continue;
         }
         octal_value =
             octal_value * 8 + FXSYS_DecimalCharToInt(static_cast<char>(ch));
         out << static_cast<char>(octal_value);
         break;

       case State::kAfterEscapedCR:
         state = State::kText;
         if (ch != '\n')
           continue;
         break;
     }

     if (!PositionIsInBounds())
       break;
     ch = m_pBuf[m_Pos++];
   }

   // Input ended before the closing ')': return what was gathered.
   return make_result();
 }

 // Reads the body of a hex string; the opening '<' has already been consumed.
 // Hex digits are paired into bytes, non-hex bytes are skipped, and reading
 // stops after '>' or at the end of the buffer. A trailing unpaired digit is
 // used as the high nibble of a final byte. The result is truncated to
 // kMaxStringLength bytes.
 ByteString CPDF_StreamParser::ReadHexString() {
   if (!PositionIsInBounds())
     return ByteString();

   std::ostringstream out;
   bool have_high_nibble = false;
   int byte_value = 0;
   while (PositionIsInBounds()) {
     const uint8_t ch = m_pBuf[m_Pos++];
     if (ch == '>')
       break;
     if (!std::isxdigit(ch))
       continue;

     const int nibble = FXSYS_HexCharToInt(ch);
     if (have_high_nibble) {
       byte_value += nibble;
       out << static_cast<uint8_t>(byte_value);
     } else {
       byte_value = nibble * 16;
     }
     have_high_nibble = !have_high_nibble;
   }

   // Odd digit count: emit the pending high nibble on its own.
   if (have_high_nibble)
     out << static_cast<char>(byte_value);

   if (out.tellp() <= 0)
     return ByteString();

   const size_t length =
       std::min(static_cast<size_t>(out.tellp()), kMaxStringLength);
   return ByteString(out.str().c_str(), length);
 }

 // True while m_Pos still indexes a byte inside m_pBuf.
 bool CPDF_StreamParser::PositionIsInBounds() const {
   const auto buffer_size = m_pBuf.size();
   return m_Pos < buffer_size;
 }

 // True when the current word (m_WordSize bytes of m_WordBuffer) is exactly
 // the NUL-terminated string |pWord|.
 bool CPDF_StreamParser::WordBufferMatches(const char* pWord) const {
   const size_t expected_length = strlen(pWord);
   if (m_WordSize != expected_length)
     return false;
   return memcmp(m_WordBuffer, pWord, expected_length) == 0;
 }
	// Copyright 2016 PDFium Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com

	#include "core/fpdfapi/page/cpdf_streamparser.h"

	#include <algorithm>
	#include <memory>
	#include <sstream>
	#include <utility>

	#include "constants/stream_dict_common.h"
	#include "core/fpdfapi/page/cpdf_docpagedata.h"
	#include "core/fpdfapi/parser/cpdf_array.h"
	#include "core/fpdfapi/parser/cpdf_boolean.h"
	#include "core/fpdfapi/parser/cpdf_dictionary.h"
	#include "core/fpdfapi/parser/cpdf_name.h"
	#include "core/fpdfapi/parser/cpdf_null.h"
	#include "core/fpdfapi/parser/cpdf_number.h"
	#include "core/fpdfapi/parser/cpdf_stream.h"
	#include "core/fpdfapi/parser/cpdf_string.h"
	#include "core/fpdfapi/parser/fpdf_parser_decode.h"
	#include "core/fpdfapi/parser/fpdf_parser_utility.h"
	#include "core/fxcodec/fx_codec.h"
	#include "core/fxcodec/jpeg/jpegmodule.h"
	#include "core/fxcodec/scanlinedecoder.h"
	#include "core/fxcrt/fx_extension.h"
	#include "core/fxcrt/fx_memory_wrappers.h"
	#include "core/fxcrt/fx_safe_types.h"
	#include "third_party/base/check.h"

	namespace {

	// Deepest nesting ReadNextObject() accepts before it stops building objects.
	constexpr uint32_t kMaxNestedParsingLevel = 512;
	// Cap applied to the length of strings returned by ReadString() and
	// ReadHexString().
	constexpr size_t kMaxStringLength = 32767;

	// Spellings of the literal words that the parser turns into objects.
	constexpr char kTrue[] = "true";
	constexpr char kFalse[] = "false";
	constexpr char kNull[] = "null";

	uint32_t DecodeAllScanlines(std::unique_ptr<ScanlineDecoder> pDecoder) {
	if (!pDecoder)
	return FX_INVALID_OFFSET;

	int ncomps = pDecoder->CountComps();
	int bpc = pDecoder->GetBPC();
	int width = pDecoder->GetWidth();
	int height = pDecoder->GetHeight();
	if (width <= 0 \|\| height <= 0)
	return FX_INVALID_OFFSET;

	FX_SAFE_UINT32 size = fxcodec::CalculatePitch8(bpc, ncomps, width);
	size *= height;
	if (size.ValueOrDefault(0) == 0)
	return FX_INVALID_OFFSET;

	for (int row = 0; row < height; ++row) {
	if (!pDecoder->GetScanline(row))
	break;
	}
	return pDecoder->GetSrcOffset();
	}

	// Runs the filter named |decoder| over |src_span| and returns the value that
	// the chosen decoding routine reports. The decoded bytes themselves are
	// thrown away. |decoder| must be a full filter name, not an abbreviation.
	// |width| and |height| are forwarded to the scanline-based decoders, |pParam|
	// (may be null) to every decoder that accepts parameters, and |orig_size|
	// only to the Flate path. Returns FX_INVALID_OFFSET for an unrecognized
	// filter name.
	uint32_t DecodeInlineStream(pdfium::span<const uint8_t> src_span,
	                            int width,
	                            int height,
	                            const ByteString& decoder,
	                            const CPDF_Dictionary* pParam,
	                            uint32_t orig_size) {
	  // Abbreviated names are expected to have been expanded already.
	  DCHECK(decoder != "A85");
	  DCHECK(decoder != "AHx");
	  DCHECK(decoder != "CCF");
	  DCHECK(decoder != "DCT");
	  DCHECK(decoder != "Fl");
	  DCHECK(decoder != "LZW");
	  DCHECK(decoder != "RL");

	  // Out-parameters required by the decode routines; their contents are unused.
	  std::unique_ptr<uint8_t, FxFreeDeleter> discarded_data;
	  uint32_t discarded_size;

	  const bool is_lzw = decoder == "LZWDecode";
	  if (is_lzw || decoder == "FlateDecode") {
	    // LZW is given 0 where Flate is given |orig_size|.
	    return FlateOrLZWDecode(is_lzw, src_span, pParam, is_lzw ? 0 : orig_size,
	                            &discarded_data, &discarded_size);
	  }

	  const bool is_dct = decoder == "DCTDecode";
	  if (is_dct || decoder == "CCITTFaxDecode") {
	    std::unique_ptr<ScanlineDecoder> pDecoder;
	    if (is_dct) {
	      // Without parameters the colour transform defaults to enabled.
	      const bool color_transform =
	          !pParam || pParam->GetIntegerFor("ColorTransform", 1);
	      pDecoder = JpegModule::CreateDecoder(src_span, width, height, 0,
	                                           color_transform);
	    } else {
	      pDecoder = CreateFaxDecoder(src_span, width, height, pParam);
	    }
	    return DecodeAllScanlines(std::move(pDecoder));
	  }

	  if (decoder == "ASCII85Decode")
	    return A85Decode(src_span, &discarded_data, &discarded_size);
	  if (decoder == "ASCIIHexDecode")
	    return HexDecode(src_span, &discarded_data, &discarded_size);
	  if (decoder == "RunLengthDecode")
	    return RunLengthDecode(src_span, &discarded_data, &discarded_size);

	  return FX_INVALID_OFFSET;
	}

	} // namespace

	// Creates a parser that reads from |span|. No ByteStringPool is supplied;
	// m_pBuf is the only member initialized explicitly here.
	CPDF_StreamParser::CPDF_StreamParser(pdfium::span<const uint8_t> span)
	: m_pBuf(span) {}

	// Creates a parser that reads from |span| and stores |pPool| in m_pPool.
	// ReadNextObject() hands m_pPool to the names, strings and dictionaries it
	// creates.
	CPDF_StreamParser::CPDF_StreamParser(pdfium::span<const uint8_t> span,
	const WeakPtr<ByteStringPool>& pPool)
	: m_pPool(pPool), m_pBuf(span) {}

	// Nothing to release by hand; the compiler-generated destructor is used.
	CPDF_StreamParser::~CPDF_StreamParser() = default;

	// Builds a CPDF_Stream from the inline image data found at the current
	// position. |pDict| is the image dictionary: "Filter", the decode parameters,
	// "Width", "Height" and (when |pCSObj| is set) "BitsPerComponent" are read
	// from it, and "Length" is written to it before it is moved into the stream.
	// |pCSObj| is resolved through |pDoc| to obtain the component count.
	//
	// The bytes stored in the stream are copied verbatim from the buffer. A
	// filter is only run to find out how many source bytes belong to the image.
	// Returns nullptr when no data is left, when the expected image size is
	// invalid, or when the reported length does not fit in an int.
	RetainPtr<CPDF_Stream> CPDF_StreamParser::ReadInlineStream(
	    CPDF_Document* pDoc,
	    RetainPtr<CPDF_Dictionary> pDict,
	    const CPDF_Object* pCSObj) {
	  // At most one whitespace byte is skipped in front of the data.
	  if (m_Pos < m_pBuf.size() && PDFCharIsWhitespace(m_pBuf[m_Pos]))
	    ++m_Pos;
	  if (m_Pos == m_pBuf.size())
	    return nullptr;

	  // Only the first filter, and its matching parameters, are considered.
	  ByteString filter_name;
	  const CPDF_Dictionary* pParam = nullptr;
	  if (CPDF_Object* pFilter = pDict->GetDirectObjectFor("Filter")) {
	    if (const CPDF_Array* pFilterArray = pFilter->AsArray()) {
	      filter_name = pFilterArray->GetStringAt(0);
	      if (const CPDF_Array* pParamArray =
	              pDict->GetArrayFor(pdfium::stream::kDecodeParms)) {
	        pParam = pParamArray->GetDictAt(0);
	      }
	    } else {
	      filter_name = pFilter->GetString();
	      pParam = pDict->GetDictFor(pdfium::stream::kDecodeParms);
	    }
	  }

	  const uint32_t width = pDict->GetIntegerFor("Width");
	  const uint32_t height = pDict->GetIntegerFor("Height");
	  uint32_t bpc = 1;
	  uint32_t nComponents = 1;
	  if (pCSObj) {
	    RetainPtr<CPDF_ColorSpace> pCS =
	        CPDF_DocPageData::FromDocument(pDoc)->GetColorSpace(pCSObj, nullptr);
	    // Three components are assumed when the colour space cannot be loaded.
	    nComponents = pCS ? pCS->CountComponents() : 3;
	    bpc = pDict->GetIntegerFor("BitsPerComponent");
	  }
	  FX_SAFE_UINT32 size = fxcodec::CalculatePitch8(bpc, nComponents, width);
	  size *= height;
	  if (!size.IsValid())
	    return nullptr;

	  const uint32_t expected_size = size.ValueOrDie();
	  const uint32_t data_start = m_Pos;
	  uint32_t stream_size;
	  if (filter_name.IsEmpty()) {
	    // Unfiltered data: take the expected size, limited to what is left.
	    stream_size = std::min<uint32_t>(expected_size, m_pBuf.size() - m_Pos);
	  } else {
	    stream_size = DecodeInlineStream(m_pBuf.subspan(m_Pos), width, height,
	                                     filter_name, pParam, expected_size);
	    if (!pdfium::base::IsValueInRangeForNumericType<int>(stream_size))
	      return nullptr;

	    // Extend the length over whatever sits between the end reported by the
	    // decoder and the "EI" keyword (or the end of the buffer).
	    m_Pos += stream_size;
	    for (;;) {
	      const uint32_t element_start = m_Pos;
	      const CPDF_StreamParser::SyntaxType type = ParseNextElement();
	      if (type == CPDF_StreamParser::EndOfData)
	        break;
	      if (type == CPDF_StreamParser::Keyword && GetWord() == "EI")
	        break;
	      stream_size += m_Pos - element_start;
	    }
	    m_Pos = data_start;
	  }

	  // Copy the raw bytes and leave the position just past them.
	  std::unique_ptr<uint8_t, FxFreeDeleter> pData(
	      FX_AllocUninit(uint8_t, stream_size));
	  auto copy_span = m_pBuf.subspan(m_Pos, stream_size);
	  memcpy(pData.get(), copy_span.data(), copy_span.size());
	  m_Pos += stream_size;

	  pDict->SetNewFor<CPDF_Number>("Length", static_cast<int>(stream_size));
	  return pdfium::MakeRetain<CPDF_Stream>(std::move(pData), stream_size,
	                                         std::move(pDict));
	}

	// Reads the next element from the buffer and classifies it. Whitespace and
	// '%' comments in front of it are skipped. Returns:
	//   EndOfData - nothing but whitespace/comments was left;
	//   Number    - every byte of the word is numeric (word is in m_WordBuffer);
	//   Name      - the word starts with '/';
	//   Others    - an object was parsed into m_pLastObj (which may be null if
	//               ReadNextObject() failed), or the word was true/false/null;
	//   Keyword   - any other word.
	CPDF_StreamParser::SyntaxType CPDF_StreamParser::ParseNextElement() {
	  m_pLastObj.Reset();
	  m_WordSize = 0;
	  if (!PositionIsInBounds())
	    return EndOfData;

	  uint8_t ch = m_pBuf[m_Pos++];
	  for (;;) {
	    while (PDFCharIsWhitespace(ch)) {
	      if (!PositionIsInBounds())
	        return EndOfData;
	      ch = m_pBuf[m_Pos++];
	    }
	    if (ch != '%')
	      break;

	    // Inside a comment: consume bytes through the line-ending byte.
	    do {
	      if (!PositionIsInBounds())
	        return EndOfData;
	      ch = m_pBuf[m_Pos++];
	    } while (!PDFCharIsLineEnding(ch));
	  }

	  // Any delimiter except '/' is handed to ReadNextObject(), which needs to
	  // see the delimiter itself, hence the step back.
	  if (ch != '/' && PDFCharIsDelimiter(ch)) {
	    --m_Pos;
	    m_pLastObj = ReadNextObject(false, false, 0);
	    return Others;
	  }

	  // Collect the word. Bytes beyond kMaxWordLength are consumed but dropped.
	  bool is_number = true;
	  for (;;) {
	    if (m_WordSize < kMaxWordLength)
	      m_WordBuffer[m_WordSize++] = ch;
	    if (!PDFCharIsNumeric(ch))
	      is_number = false;

	    // A delimiter or whitespace byte ends the word and stays unread.
	    if (!PositionIsInBounds())
	      break;
	    const uint8_t next = m_pBuf[m_Pos];
	    if (PDFCharIsDelimiter(next) || PDFCharIsWhitespace(next))
	      break;
	    ch = next;
	    ++m_Pos;
	  }
	  m_WordBuffer[m_WordSize] = 0;

	  if (is_number)
	    return Number;
	  if (m_WordBuffer[0] == '/')
	    return Name;

	  const bool is_true = WordBufferMatches(kTrue);
	  if (is_true || WordBufferMatches(kFalse)) {
	    m_pLastObj = pdfium::MakeRetain<CPDF_Boolean>(is_true);
	    return Others;
	  }
	  if (WordBufferMatches(kNull)) {
	    m_pLastObj = pdfium::MakeRetain<CPDF_Null>();
	    return Others;
	  }
	  return Keyword;
	}

	// Parses one object starting at the current position: a number, name,
	// literal or hex string, dictionary, array, boolean or null.
	// |bInArray| says whether the caller is currently inside an array, and
	// |bAllowNestedArray| whether a further '[' is acceptable in that situation.
	// |dwRecursionLevel| is the current nesting depth.
	// Returns nullptr when no word is left, when the depth exceeds
	// kMaxNestedParsingLevel, or when the word does not start a known object.
	RetainPtr<CPDF_Object> CPDF_StreamParser::ReadNextObject(
	    bool bAllowNestedArray,
	    bool bInArray,
	    uint32_t dwRecursionLevel) {
	  // The word is fetched before any bail-out so that every call consumes
	  // input; otherwise the loops below could spin forever.
	  bool is_number;
	  GetNextWord(is_number);
	  if (dwRecursionLevel > kMaxNestedParsingLevel || !m_WordSize)
	    return nullptr;

	  if (is_number) {
	    m_WordBuffer[m_WordSize] = 0;
	    return pdfium::MakeRetain<CPDF_Number>(
	        ByteStringView(m_WordBuffer, m_WordSize));
	  }

	  switch (m_WordBuffer[0]) {
	    case '/': {
	      ByteString decoded_name =
	          PDF_NameDecode(ByteStringView(m_WordBuffer + 1, m_WordSize - 1));
	      return pdfium::MakeRetain<CPDF_Name>(m_pPool, decoded_name);
	    }

	    case '(': {
	      ByteString literal = ReadString();
	      return pdfium::MakeRetain<CPDF_String>(m_pPool, literal, false);
	    }

	    case '<': {
	      // A lone '<' opens a hex string; "<<" opens a dictionary.
	      if (m_WordSize == 1) {
	        return pdfium::MakeRetain<CPDF_String>(m_pPool, ReadHexString(),
	                                               true);
	      }

	      auto pDict = pdfium::MakeRetain<CPDF_Dictionary>(m_pPool);
	      for (;;) {
	        GetNextWord(is_number);
	        if (m_WordSize == 2 && m_WordBuffer[0] == '>')
	          break;

	        // Every key has to be a name.
	        if (!m_WordSize || m_WordBuffer[0] != '/')
	          return nullptr;

	        ByteString key =
	            PDF_NameDecode(ByteStringView(m_WordBuffer + 1, m_WordSize - 1));
	        RetainPtr<CPDF_Object> pValue =
	            ReadNextObject(true, bInArray, dwRecursionLevel + 1);
	        if (!pValue)
	          return nullptr;

	        // Values under an empty key are parsed but not stored.
	        if (!key.IsEmpty())
	          pDict->SetFor(key, std::move(pValue));
	      }
	      return pDict;
	    }

	    case '[': {
	      if (bInArray && !bAllowNestedArray)
	        return nullptr;

	      auto pArray = pdfium::MakeRetain<CPDF_Array>();
	      for (;;) {
	        RetainPtr<CPDF_Object> pElement =
	            ReadNextObject(bAllowNestedArray, true, dwRecursionLevel + 1);
	        if (!pElement) {
	          // Stop at ']' or at the end of input; skip anything else that
	          // failed to parse.
	          if (!m_WordSize || m_WordBuffer[0] == ']')
	            break;
	          continue;
	        }
	        pArray->Append(std::move(pElement));
	      }
	      return pArray;
	    }

	    default:
	      break;
	  }

	  if (WordBufferMatches(kFalse))
	    return pdfium::MakeRetain<CPDF_Boolean>(false);
	  if (WordBufferMatches(kTrue))
	    return pdfium::MakeRetain<CPDF_Boolean>(true);
	  if (WordBufferMatches(kNull))
	    return pdfium::MakeRetain<CPDF_Null>();
	  return nullptr;
	}

	// TODO(npm): the following methods are almost identical in cpdf_syntaxparser
	//
	// Reads the next word into m_WordBuffer / m_WordSize after skipping
	// whitespace and '%' comments. m_WordSize stays 0 when the input runs out
	// first. A word is one of:
	//   - '/' followed by the bytes of a name,
	//   - "<<" or ">>",
	//   - any other single delimiter byte,
	//   - a run of bytes up to the next delimiter or whitespace byte.
	// |bIsNumber| is set to true only when the word is a run made up entirely of
	// numeric bytes. Bytes beyond kMaxWordLength are consumed but not stored.
	void CPDF_StreamParser::GetNextWord(bool& bIsNumber) {
	  m_WordSize = 0;
	  bIsNumber = true;
	  if (!PositionIsInBounds())
	    return;

	  uint8_t ch = m_pBuf[m_Pos++];
	  for (;;) {
	    while (PDFCharIsWhitespace(ch)) {
	      if (!PositionIsInBounds())
	        return;
	      ch = m_pBuf[m_Pos++];
	    }
	    if (ch != '%')
	      break;

	    // Inside a comment: consume bytes through the line-ending byte.
	    do {
	      if (!PositionIsInBounds())
	        return;
	      ch = m_pBuf[m_Pos++];
	    } while (!PDFCharIsLineEnding(ch));
	  }

	  if (PDFCharIsDelimiter(ch)) {
	    bIsNumber = false;
	    m_WordBuffer[m_WordSize++] = ch;
	    if (ch == '/') {
	      // Name: keep taking bytes while they are "other" or numeric.
	      while (PositionIsInBounds()) {
	        const uint8_t next = m_pBuf[m_Pos];
	        if (!PDFCharIsOther(next) && !PDFCharIsNumeric(next))
	          break;
	        ++m_Pos;
	        if (m_WordSize < kMaxWordLength)
	          m_WordBuffer[m_WordSize++] = next;
	      }
	    } else if (ch == '<' || ch == '>') {
	      // A doubled angle bracket forms a single two-byte word.
	      if (PositionIsInBounds() && m_pBuf[m_Pos] == ch) {
	        m_WordBuffer[m_WordSize++] = ch;
	        ++m_Pos;
	      }
	    }
	    return;
	  }

	  for (;;) {
	    if (m_WordSize < kMaxWordLength)
	      m_WordBuffer[m_WordSize++] = ch;
	    if (!PDFCharIsNumeric(ch))
	      bIsNumber = false;

	    // A delimiter or whitespace byte ends the word and stays unread.
	    if (!PositionIsInBounds())
	      return;
	    const uint8_t next = m_pBuf[m_Pos];
	    if (PDFCharIsDelimiter(next) || PDFCharIsWhitespace(next))
	      return;
	    ch = next;
	    ++m_Pos;
	  }
	}

	// Reads the body of a literal string; the opening '(' has already been
	// consumed. Nested, balanced parentheses are kept in the result. Backslash
	// escapes are decoded: \n \r \t \b \f, one to three octal digits, and a
	// backslash followed by a line ending (dropped). Any other escaped byte is
	// copied as is. Reading stops at the matching ')' or at the end of the
	// buffer. The result is truncated to kMaxStringLength bytes.
	ByteString CPDF_StreamParser::ReadString() {
	  if (!PositionIsInBounds())
	    return ByteString();

	  enum class State {
	    kText,            // Ordinary string content.
	    kBackslash,       // Just saw '\'.
	    kOctalSecond,     // One octal digit read; a second may follow.
	    kOctalThird,      // Two octal digits read; a third may follow.
	    kAfterEscapedCR,  // Saw '\' + CR; a following LF is swallowed too.
	  };

	  std::ostringstream out;
	  // Packages whatever has been collected so far.
	  auto make_result = [&out]() {
	    if (out.tellp() <= 0)
	      return ByteString();
	    return ByteString(
	        out.str().c_str(),
	        std::min(static_cast<size_t>(out.tellp()), kMaxStringLength));
	  };

	  State state = State::kText;
	  int paren_depth = 0;
	  int octal_value = 0;
	  uint8_t ch = m_pBuf[m_Pos++];
	  for (;;) {
	    // A `continue` below re-examines the same byte in the new state; falling
	    // out of the switch advances to the next byte.
	    switch (state) {
	      case State::kText:
	        if (ch == ')') {
	          if (paren_depth == 0)
	            return make_result();
	          --paren_depth;
	          out << ')';
	        } else if (ch == '(') {
	          ++paren_depth;
	          out << '(';
	        } else if (ch == '\\') {
	          state = State::kBackslash;
	        } else {
	          out << static_cast<char>(ch);
	        }
	        break;

	      case State::kBackslash:
	        if (FXSYS_IsOctalDigit(ch)) {
	          octal_value = FXSYS_DecimalCharToInt(static_cast<char>(ch));
	          state = State::kOctalSecond;
	          break;
	        }
	        if (ch == '\r') {
	          state = State::kAfterEscapedCR;
	          break;
	        }
	        switch (ch) {
	          case '\n':
	            // Escaped line feed: contributes nothing.
	            break;
	          case 'n':
	            out << '\n';
	            break;
	          case 'r':
	            out << '\r';
	            break;
	          case 't':
	            out << '\t';
	            break;
	          case 'b':
	            out << '\b';
	            break;
	          case 'f':
	            out << '\f';
	            break;
	          default:
	            out << static_cast<char>(ch);
	            break;
	        }
	        state = State::kText;
	        break;

	      case State::kOctalSecond:
	        if (!FXSYS_IsOctalDigit(ch)) {
	          out << static_cast<char>(octal_value);
	          state = State::kText;
	          continue;
	        }
	        octal_value =
	            octal_value * 8 + FXSYS_DecimalCharToInt(static_cast<char>(ch));
	        state = State::kOctalThird;
	        break;

	      case State::kOctalThird:
	        state = State::kText;
	        if (!FXSYS_IsOctalDigit(ch)) {
	          out << static_cast<char>(octal_value);
	          continue;
	        }
	        octal_value =
	            octal_value * 8 + FXSYS_DecimalCharToInt(static_cast<char>(ch));
	        out << static_cast<char>(octal_value);
	        break;

	      case State::kAfterEscapedCR:
	        state = State::kText;
	        if (ch != '\n')
	          continue;
	        break;
	    }

	    if (!PositionIsInBounds())
	      break;
	    ch = m_pBuf[m_Pos++];
	  }

	  // Input ended before the closing ')': return what was gathered.
	  return make_result();
	}

	// Reads the body of a hex string; the opening '<' has already been consumed.
	// Hex digits are paired into bytes, non-hex bytes are skipped, and reading
	// stops after '>' or at the end of the buffer. A trailing unpaired digit is
	// used as the high nibble of a final byte. The result is truncated to
	// kMaxStringLength bytes.
	ByteString CPDF_StreamParser::ReadHexString() {
	  if (!PositionIsInBounds())
	    return ByteString();

	  std::ostringstream out;
	  bool have_high_nibble = false;
	  int byte_value = 0;
	  while (PositionIsInBounds()) {
	    const uint8_t ch = m_pBuf[m_Pos++];
	    if (ch == '>')
	      break;
	    if (!std::isxdigit(ch))
	      continue;

	    const int nibble = FXSYS_HexCharToInt(ch);
	    if (have_high_nibble) {
	      byte_value += nibble;
	      out << static_cast<uint8_t>(byte_value);
	    } else {
	      byte_value = nibble * 16;
	    }
	    have_high_nibble = !have_high_nibble;
	  }

	  // Odd digit count: emit the pending high nibble on its own.
	  if (have_high_nibble)
	    out << static_cast<char>(byte_value);

	  if (out.tellp() <= 0)
	    return ByteString();

	  const size_t length =
	      std::min(static_cast<size_t>(out.tellp()), kMaxStringLength);
	  return ByteString(out.str().c_str(), length);
	}

	// True while m_Pos still indexes a byte inside m_pBuf.
	bool CPDF_StreamParser::PositionIsInBounds() const {
	  const auto buffer_size = m_pBuf.size();
	  return m_Pos < buffer_size;
	}

	// True when the current word (m_WordSize bytes of m_WordBuffer) is exactly
	// the NUL-terminated string |pWord|.
	bool CPDF_StreamParser::WordBufferMatches(const char* pWord) const {
	  const size_t expected_length = strlen(pWord);
	  if (m_WordSize != expected_length)
	    return false;
	  return memcmp(m_WordBuffer, pWord, expected_length) == 0;
	}