M107: Copy data in CPDF_SyntaxParser::ReadStream().
Previously, https://pdfium-review.googlesource.com/97491 changed
CPDF_ObjectStream to avoid copying. CPDF_ObjectStream now holds on to
both the data object, and a data stream that references the data object.
The data stream is expected to be destroyed before the data object, to
avoid dangling pointers.
CPDF_ObjectStream then allows CPDF_SyntaxParser an only
CPDF_SyntaxParser to use the data stream. Sometimes CPDF_SyntaxParser
creates new objects that references the data stream, Thereby violating
the object lifetime requirement that CPDF_ObjectStream was expecting.
Fix this issue by doing a data copy in CPDF_SyntaxParser::ReadStream()
only when needed. This copies less data than the previous strategy where
CPDF_ObjectStream does the copying because:
1) CPDF_SyntaxParser only copies in certain cases.
2) CPDF_SyntaxParser copies a substream, which is oftentimes smaller
than the stream data that CPDF_ObjectStream copies.
This also prevents similar issues from happening when other objects
instantiates CPDF_SyntaxParser.
For the M107 merge, switch to using DataVector, as FixedUninitDataVector
did not make the M107 cut.
Bug: chromium:1364735
Change-Id: Iacf7c3c1ae1a3f97882e52fbacfc68a7ed9d146a
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/98091
Reviewed-by: Tom Sepez <tsepez@chromium.org>
Commit-Queue: Lei Zhang <thestig@chromium.org>
(cherry picked from commit a52ea7f7b8a6ad60e553ccea8c01ec17bde5825b)
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/98152
diff --git a/core/fpdfapi/parser/cpdf_syntax_parser.cpp b/core/fpdfapi/parser/cpdf_syntax_parser.cpp
index c3a01cb..473c007 100644
--- a/core/fpdfapi/parser/cpdf_syntax_parser.cpp
+++ b/core/fpdfapi/parser/cpdf_syntax_parser.cpp
@@ -24,6 +24,8 @@
#include "core/fpdfapi/parser/cpdf_string.h"
#include "core/fpdfapi/parser/fpdf_parser_utility.h"
#include "core/fxcrt/autorestorer.h"
+#include "core/fxcrt/cfx_read_only_vector_stream.h"
+#include "core/fxcrt/data_vector.h"
#include "core/fxcrt/fx_extension.h"
#include "core/fxcrt/fx_safe_types.h"
#include "third_party/base/check.h"
@@ -722,7 +724,7 @@
len = -1;
}
- RetainPtr<IFX_SeekableReadStream> data;
+ RetainPtr<IFX_SeekableReadStream> substream;
if (len > 0) {
// Check data availability first to allow the Validator to request data
// smoothly, without jumps.
@@ -731,7 +733,7 @@
return nullptr;
}
- data = pdfium::MakeRetain<ReadableSubStream>(
+ substream = pdfium::MakeRetain<ReadableSubStream>(
GetValidator(), m_HeaderOffset + GetPos(), len);
SetPos(GetPos() + len);
}
@@ -754,7 +756,7 @@
// specified length, it signals the end of stream.
if (memcmp(m_WordBuffer, kEndStreamStr.raw_str(),
kEndStreamStr.GetLength()) != 0) {
- data.Reset();
+ substream.Reset();
len = -1;
SetPos(streamStartPos);
}
@@ -778,16 +780,26 @@
return nullptr;
}
- data = pdfium::MakeRetain<ReadableSubStream>(
+ substream = pdfium::MakeRetain<ReadableSubStream>(
GetValidator(), m_HeaderOffset + GetPos(), len);
SetPos(GetPos() + len);
}
}
RetainPtr<CPDF_Stream> pStream;
- if (data) {
+ if (substream) {
+ // It is unclear from CPDF_SyntaxParser's perspective what object
+ // `substream` is ultimately holding references to. To avoid unexpectedly
+ // changing object lifetimes by handing `substream` to `pStream`, make a
+ // copy of the data here.
+ DataVector<uint8_t> data(substream->GetSize());
+ bool did_read = substream->ReadBlockAtOffset(data.data(), 0, data.size());
+ CHECK(did_read);
+ auto data_as_stream =
+ pdfium::MakeRetain<CFX_ReadOnlyVectorStream>(std::move(data));
+
pStream = pdfium::MakeRetain<CPDF_Stream>();
- pStream->InitStreamFromFile(std::move(data), std::move(pDict));
+ pStream->InitStreamFromFile(std::move(data_as_stream), std::move(pDict));
} else {
DCHECK(!len);
pStream = pdfium::MakeRetain<CPDF_Stream>(pdfium::span<const uint8_t>(),