| // Copyright 2015 The PDFium Authors |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include <algorithm> |
| #include <memory> |
| #include <string> |
| #include <utility> |
| #include <vector> |
| |
| #include "core/fxcrt/bytestring.h" |
| #include "core/fxcrt/compiler_specific.h" |
| #include "core/fxcrt/numerics/safe_conversions.h" |
| #include "core/fxcrt/span_util.h" |
| #include "public/fpdf_doc.h" |
| #include "public/fpdfview.h" |
| #include "testing/embedder_test.h" |
| #include "testing/fx_string_testhelpers.h" |
| #include "testing/gtest/include/gtest/gtest.h" |
| #include "testing/range_set.h" |
| #include "testing/utils/file_util.h" |
| #include "testing/utils/path_service.h" |
| |
| namespace { |
| |
| class MockDownloadHints final : public FX_DOWNLOADHINTS { |
| public: |
| static void SAddSegment(FX_DOWNLOADHINTS* pThis, size_t offset, size_t size) { |
| } |
| |
| MockDownloadHints() { |
| FX_DOWNLOADHINTS::version = 1; |
| FX_DOWNLOADHINTS::AddSegment = SAddSegment; |
| } |
| |
| ~MockDownloadHints() = default; |
| }; |
| |
| class TestAsyncLoader final : public FX_DOWNLOADHINTS, FX_FILEAVAIL { |
| public: |
| explicit TestAsyncLoader(const std::string& file_name) { |
| std::string file_path = PathService::GetTestFilePath(file_name); |
| if (file_path.empty()) { |
| return; |
| } |
| file_contents_ = GetFileContents(file_path.c_str()); |
| if (file_contents_.empty()) { |
| return; |
| } |
| |
| file_access_.m_FileLen = |
| pdfium::checked_cast<unsigned long>(file_contents_.size()); |
| file_access_.m_GetBlock = SGetBlock; |
| file_access_.m_Param = this; |
| |
| FX_DOWNLOADHINTS::version = 1; |
| FX_DOWNLOADHINTS::AddSegment = SAddSegment; |
| |
| FX_FILEAVAIL::version = 1; |
| FX_FILEAVAIL::IsDataAvail = SIsDataAvail; |
| } |
| |
| bool IsOpened() const { return !file_contents_.empty(); } |
| |
| FPDF_FILEACCESS* file_access() { return &file_access_; } |
| FX_DOWNLOADHINTS* hints() { return this; } |
| FX_FILEAVAIL* file_avail() { return this; } |
| |
| const std::vector<std::pair<size_t, size_t>>& requested_segments() const { |
| return requested_segments_; |
| } |
| |
| size_t max_requested_bound() const { return max_requested_bound_; } |
| |
| void ClearRequestedSegments() { |
| requested_segments_.clear(); |
| max_requested_bound_ = 0; |
| } |
| |
| bool is_new_data_available() const { return is_new_data_available_; } |
| void set_is_new_data_available(bool is_new_data_available) { |
| is_new_data_available_ = is_new_data_available; |
| } |
| |
| size_t max_already_available_bound() const { |
| return available_ranges_.IsEmpty() |
| ? 0 |
| : available_ranges_.ranges().rbegin()->second; |
| } |
| |
| void FlushRequestedData() { |
| for (const auto& it : requested_segments_) { |
| SetDataAvailable(it.first, it.second); |
| } |
| ClearRequestedSegments(); |
| } |
| |
| pdfium::span<const uint8_t> file_contents() const { return file_contents_; } |
| pdfium::span<uint8_t> mutable_file_contents() { return file_contents_; } |
| |
| private: |
| void SetDataAvailable(size_t start, size_t size) { |
| available_ranges_.Union(RangeSet::Range(start, start + size)); |
| } |
| |
| bool CheckDataAlreadyAvailable(size_t start, size_t size) const { |
| return available_ranges_.Contains(RangeSet::Range(start, start + size)); |
| } |
| |
| int GetBlockImpl(unsigned long pos, unsigned char* pBuf, unsigned long size) { |
| if (!IsDataAvailImpl(pos, size)) |
| return 0; |
| const unsigned long end = std::min( |
| pdfium::checked_cast<unsigned long>(file_contents_.size()), pos + size); |
| if (end <= pos) |
| return 0; |
| const unsigned long bytes_to_copy = end - pos; |
| fxcrt::spancpy(UNSAFE_TODO(pdfium::make_span(pBuf, size)), |
| file_contents().subspan(pos, bytes_to_copy)); |
| SetDataAvailable(pos, bytes_to_copy); |
| return static_cast<int>(bytes_to_copy); |
| } |
| |
| void AddSegmentImpl(size_t offset, size_t size) { |
| requested_segments_.emplace_back(offset, size); |
| max_requested_bound_ = std::max(max_requested_bound_, offset + size); |
| } |
| |
| bool IsDataAvailImpl(size_t offset, size_t size) { |
| if (offset + size > file_contents_.size()) { |
| return false; |
| } |
| if (is_new_data_available_) { |
| SetDataAvailable(offset, size); |
| return true; |
| } |
| return CheckDataAlreadyAvailable(offset, size); |
| } |
| |
| static int SGetBlock(void* param, |
| unsigned long pos, |
| unsigned char* pBuf, |
| unsigned long size) { |
| return static_cast<TestAsyncLoader*>(param)->GetBlockImpl(pos, pBuf, size); |
| } |
| |
| static void SAddSegment(FX_DOWNLOADHINTS* pThis, size_t offset, size_t size) { |
| return static_cast<TestAsyncLoader*>(pThis)->AddSegmentImpl(offset, size); |
| } |
| |
| static FPDF_BOOL SIsDataAvail(FX_FILEAVAIL* pThis, |
| size_t offset, |
| size_t size) { |
| return static_cast<TestAsyncLoader*>(pThis)->IsDataAvailImpl(offset, size); |
| } |
| |
| FPDF_FILEACCESS file_access_; |
| |
| std::vector<uint8_t> file_contents_; |
| std::vector<std::pair<size_t, size_t>> requested_segments_; |
| size_t max_requested_bound_ = 0; |
| bool is_new_data_available_ = true; |
| |
| RangeSet available_ranges_; |
| }; |
| |
| } // namespace |
| |
| class FPDFDataAvailEmbedderTest : public EmbedderTest {}; |
| |
| TEST_F(FPDFDataAvailEmbedderTest, TrailerUnterminated) { |
| // Document must load without crashing but is too malformed to be available. |
| EXPECT_FALSE(OpenDocument("trailer_unterminated.pdf")); |
| MockDownloadHints hints; |
| EXPECT_FALSE(FPDFAvail_IsDocAvail(avail(), &hints)); |
| } |
| |
| TEST_F(FPDFDataAvailEmbedderTest, TrailerAsHexstring) { |
| // Document must load without crashing but is too malformed to be available. |
| EXPECT_FALSE(OpenDocument("trailer_as_hexstring.pdf")); |
| MockDownloadHints hints; |
| EXPECT_FALSE(FPDFAvail_IsDocAvail(avail(), &hints)); |
| } |
| |
| TEST_F(FPDFDataAvailEmbedderTest, LoadUsingHintTables) { |
| TestAsyncLoader loader("feature_linearized_loading.pdf"); |
| CreateAvail(loader.file_avail(), loader.file_access()); |
| ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail(), loader.hints())); |
| SetDocumentFromAvail(); |
| ASSERT_TRUE(document()); |
| ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsPageAvail(avail(), 1, loader.hints())); |
| |
| // No new data available, to prevent load "Pages" node. |
| loader.set_is_new_data_available(false); |
| ScopedFPDFPage page(FPDF_LoadPage(document(), 1)); |
| EXPECT_TRUE(page); |
| } |
| |
| TEST_F(FPDFDataAvailEmbedderTest, CheckFormAvailIfLinearized) { |
| TestAsyncLoader loader("feature_linearized_loading.pdf"); |
| CreateAvail(loader.file_avail(), loader.file_access()); |
| ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail(), loader.hints())); |
| SetDocumentFromAvail(); |
| ASSERT_TRUE(document()); |
| |
| // Prevent access to non-requested data to coerce the parser to send new |
| // request for non available (non-requested before) data. |
| loader.set_is_new_data_available(false); |
| loader.ClearRequestedSegments(); |
| |
| int status = PDF_FORM_NOTAVAIL; |
| while (status == PDF_FORM_NOTAVAIL) { |
| loader.FlushRequestedData(); |
| status = FPDFAvail_IsFormAvail(avail(), loader.hints()); |
| } |
| EXPECT_NE(PDF_FORM_ERROR, status); |
| } |
| |
| TEST_F(FPDFDataAvailEmbedderTest, |
| DoNotLoadMainCrossRefForFirstPageIfLinearized) { |
| TestAsyncLoader loader("feature_linearized_loading.pdf"); |
| CreateAvail(loader.file_avail(), loader.file_access()); |
| ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail(), loader.hints())); |
| SetDocumentFromAvail(); |
| ASSERT_TRUE(document()); |
| const int first_page_num = FPDFAvail_GetFirstPageNum(document()); |
| |
| // The main cross ref table should not be processed. |
| // (It is always at file end) |
| EXPECT_GT(loader.file_access()->m_FileLen, |
| loader.max_already_available_bound()); |
| |
| // Prevent access to non-requested data to coerce the parser to send new |
| // request for non available (non-requested before) data. |
| loader.set_is_new_data_available(false); |
| FPDFAvail_IsPageAvail(avail(), first_page_num, loader.hints()); |
| |
| // The main cross ref table should not be requested. |
| // (It is always at file end) |
| EXPECT_GT(loader.file_access()->m_FileLen, loader.max_requested_bound()); |
| |
| // Allow parse page. |
| loader.set_is_new_data_available(true); |
| ASSERT_EQ(PDF_DATA_AVAIL, |
| FPDFAvail_IsPageAvail(avail(), first_page_num, loader.hints())); |
| |
| // The main cross ref table should not be processed. |
| // (It is always at file end) |
| EXPECT_GT(loader.file_access()->m_FileLen, |
| loader.max_already_available_bound()); |
| |
| // Prevent loading data, while page loading. |
| loader.set_is_new_data_available(false); |
| ScopedFPDFPage page(FPDF_LoadPage(document(), first_page_num)); |
| EXPECT_TRUE(page); |
| } |
| |
| TEST_F(FPDFDataAvailEmbedderTest, LoadSecondPageIfLinearizedWithHints) { |
| TestAsyncLoader loader("feature_linearized_loading.pdf"); |
| CreateAvail(loader.file_avail(), loader.file_access()); |
| ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail(), loader.hints())); |
| SetDocumentFromAvail(); |
| ASSERT_TRUE(document()); |
| |
| static constexpr uint32_t kSecondPageNum = 1; |
| |
| // Prevent access to non-requested data to coerce the parser to send new |
| // request for non available (non-requested before) data. |
| loader.set_is_new_data_available(false); |
| loader.ClearRequestedSegments(); |
| |
| int status = PDF_DATA_NOTAVAIL; |
| while (status == PDF_DATA_NOTAVAIL) { |
| loader.FlushRequestedData(); |
| status = FPDFAvail_IsPageAvail(avail(), kSecondPageNum, loader.hints()); |
| } |
| EXPECT_EQ(PDF_DATA_AVAIL, status); |
| |
| // Prevent loading data, while page loading. |
| loader.set_is_new_data_available(false); |
| ScopedFPDFPage page(FPDF_LoadPage(document(), kSecondPageNum)); |
| EXPECT_TRUE(page); |
| } |
| |
| TEST_F(FPDFDataAvailEmbedderTest, LoadInfoAfterReceivingWholeDocument) { |
| TestAsyncLoader loader("linearized.pdf"); |
| loader.set_is_new_data_available(false); |
| CreateAvail(loader.file_avail(), loader.file_access()); |
| while (PDF_DATA_AVAIL != FPDFAvail_IsDocAvail(avail(), loader.hints())) { |
| loader.FlushRequestedData(); |
| } |
| |
| SetDocumentFromAvail(); |
| ASSERT_TRUE(document()); |
| |
| // The "info" dictionary should still be unavailable. |
| EXPECT_FALSE(FPDF_GetMetaText(document(), "CreationDate", nullptr, 0)); |
| |
| // Simulate receiving whole file. |
| loader.set_is_new_data_available(true); |
| // Load second page, to parse additional crossref sections. |
| EXPECT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsPageAvail(avail(), 1, loader.hints())); |
| |
| EXPECT_TRUE(FPDF_GetMetaText(document(), "CreationDate", nullptr, 0)); |
| } |
| |
| TEST_F(FPDFDataAvailEmbedderTest, LoadInfoAfterReceivingFirstPage) { |
| TestAsyncLoader loader("linearized.pdf"); |
| // Map "Info" to an object within the first section without breaking |
| // linearization. |
| ByteString data(ByteStringView(loader.file_contents())); |
| std::optional<size_t> index = data.Find("/Info 27 0 R"); |
| ASSERT_TRUE(index.has_value()); |
| auto span = loader.mutable_file_contents().subspan(index.value()).subspan(7); |
| ASSERT_FALSE(span.empty()); |
| EXPECT_EQ('7', span[0]); |
| span[0] = '9'; |
| |
| loader.set_is_new_data_available(false); |
| CreateAvail(loader.file_avail(), loader.file_access()); |
| while (PDF_DATA_AVAIL != FPDFAvail_IsDocAvail(avail(), loader.hints())) { |
| loader.FlushRequestedData(); |
| } |
| |
| SetDocumentFromAvail(); |
| ASSERT_TRUE(document()); |
| |
| // The "Info" dictionary should be available for the linearized document, if |
| // it is located in the first page section. |
| // Info was remapped to a dictionary with Type "Catalog" |
| unsigned short buffer[100] = {0}; |
| EXPECT_TRUE(FPDF_GetMetaText(document(), "Type", buffer, sizeof(buffer))); |
| EXPECT_EQ(L"Catalog", GetPlatformWString(buffer)); |
| } |
| |
| TEST_F(FPDFDataAvailEmbedderTest, TryLoadInvalidInfo) { |
| TestAsyncLoader loader("linearized.pdf"); |
| // Map "Info" to an invalid object without breaking linearization. |
| ByteString data(ByteStringView(loader.file_contents())); |
| std::optional<size_t> index = data.Find("/Info 27 0 R"); |
| ASSERT_TRUE(index.has_value()); |
| auto span = loader.mutable_file_contents().subspan(index.value()).subspan(6); |
| ASSERT_GE(span.size(), 2u); |
| EXPECT_EQ('2', span[0]); |
| EXPECT_EQ('7', span[1]); |
| span[0] = '9'; |
| span[1] = '9'; |
| |
| loader.set_is_new_data_available(false); |
| CreateAvail(loader.file_avail(), loader.file_access()); |
| while (PDF_DATA_AVAIL != FPDFAvail_IsDocAvail(avail(), loader.hints())) { |
| loader.FlushRequestedData(); |
| } |
| |
| SetDocumentFromAvail(); |
| ASSERT_TRUE(document()); |
| |
| // Set all data available. |
| loader.set_is_new_data_available(true); |
| // Check second page, to load additional crossrefs. |
| ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsPageAvail(avail(), 0, loader.hints())); |
| |
| // Test that api is robust enough to handle the bad case. |
| EXPECT_FALSE(FPDF_GetMetaText(document(), "Type", nullptr, 0)); |
| } |
| |
| TEST_F(FPDFDataAvailEmbedderTest, TryLoadNonExistsInfo) { |
| TestAsyncLoader loader("linearized.pdf"); |
| // Break the "Info" parameter without breaking linearization. |
| ByteString data(ByteStringView(loader.file_contents())); |
| std::optional<size_t> index = data.Find("/Info 27 0 R"); |
| ASSERT_TRUE(index.has_value()); |
| auto span = loader.mutable_file_contents().subspan(index.value()).subspan(2); |
| ASSERT_FALSE(span.empty()); |
| EXPECT_EQ('n', span[0]); |
| span[0] = '_'; |
| |
| loader.set_is_new_data_available(false); |
| CreateAvail(loader.file_avail(), loader.file_access()); |
| while (PDF_DATA_AVAIL != FPDFAvail_IsDocAvail(avail(), loader.hints())) { |
| loader.FlushRequestedData(); |
| } |
| |
| SetDocumentFromAvail(); |
| ASSERT_TRUE(document()); |
| |
| // Set all data available. |
| loader.set_is_new_data_available(true); |
| // Check second page, to load additional crossrefs. |
| ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsPageAvail(avail(), 0, loader.hints())); |
| |
| // Test that api is robust enough to handle the bad case. |
| EXPECT_FALSE(FPDF_GetMetaText(document(), "Type", nullptr, 0)); |
| } |
| |
| TEST_F(FPDFDataAvailEmbedderTest, BadInputsToAPIs) { |
| EXPECT_EQ(PDF_DATA_ERROR, FPDFAvail_IsDocAvail(nullptr, nullptr)); |
| EXPECT_FALSE(FPDFAvail_GetDocument(nullptr, nullptr)); |
| EXPECT_EQ(0, FPDFAvail_GetFirstPageNum(nullptr)); |
| EXPECT_EQ(PDF_DATA_ERROR, FPDFAvail_IsPageAvail(nullptr, 0, nullptr)); |
| EXPECT_EQ(PDF_FORM_ERROR, FPDFAvail_IsFormAvail(nullptr, nullptr)); |
| EXPECT_EQ(PDF_LINEARIZATION_UNKNOWN, FPDFAvail_IsLinearized(nullptr)); |
| } |
| |
| TEST_F(FPDFDataAvailEmbedderTest, NegativePageIndex) { |
| TestAsyncLoader loader("linearized.pdf"); |
| CreateAvail(loader.file_avail(), loader.file_access()); |
| ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail(), loader.hints())); |
| EXPECT_EQ(PDF_DATA_NOTAVAIL, |
| FPDFAvail_IsPageAvail(avail(), -1, loader.hints())); |
| } |
| |
| TEST_F(FPDFDataAvailEmbedderTest, Bug_1324189) { |
| // Test passes if it doesn't crash. |
| TestAsyncLoader loader("bug_1324189.pdf"); |
| CreateAvail(loader.file_avail(), loader.file_access()); |
| ASSERT_EQ(PDF_DATA_NOTAVAIL, FPDFAvail_IsDocAvail(avail(), loader.hints())); |
| } |
| |
| TEST_F(FPDFDataAvailEmbedderTest, Bug_1324503) { |
| // Test passes if it doesn't crash. |
| TestAsyncLoader loader("bug_1324503.pdf"); |
| CreateAvail(loader.file_avail(), loader.file_access()); |
| ASSERT_EQ(PDF_DATA_NOTAVAIL, FPDFAvail_IsDocAvail(avail(), loader.hints())); |
| } |