Implement read validator.

The wrapper for IFX_SeekableReadStream.
Which allow us to check data availability on read request
and request downloading of non available data on fly.

Change-Id: I27c66cd58f43f8432f73104cc3f4c980515a9b56
Reviewed-on: https://pdfium-review.googlesource.com/9050
Commit-Queue: Art Snake <art-snake@yandex-team.ru>
Reviewed-by: (OOO Jul 28 - Aug 8) dsinclair <dsinclair@chromium.org>
diff --git a/BUILD.gn b/BUILD.gn
index f2dbc68..db87b16 100644
--- a/BUILD.gn
+++ b/BUILD.gn
@@ -605,6 +605,8 @@
     "core/fpdfapi/parser/cpdf_object.h",
     "core/fpdfapi/parser/cpdf_parser.cpp",
     "core/fpdfapi/parser/cpdf_parser.h",
+    "core/fpdfapi/parser/cpdf_read_validator.cpp",
+    "core/fpdfapi/parser/cpdf_read_validator.h",
     "core/fpdfapi/parser/cpdf_reference.cpp",
     "core/fpdfapi/parser/cpdf_reference.h",
     "core/fpdfapi/parser/cpdf_security_handler.cpp",
@@ -1897,6 +1899,7 @@
     "core/fpdfapi/parser/cpdf_document_unittest.cpp",
     "core/fpdfapi/parser/cpdf_object_unittest.cpp",
     "core/fpdfapi/parser/cpdf_parser_unittest.cpp",
+    "core/fpdfapi/parser/cpdf_read_validator_unittest.cpp",
     "core/fpdfapi/parser/cpdf_simple_parser_unittest.cpp",
     "core/fpdfapi/parser/cpdf_syntax_parser_unittest.cpp",
     "core/fpdfapi/parser/fpdf_parser_decode_unittest.cpp",
diff --git a/core/fpdfapi/parser/cpdf_data_avail.cpp b/core/fpdfapi/parser/cpdf_data_avail.cpp
index 043462c..bc81e99 100644
--- a/core/fpdfapi/parser/cpdf_data_avail.cpp
+++ b/core/fpdfapi/parser/cpdf_data_avail.cpp
@@ -18,6 +18,7 @@
 #include "core/fpdfapi/parser/cpdf_linearized_header.h"
 #include "core/fpdfapi/parser/cpdf_name.h"
 #include "core/fpdfapi/parser/cpdf_number.h"
+#include "core/fpdfapi/parser/cpdf_read_validator.h"
 #include "core/fpdfapi/parser/cpdf_reference.h"
 #include "core/fpdfapi/parser/cpdf_stream.h"
 #include "core/fpdfapi/parser/fpdf_parser_utility.h"
@@ -51,6 +52,27 @@
   return nullptr;
 }
 
+class HintsAssigner {
+ public:
+  HintsAssigner(CPDF_ReadValidator* validator,
+                CPDF_DataAvail::DownloadHints* hints)
+      : validator_(validator) {
+    if (validator_) {
+      validator_->ResetErrors();
+      validator_->SetDownloadHints(hints);
+    }
+  }
+
+  ~HintsAssigner() {
+    if (validator_) {
+      validator_->SetDownloadHints(nullptr);
+    }
+  }
+
+ private:
+  CFX_UnownedPtr<CPDF_ReadValidator> validator_;
+};
+
 }  // namespace
 
 CPDF_DataAvail::FileAvail::~FileAvail() {}
@@ -61,7 +83,11 @@
     FileAvail* pFileAvail,
     const CFX_RetainPtr<IFX_SeekableReadStream>& pFileRead,
     bool bSupportHintTable)
-    : m_pFileAvail(pFileAvail), m_pFileRead(pFileRead) {
+    : m_pFileAvail(pFileAvail),
+      m_pFileRead(
+          pFileRead
+              ? pdfium::MakeRetain<CPDF_ReadValidator>(pFileRead, m_pFileAvail)
+              : nullptr) {
   m_Pos = 0;
   m_dwFileLen = 0;
   if (m_pFileRead) {
@@ -211,6 +237,8 @@
 
 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::IsDocAvail(
     DownloadHints* pHints) {
+  const HintsAssigner hints_assigner(m_pFileRead.Get(), pHints);
+
   if (!m_dwFileLen && m_pFileRead) {
     m_dwFileLen = (uint32_t)m_pFileRead->GetSize();
     if (!m_dwFileLen)
@@ -285,7 +313,7 @@
     case PDF_DATAAVAIL_CROSSREF:
       return CheckCrossRef(pHints);
     case PDF_DATAAVAIL_CROSSREF_ITEM:
-      return CheckCrossRefItem(pHints);
+      return CheckCrossRefItem();
     case PDF_DATAAVAIL_CROSSREF_STREAM:
       return CheckAllCrossRefStream(pHints);
     case PDF_DATAAVAIL_TRAILER:
@@ -955,14 +983,12 @@
   return true;
 }
 
-bool CPDF_DataAvail::CheckCrossRefItem(DownloadHints* pHints) {
-  int32_t iSize = 0;
+bool CPDF_DataAvail::CheckCrossRefItem() {
   CFX_ByteString token;
   while (1) {
     if (!GetNextToken(&token)) {
-      iSize = static_cast<int32_t>(
-          m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512);
-      pHints->AddSegment(m_Pos, iSize);
+      if (!m_pFileRead->has_read_problems())
+        m_docStatus = PDF_DATAAVAIL_ERROR;
       return false;
     }
 
@@ -1567,6 +1593,10 @@
   return true;
 }
 
+CFX_RetainPtr<IFX_SeekableReadStream> CPDF_DataAvail::GetFileRead() const {
+  return m_pFileRead;
+}
+
 int CPDF_DataAvail::GetPageCount() const {
   if (m_pLinearized)
     return m_pLinearized->GetPageCount();
diff --git a/core/fpdfapi/parser/cpdf_data_avail.h b/core/fpdfapi/parser/cpdf_data_avail.h
index eb45c14..f19b36d 100644
--- a/core/fpdfapi/parser/cpdf_data_avail.h
+++ b/core/fpdfapi/parser/cpdf_data_avail.h
@@ -21,6 +21,7 @@
 class CPDF_IndirectObjectHolder;
 class CPDF_LinearizedHeader;
 class CPDF_Parser;
+class CPDF_ReadValidator;
 
 enum PDF_DATAAVAIL_STATUS {
   PDF_DATAAVAIL_HEADER = 0,
@@ -107,9 +108,7 @@
   DocFormStatus IsFormAvail(DownloadHints* pHints);
   DocLinearizationStatus IsLinearizedPDF();
   bool IsLinearized();
-  CFX_RetainPtr<IFX_SeekableReadStream> GetFileRead() const {
-    return m_pFileRead;
-  }
+  CFX_RetainPtr<IFX_SeekableReadStream> GetFileRead() const;
   int GetPageCount() const;
   CPDF_Dictionary* GetPage(int index);
 
@@ -137,7 +136,7 @@
   bool CheckHintTables(DownloadHints* pHints);
   bool CheckEnd(DownloadHints* pHints);
   bool CheckCrossRef(DownloadHints* pHints);
-  bool CheckCrossRefItem(DownloadHints* pHints);
+  bool CheckCrossRefItem();
   bool CheckTrailer(DownloadHints* pHints);
   bool CheckRoot(DownloadHints* pHints);
   bool CheckInfo(DownloadHints* pHints);
@@ -194,7 +193,7 @@
   bool ValidateForm();
 
   FileAvail* const m_pFileAvail;
-  CFX_RetainPtr<IFX_SeekableReadStream> m_pFileRead;
+  CFX_RetainPtr<CPDF_ReadValidator> m_pFileRead;
   CPDF_Parser m_parser;
   CPDF_SyntaxParser m_syntaxParser;
   std::unique_ptr<CPDF_Object> m_pRoot;
diff --git a/core/fpdfapi/parser/cpdf_read_validator.cpp b/core/fpdfapi/parser/cpdf_read_validator.cpp
new file mode 100644
index 0000000..148ecfd
--- /dev/null
+++ b/core/fpdfapi/parser/cpdf_read_validator.cpp
@@ -0,0 +1,94 @@
+// Copyright 2017 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "core/fpdfapi/parser/cpdf_read_validator.h"
+
+#include <algorithm>
+
+#include "third_party/base/logging.h"
+
+namespace {
+
+constexpr FX_FILESIZE kAlignBlockValue = 512;
+
+FX_FILESIZE AlignDown(FX_FILESIZE offset) {
+  return offset > 0 ? (offset - offset % kAlignBlockValue) : 0;
+}
+
+FX_FILESIZE AlignUp(FX_FILESIZE offset) {
+  FX_SAFE_FILESIZE safe_result = AlignDown(offset);
+  safe_result += kAlignBlockValue;
+  if (safe_result.IsValid())
+    return safe_result.ValueOrDie();
+  return offset;
+}
+
+}  // namespace
+
+CPDF_ReadValidator::CPDF_ReadValidator(
+    const CFX_RetainPtr<IFX_SeekableReadStream>& file_read,
+    CPDF_DataAvail::FileAvail* file_avail)
+    : file_read_(file_read),
+      file_avail_(file_avail),
+      read_error_(false),
+      has_unavailable_data_(false) {
+  ASSERT(file_read_);
+}
+
+CPDF_ReadValidator::~CPDF_ReadValidator() {}
+
+void CPDF_ReadValidator::ResetErrors() {
+  read_error_ = false;
+  has_unavailable_data_ = false;
+}
+
+bool CPDF_ReadValidator::ReadBlock(void* buffer,
+                                   FX_FILESIZE offset,
+                                   size_t size) {
+  // correct values checks:
+  if (!pdfium::base::IsValueInRangeForNumericType<uint32_t>(size))
+    return false;
+
+  FX_SAFE_FILESIZE end_offset = offset;
+  end_offset += size;
+  if (!end_offset.IsValid() || end_offset.ValueOrDie() > GetSize())
+    return false;
+
+  if (!file_avail_ ||
+      file_avail_->IsDataAvail(offset, static_cast<uint32_t>(size))) {
+    if (file_read_->ReadBlock(buffer, offset, size))
+      return true;
+    read_error_ = true;
+  }
+  has_unavailable_data_ = true;
+  ScheduleDownload(offset, size);
+  return false;
+}
+
+FX_FILESIZE CPDF_ReadValidator::GetSize() {
+  return file_read_->GetSize();
+}
+
+void CPDF_ReadValidator::ScheduleDownload(FX_FILESIZE offset, size_t size) {
+  if (!hints_ || size == 0)
+    return;
+
+  const FX_FILESIZE start_segment_offset = AlignDown(offset);
+  FX_SAFE_FILESIZE end_segment_offset = offset;
+  end_segment_offset += size;
+  if (!end_segment_offset.IsValid()) {
+    NOTREACHED();
+    return;
+  }
+  end_segment_offset =
+      std::min(GetSize(), AlignUp(end_segment_offset.ValueOrDie()));
+
+  FX_SAFE_UINT32 segment_size = end_segment_offset;
+  segment_size -= start_segment_offset;
+  if (!segment_size.IsValid()) {
+    NOTREACHED();
+    return;
+  }
+  hints_->AddSegment(start_segment_offset, segment_size.ValueOrDie());
+}
diff --git a/core/fpdfapi/parser/cpdf_read_validator.h b/core/fpdfapi/parser/cpdf_read_validator.h
new file mode 100644
index 0000000..da8acfe
--- /dev/null
+++ b/core/fpdfapi/parser/cpdf_read_validator.h
@@ -0,0 +1,48 @@
+// Copyright 2017 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CORE_FPDFAPI_PARSER_CPDF_READ_VALIDATOR_H_
+#define CORE_FPDFAPI_PARSER_CPDF_READ_VALIDATOR_H_
+
+#include "core/fpdfapi/parser/cpdf_data_avail.h"
+
+class CPDF_ReadValidator : public IFX_SeekableReadStream {
+ public:
+  template <typename T, typename... Args>
+  friend CFX_RetainPtr<T> pdfium::MakeRetain(Args&&... args);
+
+  void SetDownloadHints(CPDF_DataAvail::DownloadHints* hints) {
+    hints_ = hints;
+  }
+
+  bool read_error() const { return read_error_; }
+  bool has_unavailable_data() const { return has_unavailable_data_; }
+
+  bool has_read_problems() const {
+    return read_error() || has_unavailable_data();
+  }
+
+  void ResetErrors();
+
+  // IFX_SeekableReadStream overrides:
+  bool ReadBlock(void* buffer, FX_FILESIZE offset, size_t size) override;
+  FX_FILESIZE GetSize() override;
+
+ private:
+  CPDF_ReadValidator(const CFX_RetainPtr<IFX_SeekableReadStream>& file_read,
+                     CPDF_DataAvail::FileAvail* file_avail);
+  ~CPDF_ReadValidator() override;
+
+  void ScheduleDownload(FX_FILESIZE offset, size_t size);
+
+  CFX_RetainPtr<IFX_SeekableReadStream> file_read_;
+  CFX_UnownedPtr<CPDF_DataAvail::FileAvail> file_avail_;
+
+  CFX_UnownedPtr<CPDF_DataAvail::DownloadHints> hints_;
+
+  bool read_error_;
+  bool has_unavailable_data_;
+};
+
+#endif  // CORE_FPDFAPI_PARSER_CPDF_READ_VALIDATOR_H_
diff --git a/core/fpdfapi/parser/cpdf_read_validator_unittest.cpp b/core/fpdfapi/parser/cpdf_read_validator_unittest.cpp
new file mode 100644
index 0000000..f0e47f5
--- /dev/null
+++ b/core/fpdfapi/parser/cpdf_read_validator_unittest.cpp
@@ -0,0 +1,180 @@
+// Copyright 2017 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "core/fpdfapi/parser/cpdf_read_validator.h"
+
+#include <limits>
+#include <utility>
+#include <vector>
+
+#include "core/fxcrt/cfx_memorystream.h"
+#include "core/fxcrt/fx_stream.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace {
+
+constexpr uint32_t kTestDataSize = 64 * 1024 - 467;
+
+std::pair<FX_FILESIZE, FX_FILESIZE> MakeRange(uint32_t start, uint32_t end) {
+  return std::pair<FX_FILESIZE, FX_FILESIZE>(start, end);
+}
+
+class MockFileAvail : public CPDF_DataAvail::FileAvail {
+ public:
+  MockFileAvail() : available_range_(0, 0) {}
+  ~MockFileAvail() override {}
+
+  bool IsDataAvail(FX_FILESIZE offset, uint32_t size) override {
+    return available_range_.first <= offset &&
+           available_range_.second >= static_cast<FX_FILESIZE>(offset + size);
+  }
+
+  void SetAvailableRange(const std::pair<FX_FILESIZE, FX_FILESIZE>& range) {
+    available_range_ = range;
+  }
+
+  void SetAvailableRange(uint32_t start, uint32_t end) {
+    SetAvailableRange(MakeRange(start, end));
+  }
+
+ private:
+  std::pair<FX_FILESIZE, FX_FILESIZE> available_range_;
+};
+
+class MockDownloadHints : public CPDF_DataAvail::DownloadHints {
+ public:
+  MockDownloadHints() : last_requested_range_(0, 0) {}
+  ~MockDownloadHints() override {}
+
+  void AddSegment(FX_FILESIZE offset, uint32_t size) override {
+    last_requested_range_.first = offset;
+    last_requested_range_.second = offset + size;
+  }
+
+  const std::pair<FX_FILESIZE, FX_FILESIZE>& GetLastRequstedRange() const {
+    return last_requested_range_;
+  }
+
+  void Reset() { last_requested_range_ = MakeRange(0, 0); }
+
+ private:
+  std::pair<FX_FILESIZE, FX_FILESIZE> last_requested_range_;
+};
+
+class InvalidReader : public IFX_SeekableReadStream {
+ public:
+  template <typename T, typename... Args>
+  friend CFX_RetainPtr<T> pdfium::MakeRetain(Args&&... args);
+
+  // IFX_SeekableReadStream overrides:
+  bool ReadBlock(void* buffer, FX_FILESIZE offset, size_t size) override {
+    return false;
+  }
+  FX_FILESIZE GetSize() override { return kTestDataSize; }
+
+ private:
+  InvalidReader() {}
+  ~InvalidReader() override {}
+};
+
+}  // namespace
+
+TEST(CPDF_ReadValidatorTest, UnavailableData) {
+  std::vector<uint8_t> test_data(kTestDataSize);
+  auto file = pdfium::MakeRetain<CFX_MemoryStream>(test_data.data(),
+                                                   test_data.size(), false);
+  MockFileAvail file_avail;
+  auto validator = pdfium::MakeRetain<CPDF_ReadValidator>(file, &file_avail);
+
+  std::vector<uint8_t> read_buffer(100);
+  EXPECT_FALSE(
+      validator->ReadBlock(read_buffer.data(), 5000, read_buffer.size()));
+
+  EXPECT_FALSE(validator->read_error());
+  EXPECT_TRUE(validator->has_unavailable_data());
+
+  validator->ResetErrors();
+
+  file_avail.SetAvailableRange(5000, 5000 + read_buffer.size());
+
+  EXPECT_TRUE(
+      validator->ReadBlock(read_buffer.data(), 5000, read_buffer.size()));
+  EXPECT_FALSE(validator->read_error());
+  EXPECT_FALSE(validator->has_unavailable_data());
+}
+
+TEST(CPDF_ReadValidatorTest, UnavailableDataWithHints) {
+  std::vector<uint8_t> test_data(kTestDataSize);
+  auto file = pdfium::MakeRetain<CFX_MemoryStream>(test_data.data(),
+                                                   test_data.size(), false);
+  MockFileAvail file_avail;
+  auto validator = pdfium::MakeRetain<CPDF_ReadValidator>(file, &file_avail);
+
+  MockDownloadHints hints;
+  validator->SetDownloadHints(&hints);
+
+  std::vector<uint8_t> read_buffer(100);
+
+  EXPECT_FALSE(
+      validator->ReadBlock(read_buffer.data(), 5000, read_buffer.size()));
+  EXPECT_FALSE(validator->read_error());
+  EXPECT_TRUE(validator->has_unavailable_data());
+
+  // Requested range should be enlarged and aligned.
+  EXPECT_EQ(MakeRange(4608, 5120), hints.GetLastRequstedRange());
+
+  file_avail.SetAvailableRange(hints.GetLastRequstedRange());
+  hints.Reset();
+
+  validator->ResetErrors();
+  EXPECT_TRUE(
+      validator->ReadBlock(read_buffer.data(), 5000, read_buffer.size()));
+  // No new request on already available data.
+  EXPECT_EQ(MakeRange(0, 0), hints.GetLastRequstedRange());
+  EXPECT_FALSE(validator->read_error());
+  EXPECT_FALSE(validator->has_unavailable_data());
+
+  validator->ResetErrors();
+  // Try read unavailable data at file end.
+  EXPECT_FALSE(validator->ReadBlock(read_buffer.data(),
+                                    validator->GetSize() - read_buffer.size(),
+                                    read_buffer.size()));
+  // Should not enlarge request at file end.
+  EXPECT_EQ(validator->GetSize(), hints.GetLastRequstedRange().second);
+  EXPECT_FALSE(validator->read_error());
+  EXPECT_TRUE(validator->has_unavailable_data());
+
+  validator->SetDownloadHints(nullptr);
+}
+
+TEST(CPDF_ReadValidatorTest, ReadError) {
+  auto file = pdfium::MakeRetain<InvalidReader>();
+  auto validator = pdfium::MakeRetain<CPDF_ReadValidator>(file, nullptr);
+
+  static const uint32_t kBufferSize = 3 * 1000;
+  std::vector<uint8_t> buffer(kBufferSize);
+
+  EXPECT_FALSE(validator->ReadBlock(buffer.data(), 5000, 100));
+  EXPECT_TRUE(validator->read_error());
+  EXPECT_TRUE(validator->has_unavailable_data());
+}
+
+TEST(CPDF_ReadValidatorTest, IntOverflow) {
+  std::vector<uint8_t> test_data(kTestDataSize);
+  auto file = pdfium::MakeRetain<CFX_MemoryStream>(test_data.data(),
+                                                   test_data.size(), false);
+  MockFileAvail file_avail;
+  auto validator = pdfium::MakeRetain<CPDF_ReadValidator>(file, &file_avail);
+
+  std::vector<uint8_t> read_buffer(100);
+
+  // If we have int overflow, this is equal reading after file end. This is not
+  // read_error, and in this case we have not unavailable data. It is just error
+  // of input params.
+  EXPECT_FALSE(validator->ReadBlock(read_buffer.data(),
+                                    std::numeric_limits<FX_FILESIZE>::max() - 1,
+                                    read_buffer.size()));
+  EXPECT_FALSE(validator->read_error());
+  EXPECT_FALSE(validator->has_unavailable_data());
+}
diff --git a/fpdfsdk/fpdf_dataavail.cpp b/fpdfsdk/fpdf_dataavail.cpp
index 2511441..85dd873 100644
--- a/fpdfsdk/fpdf_dataavail.cpp
+++ b/fpdfsdk/fpdf_dataavail.cpp
@@ -105,9 +105,9 @@
         m_FileRead(pdfium::MakeRetain<CFPDF_FileAccessWrap>()) {}
   ~CFPDF_DataAvail() {}
 
-  std::unique_ptr<CPDF_DataAvail> m_pDataAvail;
   std::unique_ptr<CFPDF_FileAvailWrap> m_FileAvail;
   CFX_RetainPtr<CFPDF_FileAccessWrap> m_FileRead;
+  std::unique_ptr<CPDF_DataAvail> m_pDataAvail;
 };
 
 CFPDF_DataAvail* CFPDFDataAvailFromFPDFAvail(FPDF_AVAIL avail) {
diff --git a/samples/pdfium_test.cc b/samples/pdfium_test.cc
index bb969b1..ee95e2f 100644
--- a/samples/pdfium_test.cc
+++ b/samples/pdfium_test.cc
@@ -1256,6 +1256,8 @@
   platform_callbacks.Doc_gotoPage = ExampleDocGotoPage;
   platform_callbacks.Doc_mail = ExampleDocMail;
 
+  // The pdf_avail must outlive doc.
+  std::unique_ptr<void, FPDFAvailDeleter> pdf_avail;
   // The document must outlive |form_callbacks.loaded_pages|.
   std::unique_ptr<void, FPDFDocumentDeleter> doc;
   FPDF_FORMFILLINFO_PDFiumTest form_callbacks = {};
@@ -1283,8 +1285,7 @@
 
   int nRet = PDF_DATA_NOTAVAIL;
   bool bIsLinearized = false;
-  std::unique_ptr<void, FPDFAvailDeleter> pdf_avail(
-      FPDFAvail_Create(&file_avail, &file_access));
+  pdf_avail.reset(FPDFAvail_Create(&file_avail, &file_access));
 
   if (FPDFAvail_IsLinearized(pdf_avail.get()) == PDF_LINEARIZED) {
     doc.reset(FPDFAvail_GetDocument(pdf_avail.get(), nullptr));