Add FPDF_GetTrailerEnds() API This follows the same pattern as FPDFSignatureObj_GetTime(), so the client has to call this function twice, but allocation of the buffer happens outside pdfium. The benefit of this API is that client code can learn about the initial trailer and the trailer at the end of incremental updates for signature verification purposes. This information can be used to write client code that emits no warnings when a document is signed multiple times, but still warn when there are unsigned incremental updates between the signatures or after the last one. This behavior matches what Acrobat does, but it does not seem to be in the PDF reference. So it is reasonable to have an API that allows doing this, but not hardcode this policy into pdfium. Change-Id: Ic741b672821d7743df88cdf51af54e85251857e6 Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/73370 Commit-Queue: Miklos V <vmiklos@collabora.co.uk> Commit-Queue: Lei Zhang <thestig@chromium.org> Reviewed-by: Lei Zhang <thestig@chromium.org> Reviewed-by: Tom Sepez <tsepez@chromium.org>

commit: cc2e3238bcef3571ceb20846f9811556bd83921d [log] [tgz]
author: Miklos Vajna <vmiklos@collabora.co.uk> Wed Sep 23 09:38:04 2020 +0000
committer: Chromium commit bot <commit-bot@chromium.org> Wed Sep 23 09:38:04 2020 +0000
tree: 743d39d5d3ed2195b7717be5438ec7c7c7700cb2
parent: 52d8c65408b9c600d829156f7c46f8cc43e4f9c8 [diff]
diff --git a/core/fpdfapi/parser/cpdf_syntax_parser.cpp b/core/fpdfapi/parser/cpdf_syntax_parser.cpp
index 0fc3e96..619d253 100644
--- a/core/fpdfapi/parser/cpdf_syntax_parser.cpp
+++ b/core/fpdfapi/parser/cpdf_syntax_parser.cpp

@@ -370,6 +370,11 @@
 }
 
 void CPDF_SyntaxParser::ToNextWord() {
+  if (m_TrailerEnds) {
+    RecordingToNextWord();
+    return;
+  }
+
   uint8_t ch;
   if (!GetNextChar(ch))
     return;
@@ -393,6 +398,71 @@
   m_Pos--;
 }
 
+// A state machine which goes % -> E -> O -> F -> line ending.
+enum class EofState {
+  kInitial = 0,
+  kNonPercent,
+  kPercent,
+  kE,
+  kO,
+  kF,
+  kInvalid,
+};
+
+void CPDF_SyntaxParser::RecordingToNextWord() {
+  assert(m_TrailerEnds);
+
+  EofState eof_state = EofState::kInitial;
+  // Find the first character which is neither whitespace, nor part of a
+  // comment.
+  while (1) {
+    uint8_t ch;
+    if (!GetNextChar(ch))
+      return;
+    switch (eof_state) {
+      case EofState::kInitial:
+        if (!PDFCharIsWhitespace(ch))
+          eof_state = ch == '%' ? EofState::kPercent : EofState::kNonPercent;
+        break;
+      case EofState::kNonPercent:
+        break;
+      case EofState::kPercent:
+        if (ch == 'E')
+          eof_state = EofState::kE;
+        else if (ch != '%')
+          eof_state = EofState::kInvalid;
+        break;
+      case EofState::kE:
+        eof_state = ch == 'O' ? EofState::kO : EofState::kInvalid;
+        break;
+      case EofState::kO:
+        eof_state = ch == 'F' ? EofState::kF : EofState::kInvalid;
+        break;
+      case EofState::kF:
+        if (ch == '\r') {
+          // See if \r has to be combined with a \n that follows it
+          // immediately.
+          if (GetNextChar(ch) && ch != '\n') {
+            ch = '\r';
+            m_Pos--;
+          }
+        }
+        // If we now have a \r, that's not followed by a \n, so both are OK.
+        if (ch == '\r' || ch == '\n')
+          m_TrailerEnds->push_back(m_Pos);
+        eof_state = EofState::kInvalid;
+        break;
+      case EofState::kInvalid:
+        break;
+    }
+    if (PDFCharIsLineEnding(ch))
+      eof_state = EofState::kInitial;
+    if (eof_state == EofState::kNonPercent)
+      break;
+  }
+  m_Pos--;
+}
+
 ByteString CPDF_SyntaxParser::GetNextWord(bool* bIsNumber) {
   const CPDF_ReadValidator::Session read_session(GetValidator());
   GetNextWordInternal(bIsNumber);

diff --git a/core/fpdfapi/parser/cpdf_syntax_parser.h b/core/fpdfapi/parser/cpdf_syntax_parser.h
index bdcfa44..7d78dcf 100644
--- a/core/fpdfapi/parser/cpdf_syntax_parser.h
+++ b/core/fpdfapi/parser/cpdf_syntax_parser.h

@@ -12,6 +12,7 @@
 
 #include "core/fpdfapi/parser/cpdf_stream.h"
 #include "core/fxcrt/string_pool_template.h"
+#include "core/fxcrt/unowned_ptr.h"
 #include "core/fxcrt/weak_ptr.h"
 
 class CPDF_CryptoHandler;
@@ -51,6 +52,7 @@
   ByteString GetKeyword();
   void ToNextLine();
   void ToNextWord();
+  void RecordingToNextWord();
   bool BackwardsSearchToWord(ByteStringView word, FX_FILESIZE limit);
   FX_FILESIZE FindTag(ByteStringView tag);
   bool ReadBlock(uint8_t* pBuf, uint32_t size);
@@ -75,6 +77,10 @@
   ByteString ReadString();
   ByteString ReadHexString();
 
+  void SetTrailerEnds(std::vector<unsigned int>* trailer_ends) {
+    m_TrailerEnds = trailer_ends;
+  }
+
  private:
   friend class CPDF_DataAvail;
   friend class cpdf_syntax_parser_ReadHexString_Test;
@@ -114,6 +120,9 @@
   uint32_t m_WordSize = 0;
   uint8_t m_WordBuffer[257];
   uint32_t m_ReadBufferSize = CPDF_Stream::kFileBufSize;
+
+  // The syntax parser records traversed trailer end byte offsets here.
+  UnownedPtr<std::vector<unsigned int>> m_TrailerEnds;
 };
 
 #endif  // CORE_FPDFAPI_PARSER_CPDF_SYNTAX_PARSER_H_

diff --git a/fpdfsdk/fpdf_view.cpp b/fpdfsdk/fpdf_view.cpp
index 4fff57c..e627ede 100644
--- a/fpdfsdk/fpdf_view.cpp
+++ b/fpdfsdk/fpdf_view.cpp

@@ -23,6 +23,7 @@
 #include "core/fpdfapi/parser/cpdf_parser.h"
 #include "core/fpdfapi/parser/cpdf_stream.h"
 #include "core/fpdfapi/parser/cpdf_string.h"
+#include "core/fpdfapi/parser/cpdf_syntax_parser.h"
 #include "core/fpdfapi/parser/fpdf_parser_decode.h"
 #include "core/fpdfapi/render/cpdf_docrenderdata.h"
 #include "core/fpdfapi/render/cpdf_pagerendercache.h"
@@ -1222,3 +1223,65 @@
                                                      buffer, buflen);
   return true;
 }
+
+FPDF_EXPORT unsigned long FPDF_CALLCONV
+FPDF_GetTrailerEnds(FPDF_DOCUMENT document,
+                    unsigned int* buffer,
+                    unsigned long length) {
+  auto* doc = CPDFDocumentFromFPDFDocument(document);
+  if (!doc)
+    return 0;
+
+  // Start recording trailer ends.
+  auto* parser = doc->GetParser();
+  CPDF_SyntaxParser* syntax = parser->GetSyntax();
+  std::vector<unsigned int> trailer_ends;
+  syntax->SetTrailerEnds(&trailer_ends);
+
+  // Traverse the document.
+  syntax->SetPos(0);
+  while (1) {
+    bool number;
+    ByteString word = syntax->GetNextWord(&number);
+    if (number) {
+      // The object number was read. Read the generation number.
+      word = syntax->GetNextWord(&number);
+      if (!number)
+        break;
+
+      word = syntax->GetNextWord(nullptr);
+      if (word != "obj")
+        break;
+
+      syntax->GetObjectBody(nullptr);
+
+      word = syntax->GetNextWord(nullptr);
+      if (word != "endobj")
+        break;
+    } else if (word == "trailer") {
+      syntax->GetObjectBody(nullptr);
+    } else if (word == "startxref") {
+      syntax->GetNextWord(nullptr);
+    } else if (word == "xref") {
+      while (1) {
+        word = syntax->GetNextWord(nullptr);
+        if (word.IsEmpty() || word == "startxref")
+          break;
+      }
+      syntax->GetNextWord(nullptr);
+    } else {
+      break;
+    }
+  }
+
+  // Stop recording trailer ends.
+  syntax->SetTrailerEnds(nullptr);
+
+  unsigned long trailer_ends_len = trailer_ends.size();
+  if (buffer && length >= trailer_ends_len) {
+    for (size_t i = 0; i < trailer_ends_len; ++i)
+      buffer[i] = trailer_ends[i];
+  }
+
+  return trailer_ends_len;
+}

diff --git a/fpdfsdk/fpdf_view_c_api_test.c b/fpdfsdk/fpdf_view_c_api_test.c
index d8190e0..7708cba 100644
--- a/fpdfsdk/fpdf_view_c_api_test.c
+++ b/fpdfsdk/fpdf_view_c_api_test.c

@@ -450,6 +450,7 @@
     CHK(FPDF_GetRecommendedV8Flags);
 #endif
     CHK(FPDF_GetSecurityHandlerRevision);
+    CHK(FPDF_GetTrailerEnds);
     CHK(FPDF_GetXFAPacketContent);
     CHK(FPDF_GetXFAPacketCount);
     CHK(FPDF_GetXFAPacketName);

diff --git a/fpdfsdk/fpdf_view_embeddertest.cpp b/fpdfsdk/fpdf_view_embeddertest.cpp
index d47edb6..6002917 100644
--- a/fpdfsdk/fpdf_view_embeddertest.cpp
+++ b/fpdfsdk/fpdf_view_embeddertest.cpp

@@ -1634,3 +1634,72 @@
   UnloadPage(page);
 }
 #endif  // defined(OS_WIN)
+
+TEST_F(FPDFViewEmbedderTest, GetTrailerEnds) {
+  ASSERT_TRUE(OpenDocument("two_signatures.pdf"));
+
+  // FPDF_GetTrailerEnds() positive testing.
+  unsigned long size = FPDF_GetTrailerEnds(document(), nullptr, 0);
+  const std::vector<unsigned int> kExpectedEnds{633, 1703, 2781};
+  ASSERT_EQ(kExpectedEnds.size(), size);
+  std::vector<unsigned int> ends(size);
+  ASSERT_EQ(size, FPDF_GetTrailerEnds(document(), ends.data(), size));
+  ASSERT_EQ(kExpectedEnds, ends);
+
+  // FPDF_GetTrailerEnds() negative testing.
+  ASSERT_EQ(0U, FPDF_GetTrailerEnds(nullptr, nullptr, 0));
+
+  ends.resize(2);
+  ends[0] = 0;
+  ends[1] = 1;
+  size = FPDF_GetTrailerEnds(document(), ends.data(), ends.size());
+  ASSERT_EQ(kExpectedEnds.size(), size);
+  EXPECT_EQ(0U, ends[0]);
+  EXPECT_EQ(1U, ends[1]);
+}
+
+TEST_F(FPDFViewEmbedderTest, GetTrailerEndsHelloWorld) {
+  // Single trailer, \n line ending at the trailer end.
+  ASSERT_TRUE(OpenDocument("hello_world.pdf"));
+
+  // FPDF_GetTrailerEnds() positive testing.
+  unsigned long size = FPDF_GetTrailerEnds(document(), nullptr, 0);
+  const std::vector<unsigned int> kExpectedEnds{840};
+  ASSERT_EQ(kExpectedEnds.size(), size);
+  std::vector<unsigned int> ends(size);
+  ASSERT_EQ(size, FPDF_GetTrailerEnds(document(), ends.data(), size));
+  ASSERT_EQ(kExpectedEnds, ends);
+}
+
+TEST_F(FPDFViewEmbedderTest, GetTrailerEndsAnnotationStamp) {
+  // Multiple trailers, \r\n line ending at the trailer ends.
+  ASSERT_TRUE(OpenDocument("annotation_stamp_with_ap.pdf"));
+
+  // FPDF_GetTrailerEnds() positive testing.
+  unsigned long size = FPDF_GetTrailerEnds(document(), nullptr, 0);
+  const std::vector<unsigned int> kExpectedEnds{441, 7945, 101719};
+  ASSERT_EQ(kExpectedEnds.size(), size);
+  std::vector<unsigned int> ends(size);
+  ASSERT_EQ(size, FPDF_GetTrailerEnds(document(), ends.data(), size));
+  ASSERT_EQ(kExpectedEnds, ends);
+}
+
+TEST_F(FPDFViewEmbedderTest, GetTrailerEndsLinearized) {
+  // Set up linearized PDF.
+  FileAccessForTesting file_acc("linearized.pdf");
+  FakeFileAccess fake_acc(&file_acc);
+  avail_ = FPDFAvail_Create(fake_acc.GetFileAvail(), fake_acc.GetFileAccess());
+  fake_acc.SetWholeFileAvailable();
+
+  // Multiple trailers, \r line ending at the trailer ends (no \n).
+  document_ = FPDFAvail_GetDocument(avail_, nullptr);
+  ASSERT_TRUE(document());
+
+  // FPDF_GetTrailerEnds() positive testing.
+  unsigned long size = FPDF_GetTrailerEnds(document(), nullptr, 0);
+  const std::vector<unsigned int> kExpectedEnds{474, 11384};
+  ASSERT_EQ(kExpectedEnds.size(), size);
+  std::vector<unsigned int> ends(size);
+  ASSERT_EQ(size, FPDF_GetTrailerEnds(document(), ends.data(), size));
+  ASSERT_EQ(kExpectedEnds, ends);
+}

diff --git a/public/fpdfview.h b/public/fpdfview.h
index 27df9bd..e996d4a 100644
--- a/public/fpdfview.h
+++ b/public/fpdfview.h

@@ -606,6 +606,25 @@
 FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV
 FPDF_DocumentHasValidCrossReferenceTable(FPDF_DOCUMENT document);
 
+// Experimental API.
+// Function: FPDF_GetTrailerEnds
+//          Get the byte offsets of trailer ends.
+// Parameters:
+//          document    -   Handle to document. Returned by FPDF_LoadDocument().
+//          buffer      -   The address of a buffer that receives the
+//                          byte offsets.
+//          length      -   The size, in ints, of |buffer|.
+// Return value:
+//          Returns the number of ints in the buffer on success, 0 on error.
+//
+// |buffer| is an array of integers that describes the exact byte offsets of the
+// trailer ends in the document. If |length| is less than the returned length,
+// or |document| or |buffer| is NULL, |buffer| will not be modified.
+FPDF_EXPORT unsigned long FPDF_CALLCONV
+FPDF_GetTrailerEnds(FPDF_DOCUMENT document,
+                    unsigned int* buffer,
+                    unsigned long length);
+
 // Function: FPDF_GetDocPermission
 //          Get file permission flags of the document.
 // Parameters:
commit	cc2e3238bcef3571ceb20846f9811556bd83921d	[log] [tgz]
author	Miklos Vajna <vmiklos@collabora.co.uk>	Wed Sep 23 09:38:04 2020 +0000
committer	Chromium commit bot <commit-bot@chromium.org>	Wed Sep 23 09:38:04 2020 +0000
tree	743d39d5d3ed2195b7717be5438ec7c7c7700cb2
parent	52d8c65408b9c600d829156f7c46f8cc43e4f9c8 [diff]