Add FPDF_GetTrailerEnds() API

This follows the same pattern as FPDFSignatureObj_GetTime(), so the
client has to call this function twice, but allocation of the buffer
happens outside pdfium.

The benefit of this API is that client code can learn about the initial
trailer and the trailer at the end of incremental updates for signature
verification purposes.

This information can be used to write client code that emits no warnings
when a document is signed multiple times, but still warns when there are
unsigned incremental updates between the signatures or after the last
one. This behavior matches what Acrobat does, but it does not seem to be
in the PDF reference. So it is reasonable to have an API that allows
doing this, but not to hardcode this policy into pdfium.

Change-Id: Ic741b672821d7743df88cdf51af54e85251857e6
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/73370
Commit-Queue: Miklos V <vmiklos@collabora.co.uk>
Commit-Queue: Lei Zhang <thestig@chromium.org>
Reviewed-by: Lei Zhang <thestig@chromium.org>
Reviewed-by: Tom Sepez <tsepez@chromium.org>
diff --git a/core/fpdfapi/parser/cpdf_syntax_parser.cpp b/core/fpdfapi/parser/cpdf_syntax_parser.cpp
index 0fc3e96..619d253 100644
--- a/core/fpdfapi/parser/cpdf_syntax_parser.cpp
+++ b/core/fpdfapi/parser/cpdf_syntax_parser.cpp
@@ -370,6 +370,11 @@
 }
 
 void CPDF_SyntaxParser::ToNextWord() {
+  if (m_TrailerEnds) {
+    RecordingToNextWord();
+    return;
+  }
+
   uint8_t ch;
   if (!GetNextChar(ch))
     return;
@@ -393,6 +398,71 @@
   m_Pos--;
 }
 
+// State machine matching a comment of the form % -> E -> O -> F -> line ending.
+enum class EofState {
+  kInitial = 0,  // Only whitespace seen since entry or the last line ending.
+  kNonPercent,   // First non-whitespace char is not '%'; scanning stops.
+  kPercent,      // In a comment; only '%' characters seen so far.
+  kE,            // The '%' run was followed by 'E'.
+  kO,            // ... then by 'O'.
+  kF,            // ... then by 'F'; a line ending must come next.
+  kInvalid,      // Nothing more to match until the next line ending.
+};
+
+void CPDF_SyntaxParser::RecordingToNextWord() {
+  assert(m_TrailerEnds);
+
+  EofState eof_state = EofState::kInitial;
+  // Scan to the first character that is neither whitespace nor part of a
+  // comment, recording each "%%EOF" + line ending met on the way.
+  while (1) {
+    uint8_t ch;
+    if (!GetNextChar(ch))
+      return;  // No more characters available.
+    switch (eof_state) {
+      case EofState::kInitial:
+        if (!PDFCharIsWhitespace(ch))
+          eof_state = ch == '%' ? EofState::kPercent : EofState::kNonPercent;
+        break;
+      case EofState::kNonPercent:
+        break;
+      case EofState::kPercent:
+        if (ch == 'E')
+          eof_state = EofState::kE;
+        else if (ch != '%')
+          eof_state = EofState::kInvalid;
+        break;
+      case EofState::kE:
+        eof_state = ch == 'O' ? EofState::kO : EofState::kInvalid;
+        break;
+      case EofState::kO:
+        eof_state = ch == 'F' ? EofState::kF : EofState::kInvalid;
+        break;
+      case EofState::kF:
+        if (ch == '\r') {
+          // A \r\n pair counts as one line ending: consume the \n if it
+          // follows immediately, otherwise un-read the character.
+          if (GetNextChar(ch) && ch != '\n') {
+            ch = '\r';
+            m_Pos--;
+          }
+        }
+        // Record m_Pos once a \n, \r\n, or lone \r has ended the marker.
+        if (ch == '\r' || ch == '\n')
+          m_TrailerEnds->push_back(m_Pos);
+        eof_state = EofState::kInvalid;
+        break;
+      case EofState::kInvalid:
+        break;
+    }
+    if (PDFCharIsLineEnding(ch))
+      eof_state = EofState::kInitial;  // Each line is matched afresh.
+    if (eof_state == EofState::kNonPercent)
+      break;
+  }
+  m_Pos--;  // Un-read the first character of the next word.
+}
+
 ByteString CPDF_SyntaxParser::GetNextWord(bool* bIsNumber) {
   const CPDF_ReadValidator::Session read_session(GetValidator());
   GetNextWordInternal(bIsNumber);
diff --git a/core/fpdfapi/parser/cpdf_syntax_parser.h b/core/fpdfapi/parser/cpdf_syntax_parser.h
index bdcfa44..7d78dcf 100644
--- a/core/fpdfapi/parser/cpdf_syntax_parser.h
+++ b/core/fpdfapi/parser/cpdf_syntax_parser.h
@@ -12,6 +12,7 @@
 
 #include "core/fpdfapi/parser/cpdf_stream.h"
 #include "core/fxcrt/string_pool_template.h"
+#include "core/fxcrt/unowned_ptr.h"
 #include "core/fxcrt/weak_ptr.h"
 
 class CPDF_CryptoHandler;
@@ -51,6 +52,7 @@
   ByteString GetKeyword();
   void ToNextLine();
   void ToNextWord();
+  void RecordingToNextWord();
   bool BackwardsSearchToWord(ByteStringView word, FX_FILESIZE limit);
   FX_FILESIZE FindTag(ByteStringView tag);
   bool ReadBlock(uint8_t* pBuf, uint32_t size);
@@ -75,6 +77,10 @@
   ByteString ReadString();
   ByteString ReadHexString();
 
+  void SetTrailerEnds(std::vector<unsigned int>* trailer_ends) {
+    m_TrailerEnds = trailer_ends;
+  }
+
  private:
   friend class CPDF_DataAvail;
   friend class cpdf_syntax_parser_ReadHexString_Test;
@@ -114,6 +120,9 @@
   uint32_t m_WordSize = 0;
   uint8_t m_WordBuffer[257];
   uint32_t m_ReadBufferSize = CPDF_Stream::kFileBufSize;
+
+  // The syntax parser records traversed trailer end byte offsets here.
+  UnownedPtr<std::vector<unsigned int>> m_TrailerEnds;
 };
 
 #endif  // CORE_FPDFAPI_PARSER_CPDF_SYNTAX_PARSER_H_
diff --git a/fpdfsdk/fpdf_view.cpp b/fpdfsdk/fpdf_view.cpp
index 4fff57c..e627ede 100644
--- a/fpdfsdk/fpdf_view.cpp
+++ b/fpdfsdk/fpdf_view.cpp
@@ -23,6 +23,7 @@
 #include "core/fpdfapi/parser/cpdf_parser.h"
 #include "core/fpdfapi/parser/cpdf_stream.h"
 #include "core/fpdfapi/parser/cpdf_string.h"
+#include "core/fpdfapi/parser/cpdf_syntax_parser.h"
 #include "core/fpdfapi/parser/fpdf_parser_decode.h"
 #include "core/fpdfapi/render/cpdf_docrenderdata.h"
 #include "core/fpdfapi/render/cpdf_pagerendercache.h"
@@ -1222,3 +1223,65 @@
                                                      buffer, buflen);
   return true;
 }
+
+FPDF_EXPORT unsigned long FPDF_CALLCONV
+FPDF_GetTrailerEnds(FPDF_DOCUMENT document,
+                    unsigned int* buffer,
+                    unsigned long length) {
+  auto* doc = CPDFDocumentFromFPDFDocument(document);
+  if (!doc || !doc->GetParser())
+    return 0;  // Nothing to traverse without a document and its parser.
+
+  // Start recording trailer ends.
+  auto* parser = doc->GetParser();
+  CPDF_SyntaxParser* syntax = parser->GetSyntax();
+  std::vector<unsigned int> trailer_ends;
+  syntax->SetTrailerEnds(&trailer_ends);
+
+  // Traverse the document.
+  syntax->SetPos(0);
+  while (1) {
+    bool number = false;
+    ByteString word = syntax->GetNextWord(&number);
+    if (number) {
+      // The object number was read. Read the generation number.
+      word = syntax->GetNextWord(&number);
+      if (!number)
+        break;
+
+      word = syntax->GetNextWord(nullptr);
+      if (word != "obj")
+        break;
+
+      syntax->GetObjectBody(nullptr);
+
+      word = syntax->GetNextWord(nullptr);
+      if (word != "endobj")
+        break;
+    } else if (word == "trailer") {
+      syntax->GetObjectBody(nullptr);
+    } else if (word == "startxref") {
+      syntax->GetNextWord(nullptr);  // Skip the word after "startxref".
+    } else if (word == "xref") {
+      while (1) {  // Skip the table entries up to "startxref".
+        word = syntax->GetNextWord(nullptr);
+        if (word.IsEmpty() || word == "startxref")
+          break;
+      }
+      syntax->GetNextWord(nullptr);  // Skip the word after "startxref".
+    } else {
+      break;  // Unrecognized (or empty) word: stop traversing.
+    }
+  }
+
+  // Stop recording before |trailer_ends| goes out of scope.
+  syntax->SetTrailerEnds(nullptr);
+
+  unsigned long trailer_ends_len = trailer_ends.size();
+  if (buffer && length >= trailer_ends_len) {  // All-or-nothing copy.
+    for (size_t i = 0; i < trailer_ends_len; ++i)
+      buffer[i] = trailer_ends[i];
+  }
+
+  return trailer_ends_len;
+}
diff --git a/fpdfsdk/fpdf_view_c_api_test.c b/fpdfsdk/fpdf_view_c_api_test.c
index d8190e0..7708cba 100644
--- a/fpdfsdk/fpdf_view_c_api_test.c
+++ b/fpdfsdk/fpdf_view_c_api_test.c
@@ -450,6 +450,7 @@
     CHK(FPDF_GetRecommendedV8Flags);
 #endif
     CHK(FPDF_GetSecurityHandlerRevision);
+    CHK(FPDF_GetTrailerEnds);
     CHK(FPDF_GetXFAPacketContent);
     CHK(FPDF_GetXFAPacketCount);
     CHK(FPDF_GetXFAPacketName);
diff --git a/fpdfsdk/fpdf_view_embeddertest.cpp b/fpdfsdk/fpdf_view_embeddertest.cpp
index d47edb6..6002917 100644
--- a/fpdfsdk/fpdf_view_embeddertest.cpp
+++ b/fpdfsdk/fpdf_view_embeddertest.cpp
@@ -1634,3 +1634,72 @@
   UnloadPage(page);
 }
 #endif  // defined(OS_WIN)
+
+TEST_F(FPDFViewEmbedderTest, GetTrailerEnds) {
+  ASSERT_TRUE(OpenDocument("two_signatures.pdf"));
+
+  // Positive testing: query the count first, then fetch the offsets.
+  unsigned long size = FPDF_GetTrailerEnds(document(), nullptr, 0);
+  const std::vector<unsigned int> kExpectedEnds{633, 1703, 2781};
+  ASSERT_EQ(kExpectedEnds.size(), size);
+  std::vector<unsigned int> ends(size);
+  ASSERT_EQ(size, FPDF_GetTrailerEnds(document(), ends.data(), size));
+  ASSERT_EQ(kExpectedEnds, ends);
+
+  // Negative testing: a null document yields 0.
+  ASSERT_EQ(0U, FPDF_GetTrailerEnds(nullptr, nullptr, 0));
+
+  ends.resize(2);  // Too small for the three expected offsets.
+  ends[0] = 0;
+  ends[1] = 1;
+  size = FPDF_GetTrailerEnds(document(), ends.data(), ends.size());
+  ASSERT_EQ(kExpectedEnds.size(), size);  // Full count is still reported.
+  EXPECT_EQ(0U, ends[0]);  // The undersized buffer was not modified.
+  EXPECT_EQ(1U, ends[1]);
+}
+
+TEST_F(FPDFViewEmbedderTest, GetTrailerEndsHelloWorld) {
+  // Single trailer, \n line ending at the trailer end.
+  ASSERT_TRUE(OpenDocument("hello_world.pdf"));
+
+  // FPDF_GetTrailerEnds() positive testing.
+  unsigned long size = FPDF_GetTrailerEnds(document(), nullptr, 0);
+  const std::vector<unsigned int> kExpectedEnds{840};
+  ASSERT_EQ(kExpectedEnds.size(), size);
+  std::vector<unsigned int> ends(size);
+  ASSERT_EQ(size, FPDF_GetTrailerEnds(document(), ends.data(), size));
+  ASSERT_EQ(kExpectedEnds, ends);
+}
+
+TEST_F(FPDFViewEmbedderTest, GetTrailerEndsAnnotationStamp) {
+  // Multiple trailers, \r\n line ending at the trailer ends.
+  ASSERT_TRUE(OpenDocument("annotation_stamp_with_ap.pdf"));
+
+  // FPDF_GetTrailerEnds() positive testing.
+  unsigned long size = FPDF_GetTrailerEnds(document(), nullptr, 0);
+  const std::vector<unsigned int> kExpectedEnds{441, 7945, 101719};
+  ASSERT_EQ(kExpectedEnds.size(), size);
+  std::vector<unsigned int> ends(size);
+  ASSERT_EQ(size, FPDF_GetTrailerEnds(document(), ends.data(), size));
+  ASSERT_EQ(kExpectedEnds, ends);
+}
+
+TEST_F(FPDFViewEmbedderTest, GetTrailerEndsLinearized) {
+  // Set up linearized PDF.
+  FileAccessForTesting file_acc("linearized.pdf");
+  FakeFileAccess fake_acc(&file_acc);
+  avail_ = FPDFAvail_Create(fake_acc.GetFileAvail(), fake_acc.GetFileAccess());
+  fake_acc.SetWholeFileAvailable();
+
+  // Multiple trailers, \r line ending at the trailer ends (no \n).
+  document_ = FPDFAvail_GetDocument(avail_, nullptr);
+  ASSERT_TRUE(document());
+
+  // FPDF_GetTrailerEnds() positive testing.
+  unsigned long size = FPDF_GetTrailerEnds(document(), nullptr, 0);
+  const std::vector<unsigned int> kExpectedEnds{474, 11384};
+  ASSERT_EQ(kExpectedEnds.size(), size);
+  std::vector<unsigned int> ends(size);
+  ASSERT_EQ(size, FPDF_GetTrailerEnds(document(), ends.data(), size));
+  ASSERT_EQ(kExpectedEnds, ends);
+}
diff --git a/public/fpdfview.h b/public/fpdfview.h
index 27df9bd..e996d4a 100644
--- a/public/fpdfview.h
+++ b/public/fpdfview.h
@@ -606,6 +606,25 @@
 FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV
 FPDF_DocumentHasValidCrossReferenceTable(FPDF_DOCUMENT document);
 
+// Experimental API.
+// Function: FPDF_GetTrailerEnds
+//          Get the byte offsets of trailer ends.
+// Parameters:
+//          document    -   Handle to document. Returned by FPDF_LoadDocument().
+//          buffer      -   The address of a buffer that receives the
+//                          byte offsets.
+//          length      -   The size, in ints, of |buffer|.
+// Return value:
+//          Returns the number of ints |buffer| must hold, 0 on error.
+//
+// |buffer| is an array of integers that describes the exact byte offsets of the
+// trailer ends in the document. If |length| is less than the returned length,
+// or |document| or |buffer| is NULL, |buffer| will not be modified.
+FPDF_EXPORT unsigned long FPDF_CALLCONV
+FPDF_GetTrailerEnds(FPDF_DOCUMENT document,
+                    unsigned int* buffer,
+                    unsigned long length);
+
 // Function: FPDF_GetDocPermission
 //          Get file permission flags of the document.
 // Parameters: