FPDF_GetTrailerEnds: make this not depend on whitespace

PDF-1.7 calls out no bytes other than whitespace when specifying what
can occur between endstream and endobj, so whitespace needs to be
handled. When CPDF_SyntaxParser::ReadStream() reads the stream, it reads
'endobj', and then resets the position back to the end of 'endstream'.
This mechanism is disabled in case there is whitespace between the
tokens and the newline, see the end of the function.

This results in reporting no trailer ends, as the parsing fails, as the
next token is expected to be 'endobj', but it's the ID of the next
object instead.

Fix the problem by handling whitespace in
CPDF_SyntaxParser::ReadStream() where it was looking for \ntoken\n, not
allowing whitespace between the token and the following newline.

Change-Id: I7048e8d081af04af3dd08d957212c885b7982b5e
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/76850
Commit-Queue: Tom Sepez <tsepez@chromium.org>
Reviewed-by: Tom Sepez <tsepez@chromium.org>
diff --git a/core/fpdfapi/parser/cpdf_syntax_parser.cpp b/core/fpdfapi/parser/cpdf_syntax_parser.cpp
index 06389bc..5318efd 100644
--- a/core/fpdfapi/parser/cpdf_syntax_parser.cpp
+++ b/core/fpdfapi/parser/cpdf_syntax_parser.cpp
@@ -795,6 +795,14 @@
   memset(m_WordBuffer, 0, kEndObjStr.GetLength() + 1);
   GetNextWordInternal(nullptr);
 
+  // Allow whitespace after endstream and before a newline.
+  unsigned char ch = 0;
+  while (GetNextChar(ch)) {
+    if (!PDFCharIsWhitespace(ch) || PDFCharIsLineEnding(ch))
+      break;
+  }
+  SetPos(GetPos() - 1);
+
   int numMarkers = ReadEOLMarkers(GetPos());
   if (m_WordSize == static_cast<unsigned int>(kEndObjStr.GetLength()) &&
       numMarkers != 0 &&
diff --git a/fpdfsdk/fpdf_view_embeddertest.cpp b/fpdfsdk/fpdf_view_embeddertest.cpp
index 14cfa2a..f755162 100644
--- a/fpdfsdk/fpdf_view_embeddertest.cpp
+++ b/fpdfsdk/fpdf_view_embeddertest.cpp
@@ -1714,3 +1714,17 @@
   ASSERT_EQ(size, FPDF_GetTrailerEnds(document(), ends.data(), size));
   ASSERT_EQ(kExpectedEnds, ends);
 }
+
+TEST_F(FPDFViewEmbedderTest, GetTrailerEndsWhitespace) {
+  // Whitespace between 'endstream'/'endobj' and the newline.
+  ASSERT_TRUE(OpenDocument("trailer_end_trailing_space.pdf"));
+
+  unsigned long size = FPDF_GetTrailerEnds(document(), nullptr, 0);
+  const std::vector<unsigned int> kExpectedEnds{1193};
+  // Without the accompanying fix in place, this test would have failed, as the
+  // size was 0, not 1, i.e. no trailer ends were found.
+  ASSERT_EQ(kExpectedEnds.size(), size);
+  std::vector<unsigned int> ends(size);
+  ASSERT_EQ(size, FPDF_GetTrailerEnds(document(), ends.data(), size));
+  EXPECT_EQ(kExpectedEnds, ends);
+}
diff --git a/testing/resources/trailer_end_trailing_space.in b/testing/resources/trailer_end_trailing_space.in
new file mode 100644
index 0000000..0233b75
--- /dev/null
+++ b/testing/resources/trailer_end_trailing_space.in
@@ -0,0 +1,86 @@
+{{header}}
+{{object 1 0}} <<
+  /Type /Catalog
+  /Pages 2 0 R
+>>
+endobj
+{{object 2 0}} <<
+  /Type /Pages
+  /MediaBox [0 0 200 200]
+  /Count 1
+  /Kids [3 0 R]
+>>
+endobj
+{{object 3 0}} <<
+  /Type /Page
+  /Parent 2 0 R
+  /Resources <<
+    /Font <<
+      /F1 4 0 R
+      /F2 5 0 R
+    >>
+  >>
+  /Contents 6 0 R
+>>
+endobj
+{{object 4 0}} <<
+  /Type /Font
+  /Subtype /Type1
+  /BaseFont /Times-Roman
+>>
+endobj
+% Single space after endstream and endobj.
+{{object 6 0}} <<
+  {{streamlen}}
+>>
+stream
+BT
+20 50 Td
+/F1 12 Tf
+(Hello, world!) Tj
+0 50 Td
+/F2 16 Tf
+(Goodbye, world!) Tj
+ET
+endstream 
+endobj 
+% Multiple spaces after endstream and endobj.
+{{object 6 0}} <<
+  {{streamlen}}
+>>
+stream
+BT
+20 50 Td
+/F1 12 Tf
+(Hello, world!) Tj
+0 50 Td
+/F2 16 Tf
+(Goodbye, world!) Tj
+ET
+endstream  
+endobj  
+% Tab after endstream and endobj.
+{{object 6 0}} <<
+  {{streamlen}}
+>>
+stream
+BT
+20 50 Td
+/F1 12 Tf
+(Hello, world!) Tj
+0 50 Td
+/F2 16 Tf
+(Goodbye, world!) Tj
+ET
+endstream	
+endobj	
+{{object 5 0}} <<
+  /Type /Font
+  /Subtype /Type1
+  /BaseFont /Helvetica
+>>
+endobj
+{{xref}}
+{{trailer}}
+{{startxref}}
+%%EOF
diff --git a/testing/resources/trailer_end_trailing_space.pdf b/testing/resources/trailer_end_trailing_space.pdf
new file mode 100644
index 0000000..e0ec6e4
--- /dev/null
+++ b/testing/resources/trailer_end_trailing_space.pdf
@@ -0,0 +1,99 @@
+%PDF-1.7
+% ò¤ô
+1 0 obj <<
+  /Type /Catalog
+  /Pages 2 0 R
+>>
+endobj
+2 0 obj <<
+  /Type /Pages
+  /MediaBox [0 0 200 200]
+  /Count 1
+  /Kids [3 0 R]
+>>
+endobj
+3 0 obj <<
+  /Type /Page
+  /Parent 2 0 R
+  /Resources <<
+    /Font <<
+      /F1 4 0 R
+      /F2 5 0 R
+    >>
+  >>
+  /Contents 6 0 R
+>>
+endobj
+4 0 obj <<
+  /Type /Font
+  /Subtype /Type1
+  /BaseFont /Times-Roman
+>>
+endobj
+% Single space after endstream and endobj.
+6 0 obj <<
+  /Length 83
+>>
+stream
+BT
+20 50 Td
+/F1 12 Tf
+(Hello, world!) Tj
+0 50 Td
+/F2 16 Tf
+(Goodbye, world!) Tj
+ET
+endstream 
+endobj 
+% Multiple spaces after endstream and endobj.
+6 0 obj <<
+  /Length 83
+>>
+stream
+BT
+20 50 Td
+/F1 12 Tf
+(Hello, world!) Tj
+0 50 Td
+/F2 16 Tf
+(Goodbye, world!) Tj
+ET
+endstream  
+endobj  
+% Tab after endstream and endobj.
+6 0 obj <<
+  /Length 83
+>>
+stream
+BT
+20 50 Td
+/F1 12 Tf
+(Hello, world!) Tj
+0 50 Td
+/F2 16 Tf
+(Goodbye, world!) Tj
+ET
+endstream	
+endobj	
+5 0 obj <<
+  /Type /Font
+  /Subtype /Type1
+  /BaseFont /Helvetica
+>>
+endobj
+xref
+0 7
+0000000000 65535 f 
+0000000015 00000 n 
+0000000068 00000 n 
+0000000157 00000 n 
+0000000299 00000 n 
+0000000910 00000 n 
+0000000774 00000 n 
+trailer <<
+  /Root 1 0 R
+  /Size 7
+>>
+startxref
+986
+%%EOF