Fix CPDF_SimpleParser skipping a '>' delimiter. CPDF_SimpleParser mistakenly increments the current position too many times when handling a '<' delimiter. Fix this and refactor the logic to make it easier to understand. Bug: 358381390 Change-Id: I65e28a03f6ded19537f64d0ad94774ce6c3c8f83 Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/123415 Reviewed-by: Lei Zhang <thestig@chromium.org> Commit-Queue: Andy Phan <andyphan@chromium.org>

commit: 5706997dfea1966a43eb6502782a9c754f6e665c [log] [tgz]
author: Andy Phan <andyphan@chromium.org> Mon Aug 19 17:47:10 2024 +0000
committer: Pdfium LUCI CQ <pdfium-scoped@luci-project-accounts.iam.gserviceaccount.com> Mon Aug 19 17:47:10 2024 +0000
tree: 9a5445d7a79930d4b0792bf30fba59769ff2333b
parent: 4bb56452605ff5ea572114a29817012dd5ddebbf [diff]
diff --git a/core/fpdfapi/parser/cpdf_simple_parser.cpp b/core/fpdfapi/parser/cpdf_simple_parser.cpp
index 6df95b4..13d9872 100644
--- a/core/fpdfapi/parser/cpdf_simple_parser.cpp
+++ b/core/fpdfapi/parser/cpdf_simple_parser.cpp

@@ -104,14 +104,15 @@
     return GetDataToCurrentPosition(start_position);
   }
 
-  if (data_[cur_position_++] != '<') {
-    while (cur_position_ < data_.size() && data_[cur_position_] != '>') {
-      ++cur_position_;
-    }
+  uint8_t cur_char = data_[cur_position_++];
+  // Stop parsing if encountering "<<".
+  if (cur_char == '<') {
+    return GetDataToCurrentPosition(start_position);
+  }
 
-    if (cur_position_ < data_.size()) {
-      ++cur_position_;
-    }
+  // Continue parsing until end of `data_` or closing bracket.
+  while (cur_position_ < data_.size() && cur_char != '>') {
+    cur_char = data_[cur_position_++];
   }
   return GetDataToCurrentPosition(start_position);
 }

diff --git a/core/fpdfapi/parser/cpdf_simple_parser_unittest.cpp b/core/fpdfapi/parser/cpdf_simple_parser_unittest.cpp
index 48439d8..7f09375 100644
--- a/core/fpdfapi/parser/cpdf_simple_parser_unittest.cpp
+++ b/core/fpdfapi/parser/cpdf_simple_parser_unittest.cpp

@@ -77,9 +77,9 @@
   EXPECT_EQ(parser.GetWord(), "1");
   EXPECT_EQ(parser.GetWord(), "beginbfchar");
   EXPECT_EQ(parser.GetWord(), "<01>");
-  // TODO(crbug.com/358381390): Should parse to:
-  // {"<>", "endbfchar", "1", "beginbfchar"}
-  // Note that the span below includes the NUL at the end of the string.
-  const char kWrongResult[] = "<>\nendbfchar\n1 beginbfchar";
-  EXPECT_EQ(parser.GetWord(), ByteStringView(pdfium::make_span(kWrongResult)));
+  EXPECT_EQ(parser.GetWord(), "<>");
+  EXPECT_EQ(parser.GetWord(), "endbfchar");
+  EXPECT_EQ(parser.GetWord(), "1");
+  EXPECT_EQ(parser.GetWord(), "beginbfchar");
+  EXPECT_EQ(parser.GetWord(), "");
 }
commit	5706997dfea1966a43eb6502782a9c754f6e665c	[log] [tgz]
author	Andy Phan <andyphan@chromium.org>	Mon Aug 19 17:47:10 2024 +0000
committer	Pdfium LUCI CQ <pdfium-scoped@luci-project-accounts.iam.gserviceaccount.com>	Mon Aug 19 17:47:10 2024 +0000
tree	9a5445d7a79930d4b0792bf30fba59769ff2333b
parent	4bb56452605ff5ea572114a29817012dd5ddebbf [diff]