Fix CPDF_SimpleParser skipping a '>' delimiter
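CPDF_SimpleParser mistakenly increments the current position too many
times when handling a '<' delimiter: the character following the '<' is
consumed before the scan for the closing '>' begins, so for an empty hex
string "<>" the '>' is skipped and the returned word runs on to the next
'>' or to the end of the data. Fix this and refactor the logic to make it
easier to understand.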
Bug: 358381390
Change-Id: I65e28a03f6ded19537f64d0ad94774ce6c3c8f83
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/123415
Reviewed-by: Lei Zhang <thestig@chromium.org>
Commit-Queue: Andy Phan <andyphan@chromium.org>
diff --git a/core/fpdfapi/parser/cpdf_simple_parser.cpp b/core/fpdfapi/parser/cpdf_simple_parser.cpp
index 6df95b4..13d9872 100644
--- a/core/fpdfapi/parser/cpdf_simple_parser.cpp
+++ b/core/fpdfapi/parser/cpdf_simple_parser.cpp
@@ -104,14 +104,15 @@
return GetDataToCurrentPosition(start_position);
}
- if (data_[cur_position_++] != '<') {
- while (cur_position_ < data_.size() && data_[cur_position_] != '>') {
- ++cur_position_;
- }
+ uint8_t cur_char = data_[cur_position_++];
+ // Stop parsing if encountering "<<".
+ if (cur_char == '<') {
+ return GetDataToCurrentPosition(start_position);
+ }
- if (cur_position_ < data_.size()) {
- ++cur_position_;
- }
+ // Continue parsing until end of `data_` or closing bracket.
+ while (cur_position_ < data_.size() && cur_char != '>') {
+ cur_char = data_[cur_position_++];
}
return GetDataToCurrentPosition(start_position);
}
diff --git a/core/fpdfapi/parser/cpdf_simple_parser_unittest.cpp b/core/fpdfapi/parser/cpdf_simple_parser_unittest.cpp
index 48439d8..7f09375 100644
--- a/core/fpdfapi/parser/cpdf_simple_parser_unittest.cpp
+++ b/core/fpdfapi/parser/cpdf_simple_parser_unittest.cpp
@@ -77,9 +77,9 @@
EXPECT_EQ(parser.GetWord(), "1");
EXPECT_EQ(parser.GetWord(), "beginbfchar");
EXPECT_EQ(parser.GetWord(), "<01>");
- // TODO(crbug.com/358381390): Should parse to:
- // {"<>", "endbfchar", "1", "beginbfchar"}
- // Note that the span below includes the NUL at the end of the string.
- const char kWrongResult[] = "<>\nendbfchar\n1 beginbfchar";
- EXPECT_EQ(parser.GetWord(), ByteStringView(pdfium::make_span(kWrongResult)));
+ EXPECT_EQ(parser.GetWord(), "<>");
+ EXPECT_EQ(parser.GetWord(), "endbfchar");
+ EXPECT_EQ(parser.GetWord(), "1");
+ EXPECT_EQ(parser.GetWord(), "beginbfchar");
+ EXPECT_EQ(parser.GetWord(), "");
}