Ignore whitespace in ToUnicode code point strings
White-space characters shall be ignored in hexadecimal strings in
ToUnicode CMaps. "Make CPDF_ToUnicodeMap::StringToCode() tolerate
whitespaces" [0] allowed whitespace in character codes but not in
code point strings. This change also ignores whitespace in code point
strings and simplifies how it is skipped in character codes.
[0] https://pdfium.googlesource.com/pdfium/+/bf9170ee47d4f656964252898787152edc0345cd
Bug: 42270019
Change-Id: I13e8f2b5e495a732d11b20759bbcde99828880ea
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/133030
Reviewed-by: Ben Wagner <bungeman@google.com>
Reviewed-by: Lei Zhang <thestig@chromium.org>
Commit-Queue: Ben Wagner <bungeman@google.com>
diff --git a/core/fpdfapi/font/cpdf_tounicodemap.cpp b/core/fpdfapi/font/cpdf_tounicodemap.cpp
index a105486..f71d21c 100644
--- a/core/fpdfapi/font/cpdf_tounicodemap.cpp
+++ b/core/fpdfapi/font/cpdf_tounicodemap.cpp
@@ -91,28 +91,7 @@
}
// static
-std::optional<uint32_t> CPDF_ToUnicodeMap::StringToCode(ByteStringView input) {
- // Ignore whitespaces within `input`. See https://crbug.com/pdfium/2065.
- std::set<char> seen_whitespace_chars;
- for (char c : input) {
- if (PDFCharIsWhitespace(c)) {
- seen_whitespace_chars.insert(c);
- }
- }
- ByteString str_without_whitespace_chars; // Must outlive `str`.
- ByteStringView str;
- if (seen_whitespace_chars.empty()) {
- str = input;
- } else {
- str_without_whitespace_chars.Reserve(input.GetLength());
- for (char c : input) {
- if (!pdfium::Contains(seen_whitespace_chars, c)) {
- str_without_whitespace_chars += c;
- }
- }
- str = str_without_whitespace_chars.AsStringView();
- }
-
+std::optional<uint32_t> CPDF_ToUnicodeMap::StringToCode(ByteStringView str) {
size_t len = str.GetLength();
if (len <= 2 || str[0] != '<' || str[len - 1] != '>') {
return std::nullopt;
@@ -120,6 +99,10 @@
FX_SAFE_UINT32 code = 0;
for (char c : str.Substr(1, len - 2)) {
+ // Ignore whitespace https://crbug.com/pdfium/2065
+ if (PDFCharIsWhitespace(c)) {
+ continue;
+ }
if (!FXSYS_IsHexDigit(c)) {
return std::nullopt;
}
@@ -143,6 +126,10 @@
int byte_pos = 0;
wchar_t ch = 0;
for (char c : str.Substr(1, len - 2)) {
+ // Ignore whitespace https://crbug.com/pdfium/1022
+ if (PDFCharIsWhitespace(c)) {
+ continue;
+ }
if (!FXSYS_IsHexDigit(c)) {
break;
}
diff --git a/core/fpdfapi/font/cpdf_tounicodemap.h b/core/fpdfapi/font/cpdf_tounicodemap.h
index 3a2c48e..ba73b82 100644
--- a/core/fpdfapi/font/cpdf_tounicodemap.h
+++ b/core/fpdfapi/font/cpdf_tounicodemap.h
@@ -34,7 +34,7 @@
friend class CPDFToUnicodeMapTest_StringToCode_Test;
friend class CPDFToUnicodeMapTest_StringToWideString_Test;
- static std::optional<uint32_t> StringToCode(ByteStringView input);
+ static std::optional<uint32_t> StringToCode(ByteStringView str);
static WideString StringToWideString(ByteStringView str);
void Load(RetainPtr<const CPDF_Stream> pStream);
diff --git a/core/fpdfapi/font/cpdf_tounicodemap_unittest.cpp b/core/fpdfapi/font/cpdf_tounicodemap_unittest.cpp
index 3081ad4..ce04af5 100644
--- a/core/fpdfapi/font/cpdf_tounicodemap_unittest.cpp
+++ b/core/fpdfapi/font/cpdf_tounicodemap_unittest.cpp
@@ -52,11 +52,14 @@
WideString res = L"\xc2ab";
EXPECT_EQ(res, CPDF_ToUnicodeMap::StringToWideString("<c2ab>"));
EXPECT_EQ(res, CPDF_ToUnicodeMap::StringToWideString("<c2abab>"));
- EXPECT_EQ(res, CPDF_ToUnicodeMap::StringToWideString("<c2ab 1234>"));
res += L"\xfaab";
EXPECT_EQ(res, CPDF_ToUnicodeMap::StringToWideString("<c2abFaAb>"));
EXPECT_EQ(res, CPDF_ToUnicodeMap::StringToWideString("<c2abFaAb12>"));
+ EXPECT_EQ(res, CPDF_ToUnicodeMap::StringToWideString("<c2ab FaAb>"));
+ EXPECT_EQ(res, CPDF_ToUnicodeMap::StringToWideString("<c2ab FaAb12>"));
+ EXPECT_EQ(res, CPDF_ToUnicodeMap::StringToWideString("<c2ab FaAb 12>"));
+ EXPECT_EQ(res, CPDF_ToUnicodeMap::StringToWideString("< c 2 a b F a A b 1 2 >"));
}
TEST(CPDFToUnicodeMapTest, HandleBeginBFCharBadCount) {