Ignore whitespace in ToUnicode codepoint strings

White-space characters shall be ignored in Hexadecimal Strings in
ToUnicode CMaps. "Make CPDF_ToUnicodeMap::StringToCode() tolerate
whitespaces" [0] allowed whitespace in character codes but not in the
code point strings. This change ignores any whitespace present in a code
point string and simplifies how it is ignored in character codes.

[0] https://pdfium.googlesource.com/pdfium/+/bf9170ee47d4f656964252898787152edc0345cd

Bug: 42270019
Change-Id: I13e8f2b5e495a732d11b20759bbcde99828880ea
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/133030
Reviewed-by: Ben Wagner <bungeman@google.com>
Reviewed-by: Lei Zhang <thestig@chromium.org>
Commit-Queue: Ben Wagner <bungeman@google.com>
diff --git a/core/fpdfapi/font/cpdf_tounicodemap.cpp b/core/fpdfapi/font/cpdf_tounicodemap.cpp
index a105486..f71d21c 100644
--- a/core/fpdfapi/font/cpdf_tounicodemap.cpp
+++ b/core/fpdfapi/font/cpdf_tounicodemap.cpp
@@ -91,28 +91,7 @@
 }
 
 // static
-std::optional<uint32_t> CPDF_ToUnicodeMap::StringToCode(ByteStringView input) {
-  // Ignore whitespaces within `input`. See https://crbug.com/pdfium/2065.
-  std::set<char> seen_whitespace_chars;
-  for (char c : input) {
-    if (PDFCharIsWhitespace(c)) {
-      seen_whitespace_chars.insert(c);
-    }
-  }
-  ByteString str_without_whitespace_chars;  // Must outlive `str`.
-  ByteStringView str;
-  if (seen_whitespace_chars.empty()) {
-    str = input;
-  } else {
-    str_without_whitespace_chars.Reserve(input.GetLength());
-    for (char c : input) {
-      if (!pdfium::Contains(seen_whitespace_chars, c)) {
-        str_without_whitespace_chars += c;
-      }
-    }
-    str = str_without_whitespace_chars.AsStringView();
-  }
-
+std::optional<uint32_t> CPDF_ToUnicodeMap::StringToCode(ByteStringView str) {
   size_t len = str.GetLength();
   if (len <= 2 || str[0] != '<' || str[len - 1] != '>') {
     return std::nullopt;
@@ -120,6 +99,10 @@
 
   FX_SAFE_UINT32 code = 0;
   for (char c : str.Substr(1, len - 2)) {
+    // Ignore whitespace https://crbug.com/pdfium/2065
+    if (PDFCharIsWhitespace(c)) {
+      continue;
+    }
     if (!FXSYS_IsHexDigit(c)) {
       return std::nullopt;
     }
@@ -143,6 +126,10 @@
   int byte_pos = 0;
   wchar_t ch = 0;
   for (char c : str.Substr(1, len - 2)) {
+    // Ignore whitespace https://crbug.com/pdfium/1022
+    if (PDFCharIsWhitespace(c)) {
+      continue;
+    }
     if (!FXSYS_IsHexDigit(c)) {
       break;
     }
diff --git a/core/fpdfapi/font/cpdf_tounicodemap.h b/core/fpdfapi/font/cpdf_tounicodemap.h
index 3a2c48e..ba73b82 100644
--- a/core/fpdfapi/font/cpdf_tounicodemap.h
+++ b/core/fpdfapi/font/cpdf_tounicodemap.h
@@ -34,7 +34,7 @@
   friend class CPDFToUnicodeMapTest_StringToCode_Test;
   friend class CPDFToUnicodeMapTest_StringToWideString_Test;
 
-  static std::optional<uint32_t> StringToCode(ByteStringView input);
+  static std::optional<uint32_t> StringToCode(ByteStringView str);
   static WideString StringToWideString(ByteStringView str);
 
   void Load(RetainPtr<const CPDF_Stream> pStream);
diff --git a/core/fpdfapi/font/cpdf_tounicodemap_unittest.cpp b/core/fpdfapi/font/cpdf_tounicodemap_unittest.cpp
index 3081ad4..ce04af5 100644
--- a/core/fpdfapi/font/cpdf_tounicodemap_unittest.cpp
+++ b/core/fpdfapi/font/cpdf_tounicodemap_unittest.cpp
@@ -52,11 +52,14 @@
   WideString res = L"\xc2ab";
   EXPECT_EQ(res, CPDF_ToUnicodeMap::StringToWideString("<c2ab>"));
   EXPECT_EQ(res, CPDF_ToUnicodeMap::StringToWideString("<c2abab>"));
-  EXPECT_EQ(res, CPDF_ToUnicodeMap::StringToWideString("<c2ab 1234>"));
 
   res += L"\xfaab";
   EXPECT_EQ(res, CPDF_ToUnicodeMap::StringToWideString("<c2abFaAb>"));
   EXPECT_EQ(res, CPDF_ToUnicodeMap::StringToWideString("<c2abFaAb12>"));
+  EXPECT_EQ(res, CPDF_ToUnicodeMap::StringToWideString("<c2ab FaAb>"));
+  EXPECT_EQ(res, CPDF_ToUnicodeMap::StringToWideString("<c2ab FaAb12>"));
+  EXPECT_EQ(res, CPDF_ToUnicodeMap::StringToWideString("<c2ab FaAb 12>"));
+  EXPECT_EQ(res, CPDF_ToUnicodeMap::StringToWideString("< c 2 a b  F a A b  1 2 >"));
 }
 
 TEST(CPDFToUnicodeMapTest, HandleBeginBFCharBadCount) {