Add string validations in StringToCode() and StringToWideString().

Add the following validations in CPDF_ToUnicodeMap::StringToCode():
(1) Check whether the string has matching "<" and ">".
(2) Check whether the string represents a valid hexadecimal number.
(3) Detect integer overflow.

In CPDF_ToUnicodeMap::StringToWideString(), add a check for matching
"<" and ">". No need to add invalid hex character check since Acrobat
and Okular both ignore the rest of the string when encountering
invalid characters.

This CL also updates the unit test for StringToCode() and
StringToWideString() according to the changes above.

Bug: pdfium:1422
Change-Id: Ie3608515670b8d1efd6909b6f465e2d0e2e7ebf6
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/62550
Reviewed-by: Lei Zhang <thestig@chromium.org>
Commit-Queue: Hui Yingst <nigi@chromium.org>
diff --git a/core/fpdfapi/font/cpdf_tounicodemap.cpp b/core/fpdfapi/font/cpdf_tounicodemap.cpp
index acef5e4..2bd5798 100644
--- a/core/fpdfapi/font/cpdf_tounicodemap.cpp
+++ b/core/fpdfapi/font/cpdf_tounicodemap.cpp
@@ -74,27 +74,35 @@
 // static
 pdfium::Optional<uint32_t> CPDF_ToUnicodeMap::StringToCode(ByteStringView str) {
   size_t len = str.GetLength();
-  if (len == 0 || str[0] != '<')
+  if (len <= 2 || str[0] != '<' || str[len - 1] != '>')
     return pdfium::nullopt;
 
-  uint32_t result = 0;
-  for (size_t i = 1; i < len && std::isxdigit(str[i]); ++i) {
-    result = result * 16 + FXSYS_HexCharToInt(str.CharAt(i));
+  FX_SAFE_UINT32 code = 0;
+  for (char c : str.Mid(1, len - 2)) {
+    if (!FXSYS_IsHexDigit(c))
+      return pdfium::nullopt;
+
+    code = code * 16 + FXSYS_HexCharToInt(c);
+    if (!code.IsValid())
+      return pdfium::nullopt;
   }
-  return result;
+  return pdfium::Optional<uint32_t>(code.ValueOrDie());
 }
 
 // static
 WideString CPDF_ToUnicodeMap::StringToWideString(ByteStringView str) {
   size_t len = str.GetLength();
-  if (len == 0 || str[0] != '<')
+  if (len <= 2 || str[0] != '<' || str[len - 1] != '>')
     return WideString();
 
   WideString result;
   int byte_pos = 0;
   wchar_t ch = 0;
-  for (size_t i = 1; i < len && std::isxdigit(str[i]); ++i) {
-    ch = ch * 16 + FXSYS_HexCharToInt(str.CharAt(i));
+  for (char c : str.Mid(1, len - 2)) {
+    if (!FXSYS_IsHexDigit(c))
+      break;
+
+    ch = ch * 16 + FXSYS_HexCharToInt(c);
     byte_pos++;
     if (byte_pos == 4) {
       result += ch;
@@ -140,25 +148,32 @@
     if (word.IsEmpty() || word == "endbfchar")
       return;
 
-    SetCode(StringToCode(word).value_or(0),
-            StringToWideString(pParser->GetWord()));
+    pdfium::Optional<uint32_t> code = StringToCode(word);
+    if (!code.has_value())
+      return;
+
+    SetCode(code.value(), StringToWideString(pParser->GetWord()));
   }
 }
 
 void CPDF_ToUnicodeMap::HandleBeginBFRange(CPDF_SimpleParser* pParser) {
   while (1) {
-    ByteStringView low = pParser->GetWord();
-    if (low.IsEmpty() || low == "endbfrange")
+    ByteStringView lowcode_str = pParser->GetWord();
+    if (lowcode_str.IsEmpty() || lowcode_str == "endbfrange")
       return;
 
-    ByteStringView high = pParser->GetWord();
-    uint32_t lowcode = StringToCode(low).value_or(0);
-    uint32_t highcode =
-        (lowcode & 0xffffff00) | (StringToCode(high).value_or(0) & 0xff);
-
-    if (highcode == 0xffffffff)
+    pdfium::Optional<uint32_t> lowcode_opt = StringToCode(lowcode_str);
+    if (!lowcode_opt.has_value())
       return;
 
+    ByteStringView highcode_str = pParser->GetWord();
+    pdfium::Optional<uint32_t> highcode_opt = StringToCode(highcode_str);
+    if (!highcode_opt.has_value())
+      return;
+
+    uint32_t lowcode = lowcode_opt.value();
+    uint32_t highcode = (lowcode & 0xffffff00) | (highcode_opt.value() & 0xff);
+
     ByteStringView start = pParser->GetWord();
     if (start == "[") {
       for (uint32_t code = lowcode; code <= highcode; code++)
@@ -169,7 +184,11 @@
 
     WideString destcode = StringToWideString(start);
     if (destcode.GetLength() == 1) {
-      uint32_t value = StringToCode(start).value_or(0);
+      pdfium::Optional<uint32_t> value_or_error = StringToCode(start);
+      if (!value_or_error.has_value())
+        return;
+
+      uint32_t value = value_or_error.value();
       for (uint32_t code = lowcode; code <= highcode; code++)
         m_Map[code] = value++;
     } else {
diff --git a/core/fpdfapi/font/cpdf_tounicodemap_unittest.cpp b/core/fpdfapi/font/cpdf_tounicodemap_unittest.cpp
index 891fa08..7dacc5a 100644
--- a/core/fpdfapi/font/cpdf_tounicodemap_unittest.cpp
+++ b/core/fpdfapi/font/cpdf_tounicodemap_unittest.cpp
@@ -4,36 +4,46 @@
 
 #include "core/fpdfapi/font/cpdf_tounicodemap.h"
 
+#include "testing/gmock/include/gmock/gmock.h"
 #include "testing/gtest/include/gtest/gtest.h"
 
 TEST(cpdf_tounicodemap, StringToCode) {
-  Optional<uint32_t> result = CPDF_ToUnicodeMap::StringToCode("<c2");
-  ASSERT_TRUE(result.has_value());
-  EXPECT_EQ(194u, result.value());
+  EXPECT_THAT(CPDF_ToUnicodeMap::StringToCode("<0001>"), testing::Optional(1u));
+  EXPECT_THAT(CPDF_ToUnicodeMap::StringToCode("<c2>"), testing::Optional(194u));
+  EXPECT_THAT(CPDF_ToUnicodeMap::StringToCode("<A2>"), testing::Optional(162u));
+  EXPECT_THAT(CPDF_ToUnicodeMap::StringToCode("<Af2>"),
+              testing::Optional(2802u));
+  EXPECT_THAT(CPDF_ToUnicodeMap::StringToCode("<FFFFFFFF>"),
+              testing::Optional(4294967295u));
 
-  result = CPDF_ToUnicodeMap::StringToCode("<A2");
-  ASSERT_TRUE(result.has_value());
-  EXPECT_EQ(162u, result.value());
+  // Integer overflow
+  EXPECT_FALSE(CPDF_ToUnicodeMap::StringToCode("<100000000>").has_value());
+  EXPECT_FALSE(CPDF_ToUnicodeMap::StringToCode("<1abcdFFFF>").has_value());
 
-  result = CPDF_ToUnicodeMap::StringToCode("<Af2");
-  ASSERT_TRUE(result.has_value());
-  EXPECT_EQ(2802u, result.value());
-
+  // Invalid string
   EXPECT_FALSE(CPDF_ToUnicodeMap::StringToCode("").has_value());
+  EXPECT_FALSE(CPDF_ToUnicodeMap::StringToCode("<>").has_value());
   EXPECT_FALSE(CPDF_ToUnicodeMap::StringToCode("12").has_value());
+  EXPECT_FALSE(CPDF_ToUnicodeMap::StringToCode("<12").has_value());
+  EXPECT_FALSE(CPDF_ToUnicodeMap::StringToCode("12>").has_value());
+  EXPECT_FALSE(CPDF_ToUnicodeMap::StringToCode("<1-7>").has_value());
+  EXPECT_FALSE(CPDF_ToUnicodeMap::StringToCode("00AB").has_value());
+  EXPECT_FALSE(CPDF_ToUnicodeMap::StringToCode("<00NN>").has_value());
 }
 
 TEST(cpdf_tounicodemap, StringToWideString) {
   EXPECT_EQ(L"", CPDF_ToUnicodeMap::StringToWideString(""));
   EXPECT_EQ(L"", CPDF_ToUnicodeMap::StringToWideString("1234"));
-
   EXPECT_EQ(L"", CPDF_ToUnicodeMap::StringToWideString("<c2"));
+  EXPECT_EQ(L"", CPDF_ToUnicodeMap::StringToWideString("<c2D2"));
+  EXPECT_EQ(L"", CPDF_ToUnicodeMap::StringToWideString("c2ab>"));
 
   WideString res = L"\xc2ab";
-  EXPECT_EQ(res, CPDF_ToUnicodeMap::StringToWideString("<c2ab"));
-  EXPECT_EQ(res, CPDF_ToUnicodeMap::StringToWideString("<c2abab"));
-  EXPECT_EQ(res, CPDF_ToUnicodeMap::StringToWideString("<c2ab 1234"));
+  EXPECT_EQ(res, CPDF_ToUnicodeMap::StringToWideString("<c2ab>"));
+  EXPECT_EQ(res, CPDF_ToUnicodeMap::StringToWideString("<c2abab>"));
+  EXPECT_EQ(res, CPDF_ToUnicodeMap::StringToWideString("<c2ab 1234>"));
 
   res += L"\xfaab";
-  EXPECT_EQ(res, CPDF_ToUnicodeMap::StringToWideString("<c2abFaAb"));
+  EXPECT_EQ(res, CPDF_ToUnicodeMap::StringToWideString("<c2abFaAb>"));
+  EXPECT_EQ(res, CPDF_ToUnicodeMap::StringToWideString("<c2abFaAb12>"));
 }