Add string validations in StringToCode() and StringToWideString(). Add the following validations in CPDF_ToUnicodeMap::StringToCode(): (1) Check whether the string has matching "<" and ">". (2) Check whether the string represents a valid hexadecimal number. (3) Detect integer overflow. In CPDF_ToUnicodeMap::StringToWideString(), add a check for matching "<" and ">". No need to add invalid hex character check since Acrobat and Okular both ignore the rest of the string when encountering invalid characters. This CL also updates the unit test for StringToCode() and StringToWideString() according to the changes above. Bug: pdfium:1422 Change-Id: Ie3608515670b8d1efd6909b6f465e2d0e2e7ebf6 Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/62550 Reviewed-by: Lei Zhang <thestig@chromium.org> Commit-Queue: Hui Yingst <nigi@chromium.org>

commit: f53184f1be028b67d850f3b0aa0cf1bec3c6b84d [log] [tgz]
author: Hui Yingst <nigi@chromium.org> Mon Jan 13 15:38:07 2020 +0000
committer: Chromium commit bot <commit-bot@chromium.org> Mon Jan 13 15:38:07 2020 +0000
tree: bf64349653585fad2ff7317c1d57f18ccc5d3574
parent: 801b95f34349ff15e4e2650ae86beea2bda5eeef [diff]
diff --git a/core/fpdfapi/font/cpdf_tounicodemap.cpp b/core/fpdfapi/font/cpdf_tounicodemap.cpp
index acef5e4..2bd5798 100644
--- a/core/fpdfapi/font/cpdf_tounicodemap.cpp
+++ b/core/fpdfapi/font/cpdf_tounicodemap.cpp

@@ -74,27 +74,35 @@
 // static
 pdfium::Optional<uint32_t> CPDF_ToUnicodeMap::StringToCode(ByteStringView str) {
   size_t len = str.GetLength();
-  if (len == 0 || str[0] != '<')
+  if (len <= 2 || str[0] != '<' || str[len - 1] != '>')
     return pdfium::nullopt;
 
-  uint32_t result = 0;
-  for (size_t i = 1; i < len && std::isxdigit(str[i]); ++i) {
-    result = result * 16 + FXSYS_HexCharToInt(str.CharAt(i));
+  FX_SAFE_UINT32 code = 0;
+  for (char c : str.Mid(1, len - 2)) {
+    if (!FXSYS_IsHexDigit(c))
+      return pdfium::nullopt;
+
+    code = code * 16 + FXSYS_HexCharToInt(c);
+    if (!code.IsValid())
+      return pdfium::nullopt;
   }
-  return result;
+  return pdfium::Optional<uint32_t>(code.ValueOrDie());
 }
 
 // static
 WideString CPDF_ToUnicodeMap::StringToWideString(ByteStringView str) {
   size_t len = str.GetLength();
-  if (len == 0 || str[0] != '<')
+  if (len <= 2 || str[0] != '<' || str[len - 1] != '>')
     return WideString();
 
   WideString result;
   int byte_pos = 0;
   wchar_t ch = 0;
-  for (size_t i = 1; i < len && std::isxdigit(str[i]); ++i) {
-    ch = ch * 16 + FXSYS_HexCharToInt(str.CharAt(i));
+  for (char c : str.Mid(1, len - 2)) {
+    if (!FXSYS_IsHexDigit(c))
+      break;
+
+    ch = ch * 16 + FXSYS_HexCharToInt(c);
     byte_pos++;
     if (byte_pos == 4) {
       result += ch;
@@ -140,25 +148,32 @@
     if (word.IsEmpty() || word == "endbfchar")
       return;
 
-    SetCode(StringToCode(word).value_or(0),
-            StringToWideString(pParser->GetWord()));
+    pdfium::Optional<uint32_t> code = StringToCode(word);
+    if (!code.has_value())
+      return;
+
+    SetCode(code.value(), StringToWideString(pParser->GetWord()));
   }
 }
 
 void CPDF_ToUnicodeMap::HandleBeginBFRange(CPDF_SimpleParser* pParser) {
   while (1) {
-    ByteStringView low = pParser->GetWord();
-    if (low.IsEmpty() || low == "endbfrange")
+    ByteStringView lowcode_str = pParser->GetWord();
+    if (lowcode_str.IsEmpty() || lowcode_str == "endbfrange")
       return;
 
-    ByteStringView high = pParser->GetWord();
-    uint32_t lowcode = StringToCode(low).value_or(0);
-    uint32_t highcode =
-        (lowcode & 0xffffff00) | (StringToCode(high).value_or(0) & 0xff);
-
-    if (highcode == 0xffffffff)
+    pdfium::Optional<uint32_t> lowcode_opt = StringToCode(lowcode_str);
+    if (!lowcode_opt.has_value())
       return;
 
+    ByteStringView highcode_str = pParser->GetWord();
+    pdfium::Optional<uint32_t> highcode_opt = StringToCode(highcode_str);
+    if (!highcode_opt.has_value())
+      return;
+
+    uint32_t lowcode = lowcode_opt.value();
+    uint32_t highcode = (lowcode & 0xffffff00) | (highcode_opt.value() & 0xff);
+
     ByteStringView start = pParser->GetWord();
     if (start == "[") {
       for (uint32_t code = lowcode; code <= highcode; code++)
@@ -169,7 +184,11 @@
 
     WideString destcode = StringToWideString(start);
     if (destcode.GetLength() == 1) {
-      uint32_t value = StringToCode(start).value_or(0);
+      pdfium::Optional<uint32_t> value_or_error = StringToCode(start);
+      if (!value_or_error.has_value())
+        return;
+
+      uint32_t value = value_or_error.value();
       for (uint32_t code = lowcode; code <= highcode; code++)
         m_Map[code] = value++;
     } else {

diff --git a/core/fpdfapi/font/cpdf_tounicodemap_unittest.cpp b/core/fpdfapi/font/cpdf_tounicodemap_unittest.cpp
index 891fa08..7dacc5a 100644
--- a/core/fpdfapi/font/cpdf_tounicodemap_unittest.cpp
+++ b/core/fpdfapi/font/cpdf_tounicodemap_unittest.cpp

@@ -4,36 +4,46 @@
 
 #include "core/fpdfapi/font/cpdf_tounicodemap.h"
 
+#include "testing/gmock/include/gmock/gmock.h"
 #include "testing/gtest/include/gtest/gtest.h"
 
 TEST(cpdf_tounicodemap, StringToCode) {
-  Optional<uint32_t> result = CPDF_ToUnicodeMap::StringToCode("<c2");
-  ASSERT_TRUE(result.has_value());
-  EXPECT_EQ(194u, result.value());
+  EXPECT_THAT(CPDF_ToUnicodeMap::StringToCode("<0001>"), testing::Optional(1u));
+  EXPECT_THAT(CPDF_ToUnicodeMap::StringToCode("<c2>"), testing::Optional(194u));
+  EXPECT_THAT(CPDF_ToUnicodeMap::StringToCode("<A2>"), testing::Optional(162u));
+  EXPECT_THAT(CPDF_ToUnicodeMap::StringToCode("<Af2>"),
+              testing::Optional(2802u));
+  EXPECT_THAT(CPDF_ToUnicodeMap::StringToCode("<FFFFFFFF>"),
+              testing::Optional(4294967295u));
 
-  result = CPDF_ToUnicodeMap::StringToCode("<A2");
-  ASSERT_TRUE(result.has_value());
-  EXPECT_EQ(162u, result.value());
+  // Integer overflow
+  EXPECT_FALSE(CPDF_ToUnicodeMap::StringToCode("<100000000>").has_value());
+  EXPECT_FALSE(CPDF_ToUnicodeMap::StringToCode("<1abcdFFFF>").has_value());
 
-  result = CPDF_ToUnicodeMap::StringToCode("<Af2");
-  ASSERT_TRUE(result.has_value());
-  EXPECT_EQ(2802u, result.value());
-
+  // Invalid string
   EXPECT_FALSE(CPDF_ToUnicodeMap::StringToCode("").has_value());
+  EXPECT_FALSE(CPDF_ToUnicodeMap::StringToCode("<>").has_value());
   EXPECT_FALSE(CPDF_ToUnicodeMap::StringToCode("12").has_value());
+  EXPECT_FALSE(CPDF_ToUnicodeMap::StringToCode("<12").has_value());
+  EXPECT_FALSE(CPDF_ToUnicodeMap::StringToCode("12>").has_value());
+  EXPECT_FALSE(CPDF_ToUnicodeMap::StringToCode("<1-7>").has_value());
+  EXPECT_FALSE(CPDF_ToUnicodeMap::StringToCode("00AB").has_value());
+  EXPECT_FALSE(CPDF_ToUnicodeMap::StringToCode("<00NN>").has_value());
 }
 
 TEST(cpdf_tounicodemap, StringToWideString) {
   EXPECT_EQ(L"", CPDF_ToUnicodeMap::StringToWideString(""));
   EXPECT_EQ(L"", CPDF_ToUnicodeMap::StringToWideString("1234"));
-
   EXPECT_EQ(L"", CPDF_ToUnicodeMap::StringToWideString("<c2"));
+  EXPECT_EQ(L"", CPDF_ToUnicodeMap::StringToWideString("<c2D2"));
+  EXPECT_EQ(L"", CPDF_ToUnicodeMap::StringToWideString("c2ab>"));
 
   WideString res = L"\xc2ab";
-  EXPECT_EQ(res, CPDF_ToUnicodeMap::StringToWideString("<c2ab"));
-  EXPECT_EQ(res, CPDF_ToUnicodeMap::StringToWideString("<c2abab"));
-  EXPECT_EQ(res, CPDF_ToUnicodeMap::StringToWideString("<c2ab 1234"));
+  EXPECT_EQ(res, CPDF_ToUnicodeMap::StringToWideString("<c2ab>"));
+  EXPECT_EQ(res, CPDF_ToUnicodeMap::StringToWideString("<c2abab>"));
+  EXPECT_EQ(res, CPDF_ToUnicodeMap::StringToWideString("<c2ab 1234>"));
 
   res += L"\xfaab";
-  EXPECT_EQ(res, CPDF_ToUnicodeMap::StringToWideString("<c2abFaAb"));
+  EXPECT_EQ(res, CPDF_ToUnicodeMap::StringToWideString("<c2abFaAb>"));
+  EXPECT_EQ(res, CPDF_ToUnicodeMap::StringToWideString("<c2abFaAb12>"));
 }
commit	f53184f1be028b67d850f3b0aa0cf1bec3c6b84d	[log] [tgz]
author	Hui Yingst <nigi@chromium.org>	Mon Jan 13 15:38:07 2020 +0000
committer	Chromium commit bot <commit-bot@chromium.org>	Mon Jan 13 15:38:07 2020 +0000
tree	bf64349653585fad2ff7317c1d57f18ccc5d3574
parent	801b95f34349ff15e4e2650ae86beea2bda5eeef [diff]