Clean up CPDF_ToUnicodeMap.

- Make Load() private and call it from the constructor, which now takes
  the CPDF_Stream to load.
- Rearrange Lookup(), StringToWideString() and Load() to bail out early
  instead of nesting.
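- Fix various nits: use size_t for lengths and indices, static_cast
  instead of C-style casts, ByteStringView instead of ByteString copies,
  a defaulted destructor, and a const GetUnicode().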

Change-Id: Id3058dcbdda63ec2bbf92e4f18368c4b498590f2
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/59259
Commit-Queue: Lei Zhang <thestig@chromium.org>
Reviewed-by: Tom Sepez <tsepez@chromium.org>
diff --git a/core/fpdfapi/font/cpdf_font.cpp b/core/fpdfapi/font/cpdf_font.cpp
index e075484..28e5ffb 100644
--- a/core/fpdfapi/font/cpdf_font.cpp
+++ b/core/fpdfapi/font/cpdf_font.cpp
@@ -273,8 +273,7 @@
   if (!pStream)
     return;
 
-  m_pToUnicodeMap = pdfium::MakeUnique<CPDF_ToUnicodeMap>();
-  m_pToUnicodeMap->Load(pStream);
+  m_pToUnicodeMap = pdfium::MakeUnique<CPDF_ToUnicodeMap>(pStream);
 }
 
 uint32_t CPDF_Font::GetStringWidth(ByteStringView pString) {
diff --git a/core/fpdfapi/font/cpdf_tounicodemap.cpp b/core/fpdfapi/font/cpdf_tounicodemap.cpp
index ce503b5..aa6a421 100644
--- a/core/fpdfapi/font/cpdf_tounicodemap.cpp
+++ b/core/fpdfapi/font/cpdf_tounicodemap.cpp
@@ -20,11 +20,10 @@
 
 WideString StringDataAdd(WideString str) {
   WideString ret;
-  int len = str.GetLength();
   wchar_t value = 1;
-  for (int i = len - 1; i >= 0; --i) {
-    wchar_t ch = str[i] + value;
-    if (ch < str[i]) {
+  for (size_t i = str.GetLength(); i > 0; --i) {
+    wchar_t ch = str[i - 1] + value;
+    if (ch < str[i - 1]) {
       ret.InsertAtFront(0);
     } else {
       ret.InsertAtFront(ch);
@@ -38,37 +37,39 @@
 
 }  // namespace
 
-CPDF_ToUnicodeMap::CPDF_ToUnicodeMap() : m_pBaseMap(nullptr) {}
+CPDF_ToUnicodeMap::CPDF_ToUnicodeMap(const CPDF_Stream* pStream) {
+  Load(pStream);
+}
 
-CPDF_ToUnicodeMap::~CPDF_ToUnicodeMap() {}
+CPDF_ToUnicodeMap::~CPDF_ToUnicodeMap() = default;
 
 WideString CPDF_ToUnicodeMap::Lookup(uint32_t charcode) const {
   auto it = m_Map.find(charcode);
-  if (it != m_Map.end()) {
-    uint32_t value = it->second;
-    wchar_t unicode = (wchar_t)(value & 0xffff);
-    if (unicode != 0xffff) {
-      return unicode;
-    }
-    const wchar_t* buf = m_MultiCharBuf.GetBuffer();
-    uint32_t buf_len = m_MultiCharBuf.GetLength();
-    if (!buf || buf_len == 0) {
+  if (it == m_Map.end()) {
+    if (!m_pBaseMap)
       return WideString();
-    }
-    uint32_t index = value >> 16;
-    if (index >= buf_len) {
-      return WideString();
-    }
-    uint32_t len = buf[index];
-    if (index + len < index || index + len >= buf_len) {
-      return WideString();
-    }
-    return WideString(buf + index + 1, len);
+    return m_pBaseMap->UnicodeFromCID(static_cast<uint16_t>(charcode));
   }
-  if (m_pBaseMap) {
-    return m_pBaseMap->UnicodeFromCID((uint16_t)charcode);
-  }
-  return WideString();
+
+  uint32_t value = it->second;
+  wchar_t unicode = static_cast<wchar_t>(value & 0xffff);
+  if (unicode != 0xffff)
+    return unicode;
+
+  const wchar_t* buf = m_MultiCharBuf.GetBuffer();
+  uint32_t buf_len = m_MultiCharBuf.GetLength();
+  if (!buf || buf_len == 0)
+    return WideString();
+
+  uint32_t index = value >> 16;
+  if (index >= buf_len)
+    return WideString();
+
+  uint32_t len = buf[index];
+  if (index + len < index || index + len >= buf_len)
+    return WideString();
+
+  return WideString(buf + index + 1, len);
 }
 
 uint32_t CPDF_ToUnicodeMap::ReverseLookup(wchar_t unicode) const {
@@ -79,45 +80,42 @@
   return 0;
 }
 
-// Static.
+// static
 uint32_t CPDF_ToUnicodeMap::StringToCode(ByteStringView str) {
-  int len = str.GetLength();
+  size_t len = str.GetLength();
   if (len == 0)
     return 0;
 
   uint32_t result = 0;
   if (str[0] == '<') {
-    for (int i = 1; i < len && std::isxdigit(str[i]); ++i)
+    for (size_t i = 1; i < len && std::isxdigit(str[i]); ++i)
       result = result * 16 + FXSYS_HexCharToInt(str.CharAt(i));
     return result;
   }
 
-  for (int i = 0; i < len && std::isdigit(str[i]); ++i)
+  for (size_t i = 0; i < len && std::isdigit(str[i]); ++i)
     result = result * 10 + FXSYS_DecimalCharToInt(str.CharAt(i));
 
   return result;
 }
 
-// Static.
+// static
 WideString CPDF_ToUnicodeMap::StringToWideString(ByteStringView str) {
-  int len = str.GetLength();
-  if (len == 0)
+  size_t len = str.GetLength();
+  if (len == 0 || str[0] != '<')
     return WideString();
 
   WideString result;
-  if (str[0] == '<') {
-    int byte_pos = 0;
-    wchar_t ch = 0;
-    for (int i = 1; i < len && std::isxdigit(str[i]); ++i) {
-      ch = ch * 16 + FXSYS_HexCharToInt(str[i]);
-      byte_pos++;
-      if (byte_pos == 4) {
-        result += ch;
-        byte_pos = 0;
-        ch = 0;
-      }
+  int byte_pos = 0;
+  wchar_t ch = 0;
+  for (size_t i = 1; i < len && std::isxdigit(str[i]); ++i) {
+    ch = ch * 16 + FXSYS_HexCharToInt(str.CharAt(i));
+    byte_pos++;
+    if (byte_pos == 4) {
+      result += ch;
+      byte_pos = 0;
+      ch = 0;
     }
-    return result;
   }
   return result;
 }
@@ -129,84 +127,79 @@
   CPDF_SimpleParser parser(pAcc->GetSpan());
   while (1) {
     ByteStringView word = parser.GetWord();
-    if (word.IsEmpty()) {
+    if (word.IsEmpty())
       break;
-    }
+
     if (word == "beginbfchar") {
       while (1) {
         word = parser.GetWord();
-        if (word.IsEmpty() || word == "endbfchar") {
+        if (word.IsEmpty() || word == "endbfchar")
           break;
-        }
+
         uint32_t srccode = StringToCode(word);
         word = parser.GetWord();
         WideString destcode = StringToWideString(word);
-        int len = destcode.GetLength();
-        if (len == 0) {
+        size_t len = destcode.GetLength();
+        if (len == 0)
           continue;
-        }
+
         if (len == 1) {
           m_Map[srccode] = destcode[0];
         } else {
           m_Map[srccode] = GetUnicode();
-          m_MultiCharBuf.AppendChar(destcode.GetLength());
+          m_MultiCharBuf.AppendChar(len);
           m_MultiCharBuf << destcode;
         }
       }
     } else if (word == "beginbfrange") {
       while (1) {
-        ByteString low, high;
-        low = parser.GetWord();
-        if (low.IsEmpty() || low == "endbfrange") {
+        ByteStringView low = parser.GetWord();
+        if (low.IsEmpty() || low == "endbfrange")
           break;
-        }
-        high = parser.GetWord();
-        uint32_t lowcode = StringToCode(low.AsStringView());
+
+        ByteStringView high = parser.GetWord();
+        uint32_t lowcode = StringToCode(low);
         uint32_t highcode =
-            (lowcode & 0xffffff00) | (StringToCode(high.AsStringView()) & 0xff);
-        if (highcode == (uint32_t)-1) {
+            (lowcode & 0xffffff00) | (StringToCode(high) & 0xff);
+        if (highcode == 0xffffffff)
           break;
-        }
-        ByteString start(parser.GetWord());
+
+        ByteStringView start = parser.GetWord();
         if (start == "[") {
           for (uint32_t code = lowcode; code <= highcode; code++) {
-            ByteString dest(parser.GetWord());
-            WideString destcode = StringToWideString(dest.AsStringView());
-            int len = destcode.GetLength();
-            if (len == 0) {
+            ByteStringView dest = parser.GetWord();
+            WideString destcode = StringToWideString(dest);
+            size_t len = destcode.GetLength();
+            if (len == 0)
               continue;
-            }
+
             if (len == 1) {
               m_Map[code] = destcode[0];
             } else {
               m_Map[code] = GetUnicode();
-              m_MultiCharBuf.AppendChar(destcode.GetLength());
+              m_MultiCharBuf.AppendChar(len);
               m_MultiCharBuf << destcode;
             }
           }
           parser.GetWord();
+          continue;
+        }
+
+        WideString destcode = StringToWideString(start);
+        size_t len = destcode.GetLength();
+        uint32_t value = 0;
+        if (len == 1) {
+          value = StringToCode(start);
+          for (uint32_t code = lowcode; code <= highcode; code++)
+            m_Map[code] = value++;
         } else {
-          WideString destcode = StringToWideString(start.AsStringView());
-          int len = destcode.GetLength();
-          uint32_t value = 0;
-          if (len == 1) {
-            value = StringToCode(start.AsStringView());
-            for (uint32_t code = lowcode; code <= highcode; code++) {
-              m_Map[code] = value++;
-            }
-          } else {
-            for (uint32_t code = lowcode; code <= highcode; code++) {
-              WideString retcode;
-              if (code == lowcode) {
-                retcode = destcode;
-              } else {
-                retcode = StringDataAdd(destcode);
-              }
-              m_Map[code] = GetUnicode();
-              m_MultiCharBuf.AppendChar(retcode.GetLength());
-              m_MultiCharBuf << retcode;
-              destcode = std::move(retcode);
-            }
+          for (uint32_t code = lowcode; code <= highcode; code++) {
+            WideString retcode =
+                code == lowcode ? destcode : StringDataAdd(destcode);
+            m_Map[code] = GetUnicode();
+            m_MultiCharBuf.AppendChar(retcode.GetLength());
+            m_MultiCharBuf << retcode;
+            destcode = std::move(retcode);
           }
         }
       }
@@ -221,15 +214,12 @@
     }
   }
   if (cid_set) {
-    m_pBaseMap =
-        CPDF_FontGlobals::GetInstance()->GetCMapManager()->GetCID2UnicodeMap(
-            cid_set);
-  } else {
-    m_pBaseMap = nullptr;
+    auto* manager = CPDF_FontGlobals::GetInstance()->GetCMapManager();
+    m_pBaseMap = manager->GetCID2UnicodeMap(cid_set);
   }
 }
 
-uint32_t CPDF_ToUnicodeMap::GetUnicode() {
+uint32_t CPDF_ToUnicodeMap::GetUnicode() const {
   FX_SAFE_UINT32 uni = m_MultiCharBuf.GetLength();
   uni = uni * 0x10000 + 0xffff;
   return uni.ValueOrDefault(0);
diff --git a/core/fpdfapi/font/cpdf_tounicodemap.h b/core/fpdfapi/font/cpdf_tounicodemap.h
index 7be9d19..753a985 100644
--- a/core/fpdfapi/font/cpdf_tounicodemap.h
+++ b/core/fpdfapi/font/cpdf_tounicodemap.h
@@ -17,11 +17,9 @@
 
 class CPDF_ToUnicodeMap {
  public:
-  CPDF_ToUnicodeMap();
+  explicit CPDF_ToUnicodeMap(const CPDF_Stream* pStream);
   ~CPDF_ToUnicodeMap();
 
-  void Load(const CPDF_Stream* pStream);
-
   WideString Lookup(uint32_t charcode) const;
   uint32_t ReverseLookup(wchar_t unicode) const;
 
@@ -32,7 +30,8 @@
   static uint32_t StringToCode(ByteStringView str);
   static WideString StringToWideString(ByteStringView str);
 
-  uint32_t GetUnicode();
+  void Load(const CPDF_Stream* pStream);
+  uint32_t GetUnicode() const;
 
   std::map<uint32_t, uint32_t> m_Map;
   UnownedPtr<const CPDF_CID2UnicodeMap> m_pBaseMap;