Avoid interspersed length/value buffer in CPDF_ToUnicodeMap.

This is somewhat less memory efficient, but simpler and safer: it avoids
casting character values to lengths, and it avoids allocations and copies
from the buffer when returning multi-char strings.

-- Fix some IWYU (include-what-you-use) issues in headers.
-- Explicitly construct single-char strings.
-- Rename GetUnicode() method to describe what it actually returns.

Change-Id: I36a8a4d7d79321e01b5f3e8bd20fa917c7f92850
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/88070
Reviewed-by: Lei Zhang <thestig@chromium.org>
Commit-Queue: Tom Sepez <tsepez@chromium.org>
diff --git a/core/fpdfapi/font/cpdf_tounicodemap.cpp b/core/fpdfapi/font/cpdf_tounicodemap.cpp
index d1df1f8..99409e2 100644
--- a/core/fpdfapi/font/cpdf_tounicodemap.cpp
+++ b/core/fpdfapi/font/cpdf_tounicodemap.cpp
@@ -54,13 +54,10 @@
   uint32_t value = it->second;
   wchar_t unicode = static_cast<wchar_t>(value & 0xffff);
   if (unicode != 0xffff)
-    return unicode;
+    return WideString(unicode);
 
-  WideStringView buf = m_MultiCharBuf.AsStringView();
   size_t index = value >> 16;
-  if (!buf.IsValidIndex(index))
-    return WideString();
-  return WideString(buf.Substr(index + 1, buf[index]));
+  return index < m_MultiCharVec.size() ? m_MultiCharVec[index] : WideString();
 }
 
 uint32_t CPDF_ToUnicodeMap::ReverseLookup(wchar_t unicode) const {
@@ -195,17 +192,16 @@
       for (uint32_t code = lowcode; code <= highcode; code++) {
         WideString retcode =
             code == lowcode ? destcode : StringDataAdd(destcode);
-        m_Multimap.emplace(code, GetUnicode());
-        m_MultiCharBuf.AppendChar(retcode.GetLength());
-        m_MultiCharBuf << retcode;
+        m_Multimap.emplace(code, GetMultiCharIndexIndicator());
+        m_MultiCharVec.push_back(retcode);
         destcode = std::move(retcode);
       }
     }
   }
 }
 
-uint32_t CPDF_ToUnicodeMap::GetUnicode() const {
-  FX_SAFE_UINT32 uni = m_MultiCharBuf.GetLength();
+uint32_t CPDF_ToUnicodeMap::GetMultiCharIndexIndicator() const {
+  FX_SAFE_UINT32 uni = m_MultiCharVec.size();
   uni = uni * 0x10000 + 0xffff;
   return uni.ValueOrDefault(0);
 }
@@ -218,8 +214,7 @@
   if (len == 1) {
     m_Multimap.emplace(srccode, destcode[0]);
   } else {
-    m_Multimap.emplace(srccode, GetUnicode());
-    m_MultiCharBuf.AppendChar(len);
-    m_MultiCharBuf << destcode;
+    m_Multimap.emplace(srccode, GetMultiCharIndexIndicator());
+    m_MultiCharVec.push_back(destcode);
   }
 }
diff --git a/core/fpdfapi/font/cpdf_tounicodemap.h b/core/fpdfapi/font/cpdf_tounicodemap.h
index bd6f075..106bb00 100644
--- a/core/fpdfapi/font/cpdf_tounicodemap.h
+++ b/core/fpdfapi/font/cpdf_tounicodemap.h
@@ -8,9 +8,11 @@
 #define CORE_FPDFAPI_FONT_CPDF_TOUNICODEMAP_H_
 
 #include <map>
+#include <vector>
 
-#include "core/fxcrt/cfx_widetextbuf.h"
+#include "core/fxcrt/fx_string.h"
 #include "core/fxcrt/unowned_ptr.h"
+#include "third_party/abseil-cpp/absl/types/optional.h"
 
 class CPDF_CID2UnicodeMap;
 class CPDF_SimpleParser;
@@ -34,12 +36,12 @@
   void Load(const CPDF_Stream* pStream);
   void HandleBeginBFChar(CPDF_SimpleParser* pParser);
   void HandleBeginBFRange(CPDF_SimpleParser* pParser);
-  uint32_t GetUnicode() const;
+  uint32_t GetMultiCharIndexIndicator() const;
   void SetCode(uint32_t srccode, WideString destcode);
 
   std::multimap<uint32_t, uint32_t> m_Multimap;
   UnownedPtr<const CPDF_CID2UnicodeMap> m_pBaseMap;
-  CFX_WideTextBuf m_MultiCharBuf;
+  std::vector<WideString> m_MultiCharVec;
 };
 
 #endif  // CORE_FPDFAPI_FONT_CPDF_TOUNICODEMAP_H_