Fix potential out-of-memory issues in CPDF_ToUnicodeMap.

Replaces direct std::multimap::emplace() calls with a new method
CPDF_ToUnicodeMap::InsertIntoMultimap() which checks for duplicate
mappings before inserting new entries into `m_Multimap`.

This prevents OOM issues caused by large numbers of duplicate
CID-to-Unicode mappings being inserted into `m_Multimap`.

Bug: chromium:1245067
Change-Id: I4aad548b26c91da1dbef46758a5ce0966cd981cc
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/95390
Reviewed-by: Lei Zhang <thestig@chromium.org>
Commit-Queue: Nigi <nigi@chromium.org>
diff --git a/core/fpdfapi/font/cpdf_tounicodemap.cpp b/core/fpdfapi/font/cpdf_tounicodemap.cpp
index 4ec5a3c..00714f1 100644
--- a/core/fpdfapi/font/cpdf_tounicodemap.cpp
+++ b/core/fpdfapi/font/cpdf_tounicodemap.cpp
@@ -6,6 +6,7 @@
 
 #include "core/fpdfapi/font/cpdf_tounicodemap.h"
 
+#include <map>
 #include <utility>
 
 #include "core/fpdfapi/font/cpdf_cid2unicodemap.h"
@@ -14,6 +15,7 @@
 #include "core/fpdfapi/parser/cpdf_stream.h"
 #include "core/fxcrt/fx_extension.h"
 #include "core/fxcrt/fx_safe_types.h"
+#include "third_party/base/containers/contains.h"
 #include "third_party/base/numerics/safe_conversions.h"
 
 namespace {
@@ -190,7 +192,7 @@
       uint32_t value = value_or_error.value();
       for (FX_SAFE_UINT32 code = lowcode;
            code.IsValid() && code.ValueOrDie() <= highcode; code++) {
-        m_Multimap.emplace(code.ValueOrDie(), value++);
+        InsertIntoMultimap(code.ValueOrDie(), value++);
       }
     } else {
       for (FX_SAFE_UINT32 code = lowcode;
@@ -198,7 +200,7 @@
         uint32_t code_value = code.ValueOrDie();
         WideString retcode =
             code_value == lowcode ? destcode : StringDataAdd(destcode);
-        m_Multimap.emplace(code_value, GetMultiCharIndexIndicator());
+        InsertIntoMultimap(code_value, GetMultiCharIndexIndicator());
         m_MultiCharVec.push_back(retcode);
         destcode = std::move(retcode);
       }
@@ -218,9 +220,26 @@
     return;
 
   if (len == 1) {
-    m_Multimap.emplace(srccode, destcode[0]);
+    InsertIntoMultimap(srccode, destcode[0]);
   } else {
-    m_Multimap.emplace(srccode, GetMultiCharIndexIndicator());
+    InsertIntoMultimap(srccode, GetMultiCharIndexIndicator());
     m_MultiCharVec.push_back(destcode);
   }
 }
+
+void CPDF_ToUnicodeMap::InsertIntoMultimap(uint32_t code, uint32_t destcode) {
+  if (!pdfium::Contains(m_Multimap, code)) {
+    m_Multimap.emplace(code, destcode);
+    return;
+  }
+
+  auto ret = m_Multimap.equal_range(code);
+  for (auto iter = ret.first; iter != ret.second; ++iter) {
+    if (iter->second == destcode) {
+      // This exact (code, destcode) pair is already stored; skip the insert.
+      return;
+    }
+  }
+
+  m_Multimap.emplace(code, destcode);
+}
diff --git a/core/fpdfapi/font/cpdf_tounicodemap.h b/core/fpdfapi/font/cpdf_tounicodemap.h
index 106bb00..61d0ac6 100644
--- a/core/fpdfapi/font/cpdf_tounicodemap.h
+++ b/core/fpdfapi/font/cpdf_tounicodemap.h
@@ -26,6 +26,8 @@
   WideString Lookup(uint32_t charcode) const;
   uint32_t ReverseLookup(wchar_t unicode) const;
 
+  size_t GetMultimapSizeForTesting() const { return m_Multimap.size(); }
+
  private:
   friend class cpdf_tounicodemap_StringToCode_Test;
   friend class cpdf_tounicodemap_StringToWideString_Test;
@@ -39,6 +41,10 @@
   uint32_t GetMultiCharIndexIndicator() const;
   void SetCode(uint32_t srccode, WideString destcode);
 
+  // Inserts a new entry only if the same (code, destcode) mapping has not
+  // already been inserted into `m_Multimap`.
+  void InsertIntoMultimap(uint32_t code, uint32_t destcode);
+
   std::multimap<uint32_t, uint32_t> m_Multimap;
   UnownedPtr<const CPDF_CID2UnicodeMap> m_pBaseMap;
   std::vector<WideString> m_MultiCharVec;
diff --git a/core/fpdfapi/font/cpdf_tounicodemap_unittest.cpp b/core/fpdfapi/font/cpdf_tounicodemap_unittest.cpp
index 48e7c2f..098983c 100644
--- a/core/fpdfapi/font/cpdf_tounicodemap_unittest.cpp
+++ b/core/fpdfapi/font/cpdf_tounicodemap_unittest.cpp
@@ -79,3 +79,40 @@
     EXPECT_STREQ(L"AB", map.Lookup(0xffffffff).c_str());
   }
 }
+
+TEST(cpdf_tounicodemap, InsertIntoMultimap) {
+  {
+    // Two distinct CIDs mapped to two distinct Unicode values: 2 entries.
+    static constexpr uint8_t kInput1[] =
+        "beginbfchar<1><0041><2><0042>endbfchar";
+    auto stream = pdfium::MakeRetain<CPDF_Stream>();
+    stream->SetData(pdfium::make_span(kInput1));
+    CPDF_ToUnicodeMap map(stream.Get());
+    EXPECT_EQ(1u, map.ReverseLookup(0x0041));
+    EXPECT_EQ(2u, map.ReverseLookup(0x0042));
+    EXPECT_EQ(2u, map.GetMultimapSizeForTesting());
+  }
+  {
+    // One CID mapped to two different Unicode values: both entries are kept.
+    static constexpr uint8_t kInput2[] =
+        "beginbfrange<0><0><0041><0><0><0042>endbfrange";
+    auto stream = pdfium::MakeRetain<CPDF_Stream>();
+    stream->SetData(pdfium::make_span(kInput2));
+    CPDF_ToUnicodeMap map(stream.Get());
+    EXPECT_EQ(0u, map.ReverseLookup(0x0041));
+    EXPECT_EQ(0u, map.ReverseLookup(0x0042));
+    EXPECT_EQ(2u, map.GetMultimapSizeForTesting());
+  }
+  {
+    // CID 0 is mapped to U+0041 ("A") twice, via bfrange and via bfchar. The
+    // duplicate must be dropped, leaving exactly 1 entry in `m_Multimap`.
+    static constexpr uint8_t kInput3[] =
+        "beginbfrange<0><0>[<0041>]endbfrange\n"
+        "beginbfchar<0><0041>endbfchar";
+    auto stream = pdfium::MakeRetain<CPDF_Stream>();
+    stream->SetData(pdfium::make_span(kInput3));
+    CPDF_ToUnicodeMap map(stream.Get());
+    EXPECT_EQ(0u, map.ReverseLookup(0x0041));
+    EXPECT_EQ(1u, map.GetMultimapSizeForTesting());
+  }
+}