Fix CPDF_CMap::m_pAddMapping lack of type information.

Using a ByteBuffer and an array of uint8_t's isn't how one
would represent an array of structured data. Packing uint16_t's
into a uint32_t via / and % isn't ideal, either.

Bug:
Change-Id: Ib09ae2659ba2f027724546bb7aef99bdfd2dea25
Reviewed-on: https://pdfium-review.googlesource.com/4951
Commit-Queue: Tom Sepez <tsepez@chromium.org>
Reviewed-by: Lei Zhang <thestig@chromium.org>
diff --git a/core/fpdfapi/font/font_int.h b/core/fpdfapi/font/font_int.h
index 926ea91..8bf7ef5 100644
--- a/core/fpdfapi/font/font_int.h
+++ b/core/fpdfapi/font/font_int.h
@@ -9,6 +9,7 @@
 
 #include <map>
 #include <memory>
+#include <utility>
 #include <vector>
 
 #include "core/fpdfapi/font/cpdf_cidfont.h"
@@ -16,9 +17,10 @@
 #include "core/fxcrt/fx_basic.h"
 
 class CPDF_CID2UnicodeMap;
-class CPDF_CMap;
+class CPDF_CMapManager;
 class CPDF_Font;
 class CPDF_Stream;
+struct FXCMAP_CMap;
 
 using FXFT_Library = void*;
 
@@ -26,25 +28,6 @@
 bool FT_UseTTCharmap(FXFT_Face face, int platform_id, int encoding_id);
 CIDSet CharsetFromOrdering(const CFX_ByteStringC& ordering);
 
-class CPDF_CMapManager {
- public:
-  CPDF_CMapManager();
-  ~CPDF_CMapManager();
-
-  CFX_RetainPtr<CPDF_CMap> GetPredefinedCMap(const CFX_ByteString& name,
-                                             bool bPromptCJK);
-  CPDF_CID2UnicodeMap* GetCID2UnicodeMap(CIDSet charset, bool bPromptCJK);
-
- private:
-  CFX_RetainPtr<CPDF_CMap> LoadPredefinedCMap(const CFX_ByteString& name,
-                                              bool bPromptCJK);
-  std::unique_ptr<CPDF_CID2UnicodeMap> LoadCID2UnicodeMap(CIDSet charset,
-                                                          bool bPromptCJK);
-
-  std::map<CFX_ByteString, CFX_RetainPtr<CPDF_CMap>> m_CMaps;
-  std::unique_ptr<CPDF_CID2UnicodeMap> m_CID2UnicodeMaps[6];
-};
-
 class CFX_StockFontArray {
  public:
   CFX_StockFontArray();
@@ -58,64 +41,6 @@
   std::unique_ptr<CPDF_Font> m_StockFonts[14];
 };
 
-class CPDF_FontGlobals {
- public:
-  CPDF_FontGlobals();
-  ~CPDF_FontGlobals();
-
-  void Clear(CPDF_Document* pDoc);
-  CPDF_Font* Find(CPDF_Document* pDoc, uint32_t index);
-
-  // Takes ownership of |pFont|, returns unowned pointer to it.
-  CPDF_Font* Set(CPDF_Document* key,
-                 uint32_t index,
-                 std::unique_ptr<CPDF_Font> pFont);
-
-  CPDF_CMapManager m_CMapManager;
-  struct {
-    const struct FXCMAP_CMap* m_pMapList;
-    uint32_t m_Count;
-  } m_EmbeddedCharsets[CIDSET_NUM_SETS];
-  struct {
-    const uint16_t* m_pMap;
-    uint32_t m_Count;
-  } m_EmbeddedToUnicodes[CIDSET_NUM_SETS];
-
- private:
-  std::map<CPDF_Document*, std::unique_ptr<CFX_StockFontArray>> m_StockMap;
-};
-
-struct CMap_CodeRange {
-  int m_CharSize;
-  uint8_t m_Lower[4];
-  uint8_t m_Upper[4];
-};
-
-class CPDF_CMapParser {
- public:
-  CPDF_CMapParser();
-  ~CPDF_CMapParser();
-  void Initialize(CPDF_CMap* pMap);
-  void ParseWord(const CFX_ByteStringC& str);
-  CFX_BinaryBuf m_AddMaps;
-
- private:
-  friend class fpdf_font_cid_CMap_GetCode_Test;
-  friend class fpdf_font_cid_CMap_GetCodeRange_Test;
-
-  static uint32_t CMap_GetCode(const CFX_ByteStringC& word);
-  static bool CMap_GetCodeRange(CMap_CodeRange& range,
-                                const CFX_ByteStringC& first,
-                                const CFX_ByteStringC& second);
-
-  CPDF_CMap* m_pCMap;
-  int m_Status;
-  int m_CodeSeq;
-  uint32_t m_CodePoints[4];
-  std::vector<CMap_CodeRange> m_CodeRanges;
-  CFX_ByteString m_LastWord;
-};
-
 enum CIDCoding : uint8_t {
   CIDCODING_UNKNOWN = 0,
   CIDCODING_GB,
@@ -136,6 +61,18 @@
     MixedFourBytes
   };
 
+  struct CodeRange {
+    int m_CharSize;
+    uint8_t m_Lower[4];
+    uint8_t m_Upper[4];
+  };
+
+  struct CIDRange {
+    uint32_t m_StartCode;
+    uint32_t m_EndCode;
+    uint16_t m_StartCID;
+  };
+
   template <typename T, typename... Args>
   friend CFX_RetainPtr<T> pdfium::MakeRetain(Args&&... args);
 
@@ -167,11 +104,59 @@
   int m_nCodeRanges;
   uint8_t* m_pLeadingBytes;
   uint16_t* m_pMapping;
-  uint8_t* m_pAddMapping;
   bool m_bLoaded;
+  std::vector<CIDRange> m_AddMapping;
   const FXCMAP_CMap* m_pEmbedMap;
 };
 
+class CPDF_CMapManager {
+ public:
+  CPDF_CMapManager();
+  ~CPDF_CMapManager();
+
+  CFX_RetainPtr<CPDF_CMap> GetPredefinedCMap(const CFX_ByteString& name,
+                                             bool bPromptCJK);
+  CPDF_CID2UnicodeMap* GetCID2UnicodeMap(CIDSet charset, bool bPromptCJK);
+
+ private:
+  CFX_RetainPtr<CPDF_CMap> LoadPredefinedCMap(const CFX_ByteString& name,
+                                              bool bPromptCJK);
+  std::unique_ptr<CPDF_CID2UnicodeMap> LoadCID2UnicodeMap(CIDSet charset,
+                                                          bool bPromptCJK);
+
+  std::map<CFX_ByteString, CFX_RetainPtr<CPDF_CMap>> m_CMaps;
+  std::unique_ptr<CPDF_CID2UnicodeMap> m_CID2UnicodeMaps[6];
+};
+
+class CPDF_CMapParser {
+ public:
+  explicit CPDF_CMapParser(CPDF_CMap* pMap);
+  ~CPDF_CMapParser();
+
+  void ParseWord(const CFX_ByteStringC& str);
+  bool HasAddMaps() const { return !m_AddMaps.empty(); }
+  std::vector<CPDF_CMap::CIDRange> TakeAddMaps() {
+    return std::move(m_AddMaps);
+  }
+
+ private:
+  friend class fpdf_font_cid_CMap_GetCode_Test;
+  friend class fpdf_font_cid_CMap_GetCodeRange_Test;
+
+  static uint32_t CMap_GetCode(const CFX_ByteStringC& word);
+  static bool CMap_GetCodeRange(CPDF_CMap::CodeRange& range,
+                                const CFX_ByteStringC& first,
+                                const CFX_ByteStringC& second);
+
+  CPDF_CMap* const m_pCMap;
+  int m_Status;
+  int m_CodeSeq;
+  uint32_t m_CodePoints[4];
+  std::vector<CPDF_CMap::CodeRange> m_CodeRanges;
+  std::vector<CPDF_CMap::CIDRange> m_AddMaps;
+  CFX_ByteString m_LastWord;
+};
+
 class CPDF_CID2UnicodeMap {
  public:
   CPDF_CID2UnicodeMap();
@@ -211,4 +196,31 @@
   CFX_WideTextBuf m_MultiCharBuf;
 };
 
+class CPDF_FontGlobals {
+ public:
+  CPDF_FontGlobals();
+  ~CPDF_FontGlobals();
+
+  void Clear(CPDF_Document* pDoc);
+  CPDF_Font* Find(CPDF_Document* pDoc, uint32_t index);
+
+  // Takes ownership of |pFont|, returns unowned pointer to it.
+  CPDF_Font* Set(CPDF_Document* key,
+                 uint32_t index,
+                 std::unique_ptr<CPDF_Font> pFont);
+
+  CPDF_CMapManager m_CMapManager;
+  struct {
+    const FXCMAP_CMap* m_pMapList;
+    uint32_t m_Count;
+  } m_EmbeddedCharsets[CIDSET_NUM_SETS];
+  struct {
+    const uint16_t* m_pMap;
+    uint32_t m_Count;
+  } m_EmbeddedToUnicodes[CIDSET_NUM_SETS];
+
+ private:
+  std::map<CPDF_Document*, std::unique_ptr<CFX_StockFontArray>> m_StockMap;
+};
+
 #endif  // CORE_FPDFAPI_FONT_FONT_INT_H_
diff --git a/core/fpdfapi/font/fpdf_font_cid.cpp b/core/fpdfapi/font/fpdf_font_cid.cpp
index 001e8ac..3f8e919 100644
--- a/core/fpdfapi/font/fpdf_font_cid.cpp
+++ b/core/fpdfapi/font/fpdf_font_cid.cpp
@@ -200,24 +200,9 @@
   return CFX_ByteStringC(&word[1], word.GetLength() - 2);
 }
 
-int CompareDWORD(const void* data1, const void* data2) {
-  return (*(uint32_t*)data1) - (*(uint32_t*)data2);
-}
-
-int CompareCID(const void* key, const void* element) {
-  if ((*(uint32_t*)key) < (*(uint32_t*)element)) {
-    return -1;
-  }
-  if ((*(uint32_t*)key) >
-      (*(uint32_t*)element) + ((uint32_t*)element)[1] / 65536) {
-    return 1;
-  }
-  return 0;
-}
-
 int CheckCodeRange(uint8_t* codes,
                    int size,
-                   CMap_CodeRange* pRanges,
+                   CPDF_CMap::CodeRange* pRanges,
                    int nRanges) {
   int iSeg = nRanges - 1;
   while (iSeg >= 0) {
@@ -244,7 +229,7 @@
 }
 
 int GetCharSizeImpl(uint32_t charcode,
-                    CMap_CodeRange* pRanges,
+                    CPDF_CMap::CodeRange* pRanges,
                     int iRangesSize) {
   if (!iRangesSize)
     return 1;
@@ -328,18 +313,11 @@
   return pMap;
 }
 
-CPDF_CMapParser::CPDF_CMapParser()
-    : m_pCMap(nullptr), m_Status(0), m_CodeSeq(0) {}
+CPDF_CMapParser::CPDF_CMapParser(CPDF_CMap* pCMap)
+    : m_pCMap(pCMap), m_Status(0), m_CodeSeq(0) {}
 
 CPDF_CMapParser::~CPDF_CMapParser() {}
 
-void CPDF_CMapParser::Initialize(CPDF_CMap* pCMap) {
-  m_pCMap = pCMap;
-  m_Status = 0;
-  m_CodeSeq = 0;
-  m_AddMaps.EstimateSize(0, 10240);
-}
-
 void CPDF_CMapParser::ParseWord(const CFX_ByteStringC& word) {
   if (word.IsEmpty()) {
     return;
@@ -388,10 +366,7 @@
         m_pCMap->m_pMapping[code] = (uint16_t)(StartCID + code - StartCode);
       }
     } else {
-      uint32_t buf[2];
-      buf[0] = StartCode;
-      buf[1] = ((EndCode - StartCode) << 16) + StartCID;
-      m_AddMaps.AppendBlock(buf, sizeof buf);
+      m_AddMaps.push_back({StartCode, EndCode, StartCID});
     }
     m_CodeSeq = 0;
   } else if (m_Status == 3) {
@@ -412,9 +387,9 @@
         m_pCMap->m_nCodeRanges = nSegs;
         FX_Free(m_pCMap->m_pLeadingBytes);
         m_pCMap->m_pLeadingBytes =
-            FX_Alloc2D(uint8_t, nSegs, sizeof(CMap_CodeRange));
+            FX_Alloc2D(uint8_t, nSegs, sizeof(CPDF_CMap::CodeRange));
         memcpy(m_pCMap->m_pLeadingBytes, m_CodeRanges.data(),
-               nSegs * sizeof(CMap_CodeRange));
+               nSegs * sizeof(CPDF_CMap::CodeRange));
       } else if (nSegs == 1) {
         m_pCMap->m_CodingScheme = (m_CodeRanges[0].m_CharSize == 2)
                                       ? CPDF_CMap::TwoBytes
@@ -426,7 +401,7 @@
         return;
       }
       if (m_CodeSeq % 2) {
-        CMap_CodeRange range;
+        CPDF_CMap::CodeRange range;
         if (CMap_GetCodeRange(range, m_LastWord.AsStringC(), word))
           m_CodeRanges.push_back(range);
       }
@@ -458,7 +433,7 @@
 }
 
 // Static.
-bool CPDF_CMapParser::CMap_GetCodeRange(CMap_CodeRange& range,
+bool CPDF_CMapParser::CMap_GetCodeRange(CPDF_CMap::CodeRange& range,
                                         const CFX_ByteStringC& first,
                                         const CFX_ByteStringC& second) {
   if (first.GetLength() == 0 || first.GetAt(0) != '<')
@@ -503,13 +478,12 @@
   m_bLoaded = false;
   m_pMapping = nullptr;
   m_pLeadingBytes = nullptr;
-  m_pAddMapping = nullptr;
   m_pEmbedMap = nullptr;
   m_nCodeRanges = 0;
 }
+
 CPDF_CMap::~CPDF_CMap() {
   FX_Free(m_pMapping);
-  FX_Free(m_pAddMapping);
   FX_Free(m_pLeadingBytes);
 }
 
@@ -567,8 +541,7 @@
 
 void CPDF_CMap::LoadEmbedded(const uint8_t* pData, uint32_t size) {
   m_pMapping = FX_Alloc(uint16_t, 65536);
-  CPDF_CMapParser parser;
-  parser.Initialize(this);
+  CPDF_CMapParser parser(this);
   CPDF_SimpleParser syntax(pData, size);
   while (1) {
     CFX_ByteStringC word = syntax.GetWord();
@@ -577,37 +550,37 @@
     }
     parser.ParseWord(word);
   }
-  if (m_CodingScheme == MixedFourBytes && parser.m_AddMaps.GetSize()) {
-    m_pAddMapping = FX_Alloc(uint8_t, parser.m_AddMaps.GetSize() + 4);
-    *(uint32_t*)m_pAddMapping = parser.m_AddMaps.GetSize() / 8;
-    memcpy(m_pAddMapping + 4, parser.m_AddMaps.GetBuffer(),
-           parser.m_AddMaps.GetSize());
-    qsort(m_pAddMapping + 4, parser.m_AddMaps.GetSize() / 8, 8, CompareDWORD);
+  if (m_CodingScheme == MixedFourBytes && parser.HasAddMaps()) {
+    m_AddMapping = parser.TakeAddMaps();
+    std::sort(
+        m_AddMapping.begin(), m_AddMapping.end(),
+        [](const CPDF_CMap::CIDRange& arg1, const CPDF_CMap::CIDRange& arg2) {
+          return arg1.m_EndCode < arg2.m_EndCode;
+        });
   }
 }
 
 uint16_t CPDF_CMap::CIDFromCharCode(uint32_t charcode) const {
-  if (m_Coding == CIDCODING_CID) {
-    return (uint16_t)charcode;
-  }
-  if (m_pEmbedMap) {
+  if (m_Coding == CIDCODING_CID)
+    return static_cast<uint16_t>(charcode);
+
+  if (m_pEmbedMap)
     return FPDFAPI_CIDFromCharCode(m_pEmbedMap, charcode);
-  }
-  if (!m_pMapping) {
-    return (uint16_t)charcode;
-  }
-  if (charcode >> 16) {
-    if (m_pAddMapping) {
-      void* found = bsearch(&charcode, m_pAddMapping + 4,
-                            *(uint32_t*)m_pAddMapping, 8, CompareCID);
-      if (!found)
-        return 0;
-      return (uint16_t)(((uint32_t*)found)[1] % 65536 + charcode -
-                        *(uint32_t*)found);
-    }
+
+  if (!m_pMapping)
+    return static_cast<uint16_t>(charcode);
+
+  if (charcode < 0x10000)
+    return m_pMapping[charcode];
+
+  auto it = std::lower_bound(m_AddMapping.begin(), m_AddMapping.end(), charcode,
+                             [](const CPDF_CMap::CIDRange& arg, uint32_t val) {
+                               return arg.m_EndCode < val;
+                             });
+  if (it == m_AddMapping.end() || it->m_StartCode > charcode)
     return 0;
-  }
-  return m_pMapping[charcode];
+
+  return it->m_StartCID + charcode - it->m_StartCode;
 }
 
 uint32_t CPDF_CMap::GetNextChar(const char* pString,
@@ -632,7 +605,7 @@
       uint8_t codes[4];
       int char_size = 1;
       codes[0] = ((uint8_t*)pString)[offset++];
-      CMap_CodeRange* pRanges = (CMap_CodeRange*)m_pLeadingBytes;
+      auto* pRanges = reinterpret_cast<CPDF_CMap::CodeRange*>(m_pLeadingBytes);
       while (1) {
         int ret = CheckCodeRange(codes, char_size, pRanges, m_nCodeRanges);
         if (ret == 0) {
@@ -716,7 +689,8 @@
     case MixedTwoBytes:
     case MixedFourBytes:
       if (charcode < 0x100) {
-        CMap_CodeRange* pRanges = (CMap_CodeRange*)m_pLeadingBytes;
+        auto* pRanges =
+            reinterpret_cast<CPDF_CMap::CodeRange*>(m_pLeadingBytes);
         int iSize = GetCharSizeImpl(charcode, pRanges, m_nCodeRanges);
         if (iSize == 0) {
           iSize = 1;
diff --git a/core/fpdfapi/font/fpdf_font_cid_unittest.cpp b/core/fpdfapi/font/fpdf_font_cid_unittest.cpp
index 53f5e47..813a3a9 100644
--- a/core/fpdfapi/font/fpdf_font_cid_unittest.cpp
+++ b/core/fpdfapi/font/fpdf_font_cid_unittest.cpp
@@ -36,7 +36,7 @@
 }
 
 TEST(fpdf_font_cid, CMap_GetCodeRange) {
-  CMap_CodeRange range;
+  CPDF_CMap::CodeRange range;
 
   // Must start with a <
   EXPECT_FALSE(CPDF_CMapParser::CMap_GetCodeRange(range, "", ""));