Split CPDF_ToUnicodeMap::Load().

Add a couple of methods to handle some portions of Load().

Change-Id: If28c585de4824d5662455e039e447a047182a1e0
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/59390
Reviewed-by: Tom Sepez <tsepez@chromium.org>
Commit-Queue: Lei Zhang <thestig@chromium.org>

commit: 8133374cb7fee20ce5b765269fa11e4c35332251 [log] [tgz]
author: Lei Zhang <thestig@chromium.org> Sat Aug 17 00:39:50 2019 +0000
committer: Chromium commit bot <commit-bot@chromium.org> Sat Aug 17 00:39:50 2019 +0000
tree: 0c98aec027928f83b6e484f8025ae0ee892d20fe
parent: 880eab9f7ead13e1735ec0d46049d5884e38ebb7 [diff]
diff --git a/core/fpdfapi/font/cpdf_tounicodemap.cpp b/core/fpdfapi/font/cpdf_tounicodemap.cpp
index bdebc19..b4831fd 100644
--- a/core/fpdfapi/font/cpdf_tounicodemap.cpp
+++ b/core/fpdfapi/font/cpdf_tounicodemap.cpp

@@ -130,60 +130,18 @@
     if (word.IsEmpty())
       break;
 
-    if (word == "beginbfchar") {
-      while (1) {
-        word = parser.GetWord();
-        if (word.IsEmpty() || word == "endbfchar")
-          break;
-
-        SetCode(StringToCode(word), StringToWideString(parser.GetWord()));
-      }
-    } else if (word == "beginbfrange") {
-      while (1) {
-        ByteStringView low = parser.GetWord();
-        if (low.IsEmpty() || low == "endbfrange")
-          break;
-
-        ByteStringView high = parser.GetWord();
-        uint32_t lowcode = StringToCode(low);
-        uint32_t highcode =
-            (lowcode & 0xffffff00) | (StringToCode(high) & 0xff);
-        if (highcode == 0xffffffff)
-          break;
-
-        ByteStringView start = parser.GetWord();
-        if (start == "[") {
-          for (uint32_t code = lowcode; code <= highcode; code++)
-            SetCode(code, StringToWideString(parser.GetWord()));
-          parser.GetWord();
-          continue;
-        }
-
-        WideString destcode = StringToWideString(start);
-        if (destcode.GetLength() == 1) {
-          uint32_t value = StringToCode(start);
-          for (uint32_t code = lowcode; code <= highcode; code++)
-            m_Map[code] = value++;
-        } else {
-          for (uint32_t code = lowcode; code <= highcode; code++) {
-            WideString retcode =
-                code == lowcode ? destcode : StringDataAdd(destcode);
-            m_Map[code] = GetUnicode();
-            m_MultiCharBuf.AppendChar(retcode.GetLength());
-            m_MultiCharBuf << retcode;
-            destcode = std::move(retcode);
-          }
-        }
-      }
-    } else if (word == "/Adobe-Korea1-UCS2") {
+    if (word == "beginbfchar")
+      HandleBeginBFChar(&parser);
+    else if (word == "beginbfrange")
+      HandleBeginBFRange(&parser);
+    else if (word == "/Adobe-Korea1-UCS2")
       cid_set = CIDSET_KOREA1;
-    } else if (word == "/Adobe-Japan1-UCS2") {
+    else if (word == "/Adobe-Japan1-UCS2")
       cid_set = CIDSET_JAPAN1;
-    } else if (word == "/Adobe-CNS1-UCS2") {
+    else if (word == "/Adobe-CNS1-UCS2")
       cid_set = CIDSET_CNS1;
-    } else if (word == "/Adobe-GB1-UCS2") {
+    else if (word == "/Adobe-GB1-UCS2")
       cid_set = CIDSET_GB1;
-    }
   }
   if (cid_set) {
     auto* manager = CPDF_FontGlobals::GetInstance()->GetCMapManager();
@@ -191,6 +149,54 @@
   }
 }
 
+void CPDF_ToUnicodeMap::HandleBeginBFChar(CPDF_SimpleParser* pParser) {
+  while (1) {
+    ByteStringView word = pParser->GetWord();
+    if (word.IsEmpty() || word == "endbfchar")
+      return;
+
+    SetCode(StringToCode(word), StringToWideString(pParser->GetWord()));
+  }
+}
+
+void CPDF_ToUnicodeMap::HandleBeginBFRange(CPDF_SimpleParser* pParser) {
+  while (1) {
+    ByteStringView low = pParser->GetWord();
+    if (low.IsEmpty() || low == "endbfrange")
+      return;
+
+    ByteStringView high = pParser->GetWord();
+    uint32_t lowcode = StringToCode(low);
+    uint32_t highcode = (lowcode & 0xffffff00) | (StringToCode(high) & 0xff);
+    if (highcode == 0xffffffff)
+      return;
+
+    ByteStringView start = pParser->GetWord();
+    if (start == "[") {
+      for (uint32_t code = lowcode; code <= highcode; code++)
+        SetCode(code, StringToWideString(pParser->GetWord()));
+      pParser->GetWord();
+      continue;
+    }
+
+    WideString destcode = StringToWideString(start);
+    if (destcode.GetLength() == 1) {
+      uint32_t value = StringToCode(start);
+      for (uint32_t code = lowcode; code <= highcode; code++)
+        m_Map[code] = value++;
+    } else {
+      for (uint32_t code = lowcode; code <= highcode; code++) {
+        WideString retcode =
+            code == lowcode ? destcode : StringDataAdd(destcode);
+        m_Map[code] = GetUnicode();
+        m_MultiCharBuf.AppendChar(retcode.GetLength());
+        m_MultiCharBuf << retcode;
+        destcode = std::move(retcode);
+      }
+    }
+  }
+}
+
 uint32_t CPDF_ToUnicodeMap::GetUnicode() const {
   FX_SAFE_UINT32 uni = m_MultiCharBuf.GetLength();
   uni = uni * 0x10000 + 0xffff;

diff --git a/core/fpdfapi/font/cpdf_tounicodemap.h b/core/fpdfapi/font/cpdf_tounicodemap.h
index 71f4381..e3d04ed 100644
--- a/core/fpdfapi/font/cpdf_tounicodemap.h
+++ b/core/fpdfapi/font/cpdf_tounicodemap.h

@@ -13,6 +13,7 @@
 #include "core/fxcrt/unowned_ptr.h"
 
 class CPDF_CID2UnicodeMap;
+class CPDF_SimpleParser;
 class CPDF_Stream;
 
 class CPDF_ToUnicodeMap {
@@ -31,6 +32,8 @@
   static WideString StringToWideString(ByteStringView str);
 
   void Load(const CPDF_Stream* pStream);
+  void HandleBeginBFChar(CPDF_SimpleParser* pParser);
+  void HandleBeginBFRange(CPDF_SimpleParser* pParser);
   uint32_t GetUnicode() const;
   void SetCode(uint32_t srccode, WideString destcode);
commit	8133374cb7fee20ce5b765269fa11e4c35332251	[log] [tgz]
author	Lei Zhang <thestig@chromium.org>	Sat Aug 17 00:39:50 2019 +0000
committer	Chromium commit bot <commit-bot@chromium.org>	Sat Aug 17 00:39:50 2019 +0000
tree	0c98aec027928f83b6e484f8025ae0ee892d20fe
parent	880eab9f7ead13e1735ec0d46049d5884e38ebb7 [diff]