Split CPDF_CMapParser::ParseWord().

Break off the larger pieces into their own methods. Slightly simplify
code in one of the new methods.

Change-Id: I3cbe362b008e65a38f286a654d67c5506be1534a
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/59511
Reviewed-by: Tom Sepez <tsepez@chromium.org>
Commit-Queue: Lei Zhang <thestig@chromium.org>
diff --git a/core/fpdfapi/font/cpdf_cmapparser.cpp b/core/fpdfapi/font/cpdf_cmapparser.cpp
index 56efae7..a4a8c14 100644
--- a/core/fpdfapi/font/cpdf_cmapparser.cpp
+++ b/core/fpdfapi/font/cpdf_cmapparser.cpp
@@ -54,36 +54,10 @@
     m_Status = kProcessingCodeSpaceRange;
     m_CodeSeq = 0;
   } else if (word == "usecmap") {
-  } else if (m_Status == kProcessingCidChar ||
-             m_Status == kProcessingCidRange) {
-    m_CodePoints[m_CodeSeq] = GetCode(word);
-    m_CodeSeq++;
-    uint32_t StartCode, EndCode;
-    uint16_t StartCID;
-    if (m_Status == kProcessingCidChar) {
-      if (m_CodeSeq < 2)
-        return;
-
-      EndCode = StartCode = m_CodePoints[0];
-      StartCID = (uint16_t)m_CodePoints[1];
-    } else {
-      if (m_CodeSeq < 3)
-        return;
-
-      StartCode = m_CodePoints[0];
-      EndCode = m_CodePoints[1];
-      StartCID = (uint16_t)m_CodePoints[2];
-    }
-    if (EndCode < 0x10000) {
-      for (uint32_t code = StartCode; code <= EndCode; code++) {
-        m_pCMap->SetDirectCharcodeToCIDTable(
-            code, static_cast<uint16_t>(StartCID + code - StartCode));
-      }
-    } else {
-      m_AdditionalCharcodeToCIDMappings.push_back(
-          {StartCode, EndCode, StartCID});
-    }
-    m_CodeSeq = 0;
+  } else if (m_Status == kProcessingCidChar) {
+    HandleCid(word);
+  } else if (m_Status == kProcessingCidRange) {
+    HandleCid(word);
   } else if (m_Status == kProcessingRegistry) {
     m_Status = kStart;
   } else if (m_Status == kProcessingOrdering) {
@@ -95,38 +69,73 @@
     m_pCMap->SetVertical(GetCode(word) != 0);
     m_Status = kStart;
   } else if (m_Status == kProcessingCodeSpaceRange) {
-    if (word == "endcodespacerange") {
-      const auto& code_ranges = m_pCMap->GetMixedFourByteLeadingRanges();
-      size_t nSegs = code_ranges.size() + m_PendingRanges.size();
-      if (nSegs == 1) {
-        const auto& first_range =
-            !code_ranges.empty() ? code_ranges[0] : m_PendingRanges[0];
-        m_pCMap->SetCodingScheme((first_range.m_CharSize == 2)
-                                     ? CPDF_CMap::TwoBytes
-                                     : CPDF_CMap::OneByte);
-      } else if (nSegs > 1) {
-        m_pCMap->SetCodingScheme(CPDF_CMap::MixedFourBytes);
-        for (const auto& range : m_PendingRanges)
-          m_pCMap->AppendMixedFourByteLeadingRanges(range);
-        m_PendingRanges.clear();
-      }
-      m_Status = kStart;
-    } else {
-      if (word.GetLength() == 0 || word[0] != '<')
-        return;
-
-      if (m_CodeSeq % 2) {
-        Optional<CPDF_CMap::CodeRange> range =
-            GetCodeRange(m_LastWord.AsStringView(), word);
-        if (range.has_value())
-          m_PendingRanges.push_back(range.value());
-      }
-      m_CodeSeq++;
-    }
+    HandleCodeSpaceRange(word);
   }
   m_LastWord = word;
 }
 
+void CPDF_CMapParser::HandleCid(ByteStringView word) {
+  ASSERT(m_Status == kProcessingCidChar || m_Status == kProcessingCidRange);
+  bool bChar = m_Status == kProcessingCidChar;
+
+  m_CodePoints[m_CodeSeq] = GetCode(word);
+  m_CodeSeq++;
+  int nRequiredCodePoints = bChar ? 2 : 3;
+  if (m_CodeSeq < nRequiredCodePoints)
+    return;
+
+  uint32_t StartCode = m_CodePoints[0];
+  uint32_t EndCode;
+  uint16_t StartCID;
+  if (bChar) {
+    EndCode = StartCode;
+    StartCID = static_cast<uint16_t>(m_CodePoints[1]);
+  } else {
+    EndCode = m_CodePoints[1];
+    StartCID = static_cast<uint16_t>(m_CodePoints[2]);
+  }
+  if (EndCode < 0x10000) {
+    for (uint32_t code = StartCode; code <= EndCode; code++) {
+      m_pCMap->SetDirectCharcodeToCIDTable(
+          code, static_cast<uint16_t>(StartCID + code - StartCode));
+    }
+  } else {
+    m_AdditionalCharcodeToCIDMappings.push_back({StartCode, EndCode, StartCID});
+  }
+  m_CodeSeq = 0;
+}
+
+void CPDF_CMapParser::HandleCodeSpaceRange(ByteStringView word) {
+  if (word != "endcodespacerange") {
+    if (word.GetLength() == 0 || word[0] != '<')
+      return;
+
+    if (m_CodeSeq % 2) {
+      Optional<CPDF_CMap::CodeRange> range =
+          GetCodeRange(m_LastWord.AsStringView(), word);
+      if (range.has_value())
+        m_PendingRanges.push_back(range.value());
+    }
+    m_CodeSeq++;
+    return;
+  }
+
+  const auto& code_ranges = m_pCMap->GetMixedFourByteLeadingRanges();
+  size_t nSegs = code_ranges.size() + m_PendingRanges.size();
+  if (nSegs == 1) {
+    const auto& first_range =
+        !code_ranges.empty() ? code_ranges[0] : m_PendingRanges[0];
+    m_pCMap->SetCodingScheme(first_range.m_CharSize == 2 ? CPDF_CMap::TwoBytes
+                                                         : CPDF_CMap::OneByte);
+  } else if (nSegs > 1) {
+    m_pCMap->SetCodingScheme(CPDF_CMap::MixedFourBytes);
+    for (const auto& range : m_PendingRanges)
+      m_pCMap->AppendMixedFourByteLeadingRanges(range);
+    m_PendingRanges.clear();
+  }
+  m_Status = kStart;
+}
+
 // static
 uint32_t CPDF_CMapParser::GetCode(ByteStringView word) {
   if (word.IsEmpty())
diff --git a/core/fpdfapi/font/cpdf_cmapparser.h b/core/fpdfapi/font/cpdf_cmapparser.h
index e8ea29b..2fe37a4 100644
--- a/core/fpdfapi/font/cpdf_cmapparser.h
+++ b/core/fpdfapi/font/cpdf_cmapparser.h
@@ -45,6 +45,9 @@
     kProcessingCodeSpaceRange,
   };
 
+  void HandleCid(ByteStringView word);
+  void HandleCodeSpaceRange(ByteStringView word);
+
   static uint32_t GetCode(ByteStringView word);
   static Optional<CPDF_CMap::CodeRange> GetCodeRange(ByteStringView first,
                                                      ByteStringView second);