Remove m_LeadingSegCount field from struct PredefinedCMap

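Instead of storing the number of leading segments, treat a segment whose
value is {0, 0} as a sentinel that marks the end of the m_LeadingSegs
array. Unused trailing slots are zero-initialized by the aggregate
initializers, so they act as the sentinel; entries that fill both slots
have no sentinel and iteration simply stops at the end of the array.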

Change-Id: Icc52e74fafc5163027c48e28780fb3830cc40bc4
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/120470
Reviewed-by: Tom Sepez <tsepez@chromium.org>
Reviewed-by: Thomas Sepez <tsepez@google.com>
Commit-Queue: Lei Zhang <thestig@chromium.org>
diff --git a/core/fpdfapi/font/cpdf_cmap.cpp b/core/fpdfapi/font/cpdf_cmap.cpp
index 0c2ac66..df4d92c 100644
--- a/core/fpdfapi/font/cpdf_cmap.cpp
+++ b/core/fpdfapi/font/cpdf_cmap.cpp
@@ -29,7 +29,6 @@
   CIDSet m_Charset;
   CIDCoding m_Coding;
   CPDF_CMap::CodingScheme m_CodingScheme;
-  uint8_t m_LeadingSegCount;
   ByteRange m_LeadingSegs[2];
 };
 
@@ -38,174 +37,122 @@
      CIDSET_GB1,
      CIDCoding::kGB,
      CPDF_CMap::MixedTwoBytes,
-     1,
      {{0xa1, 0xfe}}},
     {"GBpc-EUC",
      CIDSET_GB1,
      CIDCoding::kGB,
      CPDF_CMap::MixedTwoBytes,
-     1,
      {{0xa1, 0xfc}}},
     {"GBK-EUC",
      CIDSET_GB1,
      CIDCoding::kGB,
      CPDF_CMap::MixedTwoBytes,
-     1,
      {{0x81, 0xfe}}},
     {"GBKp-EUC",
      CIDSET_GB1,
      CIDCoding::kGB,
      CPDF_CMap::MixedTwoBytes,
-     1,
      {{0x81, 0xfe}}},
     {"GBK2K-EUC",
      CIDSET_GB1,
      CIDCoding::kGB,
      CPDF_CMap::MixedTwoBytes,
-     1,
      {{0x81, 0xfe}}},
     {"GBK2K",
      CIDSET_GB1,
      CIDCoding::kGB,
      CPDF_CMap::MixedTwoBytes,
-     1,
      {{0x81, 0xfe}}},
-    {"UniGB-UCS2", CIDSET_GB1, CIDCoding::kUCS2, CPDF_CMap::TwoBytes, 0, {}},
-    {"UniGB-UTF16", CIDSET_GB1, CIDCoding::kUTF16, CPDF_CMap::TwoBytes, 0, {}},
+    {"UniGB-UCS2", CIDSET_GB1, CIDCoding::kUCS2, CPDF_CMap::TwoBytes, {}},
+    {"UniGB-UTF16", CIDSET_GB1, CIDCoding::kUTF16, CPDF_CMap::TwoBytes, {}},
     {"B5pc",
      CIDSET_CNS1,
      CIDCoding::kBIG5,
      CPDF_CMap::MixedTwoBytes,
-     1,
      {{0xa1, 0xfc}}},
     {"HKscs-B5",
      CIDSET_CNS1,
      CIDCoding::kBIG5,
      CPDF_CMap::MixedTwoBytes,
-     1,
      {{0x88, 0xfe}}},
     {"ETen-B5",
      CIDSET_CNS1,
      CIDCoding::kBIG5,
      CPDF_CMap::MixedTwoBytes,
-     1,
      {{0xa1, 0xfe}}},
     {"ETenms-B5",
      CIDSET_CNS1,
      CIDCoding::kBIG5,
      CPDF_CMap::MixedTwoBytes,
-     1,
      {{0xa1, 0xfe}}},
-    {"UniCNS-UCS2", CIDSET_CNS1, CIDCoding::kUCS2, CPDF_CMap::TwoBytes, 0, {}},
-    {"UniCNS-UTF16",
-     CIDSET_CNS1,
-     CIDCoding::kUTF16,
-     CPDF_CMap::TwoBytes,
-     0,
-     {}},
+    {"UniCNS-UCS2", CIDSET_CNS1, CIDCoding::kUCS2, CPDF_CMap::TwoBytes, {}},
+    {"UniCNS-UTF16", CIDSET_CNS1, CIDCoding::kUTF16, CPDF_CMap::TwoBytes, {}},
     {"83pv-RKSJ",
      CIDSET_JAPAN1,
      CIDCoding::kJIS,
      CPDF_CMap::MixedTwoBytes,
-     2,
      {{0x81, 0x9f}, {0xe0, 0xfc}}},
     {"90ms-RKSJ",
      CIDSET_JAPAN1,
      CIDCoding::kJIS,
      CPDF_CMap::MixedTwoBytes,
-     2,
      {{0x81, 0x9f}, {0xe0, 0xfc}}},
     {"90msp-RKSJ",
      CIDSET_JAPAN1,
      CIDCoding::kJIS,
      CPDF_CMap::MixedTwoBytes,
-     2,
      {{0x81, 0x9f}, {0xe0, 0xfc}}},
     {"90pv-RKSJ",
      CIDSET_JAPAN1,
      CIDCoding::kJIS,
      CPDF_CMap::MixedTwoBytes,
-     2,
      {{0x81, 0x9f}, {0xe0, 0xfc}}},
     {"Add-RKSJ",
      CIDSET_JAPAN1,
      CIDCoding::kJIS,
      CPDF_CMap::MixedTwoBytes,
-     2,
      {{0x81, 0x9f}, {0xe0, 0xfc}}},
     {"EUC",
      CIDSET_JAPAN1,
      CIDCoding::kJIS,
      CPDF_CMap::MixedTwoBytes,
-     2,
      {{0x8e, 0x8e}, {0xa1, 0xfe}}},
-    {"H",
-     CIDSET_JAPAN1,
-     CIDCoding::kJIS,
-     CPDF_CMap::TwoBytes,
-     1,
-     {{0x21, 0x7e}}},
-    {"V",
-     CIDSET_JAPAN1,
-     CIDCoding::kJIS,
-     CPDF_CMap::TwoBytes,
-     1,
-     {{0x21, 0x7e}}},
+    {"H", CIDSET_JAPAN1, CIDCoding::kJIS, CPDF_CMap::TwoBytes, {{0x21, 0x7e}}},
+    {"V", CIDSET_JAPAN1, CIDCoding::kJIS, CPDF_CMap::TwoBytes, {{0x21, 0x7e}}},
     {"Ext-RKSJ",
      CIDSET_JAPAN1,
      CIDCoding::kJIS,
      CPDF_CMap::MixedTwoBytes,
-     2,
      {{0x81, 0x9f}, {0xe0, 0xfc}}},
-    {"UniJIS-UCS2",
-     CIDSET_JAPAN1,
-     CIDCoding::kUCS2,
-     CPDF_CMap::TwoBytes,
-     0,
-     {}},
+    {"UniJIS-UCS2", CIDSET_JAPAN1, CIDCoding::kUCS2, CPDF_CMap::TwoBytes, {}},
     {"UniJIS-UCS2-HW",
      CIDSET_JAPAN1,
      CIDCoding::kUCS2,
      CPDF_CMap::TwoBytes,
-     0,
      {}},
-    {"UniJIS-UTF16",
-     CIDSET_JAPAN1,
-     CIDCoding::kUTF16,
-     CPDF_CMap::TwoBytes,
-     0,
-     {}},
+    {"UniJIS-UTF16", CIDSET_JAPAN1, CIDCoding::kUTF16, CPDF_CMap::TwoBytes, {}},
     {"KSC-EUC",
      CIDSET_KOREA1,
      CIDCoding::kKOREA,
      CPDF_CMap::MixedTwoBytes,
-     1,
      {{0xa1, 0xfe}}},
     {"KSCms-UHC",
      CIDSET_KOREA1,
      CIDCoding::kKOREA,
      CPDF_CMap::MixedTwoBytes,
-     1,
      {{0x81, 0xfe}}},
     {"KSCms-UHC-HW",
      CIDSET_KOREA1,
      CIDCoding::kKOREA,
      CPDF_CMap::MixedTwoBytes,
-     1,
      {{0x81, 0xfe}}},
     {"KSCpc-EUC",
      CIDSET_KOREA1,
      CIDCoding::kKOREA,
      CPDF_CMap::MixedTwoBytes,
-     1,
      {{0xa1, 0xfd}}},
-    {"UniKS-UCS2", CIDSET_KOREA1, CIDCoding::kUCS2, CPDF_CMap::TwoBytes, 0, {}},
-    {"UniKS-UTF16",
-     CIDSET_KOREA1,
-     CIDCoding::kUTF16,
-     CPDF_CMap::TwoBytes,
-     0,
-     {}},
+    {"UniKS-UCS2", CIDSET_KOREA1, CIDCoding::kUCS2, CPDF_CMap::TwoBytes, {}},
+    {"UniKS-UTF16", CIDSET_KOREA1, CIDCoding::kUTF16, CPDF_CMap::TwoBytes, {}},
 };
 
 const PredefinedCMap* GetPredefinedCMap(ByteStringView cmapid) {
@@ -221,7 +168,10 @@
 std::vector<bool> LoadLeadingSegments(const PredefinedCMap& map) {
   std::vector<bool> segments(256);
   const auto seg_span = pdfium::make_span(map.m_LeadingSegs);
-  for (const ByteRange& seg : seg_span.first(map.m_LeadingSegCount)) {
+  for (const ByteRange& seg : seg_span) {
+    if (seg.m_First == 0 && seg.m_Last == 0) {
+      break;
+    }
     for (int b = seg.m_First; b <= seg.m_Last; ++b) {
       segments[b] = true;
     }