Tolerate bfchar / bfrange sections with more than 100 entries

Some PDFs have bfchar / bfrange sections with more than the 100 entries
that the spec allows. Other PDF implementations tolerate this, so relax
PDFium's parsing code as well. Add a test case for this scenario.

Bug: 41489089, 394891352
Change-Id: Ibe5e18bcf4b199868a63888e6ab52a0cf4b2b8fd
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/128630
Commit-Queue: Lei Zhang <thestig@chromium.org>
Reviewed-by: Tom Sepez <tsepez@chromium.org>
Reviewed-by: Thomas Sepez <tsepez@google.com>
diff --git a/core/fpdfapi/font/cpdf_tounicodemap.cpp b/core/fpdfapi/font/cpdf_tounicodemap.cpp
index 0e686e9..7f82390 100644
--- a/core/fpdfapi/font/cpdf_tounicodemap.cpp
+++ b/core/fpdfapi/font/cpdf_tounicodemap.cpp
@@ -23,6 +23,12 @@
 
 constexpr uint32_t kCidLimit = 0xffff;
 
+// Per spec, bfchar and bfrange sections should have at most 100 entries. Some
+// PDFs violate this part of the spec and other PDF parsers tolerate it. So set
+// an artificially high limit that should be good enough for PDFs in the wild,
+// but not so high that it slows down fuzzers.
+constexpr int kOutOfSpecBFLimit = 160000;
+
 WideString StringDataAdd(WideString str) {
   WideString ret;
   wchar_t value = 1;
@@ -186,7 +192,7 @@
   std::vector<CodeWord> code_words;
 
   const int raw_count = StringToInt(previous_word);
-  bool is_valid = raw_count >= 0 && raw_count <= 100;
+  bool is_valid = raw_count >= 0 && raw_count <= kOutOfSpecBFLimit;
   const size_t expected_count = is_valid ? static_cast<size_t>(raw_count) : 0;
   code_words.reserve(expected_count);
 
@@ -243,7 +249,7 @@
   std::vector<Range> ranges;
 
   const int raw_count = StringToInt(previous_word);
-  bool is_valid = raw_count >= 0 && raw_count <= 100;
+  bool is_valid = raw_count >= 0 && raw_count <= kOutOfSpecBFLimit;
   const size_t expected_count = is_valid ? static_cast<size_t>(raw_count) : 0;
   ranges.reserve(expected_count);
 
diff --git a/core/fpdfapi/font/cpdf_tounicodemap_unittest.cpp b/core/fpdfapi/font/cpdf_tounicodemap_unittest.cpp
index d516696..b16163f 100644
--- a/core/fpdfapi/font/cpdf_tounicodemap_unittest.cpp
+++ b/core/fpdfapi/font/cpdf_tounicodemap_unittest.cpp
@@ -82,6 +82,131 @@
   }
 }
 
+TEST(CPDFToUnicodeMapTest, HandleBeginBFCharTolerateOutOfSpecCount) {
+  // 112 entries, above the spec's limit of 100; lookups must still succeed.
+  static constexpr uint8_t kTooManyEntriesInput[] =
+      "112 beginbfchar"
+      "<0000><0008>"
+      "<0001><0009>"
+      "<0002><000A>"
+      "<0003><000B>"
+      "<0004><000C>"
+      "<0005><000D>"
+      "<0006><000E>"
+      "<0007><000F>"
+      "<0008><0000>"
+      "<0009><0001>"
+      "<000A><0002>"
+      "<000B><0003>"
+      "<000C><0004>"
+      "<000D><0005>"
+      "<000E><0006>"
+      "<000F><0007>"
+      "<0010><0018>"
+      "<0011><0019>"
+      "<0012><001A>"
+      "<0013><001B>"
+      "<0014><001C>"
+      "<0015><001D>"
+      "<0016><001E>"
+      "<0017><001F>"
+      "<0018><0010>"
+      "<0019><0011>"
+      "<001A><0012>"
+      "<001B><0013>"
+      "<001C><0014>"
+      "<001D><0015>"
+      "<001E><0016>"
+      "<001F><0017>"
+      "<0020><0028>"
+      "<0021><0029>"
+      "<0022><002A>"
+      "<0023><002B>"
+      "<0024><002C>"
+      "<0025><002D>"
+      "<0026><002E>"
+      "<0027><002F>"
+      "<0028><0020>"
+      "<0029><0021>"
+      "<002A><0022>"
+      "<002B><0023>"
+      "<002C><0024>"
+      "<002D><0025>"
+      "<002E><0026>"
+      "<002F><0027>"
+      "<0030><0038>"
+      "<0031><0039>"
+      "<0032><003A>"
+      "<0033><003B>"
+      "<0034><003C>"
+      "<0035><003D>"
+      "<0036><003E>"
+      "<0037><003F>"
+      "<0038><0030>"
+      "<0039><0031>"
+      "<003A><0032>"
+      "<003B><0033>"
+      "<003C><0034>"
+      "<003D><0035>"
+      "<003E><0036>"
+      "<003F><0037>"
+      "<0040><0048>"
+      "<0041><0049>"
+      "<0042><004A>"
+      "<0043><004B>"
+      "<0044><004C>"
+      "<0045><004D>"
+      "<0046><004E>"
+      "<0047><004F>"
+      "<0048><0040>"
+      "<0049><0041>"
+      "<004A><0042>"
+      "<004B><0043>"
+      "<004C><0044>"
+      "<004D><0045>"
+      "<004E><0046>"
+      "<004F><0047>"
+      "<0050><0058>"
+      "<0051><0059>"
+      "<0052><005A>"
+      "<0053><005B>"
+      "<0054><005C>"
+      "<0055><005D>"
+      "<0056><005E>"
+      "<0057><005F>"
+      "<0058><0050>"
+      "<0059><0051>"
+      "<005A><0052>"
+      "<005B><0053>"
+      "<005C><0054>"
+      "<005D><0055>"
+      "<005E><0056>"
+      "<005F><0057>"
+      "<0060><0068>"
+      "<0061><0069>"
+      "<0062><006A>"
+      "<0063><006B>"
+      "<0064><006C>"
+      "<0065><006D>"
+      "<0066><006E>"
+      "<0067><006F>"
+      "<0068><0060>"
+      "<0069><0061>"
+      "<006A><0062>"
+      "<006B><0063>"
+      "<006C><0064>"
+      "<006D><0065>"
+      "<006E><0066>"
+      "<006F><0067>"
+      "endbfchar";
+  auto stream = pdfium::MakeRetain<CPDF_Stream>(kTooManyEntriesInput);
+  CPDF_ToUnicodeMap map(stream);
+  EXPECT_EQ(9u, map.ReverseLookup(0x0001));  // entry <0009><0001>
+  EXPECT_EQ(111u, map.ReverseLookup(0x0067));  // last entry: <006F><0067>
+  EXPECT_EQ(1u, map.GetUnicodeCountByCharcodeForTesting(1u));
+  EXPECT_EQ(1u, map.GetUnicodeCountByCharcodeForTesting(111u));
+}
+
 TEST(CPDFToUnicodeMapTest, HandleBeginBFRangeRejectsInvalidCidValues) {
   {
     static constexpr uint8_t kInput1[] =