Tolerate bfchar / bfrange sections with more than 100 entries

Some PDFs have bfchar / bfrange sections with more than the 100 entries
that the spec allows. Other PDF implementations tolerate this, so relax
PDFium's parsing code as well. Add a test case for this scenario.

Bug: 41489089, 394891352
Change-Id: Ibe5e18bcf4b199868a63888e6ab52a0cf4b2b8fd
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/128630
Commit-Queue: Lei Zhang <thestig@chromium.org>
Reviewed-by: Tom Sepez <tsepez@chromium.org>
Reviewed-by: Thomas Sepez <tsepez@google.com>
diff --git a/core/fpdfapi/font/cpdf_tounicodemap.cpp b/core/fpdfapi/font/cpdf_tounicodemap.cpp index 0e686e9..7f82390 100644 --- a/core/fpdfapi/font/cpdf_tounicodemap.cpp +++ b/core/fpdfapi/font/cpdf_tounicodemap.cpp
@@ -23,6 +23,12 @@ constexpr uint32_t kCidLimit = 0xffff; +// Per spec, bfchar and bfrange sections should have at most 100 entries. Some +// PDFs violate this part of the spec and other PDF parsers tolerate it. So set +// an artificially high limit that should be good enough for PDFs in the wild, +// but not so high that it slows down fuzzers. +constexpr int kOutOfSpecBFLimit = 160000; + WideString StringDataAdd(WideString str) { WideString ret; wchar_t value = 1; @@ -186,7 +192,7 @@ std::vector<CodeWord> code_words; const int raw_count = StringToInt(previous_word); - bool is_valid = raw_count >= 0 && raw_count <= 100; + bool is_valid = raw_count >= 0 && raw_count <= kOutOfSpecBFLimit; const size_t expected_count = is_valid ? static_cast<size_t>(raw_count) : 0; code_words.reserve(expected_count); @@ -243,7 +249,7 @@ std::vector<Range> ranges; const int raw_count = StringToInt(previous_word); - bool is_valid = raw_count >= 0 && raw_count <= 100; + bool is_valid = raw_count >= 0 && raw_count <= kOutOfSpecBFLimit; const size_t expected_count = is_valid ? static_cast<size_t>(raw_count) : 0; ranges.reserve(expected_count);
diff --git a/core/fpdfapi/font/cpdf_tounicodemap_unittest.cpp b/core/fpdfapi/font/cpdf_tounicodemap_unittest.cpp index d516696..b16163f 100644 --- a/core/fpdfapi/font/cpdf_tounicodemap_unittest.cpp +++ b/core/fpdfapi/font/cpdf_tounicodemap_unittest.cpp
@@ -82,6 +82,131 @@ } } +TEST(CPDFToUnicodeMapTest, HandleBeginBFCharTolerateOutOfSpecCount) { + // Tolerate more than 100 entries. + static constexpr uint8_t kTooManyEntriesInput[] = + "112 beginbfchar" + "<0000><0008>" + "<0001><0009>" + "<0002><000A>" + "<0003><000B>" + "<0004><000C>" + "<0005><000D>" + "<0006><000E>" + "<0007><000F>" + "<0008><0000>" + "<0009><0001>" + "<000A><0002>" + "<000B><0003>" + "<000C><0004>" + "<000D><0005>" + "<000E><0006>" + "<000F><0007>" + "<0010><0018>" + "<0011><0019>" + "<0012><001A>" + "<0013><001B>" + "<0014><001C>" + "<0015><001D>" + "<0016><001E>" + "<0017><001F>" + "<0018><0010>" + "<0019><0011>" + "<001A><0012>" + "<001B><0013>" + "<001C><0014>" + "<001D><0015>" + "<001E><0016>" + "<001F><0017>" + "<0020><0028>" + "<0021><0029>" + "<0022><002A>" + "<0023><002B>" + "<0024><002C>" + "<0025><002D>" + "<0026><002E>" + "<0027><002F>" + "<0028><0020>" + "<0029><0021>" + "<002A><0022>" + "<002B><0023>" + "<002C><0024>" + "<002D><0025>" + "<002E><0026>" + "<002F><0027>" + "<0030><0038>" + "<0031><0039>" + "<0032><003A>" + "<0033><003B>" + "<0034><003C>" + "<0035><003D>" + "<0036><003E>" + "<0037><003F>" + "<0038><0030>" + "<0039><0031>" + "<003A><0032>" + "<003B><0033>" + "<003C><0034>" + "<003D><0035>" + "<003E><0036>" + "<003F><0037>" + "<0040><0048>" + "<0041><0049>" + "<0042><004A>" + "<0043><004B>" + "<0044><004C>" + "<0045><004D>" + "<0046><004E>" + "<0047><004F>" + "<0048><0040>" + "<0049><0041>" + "<004A><0042>" + "<004B><0043>" + "<004C><0044>" + "<004D><0045>" + "<004E><0046>" + "<004F><0047>" + "<0050><0058>" + "<0051><0059>" + "<0052><005A>" + "<0053><005B>" + "<0054><005C>" + "<0055><005D>" + "<0056><005E>" + "<0057><005F>" + "<0058><0050>" + "<0059><0051>" + "<005A><0052>" + "<005B><0053>" + "<005C><0054>" + "<005D><0055>" + "<005E><0056>" + "<005F><0057>" + "<0060><0068>" + "<0061><0069>" + "<0062><006A>" + "<0063><006B>" + "<0064><006C>" + "<0065><006D>" + "<0066><006E>" + "<0067><006F>" + "<0068><0060>" + 
"<0069><0061>" + "<006A><0062>" + "<006B><0063>" + "<006C><0064>" + "<006D><0065>" + "<006E><0066>" + "<006F><0067>" + "endbfchar"; + auto stream = pdfium::MakeRetain<CPDF_Stream>(kTooManyEntriesInput); + CPDF_ToUnicodeMap map(stream); + EXPECT_EQ(9u, map.ReverseLookup(0x0001)); + EXPECT_EQ(111u, map.ReverseLookup(0x0067)); + EXPECT_EQ(1u, map.GetUnicodeCountByCharcodeForTesting(1u)); + EXPECT_EQ(1u, map.GetUnicodeCountByCharcodeForTesting(111u)); +} + TEST(CPDFToUnicodeMapTest, HandleBeginBFRangeRejectsInvalidCidValues) { { static constexpr uint8_t kInput1[] =