Ensure all TrueType Unicode cmap formats are equal

A few parts of the PDF specification specifically call out TrueType cmap
format (3,1) as the sole TrueType Unicode cmap format. However, from a
user perspective the format of the TrueType cmap table is irrelevant;
the only thing that matters is that the character map converts Unicode
code points into glyph ids.

Add a test that ensures TrueType cmap format (0,3) is treated the same
as TrueType cmap format (3,1) in the case where a /Encoding
/MacRomanEncoding and (an inaccurate) /ToUnicode are present.

Bug: 378932943
Change-Id: Ifc82c51e3fa06c0ad1c559468b26c3bf8fc88689
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/133210
Reviewed-by: Lei Zhang <thestig@chromium.org>
Reviewed-by: Ben Wagner <bungeman@google.com>
Commit-Queue: Ben Wagner <bungeman@google.com>
diff --git a/core/fpdfapi/font/BUILD.gn b/core/fpdfapi/font/BUILD.gn
index eb8dbcf..14c1e9c 100644
--- a/core/fpdfapi/font/BUILD.gn
+++ b/core/fpdfapi/font/BUILD.gn
@@ -61,6 +61,7 @@
     "cpdf_cmapparser_unittest.cpp",
     "cpdf_simplefont_unittest.cpp",
     "cpdf_tounicodemap_unittest.cpp",
+    "cpdf_truetypefont_unittest.cpp",
   ]
   deps = [
     ":font",
diff --git a/core/fpdfapi/font/cpdf_truetypefont_unittest.cpp b/core/fpdfapi/font/cpdf_truetypefont_unittest.cpp
new file mode 100644
index 0000000..4d4d508
--- /dev/null
+++ b/core/fpdfapi/font/cpdf_truetypefont_unittest.cpp
@@ -0,0 +1,210 @@
+// Copyright 2025 The PDFium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "core/fpdfapi/font/cpdf_truetypefont.h"
+
+#include <stdint.h>
+
+#include <array>
+#include <iterator>
+#include <string_view>
+#include <utility>
+
+#include "core/fpdfapi/page/test_with_page_module.h"
+#include "core/fpdfapi/parser/cpdf_dictionary.h"
+#include "core/fpdfapi/parser/cpdf_name.h"
+#include "core/fpdfapi/parser/cpdf_reference.h"
+#include "core/fpdfapi/parser/cpdf_stream.h"
+#include "core/fpdfapi/parser/cpdf_test_document.h"
+#include "core/fxcrt/check_op.h"
+#include "core/fxcrt/retain_ptr.h"
+#include "core/fxcrt/span.h"
+#include "core/fxge/fontdata/chromefontdata/chromefontdata.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace {
+
+// Fixture for the CPDFTrueTypeFontTest cases below.
+using CPDFTrueTypeFontTest = TestWithPageModule;
+
+// The two fonts below are the same minimal TrueType font. They differ only in
+// the (platform ID, encoding ID) pair of their single Unicode cmap subtable,
+// plus the bookkeeping that depends on it (table checksums and the `head`
+// modification time). In both fonts the subtable maps U+002E to glyph 1.
+
+// Unicode cmap subtable stored as platform ID 0 (Unicode), encoding ID 3.
+constexpr std::array<uint8_t, 508> kCmap03Ttf = {
+    {0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x80, 0x00, 0x03, 0x00, 0x00,
+     0x63, 0x6d, 0x61, 0x70, 0x00, 0x09, 0x00, 0x63, 0x00, 0x00, 0x01, 0x10,
+     0x00, 0x00, 0x00, 0x2c, 0x67, 0x6c, 0x79, 0x66, 0x15, 0x59, 0x2e, 0xc4,
+     0x00, 0x00, 0x01, 0x44, 0x00, 0x00, 0x00, 0x30, 0x68, 0x65, 0x61, 0x64,
+     0xf4, 0x41, 0xf2, 0xdf, 0x00, 0x00, 0x00, 0x8c, 0x00, 0x00, 0x00, 0x36,
+     0x68, 0x68, 0x65, 0x61, 0x0d, 0x3d, 0x05, 0x03, 0x00, 0x00, 0x00, 0xc4,
+     0x00, 0x00, 0x00, 0x24, 0x68, 0x6d, 0x74, 0x78, 0x08, 0x39, 0x01, 0xba,
+     0x00, 0x00, 0x01, 0x08, 0x00, 0x00, 0x00, 0x08, 0x6c, 0x6f, 0x63, 0x61,
+     0x00, 0x18, 0x00, 0x0d, 0x00, 0x00, 0x01, 0x3c, 0x00, 0x00, 0x00, 0x06,
+     0x6d, 0x61, 0x78, 0x70, 0x04, 0xd2, 0x04, 0x52, 0x00, 0x00, 0x00, 0xe8,
+     0x00, 0x00, 0x00, 0x20, 0x6e, 0x61, 0x6d, 0x65, 0x02, 0x5f, 0x01, 0xe1,
+     0x00, 0x00, 0x01, 0x74, 0x00, 0x00, 0x00, 0x88, 0x00, 0x01, 0x00, 0x00,
+     0x00, 0x05, 0x02, 0x8f, 0x18, 0x96, 0xe8, 0x3b, 0x5f, 0x0f, 0x3c, 0xf5,
+     0x08, 0x1b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0xa2, 0xe3, 0x27, 0x2a,
+     0x00, 0x00, 0x00, 0x00, 0xe4, 0x73, 0x7f, 0x28, 0x00, 0xba, 0x00, 0x00,
+     0x05, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x01, 0x00, 0x00,
+     0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x07, 0x3e, 0xfe, 0x4e,
+     0x00, 0x43, 0x06, 0x00, 0x00, 0xba, 0x00, 0xb2, 0x05, 0x00, 0x00, 0x01,
+     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+     0x00, 0x00, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x04,
+     0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x10, 0x00, 0x2f,
+     0x00, 0x56, 0x00, 0x00, 0x04, 0x68, 0x04, 0x1d, 0x00, 0x00, 0x00, 0x00,
+     0x06, 0x00, 0x01, 0x00, 0x02, 0x39, 0x00, 0xba, 0x00, 0x00, 0x00, 0x01,
+     0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x04, 0x00, 0x20,
+     0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2e,
+     0xff, 0xff, 0x00, 0x00, 0x00, 0x2e, 0xff, 0xff, 0xff, 0xd3, 0x00, 0x01,
+     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x18, 0x00, 0x00,
+     0x00, 0x01, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x05, 0x00, 0x00, 0x03,
+     0x00, 0x00, 0x21, 0x11, 0x21, 0x11, 0x01, 0x00, 0x04, 0x00, 0x05, 0x00,
+     0xfb, 0x00, 0x00, 0x01, 0x00, 0xba, 0x00, 0x00, 0x01, 0x87, 0x00, 0xcd,
+     0x00, 0x03, 0x00, 0x00, 0x33, 0x35, 0x33, 0x15, 0xba, 0xcd, 0xcd, 0xcd,
+     0x00, 0x00, 0x00, 0x09, 0x00, 0x72, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00,
+     0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00,
+     0x00, 0x01, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00,
+     0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00,
+     0x00, 0x03, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00,
+     0x00, 0x04, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00,
+     0x00, 0x05, 0x00, 0x02, 0x00, 0x08, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00,
+     0x00, 0x06, 0x00, 0x06, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00,
+     0x00, 0x07, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00,
+     0x00, 0x08, 0x00, 0x06, 0x00, 0x10, 0x00, 0x41, 0x00, 0x52, 0x00, 0x41,
+     0x00, 0x52, 0x00, 0x56, 0x00, 0x41, 0x00, 0x4d, 0x00, 0x54, 0x00, 0x54,
+     0x00, 0x4d, 0x00, 0x43}};
+
+// Unicode cmap subtable stored as platform ID 3 (Windows), encoding ID 1.
+constexpr std::array<uint8_t, 508> kCmap31Ttf = {
+    {0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x80, 0x00, 0x03, 0x00, 0x00,
+     0x63, 0x6d, 0x61, 0x70, 0x00, 0x0c, 0x00, 0x61, 0x00, 0x00, 0x01, 0x10,
+     0x00, 0x00, 0x00, 0x2c, 0x67, 0x6c, 0x79, 0x66, 0x15, 0x59, 0x2e, 0xc4,
+     0x00, 0x00, 0x01, 0x44, 0x00, 0x00, 0x00, 0x30, 0x68, 0x65, 0x61, 0x64,
+     0xf4, 0x41, 0xf4, 0x22, 0x00, 0x00, 0x00, 0x8c, 0x00, 0x00, 0x00, 0x36,
+     0x68, 0x68, 0x65, 0x61, 0x0d, 0x3d, 0x05, 0x03, 0x00, 0x00, 0x00, 0xc4,
+     0x00, 0x00, 0x00, 0x24, 0x68, 0x6d, 0x74, 0x78, 0x08, 0x39, 0x01, 0xba,
+     0x00, 0x00, 0x01, 0x08, 0x00, 0x00, 0x00, 0x08, 0x6c, 0x6f, 0x63, 0x61,
+     0x00, 0x18, 0x00, 0x0d, 0x00, 0x00, 0x01, 0x3c, 0x00, 0x00, 0x00, 0x06,
+     0x6d, 0x61, 0x78, 0x70, 0x04, 0xd2, 0x04, 0x52, 0x00, 0x00, 0x00, 0xe8,
+     0x00, 0x00, 0x00, 0x20, 0x6e, 0x61, 0x6d, 0x65, 0x02, 0x5f, 0x01, 0xe1,
+     0x00, 0x00, 0x01, 0x74, 0x00, 0x00, 0x00, 0x88, 0x00, 0x01, 0x00, 0x00,
+     0x00, 0x05, 0x02, 0x8f, 0x18, 0x90, 0xe5, 0xb9, 0x5f, 0x0f, 0x3c, 0xf5,
+     0x08, 0x1b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0xa2, 0xe3, 0x27, 0x2a,
+     0x00, 0x00, 0x00, 0x00, 0xe4, 0x73, 0x80, 0x6b, 0x00, 0xba, 0x00, 0x00,
+     0x05, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x01, 0x00, 0x00,
+     0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x07, 0x3e, 0xfe, 0x4e,
+     0x00, 0x43, 0x06, 0x00, 0x00, 0xba, 0x00, 0xb2, 0x05, 0x00, 0x00, 0x01,
+     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+     0x00, 0x00, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x04,
+     0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x10, 0x00, 0x2f,
+     0x00, 0x56, 0x00, 0x00, 0x04, 0x68, 0x04, 0x1d, 0x00, 0x00, 0x00, 0x00,
+     0x06, 0x00, 0x01, 0x00, 0x02, 0x39, 0x00, 0xba, 0x00, 0x00, 0x00, 0x01,
+     0x00, 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x04, 0x00, 0x20,
+     0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2e,
+     0xff, 0xff, 0x00, 0x00, 0x00, 0x2e, 0xff, 0xff, 0xff, 0xd3, 0x00, 0x01,
+     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x18, 0x00, 0x00,
+     0x00, 0x01, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x05, 0x00, 0x00, 0x03,
+     0x00, 0x00, 0x21, 0x11, 0x21, 0x11, 0x01, 0x00, 0x04, 0x00, 0x05, 0x00,
+     0xfb, 0x00, 0x00, 0x01, 0x00, 0xba, 0x00, 0x00, 0x01, 0x87, 0x00, 0xcd,
+     0x00, 0x03, 0x00, 0x00, 0x33, 0x35, 0x33, 0x15, 0xba, 0xcd, 0xcd, 0xcd,
+     0x00, 0x00, 0x00, 0x09, 0x00, 0x72, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00,
+     0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00,
+     0x00, 0x01, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00,
+     0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00,
+     0x00, 0x03, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00,
+     0x00, 0x04, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00,
+     0x00, 0x05, 0x00, 0x02, 0x00, 0x08, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00,
+     0x00, 0x06, 0x00, 0x06, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00,
+     0x00, 0x07, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00,
+     0x00, 0x08, 0x00, 0x06, 0x00, 0x10, 0x00, 0x41, 0x00, 0x52, 0x00, 0x41,
+     0x00, 0x52, 0x00, 0x56, 0x00, 0x41, 0x00, 0x4d, 0x00, 0x54, 0x00, 0x54,
+     0x00, 0x4d, 0x00, 0x43}};
+
+}  // namespace
+
+// Builds two TrueType fonts that differ only in their Unicode cmap subtable
+// IDs, gives both /Encoding /MacRomanEncoding plus a deliberately inaccurate
+// /ToUnicode, and checks that char code 0x2E resolves to the same glyph (1)
+// in each.
+TEST_F(CPDFTrueTypeFontTest, AllUnicodeCmapsTreatedEqually) {
+  // Wraps `font_data` in a /FontFile2 stream and returns the font created
+  // from a /Subtype /TrueType font dictionary that references it.
+  auto make_font =
+      [](CPDF_TestDocument& doc,
+         pdfium::span<const uint8_t> font_data) -> RetainPtr<CPDF_Font> {
+    auto font_file_stream = doc.NewIndirect<CPDF_Stream>(
+        DataVector<uint8_t>(std::begin(font_data), std::end(font_data)),
+        pdfium::MakeRetain<CPDF_Dictionary>());
+    const uint32_t font_file_stream_object_number =
+        font_file_stream->GetObjNum();
+    CHECK_GT(font_file_stream_object_number, 0u);
+    auto font_file_stream_ref = pdfium::MakeRetain<CPDF_Reference>(
+        &doc, font_file_stream_object_number);
+
+    auto font_descriptor_dict = pdfium::MakeRetain<CPDF_Dictionary>();
+    font_descriptor_dict->SetNewFor<CPDF_Name>("Type", "FontDescriptor");
+    font_descriptor_dict->SetFor("FontFile2", std::move(font_file_stream_ref));
+
+    // The MacRoman 0x2E char code to U+0001 mapping is intentionally
+    // non-standard. A string_view is used so that the string literal's
+    // terminating NUL is not copied into the stream.
+    static constexpr std::string_view kToUnicode = R"(
+/CIDInit /ProcSet findresource begin
+12 dict begin
+begincmap
+/CIDSystemInfo <<
+  /Registry (Adobe)
+  /Ordering (UCS)
+  /Supplement 0
+>> def
+/CMapName /Adobe-Identity-UCS def
+/CMapType 2 def
+1 begincodespacerange
+<00><FF>
+endcodespacerange
+1 beginbfrange
+<2E><2E><0001>
+endbfrange
+endcmap
+CMapName currentdict /CMap defineresource pop
+end
+end)";
+    auto to_unicode_stream = doc.NewIndirect<CPDF_Stream>(
+        DataVector<uint8_t>(kToUnicode.begin(), kToUnicode.end()),
+        pdfium::MakeRetain<CPDF_Dictionary>());
+    const uint32_t to_unicode_object_number = to_unicode_stream->GetObjNum();
+    CHECK_GT(to_unicode_object_number, 0u);
+    auto to_unicode_ref =
+        pdfium::MakeRetain<CPDF_Reference>(&doc, to_unicode_object_number);
+
+    auto font_dict = pdfium::MakeRetain<CPDF_Dictionary>();
+    font_dict->SetNewFor<CPDF_Name>("BaseFont", "CHEESE+Swiss");
+    font_dict->SetNewFor<CPDF_Name>("Type", "Font");
+    font_dict->SetNewFor<CPDF_Name>("Subtype", "TrueType");
+    font_dict->SetNewFor<CPDF_Name>("Encoding", "MacRomanEncoding");
+    font_dict->SetFor("ToUnicode", std::move(to_unicode_ref));
+    font_dict->SetFor("FontDescriptor", std::move(font_descriptor_dict));
+
+    return CPDF_Font::Create(&doc, std::move(font_dict), nullptr);
+  };
+
+  CPDF_TestDocument doc;
+  auto cmap31 = make_font(doc, kCmap31Ttf);
+  auto cmap03 = make_font(doc, kCmap03Ttf);
+  ASSERT_TRUE(cmap31);
+  ASSERT_TRUE(cmap03);
+
+  constexpr uint32_t kPeriod = 0x2E;
+  // Passed to GlyphFromCharCode() by pointer; the test never reads it back.
+  bool unused_is_vert = false;
+  const int cmap31_glyph = cmap31->GlyphFromCharCode(kPeriod, &unused_is_vert);
+  const int cmap03_glyph = cmap03->GlyphFromCharCode(kPeriod, &unused_is_vert);
+
+  EXPECT_EQ(cmap31_glyph, 1);
+  EXPECT_EQ(cmap31_glyph, cmap03_glyph);
+}