| // Copyright 2026 The PDFium Authors |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "core/fpdfapi/edit/cpdf_fontsubsetter.h" |
| |
| #include <stdint.h> |
| |
| #include <array> |
| #include <numeric> |
| #include <string> |
| #include <vector> |
| |
| #include "core/fpdfapi/font/cpdf_font.h" |
| #include "core/fpdfapi/parser/cpdf_dictionary.h" |
| #include "core/fpdfapi/parser/cpdf_document.h" |
| #include "core/fpdfapi/parser/cpdf_name.h" |
| #include "core/fpdfapi/parser/cpdf_stream.h" |
| #include "core/fxcrt/bytestring.h" |
| #include "core/fxcrt/fx_extension.h" |
| #include "core/fxcrt/retain_ptr.h" |
| #include "core/fxge/fx_font.h" |
| #include "public/fpdf_edit.h" |
| #include "public/fpdfview.h" |
| #include "testing/embedder_test.h" |
| #include "testing/fx_string_testhelpers.h" |
| #include "testing/gmock/include/gmock/gmock.h" |
| #include "testing/gtest/include/gtest/gtest.h" |
| #include "testing/utils/file_util.h" |
| #include "testing/utils/path_service.h" |
| |
| using ::testing::IsEmpty; |
| using ::testing::Matcher; |
| using ::testing::UnorderedElementsAre; |
| |
| namespace { |
| |
| constexpr char kArimoBaseFontName[] = "Arimo-Regular"; |
| constexpr char kLohitTamilBaseFontName[] = "Lohit-Tamil"; |
| constexpr char kNotoSansBaseFontName[] = "NotoSansCJKjp-Regular"; |
| |
| constexpr size_t kSubsettedFontPrefixWithPlusLength = |
| kSubsettedFontPrefixLength + 1; |
| |
| // These are cheating slightly to avoid a layering violation, since this file |
| // cannot include fpdfsdk/cpdfsdk_helpers.h. |
| CPDF_Document* CPDFDocumentFromFPDFDocument(FPDF_DOCUMENT document) { |
| return reinterpret_cast<CPDF_Document*>(document); |
| } |
| CPDF_Font* CPDFFontFromFPDFFont(FPDF_FONT font) { |
| return reinterpret_cast<CPDF_Font*>(font); |
| } |
| |
| // Returns the file path for a test font provided by the third-party test_fonts. |
| std::string GetTestFontFilePath(const std::string& file_name) { |
| return PathService::GetThirdPartyFilePath("test_fonts/test_fonts/" + |
| file_name); |
| } |
| |
| // Returns a list of new object nums used for testing. Since the object nums |
| // list is used to see if an object num is new, this can exceed the amount of |
| // objects in the document. This uses a large enough number sufficient for these |
| // tests. As a result, tests can avoid having to explicitly write the new object |
| // numbers added when writing text to the document. |
| std::vector<uint32_t> GetTestNewObjNums() { |
| std::vector<uint32_t> test_obj_nums(100); |
| std::iota(test_obj_nums.begin(), test_obj_nums.end(), 1); |
| return test_obj_nums; |
| } |
| |
| // Returns if `actual_name` follows the "XXXXXX+BaseName" pattern, where 'X' is |
| // an uppercase letter. |
| bool IsSubsetFontName(const ByteString& actual_name, |
| ByteStringView expected_base_name) { |
| if (actual_name.GetLength() != |
| kSubsettedFontPrefixWithPlusLength + expected_base_name.GetLength()) { |
| return false; |
| } |
| |
| // Check the first '+' is in the correct position.. |
| std::optional<size_t> first_plus = actual_name.Find('+'); |
| if (!first_plus.has_value() || |
| first_plus.value() != kSubsettedFontPrefixLength) { |
| return false; |
| } |
| |
| // Check there is not an additional '+'. |
| std::optional<size_t> last_plus = actual_name.ReverseFind('+'); |
| if (!last_plus.has_value() || first_plus.value() != last_plus.value()) { |
| return false; |
| } |
| |
| // Check for the tag. |
| for (char ch : actual_name.First(kSubsettedFontPrefixLength)) { |
| if (!FXSYS_IsUpperASCII(ch)) { |
| return false; |
| } |
| } |
| |
| return actual_name.Substr(kSubsettedFontPrefixWithPlusLength) == |
| expected_base_name; |
| } |
| |
| // Matcher that verifies the stream size is strictly within the range min |
| // inclusive, max exclusive. |
| MATCHER_P2(StreamSizeIsWithinRange, min_size, max_size, "") { |
| const auto& obj = arg.second; |
| if (!obj || !obj->IsStream()) { |
| return false; |
| } |
| |
| size_t actual_size = obj->AsStream()->GetRawSize(); |
| if (actual_size < min_size || actual_size >= max_size) { |
| return false; |
| } |
| return true; |
| } |
| |
| // Matches the Root Font, checking for a valid subset font name. |
| MATCHER_P(IsRootFont, expected_base_name, "") { |
| const auto& obj = arg.second; |
| if (!obj || !obj->IsDictionary()) { |
| return false; |
| } |
| |
| const CPDF_Dictionary* dict = obj->AsDictionary(); |
| if (dict->GetNameFor("Type") != "Font" || |
| dict->GetNameFor("Subtype") != "Type0" || |
| dict->GetNameFor("Encoding") != "Identity-H" || |
| !dict->KeyExist("DescendantFonts")) { |
| return false; |
| } |
| |
| return IsSubsetFontName(dict->GetNameFor("BaseFont"), expected_base_name); |
| } |
| |
| // Matches the CID Font, checking for a valid subset font name. |
| MATCHER_P(IsCIDFont, expected_base_name, "") { |
| const auto& obj = arg.second; |
| if (!obj || !obj->IsDictionary()) { |
| return false; |
| } |
| |
| const CPDF_Dictionary* dict = obj->AsDictionary(); |
| if (dict->GetNameFor("Type") != "Font" || |
| dict->GetNameFor("Subtype") != "CIDFontType2" || |
| !dict->KeyExist("CIDSystemInfo")) { |
| return false; |
| } |
| |
| return IsSubsetFontName(dict->GetNameFor("BaseFont"), expected_base_name); |
| } |
| |
| // Matches the FontDescriptor, checking for a valid subset font name. |
| MATCHER_P(IsFontDescriptor, expected_base_name, "") { |
| const auto& obj = arg.second; |
| if (!obj || !obj->IsDictionary()) { |
| return false; |
| } |
| |
| const CPDF_Dictionary* dict = obj->AsDictionary(); |
| if (dict->GetNameFor("Type") != "FontDescriptor" || |
| !dict->KeyExist("FontFile2")) { |
| return false; |
| } |
| |
| return IsSubsetFontName(dict->GetNameFor("FontName"), expected_base_name); |
| } |
| |
| } // namespace |
| |
| // Prints overrides nicely for debugging purposes. |
| void PrintTo(const RetainPtr<const CPDF_Object>& obj, std::ostream* os) { |
| if (!obj) { |
| *os << "nullptr"; |
| return; |
| } |
| |
| *os << "(Obj type=" << obj->GetType(); |
| if (obj->IsDictionary()) { |
| const CPDF_Dictionary* dict = obj->AsDictionary(); |
| *os << " {"; |
| |
| static constexpr std::array<const char*, 4> kKeys = { |
| "Type", "Subtype", "BaseFont", "FontName"}; |
| bool first = true; |
| for (const char* key : kKeys) { |
| if (dict->KeyExist(key)) { |
| if (!first) { |
| *os << ", "; |
| } |
| *os << key << "=" << dict->GetObjectFor(key)->GetString(); |
| first = false; |
| } |
| } |
| *os << "}"; |
| } else if (obj->IsStream()) { |
| *os << " size=" << obj->AsStream()->GetRawSize(); |
| } |
| |
| *os << ")"; |
| } |
| |
| class CPDFFontSubsetterTest : public EmbedderTest { |
| public: |
| void InsertNewTextObject(const std::wstring& text, |
| FPDF_PAGE page, |
| FPDF_FONT font) { |
| FPDF_PAGEOBJECT text_object = |
| FPDFPageObj_CreateTextObj(document(), font, 20.0f); |
| EXPECT_TRUE(text_object); |
| |
| ScopedFPDFWideString fpdf_text = GetFPDFWideString(text); |
| EXPECT_TRUE(FPDFText_SetText(text_object, fpdf_text.get())); |
| |
| const FS_MATRIX matrix{1.0f, 0.0f, 0.0f, 1.0f, 50.0f, 200.0f}; |
| ASSERT_TRUE(FPDFPageObj_TransformF(text_object, &matrix)); |
| FPDFPage_InsertObject(page, text_object); |
| EXPECT_TRUE(FPDFPage_GenerateContent(page)); |
| } |
| }; |
| |
| TEST_F(CPDFFontSubsetterTest, NoNewText) { |
| CreateEmptyDocument(); |
| ScopedFPDFPage page(FPDFPage_New(document(), 0, 400, 400)); |
| |
| CPDF_FontSubsetter subsetter(CPDFDocumentFromFPDFDocument(document())); |
| |
| EXPECT_THAT(subsetter.GenerateObjectOverrides({}), IsEmpty()); |
| |
| EXPECT_THAT(subsetter.GenerateObjectOverrides(GetTestNewObjNums()), |
| IsEmpty()); |
| |
| // Not a text object. |
| FPDF_PAGEOBJECT rect = FPDFPageObj_CreateNewRect(20, 100, 50, 50); |
| FPDFPage_InsertObject(page.get(), rect); |
| EXPECT_THAT(subsetter.GenerateObjectOverrides(GetTestNewObjNums()), |
| IsEmpty()); |
| } |
| |
| TEST_F(CPDFFontSubsetterTest, StandardFont) { |
| CreateEmptyDocument(); |
| ScopedFPDFPage page(FPDFPage_New(document(), 0, 400, 400)); |
| |
| ScopedFPDFFont font(FPDFText_LoadStandardFont(document(), "Helvetica")); |
| ASSERT_TRUE(font); |
| |
| ASSERT_NO_FATAL_FAILURE( |
| InsertNewTextObject(L"Hello world", page.get(), font.get())); |
| |
| CPDF_FontSubsetter subsetter(CPDFDocumentFromFPDFDocument(document())); |
| EXPECT_THAT(subsetter.GenerateObjectOverrides(GetTestNewObjNums()), |
| IsEmpty()); |
| } |
| |
| TEST_F(CPDFFontSubsetterTest, OpenType) { |
| CreateEmptyDocument(); |
| ScopedFPDFPage page(FPDFPage_New(document(), 0, 400, 400)); |
| |
| const std::string font_path = |
| GetTestFontFilePath("NotoSansCJKjp-Regular.otf"); |
| ASSERT_FALSE(font_path.empty()); |
| |
| std::vector<uint8_t> font_data = GetFileContents(font_path.c_str()); |
| const size_t original_size = font_data.size(); |
| ASSERT_EQ(16427228u, original_size); |
| |
| ScopedFPDFFont font(FPDFText_LoadFont(document(), font_data.data(), |
| font_data.size(), FPDF_FONT_TRUETYPE, |
| /*cid=*/true)); |
| ASSERT_TRUE(font); |
| |
| ASSERT_NO_FATAL_FAILURE(InsertNewTextObject(L"这", page.get(), font.get())); |
| |
| CPDF_FontSubsetter subsetter(CPDFDocumentFromFPDFDocument(document())); |
| auto overrides = subsetter.GenerateObjectOverrides(GetTestNewObjNums()); |
| ASSERT_EQ(4u, overrides.size()); |
| |
| // Subset size is ~2.5% of the original font file, i.e. ~450 KB. |
| EXPECT_THAT( |
| overrides, |
| UnorderedElementsAre( |
| StreamSizeIsWithinRange(original_size * 0.02, original_size * 0.03), |
| IsRootFont(kNotoSansBaseFontName), IsCIDFont(kNotoSansBaseFontName), |
| IsFontDescriptor(kNotoSansBaseFontName))); |
| } |
| |
| TEST_F(CPDFFontSubsetterTest, TrueType) { |
| CreateEmptyDocument(); |
| ScopedFPDFPage page(FPDFPage_New(document(), 0, 400, 400)); |
| |
| const std::string font_path = GetTestFontFilePath("Arimo-Regular.ttf"); |
| ASSERT_FALSE(font_path.empty()); |
| |
| std::vector<uint8_t> font_data = GetFileContents(font_path.c_str()); |
| const size_t original_size = font_data.size(); |
| ASSERT_EQ(436180u, original_size); |
| |
| ScopedFPDFFont font(FPDFText_LoadFont(document(), font_data.data(), |
| font_data.size(), FPDF_FONT_TRUETYPE, |
| /*cid=*/true)); |
| ASSERT_TRUE(font); |
| |
| ASSERT_NO_FATAL_FAILURE( |
| InsertNewTextObject(L"Hello world", page.get(), font.get())); |
| |
| CPDF_FontSubsetter subsetter(CPDFDocumentFromFPDFDocument(document())); |
| auto overrides = subsetter.GenerateObjectOverrides(GetTestNewObjNums()); |
| ASSERT_EQ(4u, overrides.size()); |
| |
| // Subset size is ~3% of the original font file, i.e. ~13 KB. |
| EXPECT_THAT( |
| overrides, |
| UnorderedElementsAre( |
| StreamSizeIsWithinRange(original_size * 0.025, original_size * 0.035), |
| IsRootFont(kArimoBaseFontName), IsCIDFont(kArimoBaseFontName), |
| IsFontDescriptor(kArimoBaseFontName))); |
| } |
| |
| TEST_F(CPDFFontSubsetterTest, SingleFontMultipleTexts) { |
| CreateEmptyDocument(); |
| ScopedFPDFPage page(FPDFPage_New(document(), 0, 400, 400)); |
| |
| const std::string font_path = GetTestFontFilePath("Arimo-Regular.ttf"); |
| ASSERT_FALSE(font_path.empty()); |
| |
| std::vector<uint8_t> font_data = GetFileContents(font_path.c_str()); |
| const size_t original_size = font_data.size(); |
| ASSERT_EQ(436180u, original_size); |
| |
| ScopedFPDFFont font(FPDFText_LoadFont(document(), font_data.data(), |
| font_data.size(), FPDF_FONT_TRUETYPE, |
| /*cid=*/true)); |
| ASSERT_TRUE(font); |
| |
| ASSERT_NO_FATAL_FAILURE( |
| InsertNewTextObject(L"Abcdefg", page.get(), font.get())); |
| ASSERT_NO_FATAL_FAILURE( |
| InsertNewTextObject(L"Hijklmnop", page.get(), font.get())); |
| |
| CPDF_FontSubsetter subsetter(CPDFDocumentFromFPDFDocument(document())); |
| auto overrides = subsetter.GenerateObjectOverrides(GetTestNewObjNums()); |
| ASSERT_EQ(4u, overrides.size()); |
| |
| // Subset size is ~3.5% of the original font file, i.e. ~15 KB. |
| EXPECT_THAT( |
| overrides, |
| UnorderedElementsAre( |
| StreamSizeIsWithinRange(original_size * 0.03, original_size * 0.04), |
| IsRootFont(kArimoBaseFontName), IsCIDFont(kArimoBaseFontName), |
| IsFontDescriptor(kArimoBaseFontName))); |
| } |
| |
| TEST_F(CPDFFontSubsetterTest, MultipleFontsMultipleTexts) { |
| CreateEmptyDocument(); |
| ScopedFPDFPage page(FPDFPage_New(document(), 0, 400, 400)); |
| |
| const std::string font_path1 = GetTestFontFilePath("Lohit-Tamil.ttf"); |
| ASSERT_FALSE(font_path1.empty()); |
| const std::string font_path2 = GetTestFontFilePath("Arimo-Regular.ttf"); |
| ASSERT_FALSE(font_path2.empty()); |
| |
| std::vector<uint8_t> font_data1 = GetFileContents(font_path1.c_str()); |
| const size_t original_size1 = font_data1.size(); |
| ASSERT_EQ(48908u, original_size1); |
| std::vector<uint8_t> font_data2 = GetFileContents(font_path2.c_str()); |
| const size_t original_size2 = font_data2.size(); |
| ASSERT_EQ(436180u, original_size2); |
| |
| ScopedFPDFFont font1(FPDFText_LoadFont(document(), font_data1.data(), |
| font_data1.size(), FPDF_FONT_TRUETYPE, |
| /*cid=*/true)); |
| ASSERT_TRUE(font1); |
| ScopedFPDFFont font2(FPDFText_LoadFont(document(), font_data2.data(), |
| font_data2.size(), FPDF_FONT_TRUETYPE, |
| /*cid=*/true)); |
| ASSERT_TRUE(font2); |
| |
| ASSERT_NO_FATAL_FAILURE( |
| InsertNewTextObject(L"வணக்கம்", page.get(), font1.get())); |
| ASSERT_NO_FATAL_FAILURE( |
| InsertNewTextObject(L"Goodbye", page.get(), font2.get())); |
| |
| CPDF_FontSubsetter subsetter(CPDFDocumentFromFPDFDocument(document())); |
| auto overrides = subsetter.GenerateObjectOverrides(GetTestNewObjNums()); |
| ASSERT_EQ(8u, overrides.size()); |
| |
| // Subset size for `font_data1` is ~6% of the original file, i.e. ~3 KB. |
| // Subset size for `font_data2` is ~3% of the original file, i.e. ~13.3 KB. |
| EXPECT_THAT(overrides, |
| UnorderedElementsAre( |
| StreamSizeIsWithinRange(original_size1 * 0.055, |
| original_size1 * 0.065), |
| IsRootFont(kLohitTamilBaseFontName), |
| IsCIDFont(kLohitTamilBaseFontName), |
| IsFontDescriptor(kLohitTamilBaseFontName), |
| StreamSizeIsWithinRange(original_size2 * 0.025, |
| original_size2 * 0.035), |
| IsRootFont(kArimoBaseFontName), IsCIDFont(kArimoBaseFontName), |
| IsFontDescriptor(kArimoBaseFontName))); |
| } |
| |
| TEST_F(CPDFFontSubsetterTest, ReplaceExistingPrefix) { |
| CreateEmptyDocument(); |
| ScopedFPDFPage page(FPDFPage_New(document(), 0, 400, 400)); |
| |
| const std::string font_path = GetTestFontFilePath("Arimo-Regular.ttf"); |
| ASSERT_FALSE(font_path.empty()); |
| |
| // The file size for `font_path` is ~436 KB. |
| std::vector<uint8_t> font_data = GetFileContents(font_path.c_str()); |
| size_t original_size = font_data.size(); |
| ASSERT_GT(original_size, 0u); |
| |
| ScopedFPDFFont font(FPDFText_LoadFont(document(), font_data.data(), |
| font_data.size(), FPDF_FONT_TRUETYPE, |
| /*cid=*/true)); |
| |
| // Manually insert an existing prefix to the font name. |
| CPDF_Font* cfont = CPDFFontFromFPDFFont(font.get()); |
| RetainPtr<CPDF_Dictionary> font_dict = cfont->GetMutableFontDict(); |
| ASSERT_TRUE(font_dict); |
| |
| font_dict->SetNewFor<CPDF_Name>("BaseFont", "AAAAAA+Arimo-Regular"); |
| |
| ASSERT_NO_FATAL_FAILURE( |
| InsertNewTextObject(L"Hello world", page.get(), font.get())); |
| |
| CPDF_FontSubsetter subsetter(CPDFDocumentFromFPDFDocument(document())); |
| auto overrides = subsetter.GenerateObjectOverrides(GetTestNewObjNums()); |
| ASSERT_EQ(4u, overrides.size()); |
| |
| // Subset size is ~3% of the original font file, i.e. ~13 KB. |
| EXPECT_THAT( |
| overrides, |
| UnorderedElementsAre( |
| StreamSizeIsWithinRange(original_size * 0.025, original_size * 0.035), |
| IsRootFont(kArimoBaseFontName), IsCIDFont(kArimoBaseFontName), |
| IsFontDescriptor(kArimoBaseFontName))); |
| } |