Add FPDFText_LoadCidType2Font() API Provide an API similar to FPDFText_LoadFont() that allows the caller to specify custom ToUnicode and CIDToGIDMap data. Fix some nits in existing API documentation and tests along the way. Bug: pdfium:2120 Change-Id: I2ab9e16eeaa38f7d53fbb87066c295d9b8b80ec8 Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/115694 Reviewed-by: Tom Sepez <tsepez@chromium.org> Reviewed-by: Thomas Sepez <tsepez@google.com> Commit-Queue: Lei Zhang <thestig@chromium.org>
diff --git a/fpdfsdk/fpdf_edit_embeddertest.cpp b/fpdfsdk/fpdf_edit_embeddertest.cpp index a10ee2f..706ff72 100644 --- a/fpdfsdk/fpdf_edit_embeddertest.cpp +++ b/fpdfsdk/fpdf_edit_embeddertest.cpp
@@ -296,6 +296,7 @@ ScopedFPDFFont font(FPDFText_LoadFont(document(), font_data.data(), font_data.size(), FPDF_FONT_TRUETYPE, /*cid=*/true)); + ASSERT_TRUE(font); FPDF_PAGEOBJECT text_object = FPDFPageObj_CreateTextObj(document(), font.get(), 20.0f); EXPECT_TRUE(text_object); @@ -341,6 +342,7 @@ ScopedFPDFFont font(FPDFText_LoadFont(document(), font_data.data(), font_data.size(), FPDF_FONT_TRUETYPE, /*cid=*/true)); + ASSERT_TRUE(font); FPDF_PAGEOBJECT text_object = FPDFPageObj_CreateTextObj(document(), font.get(), 20.0f); EXPECT_TRUE(text_object); @@ -3431,7 +3433,7 @@ pdfium::span<const uint8_t> span = stock_font->GetFont()->GetFontSpan(); ScopedFPDFFont font(FPDFText_LoadFont(document(), span.data(), span.size(), FPDF_FONT_TYPE1, false)); - ASSERT_TRUE(font.get()); + ASSERT_TRUE(font); CPDF_Font* typed_font = CPDFFontFromFPDFFont(font.get()); EXPECT_TRUE(typed_font->IsType1Font()); @@ -3460,7 +3462,7 @@ pdfium::span<const uint8_t> span = stock_font->GetFont()->GetFontSpan(); ScopedFPDFFont font(FPDFText_LoadFont(document(), span.data(), span.size(), FPDF_FONT_TRUETYPE, false)); - ASSERT_TRUE(font.get()); + ASSERT_TRUE(font); CPDF_Font* typed_font = CPDFFontFromFPDFFont(font.get()); EXPECT_TRUE(typed_font->IsTrueTypeFont()); @@ -3489,7 +3491,7 @@ pdfium::span<const uint8_t> span = stock_font->GetFont()->GetFontSpan(); ScopedFPDFFont font(FPDFText_LoadFont(document(), span.data(), span.size(), FPDF_FONT_TYPE1, 1)); - ASSERT_TRUE(font.get()); + ASSERT_TRUE(font); CPDF_Font* typed_font = CPDFFontFromFPDFFont(font.get()); EXPECT_TRUE(typed_font->IsCIDFont()); @@ -3540,7 +3542,7 @@ pdfium::span<const uint8_t> span = stock_font->GetFont()->GetFontSpan(); ScopedFPDFFont font(FPDFText_LoadFont(document(), span.data(), span.size(), FPDF_FONT_TRUETYPE, 1)); - ASSERT_TRUE(font.get()); + ASSERT_TRUE(font); CPDF_Font* typed_font = CPDFFontFromFPDFFont(font.get()); EXPECT_TRUE(typed_font->IsCIDFont()); @@ -3595,7 +3597,7 @@ pdfium::span<const uint8_t> span = 
stock_font->GetFont()->GetFontSpan(); ScopedFPDFFont font(FPDFText_LoadFont(document(), span.data(), span.size(), FPDF_FONT_TRUETYPE, 0)); - ASSERT_TRUE(font.get()); + ASSERT_TRUE(font); // Add some text to the page FPDF_PAGEOBJECT text_object = @@ -3677,7 +3679,7 @@ // Load the data into a FPDF_Font. ScopedFPDFFont font(FPDFText_LoadFont(document(), span.data(), span.size(), FPDF_FONT_TRUETYPE, 1)); - ASSERT_TRUE(font.get()); + ASSERT_TRUE(font); // Add some text to the page FPDF_PAGEOBJECT text_object = @@ -3724,6 +3726,119 @@ } #endif // BUILDFLAG(IS_LINUX) || BUILDFLAG(IS_CHROMEOS) +TEST_F(FPDFEditEmbedderTest, LoadCidType2FontCustom) { + // This is the same test as FPDFEditEmbedderTest.EmbedNotoSansSCFont, but some + // of the font data is provided by the caller, instead of being generated. + CreateEmptyDocument(); + ScopedFPDFPage page(FPDFPage_New(document(), 0, 400, 400)); + std::string font_path; + ASSERT_TRUE(PathService::GetThirdPartyFilePath( + "NotoSansCJK/NotoSansSC-Regular.subset.otf", &font_path)); + + std::vector<uint8_t> font_data = GetFileContents(font_path.c_str()); + ASSERT_FALSE(font_data.empty()); + + // Caller-provided ToUnicode CMap. Its bfrange entries cover CIDs 1 to 9 and + // include every code point used by the text added below. + static const char kToUnicodeCMap[] = R"( +/CIDInit /ProcSet findresource begin +12 dict begin +begincmap +/CIDSystemInfo << + /Registry (Adobe) + /Ordering (Identity) + /Supplement 0 +>> def +/CMapName /Adobe-Identity-H def +/CMapType 2 def +1 begincodespacerange +<0000> <FFFF> +endcodespacerange +5 beginbfrange +<0001> <0003> [<0020> <3002> <2F00>] +<0003> <0004> [<4E00> <2F06>] +<0004> <0005> [<4E8C> <53E5>] +<0005> <0008> [<F906> <662F> <7B2C> <884C>] +<0008> <0009> [<FA08> <8FD9>] +endbfrange +endcmap +CMapName currentdict /CMap defineresource pop +end +end +)"; + + // Caller-provided CIDToGIDMap data: the byte pairs (0, 0) through (0, 9). + // NOTE(review): presumably two-byte glyph indices, one per CID - confirm. + const std::vector<uint8_t> cid_to_gid_map = {0, 0, 0, 1, 0, 2, 0, 3, 0, 4, + 0, 5, 0, 6, 0, 7, 0, 8, 0, 9}; + + ScopedFPDFFont font(FPDFText_LoadCidType2Font( + document(), font_data.data(), font_data.size(), kToUnicodeCMap, + cid_to_gid_map.data(), cid_to_gid_map.size())); + 
ASSERT_TRUE(font); + + FPDF_PAGEOBJECT text_object = + FPDFPageObj_CreateTextObj(document(), font.get(), 20.0f); + EXPECT_TRUE(text_object); + + // Test the characters which are either mapped to one single unicode or + // multiple unicodes in the embedded font. + ScopedFPDFWideString text = GetFPDFWideString(L"这是第一句。 这是第二行。"); + EXPECT_TRUE(FPDFText_SetText(text_object, text.get())); + + FPDFPageObj_Transform(text_object, 1, 0, 0, 1, 50, 200); + FPDFPage_InsertObject(page.get(), text_object); + EXPECT_TRUE(FPDFPage_GenerateContent(page.get())); + + const char* checksum = []() { + if (CFX_DefaultRenderDevice::UseSkiaRenderer()) { +#if BUILDFLAG(IS_WIN) + return "a1bc9e4007dc2155e9f56bf16234573e"; +#elif BUILDFLAG(IS_APPLE) + return "9a31fb87d1c6d2346bba22d1196041cd"; +#else + return "5bb65e15fc0a685934cd5006dec08a76"; +#endif + } + return "9a31fb87d1c6d2346bba22d1196041cd"; + }(); + ScopedFPDFBitmap page_bitmap = RenderPage(page.get()); + CompareBitmap(page_bitmap.get(), 400, 400, checksum); + + ASSERT_TRUE(FPDF_SaveAsCopy(document(), this, 0)); + VerifySavedDocument(400, 400, checksum); +} + +TEST_F(FPDFEditEmbedderTest, LoadCidType2FontWithBadParameters) { + ASSERT_TRUE(CreateNewDocument()); + + // Every call below has one argument replaced by a null pointer, a zero + // size, or an empty string, and is expected to return a null font. + const std::vector<uint8_t> dummy_vec(3); + const char kDummyString[] = "dummy"; + EXPECT_FALSE(FPDFText_LoadCidType2Font(nullptr, dummy_vec.data(), + dummy_vec.size(), kDummyString, + dummy_vec.data(), dummy_vec.size())); + EXPECT_FALSE(FPDFText_LoadCidType2Font(document(), nullptr, dummy_vec.size(), + kDummyString, dummy_vec.data(), + dummy_vec.size())); + EXPECT_FALSE(FPDFText_LoadCidType2Font(document(), dummy_vec.data(), 0, + kDummyString, dummy_vec.data(), + dummy_vec.size())); + EXPECT_FALSE(FPDFText_LoadCidType2Font(document(), dummy_vec.data(), + dummy_vec.size(), nullptr, + dummy_vec.data(), dummy_vec.size())); + EXPECT_FALSE(FPDFText_LoadCidType2Font(document(), dummy_vec.data(), + dummy_vec.size(), "", dummy_vec.data(), + dummy_vec.size())); + 
EXPECT_FALSE(FPDFText_LoadCidType2Font(document(), dummy_vec.data(), + dummy_vec.size(), kDummyString, + nullptr, dummy_vec.size())); + EXPECT_FALSE(FPDFText_LoadCidType2Font(document(), dummy_vec.data(), + dummy_vec.size(), kDummyString, + dummy_vec.data(), 0)); +} + TEST_F(FPDFEditEmbedderTest, SaveAndRender) { const char* checksum = []() { if (CFX_DefaultRenderDevice::UseSkiaRenderer()) {
diff --git a/fpdfsdk/fpdf_edittext.cpp b/fpdfsdk/fpdf_edittext.cpp index 1792b29..af6c3d8 100644 --- a/fpdfsdk/fpdf_edittext.cpp +++ b/fpdfsdk/fpdf_edittext.cpp
@@ -497,6 +497,65 @@ return CPDF_DocPageData::FromDocument(doc)->GetFont(font_dict); } +// Builds the font dictionaries for a composite font backed by `font`, storing +// the caller-provided `to_unicode_cmap` and `cid_to_gid_map_span` as its +// ToUnicode and CIDToGIDMap streams instead of generating them. `font_span` +// is passed to LoadFontDesc(). Returns nullptr if the face has no glyphs or +// yields no char codes. +// NOTE(review): assumes font->GetFace() is non-null - confirm with callers. +RetainPtr<CPDF_Font> LoadCustomCompositeFont( + CPDF_Document* doc, + std::unique_ptr<CFX_Font> font, + pdfium::span<const uint8_t> font_span, + const char* to_unicode_cmap, + pdfium::span<const uint8_t> cid_to_gid_map_span) { + // If it doesn't have a single char, just fail. + RetainPtr<CFX_Face> face = font->GetFace(); + if (face->GetGlyphCount() <= 0) { + return nullptr; + } + + auto char_codes_and_indices = + face->GetCharCodesAndIndices(pdfium::kMaximumSupplementaryCodePoint); + if (char_codes_and_indices.empty()) { + return nullptr; + } + + const ByteString name = BaseFontNameForType(font.get(), FPDF_FONT_TRUETYPE); + RetainPtr<CPDF_Dictionary> font_dict = + CreateCompositeFontDict(doc, font.get(), FPDF_FONT_TRUETYPE, name); + + RetainPtr<CPDF_Dictionary> cid_font_dict = + CreateCidFontDict(doc, FPDF_FONT_TRUETYPE, name); + + RetainPtr<CPDF_Dictionary> font_descriptor = + LoadFontDesc(doc, name, font.get(), font_span, FPDF_FONT_TRUETYPE); + cid_font_dict->SetNewFor<CPDF_Reference>("FontDescriptor", doc, + font_descriptor->GetObjNum()); + + // Record one width per distinct glyph index. + std::map<uint32_t, uint32_t> widths; + for (const auto& item : char_codes_and_indices) { + if (!pdfium::Contains(widths, item.glyph_index)) { + widths[item.glyph_index] = font->GetGlyphWidth(item.glyph_index); + } + } + RetainPtr<CPDF_Array> widths_array = CreateWidthsArray(doc, widths); + cid_font_dict->SetNewFor<CPDF_Reference>("W", doc, widths_array->GetObjNum()); + + auto cid_to_gid_map = doc->NewIndirect<CPDF_Stream>(cid_to_gid_map_span); + cid_font_dict->SetNewFor<CPDF_Reference>("CIDToGIDMap", doc, + cid_to_gid_map->GetObjNum()); + + CreateDescendantFontsArray(doc, font_dict, cid_font_dict->GetObjNum()); + + auto to_unicode_stream = + doc->NewIndirect<CPDF_Stream>(ByteStringView(to_unicode_cmap).raw_span()); + font_dict->SetNewFor<CPDF_Reference>("ToUnicode", doc, + to_unicode_stream->GetObjNum()); + return 
CPDF_DocPageData::FromDocument(doc)->GetFont(font_dict); +} + CPDF_TextObject* CPDFTextObjectFromFPDFPageObject(FPDF_PAGEOBJECT page_object) { auto* obj = CPDFPageObjectFromFPDFPageObject(page_object); return obj ? obj->AsText() : nullptr; @@ -607,6 +666,38 @@ CPDF_Font::GetStockFont(pDoc, ByteStringView(font)).Leak()); } +FPDF_EXPORT FPDF_FONT FPDF_CALLCONV +FPDFText_LoadCidType2Font(FPDF_DOCUMENT document, + const uint8_t* font_data, + uint32_t font_data_size, + FPDF_BYTESTRING to_unicode_cmap, + const uint8_t* cid_to_gid_map_data, + uint32_t cid_to_gid_map_data_size) { + CPDF_Document* doc = CPDFDocumentFromFPDFDocument(document); + // Reject null pointers, zero sizes, and an empty CMap string. + if (!doc || !font_data || font_data_size == 0 || !to_unicode_cmap || + strlen(to_unicode_cmap) == 0 || !cid_to_gid_map_data || + cid_to_gid_map_data_size == 0) { + return nullptr; + } + + auto font_span = pdfium::make_span(font_data, font_data_size); + auto font = std::make_unique<CFX_Font>(); + + // TODO(thestig): Consider checking the font format. See similar comment in + // FPDFText_LoadFont() above. + if (!font->LoadEmbedded(font_span, /*force_vertical=*/false, + /*object_tag=*/0)) { + return nullptr; + } + + // Caller takes ownership. + return FPDFFontFromCPDFFont( + LoadCustomCompositeFont(doc, std::move(font), font_span, to_unicode_cmap, + {cid_to_gid_map_data, cid_to_gid_map_data_size}) + .Leak()); +} + FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV FPDFTextObj_GetFontSize(FPDF_PAGEOBJECT text, float* size) { if (!size)
diff --git a/fpdfsdk/fpdf_view_c_api_test.c b/fpdfsdk/fpdf_view_c_api_test.c index f02d501..4f8bccf 100644 --- a/fpdfsdk/fpdf_view_c_api_test.c +++ b/fpdfsdk/fpdf_view_c_api_test.c
@@ -257,6 +257,7 @@ CHK(FPDFTextObj_GetText); CHK(FPDFTextObj_GetTextRenderMode); CHK(FPDFTextObj_SetTextRenderMode); + CHK(FPDFText_LoadCidType2Font); CHK(FPDFText_LoadFont); CHK(FPDFText_LoadStandardFont); CHK(FPDFText_SetCharcodes);
diff --git a/public/fpdf_edit.h b/public/fpdf_edit.h index d071cb2..082e39b 100644 --- a/public/fpdf_edit.h +++ b/public/fpdf_edit.h
@@ -1183,16 +1183,16 @@ size_t count); // Returns a font object loaded from a stream of data. The font is loaded -// into the document. +// into the document. Various font data structures, such as the ToUnicode data, +// are auto-generated based on the inputs. // -// document - handle to the document. -// data - the stream of data, which will be copied by the font object. -// size - size of the stream, in bytes. -// font_type - FPDF_FONT_TYPE1 or FPDF_FONT_TRUETYPE depending on the font -// type. -// cid - a boolean specifying if the font is a CID font or not. +// document - handle to the document. +// data - the stream of font data, which will be copied by the font object. +// size - the size of the font data, in bytes. +// font_type - FPDF_FONT_TYPE1 or FPDF_FONT_TRUETYPE depending on the font type. +// cid - a boolean specifying if the font is a CID font or not. // -// The loaded font can be closed using FPDFFont_Close. +// The loaded font can be closed using FPDFFont_Close(). // // Returns NULL on failure FPDF_EXPORT FPDF_FONT FPDF_CALLCONV FPDFText_LoadFont(FPDF_DOCUMENT document, @@ -1209,12 +1209,37 @@ // document - handle to the document. // font - string containing the font name, without spaces. // -// The loaded font can be closed using FPDFFont_Close. +// The loaded font can be closed using FPDFFont_Close(). // // Returns NULL on failure. FPDF_EXPORT FPDF_FONT FPDF_CALLCONV FPDFText_LoadStandardFont(FPDF_DOCUMENT document, FPDF_BYTESTRING font); +// Experimental API. +// Returns a font object loaded from a stream of data for a type 2 CID font. The +// font is loaded into the document. Unlike FPDFText_LoadFont(), the ToUnicode +// data and the CIDToGIDMap data are caller provided, instead of auto-generated. +// +// document - handle to the document. +// font_data - the stream of font data, which will be copied by +// the font object. +// font_data_size - the size of the font data, in bytes. +// to_unicode_cmap - the ToUnicode data, as a NUL-terminated string. 
+// cid_to_gid_map_data - the stream of CIDToGIDMap data. +// cid_to_gid_map_data_size - the size of the CIDToGIDMap data, in bytes. +// +// The loaded font can be closed using FPDFFont_Close(). +// +// Returns NULL on failure, including when any pointer argument is NULL, +// any size is 0, or to_unicode_cmap is an empty string. +FPDF_EXPORT FPDF_FONT FPDF_CALLCONV +FPDFText_LoadCidType2Font(FPDF_DOCUMENT document, + const uint8_t* font_data, + uint32_t font_data_size, + FPDF_BYTESTRING to_unicode_cmap, + const uint8_t* cid_to_gid_map_data, + uint32_t cid_to_gid_map_data_size); + // Get the font size of a text object. // // text - handle to a text.