Add FPDFText_LoadCidType2Font() API
Provide an API similar to FPDFText_LoadFont() that allows the caller to
specify custom ToUnicode and CIDToGIDMap data.
Fix some nits in existing API documentation and tests along the way.
Bug: pdfium:2120
Change-Id: I2ab9e16eeaa38f7d53fbb87066c295d9b8b80ec8
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/115694
Reviewed-by: Tom Sepez <tsepez@chromium.org>
Reviewed-by: Thomas Sepez <tsepez@google.com>
Commit-Queue: Lei Zhang <thestig@chromium.org>
diff --git a/fpdfsdk/fpdf_edit_embeddertest.cpp b/fpdfsdk/fpdf_edit_embeddertest.cpp
index a10ee2f..706ff72 100644
--- a/fpdfsdk/fpdf_edit_embeddertest.cpp
+++ b/fpdfsdk/fpdf_edit_embeddertest.cpp
@@ -296,6 +296,7 @@
ScopedFPDFFont font(FPDFText_LoadFont(document(), font_data.data(),
font_data.size(), FPDF_FONT_TRUETYPE,
/*cid=*/true));
+ ASSERT_TRUE(font);
FPDF_PAGEOBJECT text_object =
FPDFPageObj_CreateTextObj(document(), font.get(), 20.0f);
EXPECT_TRUE(text_object);
@@ -341,6 +342,7 @@
ScopedFPDFFont font(FPDFText_LoadFont(document(), font_data.data(),
font_data.size(), FPDF_FONT_TRUETYPE,
/*cid=*/true));
+ ASSERT_TRUE(font);
FPDF_PAGEOBJECT text_object =
FPDFPageObj_CreateTextObj(document(), font.get(), 20.0f);
EXPECT_TRUE(text_object);
@@ -3431,7 +3433,7 @@
pdfium::span<const uint8_t> span = stock_font->GetFont()->GetFontSpan();
ScopedFPDFFont font(FPDFText_LoadFont(document(), span.data(), span.size(),
FPDF_FONT_TYPE1, false));
- ASSERT_TRUE(font.get());
+ ASSERT_TRUE(font);
CPDF_Font* typed_font = CPDFFontFromFPDFFont(font.get());
EXPECT_TRUE(typed_font->IsType1Font());
@@ -3460,7 +3462,7 @@
pdfium::span<const uint8_t> span = stock_font->GetFont()->GetFontSpan();
ScopedFPDFFont font(FPDFText_LoadFont(document(), span.data(), span.size(),
FPDF_FONT_TRUETYPE, false));
- ASSERT_TRUE(font.get());
+ ASSERT_TRUE(font);
CPDF_Font* typed_font = CPDFFontFromFPDFFont(font.get());
EXPECT_TRUE(typed_font->IsTrueTypeFont());
@@ -3489,7 +3491,7 @@
pdfium::span<const uint8_t> span = stock_font->GetFont()->GetFontSpan();
ScopedFPDFFont font(FPDFText_LoadFont(document(), span.data(), span.size(),
FPDF_FONT_TYPE1, 1));
- ASSERT_TRUE(font.get());
+ ASSERT_TRUE(font);
CPDF_Font* typed_font = CPDFFontFromFPDFFont(font.get());
EXPECT_TRUE(typed_font->IsCIDFont());
@@ -3540,7 +3542,7 @@
pdfium::span<const uint8_t> span = stock_font->GetFont()->GetFontSpan();
ScopedFPDFFont font(FPDFText_LoadFont(document(), span.data(), span.size(),
FPDF_FONT_TRUETYPE, 1));
- ASSERT_TRUE(font.get());
+ ASSERT_TRUE(font);
CPDF_Font* typed_font = CPDFFontFromFPDFFont(font.get());
EXPECT_TRUE(typed_font->IsCIDFont());
@@ -3595,7 +3597,7 @@
pdfium::span<const uint8_t> span = stock_font->GetFont()->GetFontSpan();
ScopedFPDFFont font(FPDFText_LoadFont(document(), span.data(), span.size(),
FPDF_FONT_TRUETYPE, 0));
- ASSERT_TRUE(font.get());
+ ASSERT_TRUE(font);
// Add some text to the page
FPDF_PAGEOBJECT text_object =
@@ -3677,7 +3679,7 @@
// Load the data into a FPDF_Font.
ScopedFPDFFont font(FPDFText_LoadFont(document(), span.data(), span.size(),
FPDF_FONT_TRUETYPE, 1));
- ASSERT_TRUE(font.get());
+ ASSERT_TRUE(font);
// Add some text to the page
FPDF_PAGEOBJECT text_object =
@@ -3724,6 +3726,113 @@
}
#endif // BUILDFLAG(IS_LINUX) || BUILDFLAG(IS_CHROMEOS)
+TEST_F(FPDFEditEmbedderTest, LoadCidType2FontCustom) {
+ // Mirrors FPDFEditEmbedderTest.EmbedNotoSansSCFont, except that the ToUnicode
+ // CMap and the CIDToGIDMap are supplied by the caller rather than generated.
+ CreateEmptyDocument();
+ ScopedFPDFPage page(FPDFPage_New(document(), 0, 400, 400));
+ std::string font_path;
+ ASSERT_TRUE(PathService::GetThirdPartyFilePath(
+ "NotoSansCJK/NotoSansSC-Regular.subset.otf", &font_path));
+
+ std::vector<uint8_t> font_data = GetFileContents(font_path.c_str());
+ ASSERT_FALSE(font_data.empty());
+
+ static const char kToUnicodeCMap[] = R"(
+/CIDInit /ProcSet findresource begin
+12 dict begin
+begincmap
+/CIDSystemInfo <<
+ /Registry (Adobe)
+ /Ordering (Identity)
+ /Supplement 0
+>> def
+/CMapName /Adobe-Identity-H def
+/CMapType 2 def
+1 begincodespacerange
+<0000> <FFFF>
+endcodespacerange
+5 beginbfrange
+<0001> <0003> [<0020> <3002> <2F00>]
+<0003> <0004> [<4E00> <2F06>]
+<0004> <0005> [<4E8C> <53E5>]
+<0005> <0008> [<F906> <662F> <7B2C> <884C>]
+<0008> <0009> [<FA08> <8FD9>]
+endbfrange
+endcmap
+CMapName currentdict /CMap defineresource pop
+end
+end
+)";
+
+ const std::vector<uint8_t> cid_to_gid_map = {0, 0, 0, 1, 0, 2, 0, 3, 0, 4,
+ 0, 5, 0, 6, 0, 7, 0, 8, 0, 9};  // Byte pairs {0, n}, n = 0..9; presumably an identity CID-to-GID map.
+
+ ScopedFPDFFont font(FPDFText_LoadCidType2Font(
+ document(), font_data.data(), font_data.size(), kToUnicodeCMap,
+ cid_to_gid_map.data(), cid_to_gid_map.size()));
+ ASSERT_TRUE(font);
+
+ FPDF_PAGEOBJECT text_object =
+ FPDFPageObj_CreateTextObj(document(), font.get(), 20.0f);
+ EXPECT_TRUE(text_object);
+
+ // Use characters that the embedded font maps to a single Unicode value as
+ // well as characters that it maps to multiple Unicode values.
+ ScopedFPDFWideString text = GetFPDFWideString(L"这是第一句。 这是第二行。");
+ EXPECT_TRUE(FPDFText_SetText(text_object, text.get()));
+
+ FPDFPageObj_Transform(text_object, 1, 0, 0, 1, 50, 200);  // Pure translation by (50, 200).
+ FPDFPage_InsertObject(page.get(), text_object);
+ EXPECT_TRUE(FPDFPage_GenerateContent(page.get()));
+
+ const char* checksum = []() {
+ if (CFX_DefaultRenderDevice::UseSkiaRenderer()) {
+#if BUILDFLAG(IS_WIN)
+ return "a1bc9e4007dc2155e9f56bf16234573e";
+#elif BUILDFLAG(IS_APPLE)
+ return "9a31fb87d1c6d2346bba22d1196041cd";
+#else
+ return "5bb65e15fc0a685934cd5006dec08a76";
+#endif
+ }
+ return "9a31fb87d1c6d2346bba22d1196041cd";  // Non-Skia renderer.
+ }();
+ ScopedFPDFBitmap page_bitmap = RenderPage(page.get());
+ CompareBitmap(page_bitmap.get(), 400, 400, checksum);
+
+ ASSERT_TRUE(FPDF_SaveAsCopy(document(), this, 0));
+ VerifySavedDocument(400, 400, checksum);  // The saved copy is checked against the same checksum.
+}
+
+TEST_F(FPDFEditEmbedderTest, LoadCidType2FontWithBadParameters) {
+ ASSERT_TRUE(CreateNewDocument());
+
+ const std::vector<uint8_t> dummy_vec(3);  // Placeholder non-empty buffer.
+ const char kDummyString[] = "dummy";  // Placeholder non-empty string.
+ EXPECT_FALSE(FPDFText_LoadCidType2Font(nullptr, dummy_vec.data(),
+ dummy_vec.size(), kDummyString,
+ dummy_vec.data(), dummy_vec.size()));  // Null document.
+ EXPECT_FALSE(FPDFText_LoadCidType2Font(document(), nullptr, dummy_vec.size(),
+ kDummyString, dummy_vec.data(),
+ dummy_vec.size()));  // Null font data.
+ EXPECT_FALSE(FPDFText_LoadCidType2Font(document(), dummy_vec.data(), 0,
+ kDummyString, dummy_vec.data(),
+ dummy_vec.size()));  // Zero font data size.
+ EXPECT_FALSE(FPDFText_LoadCidType2Font(document(), dummy_vec.data(),
+ dummy_vec.size(), nullptr,
+ dummy_vec.data(), dummy_vec.size()));  // Null ToUnicode CMap.
+ EXPECT_FALSE(FPDFText_LoadCidType2Font(document(), dummy_vec.data(),
+ dummy_vec.size(), "", dummy_vec.data(),
+ dummy_vec.size()));  // Empty ToUnicode CMap.
+ EXPECT_FALSE(FPDFText_LoadCidType2Font(document(), dummy_vec.data(),
+ dummy_vec.size(), kDummyString,
+ nullptr, dummy_vec.size()));  // Null CIDToGIDMap data.
+ EXPECT_FALSE(FPDFText_LoadCidType2Font(document(), dummy_vec.data(),
+ dummy_vec.size(), kDummyString,
+ dummy_vec.data(), 0));  // Zero CIDToGIDMap size.
+}
+
TEST_F(FPDFEditEmbedderTest, SaveAndRender) {
const char* checksum = []() {
if (CFX_DefaultRenderDevice::UseSkiaRenderer()) {
diff --git a/fpdfsdk/fpdf_edittext.cpp b/fpdfsdk/fpdf_edittext.cpp
index 1792b29..af6c3d8 100644
--- a/fpdfsdk/fpdf_edittext.cpp
+++ b/fpdfsdk/fpdf_edittext.cpp
@@ -497,6 +497,58 @@
return CPDF_DocPageData::FromDocument(doc)->GetFont(font_dict);
}
+RetainPtr<CPDF_Font> LoadCustomCompositeFont(
+ CPDF_Document* doc,  // Document that receives the new font objects.
+ std::unique_ptr<CFX_Font> font,  // Loaded font; source of name and widths.
+ pdfium::span<const uint8_t> font_span,  // Font bytes given to LoadFontDesc().
+ const char* to_unicode_cmap,  // Caller-provided ToUnicode CMap text.
+ pdfium::span<const uint8_t> cid_to_gid_map_span) {  // Caller-provided CIDToGIDMap bytes.
+ // Fail if the face has no glyphs or no character-code-to-glyph entries.
+ RetainPtr<CFX_Face> face = font->GetFace();
+ if (face->GetGlyphCount() <= 0) {
+ return nullptr;
+ }
+
+ auto char_codes_and_indices =
+ face->GetCharCodesAndIndices(pdfium::kMaximumSupplementaryCodePoint);
+ if (char_codes_and_indices.empty()) {
+ return nullptr;
+ }
+
+ const ByteString name = BaseFontNameForType(font.get(), FPDF_FONT_TRUETYPE);  // The font type is fixed to FPDF_FONT_TRUETYPE throughout.
+ RetainPtr<CPDF_Dictionary> font_dict =
+ CreateCompositeFontDict(doc, font.get(), FPDF_FONT_TRUETYPE, name);
+
+ RetainPtr<CPDF_Dictionary> cid_font_dict =
+ CreateCidFontDict(doc, FPDF_FONT_TRUETYPE, name);
+
+ RetainPtr<CPDF_Dictionary> font_descriptor =
+ LoadFontDesc(doc, name, font.get(), font_span, FPDF_FONT_TRUETYPE);
+ cid_font_dict->SetNewFor<CPDF_Reference>("FontDescriptor", doc,
+ font_descriptor->GetObjNum());
+
+ std::map<uint32_t, uint32_t> widths;  // Glyph index -> glyph width.
+ for (const auto& item : char_codes_and_indices) {
+ if (!pdfium::Contains(widths, item.glyph_index)) {  // Query each glyph only once.
+ widths[item.glyph_index] = font->GetGlyphWidth(item.glyph_index);
+ }
+ }
+ RetainPtr<CPDF_Array> widths_array = CreateWidthsArray(doc, widths);
+ cid_font_dict->SetNewFor<CPDF_Reference>("W", doc, widths_array->GetObjNum());
+
+ auto cid_to_gid_map = doc->NewIndirect<CPDF_Stream>(cid_to_gid_map_span);  // Caller's map becomes a new indirect stream.
+ cid_font_dict->SetNewFor<CPDF_Reference>("CIDToGIDMap", doc,
+ cid_to_gid_map->GetObjNum());
+
+ CreateDescendantFontsArray(doc, font_dict, cid_font_dict->GetObjNum());
+
+ auto to_unicode_stream =
+ doc->NewIndirect<CPDF_Stream>(ByteStringView(to_unicode_cmap).raw_span());  // Caller's CMap becomes a new indirect stream.
+ font_dict->SetNewFor<CPDF_Reference>("ToUnicode", doc,
+ to_unicode_stream->GetObjNum());
+ return CPDF_DocPageData::FromDocument(doc)->GetFont(font_dict);  // Font object for the composite font dictionary.
+}
+
CPDF_TextObject* CPDFTextObjectFromFPDFPageObject(FPDF_PAGEOBJECT page_object) {
auto* obj = CPDFPageObjectFromFPDFPageObject(page_object);
return obj ? obj->AsText() : nullptr;
@@ -607,6 +659,37 @@
CPDF_Font::GetStockFont(pDoc, ByteStringView(font)).Leak());
}
+FPDF_EXPORT FPDF_FONT FPDF_CALLCONV
+FPDFText_LoadCidType2Font(FPDF_DOCUMENT document,
+ const uint8_t* font_data,
+ uint32_t font_data_size,
+ FPDF_BYTESTRING to_unicode_cmap,
+ const uint8_t* cid_to_gid_map_data,
+ uint32_t cid_to_gid_map_data_size) {
+ CPDF_Document* doc = CPDFDocumentFromFPDFDocument(document);
+ if (!doc || !font_data || font_data_size == 0 || !to_unicode_cmap ||
+ strlen(to_unicode_cmap) == 0 || !cid_to_gid_map_data ||
+ cid_to_gid_map_data_size == 0) {
+ return nullptr;  // Every input must be non-null and non-empty.
+ }
+
+ auto font_span = pdfium::make_span(font_data, font_data_size);
+ auto font = std::make_unique<CFX_Font>();
+
+ // TODO(thestig): Consider checking the font format. See similar comment in
+ // FPDFText_LoadFont() above.
+ if (!font->LoadEmbedded(font_span, /*force_vertical=*/false,
+ /*object_tag=*/0)) {
+ return nullptr;  // The font data could not be loaded.
+ }
+
+ // Leak() releases the reference to the caller, who now owns the font.
+ return FPDFFontFromCPDFFont(
+ LoadCustomCompositeFont(doc, std::move(font), font_span, to_unicode_cmap,
+ {cid_to_gid_map_data, cid_to_gid_map_data_size})
+ .Leak());
+}
+
FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV
FPDFTextObj_GetFontSize(FPDF_PAGEOBJECT text, float* size) {
if (!size)
diff --git a/fpdfsdk/fpdf_view_c_api_test.c b/fpdfsdk/fpdf_view_c_api_test.c
index f02d501..4f8bccf 100644
--- a/fpdfsdk/fpdf_view_c_api_test.c
+++ b/fpdfsdk/fpdf_view_c_api_test.c
@@ -257,6 +257,7 @@
CHK(FPDFTextObj_GetText);
CHK(FPDFTextObj_GetTextRenderMode);
CHK(FPDFTextObj_SetTextRenderMode);
+ CHK(FPDFText_LoadCidType2Font);
CHK(FPDFText_LoadFont);
CHK(FPDFText_LoadStandardFont);
CHK(FPDFText_SetCharcodes);
diff --git a/public/fpdf_edit.h b/public/fpdf_edit.h
index d071cb2..082e39b 100644
--- a/public/fpdf_edit.h
+++ b/public/fpdf_edit.h
@@ -1183,16 +1183,16 @@
size_t count);
// Returns a font object loaded from a stream of data. The font is loaded
-// into the document.
+// into the document. Various font data structures, such as the ToUnicode data,
+// are auto-generated based on the inputs.
//
-// document - handle to the document.
-// data - the stream of data, which will be copied by the font object.
-// size - size of the stream, in bytes.
-// font_type - FPDF_FONT_TYPE1 or FPDF_FONT_TRUETYPE depending on the font
-// type.
-// cid - a boolean specifying if the font is a CID font or not.
+// document - handle to the document.
+// data - the stream of font data, which will be copied by the font object.
+// size - the size of the font data, in bytes.
+// font_type - FPDF_FONT_TYPE1 or FPDF_FONT_TRUETYPE depending on the font type.
+// cid - a boolean specifying if the font is a CID font or not.
//
-// The loaded font can be closed using FPDFFont_Close.
+// The loaded font can be closed using FPDFFont_Close().
//
// Returns NULL on failure
FPDF_EXPORT FPDF_FONT FPDF_CALLCONV FPDFText_LoadFont(FPDF_DOCUMENT document,
@@ -1209,12 +1209,36 @@
// document - handle to the document.
// font - string containing the font name, without spaces.
//
-// The loaded font can be closed using FPDFFont_Close.
+// The loaded font can be closed using FPDFFont_Close().
//
// Returns NULL on failure.
FPDF_EXPORT FPDF_FONT FPDF_CALLCONV
FPDFText_LoadStandardFont(FPDF_DOCUMENT document, FPDF_BYTESTRING font);
+// Experimental API.
+// Returns a font object loaded from a stream of data for a type 2 CID font. The
+// font is loaded into the document. Unlike FPDFText_LoadFont(), the ToUnicode
+// data and the CIDToGIDMap data are caller provided, instead of auto-generated.
+//
+// document - handle to the document.
+// font_data - the stream of font data, which will be copied by
+// the font object.
+// font_data_size - the size of the font data, in bytes.
+// to_unicode_cmap - the ToUnicode CMap data, as a non-empty string.
+// cid_to_gid_map_data - the stream of CIDToGIDMap data.
+// cid_to_gid_map_data_size - the size of the CIDToGIDMap data, in bytes.
+//
+// The loaded font can be closed using FPDFFont_Close().
+//
+// Returns NULL on failure, including for NULL, empty, or zero-sized inputs.
+FPDF_EXPORT FPDF_FONT FPDF_CALLCONV
+FPDFText_LoadCidType2Font(FPDF_DOCUMENT document,
+ const uint8_t* font_data,
+ uint32_t font_data_size,
+ FPDF_BYTESTRING to_unicode_cmap,
+ const uint8_t* cid_to_gid_map_data,
+ uint32_t cid_to_gid_map_data_size);
+
// Get the font size of a text object.
//
// text - handle to a text.