Add FPDFText_LoadCidType2Font() API

Provide an API similar to FPDFText_LoadFont() that allows the caller to
specify custom ToUnicode and CIDToGIDMap data.

Fix some nits in existing API documentation and tests along the way.

Bug: pdfium:2120
Change-Id: I2ab9e16eeaa38f7d53fbb87066c295d9b8b80ec8
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/115694
Reviewed-by: Tom Sepez <tsepez@chromium.org>
Reviewed-by: Thomas Sepez <tsepez@google.com>
Commit-Queue: Lei Zhang <thestig@chromium.org>
diff --git a/fpdfsdk/fpdf_edit_embeddertest.cpp b/fpdfsdk/fpdf_edit_embeddertest.cpp
index a10ee2f..706ff72 100644
--- a/fpdfsdk/fpdf_edit_embeddertest.cpp
+++ b/fpdfsdk/fpdf_edit_embeddertest.cpp
@@ -296,6 +296,7 @@
   ScopedFPDFFont font(FPDFText_LoadFont(document(), font_data.data(),
                                         font_data.size(), FPDF_FONT_TRUETYPE,
                                         /*cid=*/true));
+  ASSERT_TRUE(font);
   FPDF_PAGEOBJECT text_object =
       FPDFPageObj_CreateTextObj(document(), font.get(), 20.0f);
   EXPECT_TRUE(text_object);
@@ -341,6 +342,7 @@
   ScopedFPDFFont font(FPDFText_LoadFont(document(), font_data.data(),
                                         font_data.size(), FPDF_FONT_TRUETYPE,
                                         /*cid=*/true));
+  ASSERT_TRUE(font);
   FPDF_PAGEOBJECT text_object =
       FPDFPageObj_CreateTextObj(document(), font.get(), 20.0f);
   EXPECT_TRUE(text_object);
@@ -3431,7 +3433,7 @@
   pdfium::span<const uint8_t> span = stock_font->GetFont()->GetFontSpan();
   ScopedFPDFFont font(FPDFText_LoadFont(document(), span.data(), span.size(),
                                         FPDF_FONT_TYPE1, false));
-  ASSERT_TRUE(font.get());
+  ASSERT_TRUE(font);
   CPDF_Font* typed_font = CPDFFontFromFPDFFont(font.get());
   EXPECT_TRUE(typed_font->IsType1Font());
 
@@ -3460,7 +3462,7 @@
   pdfium::span<const uint8_t> span = stock_font->GetFont()->GetFontSpan();
   ScopedFPDFFont font(FPDFText_LoadFont(document(), span.data(), span.size(),
                                         FPDF_FONT_TRUETYPE, false));
-  ASSERT_TRUE(font.get());
+  ASSERT_TRUE(font);
   CPDF_Font* typed_font = CPDFFontFromFPDFFont(font.get());
   EXPECT_TRUE(typed_font->IsTrueTypeFont());
 
@@ -3489,7 +3491,7 @@
   pdfium::span<const uint8_t> span = stock_font->GetFont()->GetFontSpan();
   ScopedFPDFFont font(FPDFText_LoadFont(document(), span.data(), span.size(),
                                         FPDF_FONT_TYPE1, 1));
-  ASSERT_TRUE(font.get());
+  ASSERT_TRUE(font);
   CPDF_Font* typed_font = CPDFFontFromFPDFFont(font.get());
   EXPECT_TRUE(typed_font->IsCIDFont());
 
@@ -3540,7 +3542,7 @@
   pdfium::span<const uint8_t> span = stock_font->GetFont()->GetFontSpan();
   ScopedFPDFFont font(FPDFText_LoadFont(document(), span.data(), span.size(),
                                         FPDF_FONT_TRUETYPE, 1));
-  ASSERT_TRUE(font.get());
+  ASSERT_TRUE(font);
   CPDF_Font* typed_font = CPDFFontFromFPDFFont(font.get());
   EXPECT_TRUE(typed_font->IsCIDFont());
 
@@ -3595,7 +3597,7 @@
     pdfium::span<const uint8_t> span = stock_font->GetFont()->GetFontSpan();
     ScopedFPDFFont font(FPDFText_LoadFont(document(), span.data(), span.size(),
                                           FPDF_FONT_TRUETYPE, 0));
-    ASSERT_TRUE(font.get());
+    ASSERT_TRUE(font);
 
     // Add some text to the page
     FPDF_PAGEOBJECT text_object =
@@ -3677,7 +3679,7 @@
     // Load the data into a FPDF_Font.
     ScopedFPDFFont font(FPDFText_LoadFont(document(), span.data(), span.size(),
                                           FPDF_FONT_TRUETYPE, 1));
-    ASSERT_TRUE(font.get());
+    ASSERT_TRUE(font);
 
     // Add some text to the page
     FPDF_PAGEOBJECT text_object =
@@ -3724,6 +3726,113 @@
 }
 #endif  // BUILDFLAG(IS_LINUX) || BUILDFLAG(IS_CHROMEOS)
 
+TEST_F(FPDFEditEmbedderTest, LoadCidType2FontCustom) {
+  // This is the same test as FPDFEditEmbedderTest.EmbedNotoSansSCFont, but some
+  // of the font data is provided by the caller, instead of being generated.
+  CreateEmptyDocument();
+  ScopedFPDFPage page(FPDFPage_New(document(), 0, 400, 400));
+  std::string font_path;
+  ASSERT_TRUE(PathService::GetThirdPartyFilePath(
+      "NotoSansCJK/NotoSansSC-Regular.subset.otf", &font_path));
+
+  std::vector<uint8_t> font_data = GetFileContents(font_path.c_str());
+  ASSERT_FALSE(font_data.empty());
+
+  static const char kToUnicodeCMap[] = R"(
+/CIDInit /ProcSet findresource begin
+12 dict begin
+begincmap
+/CIDSystemInfo <<
+  /Registry (Adobe)
+  /Ordering (Identity)
+  /Supplement 0
+>> def
+/CMapName /Adobe-Identity-H def
+/CMapType 2 def
+1 begincodespacerange
+<0000> <FFFF>
+endcodespacerange
+5 beginbfrange
+<0001> <0003> [<0020> <3002> <2F00>]
+<0003> <0004> [<4E00> <2F06>]
+<0004> <0005> [<4E8C> <53E5>]
+<0005> <0008> [<F906> <662F> <7B2C> <884C>]
+<0008> <0009> [<FA08> <8FD9>]
+endbfrange
+endcmap
+CMapName currentdict /CMap defineresource pop
+end
+end
+)";
+
+  const std::vector<uint8_t> cid_to_gid_map = {0, 0, 0, 1, 0, 2, 0, 3, 0, 4,
+                                               0, 5, 0, 6, 0, 7, 0, 8, 0, 9};
+
+  ScopedFPDFFont font(FPDFText_LoadCidType2Font(
+      document(), font_data.data(), font_data.size(), kToUnicodeCMap,
+      cid_to_gid_map.data(), cid_to_gid_map.size()));
+  ASSERT_TRUE(font);
+
+  FPDF_PAGEOBJECT text_object =
+      FPDFPageObj_CreateTextObj(document(), font.get(), 20.0f);
+  EXPECT_TRUE(text_object);
+
+  // Test characters that map to either a single Unicode code point or
+  // multiple Unicode code points in the embedded font.
+  ScopedFPDFWideString text = GetFPDFWideString(L"这是第一句。 这是第二行。");
+  EXPECT_TRUE(FPDFText_SetText(text_object, text.get()));
+
+  FPDFPageObj_Transform(text_object, 1, 0, 0, 1, 50, 200);
+  FPDFPage_InsertObject(page.get(), text_object);
+  EXPECT_TRUE(FPDFPage_GenerateContent(page.get()));
+
+  const char* checksum = []() {
+    if (CFX_DefaultRenderDevice::UseSkiaRenderer()) {
+#if BUILDFLAG(IS_WIN)
+      return "a1bc9e4007dc2155e9f56bf16234573e";
+#elif BUILDFLAG(IS_APPLE)
+      return "9a31fb87d1c6d2346bba22d1196041cd";
+#else
+      return "5bb65e15fc0a685934cd5006dec08a76";
+#endif
+    }
+    return "9a31fb87d1c6d2346bba22d1196041cd";
+  }();
+  ScopedFPDFBitmap page_bitmap = RenderPage(page.get());
+  CompareBitmap(page_bitmap.get(), 400, 400, checksum);
+
+  ASSERT_TRUE(FPDF_SaveAsCopy(document(), this, 0));
+  VerifySavedDocument(400, 400, checksum);
+}
+
+TEST_F(FPDFEditEmbedderTest, LoadCidType2FontWithBadParameters) {
+  ASSERT_TRUE(CreateNewDocument());
+
+  const std::vector<uint8_t> dummy_vec(3);
+  const char kDummyString[] = "dummy";
+  EXPECT_FALSE(FPDFText_LoadCidType2Font(nullptr, dummy_vec.data(),
+                                         dummy_vec.size(), kDummyString,
+                                         dummy_vec.data(), dummy_vec.size()));
+  EXPECT_FALSE(FPDFText_LoadCidType2Font(document(), nullptr, dummy_vec.size(),
+                                         kDummyString, dummy_vec.data(),
+                                         dummy_vec.size()));
+  EXPECT_FALSE(FPDFText_LoadCidType2Font(document(), dummy_vec.data(), 0,
+                                         kDummyString, dummy_vec.data(),
+                                         dummy_vec.size()));
+  EXPECT_FALSE(FPDFText_LoadCidType2Font(document(), dummy_vec.data(),
+                                         dummy_vec.size(), nullptr,
+                                         dummy_vec.data(), dummy_vec.size()));
+  EXPECT_FALSE(FPDFText_LoadCidType2Font(document(), dummy_vec.data(),
+                                         dummy_vec.size(), "", dummy_vec.data(),
+                                         dummy_vec.size()));
+  EXPECT_FALSE(FPDFText_LoadCidType2Font(document(), dummy_vec.data(),
+                                         dummy_vec.size(), kDummyString,
+                                         nullptr, dummy_vec.size()));
+  EXPECT_FALSE(FPDFText_LoadCidType2Font(document(), dummy_vec.data(),
+                                         dummy_vec.size(), kDummyString,
+                                         dummy_vec.data(), 0));
+}
+
 TEST_F(FPDFEditEmbedderTest, SaveAndRender) {
   const char* checksum = []() {
     if (CFX_DefaultRenderDevice::UseSkiaRenderer()) {
diff --git a/fpdfsdk/fpdf_edittext.cpp b/fpdfsdk/fpdf_edittext.cpp
index 1792b29..af6c3d8 100644
--- a/fpdfsdk/fpdf_edittext.cpp
+++ b/fpdfsdk/fpdf_edittext.cpp
@@ -497,6 +497,58 @@
   return CPDF_DocPageData::FromDocument(doc)->GetFont(font_dict);
 }
 
+RetainPtr<CPDF_Font> LoadCustomCompositeFont(
+    CPDF_Document* doc,
+    std::unique_ptr<CFX_Font> font,
+    pdfium::span<const uint8_t> font_span,
+    const char* to_unicode_cmap,
+    pdfium::span<const uint8_t> cid_to_gid_map_span) {
+  // If it doesn't have a single char, just fail.
+  RetainPtr<CFX_Face> face = font->GetFace();
+  if (face->GetGlyphCount() <= 0) {
+    return nullptr;
+  }
+
+  auto char_codes_and_indices =
+      face->GetCharCodesAndIndices(pdfium::kMaximumSupplementaryCodePoint);
+  if (char_codes_and_indices.empty()) {
+    return nullptr;
+  }
+
+  const ByteString name = BaseFontNameForType(font.get(), FPDF_FONT_TRUETYPE);
+  RetainPtr<CPDF_Dictionary> font_dict =
+      CreateCompositeFontDict(doc, font.get(), FPDF_FONT_TRUETYPE, name);
+
+  RetainPtr<CPDF_Dictionary> cid_font_dict =
+      CreateCidFontDict(doc, FPDF_FONT_TRUETYPE, name);
+
+  RetainPtr<CPDF_Dictionary> font_descriptor =
+      LoadFontDesc(doc, name, font.get(), font_span, FPDF_FONT_TRUETYPE);
+  cid_font_dict->SetNewFor<CPDF_Reference>("FontDescriptor", doc,
+                                           font_descriptor->GetObjNum());
+
+  std::map<uint32_t, uint32_t> widths;
+  for (const auto& item : char_codes_and_indices) {
+    if (!pdfium::Contains(widths, item.glyph_index)) {
+      widths[item.glyph_index] = font->GetGlyphWidth(item.glyph_index);
+    }
+  }
+  RetainPtr<CPDF_Array> widths_array = CreateWidthsArray(doc, widths);
+  cid_font_dict->SetNewFor<CPDF_Reference>("W", doc, widths_array->GetObjNum());
+
+  auto cid_to_gid_map = doc->NewIndirect<CPDF_Stream>(cid_to_gid_map_span);
+  cid_font_dict->SetNewFor<CPDF_Reference>("CIDToGIDMap", doc,
+                                           cid_to_gid_map->GetObjNum());
+
+  CreateDescendantFontsArray(doc, font_dict, cid_font_dict->GetObjNum());
+
+  auto to_unicode_stream =
+      doc->NewIndirect<CPDF_Stream>(ByteStringView(to_unicode_cmap).raw_span());
+  font_dict->SetNewFor<CPDF_Reference>("ToUnicode", doc,
+                                       to_unicode_stream->GetObjNum());
+  return CPDF_DocPageData::FromDocument(doc)->GetFont(font_dict);
+}
+
 CPDF_TextObject* CPDFTextObjectFromFPDFPageObject(FPDF_PAGEOBJECT page_object) {
   auto* obj = CPDFPageObjectFromFPDFPageObject(page_object);
   return obj ? obj->AsText() : nullptr;
@@ -607,6 +659,37 @@
       CPDF_Font::GetStockFont(pDoc, ByteStringView(font)).Leak());
 }
 
+FPDF_EXPORT FPDF_FONT FPDF_CALLCONV
+FPDFText_LoadCidType2Font(FPDF_DOCUMENT document,
+                          const uint8_t* font_data,
+                          uint32_t font_data_size,
+                          FPDF_BYTESTRING to_unicode_cmap,
+                          const uint8_t* cid_to_gid_map_data,
+                          uint32_t cid_to_gid_map_data_size) {
+  CPDF_Document* doc = CPDFDocumentFromFPDFDocument(document);
+  if (!doc || !font_data || font_data_size == 0 || !to_unicode_cmap ||
+      strlen(to_unicode_cmap) == 0 || !cid_to_gid_map_data ||
+      cid_to_gid_map_data_size == 0) {
+    return nullptr;
+  }
+
+  auto font_span = pdfium::make_span(font_data, font_data_size);
+  auto font = std::make_unique<CFX_Font>();
+
+  // TODO(thestig): Consider checking the font format. See similar comment in
+  // FPDFText_LoadFont() above.
+  if (!font->LoadEmbedded(font_span, /*force_vertical=*/false,
+                          /*object_tag=*/0)) {
+    return nullptr;
+  }
+
+  // Caller takes ownership.
+  return FPDFFontFromCPDFFont(
+      LoadCustomCompositeFont(doc, std::move(font), font_span, to_unicode_cmap,
+                              {cid_to_gid_map_data, cid_to_gid_map_data_size})
+          .Leak());
+}
+
 FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV
 FPDFTextObj_GetFontSize(FPDF_PAGEOBJECT text, float* size) {
   if (!size)
diff --git a/fpdfsdk/fpdf_view_c_api_test.c b/fpdfsdk/fpdf_view_c_api_test.c
index f02d501..4f8bccf 100644
--- a/fpdfsdk/fpdf_view_c_api_test.c
+++ b/fpdfsdk/fpdf_view_c_api_test.c
@@ -257,6 +257,7 @@
     CHK(FPDFTextObj_GetText);
     CHK(FPDFTextObj_GetTextRenderMode);
     CHK(FPDFTextObj_SetTextRenderMode);
+    CHK(FPDFText_LoadCidType2Font);
     CHK(FPDFText_LoadFont);
     CHK(FPDFText_LoadStandardFont);
     CHK(FPDFText_SetCharcodes);
diff --git a/public/fpdf_edit.h b/public/fpdf_edit.h
index d071cb2..082e39b 100644
--- a/public/fpdf_edit.h
+++ b/public/fpdf_edit.h
@@ -1183,16 +1183,16 @@
                       size_t count);
 
 // Returns a font object loaded from a stream of data. The font is loaded
-// into the document.
+// into the document. Various font data structures, such as the ToUnicode data,
+// are auto-generated based on the inputs.
 //
-// document   - handle to the document.
-// data       - the stream of data, which will be copied by the font object.
-// size       - size of the stream, in bytes.
-// font_type  - FPDF_FONT_TYPE1 or FPDF_FONT_TRUETYPE depending on the font
-// type.
-// cid        - a boolean specifying if the font is a CID font or not.
+// document  - handle to the document.
+// data      - the stream of font data, which will be copied by the font object.
+// size      - the size of the font data, in bytes.
+// font_type - FPDF_FONT_TYPE1 or FPDF_FONT_TRUETYPE depending on the font type.
+// cid       - a boolean specifying if the font is a CID font or not.
 //
-// The loaded font can be closed using FPDFFont_Close.
+// The loaded font can be closed using FPDFFont_Close().
 //
 // Returns NULL on failure
 FPDF_EXPORT FPDF_FONT FPDF_CALLCONV FPDFText_LoadFont(FPDF_DOCUMENT document,
@@ -1209,12 +1209,36 @@
 // document   - handle to the document.
 // font       - string containing the font name, without spaces.
 //
-// The loaded font can be closed using FPDFFont_Close.
+// The loaded font can be closed using FPDFFont_Close().
 //
 // Returns NULL on failure.
 FPDF_EXPORT FPDF_FONT FPDF_CALLCONV
 FPDFText_LoadStandardFont(FPDF_DOCUMENT document, FPDF_BYTESTRING font);
 
+// Experimental API.
+// Returns a font object loaded from a stream of data for a type 2 CID font. The
+// font is loaded into the document. Unlike FPDFText_LoadFont(), the ToUnicode
+// data and the CIDToGIDMap data are caller provided, instead of auto-generated.
+//
+// document                 - handle to the document.
+// font_data                - the stream of font data, which will be copied by
+//                            the font object.
+// font_data_size           - the size of the font data, in bytes.
+// to_unicode_cmap          - the ToUnicode data, as a NUL-terminated string.
+// cid_to_gid_map_data      - the stream of CIDToGIDMap data.
+// cid_to_gid_map_data_size - the size of the CIDToGIDMap data, in bytes.
+//
+// The loaded font can be closed using FPDFFont_Close().
+//
+// Returns NULL on failure.
+FPDF_EXPORT FPDF_FONT FPDF_CALLCONV
+FPDFText_LoadCidType2Font(FPDF_DOCUMENT document,
+                          const uint8_t* font_data,
+                          uint32_t font_data_size,
+                          FPDF_BYTESTRING to_unicode_cmap,
+                          const uint8_t* cid_to_gid_map_data,
+                          uint32_t cid_to_gid_map_data_size);
+
 // Get the font size of a text object.
 //
 //   text - handle to a text.