Add new FPDFFont_GetFontData API.

Given an FPDF_FONT, this API lets the caller retrieve the embedded font
data in decoded form, or the substitution font data if the font is
not embedded.

Bug: pdfium:1833
Change-Id: I0c28bf6416f9f192595b41f595cbade366d7f0d3
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/91390
Commit-Queue: Lei Zhang <thestig@chromium.org>
Reviewed-by: Nigi <nigi@chromium.org>
diff --git a/fpdfsdk/fpdf_edit_embeddertest.cpp b/fpdfsdk/fpdf_edit_embeddertest.cpp
index 8942c28..e819ffc 100644
--- a/fpdfsdk/fpdf_edit_embeddertest.cpp
+++ b/fpdfsdk/fpdf_edit_embeddertest.cpp
@@ -2499,6 +2499,86 @@
     ASSERT_STREQ("x", font_name.data());
   }
 
+  {
+    // FPDFFont_GetFontData() positive testing. Start with a size-only query.
+    constexpr size_t kExpectedSize = 8268;
+    std::vector<uint8_t> buf;
+    size_t buf_bytes_required = 123;
+    ASSERT_TRUE(FPDFFont_GetFontData(font, nullptr, 0, &buf_bytes_required));
+    ASSERT_EQ(kExpectedSize, buf_bytes_required);
+
+    buf.resize(kExpectedSize);  // Zero-filled; hashed below as a baseline.
+    EXPECT_EQ("495800b8e56e2d37f3bc48a1b52db952", GenerateMD5Base16(buf));
+    buf_bytes_required = 234;
+    // Buffer one byte too small: succeeds, but `buf` must remain unchanged.
+    EXPECT_TRUE(FPDFFont_GetFontData(font, buf.data(), buf.size() - 1,
+                                     &buf_bytes_required));
+    EXPECT_EQ("495800b8e56e2d37f3bc48a1b52db952", GenerateMD5Base16(buf));
+    EXPECT_EQ(kExpectedSize, buf_bytes_required);
+
+    // Buffer of exactly the required size: the font data is copied into `buf`.
+    buf_bytes_required = 234;
+    EXPECT_TRUE(FPDFFont_GetFontData(font, buf.data(), buf.size(),
+                                     &buf_bytes_required));
+    EXPECT_EQ("1a67be75f719b6c476804d85bb9e4844", GenerateMD5Base16(buf));
+    EXPECT_EQ(kExpectedSize, buf_bytes_required);
+
+    // FPDFFont_GetFontData() negative testing.
+    EXPECT_FALSE(FPDFFont_GetFontData(nullptr, nullptr, 0, nullptr));
+    EXPECT_FALSE(FPDFFont_GetFontData(font, nullptr, 0, nullptr));
+
+    buf_bytes_required = 345;  // Sentinel: must survive a failed call.
+    EXPECT_FALSE(
+        FPDFFont_GetFontData(nullptr, nullptr, 0, &buf_bytes_required));
+    EXPECT_EQ(345u, buf_bytes_required);
+
+    EXPECT_FALSE(
+        FPDFFont_GetFontData(nullptr, buf.data(), buf.size(), nullptr));
+    EXPECT_FALSE(FPDFFont_GetFontData(font, buf.data(), buf.size(), nullptr));
+
+    buf_bytes_required = 345;  // Sentinel: must survive a failed call.
+    EXPECT_FALSE(FPDFFont_GetFontData(nullptr, buf.data(), buf.size(),
+                                      &buf_bytes_required));
+    EXPECT_EQ(345u, buf_bytes_required);
+  }
+
+  UnloadPage(page);
+}
+
+TEST_F(FPDFEditEmbedderTest, NoEmbeddedFontData) {
+  ASSERT_TRUE(OpenDocument("hello_world.pdf"));
+  FPDF_PAGE page = LoadPage(0);
+  ASSERT_TRUE(page);
+  ASSERT_EQ(2, FPDFPage_CountObjects(page));
+
+  // Since hello_world.pdf does not embed any font data, FPDFFont_GetFontData()
+  // will return the substitution font data. Since pdfium_embeddertest is
+  // hermetic, this first object consistently maps to Tinos-Regular.ttf.
+  constexpr size_t kTinosRegularSize = 469968;
+  FPDF_PAGEOBJECT text = FPDFPage_GetObject(page, 0);
+  ASSERT_TRUE(text);
+  FPDF_FONT font = FPDFTextObj_GetFont(text);
+  ASSERT_TRUE(font);
+  std::vector<uint8_t> buf;
+  buf.resize(kTinosRegularSize);  // Exact fit for the expected font data.
+  size_t buf_bytes_required;  // Only read after a call has succeeded.
+  ASSERT_TRUE(
+      FPDFFont_GetFontData(font, buf.data(), buf.size(), &buf_bytes_required));
+  EXPECT_EQ(kTinosRegularSize, buf_bytes_required);
+  EXPECT_EQ("2b019558f2c2de0b7cbc0a6e64b20599", GenerateMD5Base16(buf));
+
+  // Similarly, the second object consistently maps to Arimo-Regular.ttf.
+  constexpr size_t kArimoRegularSize = 436180;
+  text = FPDFPage_GetObject(page, 1);
+  ASSERT_TRUE(text);
+  font = FPDFTextObj_GetFont(text);
+  ASSERT_TRUE(font);
+  buf.resize(kArimoRegularSize);  // Shrink `buf` to an exact fit again.
+  ASSERT_TRUE(
+      FPDFFont_GetFontData(font, buf.data(), buf.size(), &buf_bytes_required));
+  EXPECT_EQ(kArimoRegularSize, buf_bytes_required);
+  EXPECT_EQ("7ac02a544211773d9636e056e9da6c35", GenerateMD5Base16(buf));
+
   UnloadPage(page);
 }
 
diff --git a/fpdfsdk/fpdf_edittext.cpp b/fpdfsdk/fpdf_edittext.cpp
index 37fac90..edb9ca2 100644
--- a/fpdfsdk/fpdf_edittext.cpp
+++ b/fpdfsdk/fpdf_edittext.cpp
@@ -25,6 +25,7 @@
 #include "core/fpdftext/cpdf_textpage.h"
 #include "core/fxcrt/fx_extension.h"
 #include "core/fxcrt/fx_string_wrappers.h"
+#include "core/fxcrt/span_util.h"
 #include "core/fxcrt/stl_util.h"
 #include "core/fxge/cfx_fontmgr.h"
 #include "core/fxge/fx_font.h"
@@ -652,6 +653,21 @@
   return dwStringLen;
 }
 
+FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV FPDFFont_GetFontData(FPDF_FONT font,
+                                                         uint8_t* buffer,
+                                                         size_t buflen,
+                                                         size_t* out_buflen) {
+  auto* cfont = CPDFFontFromFPDFFont(font);
+  if (!cfont || !out_buflen)  // |font| and |out_buflen| are both required.
+    return false;
+
+  pdfium::span<uint8_t> data = cfont->GetFont()->GetFontSpan();
+  if (buffer && buflen >= data.size())  // Copy all of the data or none of it.
+    fxcrt::spancpy(pdfium::make_span(buffer, buflen), data);
+  *out_buflen = data.size();  // Reported even when nothing was copied.
+  return true;
+}
+
 FPDF_EXPORT int FPDF_CALLCONV FPDFFont_GetFlags(FPDF_FONT font) {
   auto* pFont = CPDFFontFromFPDFFont(font);
   if (!pFont)
diff --git a/fpdfsdk/fpdf_view_c_api_test.c b/fpdfsdk/fpdf_view_c_api_test.c
index 9f0e2da..3eb4a39 100644
--- a/fpdfsdk/fpdf_view_c_api_test.c
+++ b/fpdfsdk/fpdf_view_c_api_test.c
@@ -159,6 +159,7 @@
     CHK(FPDFFont_GetAscent);
     CHK(FPDFFont_GetDescent);
     CHK(FPDFFont_GetFlags);
+    CHK(FPDFFont_GetFontData);
     CHK(FPDFFont_GetFontName);
     CHK(FPDFFont_GetGlyphPath);
     CHK(FPDFFont_GetGlyphWidth);
diff --git a/public/fpdf_edit.h b/public/fpdf_edit.h
index f41a211..c606e40 100644
--- a/public/fpdf_edit.h
+++ b/public/fpdf_edit.h
@@ -1243,6 +1243,30 @@
 FPDFFont_GetFontName(FPDF_FONT font, char* buffer, unsigned long length);
 
 // Experimental API.
+// Get the decoded data from the |font| object.
+//
+// font       - The handle to the font object. (Required)
+// buffer     - Buffer that receives the font data. May be null to query size.
+// buflen     - Length of |buffer|, in bytes.
+// out_buflen - Pointer to variable that will receive the minimum buffer size,
+//              in bytes, to contain the font data. Not filled if the return
+//              value is FALSE. (Required)
+//
+// Returns TRUE on success, in which case |out_buflen| is filled, and |buffer|
+// is filled only if it is non-null and large enough. Returns FALSE if any of
+// the required parameters are null.
+//
+// The decoded data is the uncompressed font data, i.e. the raw font data after
+// having all stream filters applied, when the data is embedded.
+//
+// If the font is not embedded, then this API will instead return the data for
+// the substitution font it is using.
+FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV FPDFFont_GetFontData(FPDF_FONT font,
+                                                         uint8_t* buffer,
+                                                         size_t buflen,
+                                                         size_t* out_buflen);
+
+// Experimental API.
 // Get the descriptor flags of a font.
 //
 // font - the handle to the font object.