Add FPDFText_GetFontInfo

Bug: pdfium:929
Change-Id: I9da03a1e317cff69ec4c76b69289cfa753b6bb77
Reviewed-on: https://pdfium-review.googlesource.com/40531
Reviewed-by: Lei Zhang <thestig@chromium.org>
Reviewed-by: Henrique Nakashima <hnakashima@chromium.org>
Commit-Queue: Nicolás Peña Moreno <npm@chromium.org>
diff --git a/core/fpdfapi/font/cpdf_font.h b/core/fpdfapi/font/cpdf_font.h
index 53e5b7b..3a91926 100644
--- a/core/fpdfapi/font/cpdf_font.h
+++ b/core/fpdfapi/font/cpdf_font.h
@@ -80,6 +80,7 @@
   uint32_t GetStringWidth(const ByteStringView& pString);
   uint32_t FallbackFontFromCharcode(uint32_t charcode);
   int FallbackGlyphFromCharcode(int fallbackFont, uint32_t charcode);
+  int GetFontFlags() const { return m_Flags; }  // PDF font descriptor flags.
 
   virtual uint32_t GetCharWidthF(uint32_t charcode) = 0;
   virtual FX_RECT GetCharBBox(uint32_t charcode) = 0;
diff --git a/fpdfsdk/fpdf_text.cpp b/fpdfsdk/fpdf_text.cpp
index d8d3b26..a39402e 100644
--- a/fpdfsdk/fpdf_text.cpp
+++ b/fpdfsdk/fpdf_text.cpp
@@ -10,7 +10,9 @@
 #include <memory>
 #include <vector>
 
+#include "core/fpdfapi/font/cpdf_font.h"
 #include "core/fpdfapi/page/cpdf_page.h"
+#include "core/fpdfapi/page/cpdf_textobject.h"
 #include "core/fpdfdoc/cpdf_viewerpreferences.h"
 #include "core/fpdftext/cpdf_linkextract.h"
 #include "core/fpdftext/cpdf_textpage.h"
@@ -88,6 +90,37 @@
   return charinfo.m_FontSize;
 }
 
+FPDF_EXPORT unsigned long FPDF_CALLCONV
+FPDFText_GetFontInfo(FPDF_TEXTPAGE text_page,
+                     int index,
+                     void* buffer,
+                     unsigned long buflen,
+                     int* flags) {
+  if (!text_page)
+    return 0;
+  CPDF_TextPage* page = CPDFTextPageFromFPDFTextPage(text_page);
+  if (index < 0 || page->CountChars() <= index)
+    return 0;
+
+  FPDF_CHAR_INFO info;
+  page->GetCharInfo(index, &info);
+  if (!info.m_pTextObj)
+    return 0;
+  CPDF_Font* font = info.m_pTextObj->GetFont();
+  if (!font)
+    return 0;
+
+  // Flags are reported whenever a font exists, whatever the buffer size.
+  if (flags)
+    *flags = font->GetFontFlags();
+  // The required size counts the trailing NUL; copy only when it fits.
+  const ByteString name = font->GetBaseFont();
+  const unsigned long needed = name.GetLength() + 1;
+  if (buffer && needed <= buflen)
+    memcpy(buffer, name.c_str(), needed);
+  return needed;
+}
+
 FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV FPDFText_GetCharBox(FPDF_TEXTPAGE text_page,
                                                         int index,
                                                         double* left,
diff --git a/fpdfsdk/fpdf_text_embeddertest.cpp b/fpdfsdk/fpdf_text_embeddertest.cpp
index bf064d6..83b43d9 100644
--- a/fpdfsdk/fpdf_text_embeddertest.cpp
+++ b/fpdfsdk/fpdf_text_embeddertest.cpp
@@ -8,6 +8,7 @@
 #include <vector>
 
 #include "core/fxcrt/fx_memory.h"
+#include "core/fxge/fx_font.h"
 #include "public/cpp/fpdf_scopers.h"
 #include "public/fpdf_text.h"
 #include "public/fpdf_transformpage.h"
@@ -512,6 +513,88 @@
   UnloadPage(page);
 }
 
+TEST_F(FPDFTextEmbeddertest, GetFontInfo) {
+  ASSERT_TRUE(OpenDocument("hello_world.pdf"));
+  FPDF_PAGE page = LoadPage(0);
+  ASSERT_TRUE(page);
+  // Text page whose characters are queried for font information below.
+  FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
+  ASSERT_TRUE(textpage);
+  std::vector<char> font_name;
+  size_t num_chars1 = strlen("Hello, world!");
+  const char kExpectedFontName1[] = "Times-Roman";
+  // First line: ask for the required size with no buffer, then copy the name.
+  for (size_t i = 0; i < num_chars1; i++) {
+    int flags = -1;
+    unsigned long length =
+        FPDFText_GetFontInfo(textpage, i, nullptr, 0, &flags);
+    static constexpr unsigned long expected_length = sizeof(kExpectedFontName1);
+    ASSERT_EQ(expected_length, length);
+    EXPECT_EQ(FXFONT_NONSYMBOLIC, flags);
+    font_name.resize(length);
+    std::fill(font_name.begin(), font_name.end(), 'a');
+    flags = -1;
+    EXPECT_EQ(expected_length,
+              FPDFText_GetFontInfo(textpage, i, font_name.data(),
+                                   font_name.size(), &flags));
+    EXPECT_STREQ(kExpectedFontName1, font_name.data());
+    EXPECT_EQ(FXFONT_NONSYMBOLIC, flags);
+  }
+  // A buffer that is one byte too small must be left untouched, while the
+  // required length is still returned.
+  font_name.pop_back();
+  std::fill(font_name.begin(), font_name.end(), 'a');
+  EXPECT_EQ(sizeof(kExpectedFontName1),
+            FPDFText_GetFontInfo(textpage, 0, font_name.data(),
+                                 font_name.size(), nullptr));
+  for (char a : font_name)
+    EXPECT_EQ('a', a);
+
+  // The text is "Hello, world!\r\nGoodbye, world!", so the next two characters
+  // do not have any font information.
+  EXPECT_EQ(0u, FPDFText_GetFontInfo(textpage, num_chars1, font_name.data(),
+                                     font_name.size(), nullptr));
+  EXPECT_EQ(0u, FPDFText_GetFontInfo(textpage, num_chars1 + 1, font_name.data(),
+                                     font_name.size(), nullptr));
+  // Second line: repeat the same two-step check against its own font name.
+  size_t num_chars2 = strlen("Goodbye, world!");
+  const char kExpectedFontName2[] = "Helvetica";
+  for (size_t i = num_chars1 + 2; i < num_chars1 + num_chars2 + 2; i++) {
+    int flags = -1;
+    unsigned long length =
+        FPDFText_GetFontInfo(textpage, i, nullptr, 0, &flags);
+    static constexpr unsigned long expected_length = sizeof(kExpectedFontName2);
+    ASSERT_EQ(expected_length, length);
+    EXPECT_EQ(FXFONT_NONSYMBOLIC, flags);
+    font_name.resize(length);
+    std::fill(font_name.begin(), font_name.end(), 'a');
+    flags = -1;
+    EXPECT_EQ(expected_length,
+              FPDFText_GetFontInfo(textpage, i, font_name.data(),
+                                   font_name.size(), &flags));
+    EXPECT_STREQ(kExpectedFontName2, font_name.data());
+    EXPECT_EQ(FXFONT_NONSYMBOLIC, flags);
+  }
+
+  // Now try some out of bounds indices and null pointers to make sure we do not
+  // crash.
+  // No textpage.
+  EXPECT_EQ(0u, FPDFText_GetFontInfo(nullptr, 0, font_name.data(),
+                                     font_name.size(), nullptr));
+  // No buffer: the required length is still returned.
+  EXPECT_EQ(sizeof(kExpectedFontName1),
+            FPDFText_GetFontInfo(textpage, 0, nullptr, 0, nullptr));
+  // Negative index.
+  EXPECT_EQ(0u, FPDFText_GetFontInfo(textpage, -1, font_name.data(),
+                                     font_name.size(), nullptr));
+  // Out of bounds index.
+  EXPECT_EQ(0u, FPDFText_GetFontInfo(textpage, 1000, font_name.data(),
+                                     font_name.size(), nullptr));
+
+  FPDFText_ClosePage(textpage);
+  UnloadPage(page);
+}
+
 TEST_F(FPDFTextEmbeddertest, ToUnicode) {
   ASSERT_TRUE(OpenDocument("bug_583.pdf"));
   FPDF_PAGE page = LoadPage(0);
diff --git a/fpdfsdk/fpdf_view_c_api_test.c b/fpdfsdk/fpdf_view_c_api_test.c
index d92ae78..799956b 100644
--- a/fpdfsdk/fpdf_view_c_api_test.c
+++ b/fpdfsdk/fpdf_view_c_api_test.c
@@ -324,6 +324,7 @@
     CHK(FPDFText_GetCharBox);
     CHK(FPDFText_GetCharIndexAtPos);
     CHK(FPDFText_GetCharOrigin);
+    CHK(FPDFText_GetFontInfo);
     CHK(FPDFText_GetFontSize);
     CHK(FPDFText_GetRect);
     CHK(FPDFText_GetSchCount);
diff --git a/public/fpdf_text.h b/public/fpdf_text.h
index 3502337..feb54fb 100644
--- a/public/fpdf_text.h
+++ b/public/fpdf_text.h
@@ -87,6 +87,31 @@
 FPDF_EXPORT double FPDF_CALLCONV FPDFText_GetFontSize(FPDF_TEXTPAGE text_page,
                                                       int index);
 
+// Experimental API.
+// Function: FPDFText_GetFontInfo
+//          Get the font name and flags of a particular character.
+// Parameters:
+//          text_page - Handle to a text page information structure.
+//          Returned by FPDFText_LoadPage function.
+//          index     - Zero-based index of the character.
+//          buffer    - A buffer receiving the font name.
+//          buflen    - The length of |buffer| in bytes.
+//          flags     - Optional pointer to an int receiving the font flags.
+//          These flags should be interpreted per PDF spec 1.7 Section 5.7.1
+//          Font Descriptor Flags.
+// Return value:
+//          On success, return the length of the font name, including the
+//          trailing NUL character, in bytes. If this length is less than or
+//          equal to |buflen|, |buffer| is set to the font name in UTF-8
+//          encoding. |flags|, if non-NULL, is set to the font flags whenever
+//          the call succeeds, regardless of |buflen|. Return 0 on failure.
+FPDF_EXPORT unsigned long FPDF_CALLCONV
+FPDFText_GetFontInfo(FPDF_TEXTPAGE text_page,
+                     int index,
+                     void* buffer,
+                     unsigned long buflen,
+                     int* flags);
+
+
 // Function: FPDFText_GetCharBox
 //          Get bounding box of a particular character.
 // Parameters: