Generate Type 42 fonts in CFX_PSRenderer.

Add basic conversion code to generate Type 42 fonts from TrueType fonts.
The font data is encoded as ASCIIHex strings with the appropriate font
dictionary entries. Since the Encoding of a Type 42 font can only address
256 glyphs, the glyphs of a TrueType font are split across multiple
Type 42 fonts that all share the same font data, and those fonts are then
combined together into a Type 0 composite font.

This does not yet handle the case where the TrueType font data is larger
than 65535 bytes, the maximum size of a single sfnts string; no font is
generated for such data. To support it, the ASCIIHex strings would have
to be split along font table boundaries. This also does not generate CID
fonts with more advanced encoding.

Bug: chromium:1232526
Change-Id: If176bbc1338b7a7290c1c18f8b5b6e46dadda223
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/85330
Commit-Queue: Lei Zhang <thestig@chromium.org>
Reviewed-by: Tom Sepez <tsepez@chromium.org>
diff --git a/core/fxcrt/fx_extension.h b/core/fxcrt/fx_extension.h
index 40015c8..3cb5226 100644
--- a/core/fxcrt/fx_extension.h
+++ b/core/fxcrt/fx_extension.h
@@ -20,9 +20,7 @@
 
 #define FX_INVALID_OFFSET static_cast<uint32_t>(-1)
 
-#ifdef PDF_ENABLE_XFA
 #define FX_IsOdd(a) ((a)&1)
-#endif  // PDF_ENABLE_XFA
 
 float FXSYS_wcstof(const wchar_t* pwsStr, int32_t iLength, int32_t* pUsedLen);
 wchar_t* FXSYS_wcsncpy(wchar_t* dstStr, const wchar_t* srcStr, size_t count);
diff --git a/core/fxge/BUILD.gn b/core/fxge/BUILD.gn
index 6483b95..8ccaf68 100644
--- a/core/fxge/BUILD.gn
+++ b/core/fxge/BUILD.gn
@@ -225,6 +225,10 @@
     "../fpdfapi/parser",
   ]
   pdfium_root_dir = "../../"
+
+  if (is_win) {
+    sources += [ "win32/cfx_psrenderer_unittest.cpp" ]
+  }
 }
 
 pdfium_embeddertest_source_set("embeddertests") {
diff --git a/core/fxge/cfx_font.cpp b/core/fxge/cfx_font.cpp
index 8eafb54..147e26e 100644
--- a/core/fxge/cfx_font.cpp
+++ b/core/fxge/cfx_font.cpp
@@ -570,20 +570,28 @@
   return ByteString();
 }
 
-Optional<FX_RECT> CFX_Font::GetBBox() {
+Optional<FX_RECT> CFX_Font::GetRawBBox() const {
   if (!m_Face)
     return pdfium::nullopt;
 
-  FX_RECT result(FXFT_Get_Face_xMin(m_Face->GetRec()),
+  return FX_RECT(FXFT_Get_Face_xMin(m_Face->GetRec()),
                  FXFT_Get_Face_yMin(m_Face->GetRec()),
                  FXFT_Get_Face_xMax(m_Face->GetRec()),
                  FXFT_Get_Face_yMax(m_Face->GetRec()));
+}
+
+Optional<FX_RECT> CFX_Font::GetBBox() const {
+  Optional<FX_RECT> result = GetRawBBox();
+  if (!result.has_value())
+    return result;
+
   int em = FXFT_Get_Face_UnitsPerEM(m_Face->GetRec());
   if (em != 0) {
-    result.left = (result.left * 1000) / em;
-    result.top = (result.top * 1000) / em;
-    result.right = (result.right * 1000) / em;
-    result.bottom = (result.bottom * 1000) / em;
+    FX_RECT& bbox = result.value();
+    bbox.left = (bbox.left * 1000) / em;
+    bbox.top = (bbox.top * 1000) / em;
+    bbox.right = (bbox.right * 1000) / em;
+    bbox.bottom = (bbox.bottom * 1000) / em;
   }
   return result;
 }
diff --git a/core/fxge/cfx_font.h b/core/fxge/cfx_font.h
index 3627a43..2b21c93 100644
--- a/core/fxge/cfx_font.h
+++ b/core/fxge/cfx_font.h
@@ -117,7 +117,13 @@
   ByteString GetFaceName() const;
   ByteString GetBaseFontName() const;
   bool IsTTFont() const;
-  Optional<FX_RECT> GetBBox();
+
+  // Raw bounding box.
+  Optional<FX_RECT> GetRawBBox() const;
+
+  // Bounding box adjusted for font units.
+  Optional<FX_RECT> GetBBox() const;
+
   bool IsEmbedded() const { return m_bEmbedded; }
   uint8_t* GetSubData() const { return m_pGsubData.get(); }
   void SetSubData(uint8_t* data) { m_pGsubData.reset(data); }
diff --git a/core/fxge/win32/cfx_psrenderer.cpp b/core/fxge/win32/cfx_psrenderer.cpp
index 58c6556..12289b3 100644
--- a/core/fxge/win32/cfx_psrenderer.cpp
+++ b/core/fxge/win32/cfx_psrenderer.cpp
@@ -17,6 +17,7 @@
 #include <utility>
 
 #include "core/fxcrt/bytestring.h"
+#include "core/fxcrt/fx_extension.h"
 #include "core/fxcrt/fx_memory.h"
 #include "core/fxcrt/fx_memory_wrappers.h"
 #include "core/fxcrt/fx_stream.h"
@@ -33,7 +34,7 @@
 #include "core/fxge/fx_freetype.h"
 #include "core/fxge/text_char_pos.h"
 #include "core/fxge/win32/cfx_psfonttracker.h"
-#include "third_party/base/check.h"
+#include "third_party/base/check_op.h"
 
 namespace {
 
@@ -43,6 +44,146 @@
                     FT_FSTYPE_BITMAP_EMBEDDING_ONLY)) == 0;
 }
 
+// Writes `font_data` out as the "/<psname>_sfnts" array definition used by a
+// Type 42 font. The data becomes one ASCIIHex string, 32 bytes per line, and
+// "00" is appended when the byte count is even. Returns nullopt when
+// `font_data` is empty or larger than the single string size limit.
+Optional<ByteString> GenerateType42SfntData(
+    const ByteString& psname,
+    pdfium::span<const uint8_t> font_data) {
+  // Per Type 42 font spec.
+  constexpr size_t kMaxSfntStringSize = 65535;
+  // TODO(thestig): Fonts that are too big need to be written out in sections.
+  if (font_data.empty() || font_data.size() > kMaxSfntStringSize)
+    return pdfium::nullopt;
+
+  // Each byte is written as 2 ASCIIHex characters, so really 64 chars per line.
+  constexpr size_t kMaxBytesPerLine = 32;
+  std::ostringstream output;
+  output << "/" << psname << "_sfnts [\n<\n";
+  size_t bytes_on_line = 0;
+  for (uint8_t datum : font_data) {
+    char hex[2];
+    FXSYS_IntToTwoHexChars(datum, hex);
+    output << hex[0] << hex[1];
+    ++bytes_on_line;
+    if (bytes_on_line == kMaxBytesPerLine) {
+      output << "\n";
+      bytes_on_line = 0;
+    }
+  }
+
+  // Pad with ASCIIHex NUL character per Type 42 font spec if needed.
+  const bool has_even_size = !FX_IsOdd(font_data.size());
+  if (has_even_size)
+    output << "00";
+
+  output << "\n>\n] def\n";
+  return ByteString(output);
+}
+
+// The value to use with GenerateType42FontDictionary() below, and the max
+// number of entries supported for non-CID fonts.
+// Also used to avoid buggy fonts by writing out at least this many entries,
+// per note in Poppler's Type 42 generation code.
+constexpr size_t kGlyphsPerDescendantFont = 256;
+
+// Returns PostScript defining `psname`: one Type 42 descendant font for every
+// `glyphs_per_descendant_font` glyphs, then a Type 0 font that combines them.
+ByteString GenerateType42FontDictionary(const ByteString& psname,
+                                        const FX_RECT& bbox,
+                                        size_t num_glyphs,
+                                        size_t glyphs_per_descendant_font) {
+  DCHECK_LE(glyphs_per_descendant_font, kGlyphsPerDescendantFont);
+  CHECK_GT(glyphs_per_descendant_font, 0u);
+  // Round up so a partially filled last descendant font is still emitted.
+  const size_t descendant_font_count =
+      (num_glyphs + glyphs_per_descendant_font - 1) /
+      glyphs_per_descendant_font;
+
+  std::ostringstream output;
+  for (size_t i = 0; i < descendant_font_count; ++i) {
+    output << "8 dict begin\n";
+    output << "/FontType 42 def\n";
+    output << "/FontMatrix [1 0 0 1 0 0] def\n";
+    output << "/FontName /" << psname << "_" << i << " def\n";
+
+    output << "/Encoding " << glyphs_per_descendant_font << " array\n";
+    for (size_t j = 0, pos = i * glyphs_per_descendant_font;
+         j < glyphs_per_descendant_font; ++j, ++pos) {
+      if (pos >= num_glyphs)
+        break;
+      // `j` is a size_t, so format it with the `z` length modifier.
+      output << ByteString::Format("dup %zu /c%02zx put\n", j, j);
+    }
+    output << "readonly def\n";
+
+    // Note: `bbox` is LTRB, while /FontBBox is LBRT. Writing it out as LTRB
+    // gets the correct values.
+    output << "/FontBBox [" << bbox.left << " " << bbox.top << " " << bbox.right
+           << " " << bbox.bottom << "] def\n";
+
+    output << "/PaintType 0 def\n";
+    output << "/CharStrings " << glyphs_per_descendant_font + 1
+           << " dict dup begin\n";
+    output << "/.notdef 0 def\n";
+    for (size_t j = 0, pos = i * glyphs_per_descendant_font;
+         j < glyphs_per_descendant_font; ++j, ++pos) {
+      if (pos >= num_glyphs)
+        break;
+      // Glyph name /cXX (XX = `j` in hex) selects glyph index `pos`.
+      output << ByteString::Format("/c%02zx %zu def\n", j, pos);
+    }
+    output << "end readonly def\n";
+
+    output << "/sfnts " << psname << "_sfnts def\n";
+    output << "FontName currentdict end definefont pop\n";
+  }
+
+  output << "6 dict begin\n";
+  output << "/FontName /" << psname << " def\n";
+  output << "/FontType 0 def\n";
+  output << "/FontMatrix [1 0 0 1 0 0] def\n";
+  output << "/FMapType 2 def\n";
+
+  output << "/Encoding [\n";
+  for (size_t i = 0; i < descendant_font_count; ++i)
+    output << i << "\n";
+  output << "] def\n";
+
+  output << "/FDepVector [\n";
+  for (size_t i = 0; i < descendant_font_count; ++i)
+    output << "/" << psname << "_" << i << " findfont\n";
+  output << "] def\n";
+
+  output << "FontName currentdict end definefont pop\n";
+  output << "%%EndResource\n";
+  return ByteString(output);
+}
+
+// Builds the whole Type 42 font resource for `font`; empty string on failure.
+ByteString GenerateType42FontData(const CFX_Font* font) {
+  const FXFT_FaceRec* face_rec = font->GetFaceRec();
+  if (!face_rec)
+    return ByteString();
+
+  const ByteString font_name = font->GetPsName();
+  DCHECK(!font_name.IsEmpty());
+  const Optional<ByteString> maybe_sfnts =
+      GenerateType42SfntData(font_name, font->GetFontSpan());
+  if (!maybe_sfnts.has_value())
+    return ByteString();
+
+  ByteString result = "%%BeginResource: font ";
+  result += font_name;
+  result += "\n";
+  result += maybe_sfnts.value();
+  result += GenerateType42FontDictionary(
+      font_name, font->GetRawBBox().value(), face_rec->num_glyphs,
+      kGlyphsPerDescendantFont);
+  return result;
+}
+
 }  // namespace
 
 struct CFX_PSRenderer::Glyph {
@@ -615,17 +756,23 @@
 
   bool is_existing_font = m_pFontTracker->SeenFontObject(font);
   if (!is_existing_font) {
-    // TODO(thestig): Generate font here.
-    bool generated_font = false;
-    if (!generated_font)
+    ByteString font_data = GenerateType42FontData(font);
+    if (font_data.IsEmpty())
       return false;
 
     m_pFontTracker->AddFontObject(font);
-    // TODO(thestig): Write out font here.
+    WritePreambleString(font_data.AsStringView());
   }
 
-  // TODO(thestig): Write out text here.
-  return false;
+  buf << "/" << font->GetPsName() << " " << font_size << " selectfont\n";
+  for (int i = 0; i < char_count; ++i) {
+    buf << char_pos[i].m_Origin.x << " " << char_pos[i].m_Origin.y << " m";
+    uint8_t hi = char_pos[i].m_GlyphIndex / 256;
+    uint8_t lo = char_pos[i].m_GlyphIndex % 256;
+    ByteString hex = ByteString::Format("<%02X%02X>", hi, lo);
+    buf << hex.AsStringView() << "Tj\n";
+  }
+  return true;
 }
 
 bool CFX_PSRenderer::DrawText(int nChars,
@@ -744,3 +891,20 @@
 void CFX_PSRenderer::WriteString(ByteStringView str) {
   m_Output << str;
 }
+
+// static. Forwards to the file-local GenerateType42SfntData() for unit tests.
+Optional<ByteString> CFX_PSRenderer::GenerateType42SfntDataForTesting(
+    const ByteString& psname,
+    pdfium::span<const uint8_t> font_data) {
+  return GenerateType42SfntData(psname, font_data);
+}
+
+// static. Forwards to the file-local GenerateType42FontDictionary() for tests.
+ByteString CFX_PSRenderer::GenerateType42FontDictionaryForTesting(
+    const ByteString& psname,
+    const FX_RECT& bbox,
+    size_t num_glyphs,
+    size_t glyphs_per_descendant_font) {
+  return GenerateType42FontDictionary(psname, bbox, num_glyphs,
+                                      glyphs_per_descendant_font);
+}
diff --git a/core/fxge/win32/cfx_psrenderer.h b/core/fxge/win32/cfx_psrenderer.h
index 2836398..67efeca 100644
--- a/core/fxge/win32/cfx_psrenderer.h
+++ b/core/fxge/win32/cfx_psrenderer.h
@@ -111,6 +111,16 @@
                 float font_size,
                 uint32_t color);
 
+  static Optional<ByteString> GenerateType42SfntDataForTesting(
+      const ByteString& psname,
+      pdfium::span<const uint8_t> font_data);
+
+  static ByteString GenerateType42FontDictionaryForTesting(
+      const ByteString& psname,
+      const FX_RECT& bbox,
+      size_t num_glyphs,
+      size_t glyphs_per_descendant_font);
+
  private:
   struct Glyph;
 
diff --git a/core/fxge/win32/cfx_psrenderer_unittest.cpp b/core/fxge/win32/cfx_psrenderer_unittest.cpp
new file mode 100644
index 0000000..3b53aeb
--- /dev/null
+++ b/core/fxge/win32/cfx_psrenderer_unittest.cpp
@@ -0,0 +1,140 @@
+// Copyright 2021 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "core/fxge/win32/cfx_psrenderer.h"
+#include "core/fxcrt/bytestring.h"
+#include "testing/gtest/include/gtest/gtest.h"
+#include "third_party/base/optional.h"
+#include "third_party/base/span.h"
+
+TEST(PSRendererTest, GenerateType42SfntData) {
+  Optional<ByteString> result;
+  // Empty font data yields no result.
+  result = CFX_PSRenderer::GenerateType42SfntDataForTesting("empty", {});
+  EXPECT_FALSE(result.has_value());
+  // An odd byte count is written out without a padding byte.
+  constexpr uint8_t kOddByteCountTestData[] = {0, 32, 55};
+  static constexpr char kExpectedOddByteCountResult[] = R"(/odd_sfnts [
+<
+002037
+>
+] def
+)";
+  result = CFX_PSRenderer::GenerateType42SfntDataForTesting(
+      "odd", kOddByteCountTestData);
+  ASSERT_TRUE(result.has_value());
+  EXPECT_STREQ(kExpectedOddByteCountResult, result.value().c_str());
+
+  // An even byte count requires padding: "00" is appended to the hex data.
+  constexpr uint8_t kEvenByteCountTestData[] = {0, 32, 66, 77};
+  static constexpr char kExpectedEvenByteCountResult[] = R"(/even_sfnts [
+<
+0020424D00
+>
+] def
+)";
+  result = CFX_PSRenderer::GenerateType42SfntDataForTesting(
+      "even", kEvenByteCountTestData);
+  ASSERT_TRUE(result.has_value());
+  EXPECT_STREQ(kExpectedEvenByteCountResult, result.value().c_str());
+}
+
+TEST(PSRendererTest, GenerateType42FontDictionary) {
+  ByteString result;
+  // 3 glyphs at 3 glyphs per font: everything fits in one descendant font.
+  static constexpr char kExpected1DescendantFontResult[] = R"(8 dict begin
+/FontType 42 def
+/FontMatrix [1 0 0 1 0 0] def
+/FontName /1descendant_0 def
+/Encoding 3 array
+dup 0 /c00 put
+dup 1 /c01 put
+dup 2 /c02 put
+readonly def
+/FontBBox [1 2 3 4] def
+/PaintType 0 def
+/CharStrings 4 dict dup begin
+/.notdef 0 def
+/c00 0 def
+/c01 1 def
+/c02 2 def
+end readonly def
+/sfnts 1descendant_sfnts def
+FontName currentdict end definefont pop
+6 dict begin
+/FontName /1descendant def
+/FontType 0 def
+/FontMatrix [1 0 0 1 0 0] def
+/FMapType 2 def
+/Encoding [
+0
+] def
+/FDepVector [
+/1descendant_0 findfont
+] def
+FontName currentdict end definefont pop
+%%EndResource
+)";
+  result = CFX_PSRenderer::GenerateType42FontDictionaryForTesting(
+      "1descendant", FX_RECT(1, 2, 3, 4), /*num_glyphs=*/3,
+      /*glyphs_per_descendant_font=*/3);
+  EXPECT_STREQ(kExpected1DescendantFontResult, result.c_str());
+  // 5 glyphs at 3 glyphs per font: the second descendant font has 2 glyphs.
+  static constexpr char kExpected2DescendantFontResult[] = R"(8 dict begin
+/FontType 42 def
+/FontMatrix [1 0 0 1 0 0] def
+/FontName /2descendant_0 def
+/Encoding 3 array
+dup 0 /c00 put
+dup 1 /c01 put
+dup 2 /c02 put
+readonly def
+/FontBBox [12 -5 34 199] def
+/PaintType 0 def
+/CharStrings 4 dict dup begin
+/.notdef 0 def
+/c00 0 def
+/c01 1 def
+/c02 2 def
+end readonly def
+/sfnts 2descendant_sfnts def
+FontName currentdict end definefont pop
+8 dict begin
+/FontType 42 def
+/FontMatrix [1 0 0 1 0 0] def
+/FontName /2descendant_1 def
+/Encoding 3 array
+dup 0 /c00 put
+dup 1 /c01 put
+readonly def
+/FontBBox [12 -5 34 199] def
+/PaintType 0 def
+/CharStrings 4 dict dup begin
+/.notdef 0 def
+/c00 3 def
+/c01 4 def
+end readonly def
+/sfnts 2descendant_sfnts def
+FontName currentdict end definefont pop
+6 dict begin
+/FontName /2descendant def
+/FontType 0 def
+/FontMatrix [1 0 0 1 0 0] def
+/FMapType 2 def
+/Encoding [
+0
+1
+] def
+/FDepVector [
+/2descendant_0 findfont
+/2descendant_1 findfont
+] def
+FontName currentdict end definefont pop
+%%EndResource
+)";
+  result = CFX_PSRenderer::GenerateType42FontDictionaryForTesting(
+      "2descendant", FX_RECT(12, -5, 34, 199), /*num_glyphs=*/5,
+      /*glyphs_per_descendant_font=*/3);
+  EXPECT_STREQ(kExpected2DescendantFontResult, result.c_str());
+}