Update rendering heuristic when a font is substituted

This CL refactors the heuristic condition into its own method
(introduced in: https://pdfium-review.googlesource.com/c/pdfium/+/6630),

Adds two more cases where we should skip this heuristic:
- When the actual font is loaded,
- When the font is substituted by one of the standard fonts

Bug: pdfium:1922
Change-Id: I5966bc7e49e02bac72ee364016f5c2cfb84eb30d
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/102070
Reviewed-by: Lei Zhang <thestig@chromium.org>
Commit-Queue: Nigi <nigi@chromium.org>
Reviewed-by: Nigi <nigi@chromium.org>
diff --git a/DEPS b/DEPS
index b4fb50c..71bb091 100644
--- a/DEPS
+++ b/DEPS
@@ -140,7 +140,7 @@
   # Three lines of non-changing comments so that
   # the commit queue can handle CLs rolling pdfium_tests
   # and whatever else without interference from each other.
-  'pdfium_tests_revision': '64f914cf7df677af6d8b43aebc6c8b8a5723b635',
+  'pdfium_tests_revision': '8270942034a1b3f626c8b1fcb4d317fad75c46a1',
   # Three lines of non-changing comments so that
   # the commit queue can handle CLs rolling resultdb
   # and whatever else without interference from each other.
diff --git a/core/fpdfapi/render/charposlist.cpp b/core/fpdfapi/render/charposlist.cpp
index 80b6b14..aba0ada 100644
--- a/core/fpdfapi/render/charposlist.cpp
+++ b/core/fpdfapi/render/charposlist.cpp
@@ -10,6 +10,7 @@
 #include "core/fpdfapi/font/cpdf_cidfont.h"
 #include "core/fpdfapi/font/cpdf_font.h"
 #include "core/fpdfapi/parser/cpdf_stream.h"
+#include "core/fxge/cfx_fontmapper.h"
 #include "core/fxge/cfx_substfont.h"
 #include "core/fxge/text_char_pos.h"
 
@@ -37,6 +38,54 @@
   return glyph_id != 0 || has_to_unicode;
 }
 
+// The following is not a perfect solution and can be further improved.
+// For example, if `subst_font` is "Book" and the `base_font_name` is "Bookman",
+// this function will return "true" even though the actual font "Bookman"
+// is not loaded.
+// An exact string match is not possible here, because `subst_font_name`
+// will be the same value for different postscript names.
+// For example: "Times New Roman" as `subst_font_name` for all of these
+// `base_font_name` values: "TimesNewRoman,Bold", "TimesNewRomanPS-Bold",
+// "TimesNewRomanBold" and "TimesNewRoman-Bold".
+bool IsActualFontLoaded(const CFX_SubstFont* subst_font,
+                        const ByteString& base_font_name) {
+  // Skip if we loaded the actual font.
+  // example: TimesNewRoman,Bold -> Times New Roman
+  ByteString subst_font_name = subst_font->m_Family;
+  subst_font_name.Remove(' ');
+  subst_font_name.MakeLower();
+
+  absl::optional<size_t> find =
+      base_font_name.Find(subst_font_name.AsStringView());
+  return find.has_value() && find.value() == 0;
+}
+
+bool ApplyGlyphSpacingHeuristic(const CPDF_Font* font,
+                                const CFX_Font* current_font,
+                                bool is_vertical_writing) {
+  if (is_vertical_writing || font->IsEmbedded() || !font->HasFontWidths()) {
+    return false;
+  }
+
+  // Skip if we loaded a standard alternate font.
+  // example: Helvetica -> Arial
+  ByteString base_font_name = font->GetBaseFontName();
+  base_font_name.MakeLower();
+
+  auto standard_font_name =
+      CFX_FontMapper::GetStandardFontName(&base_font_name);
+  if (standard_font_name.has_value()) {
+    return false;
+  }
+
+  CFX_SubstFont* subst_font = current_font->GetSubstFont();
+  if (subst_font->IsBuiltInGenericFont()) {
+    return false;
+  }
+
+  return !IsActualFontLoaded(subst_font, base_font_name);
+}
+
 }  // namespace
 
 std::vector<TextCharPos> GetCharPosList(pdfium::span<const uint32_t> char_codes,
@@ -94,8 +143,7 @@
     text_char_pos.m_bGlyphAdjust = false;
 
     float scaling_factor = 1.0f;
-    if (!font->IsEmbedded() && font->HasFontWidths() && !is_vertical_writing &&
-        !current_font->GetSubstFont()->IsBuiltInGenericFont()) {
+    if (ApplyGlyphSpacingHeuristic(font, current_font, is_vertical_writing)) {
       int pdf_glyph_width = font->GetCharWidthF(char_code);
       int font_glyph_width =
           current_font->GetGlyphWidth(text_char_pos.m_GlyphIndex);
diff --git a/testing/resources/pixel/bug_1308_1_expected.pdf.0.png b/testing/resources/pixel/bug_1308_1_expected.pdf.0.png
index 688ec42..33a45c0 100644
--- a/testing/resources/pixel/bug_1308_1_expected.pdf.0.png
+++ b/testing/resources/pixel/bug_1308_1_expected.pdf.0.png
Binary files differ
diff --git a/testing/resources/pixel/bug_1308_1_expected_skia.pdf.0.png b/testing/resources/pixel/bug_1308_1_expected_skia.pdf.0.png
index d15e2a9..33a45c0 100644
--- a/testing/resources/pixel/bug_1308_1_expected_skia.pdf.0.png
+++ b/testing/resources/pixel/bug_1308_1_expected_skia.pdf.0.png
Binary files differ
diff --git a/testing/resources/pixel/bug_1922.in b/testing/resources/pixel/bug_1922.in
new file mode 100644
index 0000000..c7b005b
--- /dev/null
+++ b/testing/resources/pixel/bug_1922.in
@@ -0,0 +1,66 @@
+{{header}}
+{{object 1 0}} <<
+  /Type /Catalog
+  /Pages 2 0 R
+>>
+endobj
+{{object 2 0}} <<
+  /Type /Pages
+  /MediaBox [0 0 100 100]
+  /Count 1
+  /Kids [3 0 R]
+>>
+endobj
+{{object 3 0}} <<
+  /Type /Page
+  /Parent 2 0 R
+  /Contents 5 0 R
+  /Resources <<
+    /Font <<
+      /F1 4 0 R
+    >>
+  >>
+>>
+endobj
+{{object 4 0}} <<
+  /Type /Font
+  /Subtype /TrueType
+  /BaseFont /TimesNewRoman,Bold
+  /Encoding /WinAnsiEncoding
+  /FirstChar 32
+  /LastChar 121
+  /Widths [
+    230.769 307.692 538.462 538.462 538.462 1153.846 846.154 307.692
+    307.692 307.692 538.462 538.462 230.769 307.692 230.769 307.692
+    538.462 538.462 538.462 538.462 538.462 538.462 538.462 538.462
+    538.462 538.462 307.692 307.692 538.462 538.462 538.462 538.462
+    923.077 692.308 692.308 692.308 692.308 538.462 538.462 692.308
+    692.308 307.692 538.462 692.308 615.385 846.154 692.308 769.231
+    615.385 769.231 692.308 615.385 615.385 615.385 692.308 1000.000
+    692.308 615.385 615.385 307.692 307.692 307.692 615.385 538.462
+    307.692 461.538 461.538 461.538 461.538 461.538 307.692 538.462
+    538.462 307.692 307.692 615.385 307.692 769.231 538.462 461.538
+    461.538 461.538 461.538 461.538 307.692 538.462 384.615 615.385
+    461.538 461.538
+  ]
+>>
+endobj
+{{object 5 0}} <<
+  {{streamlen}}
+>>
+stream
+2 J
+BT
+0 0 0 rg
+/F1 20 Tf
+10 40 Td
+[(U) -107.72357178 (n) -14.68963623 (i) 31.01132202
+(v) -125.67745972 (e) 14.6895752 (r) 35.90786743 (s) 57.12615967
+(i) 31.01132202 (t) -11.42520142 (y) -48.96524048]TJ
+ET
+endstream
+endobj
+{{xref}}
+{{trailer}}
+{{startxref}}
+%%EOF
\ No newline at end of file
diff --git a/testing/resources/pixel/bug_1922_expected.pdf.0.png b/testing/resources/pixel/bug_1922_expected.pdf.0.png
new file mode 100644
index 0000000..73057ac
--- /dev/null
+++ b/testing/resources/pixel/bug_1922_expected.pdf.0.png
Binary files differ
diff --git a/testing/resources/pixel/bug_1922_expected_mac.pdf.0.png b/testing/resources/pixel/bug_1922_expected_mac.pdf.0.png
new file mode 100644
index 0000000..2caa41a
--- /dev/null
+++ b/testing/resources/pixel/bug_1922_expected_mac.pdf.0.png
Binary files differ
diff --git a/testing/resources/pixel/bug_1922_expected_skia.pdf.0.png b/testing/resources/pixel/bug_1922_expected_skia.pdf.0.png
new file mode 100644
index 0000000..73057ac
--- /dev/null
+++ b/testing/resources/pixel/bug_1922_expected_skia.pdf.0.png
Binary files differ
diff --git a/testing/resources/pixel/bug_846_expected.pdf.0.png b/testing/resources/pixel/bug_846_expected.pdf.0.png
index 4629962..1ff3944 100644
--- a/testing/resources/pixel/bug_846_expected.pdf.0.png
+++ b/testing/resources/pixel/bug_846_expected.pdf.0.png
Binary files differ
diff --git a/testing/resources/pixel/bug_846_expected_mac.pdf.0.png b/testing/resources/pixel/bug_846_expected_mac.pdf.0.png
index a1823b0..b405dce 100644
--- a/testing/resources/pixel/bug_846_expected_mac.pdf.0.png
+++ b/testing/resources/pixel/bug_846_expected_mac.pdf.0.png
Binary files differ
diff --git a/testing/resources/pixel/bug_846_expected_skia.pdf.0.png b/testing/resources/pixel/bug_846_expected_skia.pdf.0.png
index 47304a5..f7fa9ff 100644
--- a/testing/resources/pixel/bug_846_expected_skia.pdf.0.png
+++ b/testing/resources/pixel/bug_846_expected_skia.pdf.0.png
Binary files differ
diff --git a/testing/resources/pixel/bug_846_expected_skia_mac.pdf.0.png b/testing/resources/pixel/bug_846_expected_skia_mac.pdf.0.png
index d09a095..5635527 100644
--- a/testing/resources/pixel/bug_846_expected_skia_mac.pdf.0.png
+++ b/testing/resources/pixel/bug_846_expected_skia_mac.pdf.0.png
Binary files differ
diff --git a/testing/resources/pixel/bug_846_expected_skia_win.pdf.0.png b/testing/resources/pixel/bug_846_expected_skia_win.pdf.0.png
index d09a095..5635527 100644
--- a/testing/resources/pixel/bug_846_expected_skia_win.pdf.0.png
+++ b/testing/resources/pixel/bug_846_expected_skia_win.pdf.0.png
Binary files differ
diff --git a/testing/resources/pixel/bug_909762_expected.pdf.0.png b/testing/resources/pixel/bug_909762_expected.pdf.0.png
index 2f78df9..d01a7fc 100644
--- a/testing/resources/pixel/bug_909762_expected.pdf.0.png
+++ b/testing/resources/pixel/bug_909762_expected.pdf.0.png
Binary files differ
diff --git a/testing/resources/pixel/bug_909762_expected_skia.pdf.0.png b/testing/resources/pixel/bug_909762_expected_skia.pdf.0.png
index f6bef1b..d01a7fc 100644
--- a/testing/resources/pixel/bug_909762_expected_skia.pdf.0.png
+++ b/testing/resources/pixel/bug_909762_expected_skia.pdf.0.png
Binary files differ
diff --git a/testing/resources/pixel/bug_909762_expected_win.pdf.0.png b/testing/resources/pixel/bug_909762_expected_win.pdf.0.png
index c6d2727..d01a7fc 100644
--- a/testing/resources/pixel/bug_909762_expected_win.pdf.0.png
+++ b/testing/resources/pixel/bug_909762_expected_win.pdf.0.png
Binary files differ