Treat all Unicode character maps like MSUnicode

PDF 32000-1:2008, section 9.6.6.4 "Encodings for TrueType Fonts",
outlines how to handle TrueType fonts with an /Encoding entry. In this
description, only the handling of the TrueType cmap subtable (3,1)
(platform ID 3, encoding ID 1) is mandated. When nothing applies, "a
conforming reader may supply a mapping of its choosing." PDFium
currently chooses the encoding given by /ToUnicode.

Change this behavior to consider all Unicode character maps, which
includes the TrueType cmap subtables with (platform ID, encoding ID)
(3,1), (3,10), and (0,*). The /ToUnicode encoding is still used as a
fallback if no Unicode (or MacRoman) character map is present.

Bug: 378932943
Change-Id: Id22e3133d926efe52ec843257e3c1128f6a26ff1
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/133091
Reviewed-by: Ben Wagner <bungeman@google.com>
Reviewed-by: Lei Zhang <thestig@chromium.org>
Commit-Queue: Ben Wagner <bungeman@google.com>
diff --git a/core/fpdfapi/font/cpdf_cidfont.cpp b/core/fpdfapi/font/cpdf_cidfont.cpp
index a1dc253..3d6a34e 100644
--- a/core/fpdfapi/font/cpdf_cidfont.cpp
+++ b/core/fpdfapi/font/cpdf_cidfont.cpp
@@ -719,7 +719,7 @@
 
       charcode += 31;
       RetainPtr<CFX_Face> face = font_.GetFace();
-      bool bMSUnicode = UseTTCharmapMSUnicode(face);
+      bool bMSUnicode = UseTTCharmapUnicode(face);
       bool bMacRoman = !bMSUnicode && UseTTCharmapMacRoman(face);
       FontEncoding base_encoding = FontEncoding::kStandard;
       if (bMSUnicode) {
diff --git a/core/fpdfapi/font/cpdf_font.cpp b/core/fpdfapi/font/cpdf_font.cpp
index 97535df..d8baa11 100644
--- a/core/fpdfapi/font/cpdf_font.cpp
+++ b/core/fpdfapi/font/cpdf_font.cpp
@@ -428,6 +428,16 @@
   return font_fallbacks_[position].get();
 }
 
+bool CPDF_Font::UseTTCharmapUnicode(const RetainPtr<CFX_Face>& face) {
+  for (size_t i = 0; i < face->GetCharMapCount(); i++) {
+    if (face->GetCharMapEncodingByIndex(i) == fxge::FontEncoding::kUnicode) {
+      face->SetCharMapByIndex(i);
+      return true;
+    }
+  }
+  return false;
+}
+
 // static
 bool CPDF_Font::UseTTCharmap(const RetainPtr<CFX_Face>& face,
                              int platform_id,
diff --git a/core/fpdfapi/font/cpdf_font.h b/core/fpdfapi/font/cpdf_font.h
index 8ffde74..3507817 100644
--- a/core/fpdfapi/font/cpdf_font.h
+++ b/core/fpdfapi/font/cpdf_font.h
@@ -140,10 +140,10 @@
   CPDF_Font(CPDF_Document* document, RetainPtr<CPDF_Dictionary> font_dict);
   ~CPDF_Font() override;
 
+  // Tries to select any Unicode character map.
+  static bool UseTTCharmapUnicode(const RetainPtr<CFX_Face>& face);
+
   // Commonly used wrappers for UseTTCharmap().
-  static bool UseTTCharmapMSUnicode(const RetainPtr<CFX_Face>& face) {
-    return UseTTCharmap(face, 3, 1);
-  }
   static bool UseTTCharmapMSSymbol(const RetainPtr<CFX_Face>& face) {
     return UseTTCharmap(face, 3, 0);
   }
diff --git a/core/fpdfapi/font/cpdf_truetypefont.cpp b/core/fpdfapi/font/cpdf_truetypefont.cpp
index 8fcdc41..edf921d 100644
--- a/core/fpdfapi/font/cpdf_truetypefont.cpp
+++ b/core/fpdfapi/font/cpdf_truetypefont.cpp
@@ -186,7 +186,7 @@
 }
 
 CPDF_TrueTypeFont::CharmapType CPDF_TrueTypeFont::DetermineCharmapType() const {
-  if (UseTTCharmapMSUnicode(font_.GetFace())) {
+  if (UseTTCharmapUnicode(font_.GetFace())) {
     return CharmapType::kMSUnicode;
   }