Ensure all TrueType Unicode cmap formats are equal

A few parts of the PDF specification specifically call out TrueType cmap
format (3,1) as the sole TrueType Unicode cmap format. However, from a
user perspective the format of the TrueType cmap table is irrelevant,
the only thing that matters is that the character map converts Unicode
code points into glyph ids.

Add a test that ensures TrueType cmap format (0,3) is treated the same
as TrueType cmap format (3,1) in the case where a /Encoding
/MacRomanEncoding and (an inaccurate) /ToUnicode is present.

Bug: 378932943
Change-Id: Ifc82c51e3fa06c0ad1c559468b26c3bf8fc88689
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/133210
Reviewed-by: Lei Zhang <thestig@chromium.org>
Reviewed-by: Ben Wagner <bungeman@google.com>
Commit-Queue: Ben Wagner <bungeman@google.com>
diff --git a/core/fpdfapi/font/BUILD.gn b/core/fpdfapi/font/BUILD.gn
index eb8dbcf..14c1e9c 100644
--- a/core/fpdfapi/font/BUILD.gn
+++ b/core/fpdfapi/font/BUILD.gn
@@ -61,6 +61,7 @@
     "cpdf_cmapparser_unittest.cpp",
     "cpdf_simplefont_unittest.cpp",
     "cpdf_tounicodemap_unittest.cpp",
+    "cpdf_truetypefont_unittest.cpp",
   ]
   deps = [
     ":font",
diff --git a/core/fpdfapi/font/cpdf_truetypefont_unittest.cpp b/core/fpdfapi/font/cpdf_truetypefont_unittest.cpp
new file mode 100644
index 0000000..4d4d508
--- /dev/null
+++ b/core/fpdfapi/font/cpdf_truetypefont_unittest.cpp
@@ -0,0 +1,194 @@
+// Copyright 2025 The PDFium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "core/fpdfapi/font/cpdf_truetypefont.h"
+
+#include <stdint.h>
+
+#include <array>
+#include <utility>
+
+#include "core/fpdfapi/page/test_with_page_module.h"
+#include "core/fpdfapi/parser/cpdf_dictionary.h"
+#include "core/fpdfapi/parser/cpdf_name.h"
+#include "core/fpdfapi/parser/cpdf_reference.h"
+#include "core/fpdfapi/parser/cpdf_stream.h"
+#include "core/fpdfapi/parser/cpdf_test_document.h"
+#include "core/fxcrt/check_op.h"
+#include "core/fxcrt/retain_ptr.h"
+#include "core/fxcrt/span.h"
+#include "core/fxge/fontdata/chromefontdata/chromefontdata.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace {
+
+using CPDFTrueTypeFontTest = TestWithPageModule;
+
+// The following ttf data are all identical except for the format of the single
+// Unicode cmap which maps U+2E to glyph 1.
+
+constexpr std::array<uint8_t, 508> kCmap03Ttf = {
+    {0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x80, 0x00, 0x03, 0x00, 0x00,
+     0x63, 0x6d, 0x61, 0x70, 0x00, 0x09, 0x00, 0x63, 0x00, 0x00, 0x01, 0x10,
+     0x00, 0x00, 0x00, 0x2c, 0x67, 0x6c, 0x79, 0x66, 0x15, 0x59, 0x2e, 0xc4,
+     0x00, 0x00, 0x01, 0x44, 0x00, 0x00, 0x00, 0x30, 0x68, 0x65, 0x61, 0x64,
+     0xf4, 0x41, 0xf2, 0xdf, 0x00, 0x00, 0x00, 0x8c, 0x00, 0x00, 0x00, 0x36,
+     0x68, 0x68, 0x65, 0x61, 0x0d, 0x3d, 0x05, 0x03, 0x00, 0x00, 0x00, 0xc4,
+     0x00, 0x00, 0x00, 0x24, 0x68, 0x6d, 0x74, 0x78, 0x08, 0x39, 0x01, 0xba,
+     0x00, 0x00, 0x01, 0x08, 0x00, 0x00, 0x00, 0x08, 0x6c, 0x6f, 0x63, 0x61,
+     0x00, 0x18, 0x00, 0x0d, 0x00, 0x00, 0x01, 0x3c, 0x00, 0x00, 0x00, 0x06,
+     0x6d, 0x61, 0x78, 0x70, 0x04, 0xd2, 0x04, 0x52, 0x00, 0x00, 0x00, 0xe8,
+     0x00, 0x00, 0x00, 0x20, 0x6e, 0x61, 0x6d, 0x65, 0x02, 0x5f, 0x01, 0xe1,
+     0x00, 0x00, 0x01, 0x74, 0x00, 0x00, 0x00, 0x88, 0x00, 0x01, 0x00, 0x00,
+     0x00, 0x05, 0x02, 0x8f, 0x18, 0x96, 0xe8, 0x3b, 0x5f, 0x0f, 0x3c, 0xf5,
+     0x08, 0x1b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0xa2, 0xe3, 0x27, 0x2a,
+     0x00, 0x00, 0x00, 0x00, 0xe4, 0x73, 0x7f, 0x28, 0x00, 0xba, 0x00, 0x00,
+     0x05, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x01, 0x00, 0x00,
+     0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x07, 0x3e, 0xfe, 0x4e,
+     0x00, 0x43, 0x06, 0x00, 0x00, 0xba, 0x00, 0xb2, 0x05, 0x00, 0x00, 0x01,
+     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+     0x00, 0x00, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x04,
+     0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x10, 0x00, 0x2f,
+     0x00, 0x56, 0x00, 0x00, 0x04, 0x68, 0x04, 0x1d, 0x00, 0x00, 0x00, 0x00,
+     0x06, 0x00, 0x01, 0x00, 0x02, 0x39, 0x00, 0xba, 0x00, 0x00, 0x00, 0x01,
+     0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x04, 0x00, 0x20,
+     0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2e,
+     0xff, 0xff, 0x00, 0x00, 0x00, 0x2e, 0xff, 0xff, 0xff, 0xd3, 0x00, 0x01,
+     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x18, 0x00, 0x00,
+     0x00, 0x01, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x05, 0x00, 0x00, 0x03,
+     0x00, 0x00, 0x21, 0x11, 0x21, 0x11, 0x01, 0x00, 0x04, 0x00, 0x05, 0x00,
+     0xfb, 0x00, 0x00, 0x01, 0x00, 0xba, 0x00, 0x00, 0x01, 0x87, 0x00, 0xcd,
+     0x00, 0x03, 0x00, 0x00, 0x33, 0x35, 0x33, 0x15, 0xba, 0xcd, 0xcd, 0xcd,
+     0x00, 0x00, 0x00, 0x09, 0x00, 0x72, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00,
+     0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00,
+     0x00, 0x01, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00,
+     0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00,
+     0x00, 0x03, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00,
+     0x00, 0x04, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00,
+     0x00, 0x05, 0x00, 0x02, 0x00, 0x08, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00,
+     0x00, 0x06, 0x00, 0x06, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00,
+     0x00, 0x07, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00,
+     0x00, 0x08, 0x00, 0x06, 0x00, 0x10, 0x00, 0x41, 0x00, 0x52, 0x00, 0x41,
+     0x00, 0x52, 0x00, 0x56, 0x00, 0x41, 0x00, 0x4d, 0x00, 0x54, 0x00, 0x54,
+     0x00, 0x4d, 0x00, 0x43}};
+constexpr std::array<uint8_t, 508> kCmap31Ttf = {
+    {0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x80, 0x00, 0x03, 0x00, 0x00,
+     0x63, 0x6d, 0x61, 0x70, 0x00, 0x0c, 0x00, 0x61, 0x00, 0x00, 0x01, 0x10,
+     0x00, 0x00, 0x00, 0x2c, 0x67, 0x6c, 0x79, 0x66, 0x15, 0x59, 0x2e, 0xc4,
+     0x00, 0x00, 0x01, 0x44, 0x00, 0x00, 0x00, 0x30, 0x68, 0x65, 0x61, 0x64,
+     0xf4, 0x41, 0xf4, 0x22, 0x00, 0x00, 0x00, 0x8c, 0x00, 0x00, 0x00, 0x36,
+     0x68, 0x68, 0x65, 0x61, 0x0d, 0x3d, 0x05, 0x03, 0x00, 0x00, 0x00, 0xc4,
+     0x00, 0x00, 0x00, 0x24, 0x68, 0x6d, 0x74, 0x78, 0x08, 0x39, 0x01, 0xba,
+     0x00, 0x00, 0x01, 0x08, 0x00, 0x00, 0x00, 0x08, 0x6c, 0x6f, 0x63, 0x61,
+     0x00, 0x18, 0x00, 0x0d, 0x00, 0x00, 0x01, 0x3c, 0x00, 0x00, 0x00, 0x06,
+     0x6d, 0x61, 0x78, 0x70, 0x04, 0xd2, 0x04, 0x52, 0x00, 0x00, 0x00, 0xe8,
+     0x00, 0x00, 0x00, 0x20, 0x6e, 0x61, 0x6d, 0x65, 0x02, 0x5f, 0x01, 0xe1,
+     0x00, 0x00, 0x01, 0x74, 0x00, 0x00, 0x00, 0x88, 0x00, 0x01, 0x00, 0x00,
+     0x00, 0x05, 0x02, 0x8f, 0x18, 0x90, 0xe5, 0xb9, 0x5f, 0x0f, 0x3c, 0xf5,
+     0x08, 0x1b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0xa2, 0xe3, 0x27, 0x2a,
+     0x00, 0x00, 0x00, 0x00, 0xe4, 0x73, 0x80, 0x6b, 0x00, 0xba, 0x00, 0x00,
+     0x05, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x01, 0x00, 0x00,
+     0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x07, 0x3e, 0xfe, 0x4e,
+     0x00, 0x43, 0x06, 0x00, 0x00, 0xba, 0x00, 0xb2, 0x05, 0x00, 0x00, 0x01,
+     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+     0x00, 0x00, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x04,
+     0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x10, 0x00, 0x2f,
+     0x00, 0x56, 0x00, 0x00, 0x04, 0x68, 0x04, 0x1d, 0x00, 0x00, 0x00, 0x00,
+     0x06, 0x00, 0x01, 0x00, 0x02, 0x39, 0x00, 0xba, 0x00, 0x00, 0x00, 0x01,
+     0x00, 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x04, 0x00, 0x20,
+     0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2e,
+     0xff, 0xff, 0x00, 0x00, 0x00, 0x2e, 0xff, 0xff, 0xff, 0xd3, 0x00, 0x01,
+     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x18, 0x00, 0x00,
+     0x00, 0x01, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x05, 0x00, 0x00, 0x03,
+     0x00, 0x00, 0x21, 0x11, 0x21, 0x11, 0x01, 0x00, 0x04, 0x00, 0x05, 0x00,
+     0xfb, 0x00, 0x00, 0x01, 0x00, 0xba, 0x00, 0x00, 0x01, 0x87, 0x00, 0xcd,
+     0x00, 0x03, 0x00, 0x00, 0x33, 0x35, 0x33, 0x15, 0xba, 0xcd, 0xcd, 0xcd,
+     0x00, 0x00, 0x00, 0x09, 0x00, 0x72, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00,
+     0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00,
+     0x00, 0x01, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00,
+     0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00,
+     0x00, 0x03, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00,
+     0x00, 0x04, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00,
+     0x00, 0x05, 0x00, 0x02, 0x00, 0x08, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00,
+     0x00, 0x06, 0x00, 0x06, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00,
+     0x00, 0x07, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00,
+     0x00, 0x08, 0x00, 0x06, 0x00, 0x10, 0x00, 0x41, 0x00, 0x52, 0x00, 0x41,
+     0x00, 0x52, 0x00, 0x56, 0x00, 0x41, 0x00, 0x4d, 0x00, 0x54, 0x00, 0x54,
+     0x00, 0x4d, 0x00, 0x43}};
+
+}  // namespace
+
+TEST_F(CPDFTrueTypeFontTest, AllUnicodeCmapsTreatedEqually) {
+  auto make_font =
+      [](CPDF_TestDocument& doc,
+         pdfium::span<const uint8_t> font_data) -> RetainPtr<CPDF_Font> {
+    auto font_file_stream = doc.NewIndirect<CPDF_Stream>(
+        DataVector<uint8_t>(std::begin(font_data), std::end(font_data)),
+        pdfium::MakeRetain<CPDF_Dictionary>());
+    const uint32_t font_file_stream_object_number =
+        font_file_stream->GetObjNum();
+    CHECK_GT(font_file_stream_object_number, 0u);
+    auto font_file_stream_ref = pdfium::MakeRetain<CPDF_Reference>(
+        &doc, font_file_stream_object_number);
+
+    auto font_descriptor_dict = pdfium::MakeRetain<CPDF_Dictionary>();
+    font_descriptor_dict->SetNewFor<CPDF_Name>("Type", "FontDescriptor");
+    font_descriptor_dict->SetFor("FontFile2", std::move(font_file_stream_ref));
+
+    // The MacRoman 0x2E char code to U+01 mapping is intentionally
+    // non-standard.
+    static constexpr char kToUnicode[] = (R"(
+/CIDInit /ProcSet findresource begin
+12 dict begin
+begincmap
+/CIDSystemInfo <<
+    /Registry (Adobe)
+    /Ordering (UCS)
+    /Supplement 0
+>> def
+/CMapName /Adobe-Identity-UCS def
+/CMapType 2 def
+1 begincodespacerange
+<00><FF>
+endcodespacerange
+1 beginbfrange
+<2E><2E><0001>
+endbfrange
+endcmap
+CMapName currentdict /CMap defineresource pop
+end
+end)");
+    auto to_unicode_stream = doc.NewIndirect<CPDF_Stream>(
+        DataVector<uint8_t>(std::begin(kToUnicode), std::end(kToUnicode)),
+        pdfium::MakeRetain<CPDF_Dictionary>());
+    const uint32_t to_unicode_object_number = to_unicode_stream->GetObjNum();
+    CHECK_GT(to_unicode_object_number, 0u);
+    auto to_unicode_ref =
+        pdfium::MakeRetain<CPDF_Reference>(&doc, to_unicode_object_number);
+
+    auto font_dict = pdfium::MakeRetain<CPDF_Dictionary>();
+    font_dict->SetNewFor<CPDF_Name>("BaseFont", "CHEESE+Swiss");
+    font_dict->SetNewFor<CPDF_Name>("Type", "Font");
+    font_dict->SetNewFor<CPDF_Name>("Subtype", "TrueType");
+    font_dict->SetNewFor<CPDF_Name>("Encoding", "MacRomanEncoding");
+    font_dict->SetFor("ToUnicode", std::move(to_unicode_ref));
+    font_dict->SetFor("FontDescriptor", std::move(font_descriptor_dict));
+
+    return CPDF_Font::Create(&doc, std::move(font_dict), nullptr);
+  };
+
+  CPDF_TestDocument doc;
+  auto cmap31 = make_font(doc, kCmap31Ttf);
+  auto cmap03 = make_font(doc, kCmap03Ttf);
+  ASSERT_TRUE(cmap31);
+  ASSERT_TRUE(cmap03);
+
+  uint32_t period = 0x2E;
+  bool unused_is_vert;
+  int cmap31_glyph = cmap31->GlyphFromCharCode(period, &unused_is_vert);
+  int cmap03_glyph = cmap03->GlyphFromCharCode(period, &unused_is_vert);
+
+  EXPECT_EQ(cmap31_glyph, 1);
+  EXPECT_EQ(cmap31_glyph, cmap03_glyph);
+}