// blob: b16163fa1269dbd702b35a2e1a196262c9342986 [file] [log] [blame]
// Copyright 2015 The PDFium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "core/fpdfapi/font/cpdf_tounicodemap.h"
#include "core/fpdfapi/parser/cpdf_stream.h"
#include "core/fxcrt/retain_ptr.h"
#include "core/fxcrt/span.h"
#include "testing/gmock/include/gmock/gmock.h"
#include "testing/gtest/include/gtest/gtest.h"
TEST(CPDFToUnicodeMapTest, StringToCode) {
  // Inputs that must parse, paired with the code they must produce.
  struct ValidCase {
    const char* input;
    uint32_t expected_code;
  };
  const ValidCase kValidCases[] = {
      {"<0001>", 1u},
      {"<c2>", 194u},
      {"<A2>", 162u},
      {"<Af2>", 2802u},
      {"<FFFFFFFF>", 4294967295u},
      // Whitespaces within the string are ignored.
      {"<00\n0\r1>", 1u},
      {"<c 2>", 194u},
      {"<A2\r\n>", 162u},
  };
  for (const ValidCase& valid : kValidCases) {
    EXPECT_THAT(CPDF_ToUnicodeMap::StringToCode(valid.input),
                testing::Optional(valid.expected_code))
        << "input: " << valid.input;
  }

  // Integer overflow: the value needs more than 32 bits, so no code results.
  const char* const kOverflowInputs[] = {"<100000000>", "<1abcdFFFF>"};
  for (const char* input : kOverflowInputs) {
    EXPECT_FALSE(CPDF_ToUnicodeMap::StringToCode(input).has_value())
        << "input: " << input;
  }

  // Invalid strings: empty input, missing angle brackets, or characters that
  // are not hexadecimal digits.
  const char* const kInvalidInputs[] = {
      "", "<>", "12", "<12", "12>", "<1-7>", "00AB", "<00NN>",
  };
  for (const char* input : kInvalidInputs) {
    EXPECT_FALSE(CPDF_ToUnicodeMap::StringToCode(input).has_value())
        << "input: " << input;
  }
}
TEST(CPDFToUnicodeMapTest, StringToWideString) {
  // Inputs that are empty or not fully wrapped in "<...>" give an empty
  // result.
  const char* const kEmptyResultInputs[] = {
      "", "1234", "<c2", "<c2D2", "c2ab>",
  };
  for (const char* input : kEmptyResultInputs) {
    EXPECT_EQ(L"", CPDF_ToUnicodeMap::StringToWideString(input))
        << "input: " << input;
  }

  // A single group of four hex digits gives one character. The trailing
  // "ab" and the " 1234" after the first group do not contribute to the
  // result.
  WideString expected = L"\xc2ab";
  const char* const kOneCharInputs[] = {"<c2ab>", "<c2abab>", "<c2ab 1234>"};
  for (const char* input : kOneCharInputs) {
    EXPECT_EQ(expected, CPDF_ToUnicodeMap::StringToWideString(input))
        << "input: " << input;
  }

  // Two complete groups give two characters; hex digit case does not matter
  // and the trailing "12" does not contribute to the result.
  expected += L"\xfaab";
  const char* const kTwoCharInputs[] = {"<c2abFaAb>", "<c2abFaAb12>"};
  for (const char* input : kTwoCharInputs) {
    EXPECT_EQ(expected, CPDF_ToUnicodeMap::StringToWideString(input))
        << "input: " << input;
  }
}
TEST(CPDFToUnicodeMapTest, HandleBeginBFCharBadCount) {
  // Both inputs carry two bfchar entries, but the count in front of
  // "beginbfchar" says otherwise.
  static constexpr uint8_t kCountTooSmall[] =
      "1 beginbfchar<1><0041><2><0042>endbfchar";
  static constexpr uint8_t kCountTooLarge[] =
      "3 beginbfchar<1><0041><2><0042>endbfchar";

  // Loads `cmap_data` and verifies that neither entry made it into the map.
  const auto expect_nothing_mapped = [](const auto& cmap_data) {
    auto cmap_stream = pdfium::MakeRetain<CPDF_Stream>(cmap_data);
    CPDF_ToUnicodeMap to_unicode(cmap_stream);
    EXPECT_EQ(0u, to_unicode.ReverseLookup(0x0041));
    EXPECT_EQ(0u, to_unicode.ReverseLookup(0x0042));
    EXPECT_EQ(0u, to_unicode.GetUnicodeCountByCharcodeForTesting(1u));
    EXPECT_EQ(0u, to_unicode.GetUnicodeCountByCharcodeForTesting(2u));
  };

  expect_nothing_mapped(kCountTooSmall);
  expect_nothing_mapped(kCountTooLarge);
}
TEST(CPDFToUnicodeMapTest, HandleBeginBFCharTolerateOutOfSpecCount) {
  // Tolerate more than 100 entries.
  //
  // The input holds 112 entries for charcodes 0x00 through 0x6F. Each
  // charcode maps to the unicode value equal to the charcode with bit 3
  // flipped, e.g. <0000> -> <0008> and <0008> -> <0000>.
  static constexpr uint8_t kOversizedBFCharInput[] =
      "112 beginbfchar"
      "<0000><0008><0001><0009><0002><000A><0003><000B>"
      "<0004><000C><0005><000D><0006><000E><0007><000F>"
      "<0008><0000><0009><0001><000A><0002><000B><0003>"
      "<000C><0004><000D><0005><000E><0006><000F><0007>"
      "<0010><0018><0011><0019><0012><001A><0013><001B>"
      "<0014><001C><0015><001D><0016><001E><0017><001F>"
      "<0018><0010><0019><0011><001A><0012><001B><0013>"
      "<001C><0014><001D><0015><001E><0016><001F><0017>"
      "<0020><0028><0021><0029><0022><002A><0023><002B>"
      "<0024><002C><0025><002D><0026><002E><0027><002F>"
      "<0028><0020><0029><0021><002A><0022><002B><0023>"
      "<002C><0024><002D><0025><002E><0026><002F><0027>"
      "<0030><0038><0031><0039><0032><003A><0033><003B>"
      "<0034><003C><0035><003D><0036><003E><0037><003F>"
      "<0038><0030><0039><0031><003A><0032><003B><0033>"
      "<003C><0034><003D><0035><003E><0036><003F><0037>"
      "<0040><0048><0041><0049><0042><004A><0043><004B>"
      "<0044><004C><0045><004D><0046><004E><0047><004F>"
      "<0048><0040><0049><0041><004A><0042><004B><0043>"
      "<004C><0044><004D><0045><004E><0046><004F><0047>"
      "<0050><0058><0051><0059><0052><005A><0053><005B>"
      "<0054><005C><0055><005D><0056><005E><0057><005F>"
      "<0058><0050><0059><0051><005A><0052><005B><0053>"
      "<005C><0054><005D><0055><005E><0056><005F><0057>"
      "<0060><0068><0061><0069><0062><006A><0063><006B>"
      "<0064><006C><0065><006D><0066><006E><0067><006F>"
      "<0068><0060><0069><0061><006A><0062><006B><0063>"
      "<006C><0064><006D><0065><006E><0066><006F><0067>"
      "endbfchar";
  auto cmap_stream = pdfium::MakeRetain<CPDF_Stream>(kOversizedBFCharInput);
  CPDF_ToUnicodeMap to_unicode(cmap_stream);

  // An entry near the start (<0009><0001>) and the very last one
  // (<006F><0067>) must both be present.
  EXPECT_EQ(9u, to_unicode.ReverseLookup(0x0001));
  EXPECT_EQ(111u, to_unicode.ReverseLookup(0x0067));
  EXPECT_EQ(1u, to_unicode.GetUnicodeCountByCharcodeForTesting(1u));
  EXPECT_EQ(1u, to_unicode.GetUnicodeCountByCharcodeForTesting(111u));
}
TEST(CPDFToUnicodeMapTest, HandleBeginBFRangeRejectsInvalidCidValues) {
  // Loads `cmap_data` and verifies that looking up each entry of `charcodes`
  // gives an empty string.
  const auto expect_unmapped = [](const auto& cmap_data,
                                  const auto& charcodes) {
    auto cmap_stream = pdfium::MakeRetain<CPDF_Stream>(cmap_data);
    CPDF_ToUnicodeMap to_unicode(cmap_stream);
    for (uint32_t charcode : charcodes) {
      EXPECT_EQ(L"", to_unicode.Lookup(charcode)) << "charcode: " << charcode;
    }
  };

  // Range <FFFFFFFF>..<FFFFFFFF> with an array, a single code, and a
  // two-character string as the destination.
  static constexpr uint32_t kMaxCharcodeOnly[] = {0xffffffff};
  static constexpr uint8_t kMaxRangeWithArray[] =
      "1 beginbfrange<FFFFFFFF><FFFFFFFF>[<0041>]endbfrange";
  expect_unmapped(kMaxRangeWithArray, kMaxCharcodeOnly);

  static constexpr uint8_t kMaxRangeWithCode[] =
      "1 beginbfrange<FFFFFFFF><FFFFFFFF><0042>endbfrange";
  expect_unmapped(kMaxRangeWithCode, kMaxCharcodeOnly);

  static constexpr uint8_t kMaxRangeWithString[] =
      "1 beginbfrange<FFFFFFFF><FFFFFFFF><00410042>endbfrange";
  expect_unmapped(kMaxRangeWithString, kMaxCharcodeOnly);

  // Range whose upper bound is 0x10000.
  static constexpr uint8_t kHighBoundTooLarge[] =
      "1 beginbfrange<0001><10000>[<0041>]endbfrange";
  static constexpr uint32_t kHighBoundCharcodes[] = {0xffffffff, 0x0001,
                                                     0xffff, 0x10000};
  expect_unmapped(kHighBoundTooLarge, kHighBoundCharcodes);

  // Range whose bounds are both above 0xFFFF.
  static constexpr uint8_t kBothBoundsTooLarge[] =
      "1 beginbfrange<10000><10001>[<0041>]endbfrange";
  static constexpr uint32_t kBothBoundsCharcodes[] = {0x10000, 0x10001};
  expect_unmapped(kBothBoundsTooLarge, kBothBoundsCharcodes);

  // Range whose lower bound is greater than its upper bound.
  static constexpr uint8_t kReversedBounds[] =
      "1 beginbfrange<0006><0004>[<0041>]endbfrange";
  static constexpr uint32_t kReversedBoundsCharcodes[] = {0x0004, 0x0005,
                                                          0x0006};
  expect_unmapped(kReversedBounds, kReversedBoundsCharcodes);
}
TEST(CPDFToUnicodeMapTest, HandleBeginBFRangeRejectsMismatchedBracket) {
  // The destination array opens with '[' but is closed with '}'.
  static constexpr uint8_t kMismatchedBracketInput[] =
      "1 beginbfrange<3><3>[<0041>}endbfrange";
  auto cmap_stream = pdfium::MakeRetain<CPDF_Stream>(kMismatchedBracketInput);
  CPDF_ToUnicodeMap to_unicode(cmap_stream);

  // Nothing may be recorded for charcode 3 or for unicode 0x0041.
  EXPECT_EQ(0u, to_unicode.ReverseLookup(0x0041));
  EXPECT_EQ(0u, to_unicode.GetUnicodeCountByCharcodeForTesting(3u));
}
TEST(CPDFToUnicodeMapTest, HandleBeginBFRangeBadCount) {
  // Both inputs carry two bfrange entries, but the count in front of
  // "beginbfrange" says otherwise.
  static constexpr uint8_t kCountTooSmall[] =
      "1 beginbfrange<1><2><0040><4><5><0050>endbfrange";
  static constexpr uint8_t kCountTooLarge[] =
      "3 beginbfrange<1><2><0040><4><5><0050>endbfrange";

  // Loads `cmap_data` and verifies that no mapping exists for unicodes
  // 0x0039..0x0052 or for charcodes 0..6.
  const auto expect_nothing_mapped = [](const auto& cmap_data) {
    auto cmap_stream = pdfium::MakeRetain<CPDF_Stream>(cmap_data);
    CPDF_ToUnicodeMap to_unicode(cmap_stream);
    for (wchar_t unicode = 0x0039; unicode <= 0x0052; ++unicode) {
      EXPECT_EQ(0u, to_unicode.ReverseLookup(unicode));
    }
    for (uint32_t charcode = 0; charcode <= 6; ++charcode) {
      EXPECT_EQ(0u, to_unicode.GetUnicodeCountByCharcodeForTesting(charcode));
    }
  };

  expect_nothing_mapped(kCountTooSmall);
  expect_nothing_mapped(kCountTooLarge);
}
TEST(CPDFToUnicodeMapTest, HandleBeginBFRangeGoodCount) {
  // Two ranges and a matching count of 2: charcodes 1..2 start at unicode
  // 0x0040 and charcodes 4..5 start at unicode 0x0050.
  static constexpr uint8_t kTwoRangesInput[] =
      "2 beginbfrange<1><2><0040><4><5><0050>endbfrange";
  auto cmap_stream = pdfium::MakeRetain<CPDF_Stream>(kTwoRangesInput);
  CPDF_ToUnicodeMap to_unicode(cmap_stream);

  // Unicode values in and around both ranges, with the charcode that
  // ReverseLookup() must report (0 where nothing is mapped).
  struct ReverseCase {
    wchar_t unicode;
    uint32_t expected_charcode;
  };
  const ReverseCase kReverseCases[] = {
      {0x0039, 0u}, {0x0040, 1u}, {0x0041, 2u}, {0x0042, 0u},
      {0x0049, 0u}, {0x0050, 4u}, {0x0051, 5u}, {0x0052, 0u},
  };
  for (const ReverseCase& reverse : kReverseCases) {
    EXPECT_EQ(reverse.expected_charcode,
              to_unicode.ReverseLookup(reverse.unicode));
  }

  // Expected number of unicode entries for charcodes 0..6, indexed by
  // charcode.
  const unsigned kExpectedCounts[] = {0u, 1u, 1u, 0u, 1u, 1u, 0u};
  uint32_t charcode = 0;
  for (unsigned expected_count : kExpectedCounts) {
    EXPECT_EQ(expected_count,
              to_unicode.GetUnicodeCountByCharcodeForTesting(charcode));
    ++charcode;
  }
}
TEST(CPDFToUnicodeMapTest, InsertIntoMultimap) {
  // Case 1: both the CIDs and the unicodes are different.
  static constexpr uint8_t kDistinctEntries[] =
      "2 beginbfchar<1><0041><2><0042>endbfchar";
  auto distinct_stream = pdfium::MakeRetain<CPDF_Stream>(kDistinctEntries);
  CPDF_ToUnicodeMap distinct_map(distinct_stream);
  EXPECT_EQ(1u, distinct_map.ReverseLookup(0x0041));
  EXPECT_EQ(2u, distinct_map.ReverseLookup(0x0042));
  EXPECT_EQ(1u, distinct_map.GetUnicodeCountByCharcodeForTesting(1u));
  EXPECT_EQ(1u, distinct_map.GetUnicodeCountByCharcodeForTesting(2u));

  // Case 2: the same CID with different unicodes. Both unicodes must be
  // counted for CID 0.
  static constexpr uint8_t kSameCidTwoUnicodes[] =
      "2 beginbfrange<0><0><0041><0><0><0042>endbfrange";
  auto same_cid_stream = pdfium::MakeRetain<CPDF_Stream>(kSameCidTwoUnicodes);
  CPDF_ToUnicodeMap same_cid_map(same_cid_stream);
  EXPECT_EQ(0u, same_cid_map.ReverseLookup(0x0041));
  EXPECT_EQ(0u, same_cid_map.ReverseLookup(0x0042));
  EXPECT_EQ(2u, same_cid_map.GetUnicodeCountByCharcodeForTesting(0u));

  // Case 3: duplicate mappings of CID 0 to unicode "A", once through
  // bfrange and once through bfchar. There should be only 1 entry in
  // `m_Multimap`.
  static constexpr uint8_t kDuplicateMapping[] =
      "1 beginbfrange<0><0>[<0041>]endbfrange\n"
      "1 beginbfchar<0><0041>endbfchar";
  auto duplicate_stream = pdfium::MakeRetain<CPDF_Stream>(kDuplicateMapping);
  CPDF_ToUnicodeMap duplicate_map(duplicate_stream);
  EXPECT_EQ(0u, duplicate_map.ReverseLookup(0x0041));
  EXPECT_EQ(1u, duplicate_map.GetUnicodeCountByCharcodeForTesting(0u));
}
TEST(CPDFToUnicodeMapTest, NonBmpUnicodeLookup) {
  // Charcode 0x01 maps to the UTF-16 surrogate pair D841 DE76, which encodes
  // the code point U+20676.
  static constexpr uint8_t kSurrogatePairInput[] =
      "1 beginbfchar<01><d841de76>endbfchar";
  auto cmap_stream = pdfium::MakeRetain<CPDF_Stream>(kSurrogatePairInput);
  CPDF_ToUnicodeMap to_unicode(cmap_stream);

  // The forward lookup returns the surrogate pair as stored.
  EXPECT_EQ(L"\xd841\xde76", to_unicode.Lookup(0x01));
#if defined(WCHAR_T_IS_32_BIT)
  // TODO(crbug.com/374947848): Should work if wchar_t is 16-bit.
  // TODO(crbug.com/374947848): Should return 1u.
  EXPECT_EQ(0u, to_unicode.ReverseLookup(0x20676));
#endif
}