// blob: cdd6ee85878f4b0a13dbe628a8b75ff0304d0257 [file] [log] [blame]
// Copyright 2015 The PDFium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "core/fpdfapi/font/cpdf_tounicodemap.h"
#include "core/fpdfapi/parser/cpdf_stream.h"
#include "core/fxcrt/retain_ptr.h"
#include "core/fxcrt/span.h"
#include "testing/gmock/include/gmock/gmock.h"
#include "testing/gtest/include/gtest/gtest.h"
// Checks CPDF_ToUnicodeMap::StringToCode() against bracketed hex strings that
// are expected to produce a code, and against inputs that are expected to
// produce no value at all.
TEST(CPDFToUnicodeMapTest, StringToCode) {
  using testing::Optional;

  // Shorthands for the function under test.
  const auto to_code = [](const char* text) {
    return CPDF_ToUnicodeMap::StringToCode(text);
  };
  const auto yields_code = [&to_code](const char* text) {
    return to_code(text).has_value();
  };

  // Plain hex strings, with upper-case, lower-case and mixed-case digits.
  EXPECT_THAT(to_code("<0001>"), Optional(1u));
  EXPECT_THAT(to_code("<c2>"), Optional(194u));
  EXPECT_THAT(to_code("<A2>"), Optional(162u));
  EXPECT_THAT(to_code("<Af2>"), Optional(2802u));
  EXPECT_THAT(to_code("<FFFFFFFF>"), Optional(4294967295u));

  // Whitespaces within the string are ignored.
  EXPECT_THAT(to_code("<00\n0\r1>"), Optional(1u));
  EXPECT_THAT(to_code("<c 2>"), Optional(194u));
  EXPECT_THAT(to_code("<A2\r\n>"), Optional(162u));

  // Integer overflow: nine hex digits whose value exceeds 0xFFFFFFFF.
  EXPECT_FALSE(yields_code("<100000000>"));
  EXPECT_FALSE(yields_code("<1abcdFFFF>"));

  // Invalid strings: empty input, empty brackets, missing brackets, and
  // non-hex characters between the brackets.
  EXPECT_FALSE(yields_code(""));
  EXPECT_FALSE(yields_code("<>"));
  EXPECT_FALSE(yields_code("12"));
  EXPECT_FALSE(yields_code("<12"));
  EXPECT_FALSE(yields_code("12>"));
  EXPECT_FALSE(yields_code("<1-7>"));
  EXPECT_FALSE(yields_code("00AB"));
  EXPECT_FALSE(yields_code("<00NN>"));
}
// Checks CPDF_ToUnicodeMap::StringToWideString() with inputs expected to give
// an empty string, a single wide character, or two wide characters.
TEST(CPDFToUnicodeMapTest, StringToWideString) {
  // Shorthand for the function under test.
  const auto to_wide = [](const char* text) {
    return CPDF_ToUnicodeMap::StringToWideString(text);
  };

  // Empty input, and inputs lacking the opening '<' and/or the closing '>'.
  EXPECT_EQ(L"", to_wide(""));
  EXPECT_EQ(L"", to_wide("1234"));
  EXPECT_EQ(L"", to_wide("<c2"));
  EXPECT_EQ(L"", to_wide("<c2D2"));
  EXPECT_EQ(L"", to_wide("c2ab>"));

  // Inputs expected to give only the character 0xC2AB; nothing that follows
  // the first four hex digits shows up in the result.
  const WideString one_char = L"\xc2ab";
  EXPECT_EQ(one_char, to_wide("<c2ab>"));
  EXPECT_EQ(one_char, to_wide("<c2abab>"));
  EXPECT_EQ(one_char, to_wide("<c2ab 1234>"));

  // Inputs expected to give 0xC2AB followed by 0xFAAB; the trailing "12" in
  // the second input does not show up in the result.
  WideString two_chars = one_char;
  two_chars += L"\xfaab";
  EXPECT_EQ(two_chars, to_wide("<c2abFaAb>"));
  EXPECT_EQ(two_chars, to_wide("<c2abFaAb12>"));
}
// Builds a CPDF_ToUnicodeMap from several bfrange sections and checks that
// Lookup() returns an empty string for every code probed afterwards.
TEST(CPDFToUnicodeMapTest, HandleBeginBFRangeRejectsInvalidCidValues) {
  constexpr uint32_t kMaxCode = 0xffffffff;

  {
    // Range <FFFFFFFF>..<FFFFFFFF> whose destination is written as an array.
    static constexpr uint8_t kMaxCodeToArray[] =
        "beginbfrange<FFFFFFFF><FFFFFFFF>[<0041>]endbfrange";
    CPDF_ToUnicodeMap map(pdfium::MakeRetain<CPDF_Stream>(kMaxCodeToArray));
    EXPECT_EQ(L"", map.Lookup(kMaxCode));
  }

  {
    // Same range, destination written as a single four-digit hex string.
    static constexpr uint8_t kMaxCodeToSingleValue[] =
        "beginbfrange<FFFFFFFF><FFFFFFFF><0042>endbfrange";
    CPDF_ToUnicodeMap map(
        pdfium::MakeRetain<CPDF_Stream>(kMaxCodeToSingleValue));
    EXPECT_EQ(L"", map.Lookup(kMaxCode));
  }

  {
    // Same range, destination written as an eight-digit hex string.
    static constexpr uint8_t kMaxCodeToLongValue[] =
        "beginbfrange<FFFFFFFF><FFFFFFFF><00410042>endbfrange";
    CPDF_ToUnicodeMap map(pdfium::MakeRetain<CPDF_Stream>(kMaxCodeToLongValue));
    EXPECT_EQ(L"", map.Lookup(kMaxCode));
  }

  {
    // Range <0001>..<10000>: no code inside or outside of it gets a mapping.
    static constexpr uint8_t kEndAbove16Bits[] =
        "beginbfrange<0001><10000>[<0041>]endbfrange";
    CPDF_ToUnicodeMap map(pdfium::MakeRetain<CPDF_Stream>(kEndAbove16Bits));
    EXPECT_EQ(L"", map.Lookup(kMaxCode));
    EXPECT_EQ(L"", map.Lookup(0x0001));
    EXPECT_EQ(L"", map.Lookup(0xffff));
    EXPECT_EQ(L"", map.Lookup(0x10000));
  }

  {
    // Range <10000>..<10001>: neither endpoint gets a mapping.
    static constexpr uint8_t kStartAbove16Bits[] =
        "beginbfrange<10000><10001>[<0041>]endbfrange";
    CPDF_ToUnicodeMap map(pdfium::MakeRetain<CPDF_Stream>(kStartAbove16Bits));
    EXPECT_EQ(L"", map.Lookup(0x10000));
    EXPECT_EQ(L"", map.Lookup(0x10001));
  }

  {
    // Range <0006>..<0004>, i.e. the start code is greater than the end code.
    static constexpr uint8_t kStartAfterEnd[] =
        "beginbfrange<0006><0004>[<0041>]endbfrange";
    CPDF_ToUnicodeMap map(pdfium::MakeRetain<CPDF_Stream>(kStartAfterEnd));
    EXPECT_EQ(L"", map.Lookup(0x0004));
    EXPECT_EQ(L"", map.Lookup(0x0005));
    EXPECT_EQ(L"", map.Lookup(0x0006));
  }
}
// Checks ReverseLookup() and the per-charcode unicode count reported by
// GetUnicodeCountByCharcodeForTesting() for distinct, conflicting and
// duplicate mappings.
TEST(CPDFToUnicodeMapTest, InsertIntoMultimap) {
  {
    // Both the CIDs and the unicodes are different.
    static constexpr uint8_t kDistinctPairs[] =
        "beginbfchar<1><0041><2><0042>endbfchar";
    CPDF_ToUnicodeMap map(pdfium::MakeRetain<CPDF_Stream>(kDistinctPairs));
    EXPECT_EQ(1u, map.ReverseLookup(0x0041));
    EXPECT_EQ(2u, map.ReverseLookup(0x0042));
    EXPECT_EQ(1u, map.GetUnicodeCountByCharcodeForTesting(1u));
    EXPECT_EQ(1u, map.GetUnicodeCountByCharcodeForTesting(2u));
  }

  {
    // The same CID with different unicodes.
    static constexpr uint8_t kOneCidTwoUnicodes[] =
        "beginbfrange<0><0><0041><0><0><0042>endbfrange";
    CPDF_ToUnicodeMap map(pdfium::MakeRetain<CPDF_Stream>(kOneCidTwoUnicodes));
    EXPECT_EQ(0u, map.ReverseLookup(0x0041));
    EXPECT_EQ(0u, map.ReverseLookup(0x0042));
    EXPECT_EQ(2u, map.GetUnicodeCountByCharcodeForTesting(0u));
  }

  {
    // Duplicate mappings of CID 0 to unicode "A", once through a bfrange
    // section and once through a bfchar section. There should be only 1 entry
    // in `m_Multimap`.
    static constexpr uint8_t kDuplicateMapping[] =
        "beginbfrange<0><0>[<0041>]endbfrange\n"
        "beginbfchar<0><0041>endbfchar";
    CPDF_ToUnicodeMap map(pdfium::MakeRetain<CPDF_Stream>(kDuplicateMapping));
    EXPECT_EQ(0u, map.ReverseLookup(0x0041));
    EXPECT_EQ(1u, map.GetUnicodeCountByCharcodeForTesting(0u));
  }
}
// Maps charcode 0x01 to the UTF-16 surrogate pair D841 DE76 (which encodes
// code point 0x20676) and checks forward and reverse lookups.
TEST(CPDFToUnicodeMapTest, NonBmpUnicodeLookup) {
  static constexpr uint8_t kSurrogatePairCMap[] =
      "beginbfchar<01><d841de76>endbfchar";
  auto stream = pdfium::MakeRetain<CPDF_Stream>(kSurrogatePairCMap);
  CPDF_ToUnicodeMap map(stream);

  // The forward lookup hands back the two surrogate code units as written.
  const WideString mapped = map.Lookup(0x01);
  EXPECT_EQ(L"\xd841\xde76", mapped);

#if defined(WCHAR_T_IS_32_BIT)
  // TODO(crbug.com/374947848): Should work if wchar_t is 16-bit.
  // TODO(crbug.com/374947848): Should return 1u.
  EXPECT_EQ(0u, map.ReverseLookup(0x20676));
#endif
}