| // Copyright 2017 PDFium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
| |
| #include "core/fpdfapi/font/cpdf_tounicodemap.h" |
| |
| #include "core/fpdfapi/cpdf_modulemgr.h" |
| #include "core/fpdfapi/font/cpdf_cid2unicodemap.h" |
| #include "core/fpdfapi/page/cpdf_pagemodule.h" |
| #include "core/fpdfapi/parser/cpdf_simple_parser.h" |
| #include "core/fxcrt/fx_extension.h" |
| #include "core/fxcrt/fx_safe_types.h" |
| #include "third_party/base/numerics/safe_conversions.h" |
| |
| CFX_WideString CPDF_ToUnicodeMap::Lookup(uint32_t charcode) const { |
| auto it = m_Map.find(charcode); |
| if (it != m_Map.end()) { |
| uint32_t value = it->second; |
| wchar_t unicode = (wchar_t)(value & 0xffff); |
| if (unicode != 0xffff) { |
| return unicode; |
| } |
| const wchar_t* buf = m_MultiCharBuf.GetBuffer(); |
| uint32_t buf_len = m_MultiCharBuf.GetLength(); |
| if (!buf || buf_len == 0) { |
| return CFX_WideString(); |
| } |
| uint32_t index = value >> 16; |
| if (index >= buf_len) { |
| return CFX_WideString(); |
| } |
| uint32_t len = buf[index]; |
| if (index + len < index || index + len >= buf_len) { |
| return CFX_WideString(); |
| } |
| return CFX_WideString(buf + index + 1, len); |
| } |
| if (m_pBaseMap) { |
| return m_pBaseMap->UnicodeFromCID((uint16_t)charcode); |
| } |
| return CFX_WideString(); |
| } |
| |
| uint32_t CPDF_ToUnicodeMap::ReverseLookup(wchar_t unicode) const { |
| for (const auto& pair : m_Map) { |
| if (pair.second == static_cast<uint32_t>(unicode)) |
| return pair.first; |
| } |
| return 0; |
| } |
| |
| // Static. |
| uint32_t CPDF_ToUnicodeMap::StringToCode(const CFX_ByteStringC& str) { |
| int len = str.GetLength(); |
| if (len == 0) |
| return 0; |
| |
| uint32_t result = 0; |
| if (str[0] == '<') { |
| for (int i = 1; i < len && std::isxdigit(str[i]); ++i) |
| result = result * 16 + FXSYS_HexCharToInt(str.CharAt(i)); |
| return result; |
| } |
| |
| for (int i = 0; i < len && std::isdigit(str[i]); ++i) |
| result = result * 10 + FXSYS_DecimalCharToInt(str.CharAt(i)); |
| |
| return result; |
| } |
| |
| static CFX_WideString StringDataAdd(CFX_WideString str) { |
| CFX_WideString ret; |
| int len = str.GetLength(); |
| wchar_t value = 1; |
| for (int i = len - 1; i >= 0; --i) { |
| wchar_t ch = str[i] + value; |
| if (ch < str[i]) { |
| ret.InsertAtFront(0); |
| } else { |
| ret.InsertAtFront(ch); |
| value = 0; |
| } |
| } |
| if (value) |
| ret.InsertAtFront(value); |
| return ret; |
| } |
| |
| // Static. |
| CFX_WideString CPDF_ToUnicodeMap::StringToWideString( |
| const CFX_ByteStringC& str) { |
| int len = str.GetLength(); |
| if (len == 0) |
| return CFX_WideString(); |
| |
| CFX_WideString result; |
| if (str[0] == '<') { |
| int byte_pos = 0; |
| wchar_t ch = 0; |
| for (int i = 1; i < len && std::isxdigit(str[i]); ++i) { |
| ch = ch * 16 + FXSYS_HexCharToInt(str[i]); |
| byte_pos++; |
| if (byte_pos == 4) { |
| result += ch; |
| byte_pos = 0; |
| ch = 0; |
| } |
| } |
| return result; |
| } |
| return result; |
| } |
| |
| CPDF_ToUnicodeMap::CPDF_ToUnicodeMap() : m_pBaseMap(nullptr) {} |
| |
| CPDF_ToUnicodeMap::~CPDF_ToUnicodeMap() {} |
| |
| uint32_t CPDF_ToUnicodeMap::GetUnicode() { |
| FX_SAFE_UINT32 uni = m_MultiCharBuf.GetLength(); |
| uni = uni * 0x10000 + 0xffff; |
| return uni.ValueOrDefault(0); |
| } |
| |
| void CPDF_ToUnicodeMap::Load(CPDF_Stream* pStream) { |
| CIDSet cid_set = CIDSET_UNKNOWN; |
| auto pAcc = pdfium::MakeRetain<CPDF_StreamAcc>(pStream); |
| pAcc->LoadAllData(false); |
| CPDF_SimpleParser parser(pAcc->GetData(), pAcc->GetSize()); |
| while (1) { |
| CFX_ByteStringC word = parser.GetWord(); |
| if (word.IsEmpty()) { |
| break; |
| } |
| if (word == "beginbfchar") { |
| while (1) { |
| word = parser.GetWord(); |
| if (word.IsEmpty() || word == "endbfchar") { |
| break; |
| } |
| uint32_t srccode = StringToCode(word); |
| word = parser.GetWord(); |
| CFX_WideString destcode = StringToWideString(word); |
| int len = destcode.GetLength(); |
| if (len == 0) { |
| continue; |
| } |
| if (len == 1) { |
| m_Map[srccode] = destcode.GetAt(0); |
| } else { |
| m_Map[srccode] = GetUnicode(); |
| m_MultiCharBuf.AppendChar(destcode.GetLength()); |
| m_MultiCharBuf << destcode; |
| } |
| } |
| } else if (word == "beginbfrange") { |
| while (1) { |
| CFX_ByteString low, high; |
| low = parser.GetWord(); |
| if (low.IsEmpty() || low == "endbfrange") { |
| break; |
| } |
| high = parser.GetWord(); |
| uint32_t lowcode = StringToCode(low.AsStringC()); |
| uint32_t highcode = |
| (lowcode & 0xffffff00) | (StringToCode(high.AsStringC()) & 0xff); |
| if (highcode == (uint32_t)-1) { |
| break; |
| } |
| CFX_ByteString start(parser.GetWord()); |
| if (start == "[") { |
| for (uint32_t code = lowcode; code <= highcode; code++) { |
| CFX_ByteString dest(parser.GetWord()); |
| CFX_WideString destcode = StringToWideString(dest.AsStringC()); |
| int len = destcode.GetLength(); |
| if (len == 0) { |
| continue; |
| } |
| if (len == 1) { |
| m_Map[code] = destcode.GetAt(0); |
| } else { |
| m_Map[code] = GetUnicode(); |
| m_MultiCharBuf.AppendChar(destcode.GetLength()); |
| m_MultiCharBuf << destcode; |
| } |
| } |
| parser.GetWord(); |
| } else { |
| CFX_WideString destcode = StringToWideString(start.AsStringC()); |
| int len = destcode.GetLength(); |
| uint32_t value = 0; |
| if (len == 1) { |
| value = StringToCode(start.AsStringC()); |
| for (uint32_t code = lowcode; code <= highcode; code++) { |
| m_Map[code] = value++; |
| } |
| } else { |
| for (uint32_t code = lowcode; code <= highcode; code++) { |
| CFX_WideString retcode; |
| if (code == lowcode) { |
| retcode = destcode; |
| } else { |
| retcode = StringDataAdd(destcode); |
| } |
| m_Map[code] = GetUnicode(); |
| m_MultiCharBuf.AppendChar(retcode.GetLength()); |
| m_MultiCharBuf << retcode; |
| destcode = retcode; |
| } |
| } |
| } |
| } |
| } else if (word == "/Adobe-Korea1-UCS2") { |
| cid_set = CIDSET_KOREA1; |
| } else if (word == "/Adobe-Japan1-UCS2") { |
| cid_set = CIDSET_JAPAN1; |
| } else if (word == "/Adobe-CNS1-UCS2") { |
| cid_set = CIDSET_CNS1; |
| } else if (word == "/Adobe-GB1-UCS2") { |
| cid_set = CIDSET_GB1; |
| } |
| } |
| if (cid_set) { |
| m_pBaseMap = CPDF_ModuleMgr::Get() |
| ->GetPageModule() |
| ->GetFontGlobals() |
| ->m_CMapManager.GetCID2UnicodeMap(cid_set, false); |
| } else { |
| m_pBaseMap = nullptr; |
| } |
| } |