Clean up CPDF_ToUnicodeMap.
- Make Load() private and call it from the ctor.
- Rearrange some methods to do early returns.
- Fix various nits.
Change-Id: Id3058dcbdda63ec2bbf92e4f18368c4b498590f2
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/59259
Commit-Queue: Lei Zhang <thestig@chromium.org>
Reviewed-by: Tom Sepez <tsepez@chromium.org>
diff --git a/core/fpdfapi/font/cpdf_font.cpp b/core/fpdfapi/font/cpdf_font.cpp
index e075484..28e5ffb 100644
--- a/core/fpdfapi/font/cpdf_font.cpp
+++ b/core/fpdfapi/font/cpdf_font.cpp
@@ -273,8 +273,7 @@
if (!pStream)
return;
- m_pToUnicodeMap = pdfium::MakeUnique<CPDF_ToUnicodeMap>();
- m_pToUnicodeMap->Load(pStream);
+ m_pToUnicodeMap = pdfium::MakeUnique<CPDF_ToUnicodeMap>(pStream);
}
uint32_t CPDF_Font::GetStringWidth(ByteStringView pString) {
diff --git a/core/fpdfapi/font/cpdf_tounicodemap.cpp b/core/fpdfapi/font/cpdf_tounicodemap.cpp
index ce503b5..aa6a421 100644
--- a/core/fpdfapi/font/cpdf_tounicodemap.cpp
+++ b/core/fpdfapi/font/cpdf_tounicodemap.cpp
@@ -20,11 +20,10 @@
WideString StringDataAdd(WideString str) {
WideString ret;
- int len = str.GetLength();
wchar_t value = 1;
- for (int i = len - 1; i >= 0; --i) {
- wchar_t ch = str[i] + value;
- if (ch < str[i]) {
+ for (size_t i = str.GetLength(); i > 0; --i) {
+ wchar_t ch = str[i - 1] + value;
+ if (ch < str[i - 1]) {
ret.InsertAtFront(0);
} else {
ret.InsertAtFront(ch);
@@ -38,37 +37,39 @@
} // namespace
-CPDF_ToUnicodeMap::CPDF_ToUnicodeMap() : m_pBaseMap(nullptr) {}
+CPDF_ToUnicodeMap::CPDF_ToUnicodeMap(const CPDF_Stream* pStream) {
+ Load(pStream);
+}
-CPDF_ToUnicodeMap::~CPDF_ToUnicodeMap() {}
+CPDF_ToUnicodeMap::~CPDF_ToUnicodeMap() = default;
WideString CPDF_ToUnicodeMap::Lookup(uint32_t charcode) const {
auto it = m_Map.find(charcode);
- if (it != m_Map.end()) {
- uint32_t value = it->second;
- wchar_t unicode = (wchar_t)(value & 0xffff);
- if (unicode != 0xffff) {
- return unicode;
- }
- const wchar_t* buf = m_MultiCharBuf.GetBuffer();
- uint32_t buf_len = m_MultiCharBuf.GetLength();
- if (!buf || buf_len == 0) {
+ if (it == m_Map.end()) {
+ if (!m_pBaseMap)
return WideString();
- }
- uint32_t index = value >> 16;
- if (index >= buf_len) {
- return WideString();
- }
- uint32_t len = buf[index];
- if (index + len < index || index + len >= buf_len) {
- return WideString();
- }
- return WideString(buf + index + 1, len);
+ return m_pBaseMap->UnicodeFromCID(static_cast<uint16_t>(charcode));
}
- if (m_pBaseMap) {
- return m_pBaseMap->UnicodeFromCID((uint16_t)charcode);
- }
- return WideString();
+
+ uint32_t value = it->second;
+ wchar_t unicode = static_cast<wchar_t>(value & 0xffff);
+ if (unicode != 0xffff)
+ return unicode;
+
+ const wchar_t* buf = m_MultiCharBuf.GetBuffer();
+ uint32_t buf_len = m_MultiCharBuf.GetLength();
+ if (!buf || buf_len == 0)
+ return WideString();
+
+ uint32_t index = value >> 16;
+ if (index >= buf_len)
+ return WideString();
+
+ uint32_t len = buf[index];
+ if (index + len < index || index + len >= buf_len)
+ return WideString();
+
+ return WideString(buf + index + 1, len);
}
uint32_t CPDF_ToUnicodeMap::ReverseLookup(wchar_t unicode) const {
@@ -79,45 +80,42 @@
return 0;
}
-// Static.
+// static
uint32_t CPDF_ToUnicodeMap::StringToCode(ByteStringView str) {
- int len = str.GetLength();
+ size_t len = str.GetLength();
if (len == 0)
return 0;
uint32_t result = 0;
if (str[0] == '<') {
- for (int i = 1; i < len && std::isxdigit(str[i]); ++i)
+ for (size_t i = 1; i < len && std::isxdigit(str[i]); ++i)
result = result * 16 + FXSYS_HexCharToInt(str.CharAt(i));
return result;
}
- for (int i = 0; i < len && std::isdigit(str[i]); ++i)
+ for (size_t i = 0; i < len && std::isdigit(str[i]); ++i)
result = result * 10 + FXSYS_DecimalCharToInt(str.CharAt(i));
return result;
}
-// Static.
+// static
WideString CPDF_ToUnicodeMap::StringToWideString(ByteStringView str) {
- int len = str.GetLength();
- if (len == 0)
+ size_t len = str.GetLength();
+ if (len == 0 || str[0] != '<')
return WideString();
WideString result;
- if (str[0] == '<') {
- int byte_pos = 0;
- wchar_t ch = 0;
- for (int i = 1; i < len && std::isxdigit(str[i]); ++i) {
- ch = ch * 16 + FXSYS_HexCharToInt(str[i]);
- byte_pos++;
- if (byte_pos == 4) {
- result += ch;
- byte_pos = 0;
- ch = 0;
- }
+ int byte_pos = 0;
+ wchar_t ch = 0;
+ for (size_t i = 1; i < len && std::isxdigit(str[i]); ++i) {
+ ch = ch * 16 + FXSYS_HexCharToInt(str.CharAt(i));
+ byte_pos++;
+ if (byte_pos == 4) {
+ result += ch;
+ byte_pos = 0;
+ ch = 0;
}
- return result;
}
return result;
}
@@ -129,84 +127,79 @@
CPDF_SimpleParser parser(pAcc->GetSpan());
while (1) {
ByteStringView word = parser.GetWord();
- if (word.IsEmpty()) {
+ if (word.IsEmpty())
break;
- }
+
if (word == "beginbfchar") {
while (1) {
word = parser.GetWord();
- if (word.IsEmpty() || word == "endbfchar") {
+ if (word.IsEmpty() || word == "endbfchar")
break;
- }
+
uint32_t srccode = StringToCode(word);
word = parser.GetWord();
WideString destcode = StringToWideString(word);
- int len = destcode.GetLength();
- if (len == 0) {
+ size_t len = destcode.GetLength();
+ if (len == 0)
continue;
- }
+
if (len == 1) {
m_Map[srccode] = destcode[0];
} else {
m_Map[srccode] = GetUnicode();
- m_MultiCharBuf.AppendChar(destcode.GetLength());
+ m_MultiCharBuf.AppendChar(len);
m_MultiCharBuf << destcode;
}
}
} else if (word == "beginbfrange") {
while (1) {
- ByteString low, high;
- low = parser.GetWord();
- if (low.IsEmpty() || low == "endbfrange") {
+ ByteStringView low = parser.GetWord();
+ if (low.IsEmpty() || low == "endbfrange")
break;
- }
- high = parser.GetWord();
- uint32_t lowcode = StringToCode(low.AsStringView());
+
+ ByteStringView high = parser.GetWord();
+ uint32_t lowcode = StringToCode(low);
uint32_t highcode =
- (lowcode & 0xffffff00) | (StringToCode(high.AsStringView()) & 0xff);
- if (highcode == (uint32_t)-1) {
+ (lowcode & 0xffffff00) | (StringToCode(high) & 0xff);
+ if (highcode == 0xffffffff)
break;
- }
- ByteString start(parser.GetWord());
+
+ ByteStringView start = parser.GetWord();
if (start == "[") {
for (uint32_t code = lowcode; code <= highcode; code++) {
- ByteString dest(parser.GetWord());
- WideString destcode = StringToWideString(dest.AsStringView());
- int len = destcode.GetLength();
- if (len == 0) {
+ ByteStringView dest = parser.GetWord();
+ WideString destcode = StringToWideString(dest);
+ size_t len = destcode.GetLength();
+ if (len == 0)
continue;
- }
+
if (len == 1) {
m_Map[code] = destcode[0];
} else {
m_Map[code] = GetUnicode();
- m_MultiCharBuf.AppendChar(destcode.GetLength());
+ m_MultiCharBuf.AppendChar(len);
m_MultiCharBuf << destcode;
}
}
parser.GetWord();
+ continue;
+ }
+
+ WideString destcode = StringToWideString(start);
+ size_t len = destcode.GetLength();
+ uint32_t value = 0;
+ if (len == 1) {
+ value = StringToCode(start);
+ for (uint32_t code = lowcode; code <= highcode; code++)
+ m_Map[code] = value++;
} else {
- WideString destcode = StringToWideString(start.AsStringView());
- int len = destcode.GetLength();
- uint32_t value = 0;
- if (len == 1) {
- value = StringToCode(start.AsStringView());
- for (uint32_t code = lowcode; code <= highcode; code++) {
- m_Map[code] = value++;
- }
- } else {
- for (uint32_t code = lowcode; code <= highcode; code++) {
- WideString retcode;
- if (code == lowcode) {
- retcode = destcode;
- } else {
- retcode = StringDataAdd(destcode);
- }
- m_Map[code] = GetUnicode();
- m_MultiCharBuf.AppendChar(retcode.GetLength());
- m_MultiCharBuf << retcode;
- destcode = std::move(retcode);
- }
+ for (uint32_t code = lowcode; code <= highcode; code++) {
+ WideString retcode =
+ code == lowcode ? destcode : StringDataAdd(destcode);
+ m_Map[code] = GetUnicode();
+ m_MultiCharBuf.AppendChar(retcode.GetLength());
+ m_MultiCharBuf << retcode;
+ destcode = std::move(retcode);
}
}
}
@@ -221,15 +214,12 @@
}
}
if (cid_set) {
- m_pBaseMap =
- CPDF_FontGlobals::GetInstance()->GetCMapManager()->GetCID2UnicodeMap(
- cid_set);
- } else {
- m_pBaseMap = nullptr;
+ auto* manager = CPDF_FontGlobals::GetInstance()->GetCMapManager();
+ m_pBaseMap = manager->GetCID2UnicodeMap(cid_set);
}
}
-uint32_t CPDF_ToUnicodeMap::GetUnicode() {
+uint32_t CPDF_ToUnicodeMap::GetUnicode() const {
FX_SAFE_UINT32 uni = m_MultiCharBuf.GetLength();
uni = uni * 0x10000 + 0xffff;
return uni.ValueOrDefault(0);
diff --git a/core/fpdfapi/font/cpdf_tounicodemap.h b/core/fpdfapi/font/cpdf_tounicodemap.h
index 7be9d19..753a985 100644
--- a/core/fpdfapi/font/cpdf_tounicodemap.h
+++ b/core/fpdfapi/font/cpdf_tounicodemap.h
@@ -17,11 +17,9 @@
class CPDF_ToUnicodeMap {
public:
- CPDF_ToUnicodeMap();
+ explicit CPDF_ToUnicodeMap(const CPDF_Stream* pStream);
~CPDF_ToUnicodeMap();
- void Load(const CPDF_Stream* pStream);
-
WideString Lookup(uint32_t charcode) const;
uint32_t ReverseLookup(wchar_t unicode) const;
@@ -32,7 +30,8 @@
static uint32_t StringToCode(ByteStringView str);
static WideString StringToWideString(ByteStringView str);
- uint32_t GetUnicode();
+ void Load(const CPDF_Stream* pStream);
+ uint32_t GetUnicode() const;
std::map<uint32_t, uint32_t> m_Map;
UnownedPtr<const CPDF_CID2UnicodeMap> m_pBaseMap;