Avoid interspersed length/value buffer in CPDF_ToUnicodeMap.
This is somewhat less memory efficient, but simpler and safer: it avoids
casting character values to lengths, and it avoids allocations and copies
from the buffer when returning multi-char strings.
-- Fix some IWYU (include-what-you-use) issues in headers.
-- Explicitly construct single-char strings.
-- Rename GetUnicode() method to describe what it actually returns.
Change-Id: I36a8a4d7d79321e01b5f3e8bd20fa917c7f92850
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/88070
Reviewed-by: Lei Zhang <thestig@chromium.org>
Commit-Queue: Tom Sepez <tsepez@chromium.org>
diff --git a/core/fpdfapi/font/cpdf_tounicodemap.cpp b/core/fpdfapi/font/cpdf_tounicodemap.cpp
index d1df1f8..99409e2 100644
--- a/core/fpdfapi/font/cpdf_tounicodemap.cpp
+++ b/core/fpdfapi/font/cpdf_tounicodemap.cpp
@@ -54,13 +54,10 @@
uint32_t value = it->second;
wchar_t unicode = static_cast<wchar_t>(value & 0xffff);
if (unicode != 0xffff)
- return unicode;
+ return WideString(unicode);
- WideStringView buf = m_MultiCharBuf.AsStringView();
size_t index = value >> 16;
- if (!buf.IsValidIndex(index))
- return WideString();
- return WideString(buf.Substr(index + 1, buf[index]));
+ return index < m_MultiCharVec.size() ? m_MultiCharVec[index] : WideString();
}
uint32_t CPDF_ToUnicodeMap::ReverseLookup(wchar_t unicode) const {
@@ -195,17 +192,16 @@
for (uint32_t code = lowcode; code <= highcode; code++) {
WideString retcode =
code == lowcode ? destcode : StringDataAdd(destcode);
- m_Multimap.emplace(code, GetUnicode());
- m_MultiCharBuf.AppendChar(retcode.GetLength());
- m_MultiCharBuf << retcode;
+ m_Multimap.emplace(code, GetMultiCharIndexIndicator());
+ m_MultiCharVec.push_back(retcode);
destcode = std::move(retcode);
}
}
}
}
-uint32_t CPDF_ToUnicodeMap::GetUnicode() const {
- FX_SAFE_UINT32 uni = m_MultiCharBuf.GetLength();
+uint32_t CPDF_ToUnicodeMap::GetMultiCharIndexIndicator() const {
+ FX_SAFE_UINT32 uni = m_MultiCharVec.size();
uni = uni * 0x10000 + 0xffff;
return uni.ValueOrDefault(0);
}
@@ -218,8 +214,7 @@
if (len == 1) {
m_Multimap.emplace(srccode, destcode[0]);
} else {
- m_Multimap.emplace(srccode, GetUnicode());
- m_MultiCharBuf.AppendChar(len);
- m_MultiCharBuf << destcode;
+ m_Multimap.emplace(srccode, GetMultiCharIndexIndicator());
+ m_MultiCharVec.push_back(destcode);
}
}
diff --git a/core/fpdfapi/font/cpdf_tounicodemap.h b/core/fpdfapi/font/cpdf_tounicodemap.h
index bd6f075..106bb00 100644
--- a/core/fpdfapi/font/cpdf_tounicodemap.h
+++ b/core/fpdfapi/font/cpdf_tounicodemap.h
@@ -8,9 +8,11 @@
#define CORE_FPDFAPI_FONT_CPDF_TOUNICODEMAP_H_
#include <map>
+#include <vector>
-#include "core/fxcrt/cfx_widetextbuf.h"
+#include "core/fxcrt/fx_string.h"
#include "core/fxcrt/unowned_ptr.h"
+#include "third_party/abseil-cpp/absl/types/optional.h"
class CPDF_CID2UnicodeMap;
class CPDF_SimpleParser;
@@ -34,12 +36,12 @@
void Load(const CPDF_Stream* pStream);
void HandleBeginBFChar(CPDF_SimpleParser* pParser);
void HandleBeginBFRange(CPDF_SimpleParser* pParser);
- uint32_t GetUnicode() const;
+ uint32_t GetMultiCharIndexIndicator() const;
void SetCode(uint32_t srccode, WideString destcode);
std::multimap<uint32_t, uint32_t> m_Multimap;
UnownedPtr<const CPDF_CID2UnicodeMap> m_pBaseMap;
- CFX_WideTextBuf m_MultiCharBuf;
+ std::vector<WideString> m_MultiCharVec;
};
#endif // CORE_FPDFAPI_FONT_CPDF_TOUNICODEMAP_H_