Optimize CPDF_CMapParser
In this parser, a vector of CodeRange values is built up during
parsing and needs to be kept in sync with the CPDF_CMap that is being
initialized. In the existing implementation, the vector is built as
a member var of the parser, and its values are copied over to the cmap
whenever there is a change. Profiling shows that this copy is where the
code spends most of its time.
The code has been rewritten to have the parser reference/modify the
instance of the vector in the cmap instead of having its own copy. This
removes all of the copies and significantly speeds things up.
BUG=chromium:881678
Change-Id: Ib8e75962507ca3d3b1ed066fd1faa4fbb7141122
Reviewed-on: https://pdfium-review.googlesource.com/42350
Commit-Queue: Ryan Harrison <rharrison@chromium.org>
Reviewed-by: Henrique Nakashima <hnakashima@chromium.org>
diff --git a/core/fpdfapi/font/cpdf_cmap.h b/core/fpdfapi/font/cpdf_cmap.h
index 228c207..68ad26c 100644
--- a/core/fpdfapi/font/cpdf_cmap.h
+++ b/core/fpdfapi/font/cpdf_cmap.h
@@ -66,8 +66,11 @@
void SetVertical(bool vert) { m_bVertical = vert; }
void SetCodingScheme(CodingScheme scheme) { m_CodingScheme = scheme; }
- void SetMixedFourByteLeadingRanges(std::vector<CodeRange> range) {
- m_MixedFourByteLeadingRanges = range;
+ const std::vector<CodeRange>& GetMixedFourByteLeadingRanges() {
+ return m_MixedFourByteLeadingRanges;
+ }
+ void AppendMixedFourByteLeadingRanges(const CodeRange& range) {
+ m_MixedFourByteLeadingRanges.push_back(range);
}
int GetCoding() const { return m_Coding; }
diff --git a/core/fpdfapi/font/cpdf_cmapparser.cpp b/core/fpdfapi/font/cpdf_cmapparser.cpp
index a88448c..5bfe17b 100644
--- a/core/fpdfapi/font/cpdf_cmapparser.cpp
+++ b/core/fpdfapi/font/cpdf_cmapparser.cpp
@@ -109,14 +109,17 @@
m_Status = 0;
} else if (m_Status == 7) {
if (word == "endcodespacerange") {
- size_t nSegs = m_CodeRanges.size();
+ auto code_ranges = m_pCMap->GetMixedFourByteLeadingRanges();
+ size_t nSegs = code_ranges.size();
if (nSegs == 1) {
- m_pCMap->SetCodingScheme((m_CodeRanges[0].m_CharSize == 2)
+ m_pCMap->SetCodingScheme((code_ranges[0].m_CharSize == 2)
? CPDF_CMap::TwoBytes
: CPDF_CMap::OneByte);
} else if (nSegs > 1) {
m_pCMap->SetCodingScheme(CPDF_CMap::MixedFourBytes);
- m_pCMap->SetMixedFourByteLeadingRanges(m_CodeRanges);
+ for (auto range : m_PendingRanges)
+ m_pCMap->AppendMixedFourByteLeadingRanges(range);
+ m_PendingRanges.clear();
}
m_Status = 0;
} else {
@@ -126,7 +129,7 @@
if (m_CodeSeq % 2) {
CPDF_CMap::CodeRange range;
if (GetCodeRange(range, m_LastWord.AsStringView(), word))
- m_CodeRanges.push_back(range);
+ m_PendingRanges.push_back(range);
}
m_CodeSeq++;
}
diff --git a/core/fpdfapi/font/cpdf_cmapparser.h b/core/fpdfapi/font/cpdf_cmapparser.h
index c70d408..fae65cf 100644
--- a/core/fpdfapi/font/cpdf_cmapparser.h
+++ b/core/fpdfapi/font/cpdf_cmapparser.h
@@ -40,7 +40,7 @@
UnownedPtr<CPDF_CMap> const m_pCMap;
int m_Status;
int m_CodeSeq;
- std::vector<CPDF_CMap::CodeRange> m_CodeRanges;
+ std::vector<CPDF_CMap::CodeRange> m_PendingRanges;
std::vector<CPDF_CMap::CIDRange> m_AdditionalCharcodeToCIDMappings;
ByteString m_LastWord;
uint32_t m_CodePoints[4];