Convert CFX_XMLParser::current_text_ to WideString
Avoid use of UNSAFE_BUFFERS in the future by using our checked
string representation rather than just pointers into a DataVector<>.
-- Note that character-by-character append to string is not costly.
Change-Id: I24034dd01be19d0e4b1bb73490a337f8c41f0a71
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/116735
Reviewed-by: Lei Zhang <thestig@chromium.org>
Commit-Queue: Tom Sepez <tsepez@chromium.org>
Reviewed-by: Thomas Sepez <tsepez@google.com>
diff --git a/core/fxcrt/string_template.cpp b/core/fxcrt/string_template.cpp
index 672f7cb..45339b5 100644
--- a/core/fxcrt/string_template.cpp
+++ b/core/fxcrt/string_template.cpp
@@ -357,21 +357,19 @@
if (!pSrcData || nSrcLen == 0) {
return;
}
-
if (!m_pData) {
m_pData = StringData::Create({pSrcData, nSrcLen});
return;
}
-
if (m_pData->CanOperateInPlace(m_pData->m_nDataLength + nSrcLen)) {
m_pData->CopyContentsAt(m_pData->m_nDataLength, {pSrcData, nSrcLen});
m_pData->m_nDataLength += nSrcLen;
return;
}
-
- size_t nConcatLen = std::max(m_pData->m_nDataLength / 2, nSrcLen);
+ // Increase size by at least 50% to amortize repeated concatenations.
+ size_t nGrowLen = std::max(m_pData->m_nDataLength / 2, nSrcLen);
RetainPtr<StringData> pNewData =
- StringData::Create(m_pData->m_nDataLength + nConcatLen);
+ StringData::Create(m_pData->m_nDataLength + nGrowLen);
pNewData->CopyContents(*m_pData);
pNewData->CopyContentsAt(m_pData->m_nDataLength, {pSrcData, nSrcLen});
pNewData->m_nDataLength = m_pData->m_nDataLength + nSrcLen;
diff --git a/core/fxcrt/xml/cfx_xmlparser.cpp b/core/fxcrt/xml/cfx_xmlparser.cpp
index 274c757..07fba43 100644
--- a/core/fxcrt/xml/cfx_xmlparser.cpp
+++ b/core/fxcrt/xml/cfx_xmlparser.cpp
@@ -78,7 +78,7 @@
xml_plane_size_ = std::min(xml_plane_size_,
pdfium::checked_cast<size_t>(stream_->GetSize()));
- current_text_.reserve(kCurrentTextReserve);
+ current_text_.Reserve(kCurrentTextReserve);
}
CFX_XMLParser::~CFX_XMLParser() = default;
@@ -130,7 +130,7 @@
switch (current_parser_state) {
case FDE_XmlSyntaxState::Text:
if (ch == L'<') {
- if (!current_text_.empty()) {
+ if (!current_text_.IsEmpty()) {
current_node_->AppendLastChild(
doc->CreateNode<CFX_XMLText>(GetTextData()));
} else {
@@ -163,9 +163,10 @@
}
break;
case FDE_XmlSyntaxState::Target:
- if (!IsXMLNameChar(ch, current_text_.empty())) {
- if (current_text_.empty())
+ if (!IsXMLNameChar(ch, current_text_.IsEmpty())) {
+ if (current_text_.IsEmpty()) {
return false;
+ }
current_parser_state = FDE_XmlSyntaxState::TargetData;
@@ -177,14 +178,15 @@
current_node_ = node;
}
} else {
- current_text_.push_back(ch);
+ current_text_ += ch;
current_buffer_idx++;
}
break;
case FDE_XmlSyntaxState::Tag:
- if (!IsXMLNameChar(ch, current_text_.empty())) {
- if (current_text_.empty())
+ if (!IsXMLNameChar(ch, current_text_.IsEmpty())) {
+ if (current_text_.IsEmpty()) {
return false;
+ }
current_parser_state = FDE_XmlSyntaxState::AttriName;
@@ -192,17 +194,17 @@
current_node_->AppendLastChild(child);
current_node_ = child;
} else {
- current_text_.push_back(ch);
+ current_text_ += ch;
current_buffer_idx++;
}
break;
case FDE_XmlSyntaxState::AttriName:
- if (current_text_.empty() && IsXMLWhiteSpace(ch)) {
+ if (current_text_.IsEmpty() && IsXMLWhiteSpace(ch)) {
current_buffer_idx++;
break;
}
- if (!IsXMLNameChar(ch, current_text_.empty())) {
- if (current_text_.empty()) {
+ if (!IsXMLNameChar(ch, current_text_.IsEmpty())) {
+ if (current_text_.IsEmpty()) {
if (node_type_stack.top() == CFX_XMLNode::Type::kElement) {
if (ch == L'>' || ch == L'/') {
current_parser_state = FDE_XmlSyntaxState::BreakElement;
@@ -230,7 +232,7 @@
current_attribute_name = GetTextData();
}
} else {
- current_text_.push_back(ch);
+ current_text_ += ch;
current_buffer_idx++;
}
break;
@@ -284,9 +286,9 @@
break;
case FDE_XmlSyntaxState::CloseInstruction:
if (ch != L'>') {
- current_text_.push_back(ch);
+ current_text_ += ch;
current_parser_state = FDE_XmlSyntaxState::TargetData;
- } else if (!current_text_.empty()) {
+ } else if (!current_text_.IsEmpty()) {
ProcessTargetData();
} else {
current_buffer_idx++;
@@ -312,7 +314,7 @@
current_buffer_idx++;
break;
case FDE_XmlSyntaxState::CloseElement:
- if (!IsXMLNameChar(ch, current_text_.empty())) {
+ if (!IsXMLNameChar(ch, current_text_.IsEmpty())) {
if (ch == L'>') {
if (node_type_stack.empty())
return false;
@@ -335,7 +337,7 @@
return false;
}
} else {
- current_text_.push_back(ch);
+ current_text_ += ch;
}
current_buffer_idx++;
break;
@@ -364,7 +366,7 @@
current_node_->AppendLastChild(
doc->CreateNode<CFX_XMLCharData>(GetTextData()));
} else {
- current_text_.push_back(ch);
+ current_text_ += ch;
current_buffer_idx++;
}
break;
@@ -430,7 +432,7 @@
}
case FDE_XmlSyntaxState::TargetData:
if (IsXMLWhiteSpace(ch)) {
- if (current_text_.empty()) {
+ if (current_text_.IsEmpty()) {
current_buffer_idx++;
break;
}
@@ -455,7 +457,7 @@
return false;
}
} else {
- current_text_.push_back(ch);
+ current_text_ += ch;
current_buffer_idx++;
}
break;
@@ -467,16 +469,18 @@
}
void CFX_XMLParser::ProcessTextChar(wchar_t character) {
- current_text_.push_back(character);
+ current_text_ += character;
if (entity_start_.has_value() && character == L';') {
- // Copy the entity out into a string and remove from the vector. When we
- // copy the entity we don't want to copy out the & or the ; so we start
+ // Copy the entity out into a string and remove from the current text. When
+ // we copy the entity we don't want to copy out the & or the ; so we start
// shifted by one and want to copy 2 less characters in total.
- WideString csEntity(current_text_.data() + entity_start_.value() + 1,
- current_text_.size() - entity_start_.value() - 2);
- current_text_.erase(current_text_.begin() + entity_start_.value(),
- current_text_.end());
+ WideString csEntity = current_text_.Substr(
+ entity_start_.value() + 1,
+ current_text_.GetLength() - entity_start_.value() - 2);
+
+ current_text_.Delete(entity_start_.value(),
+ current_text_.GetLength() - entity_start_.value());
size_t iLen = csEntity.GetLength();
if (iLen > 0) {
@@ -500,24 +504,24 @@
character = static_cast<wchar_t>(ch);
if (character != 0)
- current_text_.push_back(character);
+ current_text_ += character;
} else {
if (csEntity == L"amp") {
- current_text_.push_back(L'&');
+ current_text_ += L'&';
} else if (csEntity == L"lt") {
- current_text_.push_back(L'<');
+ current_text_ += L'<';
} else if (csEntity == L"gt") {
- current_text_.push_back(L'>');
+ current_text_ += L'>';
} else if (csEntity == L"apos") {
- current_text_.push_back(L'\'');
+ current_text_ += L'\'';
} else if (csEntity == L"quot") {
- current_text_.push_back(L'"');
+ current_text_ += L'"';
}
}
}
entity_start_ = std::nullopt;
} else if (!entity_start_.has_value() && character == L'&') {
- entity_start_ = current_text_.size() - 1;
+ entity_start_ = current_text_.GetLength() - 1;
}
}
@@ -532,9 +536,8 @@
}
WideString CFX_XMLParser::GetTextData() {
- WideString ret(current_text_.data(), current_text_.size());
+ WideString ret = std::move(current_text_);
+ current_text_.Reserve(kCurrentTextReserve);
entity_start_ = std::nullopt;
- current_text_.clear();
- current_text_.reserve(kCurrentTextReserve);
return ret;
}
diff --git a/core/fxcrt/xml/cfx_xmlparser.h b/core/fxcrt/xml/cfx_xmlparser.h
index 34f743b..8915455 100644
--- a/core/fxcrt/xml/cfx_xmlparser.h
+++ b/core/fxcrt/xml/cfx_xmlparser.h
@@ -10,7 +10,6 @@
#include <memory>
#include <optional>
-#include "core/fxcrt/data_vector.h"
#include "core/fxcrt/retain_ptr.h"
#include "core/fxcrt/unowned_ptr.h"
#include "core/fxcrt/widestring.h"
@@ -56,7 +55,7 @@
UnownedPtr<CFX_XMLNode> current_node_;
RetainPtr<CFX_SeekableStreamProxy> stream_;
- DataVector<wchar_t> current_text_;
+ WideString current_text_;
size_t xml_plane_size_ = 1024;
std::optional<size_t> entity_start_;
};