// Source blob: 7d67743759a6f953178ea9a8938a353241567719
// Copyright 2014 The PDFium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
#include "core/fxcrt/bytestring.h"
#include <ctype.h>
#include <stddef.h>
#include <algorithm>
#include <sstream>
#include <string>
#include <utility>
#include "core/fxcrt/fx_codepage.h"
#include "core/fxcrt/fx_extension.h"
#include "core/fxcrt/fx_memcpy_wrappers.h"
#include "core/fxcrt/fx_safe_types.h"
#include "core/fxcrt/fx_system.h"
#include "core/fxcrt/span_util.h"
#include "core/fxcrt/string_pool_template.h"
#include "third_party/base/check.h"
#include "third_party/base/check_op.h"
#include "third_party/base/containers/span.h"
// Explicit instantiations of the char-based string templates and of the
// std::hash specialization for ByteString, emitted in this translation unit.
template class fxcrt::StringDataTemplate<char>;
template class fxcrt::StringViewTemplate<char>;
template class fxcrt::StringPoolTemplate<ByteString>;
template struct std::hash<ByteString>;
namespace {
// Characters stripped by the argument-less Trim(), TrimLeft() and TrimRight():
// HT (0x09), LF (0x0a), VT (0x0b), FF (0x0c), CR (0x0d) and space (0x20).
constexpr char kTrimChars[] = "\x09\x0a\x0b\x0c\x0d\x20";
// Returns the offset of the first occurrence of `needle` inside `haystack`,
// or std::nullopt when there is none. An empty `needle`, or one longer than
// `haystack`, never matches.
std::optional<size_t> FX_strpos(pdfium::span<const char> haystack,
                                pdfium::span<const char> needle) {
  const size_t needle_len = needle.size();
  if (needle_len == 0 || haystack.size() < needle_len) {
    return std::nullopt;
  }
  // Number of start offsets at which a full-length window still fits.
  const size_t window_count = haystack.size() - needle_len + 1;
  size_t offset = 0;
  while (offset < window_count) {
    if (fxcrt::span_equals(haystack.subspan(offset, needle_len), needle)) {
      return offset;
    }
    ++offset;
  }
  return std::nullopt;
}
} // namespace
namespace fxcrt {
// Compile-time guard: a ByteString object must be no larger than a pointer.
static_assert(sizeof(ByteString) <= sizeof(char*),
"Strings must not require more space than pointers");
// static
// Returns the "%d" rendering of `i` as a ByteString.
ByteString ByteString::FormatInteger(int i) {
  // The text is produced in a 32-byte stack buffer before being copied.
  char digits[32];
  FXSYS_snprintf(digits, sizeof(digits), "%d", i);
  return ByteString(digits);
}
// static
// Returns the text FloatToString() writes for `f`, using the length that
// FloatToString() reports.
ByteString ByteString::FormatFloat(float f) {
  char text[32];
  const auto text_len = FloatToString(f, text);
  return ByteString(text, text_len);
}
// static
// printf-style formatting driven by a va_list. `argList` itself is never
// consumed: each vsnprintf() pass works on its own va_copy.
// Returns an empty string when the measuring pass reports a length <= 0.
ByteString ByteString::FormatV(const char* pFormat, va_list argList) {
va_list argListCopy;
va_copy(argListCopy, argList);
// First pass: a null buffer of size 0 makes vsnprintf() only report the
// number of characters the output needs (excluding the terminating NUL).
int nMaxLen = vsnprintf(nullptr, 0, pFormat, argListCopy);
va_end(argListCopy);
if (nMaxLen <= 0)
return ByteString();
ByteString ret;
{
// Span's lifetime must end before ReleaseBuffer() below.
pdfium::span<char> buf = ret.GetBuffer(nMaxLen);
// In the following two calls, there's always space in the buffer for
// a terminating NUL that's not included in nMaxLen.
memset(buf.data(), 0, nMaxLen + 1);
// Second pass: format for real into the freshly sized buffer.
va_copy(argListCopy, argList);
vsnprintf(buf.data(), nMaxLen + 1, pFormat, argListCopy);
va_end(argListCopy);
}
// Fix the logical length to what was actually written.
ret.ReleaseBuffer(ret.GetStringLength());
return ret;
}
// static
// Variadic front end for FormatV(): packages the trailing arguments into a
// va_list and forwards them together with `pFormat`.
ByteString ByteString::Format(const char* pFormat, ...) {
  va_list args;
  va_start(args, pFormat);
  ByteString formatted = FormatV(pFormat, args);
  va_end(args);
  return formatted;
}
// Builds a string from the `nLen` chars starting at `pStr`. When `nLen` is
// zero no string data is created.
ByteString::ByteString(const char* pStr, size_t nLen) {
  if (nLen == 0) {
    return;
  }
  m_pData = StringData::Create({pStr, nLen});
}
// Same as the (const char*, size_t) constructor, for a buffer typed as
// uint8_t; the pointer is reinterpreted and the work is delegated.
ByteString::ByteString(const uint8_t* pStr, size_t nLen)
: ByteString(reinterpret_cast<const char*>(pStr), nLen) {}
// Default, copy and move construction are compiler-generated; the move
// constructor is declared noexcept.
ByteString::ByteString() = default;
ByteString::ByteString(const ByteString& other) = default;
ByteString::ByteString(ByteString&& other) noexcept = default;
// Builds a one-character string holding `ch`.
ByteString::ByteString(char ch) {
  RetainPtr<StringData> single = StringData::Create(1);
  single->m_String[0] = ch;
  m_pData = std::move(single);
}
// Builds a string from a NUL-terminated C string. A null `ptr` is treated as
// length 0; otherwise the length is taken with strlen().
ByteString::ByteString(const char* ptr)
: ByteString(ptr, ptr ? strlen(ptr) : 0) {}
// Builds a string from the contents of `bstrc`. An empty view creates no
// string data.
ByteString::ByteString(ByteStringView bstrc) {
  if (bstrc.IsEmpty()) {
    return;
  }
  m_pData = StringData::Create(bstrc.span());
}
// Builds the concatenation `str1` + `str2`. The combined length is computed
// with checked arithmetic (ValueOrDie()); a combined length of zero creates
// no string data.
ByteString::ByteString(ByteStringView str1, ByteStringView str2) {
  const size_t nFirstLen = str1.GetLength();
  FX_SAFE_SIZE_T nCheckedTotal = nFirstLen;
  nCheckedTotal += str2.GetLength();
  const size_t nTotal = nCheckedTotal.ValueOrDie();
  if (nTotal == 0) {
    return;
  }
  m_pData = StringData::Create(nTotal);
  m_pData->CopyContents(str1.span());
  // The second piece starts right where the first one ended.
  m_pData->CopyContentsAt(nFirstLen, str2.span());
}
// Builds the concatenation of every view in `list`, in order. The total
// length is computed with checked arithmetic (ValueOrDie()); a total of zero
// creates no string data.
ByteString::ByteString(const std::initializer_list<ByteStringView>& list) {
  FX_SAFE_SIZE_T nCheckedTotal = 0;
  for (const ByteStringView& piece : list) {
    nCheckedTotal += piece.GetLength();
  }
  const size_t nTotal = nCheckedTotal.ValueOrDie();
  if (nTotal == 0) {
    return;
  }
  m_pData = StringData::Create(nTotal);
  size_t nWritePos = 0;
  for (const ByteStringView& piece : list) {
    m_pData->CopyContentsAt(nWritePos, piece.span());
    nWritePos += piece.GetLength();
  }
}
// Builds a string from the text accumulated in `outStream`. An empty stream
// creates no string data.
ByteString::ByteString(const fxcrt::ostringstream& outStream) {
  const auto contents = outStream.str();
  if (contents.empty()) {
    return;
  }
  m_pData = StringData::Create({contents.c_str(), contents.size()});
}
// Compiler-generated destructor; releasing the data is left to m_pData.
ByteString::~ByteString() = default;
// Makes the string empty. When the current data block reports that it can be
// modified in place, the block is kept and only truncated; otherwise the
// reference to it is dropped.
void ByteString::clear() {
  if (m_pData && m_pData->CanOperateInPlace(0)) {
    m_pData->m_nDataLength = 0;
    // Keep the buffer NUL-terminated at its logical length, as every other
    // mutator in this file does, so NUL-terminated readers of the retained
    // buffer do not see the previous contents.
    m_pData->m_String[0] = 0;
    return;
  }
  m_pData.Reset();
}
// Assigns from a NUL-terminated C string. A null pointer or an empty string
// clears this string.
ByteString& ByteString::operator=(const char* str) {
  if (str && str[0]) {
    AssignCopy(str, strlen(str));
  } else {
    clear();
  }
  return *this;
}
// Assigns a copy of the view's contents. An empty view clears this string.
ByteString& ByteString::operator=(ByteStringView str) {
  if (!str.IsEmpty()) {
    AssignCopy(str.unterminated_c_str(), str.GetLength());
  } else {
    clear();
  }
  return *this;
}
// Copy assignment: adopts `that`'s data pointer. Nothing is done when both
// strings already refer to the same data.
ByteString& ByteString::operator=(const ByteString& that) {
  if (m_pData == that.m_pData) {
    return *this;
  }
  m_pData = that.m_pData;
  return *this;
}
// Move assignment: takes over `that`'s data pointer. Nothing is done (and
// `that` is left untouched) when both already refer to the same data.
ByteString& ByteString::operator=(ByteString&& that) noexcept {
  if (m_pData == that.m_pData) {
    return *this;
  }
  m_pData = std::move(that.m_pData);
  return *this;
}
// Appends a NUL-terminated C string. A null pointer appends nothing.
ByteString& ByteString::operator+=(const char* str) {
  if (!str) {
    return *this;
  }
  Concat(str, strlen(str));
  return *this;
}
// Appends the single character `ch`.
ByteString& ByteString::operator+=(char ch) {
Concat(&ch, 1);
return *this;
}
// Appends the contents of `str`. A string without data appends nothing.
ByteString& ByteString::operator+=(const ByteString& str) {
  if (!str.m_pData) {
    return *this;
  }
  Concat(str.m_pData->m_String, str.m_pData->m_nDataLength);
  return *this;
}
// Appends the contents of the view. An empty view appends nothing.
ByteString& ByteString::operator+=(ByteStringView str) {
  if (str.IsEmpty()) {
    return *this;
  }
  Concat(str.unterminated_c_str(), str.GetLength());
  return *this;
}
// Compares against a NUL-terminated C string. A null `ptr` is treated like an
// empty string on both the "no data" and the "zero length" paths.
bool ByteString::operator==(const char* ptr) const {
  if (!m_pData) {
    return !ptr || !ptr[0];
  }
  const size_t this_len = m_pData->m_nDataLength;
  if (!ptr) {
    return this_len == 0;
  }
  if (strlen(ptr) != this_len) {
    return false;
  }
  return FXSYS_memcmp(ptr, m_pData->m_String, this_len) == 0;
}
// Compares against a view: equal when the lengths match and the bytes are
// identical. A string without data equals only an empty view.
bool ByteString::operator==(ByteStringView str) const {
  if (!m_pData) {
    return str.IsEmpty();
  }
  const size_t view_len = str.GetLength();
  if (m_pData->m_nDataLength != view_len) {
    return false;
  }
  return FXSYS_memcmp(m_pData->m_String, str.unterminated_c_str(),
                      view_len) == 0;
}
// Compares two strings. Strings referring to the same data are equal without
// inspecting bytes; two empty strings are equal; otherwise lengths and bytes
// must both match.
bool ByteString::operator==(const ByteString& other) const {
  if (m_pData == other.m_pData) {
    return true;
  }
  const bool this_empty = IsEmpty();
  const bool other_empty = other.IsEmpty();
  if (this_empty || other_empty) {
    return this_empty && other_empty;
  }
  const size_t this_len = m_pData->m_nDataLength;
  if (other.m_pData->m_nDataLength != this_len) {
    return false;
  }
  return memcmp(other.m_pData->m_String, m_pData->m_String, this_len) == 0;
}
// Ordering against a NUL-terminated C string: compares the common prefix
// byte-wise, then breaks ties by length. A null `ptr` counts as length 0.
bool ByteString::operator<(const char* ptr) const {
  const char* self = c_str();
  // Nothing can be "less than" itself: both sides absent, or same pointer.
  if ((!m_pData && !ptr) || self == ptr) {
    return false;
  }
  const size_t this_len = GetLength();
  const size_t that_len = ptr ? strlen(ptr) : 0;
  const int cmp = FXSYS_memcmp(self, ptr, std::min(this_len, that_len));
  if (cmp != 0) {
    return cmp < 0;
  }
  return this_len < that_len;
}
// Ordering against a view, expressed through Compare(): true when Compare()
// returns a negative value.
bool ByteString::operator<(ByteStringView str) const {
return Compare(str) < 0;
}
// Ordering against another string: compares the common prefix byte-wise, then
// breaks ties by length. Strings referring to the same data are never "less".
bool ByteString::operator<(const ByteString& other) const {
  if (m_pData == other.m_pData) {
    return false;
  }
  const size_t this_len = GetLength();
  const size_t that_len = other.GetLength();
  const int cmp =
      FXSYS_memcmp(c_str(), other.c_str(), std::min(this_len, that_len));
  if (cmp != 0) {
    return cmp < 0;
  }
  return this_len < that_len;
}
// Case-insensitive equality against a view. Lengths must match; bytes that
// differ are compared again after tolower(). A string without data equals
// only an empty view.
bool ByteString::EqualNoCase(ByteStringView str) const {
  if (!m_pData) {
    return str.IsEmpty();
  }
  const size_t len = str.GetLength();
  if (m_pData->m_nDataLength != len) {
    return false;
  }
  const uint8_t* lhs = reinterpret_cast<const uint8_t*>(m_pData->m_String);
  const uint8_t* rhs = str.raw_str();
  for (size_t i = 0; i < len; ++i) {
    const uint8_t a = lhs[i];
    const uint8_t b = rhs[i];
    if (a == b) {
      continue;
    }
    // Only pay for case folding when the raw bytes differ.
    const uint8_t folded_a = static_cast<uint8_t>(tolower(a));
    const uint8_t folded_b = static_cast<uint8_t>(tolower(b));
    if (folded_a != folded_b) {
      return false;
    }
  }
  return true;
}
// Replaces the contents with a copy of the `nSrcLen` chars at `pSrcData`.
// NOTE(review): m_pData is dereferenced right after AllocBeforeWrite(); the
// callers visible in this file only reach here with a non-empty source.
void ByteString::AssignCopy(const char* pSrcData, size_t nSrcLen) {
// Obtain a buffer of sufficient size; old contents need not be preserved.
AllocBeforeWrite(nSrcLen);
m_pData->CopyContents({pSrcData, nSrcLen});
m_pData->m_nDataLength = nSrcLen;
}
void ByteString::ReallocBeforeWrite(size_t nNewLength) {
if (m_pData && m_pData->CanOperateInPlace(nNewLength))
return;
if (nNewLength == 0) {
clear();
return;
}
RetainPtr<StringData> pNewData = StringData::Create(nNewLength);
if (m_pData) {
size_t nCopyLength = std::min(m_pData->m_nDataLength, nNewLength);
pNewData->CopyContents({m_pData->m_String, nCopyLength});
pNewData->m_nDataLength = nCopyLength;
} else {
pNewData->m_nDataLength = 0;
}
pNewData->m_String[pNewData->m_nDataLength] = 0;
m_pData = std::move(pNewData);
}
// Ensures the string owns a buffer that can be written in place for a length
// of `nNewLength`. Unlike ReallocBeforeWrite(), existing contents are not
// carried over when a new buffer is created. Length zero clears the string.
void ByteString::AllocBeforeWrite(size_t nNewLength) {
  if (m_pData && m_pData->CanOperateInPlace(nNewLength)) {
    return;
  }
  if (nNewLength != 0) {
    m_pData = StringData::Create(nNewLength);
  } else {
    clear();
  }
}
// Finishes a direct-write session started with GetBuffer(): records
// `nNewLength` (clamped to the allocated length) as the string length and
// NUL-terminates the buffer. A resulting length of zero clears the string.
// Does nothing when the string has no data.
void ByteString::ReleaseBuffer(size_t nNewLength) {
if (!m_pData)
return;
// Never report more characters than the buffer can hold.
nNewLength = std::min(nNewLength, m_pData->m_nAllocLength);
if (nNewLength == 0) {
clear();
return;
}
// The buffer is expected to be held by exactly one reference at this point.
DCHECK_EQ(m_pData->m_nRefs, 1);
m_pData->m_nDataLength = nNewLength;
m_pData->m_String[nNewLength] = 0;
// Shrink the allocation when at least 32 chars of it would go unused.
if (m_pData->m_nAllocLength - nNewLength >= 32) {
// Over arbitrary threshold, so pay the price to relocate. Force copy to
// always occur by holding a second reference to the string.
ByteString preserve(*this);
ReallocBeforeWrite(nNewLength);
}
}
// Requests a buffer of at least `len` chars via GetBuffer(); the returned
// span is discarded.
void ByteString::Reserve(size_t len) {
GetBuffer(len);
}
// Returns a writable span over the string's buffer, sized to the buffer's
// allocated length, after making sure the buffer can be written in place for
// at least `nMinBufLength` chars. Existing contents and length are kept.
// Returns an empty span when there is nothing to allocate (no data and a
// zero-length request, or a zero-length result after a failed in-place check).
pdfium::span<char> ByteString::GetBuffer(size_t nMinBufLength) {
  if (m_pData) {
    if (!m_pData->CanOperateInPlace(nMinBufLength)) {
      // Never shrink below what the string currently holds.
      nMinBufLength = std::max(nMinBufLength, m_pData->m_nDataLength);
      if (nMinBufLength == 0) {
        return pdfium::span<char>();
      }
      RetainPtr<StringData> pFresh = StringData::Create(nMinBufLength);
      pFresh->CopyContents(*m_pData);
      pFresh->m_nDataLength = m_pData->m_nDataLength;
      m_pData = std::move(pFresh);
    }
  } else {
    if (nMinBufLength == 0) {
      return pdfium::span<char>();
    }
    // Brand-new buffer: starts out as an empty, NUL-terminated string.
    m_pData = StringData::Create(nMinBufLength);
    m_pData->m_nDataLength = 0;
    m_pData->m_String[0] = 0;
  }
  return pdfium::span<char>(m_pData->m_String, m_pData->m_nAllocLength);
}
// Removes `count` chars starting at `index` and returns the resulting length.
// Returns 0 when the string has no data. The string is left unchanged (and
// its current length returned) when `count` is zero, `index` is past the end,
// or the range [index, index + count) does not lie entirely inside the string.
size_t ByteString::Delete(size_t index, size_t count) {
  if (!m_pData) {
    return 0;
  }
  const size_t old_length = m_pData->m_nDataLength;
  if (count == 0 || index > old_length) {
    return old_length;
  }
  // Compare `count` against the chars remaining after `index` instead of
  // forming `index + count` first: that sum can wrap around for a very large
  // `count` and would then slip past a `> old_length` check.
  if (count > old_length - index) {
    return old_length;
  }
  const size_t removal_end = index + count;
  ReallocBeforeWrite(old_length);
  // Shift the tail down, including the terminating NUL (hence the +1).
  const size_t chars_to_copy = old_length - removal_end + 1;
  FXSYS_memmove(m_pData->m_String + index, m_pData->m_String + removal_end,
                chars_to_copy);
  m_pData->m_nDataLength = old_length - count;
  return m_pData->m_nDataLength;
}
void ByteString::Concat(const char* pSrcData, size_t nSrcLen) {
if (!pSrcData || nSrcLen == 0)
return;
if (!m_pData) {
m_pData = StringData::Create({pSrcData, nSrcLen});
return;
}
if (m_pData->CanOperateInPlace(m_pData->m_nDataLength + nSrcLen)) {
m_pData->CopyContentsAt(m_pData->m_nDataLength, {pSrcData, nSrcLen});
m_pData->m_nDataLength += nSrcLen;
return;
}
size_t nConcatLen = std::max(m_pData->m_nDataLength / 2, nSrcLen);
RetainPtr<StringData> pNewData =
StringData::Create(m_pData->m_nDataLength + nConcatLen);
pNewData->CopyContents(*m_pData);
pNewData->CopyContentsAt(m_pData->m_nDataLength, {pSrcData, nSrcLen});
pNewData->m_nDataLength = m_pData->m_nDataLength + nSrcLen;
m_pData = std::move(pNewData);
}
// Test hook: returns the reference count stored in the string data, or 0
// when the string has no data.
intptr_t ByteString::ReferenceCountForTesting() const {
  if (!m_pData) {
    return 0;
  }
  return m_pData->m_nRefs;
}
// Returns the tail of the string starting at `offset`.
ByteString ByteString::Substr(size_t offset) const {
  // Unsigned underflow is well-defined and out-of-range is handled by the
  // two-argument Substr().
  const size_t remaining = GetLength() - offset;
  return Substr(offset, remaining);
}
// Returns the `count` chars starting at `first`. A request covering the whole
// string returns a copy of this object rather than building new data; a
// string without data yields an empty string. Other ranges are delegated to
// the string view's Substr().
ByteString ByteString::Substr(size_t first, size_t count) const {
  if (!m_pData) {
    return ByteString();
  }
  if (first != 0 || count != m_pData->m_nDataLength) {
    return ByteString(AsStringView().Substr(first, count));
  }
  return *this;
}
// Returns the leading `count` chars, i.e. Substr(0, count).
ByteString ByteString::First(size_t count) const {
return Substr(0, count);
}
// Returns the trailing `count` chars.
ByteString ByteString::Last(size_t count) const {
  // Unsigned underflow is well-defined and out-of-range is handled by
  // Substr().
  const size_t start = GetLength() - count;
  return Substr(start, count);
}
// Overwrites the char at `index` with `c`. `index` must be a valid index
// (checked with DCHECK only).
void ByteString::SetAt(size_t index, char c) {
DCHECK(IsValidIndex(index));
// Make sure the buffer may be written in place before modifying it.
ReallocBeforeWrite(m_pData->m_nDataLength);
m_pData->m_String[index] = c;
}
// Inserts `ch` before position `index` and returns the new length. When
// `index` fails IsValidLength(), nothing changes and the current length is
// returned.
size_t ByteString::Insert(size_t index, char ch) {
  const size_t old_len = GetLength();
  if (!IsValidLength(index)) {
    return old_len;
  }
  const size_t grown_len = old_len + 1;
  ReallocBeforeWrite(grown_len);
  char* slot = m_pData->m_String + index;
  // Shift the tail (and its terminating NUL) one position to the right.
  FXSYS_memmove(slot + 1, slot, grown_len - index);
  *slot = ch;
  m_pData->m_nDataLength = grown_len;
  return grown_len;
}
// Returns the index of the first occurrence of `ch` at or after `start`, or
// std::nullopt when there is none, the string has no data, or `start` is not
// a valid index.
std::optional<size_t> ByteString::Find(char ch, size_t start) const {
  if (!m_pData || !IsValidIndex(start)) {
    return std::nullopt;
  }
  const char* hit = static_cast<const char*>(FXSYS_memchr(
      m_pData->m_String + start, ch, m_pData->m_nDataLength - start));
  if (!hit) {
    return std::nullopt;
  }
  return std::optional<size_t>(static_cast<size_t>(hit - m_pData->m_String));
}
// Returns the index of the first occurrence of `subStr` at or after `start`,
// or std::nullopt when there is none, the string has no data, or `start` is
// not a valid index.
std::optional<size_t> ByteString::Find(ByteStringView subStr,
                                       size_t start) const {
  if (!m_pData || !IsValidIndex(start)) {
    return std::nullopt;
  }
  const std::optional<size_t> relative =
      FX_strpos(m_pData->span().subspan(start), subStr.span());
  if (relative.has_value()) {
    // Translate from an offset within the searched tail to a string index.
    return start + relative.value();
  }
  return std::nullopt;
}
// Returns the index of the last occurrence of `ch`, or std::nullopt when the
// string has no data or does not contain `ch`.
std::optional<size_t> ByteString::ReverseFind(char ch) const {
  if (!m_pData) {
    return std::nullopt;
  }
  // `remaining` counts the chars not yet examined; the candidate is the last
  // of them.
  for (size_t remaining = m_pData->m_nDataLength; remaining > 0; --remaining) {
    const size_t candidate = remaining - 1;
    if (m_pData->m_String[candidate] == ch) {
      return candidate;
    }
  }
  return std::nullopt;
}
// Lower-cases the string in place with FXSYS_strlwr(). Empty strings are
// left untouched.
void ByteString::MakeLower() {
  if (!IsEmpty()) {
    // Make sure the buffer may be written in place before modifying it.
    ReallocBeforeWrite(m_pData->m_nDataLength);
    FXSYS_strlwr(m_pData->m_String);
  }
}
// Upper-cases the string in place with FXSYS_strupr(). Empty strings are
// left untouched.
void ByteString::MakeUpper() {
  if (!IsEmpty()) {
    // Make sure the buffer may be written in place before modifying it.
    ReallocBeforeWrite(m_pData->m_nDataLength);
    FXSYS_strupr(m_pData->m_String);
  }
}
// Deletes every occurrence of `chRemove` and returns how many were removed.
// When the string is empty or contains no such char, nothing is modified and
// 0 is returned.
size_t ByteString::Remove(char chRemove) {
  if (IsEmpty()) {
    return 0;
  }
  const size_t len = m_pData->m_nDataLength;
  // Locate the first occurrence before touching the buffer, so that the
  // ReallocBeforeWrite() call is skipped when there is nothing to remove.
  size_t first_hit = 0;
  while (first_hit < len && m_pData->m_String[first_hit] != chRemove) {
    ++first_hit;
  }
  if (first_hit == len) {
    return 0;
  }
  ReallocBeforeWrite(len);
  // The buffer may have changed, so take the pointer only now.
  char* data = m_pData->m_String;
  size_t write_pos = first_hit;
  for (size_t read_pos = first_hit; read_pos < len; ++read_pos) {
    if (data[read_pos] != chRemove) {
      data[write_pos] = data[read_pos];
      ++write_pos;
    }
  }
  data[write_pos] = 0;
  const size_t removed = len - write_pos;
  m_pData->m_nDataLength -= removed;
  return removed;
}
// Replaces every occurrence of `pOld` with `pNew` and returns the number of
// replacements. Matches are found left to right, and the search resumes right
// after each match, so matches never overlap. Returns 0 without modifying
// anything when the string has no data, `pOld` is empty, or nothing matches.
size_t ByteString::Replace(ByteStringView pOld, ByteStringView pNew) {
if (!m_pData || pOld.IsEmpty())
return 0;
// Pass 1: count the matches so the result can be sized exactly.
size_t nCount = 0;
{
// Limit span lifetime.
pdfium::span<char> search_span = m_pData->span();
while (true) {
std::optional<size_t> found = FX_strpos(search_span, pOld.span());
if (!found.has_value()) {
break;
}
nCount++;
// Continue searching just past the end of this match.
search_span = search_span.subspan(found.value() + pOld.GetLength());
}
}
if (nCount == 0)
return 0;
// The length difference is computed in unsigned arithmetic; when `pNew` is
// shorter than `pOld` it wraps, and the wrapped sum is the shrunken length.
size_t nNewLength =
m_pData->m_nDataLength + nCount * (pNew.GetLength() - pOld.GetLength());
if (nNewLength == 0) {
clear();
return nCount;
}
// Pass 2: build the result in a fresh buffer.
RetainPtr<StringData> pNewData = StringData::Create(nNewLength);
{
// Spans can't outlive the StringData buffers.
pdfium::span<const char> search_span = m_pData->span();
pdfium::span<char> dest_span = pNewData->span();
for (size_t i = 0; i < nCount; i++) {
size_t found = FX_strpos(search_span, pOld.span()).value();
// Copy the text preceding the match, then the replacement.
dest_span = fxcrt::spancpy(dest_span, search_span.first(found));
dest_span = fxcrt::spancpy(dest_span, pNew.span());
search_span = search_span.subspan(found + pOld.GetLength());
}
// Copy whatever follows the last match.
dest_span = fxcrt::spancpy(dest_span, search_span);
// The destination must have been filled exactly.
CHECK(dest_span.empty());
}
m_pData = std::move(pNewData);
return nCount;
}
// Three-way comparison against a view. Returns the FXSYS_memcmp() result for
// the common prefix when it is non-zero; otherwise -1, 0 or 1 according to
// whether this string is shorter than, as long as, or longer than `str`.
// A string without data compares as 0 against an empty view and -1 otherwise.
int ByteString::Compare(ByteStringView str) const {
  if (!m_pData) {
    return str.IsEmpty() ? 0 : -1;
  }
  const size_t lhs_len = m_pData->m_nDataLength;
  const size_t rhs_len = str.GetLength();
  const int cmp = FXSYS_memcmp(m_pData->m_String, str.unterminated_c_str(),
                               std::min(lhs_len, rhs_len));
  if (cmp != 0) {
    return cmp;
  }
  if (lhs_len < rhs_len) {
    return -1;
  }
  return lhs_len > rhs_len ? 1 : 0;
}
// Strips the kTrimChars characters from both ends (right end first).
void ByteString::Trim() {
TrimRight(kTrimChars);
TrimLeft(kTrimChars);
}
// Strips repeated occurrences of `target` from both ends (right end first).
void ByteString::Trim(char target) {
ByteStringView targets(target);
TrimRight(targets);
TrimLeft(targets);
}
// Strips any of the chars in `targets` from both ends (right end first).
void ByteString::Trim(ByteStringView targets) {
TrimRight(targets);
TrimLeft(targets);
}
// Strips the kTrimChars characters from the start of the string.
void ByteString::TrimLeft() {
TrimLeft(kTrimChars);
}
// Strips repeated occurrences of `target` from the start of the string.
void ByteString::TrimLeft(char target) {
TrimLeft(ByteStringView(target));
}
// Removes from the start of the string the longest run of chars that all
// appear in `targets`. Does nothing when the string has no data, is empty, or
// `targets` is empty.
void ByteString::TrimLeft(ByteStringView targets) {
  if (!m_pData || targets.IsEmpty()) {
    return;
  }
  const size_t len = GetLength();
  if (len == 0) {
    return;
  }
  const size_t target_count = targets.GetLength();
  size_t pos = 0;
  while (pos < len) {
    size_t i = 0;
    // Compare as `char` on both sides. NOTE(review): if the view's operator[]
    // yields its unsigned byte type, an uncast comparison against a signed
    // `char` would never match bytes >= 0x80; the cast is a no-op otherwise.
    while (i < target_count &&
           static_cast<char>(targets[i]) != m_pData->m_String[pos]) {
      i++;
    }
    if (i == target_count) {
      break;  // Current char is not a trim target; stop scanning.
    }
    pos++;
  }
  if (pos) {
    ReallocBeforeWrite(len);
    const size_t nDataLength = len - pos;
    // Slide the kept tail, including its terminating NUL, to the front.
    FXSYS_memmove(m_pData->m_String, m_pData->m_String + pos,
                  (nDataLength + 1) * sizeof(char));
    m_pData->m_nDataLength = nDataLength;
  }
}
// Strips the kTrimChars characters from the end of the string.
void ByteString::TrimRight() {
TrimRight(kTrimChars);
}
// Strips repeated occurrences of `target` from the end of the string.
void ByteString::TrimRight(char target) {
TrimRight(ByteStringView(target));
}
// Removes from the end of the string the longest run of chars that all appear
// in `targets`. Does nothing when the string has no data, is empty, or
// `targets` is empty.
void ByteString::TrimRight(ByteStringView targets) {
  if (!m_pData || targets.IsEmpty()) {
    return;
  }
  size_t pos = GetLength();
  if (pos == 0) {
    return;
  }
  const size_t target_count = targets.GetLength();
  while (pos) {
    size_t i = 0;
    // Compare as `char` on both sides. NOTE(review): if the view's operator[]
    // yields its unsigned byte type, an uncast comparison against a signed
    // `char` would never match bytes >= 0x80; the cast is a no-op otherwise.
    while (i < target_count &&
           static_cast<char>(targets[i]) != m_pData->m_String[pos - 1]) {
      i++;
    }
    if (i == target_count) {
      break;  // Last remaining char is not a trim target; stop scanning.
    }
    pos--;
  }
  if (pos < m_pData->m_nDataLength) {
    ReallocBeforeWrite(m_pData->m_nDataLength);
    // Truncate: terminate at the new end and record the shorter length.
    m_pData->m_String[pos] = 0;
    m_pData->m_nDataLength = pos;
  }
}
// Writes exactly GetLength() chars of `str` to `os` (length-based, not
// NUL-based) and returns the stream.
std::ostream& operator<<(std::ostream& os, const ByteString& str) {
return os.write(str.c_str(), str.GetLength());
}
// Writes exactly GetLength() chars of the view to `os` and returns the
// stream.
std::ostream& operator<<(std::ostream& os, ByteStringView str) {
return os.write(str.unterminated_c_str(), str.GetLength());
}
} // namespace fxcrt
// Hashes `str` by folding each element, read as ByteStringView::UnsignedType,
// into a 32-bit accumulator: hash = 31 * hash + element. An empty view
// hashes to 0.
uint32_t FX_HashCode_GetA(ByteStringView str) {
  uint32_t hash = 0;
  for (ByteStringView::UnsignedType element : str) {
    hash = 31 * hash + element;
  }
  return hash;
}
// Like the multiplier-31 hash, but each element is passed through tolower()
// before being folded in: hash = 31 * hash + tolower(element).
uint32_t FX_HashCode_GetLoweredA(ByteStringView str) {
  uint32_t hash = 0;
  for (ByteStringView::UnsignedType element : str) {
    hash = 31 * hash + tolower(element);
  }
  return hash;
}
// Hashes `str` by folding each element, read as ByteStringView::UnsignedType,
// into a 32-bit accumulator with multiplier 1313:
// hash = 1313 * hash + element. An empty view hashes to 0.
uint32_t FX_HashCode_GetAsIfW(ByteStringView str) {
  uint32_t hash = 0;
  for (ByteStringView::UnsignedType element : str) {
    hash = 1313 * hash + element;
  }
  return hash;
}
// Like the multiplier-1313 hash, but each element is passed through
// FXSYS_towlower() before being folded in:
// hash = 1313 * hash + FXSYS_towlower(element).
uint32_t FX_HashCode_GetLoweredAsIfW(ByteStringView str) {
  uint32_t hash = 0;
  for (ByteStringView::UnsignedType element : str) {
    hash = 1313 * hash + FXSYS_towlower(element);
  }
  return hash;
}