blob: d1bd19570f4e2f8692d3943133ef57496ba54b41 [file] [log] [blame]
// Copyright 2014 The PDFium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
#include "core/fxcrt/bytestring.h"
#include <ctype.h>
#include <stddef.h>
#include <algorithm>
#include <sstream>
#include <string>
#include <utility>
#include "core/fxcrt/check.h"
#include "core/fxcrt/check_op.h"
#include "core/fxcrt/compiler_specific.h"
#include "core/fxcrt/fx_codepage.h"
#include "core/fxcrt/fx_extension.h"
#include "core/fxcrt/fx_memcpy_wrappers.h"
#include "core/fxcrt/fx_safe_types.h"
#include "core/fxcrt/fx_system.h"
#include "core/fxcrt/span.h"
#include "core/fxcrt/span_util.h"
#include "core/fxcrt/string_pool_template.h"
// Instantiate.
// Explicit instantiation definitions for the char-based string view, the
// ByteString string pool, and the std::hash specialization for ByteString.
template class fxcrt::StringViewTemplate<char>;
template class fxcrt::StringPoolTemplate<ByteString>;
template struct std::hash<ByteString>;
namespace {
// Character set passed to TrimFront()/TrimBack() by the TrimWhitespace*()
// helpers: horizontal tab (0x09), line feed (0x0a), vertical tab (0x0b),
// form feed (0x0c), carriage return (0x0d), and space (0x20).
constexpr char kTrimChars[] = "\x09\x0a\x0b\x0c\x0d\x20";
} // namespace
namespace fxcrt {
// Compile-time guard: a ByteString object must be no larger than one pointer.
static_assert(sizeof(ByteString) <= sizeof(char*),
"Strings must not require more space than pointers");
// static
ByteString ByteString::FormatInteger(int i) {
  // Render `i` in decimal into a fixed-size scratch buffer, then copy the
  // NUL-terminated text into a new ByteString.
  char digits[32];
  FXSYS_snprintf(digits, sizeof(digits), "%d", i);
  return ByteString(digits);
}
// static
// TODO(tsepez): Should be UNSAFE_BUFFER_USAGE.
ByteString ByteString::FormatV(const char* pFormat, va_list argList) {
  // Pass 1: measure the formatted output. This works on a copy of the
  // argument list so that `argList` can be copied again for pass 2.
  va_list measure_args;
  va_copy(measure_args, argList);
  // SAFETY: required from caller.
  const int formatted_len =
      UNSAFE_BUFFERS(vsnprintf(nullptr, 0, pFormat, measure_args));
  va_end(measure_args);

  // A failed or empty format yields an empty string.
  if (formatted_len <= 0) {
    return ByteString();
  }

  ByteString result;
  {
    // The span must go out of scope before ReleaseBuffer() is called below.
    pdfium::span<char> storage = result.GetBuffer(formatted_len);

    // SAFETY: In the following two calls, there's always space in the buffer
    // for a terminating NUL that's not included in `formatted_len`, and hence
    // not included in the span.
    UNSAFE_BUFFERS(FXSYS_memset(storage.data(), 0, formatted_len + 1));

    // Pass 2: write the formatted text into the buffer.
    va_list write_args;
    va_copy(write_args, argList);
    UNSAFE_TODO(
        vsnprintf(storage.data(), formatted_len + 1, pFormat, write_args));
    va_end(write_args);
  }
  // Set the final length from the NUL-terminated content just written.
  result.ReleaseBuffer(result.GetStringLength());
  return result;
}
// static
ByteString ByteString::Format(const char* pFormat, ...) {
  // Package the variadic arguments and let FormatV() do the actual work.
  va_list variadic_args;
  va_start(variadic_args, pFormat);
  ByteString formatted = FormatV(pFormat, variadic_args);
  va_end(variadic_args);
  return formatted;
}
ByteString::ByteString(const char* pStr, size_t nLen) {
  // A zero-length request creates no string data.
  if (nLen == 0) {
    return;
  }
  // SAFETY: caller ensures `pStr` points to at least `nLen` chars.
  data_ = StringData::Create(UNSAFE_BUFFERS(pdfium::make_span(pStr, nLen)));
}
// Constructs from `nLen` bytes at `pStr` by reinterpreting the bytes as chars
// and delegating to the (const char*, size_t) constructor.
ByteString::ByteString(const uint8_t* pStr, size_t nLen)
// SAFETY: caller ensures `pStr` points to at least `nLen` chars.
: UNSAFE_BUFFERS(ByteString(reinterpret_cast<const char*>(pStr), nLen)) {}
// Constructs a string of length one whose only character is `ch`.
ByteString::ByteString(char ch) {
// Create string data sized for one character, then store `ch` at index 0.
data_ = StringData::Create(1);
data_->string_[0] = ch;
}
// Constructs from a C string by delegating to the (const char*, size_t)
// constructor. A null `ptr` is passed along with length 0, so strlen() is
// never called on a null pointer.
ByteString::ByteString(const char* ptr)
// SAFETY: caller ensures `ptr` is NUL-terminated.
: UNSAFE_BUFFERS(ByteString(ptr, ptr ? strlen(ptr) : 0)) {}
ByteString::ByteString(ByteStringView bstrc) {
  // An empty view creates no string data.
  if (bstrc.IsEmpty()) {
    return;
  }
  data_ = StringData::Create(bstrc.span());
}
ByteString::ByteString(ByteStringView str1, ByteStringView str2) {
  // Sum the two lengths with checked arithmetic; ValueOrDie() is relied upon
  // to reject an overflowing total.
  FX_SAFE_SIZE_T checked_total = str1.GetLength();
  checked_total += str2.GetLength();
  const size_t total_len = checked_total.ValueOrDie();

  // Two empty inputs create no string data.
  if (total_len == 0) {
    return;
  }

  // Allocate once, then lay `str2` down directly after `str1`.
  data_ = StringData::Create(total_len);
  data_->CopyContents(str1.span());
  data_->CopyContentsAt(str1.GetLength(), str2.span());
}
ByteString::ByteString(const std::initializer_list<ByteStringView>& list) {
  // First pass: total the piece lengths with checked arithmetic;
  // ValueOrDie() is relied upon to reject an overflowing total.
  FX_SAFE_SIZE_T checked_total = 0;
  for (const ByteStringView& piece : list) {
    checked_total += piece.GetLength();
  }
  const size_t total_len = checked_total.ValueOrDie();

  // An empty list, or a list of empty views, creates no string data.
  if (total_len == 0) {
    return;
  }

  // Second pass: copy each piece to the running write position.
  data_ = StringData::Create(total_len);
  size_t write_pos = 0;
  for (const ByteStringView& piece : list) {
    data_->CopyContentsAt(write_pos, piece.span());
    write_pos += piece.GetLength();
  }
}
ByteString::ByteString(const fxcrt::ostringstream& outStream) {
  // Take a snapshot of the stream's contents; an empty snapshot creates no
  // string data.
  auto snapshot = outStream.str();
  if (snapshot.empty()) {
    return;
  }
  data_ = StringData::Create(pdfium::make_span(snapshot));
}
// TODO(tsepez): Should be UNSAFE_BUFFER_USAGE.
ByteString& ByteString::operator=(const char* str) {
  // Both a null pointer and an empty C string reset this string.
  const bool source_is_empty = !str || !str[0];
  if (source_is_empty) {
    clear();
    return *this;
  }
  // SAFETY: required from caller.
  AssignCopy(str, UNSAFE_BUFFERS(strlen(str)));
  return *this;
}
ByteString& ByteString::operator=(ByteStringView str) {
  // Assigning an empty view resets this string.
  if (str.IsEmpty()) {
    clear();
    return *this;
  }
  // Otherwise copy exactly GetLength() characters from the view.
  AssignCopy(str.unterminated_c_str(), str.GetLength());
  return *this;
}
ByteString& ByteString::operator=(const ByteString& that) {
  // Nothing to do when both strings already refer to the same data.
  if (data_ == that.data_) {
    return *this;
  }
  data_ = that.data_;
  return *this;
}
ByteString& ByteString::operator=(ByteString&& that) noexcept {
  // When both strings already refer to the same data, `that` is left
  // untouched; otherwise its data is moved into this string.
  if (data_ == that.data_) {
    return *this;
  }
  data_ = std::move(that.data_);
  return *this;
}
// TODO(tsepez): Should be UNSAFE_BUFFER_USAGE
ByteString& ByteString::operator+=(const char* str) {
  // Appending a null pointer is a no-op.
  if (!str) {
    return *this;
  }
  // SAFETY: required from caller.
  Concat(str, UNSAFE_BUFFERS(strlen(str)));
  return *this;
}
// Appends the single character `ch` and returns *this.
ByteString& ByteString::operator+=(char ch) {
// Concat() takes a pointer and a length, so pass the address of the by-value
// parameter with a length of one.
Concat(&ch, 1);
return *this;
}
ByteString& ByteString::operator+=(const ByteString& str) {
  // A string without data contributes nothing.
  if (!str.data_) {
    return *this;
  }
  Concat(str.data_->string_, str.data_->data_length_);
  return *this;
}
ByteString& ByteString::operator+=(ByteStringView str) {
  // An empty view contributes nothing.
  if (str.IsEmpty()) {
    return *this;
  }
  Concat(str.unterminated_c_str(), str.GetLength());
  return *this;
}
// TODO(tsepez): Should be UNSAFE_BUFFER_USAGE
bool ByteString::operator==(const char* ptr) const {
  // A string without data equals a null pointer or an empty C string.
  if (!data_) {
    return !ptr || !ptr[0];
  }

  const size_t this_len = data_->data_length_;
  // With data present, a null pointer only matches a zero-length string.
  if (!ptr) {
    return this_len == 0;
  }

  // SAFETY: `data_length_` is within `string_`, and the strlen() call
  // (whose own safety is required from the caller) ensures there are
  // `data_length_` bytes at `ptr` before the terminator.
  if (UNSAFE_BUFFERS(strlen(ptr)) != this_len) {
    return false;
  }
  return UNSAFE_BUFFERS(FXSYS_memcmp(ptr, data_->string_, this_len)) == 0;
}
bool ByteString::operator==(ByteStringView str) const {
  // A string without data equals only an empty view.
  if (!data_) {
    return str.IsEmpty();
  }

  // Lengths must match before the bytes are compared.
  const size_t that_len = str.GetLength();
  if (data_->data_length_ != that_len) {
    return false;
  }

  // SAFETY: `str` has `GetLength()` valid bytes in `unterminated_c_str()`,
  // `data_length_` is within `string_`, and the lengths were just found equal.
  return UNSAFE_BUFFERS(FXSYS_memcmp(data_->string_, str.unterminated_c_str(),
                                     that_len)) == 0;
}
// Returns true when this string and `other` hold the same bytes.
bool ByteString::operator==(const ByteString& other) const {
  // Strings referring to the same data (including both having none) are
  // equal without looking at the bytes.
  if (data_ == other.data_) {
    return true;
  }
  // If either side is empty, they are equal only when both are empty.
  if (IsEmpty()) {
    return other.IsEmpty();
  }
  if (other.IsEmpty()) {
    return false;
  }
  // Both sides are non-empty here, so both `data_` pointers are valid.
  // SAFETY: data_length_ describes the length of string_.
  // FXSYS_memcmp() is used, rather than raw memcmp(), to match the other
  // comparison operators in this file.
  return other.data_->data_length_ == data_->data_length_ &&
         UNSAFE_BUFFERS(FXSYS_memcmp(other.data_->string_, data_->string_,
                                     data_->data_length_)) == 0;
}
// TODO(tsepez): Should be UNSAFE_BUFFER_USAGE.
bool ByteString::operator<(const char* ptr) const {
  // No data on this side and a null pointer on the other: not less-than.
  if (!data_ && !ptr) {
    return false;
  }
  // Comparing against our own buffer: not less-than.
  const char* const self_chars = c_str();
  if (self_chars == ptr) {
    return false;
  }

  const size_t len = GetLength();
  // SAFETY: required from caller.
  const size_t other_len = ptr ? UNSAFE_BUFFERS(strlen(ptr)) : 0;

  // SAFETY: Comparison limited to minimum valid length of either argument.
  const int prefix_order = UNSAFE_BUFFERS(
      FXSYS_memcmp(self_chars, ptr, std::min(len, other_len)));
  if (prefix_order != 0) {
    return prefix_order < 0;
  }
  // Equal common prefix: the shorter string orders first.
  return len < other_len;
}
// Returns true when Compare(str) yields a negative result, i.e. this string
// orders before `str`.
bool ByteString::operator<(ByteStringView str) const {
return Compare(str) < 0;
}
bool ByteString::operator<(const ByteString& other) const {
  // Strings referring to the same data are equal, hence not less-than.
  if (data_ == other.data_) {
    return false;
  }

  const size_t len = GetLength();
  const size_t other_len = other.GetLength();

  // SAFETY: Comparison limited to minimum valid length of either argument.
  const int prefix_order = UNSAFE_BUFFERS(
      FXSYS_memcmp(c_str(), other.c_str(), std::min(len, other_len)));
  if (prefix_order != 0) {
    return prefix_order < 0;
  }
  // Equal common prefix: the shorter string orders first.
  return len < other_len;
}
bool ByteString::EqualNoCase(ByteStringView str) const {
  // A string without data matches only an empty view.
  if (!data_) {
    return str.IsEmpty();
  }
  // Different lengths can never match.
  if (data_->data_length_ != str.GetLength()) {
    return false;
  }

  // Walk both byte sequences in lockstep. Bytes are handled as uint8_t before
  // being passed to tolower().
  pdfium::span<const uint8_t> lhs = pdfium::as_bytes(data_->span());
  pdfium::span<const uint8_t> rhs = str.unsigned_span();
  for (; !lhs.empty(); lhs = lhs.subspan(1), rhs = rhs.subspan(1)) {
    const uint8_t lhs_byte = lhs.front();
    const uint8_t rhs_byte = rhs.front();
    if (lhs_byte == rhs_byte) {
      continue;  // Identical bytes need no case folding.
    }
    if (tolower(lhs_byte) != tolower(rhs_byte)) {
      return false;
    }
  }
  return true;
}
intptr_t ByteString::ReferenceCountForTesting() const {
  // A string without data reports a count of zero.
  if (!data_) {
    return 0;
  }
  return data_->refs_;
}
ByteString ByteString::Substr(size_t offset) const {
  // If `offset` exceeds the length this subtraction wraps around, which is
  // well-defined for unsigned types; the two-argument Substr() handles the
  // resulting out-of-range request.
  const size_t remaining = GetLength() - offset;
  return Substr(offset, remaining);
}
ByteString ByteString::Substr(size_t first, size_t count) const {
  // A string without data has no substrings.
  if (!data_) {
    return ByteString();
  }
  // A request for the whole string returns a copy of *this instead of
  // building a new string from a view.
  const bool wants_whole_string = first == 0 && count == data_->data_length_;
  if (wants_whole_string) {
    return *this;
  }
  return ByteString(AsStringView().Substr(first, count));
}
// Returns the substring of `count` characters starting at offset 0, as
// produced by Substr(0, count).
ByteString ByteString::First(size_t count) const {
return Substr(0, count);
}
ByteString ByteString::Last(size_t count) const {
  // If `count` exceeds the length this subtraction wraps around, which is
  // well-defined for unsigned types; Substr() handles the resulting
  // out-of-range request.
  const size_t start = GetLength() - count;
  return Substr(start, count);
}
void ByteString::MakeLower() {
  if (!IsEmpty()) {
    // ReallocBeforeWrite() runs before the in-place edit (presumably so the
    // buffer is safe to modify; confirm against its definition), then the
    // characters are lower-cased in place.
    ReallocBeforeWrite(data_->data_length_);
    FXSYS_strlwr(data_->string_);
  }
}
void ByteString::MakeUpper() {
  if (!IsEmpty()) {
    // ReallocBeforeWrite() runs before the in-place edit (presumably so the
    // buffer is safe to modify; confirm against its definition), then the
    // characters are upper-cased in place.
    ReallocBeforeWrite(data_->data_length_);
    FXSYS_strupr(data_->string_);
  }
}
int ByteString::Compare(ByteStringView str) const {
  // A string without data equals an empty view and orders before any other.
  if (!data_) {
    return str.IsEmpty() ? 0 : -1;
  }

  const size_t this_len = data_->data_length_;
  const size_t that_len = str.GetLength();

  // SAFETY: Comparison limited to minimum valid length of either argument.
  const int prefix_order = UNSAFE_BUFFERS(FXSYS_memcmp(
      data_->string_, str.unterminated_c_str(), std::min(this_len, that_len)));
  if (prefix_order != 0) {
    return prefix_order;
  }

  // Equal common prefix: the length decides, shorter first.
  if (this_len < that_len) {
    return -1;
  }
  return this_len > that_len ? 1 : 0;
}
// Trims whitespace from both ends of the string: the back first, then the
// front.
void ByteString::TrimWhitespace() {
TrimWhitespaceBack();
TrimWhitespaceFront();
}
// Calls TrimFront() with the kTrimChars whitespace set.
void ByteString::TrimWhitespaceFront() {
TrimFront(kTrimChars);
}
// Calls TrimBack() with the kTrimChars whitespace set.
void ByteString::TrimWhitespaceBack() {
TrimBack(kTrimChars);
}
// Writes GetLength() bytes starting at c_str() to `os` using write(), so the
// amount written is given by the length rather than by a NUL terminator.
// Returns the stream.
std::ostream& operator<<(std::ostream& os, const ByteString& str) {
return os.write(str.c_str(), str.GetLength());
}
// Writes GetLength() bytes starting at unterminated_c_str() to `os` using
// write(); the explicit length is what bounds the output. Returns the stream.
std::ostream& operator<<(std::ostream& os, ByteStringView str) {
return os.write(str.unterminated_c_str(), str.GetLength());
}
} // namespace fxcrt
uint32_t FX_HashCode_GetA(ByteStringView str) {
  // Multiply-and-add hash with multiplier 31 over the unsigned character
  // values; uint32_t arithmetic wraps on overflow.
  uint32_t code = 0;
  for (ByteStringView::UnsignedType ch : str) {
    code *= 31;
    code += ch;
  }
  return code;
}
uint32_t FX_HashCode_GetLoweredA(ByteStringView str) {
  // Multiply-and-add hash with multiplier 31; each unsigned character value
  // is passed through tolower() first. uint32_t arithmetic wraps on overflow.
  uint32_t code = 0;
  for (ByteStringView::UnsignedType ch : str) {
    code *= 31;
    code += tolower(ch);
  }
  return code;
}
uint32_t FX_HashCode_GetAsIfW(ByteStringView str) {
  // Multiply-and-add hash with multiplier 1313 over the unsigned character
  // values; uint32_t arithmetic wraps on overflow.
  uint32_t code = 0;
  for (ByteStringView::UnsignedType ch : str) {
    code *= 1313;
    code += ch;
  }
  return code;
}
uint32_t FX_HashCode_GetLoweredAsIfW(ByteStringView str) {
  // Multiply-and-add hash with multiplier 1313; each unsigned character value
  // is passed through FXSYS_towlower() first. uint32_t arithmetic wraps on
  // overflow.
  uint32_t code = 0;
  for (ByteStringView::UnsignedType ch : str) {
    code *= 1313;
    code += FXSYS_towlower(ch);
  }
  return code;
}