blob: e7d6e1b314a06156b5f706419145374d8a3dc835 [file] [log] [blame] [edit]
// Copyright 2017 The PDFium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
#include "core/fxcrt/fx_string.h"
#include <stdint.h>
#include <iterator>
#include "build/build_config.h"
#include "core/fxcrt/bytestring.h"
#include "core/fxcrt/code_point_view.h"
#include "core/fxcrt/fx_extension.h"
#include "core/fxcrt/span_util.h"
#include "core/fxcrt/string_view_template.h"
#include "core/fxcrt/utf16.h"
#include "core/fxcrt/widestring.h"
#include "third_party/base/compiler_specific.h"
#include "third_party/base/span.h"
namespace {
// Appends a Unicode code point to a `ByteString` using UTF-8.
//
// TODO(crbug.com/pdfium/2041): Migrate to `ByteString`.
void AppendCodePointToByteString(char32_t code_point, ByteString& buffer) {
if (code_point > pdfium::kMaximumSupplementaryCodePoint) {
// Invalid code point above U+10FFFF.
return;
}
if (code_point < 0x80) {
// 7-bit code points are unchanged in UTF-8.
buffer += code_point;
return;
}
int byte_size;
if (code_point < 0x800) {
byte_size = 2;
} else if (code_point < 0x10000) {
byte_size = 3;
} else {
byte_size = 4;
}
static constexpr uint8_t kPrefix[] = {0xc0, 0xe0, 0xf0};
int order = 1 << ((byte_size - 1) * 6);
buffer += kPrefix[byte_size - 2] | (code_point / order);
for (int i = 0; i < byte_size - 1; i++) {
code_point = code_point % order;
order >>= 6;
buffer += 0x80 | (code_point / order);
}
}
// Appends a Unicode code point to a `WideString` using either UTF-16 or UTF-32,
// depending on the platform's definition of `wchar_t`.
//
// TODO(crbug.com/pdfium/2031): Always use UTF-16.
// TODO(crbug.com/pdfium/2041): Migrate to `WideString`.
void AppendCodePointToWideString(char32_t code_point, WideString& buffer) {
if (code_point > pdfium::kMaximumSupplementaryCodePoint) {
// Invalid code point above U+10FFFF.
return;
}
#if defined(WCHAR_T_IS_UTF16)
if (code_point < pdfium::kMinimumSupplementaryCodePoint) {
buffer += static_cast<wchar_t>(code_point);
} else {
// Encode as UTF-16 surrogate pair.
pdfium::SurrogatePair surrogate_pair(code_point);
buffer += surrogate_pair.high();
buffer += surrogate_pair.low();
}
#else
buffer += static_cast<wchar_t>(code_point);
#endif // defined(WCHAR_T_IS_UTF16)
}
} // namespace
ByteString FX_UTF8Encode(WideStringView wsStr) {
ByteString buffer;
for (char32_t code_point : pdfium::CodePointView(wsStr)) {
AppendCodePointToByteString(code_point, buffer);
}
return buffer;
}
WideString FX_UTF8Decode(ByteStringView bsStr) {
WideString buffer;
int remaining = 0;
char32_t code_point = 0;
for (char byte : bsStr) {
uint8_t code_unit = static_cast<uint8_t>(byte);
if (code_unit < 0x80) {
remaining = 0;
AppendCodePointToWideString(code_unit, buffer);
} else if (code_unit < 0xc0) {
if (remaining > 0) {
--remaining;
code_point = (code_point << 6) | (code_unit & 0x3f);
if (remaining == 0) {
AppendCodePointToWideString(code_point, buffer);
}
}
} else if (code_unit < 0xe0) {
remaining = 1;
code_point = code_unit & 0x1f;
} else if (code_unit < 0xf0) {
remaining = 2;
code_point = code_unit & 0x0f;
} else if (code_unit < 0xf8) {
remaining = 3;
code_point = code_unit & 0x07;
} else {
remaining = 0;
}
}
return buffer;
}
namespace {
constexpr float kFractionScalesFloat[] = {
0.1f, 0.01f, 0.001f, 0.0001f,
0.00001f, 0.000001f, 0.0000001f, 0.00000001f,
0.000000001f, 0.0000000001f, 0.00000000001f};
const double kFractionScalesDouble[] = {
0.1, 0.01, 0.001, 0.0001, 0.00001, 0.000001,
0.0000001, 0.00000001, 0.000000001, 0.0000000001, 0.00000000001};
template <class T>
T StringTo(ByteStringView strc, pdfium::span<const T> fractional_scales) {
if (strc.IsEmpty())
return 0;
bool bNegative = false;
size_t cc = 0;
size_t len = strc.GetLength();
if (strc[0] == '+') {
cc++;
} else if (strc[0] == '-') {
bNegative = true;
cc++;
}
while (cc < len) {
if (strc[cc] != '+' && strc[cc] != '-')
break;
cc++;
}
T value = 0;
while (cc < len) {
if (strc[cc] == '.')
break;
value = value * 10 + FXSYS_DecimalCharToInt(strc.CharAt(cc));
cc++;
}
size_t scale = 0;
if (cc < len && strc[cc] == '.') {
cc++;
while (cc < len) {
value +=
fractional_scales[scale] * FXSYS_DecimalCharToInt(strc.CharAt(cc));
scale++;
if (scale == fractional_scales.size())
break;
cc++;
}
}
return bNegative ? -value : value;
}
template <class T>
size_t ToString(T value, int (*round_func)(T), pdfium::span<char> buf) {
buf[0] = '0';
buf[1] = '\0';
if (value == 0) {
return 1;
}
bool bNegative = false;
if (value < 0) {
bNegative = true;
value = -value;
}
int scale = 1;
int scaled = round_func(value);
while (scaled < 100000) {
if (scale == 1000000) {
break;
}
scale *= 10;
scaled = round_func(value * scale);
}
if (scaled == 0) {
return 1;
}
char buf2[32];
size_t buf_size = 0;
if (bNegative) {
buf[buf_size++] = '-';
}
int i = scaled / scale;
FXSYS_itoa(i, buf2, 10);
size_t len = strlen(buf2);
fxcrt::spancpy(buf.subspan(buf_size), pdfium::make_span(buf2).first(len));
buf_size += len;
int fraction = scaled % scale;
if (fraction == 0) {
return buf_size;
}
buf[buf_size++] = '.';
scale /= 10;
while (fraction) {
buf[buf_size++] = '0' + fraction / scale;
fraction %= scale;
scale /= 10;
}
return buf_size;
}
} // namespace
float StringToFloat(ByteStringView strc) {
return StringTo<float>(strc, kFractionScalesFloat);
}
float StringToFloat(WideStringView wsStr) {
return StringToFloat(FX_UTF8Encode(wsStr).AsStringView());
}
size_t FloatToString(float f, pdfium::span<char> buf) {
return ToString<float>(f, FXSYS_roundf, buf);
}
double StringToDouble(ByteStringView strc) {
return StringTo<double>(strc, kFractionScalesDouble);
}
double StringToDouble(WideStringView wsStr) {
return StringToDouble(FX_UTF8Encode(wsStr).AsStringView());
}
size_t DoubleToString(double d, pdfium::span<char> buf) {
return ToString<double>(d, FXSYS_round, buf);
}
namespace fxcrt {
template std::vector<ByteString> Split<ByteString>(const ByteString& that,
ByteString::CharType ch);
template std::vector<WideString> Split<WideString>(const WideString& that,
WideString::CharType ch);
} // namespace fxcrt