blob: b66605ac185f71adb03a8d50a5e3b8ecce4bd51f [file] [log] [blame]
// Copyright 2017 The PDFium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
#include "core/fxcrt/cfx_utf8decoder.h"
#include <stdint.h>
#include <utility>
#include "build/build_config.h"
#include "core/fxcrt/string_view_template.h"
#include "core/fxcrt/utf16.h"
#include "core/fxcrt/widestring.h"
CFX_UTF8Decoder::CFX_UTF8Decoder(ByteStringView input) {
int remaining = 0;
char32_t code_point = 0;
for (char byte : input) {
uint8_t code_unit = static_cast<uint8_t>(byte);
if (code_unit < 0x80) {
remaining = 0;
AppendCodePoint(code_unit);
} else if (code_unit < 0xc0) {
if (remaining > 0) {
--remaining;
code_point = (code_point << 6) | (code_unit & 0x3f);
if (remaining == 0) {
AppendCodePoint(code_point);
}
}
} else if (code_unit < 0xe0) {
remaining = 1;
code_point = code_unit & 0x1f;
} else if (code_unit < 0xf0) {
remaining = 2;
code_point = code_unit & 0x0f;
} else if (code_unit < 0xf8) {
remaining = 3;
code_point = code_unit & 0x07;
} else {
remaining = 0;
}
}
}
CFX_UTF8Decoder::~CFX_UTF8Decoder() = default;
WideString CFX_UTF8Decoder::TakeResult() {
return std::move(buffer_);
}
void CFX_UTF8Decoder::AppendCodePoint(char32_t code_point) {
if (code_point > pdfium::kMaximumSupplementaryCodePoint) {
// Invalid code point above U+10FFFF.
return;
}
#if defined(WCHAR_T_IS_UTF16)
if (code_point < pdfium::kMinimumSupplementaryCodePoint) {
buffer_ += static_cast<wchar_t>(code_point);
} else {
// Encode as UTF-16 surrogate pair.
pdfium::SurrogatePair surrogate_pair(code_point);
buffer_ += surrogate_pair.high();
buffer_ += surrogate_pair.low();
}
#else
buffer_ += static_cast<wchar_t>(code_point);
#endif // defined(WCHAR_T_IS_UTF16)
}