blob: a36f0724e52a975d462dff63b662ed7d9053cf68 [file] [log] [blame]
// Copyright 2017 The PDFium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
#include "core/fxcrt/cfx_utf8decoder.h"
#include <stdint.h>
#include <utility>
#include "build/build_config.h"
#include "core/fxcrt/string_view_template.h"
#include "core/fxcrt/widestring.h"
// Decodes `input` byte by byte as UTF-8, handing every completed code point to
// AppendCodePoint(). The decoder is lenient:
//  - a continuation byte (0x80..0xbf) seen while no sequence is open is
//    skipped;
//  - a new lead byte or an ASCII byte abandons any sequence still in progress;
//  - bytes 0xf8..0xff cancel the sequence in progress and produce nothing.
CFX_UTF8Decoder::CFX_UTF8Decoder(ByteStringView input) {
  int pending = 0;      // Continuation bytes still expected.
  char32_t scalar = 0;  // Code point accumulated so far.
  for (char raw : input) {
    const uint8_t unit = static_cast<uint8_t>(raw);

    // Single-byte (ASCII) code point.
    if (unit < 0x80) {
      pending = 0;
      AppendCodePoint(unit);
      continue;
    }

    // Continuation byte: contributes the low six bits.
    if (unit < 0xc0) {
      if (pending <= 0) {
        continue;
      }
      scalar = (scalar << 6) | (unit & 0x3f);
      if (--pending == 0) {
        AppendCodePoint(scalar);
      }
      continue;
    }

    // Bytes 0xf8 and above never start a sequence handled here.
    if (unit >= 0xf8) {
      pending = 0;
      continue;
    }

    // Lead byte of a 2-, 3- or 4-byte sequence. The payload mask shrinks by
    // one bit per extra continuation byte: 0x1f, 0x0f, 0x07.
    pending = unit < 0xe0 ? 1 : (unit < 0xf0 ? 2 : 3);
    scalar = unit & (0x3f >> pending);
  }
}
// Compiler-generated destructor; this file adds no explicit cleanup.
CFX_UTF8Decoder::~CFX_UTF8Decoder() = default;
// Returns the decoded text by moving it out of `buffer_`, so the decoder does
// not keep its own copy of the result afterwards.
WideString CFX_UTF8Decoder::TakeResult() {
  WideString decoded(std::move(buffer_));
  return decoded;
}
// Appends `code_point` to `buffer_`. Values above U+10FFFF are dropped. When
// WCHAR_T_IS_UTF16 is defined, code points at or above U+10000 are written as
// a UTF-16 surrogate pair; everything else is appended as a single wchar_t.
void CFX_UTF8Decoder::AppendCodePoint(char32_t code_point) {
  if (code_point > 0x10ffff) {
    // Not a valid code point: nothing is appended.
    return;
  }

#if defined(WCHAR_T_IS_UTF16)
  if (code_point >= 0x10000) {
    // Supplementary plane: split the 20-bit offset into high and low
    // surrogates of ten bits each.
    const char32_t offset = code_point - 0x10000;
    buffer_ += 0xd800 | (offset >> 10);
    buffer_ += 0xdc00 | (offset & 0x3ff);
    return;
  }
#endif  // defined(WCHAR_T_IS_UTF16)

  buffer_ += static_cast<wchar_t>(code_point);
}