// blob: e0b7ee54dffd4382d044e15bf867f045705d3ea1 [file] [log] [blame]
// Copyright 2018 The PDFium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
#include "core/fxcrt/cfx_utf8encoder.h"
#include <stdint.h>
#include "build/build_config.h"
// Defaulted constructor, defined out-of-line in this translation unit.
CFX_UTF8Encoder::CFX_UTF8Encoder() = default;
// Defaulted destructor, likewise defined out-of-line.
CFX_UTF8Encoder::~CFX_UTF8Encoder() = default;
// Feeds one wide-character unit into the encoder.
//
// When WCHAR_T_IS_UTF16 is defined, surrogate pairs are reassembled before
// encoding:
//   * a high surrogate (0xD800..0xDBFF) is remembered and nothing is emitted;
//   * a low surrogate (0xDC00..0xDFFF) is combined with the remembered high
//     surrogate into one supplementary code point; with no remembered high
//     surrogate it is silently dropped;
//   * any other unit discards a remembered high surrogate and is emitted
//     as-is.
// Otherwise the unit is forwarded to AppendCodePoint() unchanged.
void CFX_UTF8Encoder::Input(wchar_t code_unit) {
#if defined(WCHAR_T_IS_UTF16)
  const bool is_high_surrogate = code_unit >= 0xd800 && code_unit < 0xdc00;
  if (is_high_surrogate) {
    // Hold on to it until the matching low surrogate shows up.
    high_surrogate_ = code_unit;
    return;
  }

  const bool is_low_surrogate = code_unit >= 0xdc00 && code_unit <= 0xdfff;
  if (!is_low_surrogate) {
    // Ordinary unit: any pending high surrogate is abandoned.
    high_surrogate_ = 0;
    AppendCodePoint(code_unit);
    return;
  }

  if (!high_surrogate_) {
    // Low surrogate with nothing to pair with: ignore it.
    return;
  }

  // Each surrogate contributes 10 payload bits; the pair encodes the offset
  // of the code point from U+10000.
  const char32_t upper_bits = (high_surrogate_ & 0x3ff) << 10;
  const char32_t lower_bits = code_unit & 0x3ff;
  high_surrogate_ = 0;
  AppendCodePoint((upper_bits | lower_bits) + 0x10000);
#else
  AppendCodePoint(code_unit);
#endif  // defined(WCHAR_T_IS_UTF16)
}
// Appends the UTF-8 encoding of `code_point` to `buffer_`.
//
// Values above U+10FFFF are ignored. No other validation is performed here;
// in particular, values in the surrogate range are encoded like any other
// three-byte code point.
void CFX_UTF8Encoder::AppendCodePoint(char32_t code_point) {
  if (code_point > 0x10ffff) {
    // Beyond the last valid code point: emit nothing.
    return;
  }

  if (code_point < 0x80) {
    // Single-byte form: the value is stored unchanged.
    buffer_.push_back(code_point);
    return;
  }

  // Number of continuation bytes that follow the lead byte.
  const int trailing_count =
      code_point < 0x800 ? 1 : (code_point < 0x10000 ? 2 : 3);

  // Lead-byte markers for 2-, 3- and 4-byte sequences respectively.
  static constexpr uint8_t kLeadMarker[] = {0xc0, 0xe0, 0xf0};

  // The lead byte carries the bits above all continuation payloads.
  int shift = trailing_count * 6;
  buffer_.push_back(kLeadMarker[trailing_count - 1] | (code_point >> shift));

  // Each continuation byte carries the next 6 bits, most significant first.
  while (shift > 0) {
    shift -= 6;
    buffer_.push_back(0x80 | ((code_point >> shift) & 0x3f));
  }
}