blob: aa69686b02d49c1207a204cd0a5931ce9bb06c8b [file] [log] [blame]
// Copyright 2018 The PDFium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
#include "core/fxcrt/cfx_utf8encoder.h"
#include <stdint.h>
#include <utility>
#include "core/fxcrt/bytestring.h"
#include "core/fxcrt/code_point_view.h"
#include "core/fxcrt/string_view_template.h"
#include "core/fxcrt/utf16.h"
// Encodes |input| eagerly at construction time: each code point yielded by
// iterating pdfium::CodePointView over |input| is handed to AppendCodePoint()
// in order, so the full result is ready as soon as the constructor returns.
CFX_UTF8Encoder::CFX_UTF8Encoder(WideStringView input) {
  pdfium::CodePointView code_points(input);
  for (char32_t current : code_points) {
    AppendCodePoint(current);
  }
}
// Defaulted out of line: no cleanup is written here beyond the
// compiler-generated destruction of the members.
CFX_UTF8Encoder::~CFX_UTF8Encoder() = default;
// Returns the bytes accumulated so far by moving them out of |buffer_|.
// |buffer_| is left in its moved-from state, so this is meant to be called
// once the caller is done encoding.
ByteString CFX_UTF8Encoder::TakeResult() {
  ByteString result(std::move(buffer_));
  return result;
}
// Appends the UTF-8 encoding of |code_point| to |buffer_|.
//
// - Values above pdfium::kMaximumSupplementaryCodePoint (i.e. above U+10FFFF)
//   are invalid and are dropped without appending anything.
// - Values below 0x80 are appended as a single unchanged byte.
// - Everything else becomes one lead byte followed by one to three
//   continuation bytes, each continuation byte carrying six payload bits.
//
// Note that no range inside the Basic Multilingual Plane is filtered out
// here: every value in [0x800, 0xFFFF] receives a three-byte encoding.
void CFX_UTF8Encoder::AppendCodePoint(char32_t code_point) {
  if (code_point > pdfium::kMaximumSupplementaryCodePoint) {
    return;
  }

  if (code_point < 0x80) {
    buffer_ += static_cast<char>(code_point);
    return;
  }

  // Pick the lead-byte marker and the number of continuation bytes that
  // follow it. Start from the widest (four-byte) form and narrow down.
  uint8_t lead_marker = 0xf0;
  int continuation_count = 3;
  if (code_point < 0x800) {
    lead_marker = 0xc0;
    continuation_count = 1;
  } else if (code_point < 0x10000) {
    lead_marker = 0xe0;
    continuation_count = 2;
  }

  // The lead byte holds the bits above all of the continuation payloads.
  int shift = continuation_count * 6;
  buffer_ += static_cast<char>(lead_marker | (code_point >> shift));

  // Emit the remaining bits six at a time, most significant group first.
  while (shift > 0) {
    shift -= 6;
    buffer_ += static_cast<char>(0x80 | ((code_point >> shift) & 0x3f));
  }
}