blob: 9940c9ae96dbe685f8f449d82d74e853993d033f [file] [log] [blame]
// Copyright 2016 The PDFium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
#include "core/fpdfapi/page/cpdf_textobject.h"
#include <algorithm>
#include "core/fpdfapi/font/cpdf_cidfont.h"
#include "core/fpdfapi/font/cpdf_font.h"
#include "core/fxcrt/check.h"
#include "core/fxcrt/fx_coordinates.h"
#include "core/fxcrt/span.h"
#include "core/fxcrt/span_util.h"
#define ISLATINWORD(u) (u != 0x20 && u <= 0x28FF)
namespace {
bool IsVertWritingCIDFont(const CPDF_CIDFont* font) {
return font && font->IsVertWriting();
}
float GetWordSpaceIfNeeded(const CPDF_CIDFont* cid_font,
uint32_t char_code,
float word_space) {
if (char_code == ' ' && (!cid_font || cid_font->GetCharSize(' ') == 1)) {
return word_space;
}
return 0;
}
} // namespace
CPDF_TextObject::Item::Item() = default;
CPDF_TextObject::Item::Item(const Item& that) = default;
CPDF_TextObject::Item::~Item() = default;
CPDF_TextObject::CPDF_TextObject(int32_t content_stream)
: CPDF_PageObject(content_stream) {}
CPDF_TextObject::CPDF_TextObject() : CPDF_TextObject(kNoContentStream) {}
CPDF_TextObject::~CPDF_TextObject() = default;
size_t CPDF_TextObject::CountItems() const {
return char_codes_.size();
}
CPDF_TextObject::Item CPDF_TextObject::GetItemInfo(size_t index) const {
DCHECK(index < char_codes_.size());
Item info;
info.char_code_ = char_codes_[index];
info.origin_ = CFX_PointF(char_positions_[index], 0);
RetainPtr<CPDF_Font> font = GetFont();
const CPDF_CIDFont* cid_font = font->AsCIDFont();
if (!IsVertWritingCIDFont(cid_font)) {
return info;
}
uint16_t cid = cid_font->CIDFromCharCode(info.char_code_);
info.origin_ = CFX_PointF(0, info.origin_.x);
CFX_Point16 vertical_origin = cid_font->GetVertOrigin(cid);
float font_size = GetFontSize();
info.origin_.x -= font_size * vertical_origin.x / 1000;
info.origin_.y -= font_size * vertical_origin.y / 1000;
return info;
}
uint32_t CPDF_TextObject::GetCharCode(size_t index) const {
return index < char_codes_.size() ? char_codes_[index]
: CPDF_Font::kInvalidCharCode;
}
CPDF_TextObject::Item CPDF_TextObject::GetCharInfo(size_t index) const {
return index < char_codes_.size() ? GetItemInfo(index) : Item();
}
int CPDF_TextObject::CountWords() const {
RetainPtr<CPDF_Font> font = GetFont();
bool is_in_latin_word = false;
int word_count = 0;
for (uint32_t char_code : char_codes_) {
WideString unicode_str = font->UnicodeFromCharCode(char_code);
uint16_t unicode = 0;
if (unicode_str.GetLength() > 0) {
unicode = unicode_str[0];
}
bool is_latin = ISLATINWORD(unicode);
if (is_latin && is_in_latin_word) {
continue;
}
is_in_latin_word = is_latin;
if (unicode != 0x20) {
word_count++;
}
}
return word_count;
}
WideString CPDF_TextObject::GetWordString(int word_index) const {
RetainPtr<CPDF_Font> font = GetFont();
WideString result_str;
int word_count = 0;
bool is_in_latin_word = false;
for (uint32_t char_code : char_codes_) {
WideString unicode_str = font->UnicodeFromCharCode(char_code);
uint16_t unicode = 0;
if (unicode_str.GetLength() > 0) {
unicode = unicode_str[0];
}
bool is_latin = ISLATINWORD(unicode);
if (!is_latin || !is_in_latin_word) {
is_in_latin_word = is_latin;
if (unicode != 0x20) {
word_count++;
}
}
if (word_count - 1 == word_index) {
result_str += unicode;
}
}
return result_str;
}
std::unique_ptr<CPDF_TextObject> CPDF_TextObject::Clone() const {
auto obj = std::make_unique<CPDF_TextObject>();
obj->CopyData(this);
obj->char_codes_ = char_codes_;
obj->char_kernings_ = char_kernings_;
obj->char_positions_ = char_positions_;
obj->pos_ = pos_;
return obj;
}
CPDF_PageObject::Type CPDF_TextObject::GetType() const {
return Type::kText;
}
void CPDF_TextObject::Transform(const CFX_Matrix& matrix) {
SetTextMatrix(GetTextMatrix() * matrix);
SetDirty(true);
}
bool CPDF_TextObject::IsText() const {
return true;
}
CPDF_TextObject* CPDF_TextObject::AsText() {
return this;
}
const CPDF_TextObject* CPDF_TextObject::AsText() const {
return this;
}
CFX_Matrix CPDF_TextObject::GetTextMatrix() const {
pdfium::span<const float> text_matrix = text_state().GetMatrix();
return CFX_Matrix(text_matrix[0], text_matrix[2], text_matrix[1],
text_matrix[3], pos_.x, pos_.y);
}
void CPDF_TextObject::SetTextMatrix(const CFX_Matrix& matrix) {
pdfium::span<float> text_matrix = mutable_text_state().GetMutableMatrix();
text_matrix[0] = matrix.a;
text_matrix[1] = matrix.c;
text_matrix[2] = matrix.b;
text_matrix[3] = matrix.d;
pos_ = CFX_PointF(matrix.e, matrix.f);
CalcPositionDataInternal(GetFont());
}
void CPDF_TextObject::SetSegments(pdfium::span<const ByteString> strings,
pdfium::span<const float> kernings) {
size_t segments_count = strings.size();
CHECK(segments_count);
char_codes_.clear();
char_kernings_.clear();
char_positions_.clear();
RetainPtr<CPDF_Font> font = GetFont();
size_t char_count = 0;
for (const auto& str : strings) {
char_count += font->CountChar(str.AsStringView());
}
CHECK(char_count);
char_codes_.resize(char_count);
char_kernings_.resize(char_count);
char_positions_.resize(char_count);
size_t index = 0;
for (size_t i = 0; i < segments_count; ++i) {
ByteStringView segment = strings[i].AsStringView();
size_t offset = 0;
while (offset < segment.GetLength()) {
DCHECK(index < char_codes_.size());
char_codes_[index++] = font->GetNextChar(segment, &offset);
}
if (i != segments_count - 1) {
char_kernings_[index - 1] = kernings[i];
}
}
}
void CPDF_TextObject::SetText(const ByteString& str) {
SetSegments(pdfium::span_from_ref(str), pdfium::span<float>());
CalcPositionDataInternal(GetFont());
SetDirty(true);
}
float CPDF_TextObject::GetCharWidth(uint32_t char_code) const {
const float font_size = GetFontSize() / 1000;
RetainPtr<CPDF_Font> font = GetFont();
const CPDF_CIDFont* cid_font = font->AsCIDFont();
if (!IsVertWritingCIDFont(cid_font)) {
return font->GetCharWidth(char_code) * font_size;
}
uint16_t cid = cid_font->CIDFromCharCode(char_code);
return cid_font->GetVertWidth(cid) * font_size;
}
RetainPtr<CPDF_Font> CPDF_TextObject::GetFont() const {
return text_state().GetFont();
}
float CPDF_TextObject::GetFontSize() const {
return text_state().GetFontSize();
}
TextRenderingMode CPDF_TextObject::GetTextRenderMode() const {
return text_state().GetTextMode();
}
void CPDF_TextObject::SetTextRenderMode(TextRenderingMode mode) {
mutable_text_state().SetTextMode(mode);
SetDirty(true);
}
CFX_PointF CPDF_TextObject::CalcPositionData(float horz_scale) {
RetainPtr<CPDF_Font> font = GetFont();
const float current_position = CalcPositionDataInternal(font);
if (IsVertWritingCIDFont(font->AsCIDFont())) {
return {0, current_position};
}
return {current_position * horz_scale, 0};
}
float CPDF_TextObject::CalcPositionDataInternal(
const RetainPtr<CPDF_Font>& font) {
float current_position = 0;
float min_x = 10000.0f;
float max_x = -10000.0f;
float min_y = 10000.0f;
float max_y = -10000.0f;
const CPDF_CIDFont* cid_font = font->AsCIDFont();
const bool is_vertical_writing = IsVertWritingCIDFont(cid_font);
const float font_size = GetFontSize();
const float char_space = text_state().GetCharSpace();
const float word_space = text_state().GetWordSpace();
for (size_t i = 0; i < char_codes_.size(); ++i) {
const uint32_t char_code = char_codes_[i];
char_positions_[i] = current_position;
FX_RECT char_rect = font->GetCharBBox(char_code);
float char_width;
if (is_vertical_writing) {
uint16_t cid = cid_font->CIDFromCharCode(char_code);
CFX_Point16 vertical_origin = cid_font->GetVertOrigin(cid);
char_rect.Offset(-vertical_origin.x, -vertical_origin.y);
min_x = std::min({min_x, static_cast<float>(char_rect.left),
static_cast<float>(char_rect.right)});
max_x = std::max({max_x, static_cast<float>(char_rect.left),
static_cast<float>(char_rect.right)});
const float char_top =
current_position + char_rect.top * font_size / 1000;
const float char_bottom =
current_position + char_rect.bottom * font_size / 1000;
min_y = std::min({min_y, char_top, char_bottom});
max_y = std::max({max_y, char_top, char_bottom});
char_width = cid_font->GetVertWidth(cid) * font_size / 1000;
} else {
min_y = std::min({min_y, static_cast<float>(char_rect.top),
static_cast<float>(char_rect.bottom)});
max_y = std::max({max_y, static_cast<float>(char_rect.top),
static_cast<float>(char_rect.bottom)});
const float char_left =
current_position + char_rect.left * font_size / 1000;
const float char_right =
current_position + char_rect.right * font_size / 1000;
min_x = std::min({min_x, char_left, char_right});
max_x = std::max({max_x, char_left, char_right});
char_width = font->GetCharWidth(char_code) * font_size / 1000;
}
current_position += char_width;
current_position += GetWordSpaceIfNeeded(cid_font, char_code, word_space);
current_position += char_space;
current_position -= (char_kernings_[i] * font_size) / 1000;
}
if (is_vertical_writing) {
min_x = min_x * font_size / 1000;
max_x = max_x * font_size / 1000;
} else {
min_y = min_y * font_size / 1000;
max_y = max_y * font_size / 1000;
}
SetOriginalRect(CFX_FloatRect(min_x, min_y, max_x, max_y));
CFX_FloatRect rect = GetTextMatrix().TransformRect(GetOriginalRect());
if (TextRenderingModeIsStrokeMode(text_state().GetTextMode())) {
// TODO(crbug.com/42270854): Does the original rect need a similar
// adjustment?
const float half_width = graph_state().GetLineWidth() / 2;
rect.Inflate(half_width, half_width);
}
SetRect(rect);
return current_position;
}