// blob: e6f018c8f1f324bf80138ba1def22ebdb71c3cba [file] [log] [blame]
// Copyright 2014 The PDFium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
#include "core/fpdfapi/font/cpdf_cmapparser.h"
#include <array>
#include <iterator>
#include "core/fpdfapi/cmaps/fpdf_cmaps.h"
#include "core/fpdfapi/parser/cpdf_array.h"
#include "core/fpdfapi/parser/cpdf_dictionary.h"
#include "core/fpdfapi/parser/cpdf_simple_parser.h"
#include "core/fxcrt/check.h"
#include "core/fxcrt/fx_extension.h"
#include "core/fxcrt/fx_safe_types.h"
namespace {
// Returns the view obtained from `word.Last(GetLength() - 2)`, i.e. `word`
// shortened by two characters. Words of two characters or fewer produce an
// empty view.
// NOTE(review): presumably Last(n) keeps the trailing n characters, so the
// first two characters are the ones dropped - confirm against ByteStringView.
ByteStringView CMap_GetString(ByteStringView word) {
  const auto length = word.GetLength();
  return length > 2 ? word.Last(length - 2) : ByteStringView();
}
} // namespace
// Binds the parser to `pCMap`. The pointer is stored in `cmap_`, which the
// parsing methods and the destructor dereference, so it must be non-null and
// remain valid for the parser's whole lifetime.
CPDF_CMapParser::CPDF_CMapParser(CPDF_CMap* pCMap)
    : cmap_(pCMap) {}
// Hands the results accumulated during parsing over to the CMap: first the
// charcode-to-CID mappings that were not written to the direct table, then
// the collected code space ranges. Both containers are moved, not copied.
CPDF_CMapParser::~CPDF_CMapParser() {
  auto& extra_mappings = additional_charcode_to_cidmappings_;
  cmap_->SetAdditionalMappings(std::move(extra_mappings));

  auto& leading_ranges = ranges_;
  cmap_->SetMixedFourByteLeadingRanges(std::move(leading_ranges));
}
// Feeds one token of a CMap stream into the parser state machine.
//
// Recognized keywords are handled first and only change `status_` (the
// "begin..." keywords additionally reset `code_seq_`). Any other word is
// interpreted according to the current state. In every case the word is
// remembered in `last_word_` before returning.
//
// `word` must not be empty.
void CPDF_CMapParser::ParseWord(ByteStringView word) {
  DCHECK(!word.IsEmpty());

  if (word == "begincidchar") {
    status_ = kProcessingCidChar;
    code_seq_ = 0;
  } else if (word == "begincidrange") {
    status_ = kProcessingCidRange;
    code_seq_ = 0;
  } else if (word == "endcidrange" || word == "endcidchar") {
    status_ = kStart;
  } else if (word == "/WMode") {
    status_ = kProcessingWMode;
  } else if (word == "/Registry") {
    status_ = kProcessingRegistry;
  } else if (word == "/Ordering") {
    status_ = kProcessingOrdering;
  } else if (word == "/Supplement") {
    status_ = kProcessingSupplement;
  } else if (word == "begincodespacerange") {
    status_ = kProcessingCodeSpaceRange;
    code_seq_ = 0;
  } else if (word != "usecmap") {
    // Not a keyword ("usecmap" is recognized but triggers no action here):
    // the word is an operand for whatever the parser is currently collecting.
    switch (status_) {
      case kProcessingCidChar:
      case kProcessingCidRange:
        HandleCid(word);
        break;
      case kProcessingRegistry:
      case kProcessingSupplement:
        // The value is consumed and ignored.
        status_ = kStart;
        break;
      case kProcessingOrdering:
        cmap_->SetCharset(CharsetFromOrdering(CMap_GetString(word)));
        status_ = kStart;
        break;
      case kProcessingWMode:
        // Any non-zero writing mode selects vertical writing.
        cmap_->SetVertical(GetCode(word) != 0);
        status_ = kStart;
        break;
      case kProcessingCodeSpaceRange:
        HandleCodeSpaceRange(word);
        break;
      default:
        break;
    }
  }

  last_word_ = word;
}
// Consumes one operand of a cidchar or cidrange entry.
//
// Operands are buffered in `code_points_` until an entry is complete: two
// values (code, CID) for a cidchar entry, three (first code, last code, first
// CID) for a cidrange entry. A complete entry is written to the CMap's direct
// table when its last code is below CPDF_CMap::kDirectMapTableSize; otherwise
// it is queued in `additional_charcode_to_cidmappings_`. The operand counter
// is then reset for the next entry.
void CPDF_CMapParser::HandleCid(ByteStringView word) {
  DCHECK(status_ == kProcessingCidChar || status_ == kProcessingCidRange);
  const bool is_single_char = status_ == kProcessingCidChar;

  const uint32_t parsed_code = GetCode(word);
  code_points_[code_seq_] = parsed_code;
  ++code_seq_;

  const int operands_needed = is_single_char ? 2 : 3;
  if (code_seq_ < operands_needed) {
    return;  // Entry still incomplete.
  }

  // A single-char entry is treated as a range of length one.
  const uint32_t first_code = code_points_[0];
  const uint32_t last_code = is_single_char ? first_code : code_points_[1];
  // CIDs are stored as 16 bits; larger parsed values are truncated.
  const uint16_t first_cid =
      static_cast<uint16_t>(code_points_[is_single_char ? 1 : 2]);

  if (last_code < CPDF_CMap::kDirectMapTableSize) {
    cmap_->SetDirectCharcodeToCIDTableRange(first_code, last_code, first_cid);
  } else {
    additional_charcode_to_cidmappings_.push_back(
        {first_code, last_code, first_cid});
  }
  code_seq_ = 0;
}
// Consumes one word inside a begincodespacerange ... endcodespacerange
// section.
//
// Words starting with '<' are counted; every second one is paired with the
// previously seen word (`last_word_`) and, if GetCodeRange() accepts the
// pair, stored in `pending_ranges_`. Other words are ignored.
//
// On "endcodespacerange" the coding scheme is chosen from the total number of
// ranges known so far (already committed plus pending):
//   - exactly one: TwoBytes if that range has char size 2, else OneByte;
//   - more than one: MixedFourBytes, and the pending ranges are appended to
//     `ranges_`.
// The parser then returns to the start state.
void CPDF_CMapParser::HandleCodeSpaceRange(ByteStringView word) {
  const bool is_terminator = word == "endcodespacerange";
  if (!is_terminator) {
    const bool is_hex_token = !word.IsEmpty() && word[0] == '<';
    if (!is_hex_token) {
      return;
    }

    // An odd count means `word` is the upper bound matching `last_word_`.
    const bool completes_pair = (code_seq_ % 2) != 0;
    if (completes_pair) {
      const std::optional<CPDF_CMap::CodeRange> parsed =
          GetCodeRange(last_word_.AsStringView(), word);
      if (parsed.has_value()) {
        pending_ranges_.push_back(parsed.value());
      }
    }
    ++code_seq_;
    return;
  }

  const size_t total_ranges = ranges_.size() + pending_ranges_.size();
  if (total_ranges == 1) {
    const auto& only_range =
        ranges_.empty() ? pending_ranges_[0] : ranges_[0];
    const bool is_two_byte = only_range.char_size_ == 2;
    cmap_->SetCodingScheme(is_two_byte ? CPDF_CMap::TwoBytes
                                       : CPDF_CMap::OneByte);
  } else if (total_ranges > 1) {
    cmap_->SetCodingScheme(CPDF_CMap::MixedFourBytes);
    ranges_.reserve(total_ranges);
    std::move(pending_ranges_.begin(), pending_ranges_.end(),
              std::back_inserter(ranges_));
    pending_ranges_.clear();
  }
  status_ = kStart;
}
// static
// Parses `word` as an unsigned 32-bit number.
//
// A word starting with '<' is read as hexadecimal digits following the '<';
// any other word is read as decimal digits from its first character. Parsing
// stops at the first character that is not a digit of the respective base, so
// a word with no leading digits yields 0.
//
// Returns 0 for an empty word, and 0 as soon as the checked accumulator
// reports an invalid value (presumably arithmetic overflow of uint32_t).
uint32_t CPDF_CMapParser::GetCode(ByteStringView word) {
  if (word.IsEmpty()) {
    return 0;
  }

  const size_t length = word.GetLength();
  FX_SAFE_UINT32 value = 0;

  const bool is_hex = word[0] == '<';
  if (!is_hex) {
    size_t pos = 0;
    while (pos < length && FXSYS_IsDecimalDigit(word.CharAt(pos))) {
      value = value * 10 + FXSYS_DecimalCharToInt(word.CharAt(pos));
      if (!value.IsValid()) {
        return 0;
      }
      ++pos;
    }
    return value.ValueOrDie();
  }

  // Hexadecimal: skip the opening '<'.
  size_t pos = 1;
  while (pos < length && FXSYS_IsHexDigit(word[pos])) {
    value = value * 16 + FXSYS_HexCharToInt(word[pos]);
    if (!value.IsValid()) {
      return 0;
    }
    ++pos;
  }
  return value.ValueOrDie();
}
// static
// Builds a code range from a pair of hex string tokens such as "<00>" and
// "<FF>".
//
// `first` supplies the lower bound and determines the character size: the
// number of characters between '<' and the first '>' (or the end of the
// token), divided by two. `second` supplies the upper bound for the same
// number of bytes; positions beyond the end of `second` are read as '0'.
//
// Returns std::nullopt if `first` is empty, does not start with '<', or
// describes more than 4 bytes.
std::optional<CPDF_CMap::CodeRange> CPDF_CMapParser::GetCodeRange(
    ByteStringView first,
    ByteStringView second) {
  if (first.IsEmpty() || first[0] != '<') {
    return std::nullopt;
  }

  // Find the closing '>' of `first`, or stop at the end of the token.
  size_t close_pos = 1;
  while (close_pos < first.GetLength() && first[close_pos] != '>') {
    ++close_pos;
  }

  const size_t byte_count = (close_pos - 1) / 2;
  if (byte_count > 4) {
    return std::nullopt;
  }

  // Combines two hex digit characters into one byte value.
  const auto hex_pair_to_int = [](uint8_t high, uint8_t low) {
    return FXSYS_HexCharToInt(high) * 16 + FXSYS_HexCharToInt(low);
  };

  CPDF_CMap::CodeRange range;
  range.char_size_ = byte_count;

  const size_t second_length = second.GetLength();
  for (size_t byte = 0; byte < byte_count; ++byte) {
    const size_t high_pos = byte * 2 + 1;  // +1 skips the leading '<'.
    const size_t low_pos = high_pos + 1;

    // `first` is known to hold at least 2 * byte_count characters after '<'.
    const uint8_t lower_high = first[high_pos];
    const uint8_t lower_low = first[low_pos];
    range.lower_[byte] = hex_pair_to_int(lower_high, lower_low);

    // `second` may be shorter than `first`; pad missing digits with '0'.
    const uint8_t upper_high =
        high_pos < second_length ? second[high_pos] : '0';
    const uint8_t upper_low = low_pos < second_length ? second[low_pos] : '0';
    range.upper_[byte] = hex_pair_to_int(upper_high, upper_low);
  }
  return range;
}
// static
// Maps a CMap ordering name ("GB1", "CNS1", "Japan1", "Korea1", "UCS") to a
// CIDSet value. The matching table index is cast directly to CIDSet, so the
// table order is presumably kept in sync with the enum's declaration order.
// Slot 0 has no name and is never matched. Returns CIDSET_UNKNOWN when the
// ordering matches none of the names.
CIDSet CPDF_CMapParser::CharsetFromOrdering(ByteStringView ordering) {
  static const std::array<const char*, CIDSET_NUM_SETS> kOrderingNames = {
      {nullptr, "GB1", "CNS1", "Japan1", "Korea1", "UCS"}};

  size_t index = 1;  // Skip the nameless slot 0.
  while (index < std::size(kOrderingNames)) {
    if (ordering == kOrderingNames[index]) {
      return static_cast<CIDSet>(index);
    }
    ++index;
  }
  return CIDSET_UNKNOWN;
}