blob: 551173162b5e28cdc964c48e94a0892417cb591e [file] [log] [blame]
// Copyright 2016 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
#include "core/fpdfdoc/ctypeset.h"
#include <algorithm>
#include "core/fpdfdoc/cline.h"
#include "core/fpdfdoc/cpdf_variabletext.h"
#include "core/fpdfdoc/cpvt_wordinfo.h"
#include "core/fpdfdoc/csection.h"
#include "third_party/base/stl_util.h"
namespace {
// Per-character classification flags for the 7-bit range, indexed by code
// point (0x00-0x7F), twelve entries per row. Bits tested in this file:
//   0x01 - IsLatin()
//   0x04 - IsOpenStylePunctuation()
//   0x08 - IsPunctuation()
//   0x20 - IsConnectiveSymbol()
// Bits 0x02 (set on entries 0x30-0x39, i.e. '0'-'9') and 0x10 (set only on
// entry 0x24, '$') are present in the table but are not tested by any code
// visible in this file.
const uint8_t special_chars[128] = {
0x00, 0x0C, 0x08, 0x0C, 0x08, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x08, 0x00,
0x10, 0x00, 0x00, 0x28, 0x0C, 0x08, 0x00, 0x00, 0x28, 0x28, 0x28, 0x28,
0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x08, 0x08,
0x00, 0x00, 0x00, 0x08, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x0C, 0x00, 0x08, 0x00, 0x00,
0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x0C, 0x00, 0x08, 0x00, 0x00,
};
// Returns true if |word| is classified as a Latin letter.
// Code points up to 0x007F are looked up in special_chars (flag 0x01); higher
// code points are matched against a fixed list of inclusive ranges.
bool IsLatin(uint16_t word) {
  if (word < 0x0080)
    return (special_chars[word] & 0x01) != 0;

  struct CodeRange {
    uint16_t first;
    uint16_t last;
  };
  static const CodeRange kLatinRanges[] = {
      {0x00C0, 0x00FF},  // Latin-1 Supplement, upper part.
      {0x0100, 0x024F},  // Latin Extended-A / Extended-B.
      {0x1E00, 0x1EFF},  // Latin Extended Additional.
      {0x2C60, 0x2C7F},  // Latin Extended-C.
      {0xA720, 0xA7FF},  // Latin Extended-D.
      {0xFF21, 0xFF3A},  // Fullwidth A-Z.
      {0xFF41, 0xFF5A},  // Fullwidth a-z.
  };
  for (const CodeRange& range : kLatinRanges) {
    if (word >= range.first && word <= range.last)
      return true;
  }
  return false;
}
// Returns true if |word| is one of the ASCII digits '0' (0x30) to '9' (0x39).
bool IsDigit(uint32_t word) {
  const uint32_t kFirstDigit = 0x0030;
  const uint32_t kLastDigit = 0x0039;
  if (word < kFirstDigit)
    return false;
  return word <= kLastDigit;
}
// Returns true if |word| falls in one of the code point ranges this file
// treats as CJK. Within 0x3000-0x303F only the listed sub-ranges count;
// every other code point in that block is rejected.
bool IsCJK(uint32_t word) {
  struct CodeRange {
    uint32_t first;
    uint32_t last;
  };
  static const CodeRange kCJKRanges[] = {
      {0x1100, 0x11FF},
      {0x2E80, 0x2FFF},
      // Selected entries of the 0x3000-0x303F block.
      {0x3005, 0x3006},
      {0x3021, 0x3029},
      {0x3031, 0x3035},
      {0x3040, 0x9FBF},
      {0xAC00, 0xD7AF},
      {0xF900, 0xFAFF},
      {0xFE30, 0xFE4F},
      {0xFF66, 0xFF9D},
      {0x20000, 0x2A6DF},
      {0x2F800, 0x2FA1F},
  };
  for (const CodeRange& range : kCJKRanges) {
    if (word >= range.first && word <= range.last)
      return true;
  }
  return false;
}
// Returns true if |word| is treated as punctuation.
// Code points up to 0x007F are looked up in special_chars (flag 0x08). For
// the blocks 0x0080-0x00FF, 0x2000-0x206F, 0x3000-0x303F, 0xFE50-0xFE6F and
// 0xFF00-0xFFEF only the explicitly listed code points qualify. Anything
// outside those blocks is not punctuation.
bool IsPunctuation(uint32_t word) {
  if (word <= 0x007F)
    return !!(special_chars[word] & 0x08);

  if (word >= 0x0080 && word <= 0x00FF) {
    // 0x0094 is an exact match. It was previously written as
    // "word <= 0x0094", which classified every value in 0x0080-0x0094 as
    // punctuation (including 0x0080, which IsCurrencySymbol() also claims)
    // and made the neighbouring equality tests dead code.
    return (word == 0x0082 || word == 0x0084 || word == 0x0085 ||
            word == 0x0091 || word == 0x0092 || word == 0x0093 ||
            word == 0x0094 || word == 0x0096 || word == 0x00B4 ||
            word == 0x00B8);
  }
  if (word >= 0x2000 && word <= 0x206F) {
    return (
        word == 0x2010 || word == 0x2011 || word == 0x2012 || word == 0x2013 ||
        word == 0x2018 || word == 0x2019 || word == 0x201A || word == 0x201B ||
        word == 0x201C || word == 0x201D || word == 0x201E || word == 0x201F ||
        word == 0x2032 || word == 0x2033 || word == 0x2034 || word == 0x2035 ||
        word == 0x2036 || word == 0x2037 || word == 0x203C || word == 0x203D ||
        word == 0x203E || word == 0x2044);
  }
  if (word >= 0x3000 && word <= 0x303F) {
    return (
        word == 0x3001 || word == 0x3002 || word == 0x3003 || word == 0x3005 ||
        word == 0x3009 || word == 0x300A || word == 0x300B || word == 0x300C ||
        word == 0x300D || word == 0x300F || word == 0x300E || word == 0x3010 ||
        word == 0x3011 || word == 0x3014 || word == 0x3015 || word == 0x3016 ||
        word == 0x3017 || word == 0x3018 || word == 0x3019 || word == 0x301A ||
        word == 0x301B || word == 0x301D || word == 0x301E || word == 0x301F);
  }
  if (word >= 0xFE50 && word <= 0xFE6F)
    return (word >= 0xFE50 && word <= 0xFE5E) || word == 0xFE63;
  if (word >= 0xFF00 && word <= 0xFFEF) {
    return (
        word == 0xFF01 || word == 0xFF02 || word == 0xFF07 || word == 0xFF08 ||
        word == 0xFF09 || word == 0xFF0C || word == 0xFF0E || word == 0xFF0F ||
        word == 0xFF1A || word == 0xFF1B || word == 0xFF1F || word == 0xFF3B ||
        word == 0xFF3D || word == 0xFF40 || word == 0xFF5B || word == 0xFF5C ||
        word == 0xFF5D || word == 0xFF61 || word == 0xFF62 || word == 0xFF63 ||
        word == 0xFF64 || word == 0xFF65 || word == 0xFF9E || word == 0xFF9F);
  }
  return false;
}
// Returns true if |word| is at most 0x007F and its special_chars entry has
// flag 0x20 set. Code points above 0x007F are never connective symbols.
bool IsConnectiveSymbol(uint32_t word) {
  if (word > 0x007F)
    return false;
  return (special_chars[word] & 0x20) != 0;
}
// Returns true if |word| is an opening-style punctuation mark.
// Code points up to 0x007F are looked up in special_chars (flag 0x04); above
// that only the listed 0x30xx and 0xFFxx code points qualify.
bool IsOpenStylePunctuation(uint32_t word) {
  if (word < 0x0080)
    return (special_chars[word] & 0x04) != 0;

  switch (word) {
    case 0x300A:
    case 0x300C:
    case 0x300E:
    case 0x3010:
    case 0x3014:
    case 0x3016:
    case 0x3018:
    case 0x301A:
    case 0xFF08:
    case 0xFF3B:
    case 0xFF5B:
    case 0xFF62:
      return true;
    default:
      return false;
  }
}
// Returns true if |word| is one of the currency code points recognised here:
// the whole 0x20A0-0x20CF range plus a handful of individual code points.
bool IsCurrencySymbol(uint16_t word) {
  if (word >= 0x20A0 && word <= 0x20CF)
    return true;

  switch (word) {
    case 0x0024:
    case 0x0080:
    case 0x00A2:
    case 0x00A3:
    case 0x00A4:
    case 0x00A5:
    case 0xFE69:
    case 0xFF04:
    case 0xFFE0:
    case 0xFFE1:
    case 0xFFE5:
    case 0xFFE6:
      return true;
    default:
      return false;
  }
}
// Returns true if |word| is 0x2116 or anything IsCurrencySymbol() accepts.
bool IsPrefixSymbol(uint16_t word) {
  if (word == 0x2116)
    return true;
  return IsCurrencySymbol(word);
}
// Returns true if |word| is the ASCII space (0x0020) or 0x3000.
bool IsSpace(uint16_t word) {
  switch (word) {
    case 0x0020:
    case 0x3000:
      return true;
    default:
      return false;
  }
}
bool NeedDivision(uint16_t prevWord, uint16_t curWord) {
if ((IsLatin(prevWord) || IsDigit(prevWord)) &&
(IsLatin(curWord) || IsDigit(curWord))) {
return false;
}
if (IsSpace(curWord) || IsPunctuation(curWord)) {
return false;
}
if (IsConnectiveSymbol(prevWord) || IsConnectiveSymbol(curWord)) {
return false;
}
if (IsSpace(prevWord) || IsPunctuation(prevWord)) {
return true;
}
if (IsPrefixSymbol(prevWord)) {
return false;
}
if (IsPrefixSymbol(curWord) || IsCJK(curWord)) {
return true;
}
if (IsCJK(prevWord)) {
return true;
}
return false;
}
} // namespace
// Binds the typesetter to |pSection|: the result rectangle starts out as all
// zeros and m_pVT is copied from pSection->m_pVT. |pSection| is dereferenced
// unconditionally here, so it must not be null.
CTypeset::CTypeset(CSection* pSection)
: m_rcRet(0.0f, 0.0f, 0.0f, 0.0f),
m_pVT(pSection->m_pVT),
m_pSection(pSection) {}
// Nothing to release explicitly; member destruction is sufficient.
CTypeset::~CTypeset() = default;
// Lays the section's words out on its first line in fixed-width cells.
// The plate width is divided into GetCharArray() equal cells (a non-positive
// count is treated as one cell) and each word is centred horizontally in its
// cell. The alignment value shifts the first used cell: 0 starts at cell 0,
// 1 shifts by half of the unused cells, 2 shifts by all of them.
// Returns (and stores in m_rcRet) the rectangle (0, 0, x, y - descent), where
// x is the right edge of the last placed word. If the section has no lines,
// an all-zero rectangle is returned and nothing else is touched.
CPVT_FloatRect CTypeset::CharArray() {
  m_rcRet = CPVT_FloatRect(0, 0, 0, 0);
  if (m_pSection->m_LineArray.empty())
    return m_rcRet;

  float fNodeWidth = m_pVT->GetPlateWidth() /
                     (m_pVT->GetCharArray() <= 0 ? 1 : m_pVT->GetCharArray());
  float fLineAscent =
      m_pVT->GetFontAscent(m_pVT->GetDefaultFontIndex(), m_pVT->GetFontSize());
  float fLineDescent =
      m_pVT->GetFontDescent(m_pVT->GetDefaultFontIndex(), m_pVT->GetFontSize());
  float x = 0.0f;
  const float y = m_pVT->GetLineLeading() + fLineAscent;

  const int32_t nWordCount =
      pdfium::CollectionSize<int32_t>(m_pSection->m_WordArray);
  CLine* pLine = m_pSection->m_LineArray.front().get();

  // Index of the first cell that receives a word.
  int32_t nStart = 0;
  const auto nAlignment = m_pVT->GetAlignment();
  if (nAlignment == 0) {
    pLine->m_LineInfo.fLineX = fNodeWidth * VARIABLETEXT_HALF;
  } else if (nAlignment == 1 || nAlignment == 2) {
    const int32_t nUnusedCells = m_pVT->GetCharArray() - nWordCount;
    nStart = (nAlignment == 1) ? nUnusedCells / 2 : nUnusedCells;
    pLine->m_LineInfo.fLineX =
        fNodeWidth * nStart - fNodeWidth * VARIABLETEXT_HALF;
  }

  // Words beyond the cell count are not positioned.
  for (int32_t w = 0; w < nWordCount && w < m_pVT->GetCharArray(); ++w) {
    // The tail is cleared before each word is measured.
    float fNextWidth = 0;
    if (pdfium::IndexInBounds(m_pSection->m_WordArray, w + 1)) {
      CPVT_WordInfo* pNextWord = m_pSection->m_WordArray[w + 1].get();
      pNextWord->fWordTail = 0;
      fNextWidth = m_pVT->GetWordWidth(*pNextWord);
    }

    CPVT_WordInfo* pWord = m_pSection->m_WordArray[w].get();
    pWord->fWordTail = 0;
    const float fWordWidth = m_pVT->GetWordWidth(*pWord);
    const float fWordAscent = m_pVT->GetWordAscent(*pWord);
    const float fWordDescent = m_pVT->GetWordDescent(*pWord);

    // Centre the word inside cell (w + nStart).
    x = static_cast<float>(fNodeWidth * (w + nStart + 0.5) -
                           fWordWidth * VARIABLETEXT_HALF);
    pWord->fWordX = x;
    pWord->fWordY = y;
    if (w == 0)
      pLine->m_LineInfo.fLineX = x;

    if (w == nWordCount - 1) {
      pWord->fWordTail = 0;
    } else {
      // Gap up to the next word, clamped so it never goes negative.
      const auto fGap =
          fNodeWidth - (fWordWidth + fNextWidth) * VARIABLETEXT_HALF;
      pWord->fWordTail = fGap > 0 ? fGap : 0;
    }

    x += fWordWidth;
    fLineAscent = std::max(fLineAscent, fWordAscent);
    fLineDescent = std::min(fLineDescent, fWordDescent);
  }

  pLine->m_LineInfo.nBeginWordIndex = 0;
  pLine->m_LineInfo.nEndWordIndex = nWordCount - 1;
  pLine->m_LineInfo.fLineY = y;
  pLine->m_LineInfo.fLineWidth = x - pLine->m_LineInfo.fLineX;
  pLine->m_LineInfo.fLineAscent = fLineAscent;
  pLine->m_LineInfo.fLineDescent = fLineDescent;

  m_rcRet = CPVT_FloatRect(0, 0, x, y - fLineDescent);
  return m_rcRet;
}
// Measures the section at |fFontSize| without recording any lines: runs
// SplitLines() with bTypeset == false and returns the width and height of the
// resulting m_rcRet.
CFX_SizeF CTypeset::GetEditSize(float fFontSize) {
ASSERT(m_pSection);
ASSERT(m_pVT);
SplitLines(false, fFontSize);
return CFX_SizeF(m_rcRet.Width(), m_rcRet.Height());
}
// Rebuilds the section's line layout from scratch: clears the existing line
// array, splits the words into lines (SplitLines with bTypeset == true; the
// font-size argument is unused on that path), positions the lines and words
// (OutputLines), and returns the resulting m_rcRet.
CPVT_FloatRect CTypeset::Typeset() {
ASSERT(m_pVT);
m_pSection->m_LineArray.clear();
SplitLines(true, 0.0f);
OutputLines();
return m_rcRet;
}
// Breaks the section's words into lines and stores the overall extent in
// m_rcRet as (0, 0, widest line width, accumulated height).
//
// bTypeset == true:  word metrics come from the single-argument m_pVT
//                    getters and every finished line is appended through
//                    m_pSection->AddLine(). |fFontSize| is not used.
// bTypeset == false: word metrics are computed for |fFontSize| and no line
//                    is added; only m_rcRet is updated.
//
// A line is closed when m_pVT->IsAutoReturn() is set, the typeset width is
// positive, and adding the current word would exceed that width.
void CTypeset::SplitLines(bool bTypeset, float fFontSize) {
ASSERT(m_pVT);
ASSERT(m_pSection);
// Word index range [nLineHead, nLineTail] of the line being built.
int32_t nLineHead = 0;
int32_t nLineTail = 0;
float fMaxX = 0.0f, fMaxY = 0.0f;
// The fBackup* values snapshot the line metrics at the most recent break
// opportunity so the line can be rewound to it on overflow.
float fLineWidth = 0.0f, fBackupLineWidth = 0.0f;
float fLineAscent = 0.0f, fBackupLineAscent = 0.0f;
float fLineDescent = 0.0f, fBackupLineDescent = 0.0f;
// Index of the word at the most recent break opportunity.
int32_t nWordStartPos = 0;
bool bFullWord = false;
// Number of break opportunities seen on the current line after its first
// word.
int32_t nLineFullWordIndex = 0;
// Number of words consumed on the current line so far.
int32_t nCharIndex = 0;
CPVT_LineInfo line;
float fWordWidth = 0;
// Usable width: plate width minus the line indent, never negative.
float fTypesetWidth =
std::max(m_pVT->GetPlateWidth() - m_pVT->GetLineIndent(), 0.0f);
int32_t nTotalWords =
pdfium::CollectionSize<int32_t>(m_pSection->m_WordArray);
// True while the preceding word(s) were open-style punctuation (optionally
// followed by spaces); no break opportunity is recorded in that state.
bool bOpened = false;
if (nTotalWords > 0) {
int32_t i = 0;
while (i < nTotalWords) {
CPVT_WordInfo* pWord = m_pSection->m_WordArray[i].get();
// For the very first word there is no predecessor, so pOldWord aliases
// pWord and NeedDivision() below compares the word with itself.
CPVT_WordInfo* pOldWord = pWord;
if (i > 0) {
pOldWord = m_pSection->m_WordArray[i - 1].get();
}
// NOTE(review): if pWord is null, fWordWidth keeps the previous word's
// value for the overflow test below.
if (pWord) {
if (bTypeset) {
fLineAscent = std::max(fLineAscent, m_pVT->GetWordAscent(*pWord));
fLineDescent = std::min(fLineDescent, m_pVT->GetWordDescent(*pWord));
fWordWidth = m_pVT->GetWordWidth(*pWord);
} else {
fLineAscent =
std::max(fLineAscent, m_pVT->GetWordAscent(*pWord, fFontSize));
fLineDescent =
std::min(fLineDescent, m_pVT->GetWordDescent(*pWord, fFontSize));
fWordWidth = m_pVT->GetWordWidth(
pWord->nFontIndex, pWord->Word, m_pVT->GetSubWord(),
m_pVT->GetCharSpace(), fFontSize, pWord->fWordTail);
}
// Decide whether a break opportunity sits immediately before this word.
if (!bOpened) {
if (IsOpenStylePunctuation(pWord->Word)) {
bOpened = true;
bFullWord = true;
} else if (pOldWord) {
if (NeedDivision(pOldWord->Word, pWord->Word)) {
bFullWord = true;
}
}
} else {
// The "opened" state ends at the first word that is neither a space nor
// another open-style punctuation mark.
if (!IsSpace(pWord->Word) && !IsOpenStylePunctuation(pWord->Word)) {
bOpened = false;
}
}
if (bFullWord) {
bFullWord = false;
// A break opportunity on the first word of a line is not counted.
if (nCharIndex > 0) {
nLineFullWordIndex++;
}
nWordStartPos = i;
// Note: the ascent/descent snapshot already includes the current word,
// while the width snapshot does not (fLineWidth is updated further down).
fBackupLineWidth = fLineWidth;
fBackupLineAscent = fLineAscent;
fBackupLineDescent = fLineDescent;
}
nCharIndex++;
}
if (m_pVT->IsAutoReturn() && fTypesetWidth > 0 &&
fLineWidth + fWordWidth > fTypesetWidth) {
// Overflow: if a break opportunity was counted on this line, rewind to it
// and restore the metrics recorded there.
if (nLineFullWordIndex > 0) {
i = nWordStartPos;
fLineWidth = fBackupLineWidth;
fLineAscent = fBackupLineAscent;
fLineDescent = fBackupLineDescent;
}
// A single word wider than the line gets a line of its own.
if (nCharIndex == 1) {
fLineWidth = fWordWidth;
i++;
}
nLineTail = i - 1;
if (bTypeset) {
line.nBeginWordIndex = nLineHead;
line.nEndWordIndex = nLineTail;
line.nTotalWord = nLineTail - nLineHead + 1;
line.fLineWidth = fLineWidth;
line.fLineAscent = fLineAscent;
line.fLineDescent = fLineDescent;
m_pSection->AddLine(line);
}
fMaxY += (fLineAscent + m_pVT->GetLineLeading());
fMaxY -= fLineDescent;
fMaxX = std::max(fLineWidth, fMaxX);
// Start a fresh line at word i; that word is processed again on the next
// iteration.
nLineHead = i;
fLineWidth = 0.0f;
fLineAscent = 0.0f;
fLineDescent = 0.0f;
nCharIndex = 0;
nLineFullWordIndex = 0;
bFullWord = false;
} else {
fLineWidth += fWordWidth;
i++;
}
}
// Flush the words left over after the last wrap as the final line.
if (nLineHead <= nTotalWords - 1) {
nLineTail = nTotalWords - 1;
if (bTypeset) {
line.nBeginWordIndex = nLineHead;
line.nEndWordIndex = nLineTail;
line.nTotalWord = nLineTail - nLineHead + 1;
line.fLineWidth = fLineWidth;
line.fLineAscent = fLineAscent;
line.fLineDescent = fLineDescent;
m_pSection->AddLine(line);
}
fMaxY += (fLineAscent + m_pVT->GetLineLeading());
fMaxY -= fLineDescent;
fMaxX = std::max(fLineWidth, fMaxX);
}
} else {
// Empty section: account for one empty line whose height comes from the
// line metrics (typesetting) or from the default font at |fFontSize|
// (measuring).
if (bTypeset) {
fLineAscent = m_pVT->GetLineAscent();
fLineDescent = m_pVT->GetLineDescent();
} else {
fLineAscent =
m_pVT->GetFontAscent(m_pVT->GetDefaultFontIndex(), fFontSize);
fLineDescent =
m_pVT->GetFontDescent(m_pVT->GetDefaultFontIndex(), fFontSize);
}
if (bTypeset) {
// Begin/end index -1 marks a line that contains no words.
line.nBeginWordIndex = -1;
line.nEndWordIndex = -1;
line.nTotalWord = 0;
line.fLineWidth = 0;
line.fLineAscent = fLineAscent;
line.fLineDescent = fLineDescent;
m_pSection->AddLine(line);
}
fMaxY += m_pVT->GetLineLeading() + fLineAscent - fLineDescent;
}
m_rcRet = CPVT_FloatRect(0, 0, fMaxX, fMaxY);
}
void CTypeset::OutputLines() {
ASSERT(m_pVT);
ASSERT(m_pSection);
float fMinX = 0.0f, fMinY = 0.0f, fMaxX = 0.0f, fMaxY = 0.0f;
float fPosX = 0.0f, fPosY = 0.0f;
float fLineIndent = m_pVT->GetLineIndent();
float fTypesetWidth = std::max(m_pVT->GetPlateWidth() - fLineIndent, 0.0f);
switch (m_pVT->GetAlignment()) {
default:
case 0:
fMinX = 0.0f;
break;
case 1:
fMinX = (fTypesetWidth - m_rcRet.Width()) * VARIABLETEXT_HALF;
break;
case 2:
fMinX = fTypesetWidth - m_rcRet.Width();
break;
}
fMaxX = fMinX + m_rcRet.Width();
fMinY = 0.0f;
fMaxY = m_rcRet.Height();
int32_t nTotalLines =
pdfium::CollectionSize<int32_t>(m_pSection->m_LineArray);
if (nTotalLines > 0) {
for (int32_t l = 0; l < nTotalLines; l++) {
CLine* pLine = m_pSection->m_LineArray[l].get();
switch (m_pVT->GetAlignment()) {
default:
case 0:
fPosX = 0;
break;
case 1:
fPosX = (fTypesetWidth - pLine->m_LineInfo.fLineWidth) *
VARIABLETEXT_HALF;
break;
case 2:
fPosX = fTypesetWidth - pLine->m_LineInfo.fLineWidth;
break;
}
fPosX += fLineIndent;
fPosY += m_pVT->GetLineLeading();
fPosY += pLine->m_LineInfo.fLineAscent;
pLine->m_LineInfo.fLineX = fPosX - fMinX;
pLine->m_LineInfo.fLineY = fPosY - fMinY;
for (int32_t w = pLine->m_LineInfo.nBeginWordIndex;
w <= pLine->m_LineInfo.nEndWordIndex; w++) {
if (pdfium::IndexInBounds(m_pSection->m_WordArray, w)) {
CPVT_WordInfo* pWord = m_pSection->m_WordArray[w].get();
pWord->fWordX = fPosX - fMinX;
pWord->fWordY = fPosY - fMinY;
fPosX += m_pVT->GetWordWidth(*pWord);
}
}
fPosY -= pLine->m_LineInfo.fLineDescent;
}
}
m_rcRet = CPVT_FloatRect(fMinX, fMinY, fMaxX, fMaxY);
}