Replace CPDF_TextPage::CharInfo class with PAGECHAR_INFO.
The two classes are nearly identical, so delete the existing
CPDF_TextPage::CharInfo and make PAGECHAR_INFO the new CharInfo class.
To resolve the differences:
- Remove |CPDF_TextPage::CharInfo::m_FontSize|, which PAGECHAR_INFO never
had. Add GetCharFontSize() to calculate the font size on demand.
- Resolve the conflicting types for the "charcode" member from the two
classes (int in PAGECHAR_INFO, wchar_t in CPDF_TextPage::CharInfo).
Neither is correct. Use uint32_t instead because that is what
setters/getters expect.
Also change CPDF_TextPage::GetCharInfo() to just return a const reference
to a CharInfo instead of copying data into a caller-provided one. Change
GetCharInfo() to CHECK() that its |index| parameter is valid, which is
fine because all callers already do that check. Then simplify the callers
accordingly.
Bug: pdfium:1112
Change-Id: Ib25488272d7ec389817573bca06f710fa82d2cc9
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/65451
Commit-Queue: Lei Zhang <thestig@chromium.org>
Reviewed-by: Tom Sepez <tsepez@chromium.org>
diff --git a/core/fpdftext/cpdf_linkextract.cpp b/core/fpdftext/cpdf_linkextract.cpp
index e4d3c40..84bb415 100644
--- a/core/fpdftext/cpdf_linkextract.cpp
+++ b/core/fpdftext/cpdf_linkextract.cpp
@@ -119,8 +119,7 @@
const int nTotalChar = m_pTextPage->CountChars();
const WideString page_text = m_pTextPage->GetAllPageText();
while (pos < nTotalChar) {
- CPDF_TextPage::CharInfo char_info;
- m_pTextPage->GetCharInfo(pos, &char_info);
+ const CPDF_TextPage::CharInfo& char_info = m_pTextPage->GetCharInfo(pos);
if (char_info.m_Flag != FPDFTEXT_CHAR_GENERATED &&
char_info.m_Unicode != TEXT_SPACE_CHAR && pos != nTotalChar - 1) {
bAfterHyphen = (char_info.m_Flag == FPDFTEXT_CHAR_HYPHEN ||
diff --git a/core/fpdftext/cpdf_textpage.cpp b/core/fpdftext/cpdf_textpage.cpp
index f1617ef..39807b2 100644
--- a/core/fpdftext/cpdf_textpage.cpp
+++ b/core/fpdftext/cpdf_textpage.cpp
@@ -112,8 +112,8 @@
return count / (end - start);
}
-bool IsControlChar(const PAGECHAR_INFO& charInfo) {
- switch (charInfo.m_Unicode) {
+bool IsControlChar(const CPDF_TextPage::CharInfo& char_info) {
+ switch (char_info.m_Unicode) {
case 0x2:
case 0x3:
case 0x93:
@@ -122,7 +122,7 @@
case 0x97:
case 0x98:
case 0xfffe:
- return charInfo.m_Flag != FPDFTEXT_CHAR_HYPHEN;
+ return char_info.m_Flag != FPDFTEXT_CHAR_HYPHEN;
default:
return false;
}
@@ -236,14 +236,10 @@
CPDF_TextPage::CharInfo::CharInfo() = default;
+CPDF_TextPage::CharInfo::CharInfo(const CharInfo&) = default;
+
CPDF_TextPage::CharInfo::~CharInfo() = default;
-PAGECHAR_INFO::PAGECHAR_INFO() {}
-
-PAGECHAR_INFO::PAGECHAR_INFO(const PAGECHAR_INFO&) = default;
-
-PAGECHAR_INFO::~PAGECHAR_INFO() {}
-
CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, bool rtl)
: m_pPage(pPage), m_rtl(rtl), m_DisplayMatrix(GetPageMatrix(pPage)) {
Init();
@@ -260,7 +256,7 @@
m_CharIndices.push_back(0);
for (int i = 0; i < nCount; ++i) {
- const PAGECHAR_INFO& charinfo = m_CharList[i];
+ const CharInfo& charinfo = m_CharList[i];
if (charinfo.m_Flag == FPDFTEXT_CHAR_GENERATED ||
(charinfo.m_Unicode != 0 && !IsControlChar(charinfo)) ||
(charinfo.m_Unicode == 0 && charinfo.m_CharCode != 0)) {
@@ -330,7 +326,7 @@
int curPos = start;
bool bFlagNewRect = true;
while (nCount--) {
- const PAGECHAR_INFO& info_curchar = m_CharList[curPos++];
+ const CharInfo& info_curchar = m_CharList[curPos++];
if (info_curchar.m_Flag == FPDFTEXT_CHAR_GENERATED)
continue;
if (info_curchar.m_CharBox.Width() < kSizeEpsilon ||
@@ -394,7 +390,7 @@
}
WideString CPDF_TextPage::GetTextByPredicate(
- const std::function<bool(const PAGECHAR_INFO&)>& predicate) const {
+ const std::function<bool(const CharInfo&)>& predicate) const {
float posy = 0;
bool IsContainPreChar = false;
bool IsAddLineFeed = false;
@@ -426,33 +422,28 @@
}
WideString CPDF_TextPage::GetTextByRect(const CFX_FloatRect& rect) const {
- return GetTextByPredicate([&rect](const PAGECHAR_INFO& charinfo) {
+ return GetTextByPredicate([&rect](const CharInfo& charinfo) {
return IsRectIntersect(rect, charinfo.m_CharBox);
});
}
WideString CPDF_TextPage::GetTextByObject(
const CPDF_TextObject* pTextObj) const {
- return GetTextByPredicate([pTextObj](const PAGECHAR_INFO& charinfo) {
+ return GetTextByPredicate([pTextObj](const CharInfo& charinfo) {
return charinfo.m_pTextObj == pTextObj;
});
}
-void CPDF_TextPage::GetCharInfo(size_t index, CharInfo* info) const {
- if (index >= size())
- return;
+const CPDF_TextPage::CharInfo& CPDF_TextPage::GetCharInfo(size_t index) const {
+ CHECK(index < m_CharList.size());
+ return m_CharList[index];
+}
- const PAGECHAR_INFO& charinfo = m_CharList[index];
- info->m_Charcode = charinfo.m_CharCode;
- info->m_Origin = charinfo.m_Origin;
- info->m_Unicode = charinfo.m_Unicode;
- info->m_Flag = charinfo.m_Flag;
- info->m_CharBox = charinfo.m_CharBox;
- info->m_pTextObj = charinfo.m_pTextObj;
- bool bHasFont = charinfo.m_pTextObj && charinfo.m_pTextObj->GetFont();
- info->m_FontSize =
- bHasFont ? charinfo.m_pTextObj->GetFontSize() : kDefaultFontSize;
- info->m_Matrix = charinfo.m_Matrix;
+float CPDF_TextPage::GetCharFontSize(size_t index) const {
+ CHECK(index < m_CharList.size());
+ const CPDF_TextObject* text_object = m_CharList[index].m_pTextObj.Get();
+ bool has_font = text_object && text_object->GetFont();
+ return has_font ? text_object->GetFontSize() : kDefaultFontSize;
}
WideString CPDF_TextPage::GetPageText(int start, int count) const {
@@ -577,7 +568,7 @@
void CPDF_TextPage::AppendGeneratedCharacter(wchar_t unicode,
const CFX_Matrix& formMatrix) {
- Optional<PAGECHAR_INFO> pGenerateChar = GenerateCharInfo(unicode);
+ Optional<CharInfo> pGenerateChar = GenerateCharInfo(unicode);
if (!pGenerateChar)
return;
@@ -627,8 +618,8 @@
}
void CPDF_TextPage::AddCharInfoByLRDirection(wchar_t wChar,
- const PAGECHAR_INFO& info) {
- PAGECHAR_INFO info2 = info;
+ const CharInfo& info) {
+ CharInfo info2 = info;
if (IsControlChar(info2)) {
info2.m_Index = -1;
m_CharList.push_back(info2);
@@ -656,8 +647,8 @@
}
void CPDF_TextPage::AddCharInfoByRLDirection(wchar_t wChar,
- const PAGECHAR_INFO& info) {
- PAGECHAR_INFO info2 = info;
+ const CharInfo& info) {
+ CharInfo info2 = info;
if (IsControlChar(info2)) {
info2.m_Index = -1;
m_CharList.push_back(info2);
@@ -878,7 +869,7 @@
if (wChar >= 0xFFFD)
continue;
- PAGECHAR_INFO charinfo;
+ CharInfo charinfo;
charinfo.m_Origin = pTextObj->GetPos();
charinfo.m_Index = m_TextBuf.GetLength();
charinfo.m_Unicode = wChar;
@@ -893,7 +884,7 @@
}
void CPDF_TextPage::FindPreviousTextObject() {
- const PAGECHAR_INFO* pPrevCharInfo = GetPrevCharInfo();
+ const CharInfo* pPrevCharInfo = GetPrevCharInfo();
if (!pPrevCharInfo)
return;
@@ -942,8 +933,7 @@
case GenerateCharacter::kNone:
break;
case GenerateCharacter::kSpace: {
- Optional<PAGECHAR_INFO> pGenerateChar =
- GenerateCharInfo(TEXT_SPACE_CHAR);
+ Optional<CharInfo> pGenerateChar = GenerateCharInfo(TEXT_SPACE_CHAR);
if (pGenerateChar) {
if (!formMatrix.IsIdentity())
pGenerateChar->m_Matrix = formMatrix;
@@ -977,7 +967,7 @@
m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1);
m_TempCharList.pop_back();
}
- PAGECHAR_INFO* charinfo = &m_TempCharList.back();
+ CharInfo* charinfo = &m_TempCharList.back();
m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1);
charinfo->m_Unicode = 0x2;
charinfo->m_Flag = FPDFTEXT_CHAR_HYPHEN;
@@ -1009,7 +999,7 @@
const size_t nItems = pTextObj->CountItems();
for (size_t i = 0; i < nItems; ++i) {
CPDF_TextObjectItem item;
- PAGECHAR_INFO charinfo;
+ CharInfo charinfo;
pTextObj->GetItemInfo(i, &item);
if (item.m_CharCode == 0xffffffff) {
WideString str = m_TempTextBuf.MakeString();
@@ -1106,7 +1096,7 @@
static_cast<float>(TEXT_CHARRATIO_GAPDELTA) * pTextObj->GetFontSize());
for (int n = pdfium::CollectionSize<int>(m_TempCharList);
n > pdfium::CollectionSize<int>(m_TempCharList) - count; --n) {
- const PAGECHAR_INFO& charinfo1 = m_TempCharList[n - 1];
+ const CharInfo& charinfo1 = m_TempCharList[n - 1];
CFX_PointF diff = charinfo1.m_Origin - charinfo.m_Origin;
if (charinfo1.m_CharCode == charinfo.m_CharCode &&
charinfo1.m_pTextObj->GetFont() == charinfo.m_pTextObj->GetFont() &&
@@ -1189,12 +1179,12 @@
return true;
}
- const PAGECHAR_INFO* pPrevCharInfo = GetPrevCharInfo();
+ const CharInfo* pPrevCharInfo = GetPrevCharInfo();
return pPrevCharInfo && pPrevCharInfo->m_Flag == FPDFTEXT_CHAR_PIECE &&
IsHyphenCode(pPrevCharInfo->m_Unicode);
}
-const PAGECHAR_INFO* CPDF_TextPage::GetPrevCharInfo() const {
+const CPDF_TextPage::CharInfo* CPDF_TextPage::GetPrevCharInfo() const {
if (!m_TempCharList.empty())
return &m_TempCharList.back();
return !m_CharList.empty() ? &m_CharList.back() : nullptr;
@@ -1394,19 +1384,21 @@
return false;
}
-Optional<PAGECHAR_INFO> CPDF_TextPage::GenerateCharInfo(wchar_t unicode) {
- const PAGECHAR_INFO* pPrevCharInfo = GetPrevCharInfo();
+Optional<CPDF_TextPage::CharInfo> CPDF_TextPage::GenerateCharInfo(
+ wchar_t unicode) {
+ const CharInfo* pPrevCharInfo = GetPrevCharInfo();
if (!pPrevCharInfo)
return {};
- PAGECHAR_INFO info;
+ CharInfo info;
info.m_Index = m_TextBuf.GetLength();
info.m_CharCode = CPDF_Font::kInvalidCharCode;
info.m_Unicode = unicode;
info.m_Flag = FPDFTEXT_CHAR_GENERATED;
int preWidth = 0;
- if (pPrevCharInfo->m_pTextObj && pPrevCharInfo->m_CharCode != -1) {
+ if (pPrevCharInfo->m_pTextObj &&
+ pPrevCharInfo->m_CharCode != CPDF_Font::kInvalidCharCode) {
preWidth = GetCharWidth(pPrevCharInfo->m_CharCode,
pPrevCharInfo->m_pTextObj->GetFont().Get());
}
diff --git a/core/fpdftext/cpdf_textpage.h b/core/fpdftext/cpdf_textpage.h
index b8f862c..ad12976 100644
--- a/core/fpdftext/cpdf_textpage.h
+++ b/core/fpdftext/cpdf_textpage.h
@@ -36,22 +36,6 @@
#define TEXT_HYPHEN L"-"
#define TEXT_CHARRATIO_GAPDELTA 0.070
-class PAGECHAR_INFO {
- public:
- PAGECHAR_INFO();
- PAGECHAR_INFO(const PAGECHAR_INFO&);
- ~PAGECHAR_INFO();
-
- int m_Index = 0;
- int m_CharCode = 0;
- wchar_t m_Unicode = 0;
- int32_t m_Flag = 0;
- CFX_PointF m_Origin;
- CFX_FloatRect m_CharBox;
- UnownedPtr<CPDF_TextObject> m_pTextObj;
- CFX_Matrix m_Matrix;
-};
-
struct PDFTEXT_Obj {
PDFTEXT_Obj();
PDFTEXT_Obj(const PDFTEXT_Obj& that);
@@ -66,12 +50,13 @@
class CharInfo {
public:
CharInfo();
+ CharInfo(const CharInfo&);
~CharInfo();
+ int m_Index = 0;
+ uint32_t m_CharCode = 0;
wchar_t m_Unicode = 0;
- wchar_t m_Charcode = 0;
int32_t m_Flag = 0;
- float m_FontSize = 0;
CFX_PointF m_Origin;
CFX_FloatRect m_CharBox;
UnownedPtr<CPDF_TextObject> m_pTextObj;
@@ -85,7 +70,11 @@
int TextIndexFromCharIndex(int char_index) const;
size_t size() const { return m_CharList.size(); }
int CountChars() const;
- void GetCharInfo(size_t index, CharInfo* info) const;
+
+ // These methods CHECK() to make sure |index| is within bounds.
+ const CharInfo& GetCharInfo(size_t index) const;
+ float GetCharFontSize(size_t index) const;
+
std::vector<CFX_FloatRect> GetRectArray(int start, int nCount) const;
int GetIndexAtPos(const CFX_PointF& point, const CFX_SizeF& tolerance) const;
WideString GetTextByRect(const CFX_FloatRect& rect) const;
@@ -128,8 +117,8 @@
CPDF_PageObjectHolder::const_iterator ObjPos);
GenerateCharacter ProcessInsertObject(const CPDF_TextObject* pObj,
const CFX_Matrix& formMatrix);
- const PAGECHAR_INFO* GetPrevCharInfo() const;
- Optional<PAGECHAR_INFO> GenerateCharInfo(wchar_t unicode);
+ const CharInfo* GetPrevCharInfo() const;
+ Optional<CharInfo> GenerateCharInfo(wchar_t unicode);
bool IsSameAsPreTextObject(CPDF_TextObject* pTextObj,
const CPDF_PageObjectHolder* pObjList,
CPDF_PageObjectHolder::const_iterator iter) const;
@@ -139,20 +128,20 @@
MarkedContentState PreMarkedContent(PDFTEXT_Obj pObj);
void ProcessMarkedContent(PDFTEXT_Obj pObj);
void FindPreviousTextObject();
- void AddCharInfoByLRDirection(wchar_t wChar, const PAGECHAR_INFO& info);
- void AddCharInfoByRLDirection(wchar_t wChar, const PAGECHAR_INFO& info);
+ void AddCharInfoByLRDirection(wchar_t wChar, const CharInfo& info);
+ void AddCharInfoByRLDirection(wchar_t wChar, const CharInfo& info);
TextOrientation GetTextObjectWritingMode(
const CPDF_TextObject* pTextObj) const;
TextOrientation FindTextlineFlowOrientation() const;
void AppendGeneratedCharacter(wchar_t unicode, const CFX_Matrix& formMatrix);
void SwapTempTextBuf(int32_t iCharListStartAppend, int32_t iBufStartAppend);
WideString GetTextByPredicate(
- const std::function<bool(const PAGECHAR_INFO&)>& predicate) const;
+ const std::function<bool(const CharInfo&)>& predicate) const;
UnownedPtr<const CPDF_Page> const m_pPage;
std::vector<uint16_t> m_CharIndices;
- std::deque<PAGECHAR_INFO> m_CharList;
- std::deque<PAGECHAR_INFO> m_TempCharList;
+ std::deque<CharInfo> m_CharList;
+ std::deque<CharInfo> m_TempCharList;
CFX_WideTextBuf m_TextBuf;
CFX_WideTextBuf m_TempTextBuf;
UnownedPtr<CPDF_TextObject> m_pPreTextObj;
diff --git a/fpdfsdk/fpdf_text.cpp b/fpdfsdk/fpdf_text.cpp
index 03ef0d3..7c18f17 100644
--- a/fpdfsdk/fpdf_text.cpp
+++ b/fpdfsdk/fpdf_text.cpp
@@ -75,8 +75,7 @@
if (!textpage)
return 0;
- CPDF_TextPage::CharInfo charinfo;
- textpage->GetCharInfo(index, &charinfo);
+ const CPDF_TextPage::CharInfo& charinfo = textpage->GetCharInfo(index);
return charinfo.m_Unicode;
}
@@ -86,9 +85,7 @@
if (!textpage)
return 0;
- CPDF_TextPage::CharInfo charinfo;
- textpage->GetCharInfo(index, &charinfo);
- return charinfo.m_FontSize;
+ return textpage->GetCharFontSize(index);
}
FPDF_EXPORT unsigned long FPDF_CALLCONV
@@ -101,8 +98,7 @@
if (!textpage)
return 0;
- CPDF_TextPage::CharInfo charinfo;
- textpage->GetCharInfo(index, &charinfo);
+ const CPDF_TextPage::CharInfo& charinfo = textpage->GetCharInfo(index);
if (!charinfo.m_pTextObj)
return 0;
@@ -124,8 +120,7 @@
if (!textpage)
return -1;
- CPDF_TextPage::CharInfo charinfo;
- textpage->GetCharInfo(index, &charinfo);
+ const CPDF_TextPage::CharInfo& charinfo = textpage->GetCharInfo(index);
if (!charinfo.m_pTextObj)
return -1;
@@ -138,8 +133,7 @@
if (!textpage)
return FPDF_TEXTRENDERMODE_UNKNOWN;
- CPDF_TextPage::CharInfo charinfo;
- textpage->GetCharInfo(index, &charinfo);
+ const CPDF_TextPage::CharInfo& charinfo = textpage->GetCharInfo(index);
if (!charinfo.m_pTextObj)
return FPDF_TEXTRENDERMODE_UNKNOWN;
@@ -158,8 +152,7 @@
if (!textpage || !R || !G || !B || !A)
return false;
- CPDF_TextPage::CharInfo charinfo;
- textpage->GetCharInfo(index, &charinfo);
+ const CPDF_TextPage::CharInfo& charinfo = textpage->GetCharInfo(index);
if (!charinfo.m_pTextObj)
return false;
@@ -183,8 +176,7 @@
if (!textpage || !R || !G || !B || !A)
return false;
- CPDF_TextPage::CharInfo charinfo;
- textpage->GetCharInfo(index, &charinfo);
+ const CPDF_TextPage::CharInfo& charinfo = textpage->GetCharInfo(index);
if (!charinfo.m_pTextObj)
return false;
@@ -204,8 +196,7 @@
if (!textpage)
return -1.0f;
- CPDF_TextPage::CharInfo charinfo;
- textpage->GetCharInfo(index, &charinfo);
+ const CPDF_TextPage::CharInfo& charinfo = textpage->GetCharInfo(index);
// On the left is our current Matrix and on the right a generic rotation
// matrix for our coordinate space.
// | a b 0 | | cos(t) -sin(t) 0 |
@@ -232,8 +223,7 @@
if (!textpage)
return false;
- CPDF_TextPage::CharInfo charinfo;
- textpage->GetCharInfo(index, &charinfo);
+ const CPDF_TextPage::CharInfo& charinfo = textpage->GetCharInfo(index);
*left = charinfo.m_CharBox.left;
*right = charinfo.m_CharBox.right;
*bottom = charinfo.m_CharBox.bottom;
@@ -250,25 +240,25 @@
if (!textpage)
return false;
- CPDF_TextPage::CharInfo charinfo;
- textpage->GetCharInfo(index, &charinfo);
+ const CPDF_TextPage::CharInfo& charinfo = textpage->GetCharInfo(index);
+ float font_size = textpage->GetCharFontSize(index);
- if (charinfo.m_pTextObj && !IsFloatZero(charinfo.m_FontSize)) {
+ if (charinfo.m_pTextObj && !IsFloatZero(font_size)) {
bool is_vert_writing = charinfo.m_pTextObj->GetFont()->IsVertWriting();
if (is_vert_writing && charinfo.m_pTextObj->GetFont()->IsCIDFont()) {
CPDF_CIDFont* pCIDFont = charinfo.m_pTextObj->GetFont()->AsCIDFont();
- uint16_t cid = pCIDFont->CIDFromCharCode(charinfo.m_Charcode);
+ uint16_t cid = pCIDFont->CIDFromCharCode(charinfo.m_CharCode);
short vx;
short vy;
pCIDFont->GetVertOrigin(cid, vx, vy);
- double offsetx = (vx - 500) * charinfo.m_FontSize / 1000.0;
- double offsety = vy * charinfo.m_FontSize / 1000.0;
+ double offsetx = (vx - 500) * font_size / 1000.0;
+ double offsety = vy * font_size / 1000.0;
short vert_width = pCIDFont->GetVertWidth(cid);
- double height = vert_width * charinfo.m_FontSize / 1000.0;
+ double height = vert_width * font_size / 1000.0;
rect->left = charinfo.m_Origin.x + offsetx;
- rect->right = rect->left + charinfo.m_FontSize;
+ rect->right = rect->left + font_size;
rect->bottom = charinfo.m_Origin.y + offsety;
rect->top = rect->bottom + height;
return true;
@@ -277,8 +267,8 @@
int ascent = charinfo.m_pTextObj->GetFont()->GetTypeAscent();
int descent = charinfo.m_pTextObj->GetFont()->GetTypeDescent();
if (ascent != descent) {
- float width = charinfo.m_pTextObj->GetCharWidth(charinfo.m_Charcode);
- float font_scale = charinfo.m_FontSize / (ascent - descent);
+ float width = charinfo.m_pTextObj->GetCharWidth(charinfo.m_CharCode);
+ float font_scale = font_size / (ascent - descent);
rect->left = charinfo.m_Origin.x;
rect->right = charinfo.m_Origin.x + (is_vert_writing ? -width : width);
@@ -289,10 +279,7 @@
}
// Fallback to the tight bounds in empty text scenarios, or bad font metrics
- rect->left = charinfo.m_CharBox.left;
- rect->right = charinfo.m_CharBox.right;
- rect->bottom = charinfo.m_CharBox.bottom;
- rect->top = charinfo.m_CharBox.top;
+ *rect = FSRectFFromCFXFloatRect(charinfo.m_CharBox);
return true;
}
@@ -305,8 +292,7 @@
if (!textpage)
return false;
- CPDF_TextPage::CharInfo charinfo;
- textpage->GetCharInfo(index, &charinfo);
+ const CPDF_TextPage::CharInfo& charinfo = textpage->GetCharInfo(index);
*x = charinfo.m_Origin.x;
*y = charinfo.m_Origin.y;
return true;