Break dependence of cjs_document.cpp on fpdfapi/font.
Knowledge of fonts is only required by two functions in an anonymous
namespace; these should actually be methods of CPDF_TextObject, which
already has knowledge of fonts.
Change-Id: If10adbb87cfe4bfbed9d4131fca81f13b517badf
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/58590
Commit-Queue: Tom Sepez <tsepez@chromium.org>
Reviewed-by: Lei Zhang <thestig@chromium.org>
diff --git a/core/fpdfapi/page/cpdf_textobject.cpp b/core/fpdfapi/page/cpdf_textobject.cpp
index 49d6a69..feb49e4 100644
--- a/core/fpdfapi/page/cpdf_textobject.cpp
+++ b/core/fpdfapi/page/cpdf_textobject.cpp
@@ -13,6 +13,8 @@
#include "core/fpdfapi/font/cpdf_font.h"
#include "third_party/base/ptr_util.h"
+#define ISLATINWORD(u) (u != 0x20 && u <= 0x28FF)  // True for any value other than space (0x20) that is at most 0x28FF; |u| is not parenthesized, so pass a plain variable.
+
CPDF_TextObjectItem::CPDF_TextObjectItem() : m_CharCode(0) {}
CPDF_TextObjectItem::~CPDF_TextObjectItem() = default;
@@ -99,6 +101,65 @@
}
}
+int CPDF_TextObject::CountWords() const {  // Returns the number of words in this text object.
+ RetainPtr<CPDF_Font> pFont = GetFont();
+ if (!pFont)
+ return 0;  // Without a font the char codes cannot be mapped to Unicode.
+
+ bool bInLatinWord = false;  // True while the previous char belonged to a run of ISLATINWORD chars.
+ int nWords = 0;
+ for (size_t i = 0, sz = CountChars(); i < sz; ++i) {
+ uint32_t charcode = CPDF_Font::kInvalidCharCode;
+ float unused_kerning;  // Out-param required by GetCharInfo(); its value is never read.
+ GetCharInfo(i, &charcode, &unused_kerning);
+
+ WideString swUnicode = pFont->UnicodeFromCharCode(charcode);
+ uint16_t unicode = 0;  // Remains 0 when the font returns an empty string.
+ if (swUnicode.GetLength() > 0)
+ unicode = swUnicode[0];  // Only the first element of the mapped string is examined.
+
+ bool bIsLatin = ISLATINWORD(unicode);
+ if (bIsLatin && bInLatinWord)
+ continue;  // Still inside a run that has already been counted.
+
+ bInLatinWord = bIsLatin;
+ if (unicode != 0x20)  // A space ends the current run but is not counted itself.
+ nWords++;  // Start of a new run, or a char above 0x28FF, which counts on its own.
+ }
+
+ return nWords;
+}
+
+WideString CPDF_TextObject::GetWordString(int nWordIndex) const {  // Returns the chars of the word at zero-based |nWordIndex|.
+ RetainPtr<CPDF_Font> pFont = GetFont();
+ if (!pFont)
+ return WideString();  // Without a font the char codes cannot be mapped to Unicode.
+
+ WideString swRet;
+ int nWords = 0;  // Number of words started so far; the current word has index nWords - 1.
+ bool bInLatinWord = false;  // True while the previous char belonged to a run of ISLATINWORD chars.
+ for (size_t i = 0, sz = CountChars(); i < sz; ++i) {
+ uint32_t charcode = CPDF_Font::kInvalidCharCode;
+ float unused_kerning;  // Out-param required by GetCharInfo(); its value is never read.
+ GetCharInfo(i, &charcode, &unused_kerning);
+
+ WideString swUnicode = pFont->UnicodeFromCharCode(charcode);
+ uint16_t unicode = 0;  // Remains 0 when the font returns an empty string.
+ if (swUnicode.GetLength() > 0)
+ unicode = swUnicode[0];  // Only the first element of the mapped string is examined.
+
+ bool bIsLatin = ISLATINWORD(unicode);
+ if (!bIsLatin || !bInLatinWord) {  // Same word-boundary rule as CountWords(), written without `continue`.
+ bInLatinWord = bIsLatin;
+ if (unicode != 0x20)  // A space ends the current run but does not start a word.
+ nWords++;
+ }
+ if (nWords - 1 == nWordIndex)  // NOTE(review): a space right after the selected word also passes this test and is appended; confirm that is intended.
+ swRet += unicode;
+ }
+ return swRet;  // Empty when no char satisfied the index test above.
+}
+
std::unique_ptr<CPDF_TextObject> CPDF_TextObject::Clone() const {
auto obj = pdfium::MakeUnique<CPDF_TextObject>();
obj->CopyData(this);
diff --git a/core/fpdfapi/page/cpdf_textobject.h b/core/fpdfapi/page/cpdf_textobject.h
index 129a0fe..b0189a9 100644
--- a/core/fpdfapi/page/cpdf_textobject.h
+++ b/core/fpdfapi/page/cpdf_textobject.h
@@ -46,9 +46,12 @@
void GetCharInfo(size_t index, uint32_t* charcode, float* kerning) const;
void GetCharInfo(size_t index, CPDF_TextObjectItem* pInfo) const;
float GetCharWidth(uint32_t charcode) const;
+ int CountWords() const;  // Number of words in this text object; 0 when it has no font.
+ WideString GetWordString(int nWordIndex) const;  // Chars of the word at zero-based |nWordIndex|; empty when there is no font.
CFX_PointF GetPos() const { return m_Pos; }
CFX_Matrix GetTextMatrix() const;
+
RetainPtr<CPDF_Font> GetFont() const;
float GetFontSize() const;
diff --git a/fxjs/cjs_document.cpp b/fxjs/cjs_document.cpp
index 34b5e2a..d71126d 100644
--- a/fxjs/cjs_document.cpp
+++ b/fxjs/cjs_document.cpp
@@ -8,7 +8,6 @@
#include <utility>
-#include "core/fpdfapi/font/cpdf_font.h"
#include "core/fpdfapi/page/cpdf_pageobject.h"
#include "core/fpdfapi/page/cpdf_textobject.h"
#include "core/fpdfapi/parser/cpdf_array.h"
@@ -29,75 +28,6 @@
#include "fxjs/cjs_icon.h"
#include "fxjs/js_resources.h"
-namespace {
-
-#define ISLATINWORD(u) (u != 0x20 && u <= 0x28FF)
-
-int CountWords(const CPDF_TextObject* pTextObj) {
- RetainPtr<CPDF_Font> pFont = pTextObj->GetFont();
- if (!pFont)
- return 0;
-
- bool bInLatinWord = false;
- int nWords = 0;
- for (size_t i = 0, sz = pTextObj->CountChars(); i < sz; ++i) {
- uint32_t charcode = CPDF_Font::kInvalidCharCode;
- float unused_kerning;
-
- pTextObj->GetCharInfo(i, &charcode, &unused_kerning);
- WideString swUnicode = pFont->UnicodeFromCharCode(charcode);
-
- uint16_t unicode = 0;
- if (swUnicode.GetLength() > 0)
- unicode = swUnicode[0];
-
- bool bIsLatin = ISLATINWORD(unicode);
- if (bIsLatin && bInLatinWord)
- continue;
-
- bInLatinWord = bIsLatin;
- if (unicode != 0x20)
- nWords++;
- }
-
- return nWords;
-}
-
-WideString GetObjWordStr(const CPDF_TextObject* pTextObj, int nWordIndex) {
- RetainPtr<CPDF_Font> pFont = pTextObj->GetFont();
- if (!pFont)
- return WideString();
-
- WideString swRet;
- int nWords = 0;
- bool bInLatinWord = false;
- for (size_t i = 0, sz = pTextObj->CountChars(); i < sz; ++i) {
- uint32_t charcode = CPDF_Font::kInvalidCharCode;
- float unused_kerning;
-
- pTextObj->GetCharInfo(i, &charcode, &unused_kerning);
- WideString swUnicode = pFont->UnicodeFromCharCode(charcode);
-
- uint16_t unicode = 0;
- if (swUnicode.GetLength() > 0)
- unicode = swUnicode[0];
-
- bool bIsLatin = ISLATINWORD(unicode);
- if (!bIsLatin || !bInLatinWord) {
- bInLatinWord = bIsLatin;
- if (unicode != 0x20)
- nWords++;
- }
-
- if (nWords - 1 == nWordIndex)
- swRet += unicode;
- }
-
- return swRet;
-}
-
-} // namespace
-
const JSPropertySpec CJS_Document::PropertySpecs[] = {
{"ADBE", get_ADBE_static, set_ADBE_static},
{"author", get_author_static, set_author_static},
@@ -1319,9 +1249,9 @@
for (auto& pPageObj : *page) {
if (pPageObj->IsText()) {
CPDF_TextObject* pTextObj = pPageObj->AsText();
- int nObjWords = CountWords(pTextObj);
+ int nObjWords = pTextObj->CountWords();
if (nWords + nObjWords >= nWordNo) {
- swRet = GetObjWordStr(pTextObj, nWordNo - nWords);
+ swRet = pTextObj->GetWordString(nWordNo - nWords);
break;
}
nWords += nObjWords;
@@ -1367,9 +1297,8 @@
int nWords = 0;
for (auto& pPageObj : *page) {
if (pPageObj->IsText())
- nWords += CountWords(pPageObj->AsText());
+ nWords += pPageObj->AsText()->CountWords();
}
-
return CJS_Result::Success(pRuntime->NewNumber(nWords));
}