Add experimental FPDFText_IsGenerated() API.
Some characters inside an FPDF_TEXTPAGE are generated by PDFium and are not
part of the actual content stream. These are usually carriage returns
and line feeds. Add an API to expose whether a character is generated or
not.
Bug: pdfium:1719
Change-Id: Ibb6ec198aed87a74a5d128edad7f671aeae86a8e
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/99830
Reviewed-by: Tom Sepez <tsepez@chromium.org>
Commit-Queue: Lei Zhang <thestig@chromium.org>
diff --git a/fpdfsdk/fpdf_text.cpp b/fpdfsdk/fpdf_text.cpp
index da00c47..bdd9fd3 100644
--- a/fpdfsdk/fpdf_text.cpp
+++ b/fpdfsdk/fpdf_text.cpp
@@ -71,6 +71,16 @@
return charinfo.m_Unicode;
}
+FPDF_EXPORT int FPDF_CALLCONV FPDFText_IsGenerated(FPDF_TEXTPAGE text_page,
+                                                   int index) {
+  CPDF_TextPage* page = GetTextPageForValidIndex(text_page, index);
+  if (!page)
+    return -1;  // The helper rejected |text_page| or |index|.
+  const bool is_generated = page->GetCharInfo(index).m_CharType ==
+                            CPDF_TextPage::CharType::kGenerated;
+  return is_generated ? 1 : 0;
+}
+
FPDF_EXPORT double FPDF_CALLCONV FPDFText_GetFontSize(FPDF_TEXTPAGE text_page,
int index) {
CPDF_TextPage* textpage = GetTextPageForValidIndex(text_page, index);
diff --git a/fpdfsdk/fpdf_text_embeddertest.cpp b/fpdfsdk/fpdf_text_embeddertest.cpp
index 54e5d26..0fe0a99 100644
--- a/fpdfsdk/fpdf_text_embeddertest.cpp
+++ b/fpdfsdk/fpdf_text_embeddertest.cpp
@@ -952,6 +952,37 @@
UnloadPage(page);
}
+TEST_F(FPDFTextEmbedderTest, IsGenerated) {
+  ASSERT_TRUE(OpenDocument("hello_world.pdf"));
+  FPDF_PAGE pdf_page = LoadPage(0);
+  ASSERT_TRUE(pdf_page);
+
+  {
+    ScopedFPDFTextPage scoped_text(FPDFText_LoadPage(pdf_page));
+    ASSERT_TRUE(scoped_text);
+    FPDF_TEXTPAGE handle = scoped_text.get();
+
+    // 'H' (index 0) and ' ' (index 6) are expected to be non-generated.
+    EXPECT_EQ(static_cast<unsigned int>('H'), FPDFText_GetUnicode(handle, 0));
+    EXPECT_EQ(0, FPDFText_IsGenerated(handle, 0));
+    EXPECT_EQ(static_cast<unsigned int>(' '), FPDFText_GetUnicode(handle, 6));
+    EXPECT_EQ(0, FPDFText_IsGenerated(handle, 6));
+
+    // The CR/LF pair at indices 13 and 14 is expected to be generated.
+    EXPECT_EQ(static_cast<unsigned int>('\r'), FPDFText_GetUnicode(handle, 13));
+    EXPECT_EQ(1, FPDFText_IsGenerated(handle, 13));
+    EXPECT_EQ(static_cast<unsigned int>('\n'), FPDFText_GetUnicode(handle, 14));
+    EXPECT_EQ(1, FPDFText_IsGenerated(handle, 14));
+
+    // Out-of-range indices and a null text page are expected to yield -1.
+    EXPECT_EQ(-1, FPDFText_IsGenerated(handle, -1));
+    EXPECT_EQ(-1, FPDFText_IsGenerated(handle, kHelloGoodbyeTextSize));
+    EXPECT_EQ(-1, FPDFText_IsGenerated(nullptr, 6));
+  }
+
+  UnloadPage(pdf_page);
+}
+
TEST_F(FPDFTextEmbedderTest, Bug_921) {
ASSERT_TRUE(OpenDocument("bug_921.pdf"));
FPDF_PAGE page = LoadPage(0);
diff --git a/fpdfsdk/fpdf_view_c_api_test.c b/fpdfsdk/fpdf_view_c_api_test.c
index 0f656ae..23586c4 100644
--- a/fpdfsdk/fpdf_view_c_api_test.c
+++ b/fpdfsdk/fpdf_view_c_api_test.c
@@ -427,6 +427,7 @@
CHK(FPDFText_GetText);
CHK(FPDFText_GetTextRenderMode);
CHK(FPDFText_GetUnicode);
+ CHK(FPDFText_IsGenerated);
CHK(FPDFText_LoadPage);
// fpdf_thumbnail.h
diff --git a/public/fpdf_text.h b/public/fpdf_text.h
index 65604d8..22f25e9 100644
--- a/public/fpdf_text.h
+++ b/public/fpdf_text.h
@@ -74,6 +74,21 @@
FPDF_EXPORT unsigned int FPDF_CALLCONV
FPDFText_GetUnicode(FPDF_TEXTPAGE text_page, int index);
+// Experimental API.
+// Function: FPDFText_IsGenerated
+//          Check whether a character in a page was generated by PDFium.
+// Parameters:
+//          text_page   -   Handle to a text page information structure.
+//                          Returned by FPDFText_LoadPage function.
+//          index       -   Zero-based index of the character.
+// Return value:
+//          1 if the character is generated by PDFium.
+//          0 if the character is not generated by PDFium.
+//          -1 if there was an error (e.g. invalid text_page or index).
+//
+FPDF_EXPORT int FPDF_CALLCONV
+FPDFText_IsGenerated(FPDF_TEXTPAGE text_page, int index);
+
// Function: FPDFText_GetFontSize
// Get the font size of a particular character.
// Parameters: