Add FPDF_CONSECUTIVE public flag.
This is a flag for use with FPDFText_FindStart(). It exists internally
in the implementation. Expose it via the public API, and add tests to
show what it does.
Fix some typos along the way.
Change-Id: Ibe86cfe9b437bb97939631214fe61c95ee2b19be
Reviewed-on: https://pdfium-review.googlesource.com/c/50472
Commit-Queue: Lei Zhang <thestig@chromium.org>
Reviewed-by: Tom Sepez <tsepez@chromium.org>
diff --git a/fpdfsdk/fpdf_text_embeddertest.cpp b/fpdfsdk/fpdf_text_embeddertest.cpp
index db2716a..fc9564d 100644
--- a/fpdfsdk/fpdf_text_embeddertest.cpp
+++ b/fpdfsdk/fpdf_text_embeddertest.cpp
@@ -210,7 +210,7 @@
GetFPDFWideString(L"orld");
{
- // No occurences of "nope" in test page.
+ // No occurrences of "nope" in test page.
ScopedFPDFTextFind search(FPDFText_FindStart(textpage, nope.get(), 0, 0));
EXPECT_TRUE(search);
EXPECT_EQ(0, FPDFText_GetSchResultIndex(search.get()));
@@ -228,7 +228,7 @@
}
{
- // Two occurences of "world" in test page.
+ // Two occurrences of "world" in test page.
ScopedFPDFTextFind search(FPDFText_FindStart(textpage, world.get(), 0, 2));
EXPECT_TRUE(search);
@@ -236,12 +236,12 @@
EXPECT_EQ(0, FPDFText_GetSchResultIndex(search.get()));
EXPECT_EQ(0, FPDFText_GetSchCount(search.get()));
- // First occurence of "world" in this test page.
+ // First occurrence of "world" in this test page.
EXPECT_TRUE(FPDFText_FindNext(search.get()));
EXPECT_EQ(7, FPDFText_GetSchResultIndex(search.get()));
EXPECT_EQ(5, FPDFText_GetSchCount(search.get()));
- // Last occurence of "world" in this test page.
+ // Last occurrence of "world" in this test page.
EXPECT_TRUE(FPDFText_FindNext(search.get()));
EXPECT_EQ(24, FPDFText_GetSchResultIndex(search.get()));
EXPECT_EQ(5, FPDFText_GetSchCount(search.get()));
@@ -251,7 +251,7 @@
EXPECT_EQ(24, FPDFText_GetSchResultIndex(search.get()));
EXPECT_EQ(5, FPDFText_GetSchCount(search.get()));
- // Back to first occurence.
+ // Back to first occurrence.
EXPECT_TRUE(FPDFText_FindPrev(search.get()));
EXPECT_EQ(7, FPDFText_GetSchResultIndex(search.get()));
EXPECT_EQ(5, FPDFText_GetSchCount(search.get()));
@@ -312,6 +312,91 @@
UnloadPage(page);
}
+TEST_F(FPDFTextEmbedderTest, TextSearchConsecutive) {
+ ASSERT_TRUE(OpenDocument("find_text_consecutive.pdf"));
+ FPDF_PAGE page = LoadPage(0);
+ ASSERT_TRUE(page);
+
+ FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
+ ASSERT_TRUE(textpage);
+
+ std::unique_ptr<unsigned short, pdfium::FreeDeleter> aaaa =
+ GetFPDFWideString(L"aaaa");
+
+ {
+ // Search for "aaaa" yields 2 results in "aaaaaaaaaa".
+ ScopedFPDFTextFind search(FPDFText_FindStart(textpage, aaaa.get(), 0, 0));
+ EXPECT_TRUE(search);
+
+ // Remains not found until advanced.
+ EXPECT_EQ(0, FPDFText_GetSchResultIndex(search.get()));
+ EXPECT_EQ(0, FPDFText_GetSchCount(search.get()));
+
+ // First occurrence of "aaaa" in this test page.
+ EXPECT_TRUE(FPDFText_FindNext(search.get()));
+ EXPECT_EQ(0, FPDFText_GetSchResultIndex(search.get()));
+ EXPECT_EQ(4, FPDFText_GetSchCount(search.get()));
+
+ // Last occurrence of "aaaa" in this test page.
+ EXPECT_TRUE(FPDFText_FindNext(search.get()));
+ EXPECT_EQ(4, FPDFText_GetSchResultIndex(search.get()));
+ EXPECT_EQ(4, FPDFText_GetSchCount(search.get()));
+
+ // Found position unchanged when fails to advance.
+ EXPECT_FALSE(FPDFText_FindNext(search.get()));
+ EXPECT_EQ(4, FPDFText_GetSchResultIndex(search.get()));
+ EXPECT_EQ(4, FPDFText_GetSchCount(search.get()));
+
+ // Back to first occurrence.
+ EXPECT_TRUE(FPDFText_FindPrev(search.get()));
+ EXPECT_EQ(0, FPDFText_GetSchResultIndex(search.get()));
+ EXPECT_EQ(4, FPDFText_GetSchCount(search.get()));
+
+ // Found position unchanged when fails to retreat.
+ EXPECT_FALSE(FPDFText_FindPrev(search.get()));
+ EXPECT_EQ(0, FPDFText_GetSchResultIndex(search.get()));
+ EXPECT_EQ(4, FPDFText_GetSchCount(search.get()));
+ }
+
+ {
+ // Search for "aaaa" yields 7 results in "aaaaaaaaaa", when searching with
+ // FPDF_CONSECUTIVE.
+ ScopedFPDFTextFind search(
+ FPDFText_FindStart(textpage, aaaa.get(), FPDF_CONSECUTIVE, 0));
+ EXPECT_TRUE(search);
+
+ // Remains not found until advanced.
+ EXPECT_EQ(0, FPDFText_GetSchResultIndex(search.get()));
+ EXPECT_EQ(0, FPDFText_GetSchCount(search.get()));
+
+ // Find overlapping occurrences of "aaaa", each starting one character later:
+ for (int i = 0; i < 7; ++i) {
+ EXPECT_TRUE(FPDFText_FindNext(search.get()));
+ EXPECT_EQ(i, FPDFText_GetSchResultIndex(search.get()));
+ EXPECT_EQ(4, FPDFText_GetSchCount(search.get()));
+ }
+
+ // Found position unchanged when fails to advance.
+ EXPECT_FALSE(FPDFText_FindNext(search.get()));
+ EXPECT_EQ(6, FPDFText_GetSchResultIndex(search.get()));
+ EXPECT_EQ(4, FPDFText_GetSchCount(search.get()));
+
+ for (int i = 5; i >= 0; --i) {
+ EXPECT_TRUE(FPDFText_FindPrev(search.get()));
+ EXPECT_EQ(i, FPDFText_GetSchResultIndex(search.get()));
+ EXPECT_EQ(4, FPDFText_GetSchCount(search.get()));
+ }
+
+ // Found position unchanged when fails to retreat.
+ EXPECT_FALSE(FPDFText_FindPrev(search.get()));
+ EXPECT_EQ(0, FPDFText_GetSchResultIndex(search.get()));
+ EXPECT_EQ(4, FPDFText_GetSchCount(search.get()));
+ }
+
+ FPDFText_ClosePage(textpage);
+ UnloadPage(page);
+}
+
// Test that the page has characters despite a bad stream length.
TEST_F(FPDFTextEmbedderTest, StreamLengthPastEndOfFile) {
ASSERT_TRUE(OpenDocument("bug_57.pdf"));
diff --git a/public/fpdf_text.h b/public/fpdf_text.h
index feb54fb..008c236 100644
--- a/public/fpdf_text.h
+++ b/public/fpdf_text.h
@@ -294,10 +294,13 @@
int buflen);
// Flags used by FPDFText_FindStart function.
-#define FPDF_MATCHCASE \
- 0x00000001 // If not set, it will not match case by default.
-#define FPDF_MATCHWHOLEWORD \
- 0x00000002 // If not set, it will not match the whole word by default.
+//
+// If not set, it will not match case by default.
+#define FPDF_MATCHCASE 0x00000001
+// If not set, it will not match the whole word by default.
+#define FPDF_MATCHWHOLEWORD 0x00000002
+// If set, matches may overlap. If not set, it skips past the current match.
+#define FPDF_CONSECUTIVE 0x00000004
// Function: FPDFText_FindStart
// Start a search.
diff --git a/testing/image_diff/image_diff.cpp b/testing/image_diff/image_diff.cpp
index 806e0c8..3b098b9 100644
--- a/testing/image_diff/image_diff.cpp
+++ b/testing/image_diff/image_diff.cpp
@@ -158,7 +158,7 @@
int w = std::min(baseline.w(), actual.w());
int h = std::min(baseline.h(), actual.h());
- // Count occurences of each RGBA pixel value of baseline in the overlap.
+ // Count occurrences of each RGBA pixel value of baseline in the overlap.
std::map<uint32_t, int32_t> baseline_histogram;
for (int y = 0; y < h; ++y) {
for (int x = 0; x < w; ++x) {
diff --git a/testing/resources/find_text_consecutive.in b/testing/resources/find_text_consecutive.in
new file mode 100644
index 0000000..9e35e1d
--- /dev/null
+++ b/testing/resources/find_text_consecutive.in
@@ -0,0 +1,45 @@
+{{header}}
+{{object 1 0}} <<
+ /Type /Catalog
+ /Pages 2 0 R
+>>
+endobj
+{{object 2 0}} <<
+ /Type /Pages
+ /MediaBox [ 0 0 200 200 ]
+ /Count 1
+ /Kids [ 3 0 R ]
+>>
+endobj
+{{object 3 0}} <<
+ /Type /Page
+ /Parent 2 0 R
+ /Resources <<
+ /Font <<
+ /F1 4 0 R
+ >>
+ >>
+ /Contents 5 0 R
+>>
+endobj
+{{object 4 0}} <<
+ /Type /Font
+ /Subtype /Type1
+ /BaseFont /Times-Roman
+>>
+endobj
+{{object 5 0}} <<
+{{streamlen}}
+>>
+stream
+BT
+20 50 Td
+/F1 12 Tf
+(aaaaaaaaaa bbbbbbbbb) Tj
+ET
+endstream
+endobj
+{{xref}}
+{{trailer}}
+{{startxref}}
+%%EOF
diff --git a/testing/resources/find_text_consecutive.pdf b/testing/resources/find_text_consecutive.pdf
new file mode 100644
index 0000000..5083d75
--- /dev/null
+++ b/testing/resources/find_text_consecutive.pdf
@@ -0,0 +1,57 @@
+%PDF-1.7
+% ò¤ô
+1 0 obj <<
+ /Type /Catalog
+ /Pages 2 0 R
+>>
+endobj
+2 0 obj <<
+ /Type /Pages
+ /MediaBox [ 0 0 200 200 ]
+ /Count 1
+ /Kids [ 3 0 R ]
+>>
+endobj
+3 0 obj <<
+ /Type /Page
+ /Parent 2 0 R
+ /Resources <<
+ /Font <<
+ /F1 4 0 R
+ >>
+ >>
+ /Contents 5 0 R
+>>
+endobj
+4 0 obj <<
+ /Type /Font
+ /Subtype /Type1
+ /BaseFont /Times-Roman
+>>
+endobj
+5 0 obj <<
+/Length 51
+>>
+stream
+BT
+20 50 Td
+/F1 12 Tf
+(aaaaaaaaaa bbbbbbbbb) Tj
+ET
+endstream
+endobj
+xref
+0 6
+0000000000 65535 f
+0000000015 00000 n
+0000000068 00000 n
+0000000161 00000 n
+0000000287 00000 n
+0000000365 00000 n
+trailer <<
+ /Root 1 0 R
+ /Size 6
+>>
+startxref
+465
+%%EOF