Add FPDF_CONSECUTIVE public flag.

FPDF_CONSECUTIVE is a flag for use with FPDFText_FindStart(). It already
exists internally in the implementation. Expose it via the public API, and
add tests to show what it does.

Fix some typos along the way.

Change-Id: Ibe86cfe9b437bb97939631214fe61c95ee2b19be
Reviewed-on: https://pdfium-review.googlesource.com/c/50472
Commit-Queue: Lei Zhang <thestig@chromium.org>
Reviewed-by: Tom Sepez <tsepez@chromium.org>
diff --git a/fpdfsdk/fpdf_text_embeddertest.cpp b/fpdfsdk/fpdf_text_embeddertest.cpp
index db2716a..fc9564d 100644
--- a/fpdfsdk/fpdf_text_embeddertest.cpp
+++ b/fpdfsdk/fpdf_text_embeddertest.cpp
@@ -210,7 +210,7 @@
       GetFPDFWideString(L"orld");
 
   {
-    // No occurences of "nope" in test page.
+    // No occurrences of "nope" in test page.
     ScopedFPDFTextFind search(FPDFText_FindStart(textpage, nope.get(), 0, 0));
     EXPECT_TRUE(search);
     EXPECT_EQ(0, FPDFText_GetSchResultIndex(search.get()));
@@ -228,7 +228,7 @@
   }
 
   {
-    // Two occurences of "world" in test page.
+    // Two occurrences of "world" in test page.
     ScopedFPDFTextFind search(FPDFText_FindStart(textpage, world.get(), 0, 2));
     EXPECT_TRUE(search);
 
@@ -236,12 +236,12 @@
     EXPECT_EQ(0, FPDFText_GetSchResultIndex(search.get()));
     EXPECT_EQ(0, FPDFText_GetSchCount(search.get()));
 
-    // First occurence of "world" in this test page.
+    // First occurrence of "world" in this test page.
     EXPECT_TRUE(FPDFText_FindNext(search.get()));
     EXPECT_EQ(7, FPDFText_GetSchResultIndex(search.get()));
     EXPECT_EQ(5, FPDFText_GetSchCount(search.get()));
 
-    // Last occurence of "world" in this test page.
+    // Last occurrence of "world" in this test page.
     EXPECT_TRUE(FPDFText_FindNext(search.get()));
     EXPECT_EQ(24, FPDFText_GetSchResultIndex(search.get()));
     EXPECT_EQ(5, FPDFText_GetSchCount(search.get()));
@@ -251,7 +251,7 @@
     EXPECT_EQ(24, FPDFText_GetSchResultIndex(search.get()));
     EXPECT_EQ(5, FPDFText_GetSchCount(search.get()));
 
-    // Back to first occurence.
+    // Back to first occurrence.
     EXPECT_TRUE(FPDFText_FindPrev(search.get()));
     EXPECT_EQ(7, FPDFText_GetSchResultIndex(search.get()));
     EXPECT_EQ(5, FPDFText_GetSchCount(search.get()));
@@ -312,6 +312,91 @@
   UnloadPage(page);
 }
 
+TEST_F(FPDFTextEmbedderTest, TextSearchConsecutive) {
+  ASSERT_TRUE(OpenDocument("find_text_consecutive.pdf"));
+  FPDF_PAGE page = LoadPage(0);
+  ASSERT_TRUE(page);
+
+  FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
+  ASSERT_TRUE(textpage);
+
+  std::unique_ptr<unsigned short, pdfium::FreeDeleter> aaaa =
+      GetFPDFWideString(L"aaaa");
+
+  {
+    // Without FPDF_CONSECUTIVE, "aaaa" matches twice in "aaaaaaaaaa" (0 and 4).
+    ScopedFPDFTextFind search(FPDFText_FindStart(textpage, aaaa.get(), 0, 0));
+    EXPECT_TRUE(search);
+
+    // No match is reported until the first FindNext call.
+    EXPECT_EQ(0, FPDFText_GetSchResultIndex(search.get()));
+    EXPECT_EQ(0, FPDFText_GetSchCount(search.get()));
+
+    // First occurrence of "aaaa", starting at index 0.
+    EXPECT_TRUE(FPDFText_FindNext(search.get()));
+    EXPECT_EQ(0, FPDFText_GetSchResultIndex(search.get()));
+    EXPECT_EQ(4, FPDFText_GetSchCount(search.get()));
+
+    // Second and last non-overlapping occurrence, starting at index 4.
+    EXPECT_TRUE(FPDFText_FindNext(search.get()));
+    EXPECT_EQ(4, FPDFText_GetSchResultIndex(search.get()));
+    EXPECT_EQ(4, FPDFText_GetSchCount(search.get()));
+
+    // A failed FindNext leaves the reported match unchanged.
+    EXPECT_FALSE(FPDFText_FindNext(search.get()));
+    EXPECT_EQ(4, FPDFText_GetSchResultIndex(search.get()));
+    EXPECT_EQ(4, FPDFText_GetSchCount(search.get()));
+
+    // Back to first occurrence.
+    EXPECT_TRUE(FPDFText_FindPrev(search.get()));
+    EXPECT_EQ(0, FPDFText_GetSchResultIndex(search.get()));
+    EXPECT_EQ(4, FPDFText_GetSchCount(search.get()));
+
+    // A failed FindPrev leaves the reported match unchanged.
+    EXPECT_FALSE(FPDFText_FindPrev(search.get()));
+    EXPECT_EQ(0, FPDFText_GetSchResultIndex(search.get()));
+    EXPECT_EQ(4, FPDFText_GetSchCount(search.get()));
+  }
+
+  {
+    // With FPDF_CONSECUTIVE, overlapping matches count: "aaaa" yields 7 results
+    // in "aaaaaaaaaa" (start indices 0 through 6).
+    ScopedFPDFTextFind search(
+        FPDFText_FindStart(textpage, aaaa.get(), FPDF_CONSECUTIVE, 0));
+    EXPECT_TRUE(search);
+
+    // No match is reported until the first FindNext call.
+    EXPECT_EQ(0, FPDFText_GetSchResultIndex(search.get()));
+    EXPECT_EQ(0, FPDFText_GetSchCount(search.get()));
+
+    // Each FindNext advances by one character, so matches overlap:
+    for (int i = 0; i < 7; ++i) {  // Match starts at 0, 1, ..., 6.
+      EXPECT_TRUE(FPDFText_FindNext(search.get()));
+      EXPECT_EQ(i, FPDFText_GetSchResultIndex(search.get()));
+      EXPECT_EQ(4, FPDFText_GetSchCount(search.get()));
+    }
+
+    // A failed FindNext leaves the reported match at the last one (index 6).
+    EXPECT_FALSE(FPDFText_FindNext(search.get()));
+    EXPECT_EQ(6, FPDFText_GetSchResultIndex(search.get()));
+    EXPECT_EQ(4, FPDFText_GetSchCount(search.get()));
+
+    for (int i = 5; i >= 0; --i) {  // Step back from index 5 to 0.
+      EXPECT_TRUE(FPDFText_FindPrev(search.get()));
+      EXPECT_EQ(i, FPDFText_GetSchResultIndex(search.get()));
+      EXPECT_EQ(4, FPDFText_GetSchCount(search.get()));
+    }
+
+    // A failed FindPrev leaves the reported match at the first one (index 0).
+    EXPECT_FALSE(FPDFText_FindPrev(search.get()));
+    EXPECT_EQ(0, FPDFText_GetSchResultIndex(search.get()));
+    EXPECT_EQ(4, FPDFText_GetSchCount(search.get()));
+  }
+
+  FPDFText_ClosePage(textpage);
+  UnloadPage(page);
+}
+
 // Test that the page has characters despite a bad stream length.
 TEST_F(FPDFTextEmbedderTest, StreamLengthPastEndOfFile) {
   ASSERT_TRUE(OpenDocument("bug_57.pdf"));
diff --git a/public/fpdf_text.h b/public/fpdf_text.h
index feb54fb..008c236 100644
--- a/public/fpdf_text.h
+++ b/public/fpdf_text.h
@@ -294,10 +294,13 @@
                                                       int buflen);
 
 // Flags used by FPDFText_FindStart function.
-#define FPDF_MATCHCASE \
-  0x00000001  // If not set, it will not match case by default.
-#define FPDF_MATCHWHOLEWORD \
-  0x00000002  // If not set, it will not match the whole word by default.
+//
+// If not set, it will not match case by default.
+#define FPDF_MATCHCASE 0x00000001
+// If not set, it will not match the whole word by default.
+#define FPDF_MATCHWHOLEWORD 0x00000002
+// If not set, it will skip past the current match to look for the next match.
+#define FPDF_CONSECUTIVE 0x00000004
 
 // Function: FPDFText_FindStart
 //          Start a search.
diff --git a/testing/image_diff/image_diff.cpp b/testing/image_diff/image_diff.cpp
index 806e0c8..3b098b9 100644
--- a/testing/image_diff/image_diff.cpp
+++ b/testing/image_diff/image_diff.cpp
@@ -158,7 +158,7 @@
   int w = std::min(baseline.w(), actual.w());
   int h = std::min(baseline.h(), actual.h());
 
-  // Count occurences of each RGBA pixel value of baseline in the overlap.
+  // Count occurrences of each RGBA pixel value of baseline in the overlap.
   std::map<uint32_t, int32_t> baseline_histogram;
   for (int y = 0; y < h; ++y) {
     for (int x = 0; x < w; ++x) {
diff --git a/testing/resources/find_text_consecutive.in b/testing/resources/find_text_consecutive.in
new file mode 100644
index 0000000..9e35e1d
--- /dev/null
+++ b/testing/resources/find_text_consecutive.in
@@ -0,0 +1,45 @@
+{{header}}
+{{object 1 0}} <<
+  /Type /Catalog
+  /Pages 2 0 R
+>>
+endobj
+{{object 2 0}} <<
+  /Type /Pages
+  /MediaBox [ 0 0 200 200 ]
+  /Count 1
+  /Kids [ 3 0 R ]
+>>
+endobj
+{{object 3 0}} <<
+  /Type /Page
+  /Parent 2 0 R
+  /Resources <<
+    /Font <<
+      /F1 4 0 R
+    >>
+  >>
+  /Contents 5 0 R
+>>
+endobj
+{{object 4 0}} <<
+  /Type /Font
+  /Subtype /Type1
+  /BaseFont /Times-Roman
+>>
+endobj
+{{object 5 0}} <<
+{{streamlen}}
+>>
+stream
+BT
+20 50 Td
+/F1 12 Tf
+(aaaaaaaaaa bbbbbbbbb) Tj
+ET
+endstream
+endobj
+{{xref}}
+{{trailer}}
+{{startxref}}
+%%EOF
diff --git a/testing/resources/find_text_consecutive.pdf b/testing/resources/find_text_consecutive.pdf
new file mode 100644
index 0000000..5083d75
--- /dev/null
+++ b/testing/resources/find_text_consecutive.pdf
@@ -0,0 +1,57 @@
+%PDF-1.7
+% ò¤ô
+1 0 obj <<
+  /Type /Catalog
+  /Pages 2 0 R
+>>
+endobj
+2 0 obj <<
+  /Type /Pages
+  /MediaBox [ 0 0 200 200 ]
+  /Count 1
+  /Kids [ 3 0 R ]
+>>
+endobj
+3 0 obj <<
+  /Type /Page
+  /Parent 2 0 R
+  /Resources <<
+    /Font <<
+      /F1 4 0 R
+    >>
+  >>
+  /Contents 5 0 R
+>>
+endobj
+4 0 obj <<
+  /Type /Font
+  /Subtype /Type1
+  /BaseFont /Times-Roman
+>>
+endobj
+5 0 obj <<
+/Length 51
+>>
+stream
+BT
+20 50 Td
+/F1 12 Tf
+(aaaaaaaaaa bbbbbbbbb) Tj
+ET
+endstream
+endobj
+xref
+0 6
+0000000000 65535 f 
+0000000015 00000 n 
+0000000068 00000 n 
+0000000161 00000 n 
+0000000287 00000 n 
+0000000365 00000 n 
+trailer <<
+  /Root 1 0 R
+  /Size 6
+>>
+startxref
+465
+%%EOF