Add FPDFText_FindStart() test case with non-ASCII characters.
Illustrate a bug where FPDFText_FindStart() and related functions do not
get the correct results on Windows.
Bug: pdfium:1370
Change-Id: Ibafe0569c5d225d21b1518334c5c6653596bde3a
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/57931
Reviewed-by: Tom Sepez <tsepez@chromium.org>
Commit-Queue: Lei Zhang <thestig@chromium.org>
diff --git a/fpdfsdk/fpdf_text_embeddertest.cpp b/fpdfsdk/fpdf_text_embeddertest.cpp
index d0e63ac..e211d2d 100644
--- a/fpdfsdk/fpdf_text_embeddertest.cpp
+++ b/fpdfsdk/fpdf_text_embeddertest.cpp
@@ -7,6 +7,7 @@
#include <utility>
#include <vector>
+#include "build/build_config.h"
#include "core/fxcrt/fx_memory.h"
#include "core/fxge/fx_font.h"
#include "public/cpp/fpdf_scopers.h"
@@ -392,6 +393,45 @@
UnloadPage(page);
}
+// Fails on Windows. https://crbug.com/pdfium/1370
+#if defined(OS_WIN)
+#define MAYBE_TextSearchLatinExtended DISABLED_TextSearchLatinExtended
+#else
+#define MAYBE_TextSearchLatinExtended TextSearchLatinExtended
+#endif
+TEST_F(FPDFTextEmbedderTest, MAYBE_TextSearchLatinExtended) {
+ ASSERT_TRUE(OpenDocument("latin_extended.pdf"));
+ FPDF_PAGE page = LoadPage(0);
+ ASSERT_TRUE(page);
+
+ FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
+ ASSERT_TRUE(textpage);
+
+ // Upper/lowercase 'a' with breve (U+0102 / U+0103), each zero-terminated.
+ constexpr FPDF_WCHAR kNeedleUpper[] = {0x0102, 0x0000};
+ constexpr FPDF_WCHAR kNeedleLower[] = {0x0103, 0x0000};
+
+ for (const auto* needle : {kNeedleUpper, kNeedleLower}) {
+ ScopedFPDFTextFind search(FPDFText_FindStart(textpage, needle, 0, 0));
+ EXPECT_TRUE(search);
+ EXPECT_EQ(0, FPDFText_GetSchResultIndex(search.get()));
+ EXPECT_EQ(0, FPDFText_GetSchCount(search.get()));
+
+ // Should find 2 results at result indices 2 and 3, both with length 1.
+ EXPECT_TRUE(FPDFText_FindNext(search.get()));
+ EXPECT_EQ(2, FPDFText_GetSchResultIndex(search.get()));
+ EXPECT_EQ(1, FPDFText_GetSchCount(search.get()));
+ EXPECT_TRUE(FPDFText_FindNext(search.get()));
+ EXPECT_EQ(3, FPDFText_GetSchResultIndex(search.get()));
+ EXPECT_EQ(1, FPDFText_GetSchCount(search.get()));
+ // And no more than 2 results.
+ EXPECT_FALSE(FPDFText_FindNext(search.get()));
+ }
+
+ FPDFText_ClosePage(textpage);
+ UnloadPage(page);
+}
+
// Test that the page has characters despite a bad stream length.
TEST_F(FPDFTextEmbedderTest, StreamLengthPastEndOfFile) {
ASSERT_TRUE(OpenDocument("bug_57.pdf"));
diff --git a/testing/resources/latin_extended.pdf b/testing/resources/latin_extended.pdf
new file mode 100644
index 0000000..6c34ab0
--- /dev/null
+++ b/testing/resources/latin_extended.pdf
Binary files differ