Add FPDFText_FindStart() test case with non-ASCII characters. Illustrate a bug where FPDFText_FindStart() and related functions do not get the correct results on Windows. Bug: pdfium:1370 Change-Id: Ibafe0569c5d225d21b1518334c5c6653596bde3a Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/57931 Reviewed-by: Tom Sepez <tsepez@chromium.org> Commit-Queue: Lei Zhang <thestig@chromium.org>
diff --git a/fpdfsdk/fpdf_text_embeddertest.cpp b/fpdfsdk/fpdf_text_embeddertest.cpp index d0e63ac..e211d2d 100644 --- a/fpdfsdk/fpdf_text_embeddertest.cpp +++ b/fpdfsdk/fpdf_text_embeddertest.cpp
@@ -7,6 +7,7 @@ #include <utility> #include <vector> +#include "build/build_config.h" #include "core/fxcrt/fx_memory.h" #include "core/fxge/fx_font.h" #include "public/cpp/fpdf_scopers.h" @@ -392,6 +393,45 @@ UnloadPage(page); } +// Fails on Windows. https://crbug.com/pdfium/1370 +#if defined(OS_WIN) +#define MAYBE_TextSearchLatinExtended DISABLED_TextSearchLatinExtended +#else +#define MAYBE_TextSearchLatinExtended TextSearchLatinExtended +#endif +TEST_F(FPDFTextEmbedderTest, MAYBE_TextSearchLatinExtended) { + ASSERT_TRUE(OpenDocument("latin_extended.pdf")); + FPDF_PAGE page = LoadPage(0); + ASSERT_TRUE(page); + + FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page); + ASSERT_TRUE(textpage); + + // Upper/lowercase 'a' with breve. + constexpr FPDF_WCHAR kNeedleUpper[] = {0x0102, 0x0000}; + constexpr FPDF_WCHAR kNeedleLower[] = {0x0103, 0x0000}; + + for (const auto* needle : {kNeedleUpper, kNeedleLower}) { + ScopedFPDFTextFind search(FPDFText_FindStart(textpage, needle, 0, 0)); + EXPECT_TRUE(search); + EXPECT_EQ(0, FPDFText_GetSchResultIndex(search.get())); + EXPECT_EQ(0, FPDFText_GetSchCount(search.get())); + + // Should find 2 results at position 2/3, both with length 1. + EXPECT_TRUE(FPDFText_FindNext(search.get())); + EXPECT_EQ(2, FPDFText_GetSchResultIndex(search.get())); + EXPECT_EQ(1, FPDFText_GetSchCount(search.get())); + EXPECT_TRUE(FPDFText_FindNext(search.get())); + EXPECT_EQ(3, FPDFText_GetSchResultIndex(search.get())); + EXPECT_EQ(1, FPDFText_GetSchCount(search.get())); + // And no more than 2 results. + EXPECT_FALSE(FPDFText_FindNext(search.get())); + } + + FPDFText_ClosePage(textpage); + UnloadPage(page); +} + // Test that the page has characters despite a bad stream length. TEST_F(FPDFTextEmbedderTest, StreamLengthPastEndOfFile) { ASSERT_TRUE(OpenDocument("bug_57.pdf"));
diff --git a/testing/resources/latin_extended.pdf b/testing/resources/latin_extended.pdf new file mode 100644 index 0000000..6c34ab0 --- /dev/null +++ b/testing/resources/latin_extended.pdf Binary files differ