Add regression test for /ActualText parsing
Using [1] as a starting point, handwrite a small PDF for testing and use
it in an embedder test.
[1] https://pdfium-review.googlesource.com/c/pdfium/+/127410/1
Bug: 384770169
Change-Id: Ia714d269c9235e1de312eeef5937a011fa9e11ce
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/128591
Reviewed-by: Thomas Sepez <tsepez@google.com>
Commit-Queue: Lei Zhang <thestig@chromium.org>
Reviewed-by: Tom Sepez <tsepez@chromium.org>
diff --git a/fpdfsdk/fpdf_text_embeddertest.cpp b/fpdfsdk/fpdf_text_embeddertest.cpp
index 92ac221..af9f307 100644
--- a/fpdfsdk/fpdf_text_embeddertest.cpp
+++ b/fpdfsdk/fpdf_text_embeddertest.cpp
@@ -2059,6 +2059,24 @@
ElementsAreArray(kNeedsImprovementResult));
}
+TEST_F(FPDFTextEmbedderTest, Bug384770169) {
+ ASSERT_TRUE(OpenDocument("bug_384770169.pdf"));
+ ScopedEmbedderTestPage page = LoadScopedPage(0);
+ ASSERT_TRUE(page);
+
+ ScopedFPDFTextPage textpage(FPDFText_LoadPage(page.get()));
+ ASSERT_TRUE(textpage);
+
+ static constexpr char kExpected[] = "What is my favorite food?";
+ // sizeof() includes the trailing NUL, as does the expected return value.
+ static constexpr int kExpectedSize = sizeof(kExpected);
+ unsigned short buffer[256] = {};
+ EXPECT_EQ(kExpectedSize,
+ FPDFText_GetText(textpage.get(), 0, std::size(buffer), buffer));
+ EXPECT_THAT(pdfium::make_span(buffer).first(kExpectedSize),
+ ElementsAreArray(kExpected));
+}
+
TEST_F(FPDFTextEmbedderTest, TextObjectSetIsActive) {
ASSERT_TRUE(OpenDocument("hello_world.pdf"));
ScopedEmbedderTestPage page = LoadScopedPage(0);
diff --git a/testing/resources/bug_384770169.in b/testing/resources/bug_384770169.in
new file mode 100644
index 0000000..313d327
--- /dev/null
+++ b/testing/resources/bug_384770169.in
@@ -0,0 +1,53 @@
+{{header}}
+{{object 1 0}} <<
+ /Type /Catalog
+ /Pages 2 0 R
+>>
+endobj
+{{object 2 0}} <<
+ /Type /Pages
+ /Count 1
+ /Kids [3 0 R]
+>>
+endobj
+{{object 3 0}} <<
+ /Type /Page
+ /Contents 4 0 R
+ /Parent 2 0 R
+ /MediaBox [0 0 200 200]
+ /Resources <<
+ /Font <<
+ /F1 5 0 R
+ >>
+ >>
+>>
+endobj
+{{object 4 0}} <<
+ {{streamlen}}
+>>
+stream
+BT
+/F1 12 Tf
+1 0 0 1 20 100 Tm
+(What is) Tj
+/Span<</ActualText (my) >> BDC
+50 0 Td (your) Tj
+EMC
+30 0 Td (favorite) Tj
+/Span<</ActualText (food) >> BDC
+50 0 Td (color) Tj
+EMC
+(?) Tj
+ET
+endstream
+endobj
+{{object 5 0}} <<
+ /Type /Font
+ /Subtype /Type1
+ /BaseFont /Helvetica
+>>
+endobj
+{{xref}}
+{{trailer}}
+{{startxref}}
+%%EOF
diff --git a/testing/resources/bug_384770169.pdf b/testing/resources/bug_384770169.pdf
new file mode 100644
index 0000000..5c7cfc0
--- /dev/null
+++ b/testing/resources/bug_384770169.pdf
@@ -0,0 +1,65 @@
+%PDF-1.7
+% ò¤ô
+1 0 obj <<
+ /Type /Catalog
+ /Pages 2 0 R
+>>
+endobj
+2 0 obj <<
+ /Type /Pages
+ /Count 1
+ /Kids [3 0 R]
+>>
+endobj
+3 0 obj <<
+ /Type /Page
+ /Contents 4 0 R
+ /Parent 2 0 R
+ /MediaBox [0 0 200 200]
+ /Resources <<
+ /Font <<
+ /F1 5 0 R
+ >>
+ >>
+>>
+endobj
+4 0 obj <<
+ /Length 185
+>>
+stream
+BT
+/F1 12 Tf
+1 0 0 1 20 100 Tm
+(What is) Tj
+/Span<</ActualText (my) >> BDC
+50 0 Td (your) Tj
+EMC
+30 0 Td (favorite) Tj
+/Span<</ActualText (food) >> BDC
+50 0 Td (color) Tj
+EMC
+(?) Tj
+ET
+endstream
+endobj
+5 0 obj <<
+ /Type /Font
+ /Subtype /Type1
+ /BaseFont /Helvetica
+>>
+endobj
+xref
+0 6
+0000000000 65535 f
+0000000015 00000 n
+0000000068 00000 n
+0000000131 00000 n
+0000000283 00000 n
+0000000520 00000 n
+trailer <<
+ /Root 1 0 R
+ /Size 6
+>>
+startxref
+596
+%%EOF