Add a test case for FPDFText_GetUnicode() regression.

https://pdfium-review.googlesource.com/80610 regressed text extraction
for bigtable-osdi06.pdf, so the CL got reverted in
https://pdfium-review.googlesource.com/83070. This adds a regression
test with a minimized version of bigtable-osdi06.pdf.

Change-Id: Ieff1cabfd0afffc3ad5d538427a766c55099b7d1
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/83072
Commit-Queue: Lei Zhang <thestig@chromium.org>
Reviewed-by: Hui Yingst <nigi@chromium.org>
diff --git a/fpdfsdk/fpdf_text_embeddertest.cpp b/fpdfsdk/fpdf_text_embeddertest.cpp
index cc541cc..8db0969 100644
--- a/fpdfsdk/fpdf_text_embeddertest.cpp
+++ b/fpdfsdk/fpdf_text_embeddertest.cpp
@@ -1660,3 +1660,28 @@
 
   UnloadPage(page);
 }
+
+TEST_F(FPDFTextEmbedderTest, BigtableTextExtraction) {  // Regression test.
+  constexpr char kExpectedText[] =
+      "{fay,jeff,sanjay,wilsonh,kerr,m3b,tushar,\x02k es,gruber}@google.com";
+  constexpr int kExpectedTextCount = pdfium::size(kExpectedText) - 1;
+
+  ASSERT_TRUE(OpenDocument("bigtable_mini.pdf"));  // From bigtable-osdi06.pdf.
+  FPDF_PAGE page = LoadPage(0);  // The document's only page.
+  ASSERT_TRUE(page);
+
+  {  // Limits text_page's lifetime to before UnloadPage().
+    ScopedFPDFTextPage text_page(FPDFText_LoadPage(page));
+    ASSERT_TRUE(text_page);
+    int char_count = FPDFText_CountChars(text_page.get());
+    ASSERT_GE(char_count, 0);  // Guards against a negative count.
+    ASSERT_EQ(kExpectedTextCount, char_count);  // Keeps loop indices valid.
+
+    for (int i = 0; i < kExpectedTextCount; ++i) {  // Compare per character.
+      EXPECT_EQ(static_cast<uint32_t>(kExpectedText[i]),
+                FPDFText_GetUnicode(text_page.get(), i));
+    }
+  }
+
+  UnloadPage(page);
+}
diff --git a/testing/resources/bigtable_mini.in b/testing/resources/bigtable_mini.in
new file mode 100644
index 0000000..7e80992
--- /dev/null
+++ b/testing/resources/bigtable_mini.in
@@ -0,0 +1,113 @@
+{{header}}
+{{object 1 0}} <<
+  /Type /Catalog
+  /Pages 2 0 R
+>>
+endobj
+{{object 2 0}} <<
+  /Type /Pages
+  /Count 1
+  /Kids [3 0 R]
+>>
+endobj
+{{object 3 0}} <<
+  /Type /Page
+  /Parent 2 0 R
+  /Contents 4 0 R
+  /MediaBox [0 0 612 792]
+  /Resources <<
+    /ProcSet [/PDF /ImageB /Text]
+    /Font <<
+      /F1 5 0 R
+      /F2 6 0 R
+    >>
+  >>
+>>
+endobj
+{{object 4 0}} <<
+  {{streamlen}}
+>>
+stream
+q BT
+1 0 0 1 250 667 Tm
+/F2 8.96638 Tf
+-243.635 -15.84 Td
+(f)Tj
+/F1 8.96638 Tf
+4.55491 0 Td
+(f)Tj
+2.87476 0 Td
+(ay)Tj
+7.79253 0 Td
+(,jef)Tj
+11.506 0 Td
+(f,sanjay)Tj
+27.4558 0 Td
+(,wilsonh,k)Tj
+37.1801 0 Td
+(err)Tj
+9.58372 0 Td
+(,m3b,tushar)Tj
+41.8537 0 Td
+(,k)Tj
+11.6349 0 Td
+(es,gruber)Tj
+/F2 8.96638 Tf
+32.9694 0 Td
+(g)Tj
+/F1 8.96638 Tf
+4.55491 0 Td
+(@google.com)Tj
+ET Q
+endstream
+endobj
+{{object 5 0}} <<
+  /Type /Font
+  /Subtype /Type1
+  /BaseFont /Times-Roman
+>>
+endobj
+{{object 6 0}} <<
+  /Type /Font
+  /Subtype /Type1
+  /FirstChar 102
+  /BaseFont /RFSQHQ+CMSY9
+  /FontDescriptor 7 0 R
+  /LastChar 103
+  /Widths [508 508]
+>>
+endobj
+{{object 7 0}} <<
+  /Type /FontDescriptor
+  /Ascent 750
+  /CapHeight 750
+  /CharSet (/braceleft/braceright)
+  /Descent -250
+  /Flags 4
+  /FontBBox [0 -250 440 750]
+  /FontFile3 8 0 R
+  /FontName /RFSQHQ+CMSY9
+  /ItalicAngle 0
+  /StemV 65
+>>
+endobj
+{{object 8 0}} <<
+  /Subtype /Type1C
+  /Filter [/ASCII85Decode /FlateDecode]
+  {{streamlen}}
+>>
+stream
+GhQY<?t!MPA7Xa1EXNBL#h4%k%?@du#q5_5B)EgioL"p)pKs!j9@_A!X@n"A#^s/nr,0aY?!i,5R?>nA
+43BRuTX#t%4ekD2_c]pSd*g?I_(dmV-o3k<:Veup,U50*Ylr.i;$bHCc:fghe5L>1a\`<FkpTR<8hEdi
+.SEJ:>O%`N>>SLd>,:)GT9<BBa2#L+Pa9V1&Ao("^^P</!u&Ql7PhdY9T5#6IePGiOgOaNdLsRg)OHi+
+n+a.f[/4_7lnp:OXP)%6XER"WK`:C2*&F%pl[L]/LH0S#rJk:S[coEjaA0p=l`BI4BP[$LCn09862jKs
+Y)F7W^!5B(h.[kDUY*1W\e@l8mE%N?^8RN2qNUCce!bWPjLtF8=4Zg,R,1!:HR6^V/b\TIh0Z`11`hZZ
+*BO'Za1C[ph)dSm>aE#>k-A'_h4)!bqJD$jjj@/_bu*BN?.Ud@HV0Y&l1Vg$1F)e^]:6ER3IV(Tbj1;S
+BposQ0Df/MDiTcoYO4BSY\&*%<aJs1;J/:.>?>$UG7-pseF)R.T'a;@,PN5BX]sWQf<*o6H:RGZ*f^Pj
+qkd,*mVA#1lAJeH\%d!)GAm3[%_gAF5OB8mr>(pE^5_Fj[i?3Bq0X7/D)6E](`9a_gaUoK~>
+endstream
+endobj
+{{xref}}
+{{trailer}}
+{{startxref}}
+%%EOF
diff --git a/testing/resources/bigtable_mini.pdf b/testing/resources/bigtable_mini.pdf
new file mode 100644
index 0000000..adb3e02
--- /dev/null
+++ b/testing/resources/bigtable_mini.pdf
@@ -0,0 +1,128 @@
+%PDF-1.7
+% ò¤ô
+1 0 obj <<
+  /Type /Catalog
+  /Pages 2 0 R
+>>
+endobj
+2 0 obj <<
+  /Type /Pages
+  /Count 1
+  /Kids [3 0 R]
+>>
+endobj
+3 0 obj <<
+  /Type /Page
+  /Parent 2 0 R
+  /Contents 4 0 R
+  /MediaBox [0 0 612 792]
+  /Resources <<
+    /ProcSet [/PDF /ImageB /Text]
+    /Font <<
+      /F1 5 0 R
+      /F2 6 0 R
+    >>
+  >>
+>>
+endobj
+4 0 obj <<
+  /Length 374
+>>
+stream
+q BT
+1 0 0 1 250 667 Tm
+/F2 8.96638 Tf
+-243.635 -15.84 Td
+(f)Tj
+/F1 8.96638 Tf
+4.55491 0 Td
+(f)Tj
+2.87476 0 Td
+(ay)Tj
+7.79253 0 Td
+(,jef)Tj
+11.506 0 Td
+(f,sanjay)Tj
+27.4558 0 Td
+(,wilsonh,k)Tj
+37.1801 0 Td
+(err)Tj
+9.58372 0 Td
+(,m3b,tushar)Tj
+41.8537 0 Td
+(,k)Tj
+11.6349 0 Td
+(es,gruber)Tj
+/F2 8.96638 Tf
+32.9694 0 Td
+(g)Tj
+/F1 8.96638 Tf
+4.55491 0 Td
+(@google.com)Tj
+ET Q
+endstream
+endobj
+5 0 obj <<
+  /Type /Font
+  /Subtype /Type1
+  /BaseFont /Times-Roman
+>>
+endobj
+6 0 obj <<
+  /Type /Font
+  /Subtype /Type1
+  /FirstChar 102
+  /BaseFont /RFSQHQ+CMSY9
+  /FontDescriptor 7 0 R
+  /LastChar 103
+  /Widths [508 508]
+>>
+endobj
+7 0 obj <<
+  /Type /FontDescriptor
+  /Ascent 750
+  /CapHeight 750
+  /CharSet (/braceleft/braceright)
+  /Descent -250
+  /Flags 4
+  /FontBBox [0 -250 440 750]
+  /FontFile3 8 0 R
+  /FontName /RFSQHQ+CMSY9
+  /ItalicAngle 0
+  /StemV 65
+>>
+endobj
+8 0 obj <<
+  /Subtype /Type1C
+  /Filter [/ASCII85Decode /FlateDecode]
+  /Length 640
+>>
+stream
+GhQY<?t!MPA7Xa1EXNBL#h4%k%?@du#q5_5B)EgioL"p)pKs!j9@_A!X@n"A#^s/nr,0aY?!i,5R?>nA
+43BRuTX#t%4ekD2_c]pSd*g?I_(dmV-o3k<:Veup,U50*Ylr.i;$bHCc:fghe5L>1a\`<FkpTR<8hEdi
+.SEJ:>O%`N>>SLd>,:)GT9<BBa2#L+Pa9V1&Ao("^^P</!u&Ql7PhdY9T5#6IePGiOgOaNdLsRg)OHi+
+n+a.f[/4_7lnp:OXP)%6XER"WK`:C2*&F%pl[L]/LH0S#rJk:S[coEjaA0p=l`BI4BP[$LCn09862jKs
+Y)F7W^!5B(h.[kDUY*1W\e@l8mE%N?^8RN2qNUCce!bWPjLtF8=4Zg,R,1!:HR6^V/b\TIh0Z`11`hZZ
+*BO'Za1C[ph)dSm>aE#>k-A'_h4)!bqJD$jjj@/_bu*BN?.Ud@HV0Y&l1Vg$1F)e^]:6ER3IV(Tbj1;S
+BposQ0Df/MDiTcoYO4BSY\&*%<aJs1;J/:.>?>$UG7-pseF)R.T'a;@,PN5BX]sWQf<*o6H:RGZ*f^Pj
+qkd,*mVA#1lAJeH\%d!)GAm3[%_gAF5OB8mr>(pE^5_Fj[i?3Bq0X7/D)6E](`9a_gaUoK~>
+endstream
+endobj
+xref
+0 9
+0000000000 65535 f 
+0000000015 00000 n 
+0000000068 00000 n 
+0000000131 00000 n 
+0000000333 00000 n 
+0000000759 00000 n 
+0000000837 00000 n 
+0000000993 00000 n 
+0000001234 00000 n 
+trailer <<
+  /Root 1 0 R
+  /Size 9
+>>
+startxref
+1985
+%%EOF