Implement FPDFText_GetMatrix().

Add a public API to get the transformation matrix for characters in an
FPDF_TEXTPAGE. Include a new font_matrix.pdf test case, where fonts can
have a size of 1, even though they are visually rendered as though they
have a size of 12.

Bug: pdfium:1445
Change-Id: I2f091eeb839d94d5c93b13b8ae755d0e520f5f40
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/65353
Commit-Queue: Lei Zhang <thestig@chromium.org>
Reviewed-by: Tom Sepez <tsepez@chromium.org>
diff --git a/fpdfsdk/fpdf_text.cpp b/fpdfsdk/fpdf_text.cpp
index 7c18f17..078bc93 100644
--- a/fpdfsdk/fpdf_text.cpp
+++ b/fpdfsdk/fpdf_text.cpp
@@ -283,6 +283,21 @@
   return true;
 }
 
+FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV FPDFText_GetMatrix(FPDF_TEXTPAGE text_page,
+                                                       int index,
+                                                       FS_MATRIX* matrix) {
+  if (!matrix)
+    return false;
+  // No usable text page for this handle/index: fail without writing |matrix|.
+  CPDF_TextPage* textpage = GetTextPageForValidIndex(text_page, index);
+  if (!textpage)
+    return false;
+  // Convert the character's stored matrix into the public FS_MATRIX form.
+  const CPDF_TextPage::CharInfo& charinfo = textpage->GetCharInfo(index);
+  *matrix = FSMatrixFromCFXMatrix(charinfo.m_Matrix);
+  return true;
+}
+
 FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV
 FPDFText_GetCharOrigin(FPDF_TEXTPAGE text_page,
                        int index,
@@ -298,7 +313,6 @@
   return true;
 }
 
-// select
 FPDF_EXPORT int FPDF_CALLCONV
 FPDFText_GetCharIndexAtPos(FPDF_TEXTPAGE text_page,
                            double x,
diff --git a/fpdfsdk/fpdf_text_embeddertest.cpp b/fpdfsdk/fpdf_text_embeddertest.cpp
index 001c0be..06ff4cc 100644
--- a/fpdfsdk/fpdf_text_embeddertest.cpp
+++ b/fpdfsdk/fpdf_text_embeddertest.cpp
@@ -1464,3 +1464,87 @@
   FPDFText_ClosePage(text_page);
   UnloadPage(page);
 }
+
+TEST_F(FPDFTextEmbedderTest, GetMatrix) {
+  constexpr char kExpectedText[] = "A1\r\nA2\r\nA3";
+  constexpr size_t kExpectedTextSize = FX_ArraySize(kExpectedText);
+  constexpr FS_MATRIX kExpectedMatrices[] = {
+      {12.0f, 0.0f, 0.0f, 10.0f, 66.0f, 90.0f},
+      {12.0f, 0.0f, 0.0f, 10.0f, 66.0f, 90.0f},
+      {1.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f},
+      {1.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f},
+      {12.0f, 0.0f, 0.0f, 10.0f, 38.0f, 60.0f},
+      {12.0f, 0.0f, 0.0f, 10.0f, 38.0f, 60.0f},
+      {1.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f},
+      {1.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f},
+      {1.0f, 0.0f, 0.0f, 0.833333f, 60.0f, 130.0f},
+      {1.0f, 0.0f, 0.0f, 0.833333f, 60.0f, 130.0f},
+  };
+  constexpr size_t kExpectedCount = FX_ArraySize(kExpectedMatrices);
+  static_assert(kExpectedCount + 1 == kExpectedTextSize,
+                "Bad expected matrix size");
+
+  // Box size of a size 12 'A'; every 'A' checked below must match it.
+  constexpr double kExpectedCharWidth = 8.436;
+  constexpr double kExpectedCharHeight = 6.77;
+
+  ASSERT_TRUE(OpenDocument("font_matrix.pdf"));
+  FPDF_PAGE page = LoadPage(0);
+  ASSERT_TRUE(page);
+
+  {
+    ScopedFPDFTextPage text_page(FPDFText_LoadPage(page));
+    ASSERT_TRUE(text_page);
+    ASSERT_EQ(static_cast<int>(kExpectedCount),
+              FPDFText_CountChars(text_page.get()));
+
+    {
+      // Check the characters.
+      unsigned short buffer[kExpectedTextSize];
+      ASSERT_EQ(static_cast<int>(kExpectedTextSize),
+                FPDFText_GetText(text_page.get(), 0, kExpectedCount, buffer));
+      EXPECT_TRUE(
+          check_unsigned_shorts(kExpectedText, buffer, kExpectedTextSize));
+    }
+
+    {
+      // Check the character box size.
+      double left;
+      double right;
+      double bottom;
+      double top;
+      ASSERT_TRUE(FPDFText_GetCharBox(text_page.get(), 0, &left, &right,
+                                      &bottom, &top));
+      EXPECT_NEAR(kExpectedCharWidth, right - left, 0.001);
+      EXPECT_NEAR(kExpectedCharHeight, top - bottom, 0.001);
+      ASSERT_TRUE(FPDFText_GetCharBox(text_page.get(), 4, &left, &right,
+                                      &bottom, &top));
+      EXPECT_NEAR(kExpectedCharWidth, right - left, 0.001);
+      EXPECT_NEAR(kExpectedCharHeight, top - bottom, 0.001);
+      ASSERT_TRUE(FPDFText_GetCharBox(text_page.get(), 8, &left, &right,
+                                      &bottom, &top));
+      EXPECT_NEAR(kExpectedCharWidth, right - left, 0.001);
+      EXPECT_NEAR(kExpectedCharHeight, top - bottom, 0.001);
+    }
+
+    // Check the matrix of every character, including the line separators.
+    FS_MATRIX matrix;
+    for (size_t i = 0; i < kExpectedCount; ++i) {
+      ASSERT_TRUE(FPDFText_GetMatrix(text_page.get(), i, &matrix)) << i;
+      EXPECT_FLOAT_EQ(kExpectedMatrices[i].a, matrix.a) << i;
+      EXPECT_FLOAT_EQ(kExpectedMatrices[i].b, matrix.b) << i;
+      EXPECT_FLOAT_EQ(kExpectedMatrices[i].c, matrix.c) << i;
+      EXPECT_FLOAT_EQ(kExpectedMatrices[i].d, matrix.d) << i;
+      EXPECT_FLOAT_EQ(kExpectedMatrices[i].e, matrix.e) << i;
+      EXPECT_FLOAT_EQ(kExpectedMatrices[i].f, matrix.f) << i;
+    }
+
+    // Check bad parameters: null page, out-of-range indices, null matrix.
+    EXPECT_FALSE(FPDFText_GetMatrix(nullptr, 0, &matrix));
+    EXPECT_FALSE(FPDFText_GetMatrix(text_page.get(), kExpectedCount, &matrix));
+    EXPECT_FALSE(FPDFText_GetMatrix(text_page.get(), -1, &matrix));
+    EXPECT_FALSE(FPDFText_GetMatrix(text_page.get(), 0, nullptr));
+  }
+
+  UnloadPage(page);
+}
diff --git a/fpdfsdk/fpdf_view_c_api_test.c b/fpdfsdk/fpdf_view_c_api_test.c
index 89ca154..f8a3f1e 100644
--- a/fpdfsdk/fpdf_view_c_api_test.c
+++ b/fpdfsdk/fpdf_view_c_api_test.c
@@ -338,6 +338,7 @@
     CHK(FPDFText_GetFontSize);
     CHK(FPDFText_GetFontWeight);
     CHK(FPDFText_GetLooseCharBox);
+    CHK(FPDFText_GetMatrix);
     CHK(FPDFText_GetRect);
     CHK(FPDFText_GetSchCount);
     CHK(FPDFText_GetSchResultIndex);
diff --git a/public/fpdf_text.h b/public/fpdf_text.h
index 4129282..65554e4 100644
--- a/public/fpdf_text.h
+++ b/public/fpdf_text.h
@@ -262,6 +262,24 @@
 FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV
 FPDFText_GetLooseCharBox(FPDF_TEXTPAGE text_page, int index, FS_RECTF* rect);
 
+// Experimental API.
+// Function: FPDFText_GetMatrix
+//          Get the effective transformation matrix for a particular character.
+// Parameters:
+//          text_page   -   Handle to a text page information structure.
+//                          Returned by FPDFText_LoadPage().
+//          index       -   Zero-based index of the character.
+//          matrix      -   Pointer to an FS_MATRIX that receives the
+//                          character's transformation matrix.
+// Return Value:
+//          On success, return TRUE and fill in |matrix|. If |text_page| is
+//          invalid, or if |index| is out of bounds, or if |matrix| is NULL,
+//          then return FALSE, and |matrix| remains unmodified.
+//
+FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV FPDFText_GetMatrix(FPDF_TEXTPAGE text_page,
+                                                       int index,
+                                                       FS_MATRIX* matrix);
+
 // Function: FPDFText_GetCharOrigin
 //          Get origin of a particular character.
 // Parameters:
diff --git a/testing/resources/font_matrix.in b/testing/resources/font_matrix.in
new file mode 100644
index 0000000..6057e86
--- /dev/null
+++ b/testing/resources/font_matrix.in
@@ -0,0 +1,59 @@
+{{header}}
+{{object 1 0}} <<
+  /Type /Catalog
+  /Pages 2 0 R
+>>
+endobj
+{{object 2 0}} <<
+  /Type /Pages
+  /Count 1
+  /Kids [ 3 0 R ]
+>>
+endobj
+{{object 3 0}} <<
+  /Type /Page
+  /Parent 2 0 R
+  /Resources <<
+    /Font <<
+      /F1 4 0 R
+    >>
+  >>
+  /Contents 5 0 R
+  /MediaBox [ 0 0 100 200 ]
+>>
+endobj
+{{object 4 0}} <<
+  /Type /Font
+  /Subtype /Type1
+  /BaseFont /Times-Roman
+>>
+endobj
+{{object 5 0}} <<
+  {{streamlen}}
+>>
+stream
+q
+12 0 0 10 30 40 cm
+BT
+3 5 Td /F1 1 Tf (A1) Tj
+ET
+Q
+q
+1 0 0 1 20 60 cm
+BT
+12 0 0 10 30 20 Tm
+-1 -2 Td /F1 1 Tf (A2) Tj
+ET
+Q
+q
+1 0 0 0.833333 10 80 cm
+BT
+50 60 Td /F1 12 Tf (A3) Tj
+ET
+Q
+endstream
+endobj
+{{xref}}
+{{trailer}}
+{{startxref}}
+%%EOF
diff --git a/testing/resources/font_matrix.pdf b/testing/resources/font_matrix.pdf
new file mode 100644
index 0000000..435a126
--- /dev/null
+++ b/testing/resources/font_matrix.pdf
@@ -0,0 +1,71 @@
+%PDF-1.7
+% ò¤ô
+1 0 obj <<
+  /Type /Catalog
+  /Pages 2 0 R
+>>
+endobj
+2 0 obj <<
+  /Type /Pages
+  /Count 1
+  /Kids [ 3 0 R ]
+>>
+endobj
+3 0 obj <<
+  /Type /Page
+  /Parent 2 0 R
+  /Resources <<
+    /Font <<
+      /F1 4 0 R
+    >>
+  >>
+  /Contents 5 0 R
+  /MediaBox [ 0 0 100 200 ]
+>>
+endobj
+4 0 obj <<
+  /Type /Font
+  /Subtype /Type1
+  /BaseFont /Times-Roman
+>>
+endobj
+5 0 obj <<
+  /Length 186
+>>
+stream
+q
+12 0 0 10 30 40 cm
+BT
+3 5 Td /F1 1 Tf (A1) Tj
+ET
+Q
+q
+1 0 0 1 20 60 cm
+BT
+12 0 0 10 30 20 Tm
+-1 -2 Td /F1 1 Tf (A2) Tj
+ET
+Q
+q
+1 0 0 0.833333 10 80 cm
+BT
+50 60 Td /F1 12 Tf (A3) Tj
+ET
+Q
+endstream
+endobj
+xref
+0 6
+0000000000 65535 f 
+0000000015 00000 n 
+0000000068 00000 n 
+0000000133 00000 n 
+0000000287 00000 n 
+0000000365 00000 n 
+trailer <<
+  /Root 1 0 R
+  /Size 6
+>>
+startxref
+603
+%%EOF