Add title (/T) extraction for PDF tagged structures

This CL adds the ability to extract the title (/T entry) of a tagged structure
element, if the element has one.

Bug: pdfium:672
Change-Id: I22e2a8371db4f08b8a70dd77002f1befab97f530
Reviewed-on: https://pdfium-review.googlesource.com/3819
Reviewed-by: Lei Zhang <thestig@chromium.org>
Reviewed-by: Tom Sepez <tsepez@chromium.org>
Commit-Queue: dsinclair <dsinclair@chromium.org>
diff --git a/core/fpdfdoc/cpdf_structelement.cpp b/core/fpdfdoc/cpdf_structelement.cpp
index 137d5b3..c85ae0d 100644
--- a/core/fpdfdoc/cpdf_structelement.cpp
+++ b/core/fpdfdoc/cpdf_structelement.cpp
@@ -33,7 +33,8 @@
     : m_pTree(pTree),
       m_pParent(pParent),
       m_pDict(pDict),
-      m_Type(pDict->GetStringFor("S")) {
+      m_Type(pDict->GetStringFor("S")),
+      m_Title(pDict->GetStringFor("T")) {
   if (pTree->GetRoleMap()) {
     CFX_ByteString mapped = pTree->GetRoleMap()->GetStringFor(m_Type);
     if (!mapped.IsEmpty())
diff --git a/core/fpdfdoc/cpdf_structelement.h b/core/fpdfdoc/cpdf_structelement.h
index ba0685e..c65363d 100644
--- a/core/fpdfdoc/cpdf_structelement.h
+++ b/core/fpdfdoc/cpdf_structelement.h
@@ -39,6 +39,7 @@
   friend CFX_RetainPtr<T> pdfium::MakeRetain(Args&&... args);
 
   const CFX_ByteString& GetType() const { return m_Type; }
+  const CFX_ByteString& GetTitle() const { return m_Title; }
   CPDF_Dictionary* GetDict() const { return m_pDict; }
 
   int CountKids() const;
@@ -58,6 +59,7 @@
   CPDF_StructElement* const m_pParent;
   CPDF_Dictionary* const m_pDict;
   CFX_ByteString m_Type;
+  CFX_ByteString m_Title;
   std::vector<CPDF_StructKid> m_Kids;
 };
 
diff --git a/fpdfsdk/fpdf_structtree.cpp b/fpdfsdk/fpdf_structtree.cpp
index 96d40b4..74c44f8 100644
--- a/fpdfsdk/fpdf_structtree.cpp
+++ b/fpdfsdk/fpdf_structtree.cpp
@@ -83,6 +83,16 @@
               : 0;
 }
 
+DLLEXPORT unsigned long STDCALL
+FPDF_StructElement_GetTitle(FPDF_STRUCTELEMENT struct_element,
+                            void* buffer,
+                            unsigned long buflen) {
+  CPDF_StructElement* elem = ToStructTreeElement(struct_element);
+  if (!elem)
+    return 0;  // ToStructTreeElement() yielded no element.
+  return WideStringToBuffer(elem->GetTitle().UTF8Decode(), buffer, buflen);
+}
+
 DLLEXPORT int STDCALL
 FPDF_StructElement_CountChildren(FPDF_STRUCTELEMENT struct_element) {
   CPDF_StructElement* elem = ToStructTreeElement(struct_element);
diff --git a/public/fpdf_structtree.h b/public/fpdf_structtree.h
index 6f85d42..9cf46cc 100644
--- a/public/fpdf_structtree.h
+++ b/public/fpdf_structtree.h
@@ -93,6 +93,26 @@
                            void* buffer,
                            unsigned long buflen);
 
+// Function: FPDF_StructElement_GetTitle
+//           Get the title (/T) for a given element.
+// Parameters:
+//           struct_element - Handle to the struct element.
+//           buffer         - A buffer for output. May be NULL.
+//           buflen         - The length of the buffer, in bytes. May be 0.
+// Return value:
+//           The number of bytes in the title, including the terminating NUL
+//           character, regardless of the |buffer| and |buflen| parameters.
+//           Returns 0 if |struct_element| does not resolve to an element.
+// Comments:
+//           Regardless of the platform, the |buffer| is always in UTF-16LE
+//           encoding. The string is terminated by a UTF16 NUL character. If
+//           |buflen| is less than the required length, or |buffer| is NULL,
+//           |buffer| will not be modified.
+DLLEXPORT unsigned long STDCALL
+FPDF_StructElement_GetTitle(FPDF_STRUCTELEMENT struct_element,
+                            void* buffer,
+                            unsigned long buflen);
+
 // Function: FPDF_StructElement_CountChildren
 //          Count the number of children for the structure element.
 // Parameters:
diff --git a/samples/pdfium_test.cc b/samples/pdfium_test.cc
index 1dc76fe..d2b3c01 100644
--- a/samples/pdfium_test.cc
+++ b/samples/pdfium_test.cc
@@ -641,6 +641,11 @@
   printf("%*s%ls", indent * 2, "", ConvertToWString(buf, len).c_str());
 
   memset(buf, 0, sizeof(buf));
+  len = FPDF_StructElement_GetTitle(child, buf, kBufSize);
+  if (len > 0)
+    printf(": '%ls'", ConvertToWString(buf, len).c_str());
+
+  memset(buf, 0, sizeof(buf));
   len = FPDF_StructElement_GetAltText(child, buf, kBufSize);
   if (len > 0)
     printf(" (%ls)", ConvertToWString(buf, len).c_str());