Make FPDF_StructElement_GetTitle() handle UTF-16BE correctly.

Modify tagged_alt_text.pdf and its source file to add a title with
UTF-16BE encoding. Then enhance FPDFStructTreeEmbedderTest to read out
the value. To make the test pass, get the title from its dictionary as
Unicode text instead of a ByteString.

Also change CPDF_StructElement to stop storing the title, and get the
value from the dictionary directly in its GetTitle() method as needed.
Along the way, make the code more consistent by removing null checks
on CPDF_StructElement::GetDict(), which never returns nullptr.

BUG=pdfium:1298

Change-Id: I8fc89b88315181dc4770c9cc24b4aa292b54d068
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/54570
Commit-Queue: Lei Zhang <thestig@chromium.org>
Reviewed-by: Tom Sepez <tsepez@chromium.org>
diff --git a/core/fpdfdoc/cpdf_structelement.cpp b/core/fpdfdoc/cpdf_structelement.cpp
index c42faa3..1e7e5dc 100644
--- a/core/fpdfdoc/cpdf_structelement.cpp
+++ b/core/fpdfdoc/cpdf_structelement.cpp
@@ -33,8 +33,7 @@
     : m_pTree(pTree),
       m_pParent(pParent),
       m_pDict(pDict),
-      m_Type(pDict->GetStringFor("S")),
-      m_Title(pDict->GetStringFor("T")) {
+      m_Type(pDict->GetStringFor("S")) {
   if (pTree->GetRoleMap()) {
     ByteString mapped = pTree->GetRoleMap()->GetStringFor(m_Type);
     if (!mapped.IsEmpty())
@@ -45,6 +44,10 @@
 
 CPDF_StructElement::~CPDF_StructElement() = default;
 
+WideString CPDF_StructElement::GetTitle() const {
+  return GetDict()->GetUnicodeTextFor("T");
+}
+
 size_t CPDF_StructElement::CountKids() const {
   return m_Kids.size();
 }
diff --git a/core/fpdfdoc/cpdf_structelement.h b/core/fpdfdoc/cpdf_structelement.h
index 2dcdc97..359662e 100644
--- a/core/fpdfdoc/cpdf_structelement.h
+++ b/core/fpdfdoc/cpdf_structelement.h
@@ -39,7 +39,9 @@
   friend RetainPtr<T> pdfium::MakeRetain(Args&&... args);
 
   ByteString GetType() const { return m_Type; }
-  ByteString GetTitle() const { return m_Title; }
+  WideString GetTitle() const;
+
+  // Never returns nullptr.
   const CPDF_Dictionary* GetDict() const { return m_pDict.Get(); }
 
   size_t CountKids() const;
@@ -61,7 +63,6 @@
   UnownedPtr<CPDF_StructElement> const m_pParent;
   UnownedPtr<const CPDF_Dictionary> const m_pDict;
   ByteString m_Type;
-  ByteString m_Title;
   std::vector<CPDF_StructKid> m_Kids;
 };
 
diff --git a/fpdfsdk/fpdf_structtree.cpp b/fpdfsdk/fpdf_structtree.cpp
index 6623f39..d0b0972 100644
--- a/fpdfsdk/fpdf_structtree.cpp
+++ b/fpdfsdk/fpdf_structtree.cpp
@@ -74,18 +74,16 @@
                               unsigned long buflen) {
   CPDF_StructElement* elem =
       CPDFStructElementFromFPDFStructElement(struct_element);
-  return (elem && elem->GetDict())
-             ? WideStringToBuffer(elem->GetDict()->GetUnicodeTextFor("Alt"),
-                                  buffer, buflen)
-             : 0;
+  return elem ? WideStringToBuffer(elem->GetDict()->GetUnicodeTextFor("Alt"),
+                                   buffer, buflen)
+              : 0;
 }
 
 FPDF_EXPORT int FPDF_CALLCONV
 FPDF_StructElement_GetMarkedContentID(FPDF_STRUCTELEMENT struct_element) {
   CPDF_StructElement* elem =
       CPDFStructElementFromFPDFStructElement(struct_element);
-  const CPDF_Object* p =
-      (elem && elem->GetDict()) ? elem->GetDict()->GetObjectFor("K") : nullptr;
+  const CPDF_Object* p = elem ? elem->GetDict()->GetObjectFor("K") : nullptr;
   return p && p->IsNumber() ? p->GetInteger() : -1;
 }
 
@@ -107,10 +105,7 @@
                             unsigned long buflen) {
   CPDF_StructElement* elem =
       CPDFStructElementFromFPDFStructElement(struct_element);
-  return elem ? WideStringToBuffer(
-                    WideString::FromUTF8(elem->GetTitle().AsStringView()),
-                    buffer, buflen)
-              : 0;
+  return elem ? WideStringToBuffer(elem->GetTitle(), buffer, buflen) : 0;
 }
 
 FPDF_EXPORT int FPDF_CALLCONV
diff --git a/fpdfsdk/fpdf_structtree_embeddertest.cpp b/fpdfsdk/fpdf_structtree_embeddertest.cpp
index c68e2eb..bbaa115 100644
--- a/fpdfsdk/fpdf_structtree_embeddertest.cpp
+++ b/fpdfsdk/fpdf_structtree_embeddertest.cpp
@@ -144,7 +144,7 @@
     ASSERT_NE(nullptr, element);
 
     // test nullptr inputs
-    unsigned short buffer[12];
+    unsigned short buffer[13];
     ASSERT_EQ(0U, FPDF_StructElement_GetTitle(nullptr, buffer, sizeof(buffer)));
     ASSERT_EQ(0U, FPDF_StructElement_GetTitle(nullptr, nullptr, 0));
     ASSERT_EQ(20U, FPDF_StructElement_GetTitle(element, nullptr, 0));
@@ -162,6 +162,17 @@
     const wchar_t kExpected[] = L"TitleText";
     EXPECT_EQ(WideString(kExpected),
               WideString::FromUTF16LE(buffer, FXSYS_len(kExpected)));
+
+    ASSERT_EQ(1, FPDF_StructElement_CountChildren(element));
+    FPDF_STRUCTELEMENT child_element =
+        FPDF_StructElement_GetChildAtIndex(element, 0);
+    ASSERT_NE(nullptr, child_element);
+    // 26 bytes: 12 UTF-16 code units of text plus the terminator.
+    ASSERT_EQ(26U, FPDF_StructElement_GetTitle(child_element, buffer,
+                                               sizeof(buffer)));
+    const wchar_t kChildExpected[] = L"symbol: 100k";
+    EXPECT_EQ(WideString(kChildExpected),
+              WideString::FromUTF16LE(buffer, FXSYS_len(kChildExpected)));
   }
 
   UnloadPage(page);
diff --git a/testing/resources/tagged_alt_text.in b/testing/resources/tagged_alt_text.in
index 7ea5ac0..42ec7f1 100644
--- a/testing/resources/tagged_alt_text.in
+++ b/testing/resources/tagged_alt_text.in
@@ -128,8 +128,8 @@
   /Type /StructElem
   /S /Document
   /K [12 0 R]
-  /T (TitleText)
   /P 8 0 R
+  /T (TitleText)
   /Pg 3 0 R
 >>
 endobj
@@ -139,6 +139,7 @@
   /A 14 0 R
   /K [10 0 R]
   /P 11 0 R
+  /T <feff00730079006d0062006f006c003a0020003100300030006b>
   /Pg 3 0 R
 >>
 endobj
diff --git a/testing/resources/tagged_alt_text.pdf b/testing/resources/tagged_alt_text.pdf
index 73b4988..c75504a 100644
--- a/testing/resources/tagged_alt_text.pdf
+++ b/testing/resources/tagged_alt_text.pdf
@@ -129,8 +129,8 @@
   /Type /StructElem
   /S /Document
   /K [12 0 R]
-  /T (TitleText)
   /P 8 0 R
+  /T (TitleText)
   /Pg 3 0 R
 >>
 endobj
@@ -140,6 +140,7 @@
   /A 14 0 R
   /K [10 0 R]
   /P 11 0 R
+  /T <feff00730079006d0062006f006c003a0020003100300030006b>
   /Pg 3 0 R
 >>
 endobj
@@ -171,12 +172,12 @@
 0000001454 00000 n 
 0000001612 00000 n 
 0000001723 00000 n 
-0000001830 00000 n 
-0000001948 00000 n 
+0000001890 00000 n 
+0000002008 00000 n 
 trailer <<
   /Root 1 0 R
   /Size 15
 >>
 startxref
-2003
+2063
 %%EOF