Add FPDF_StructElement_GetObjType

Get the object type (/Type) for a given element. Acrobat does not
enforce that the /Type field be the name 'StructElem'; it renders any
value in /Type. Adding this API therefore lets us inspect and validate
that value.

Change-Id: I0808b0c072cbe561e0aa2a5d1a9128a0bcd41422
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/90570
Reviewed-by: Lei Zhang <thestig@chromium.org>
Commit-Queue: Lei Zhang <thestig@chromium.org>
diff --git a/core/fpdfdoc/cpdf_structelement.cpp b/core/fpdfdoc/cpdf_structelement.cpp
index 56cd410..dda016b 100644
--- a/core/fpdfdoc/cpdf_structelement.cpp
+++ b/core/fpdfdoc/cpdf_structelement.cpp
@@ -49,6 +49,10 @@
 
 CPDF_StructElement::~CPDF_StructElement() = default;
 
+ByteString CPDF_StructElement::GetObjType() const {
+  return GetDict()->GetStringFor("Type");
+}
+
 WideString CPDF_StructElement::GetAltText() const {
   return GetDict()->GetUnicodeTextFor("Alt");
 }
diff --git a/core/fpdfdoc/cpdf_structelement.h b/core/fpdfdoc/cpdf_structelement.h
index 5aac005..efeb2dc 100644
--- a/core/fpdfdoc/cpdf_structelement.h
+++ b/core/fpdfdoc/cpdf_structelement.h
@@ -22,6 +22,7 @@
   CONSTRUCT_VIA_MAKE_RETAIN;
 
   ByteString GetType() const { return m_Type; }
+  ByteString GetObjType() const;
   WideString GetAltText() const;
   WideString GetActualText() const;
   WideString GetTitle() const;
diff --git a/fpdfsdk/fpdf_structtree.cpp b/fpdfsdk/fpdf_structtree.cpp
index 5dc4ec1..a8800cd 100644
--- a/fpdfsdk/fpdf_structtree.cpp
+++ b/fpdfsdk/fpdf_structtree.cpp
@@ -166,6 +166,18 @@
 }
 
 FPDF_EXPORT unsigned long FPDF_CALLCONV
+FPDF_StructElement_GetObjType(FPDF_STRUCTELEMENT struct_element,
+                              void* buffer,
+                              unsigned long buflen) {
+  CPDF_StructElement* elem =
+      CPDFStructElementFromFPDFStructElement(struct_element);
+  return elem ? WideStringToBuffer(
+                    WideString::FromUTF8(elem->GetObjType().AsStringView()),
+                    buffer, buflen)
+              : 0;
+}
+
+FPDF_EXPORT unsigned long FPDF_CALLCONV
 FPDF_StructElement_GetTitle(FPDF_STRUCTELEMENT struct_element,
                             void* buffer,
                             unsigned long buflen) {
diff --git a/fpdfsdk/fpdf_structtree_embeddertest.cpp b/fpdfsdk/fpdf_structtree_embeddertest.cpp
index 6a57631..bc26022 100644
--- a/fpdfsdk/fpdf_structtree_embeddertest.cpp
+++ b/fpdfsdk/fpdf_structtree_embeddertest.cpp
@@ -368,6 +368,58 @@
   UnloadPage(page);
 }
 
+TEST_F(FPDFStructTreeEmbedderTest, GetObjType) {
+  ASSERT_TRUE(OpenDocument("tagged_table_bad_elem.pdf"));
+  FPDF_PAGE page = LoadPage(0);
+  ASSERT_TRUE(page);
+
+  {
+    ScopedFPDFStructTree struct_tree(FPDF_StructTree_GetForPage(page));
+    ASSERT_TRUE(struct_tree);
+    ASSERT_EQ(1, FPDF_StructTree_CountChildren(struct_tree.get()));
+
+    FPDF_STRUCTELEMENT child =
+        FPDF_StructTree_GetChildAtIndex(struct_tree.get(), 0);
+    ASSERT_TRUE(child);
+
+    // Test nullptr inputs.
+    unsigned short buffer[28] = {};
+    ASSERT_EQ(0U,
+              FPDF_StructElement_GetObjType(nullptr, buffer, sizeof(buffer)));
+    ASSERT_EQ(0U, FPDF_StructElement_GetObjType(nullptr, nullptr, 0));
+    ASSERT_EQ(22U, FPDF_StructElement_GetObjType(child, nullptr, 0));
+
+    // Deliberately pass in a small buffer size to make sure `buffer` remains
+    // untouched.
+    ASSERT_EQ(22U, FPDF_StructElement_GetObjType(child, buffer, 1));
+    for (size_t i = 0; i < pdfium::size(buffer); ++i)
+      EXPECT_EQ(0U, buffer[i]);
+
+    ASSERT_EQ(22U,
+              FPDF_StructElement_GetObjType(child, buffer, sizeof(buffer)));
+    EXPECT_EQ(L"StructElem", GetPlatformWString(buffer));
+
+    ASSERT_EQ(1, FPDF_StructElement_CountChildren(child));
+    FPDF_STRUCTELEMENT gchild = FPDF_StructElement_GetChildAtIndex(child, 0);
+    memset(buffer, 0, sizeof(buffer));
+    // Missing /Type in `gchild`
+    ASSERT_EQ(0U,
+              FPDF_StructElement_GetObjType(gchild, buffer, sizeof(buffer)));
+    // Buffer is untouched.
+    for (size_t i = 0; i < pdfium::size(buffer); ++i)
+      EXPECT_EQ(0U, buffer[i]);
+
+    ASSERT_EQ(1, FPDF_StructElement_CountChildren(gchild));
+    FPDF_STRUCTELEMENT ggchild = FPDF_StructElement_GetChildAtIndex(gchild, 0);
+    ASSERT_EQ(28U,
+              FPDF_StructElement_GetObjType(ggchild, buffer, sizeof(buffer)));
+    // Reading bad elem also works.
+    EXPECT_EQ(L"NotStructElem", GetPlatformWString(buffer));
+  }
+
+  UnloadPage(page);
+}
+
 TEST_F(FPDFStructTreeEmbedderTest, GetTitle) {
   ASSERT_TRUE(OpenDocument("tagged_alt_text.pdf"));
   FPDF_PAGE page = LoadPage(0);
diff --git a/fpdfsdk/fpdf_view_c_api_test.c b/fpdfsdk/fpdf_view_c_api_test.c
index 99c53a3..3d75908 100644
--- a/fpdfsdk/fpdf_view_c_api_test.c
+++ b/fpdfsdk/fpdf_view_c_api_test.c
@@ -359,6 +359,7 @@
     CHK(FPDF_StructElement_GetID);
     CHK(FPDF_StructElement_GetLang);
     CHK(FPDF_StructElement_GetMarkedContentID);
+    CHK(FPDF_StructElement_GetObjType);
     CHK(FPDF_StructElement_GetStringAttribute);
     CHK(FPDF_StructElement_GetTitle);
     CHK(FPDF_StructElement_GetType);
diff --git a/public/fpdf_structtree.h b/public/fpdf_structtree.h
index 4784bd4..8daccdb 100644
--- a/public/fpdf_structtree.h
+++ b/public/fpdf_structtree.h
@@ -190,6 +190,27 @@
                            void* buffer,
                            unsigned long buflen);
 
+// Experimental API.
+// Function: FPDF_StructElement_GetObjType
+//           Get the object type (/Type) for a given element.
+// Parameters:
+//           struct_element - Handle to the struct element.
+//           buffer         - A buffer for output. May be NULL.
+//           buflen         - The length of the buffer, in bytes. May be 0.
+// Return value:
+//           The number of bytes in the object type, including the terminating
+//           NUL character, or 0 if |struct_element| is NULL or has no /Type.
+//           The count is returned regardless of |buffer| and |buflen|.
+// Comments:
+//           Regardless of the platform, the |buffer| is always in UTF-16LE
+//           encoding. The string is terminated by a UTF16 NUL character. If
+//           |buflen| is less than the required length, or |buffer| is NULL,
+//           |buffer| will not be modified.
+FPDF_EXPORT unsigned long FPDF_CALLCONV
+FPDF_StructElement_GetObjType(FPDF_STRUCTELEMENT struct_element,
+                              void* buffer,
+                              unsigned long buflen);
+
 // Function: FPDF_StructElement_GetTitle
 //           Get the title (/T) for a given element.
 // Parameters: