Implement FPDF_StructElement_GetChildMarkedContentID

Similar to FPDF_StructElement_GetChildAtIndex, but it retrieves the child's
marked content ID when the child is page or stream content.

Change-Id: I8f4f65e02e01e47b5af0b52968279d910402a376
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/112790
Commit-Queue: Lei Zhang <thestig@chromium.org>
Auto-Submit: Dmitry Panteleev <dpantele@google.com>
Reviewed-by: Lei Zhang <thestig@chromium.org>
diff --git a/core/fpdfdoc/cpdf_structelement.cpp b/core/fpdfdoc/cpdf_structelement.cpp
index 95e887f..18ca39e 100644
--- a/core/fpdfdoc/cpdf_structelement.cpp
+++ b/core/fpdfdoc/cpdf_structelement.cpp
@@ -87,6 +87,13 @@
                                                : nullptr;
 }
 
+int CPDF_StructElement::GetKidContentId(size_t index) const {
+  const auto& kid = m_Kids[index];
+  const bool has_content_id =
+      kid.m_Type == Kid::kStreamContent || kid.m_Type == Kid::kPageContent;
+  return has_content_id ? kid.m_ContentId : -1;
+}
+
 bool CPDF_StructElement::UpdateKidIfElement(const CPDF_Dictionary* pDict,
                                             CPDF_StructElement* pElement) {
   bool bSave = false;
diff --git a/core/fpdfdoc/cpdf_structelement.h b/core/fpdfdoc/cpdf_structelement.h
index 72a0df3..95c3f9e 100644
--- a/core/fpdfdoc/cpdf_structelement.h
+++ b/core/fpdfdoc/cpdf_structelement.h
@@ -34,6 +34,7 @@
 
   size_t CountKids() const;
   CPDF_StructElement* GetKidIfElement(size_t index) const;
+  int GetKidContentId(size_t index) const;
   bool UpdateKidIfElement(const CPDF_Dictionary* pDict,
                           CPDF_StructElement* pElement);
 
diff --git a/fpdfsdk/fpdf_structtree.cpp b/fpdfsdk/fpdf_structtree.cpp
index 4817f47..8e0c9b9 100644
--- a/fpdfsdk/fpdf_structtree.cpp
+++ b/fpdfsdk/fpdf_structtree.cpp
@@ -270,6 +270,18 @@
   return FPDFStructElementFromCPDFStructElement(elem->GetKidIfElement(index));
 }
 
+FPDF_EXPORT int FPDF_CALLCONV
+FPDF_StructElement_GetChildMarkedContentID(FPDF_STRUCTELEMENT struct_element,
+                                           int index) {
+  CPDF_StructElement* elem =
+      CPDFStructElementFromFPDFStructElement(struct_element);
+  if (!elem || index < 0) {
+    return -1;
+  }
+  const size_t kid_index = static_cast<size_t>(index);
+  return kid_index < elem->CountKids() ? elem->GetKidContentId(kid_index) : -1;
+}
+
 FPDF_EXPORT FPDF_STRUCTELEMENT FPDF_CALLCONV
 FPDF_StructElement_GetParent(FPDF_STRUCTELEMENT struct_element) {
   CPDF_StructElement* elem =
diff --git a/fpdfsdk/fpdf_structtree_embeddertest.cpp b/fpdfsdk/fpdf_structtree_embeddertest.cpp
index 2704887..78a3b87 100644
--- a/fpdfsdk/fpdf_structtree_embeddertest.cpp
+++ b/fpdfsdk/fpdf_structtree_embeddertest.cpp
@@ -389,6 +389,53 @@
   UnloadPage(page);
 }
 
+TEST_F(FPDFStructTreeEmbedderTest, GetChildMarkedContentID) {
+  ASSERT_TRUE(OpenDocument("tagged_mcr_multipage.pdf"));
+
+  // Run identical checks on both pages to highlight the per-page difference.
+  for (int page_i : {0, 1}) {
+    FPDF_PAGE page = LoadPage(page_i);
+    ASSERT_TRUE(page);
+    ScopedFPDFStructTree struct_tree(FPDF_StructTree_GetForPage(page));
+    ASSERT_TRUE(struct_tree);
+    ASSERT_EQ(1, FPDF_StructTree_CountChildren(struct_tree.get()));
+
+    FPDF_STRUCTELEMENT struct_doc =
+        FPDF_StructTree_GetChildAtIndex(struct_tree.get(), 0);
+    ASSERT_TRUE(struct_doc);
+    EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentID(struct_doc));
+
+    ASSERT_EQ(2, FPDF_StructElement_CountChildren(struct_doc));
+    FPDF_STRUCTELEMENT child1 =
+        FPDF_StructElement_GetChildAtIndex(struct_doc, 0);
+    EXPECT_FALSE(child1);
+    FPDF_STRUCTELEMENT child2 =
+        FPDF_StructElement_GetChildAtIndex(struct_doc, 1);
+    EXPECT_FALSE(child2);
+
+    EXPECT_EQ(2, FPDF_StructElement_GetMarkedContentIdCount(struct_doc));
+
+    // Both MCIDs are reported as if they belonged to this page, although
+    // they do not. So `FPDF_StructElement_GetMarkedContentIdAtIndex(...)`
+    // is unreliable for a StructElement spanning multiple pages.
+    EXPECT_EQ(0, FPDF_StructElement_GetMarkedContentIdAtIndex(struct_doc, 0));
+    EXPECT_EQ(0, FPDF_StructElement_GetMarkedContentIdAtIndex(struct_doc, 1));
+
+    // One kid refers to content on page 1 and the other to page 2, so the
+    // results differ depending on which page is loaded.
+    EXPECT_EQ(page_i == 0 ? 0 : -1,
+              FPDF_StructElement_GetChildMarkedContentID(struct_doc, 0));
+    EXPECT_EQ(page_i == 1 ? 0 : -1,
+              FPDF_StructElement_GetChildMarkedContentID(struct_doc, 1));
+    // Out-of-range indices.
+    EXPECT_EQ(-1, FPDF_StructElement_GetChildMarkedContentID(struct_doc, -1));
+    EXPECT_EQ(-1, FPDF_StructElement_GetChildMarkedContentID(struct_doc, 2));
+    // Null element handle.
+    EXPECT_EQ(-1, FPDF_StructElement_GetChildMarkedContentID(nullptr, 0));
+    UnloadPage(page);
+  }
+}
+
 TEST_F(FPDFStructTreeEmbedderTest, GetType) {
   ASSERT_TRUE(OpenDocument("tagged_alt_text.pdf"));
   FPDF_PAGE page = LoadPage(0);
diff --git a/fpdfsdk/fpdf_view_c_api_test.c b/fpdfsdk/fpdf_view_c_api_test.c
index 90f6052..15464dd 100644
--- a/fpdfsdk/fpdf_view_c_api_test.c
+++ b/fpdfsdk/fpdf_view_c_api_test.c
@@ -375,6 +375,7 @@
     CHK(FPDF_StructElement_GetAttributeAtIndex);
     CHK(FPDF_StructElement_GetAttributeCount);
     CHK(FPDF_StructElement_GetChildAtIndex);
+    CHK(FPDF_StructElement_GetChildMarkedContentID);
     CHK(FPDF_StructElement_GetID);
     CHK(FPDF_StructElement_GetLang);
     CHK(FPDF_StructElement_GetMarkedContentID);
diff --git a/public/fpdf_structtree.h b/public/fpdf_structtree.h
index a6c158a..69c5f67 100644
--- a/public/fpdf_structtree.h
+++ b/public/fpdf_structtree.h
@@ -255,6 +255,23 @@
                                    int index);
 
 // Experimental API.
+// Function: FPDF_StructElement_GetChildMarkedContentID
+//          Get the marked content ID of a child of the struct element.
+// Parameters:
+//          struct_element -   Handle to the struct element.
+//          index          -   The index for the child, 0-based.
+// Return value:
+//          The marked content ID of the child. If no ID exists, returns -1.
+// Comments:
+//          If the child exists but is not page or stream marked content,
+//          this function returns -1. It also returns -1 for out-of-bounds
+//          indices. Unlike FPDF_StructElement_GetMarkedContentIdAtIndex,
+//          the result is scoped to the current page.
+FPDF_EXPORT int FPDF_CALLCONV
+FPDF_StructElement_GetChildMarkedContentID(FPDF_STRUCTELEMENT struct_element,
+                                           int index);
+
+// Experimental API.
 // Function: FPDF_StructElement_GetParent
 //          Get the parent of the structure element.
 // Parameters:
diff --git a/testing/resources/tagged_mcr_multipage.in b/testing/resources/tagged_mcr_multipage.in
new file mode 100644
index 0000000..af05da4
--- /dev/null
+++ b/testing/resources/tagged_mcr_multipage.in
@@ -0,0 +1,119 @@
+{{header}}
+{{object 1 0}} <<
+  /Type /Catalog
+  /MarkInfo <<
+    /Type /MarkInfo
+    /Marked true
+  >>
+  /Pages 2 0 R
+  /StructTreeRoot 8 0 R
+>>
+endobj
+{{object 2 0}} <<
+  /Type /Pages
+  /CropBox [ 10.8197 8.459 605.705 801.639 ]
+  /MediaBox [ 0.0 0.0 616.721 809.902 ]
+  /Count 2
+  /Kids [
+    4 0 R
+    6 0 R
+  ]
+>>
+endobj
+{{object 3 0}} <<
+  /Type /Font
+  /Subtype /Type1
+  /BaseFont /Times-Roman
+>>
+endobj
+{{object 4 0}} <<
+  /Type /Page
+  /Tabs /S
+  /Parent 2 0 R
+  /StructParents 0
+  /Contents 5 0 R
+  /Resources <<
+    /ProcSet [/PDF /Text]
+    /Font <<
+      /F1 3 0 R
+    >>
+  >>
+>>
+endobj
+{{object 5 0}} <<
+  {{streamlen}}
+>>
+stream
+BT
+/Document <</MCID 0 >>BDC
+0 i
+/F1 1 Tf
+12 0 0 12 43.073 771.625 Tm
+(1)Tj
+EMC
+ET
+endstream
+endobj
+{{object 6 0}} <<
+  /Type /Page
+  /Tabs /S
+  /Parent 2 0 R
+  /StructParents 1
+  /Contents 7 0 R
+  /Resources <<
+    /ProcSet [/PDF /Text]
+    /Font <<
+      /F1 3 0 R
+    >>
+  >>
+>>
+endobj
+{{object 7 0}} <<
+  {{streamlen}}
+>>
+stream
+BT
+/Document <</MCID 0 >>BDC
+0 i
+/F1 1 Tf
+12 0 0 12 43.073 771.625 Tm
+(2)Tj
+EMC
+ET
+endstream
+endobj
+{{object 8 0}} <<
+  /Type /StructTreeRoot
+  /K 10 0 R
+  /ParentTree 9 0 R
+  /ParentTreeNextKey 2
+>>
+endobj
+{{object 9 0}} <<
+  /Nums [
+    0
+    [10 0 R]
+    1
+    [10 0 R]
+  ]
+>>
+endobj
+{{object 10 0}} <<
+  /T ()
+  /S /Document
+  /P 8 0 R
+  /Pg 4 0 R
+  /K [
+    0
+    <<
+      /MCID 0
+      /Pg 6 0 R
+      /Type /MCR
+    >>
+  ]
+>>
+%endobj
+{{xref}}
+{{trailer}}
+{{startxref}}
+%%EOF
diff --git a/testing/resources/tagged_mcr_multipage.pdf b/testing/resources/tagged_mcr_multipage.pdf
new file mode 100644
index 0000000..fcc5faf
--- /dev/null
+++ b/testing/resources/tagged_mcr_multipage.pdf
@@ -0,0 +1,136 @@
+%PDF-1.7
+% ò¤ô
+1 0 obj <<
+  /Type /Catalog
+  /MarkInfo <<
+    /Type /MarkInfo
+    /Marked true
+  >>
+  /Pages 2 0 R
+  /StructTreeRoot 8 0 R
+>>
+endobj
+2 0 obj <<
+  /Type /Pages
+  /CropBox [ 10.8197 8.459 605.705 801.639 ]
+  /MediaBox [ 0.0 0.0 616.721 809.902 ]
+  /Count 2
+  /Kids [
+    4 0 R
+    6 0 R
+  ]
+>>
+endobj
+3 0 obj <<
+  /Type /Font
+  /Subtype /Type1
+  /BaseFont /Times-Roman
+>>
+endobj
+4 0 obj <<
+  /Type /Page
+  /Tabs /S
+  /Parent 2 0 R
+  /StructParents 0
+  /Contents 5 0 R
+  /Resources <<
+    /ProcSet [/PDF /Text]
+    /Font <<
+      /F1 3 0 R
+    >>
+  >>
+>>
+endobj
+5 0 obj <<
+  /Length 83
+>>
+stream
+BT
+/Document <</MCID 0 >>BDC
+0 i
+/F1 1 Tf
+12 0 0 12 43.073 771.625 Tm
+(1)Tj
+EMC
+ET
+endstream
+endobj
+6 0 obj <<
+  /Type /Page
+  /Tabs /S
+  /Parent 2 0 R
+  /StructParents 1
+  /Contents 7 0 R
+  /Resources <<
+    /ProcSet [/PDF /Text]
+    /Font <<
+      /F1 3 0 R
+    >>
+  >>
+>>
+endobj
+7 0 obj <<
+  /Length 83
+>>
+stream
+BT
+/Document <</MCID 0 >>BDC
+0 i
+/F1 1 Tf
+12 0 0 12 43.073 771.625 Tm
+(2)Tj
+EMC
+ET
+endstream
+endobj
+8 0 obj <<
+  /Type /StructTreeRoot
+  /K 10 0 R
+  /ParentTree 9 0 R
+  /ParentTreeNextKey 2
+>>
+endobj
+9 0 obj <<
+  /Nums [
+    0
+    [10 0 R]
+    1
+    [10 0 R]
+  ]
+>>
+endobj
+10 0 obj <<
+  /T ()
+  /S /Document
+  /P 8 0 R
+  /Pg 4 0 R
+  /K [
+    0
+    <<
+      /MCID 0
+      /Pg 6 0 R
+      /Type /MCR
+    >>
+  ]
+>>
+%endobj
+xref
+0 11
+0000000000 65535 f 
+0000000015 00000 n 
+0000000149 00000 n 
+0000000315 00000 n 
+0000000393 00000 n 
+0000000575 00000 n 
+0000000709 00000 n 
+0000000891 00000 n 
+0000001025 00000 n 
+0000001125 00000 n 
+0000001198 00000 n 
+trailer <<
+  /Root 1 0 R
+  /Size 11
+>>
+startxref
+1345
+%%EOF