Implement FPDF_StructElement_GetChildMarkedContentID
Similar to FPDF_StructElement_GetChildAtIndex, but it retrieves the child's
marked content ID when the child is page or stream content.
Change-Id: I8f4f65e02e01e47b5af0b52968279d910402a376
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/112790
Commit-Queue: Lei Zhang <thestig@chromium.org>
Auto-Submit: Dmitry Panteleev <dpantele@google.com>
Reviewed-by: Lei Zhang <thestig@chromium.org>
diff --git a/core/fpdfdoc/cpdf_structelement.cpp b/core/fpdfdoc/cpdf_structelement.cpp
index 95e887f..18ca39e 100644
--- a/core/fpdfdoc/cpdf_structelement.cpp
+++ b/core/fpdfdoc/cpdf_structelement.cpp
@@ -87,6 +87,13 @@
: nullptr;
}
+int CPDF_StructElement::GetKidContentId(size_t index) const {
+ return m_Kids[index].m_Type == Kid::kStreamContent ||
+ m_Kids[index].m_Type == Kid::kPageContent
+ ? m_Kids[index].m_ContentId  // Stream/page content kid: its ID.
+ : -1;  // Any other kid type: no content ID.
+}
+
bool CPDF_StructElement::UpdateKidIfElement(const CPDF_Dictionary* pDict,
CPDF_StructElement* pElement) {
bool bSave = false;
diff --git a/core/fpdfdoc/cpdf_structelement.h b/core/fpdfdoc/cpdf_structelement.h
index 72a0df3..95c3f9e 100644
--- a/core/fpdfdoc/cpdf_structelement.h
+++ b/core/fpdfdoc/cpdf_structelement.h
@@ -34,6 +34,7 @@
size_t CountKids() const;
CPDF_StructElement* GetKidIfElement(size_t index) const;
+ int GetKidContentId(size_t index) const;  // -1 if not page/stream content.
bool UpdateKidIfElement(const CPDF_Dictionary* pDict,
CPDF_StructElement* pElement);
diff --git a/fpdfsdk/fpdf_structtree.cpp b/fpdfsdk/fpdf_structtree.cpp
index 4817f47..8e0c9b9 100644
--- a/fpdfsdk/fpdf_structtree.cpp
+++ b/fpdfsdk/fpdf_structtree.cpp
@@ -270,6 +270,18 @@
return FPDFStructElementFromCPDFStructElement(elem->GetKidIfElement(index));
}
+FPDF_EXPORT int FPDF_CALLCONV
+FPDF_StructElement_GetChildMarkedContentID(FPDF_STRUCTELEMENT struct_element,
+ int index) {
+ CPDF_StructElement* elem =
+ CPDFStructElementFromFPDFStructElement(struct_element);
+ if (!elem || index < 0 || static_cast<size_t>(index) >= elem->CountKids()) {
+ return -1;  // Null handle or index outside [0, CountKids()).
+ }
+
+ return elem->GetKidContentId(index);  // -1 if not page/stream content.
+}
+
FPDF_EXPORT FPDF_STRUCTELEMENT FPDF_CALLCONV
FPDF_StructElement_GetParent(FPDF_STRUCTELEMENT struct_element) {
CPDF_StructElement* elem =
diff --git a/fpdfsdk/fpdf_structtree_embeddertest.cpp b/fpdfsdk/fpdf_structtree_embeddertest.cpp
index 2704887..78a3b87 100644
--- a/fpdfsdk/fpdf_structtree_embeddertest.cpp
+++ b/fpdfsdk/fpdf_structtree_embeddertest.cpp
@@ -389,6 +389,53 @@
UnloadPage(page);
}
+TEST_F(FPDFStructTreeEmbedderTest, GetChildMarkedContentID) {
+ ASSERT_TRUE(OpenDocument("tagged_mcr_multipage.pdf"));
+
+ // Run the same checks on both pages to make the per-page difference clear.
+ for (int page_i : {0, 1}) {
+ FPDF_PAGE page = LoadPage(page_i);
+ ASSERT_TRUE(page);
+ ScopedFPDFStructTree struct_tree(FPDF_StructTree_GetForPage(page));
+ ASSERT_TRUE(struct_tree);
+ ASSERT_EQ(1, FPDF_StructTree_CountChildren(struct_tree.get()));
+
+ FPDF_STRUCTELEMENT struct_doc =
+ FPDF_StructTree_GetChildAtIndex(struct_tree.get(), 0);
+ ASSERT_TRUE(struct_doc);
+ EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentID(struct_doc));
+
+ ASSERT_EQ(2, FPDF_StructElement_CountChildren(struct_doc));
+ FPDF_STRUCTELEMENT child1 =
+ FPDF_StructElement_GetChildAtIndex(struct_doc, 0);
+ EXPECT_FALSE(child1);
+ FPDF_STRUCTELEMENT child2 =
+ FPDF_StructElement_GetChildAtIndex(struct_doc, 1);
+ EXPECT_FALSE(child2);
+
+ EXPECT_EQ(2, FPDF_StructElement_GetMarkedContentIdCount(struct_doc));
+
+ // Both MCIDs are returned as if they were part of this page, although
+ // they are not. So `FPDF_StructElement_GetMarkedContentIdAtIndex(...)`
+ // does not work for a StructElement spanning multiple pages.
+ EXPECT_EQ(0, FPDF_StructElement_GetMarkedContentIdAtIndex(struct_doc, 0));
+ EXPECT_EQ(0, FPDF_StructElement_GetMarkedContentIdAtIndex(struct_doc, 1));
+
+ // The first kid is marked content on page 1 and the second on page 2, so
+ // the results differ depending on which page is loaded.
+ EXPECT_EQ(page_i == 0 ? 0 : -1,
+ FPDF_StructElement_GetChildMarkedContentID(struct_doc, 0));
+ EXPECT_EQ(page_i == 1 ? 0 : -1,
+ FPDF_StructElement_GetChildMarkedContentID(struct_doc, 1));
+ // Invalid indices: negative and one past the last child.
+ EXPECT_EQ(-1, FPDF_StructElement_GetChildMarkedContentID(struct_doc, -1));
+ EXPECT_EQ(-1, FPDF_StructElement_GetChildMarkedContentID(struct_doc, 2));
+ // Null element handle.
+ EXPECT_EQ(-1, FPDF_StructElement_GetChildMarkedContentID(nullptr, 0));
+ UnloadPage(page);
+ }
+}
+
TEST_F(FPDFStructTreeEmbedderTest, GetType) {
ASSERT_TRUE(OpenDocument("tagged_alt_text.pdf"));
FPDF_PAGE page = LoadPage(0);
diff --git a/fpdfsdk/fpdf_view_c_api_test.c b/fpdfsdk/fpdf_view_c_api_test.c
index 90f6052..15464dd 100644
--- a/fpdfsdk/fpdf_view_c_api_test.c
+++ b/fpdfsdk/fpdf_view_c_api_test.c
@@ -375,6 +375,7 @@
CHK(FPDF_StructElement_GetAttributeAtIndex);
CHK(FPDF_StructElement_GetAttributeCount);
CHK(FPDF_StructElement_GetChildAtIndex);
+ CHK(FPDF_StructElement_GetChildMarkedContentID);
CHK(FPDF_StructElement_GetID);
CHK(FPDF_StructElement_GetLang);
CHK(FPDF_StructElement_GetMarkedContentID);
diff --git a/public/fpdf_structtree.h b/public/fpdf_structtree.h
index a6c158a..69c5f67 100644
--- a/public/fpdf_structtree.h
+++ b/public/fpdf_structtree.h
@@ -255,6 +255,23 @@
int index);
// Experimental API.
+// Function: FPDF_StructElement_GetChildMarkedContentID
+// Get the marked content ID of a child of the structure element.
+// Parameters:
+// struct_element - Handle to the struct element.
+// index - The index for the child, 0-based.
+// Return value:
+// The marked content ID of the child. If no ID exists, returns -1.
+// Comments:
+// If the child exists but is not stream or page content, this function
+// returns -1. It also returns -1 for a null struct_element or an out of
+// bounds index. Unlike FPDF_StructElement_GetMarkedContentIdAtIndex, the
+// result is scoped to the current page.
+FPDF_EXPORT int FPDF_CALLCONV
+FPDF_StructElement_GetChildMarkedContentID(FPDF_STRUCTELEMENT struct_element,
+ int index);
+
+// Experimental API.
// Function: FPDF_StructElement_GetParent
// Get the parent of the structure element.
// Parameters:
diff --git a/testing/resources/tagged_mcr_multipage.in b/testing/resources/tagged_mcr_multipage.in
new file mode 100644
index 0000000..af05da4
--- /dev/null
+++ b/testing/resources/tagged_mcr_multipage.in
@@ -0,0 +1,119 @@
+{{header}}
+{{object 1 0}} <<
+ /Type /Catalog
+ /MarkInfo <<
+ /Type /MarkInfo
+ /Marked true
+ >>
+ /Pages 2 0 R
+ /StructTreeRoot 8 0 R
+>>
+endobj
+{{object 2 0}} <<
+ /Type /Pages
+ /CropBox [ 10.8197 8.459 605.705 801.639 ]
+ /MediaBox [ 0.0 0.0 616.721 809.902 ]
+ /Count 2
+ /Kids [
+ 4 0 R
+ 6 0 R
+ ]
+>>
+endobj
+{{object 3 0}} <<
+ /Type /Font
+ /Subtype /Type1
+ /BaseFont /Times-Roman
+>>
+endobj
+{{object 4 0}} <<
+ /Type /Page
+ /Tabs /S
+ /Parent 2 0 R
+ /StructParents 0
+ /Contents 5 0 R
+ /Resources <<
+ /ProcSet [/PDF /Text]
+ /Font <<
+ /F1 3 0 R
+ >>
+ >>
+>>
+endobj
+{{object 5 0}} <<
+ {{streamlen}}
+>>
+stream
+BT
+/Document <</MCID 0 >>BDC
+0 i
+/F1 1 Tf
+12 0 0 12 43.073 771.625 Tm
+(1)Tj
+EMC
+ET
+endstream
+endobj
+{{object 6 0}} <<
+ /Type /Page
+ /Tabs /S
+ /Parent 2 0 R
+ /StructParents 1
+ /Contents 7 0 R
+ /Resources <<
+ /ProcSet [/PDF /Text]
+ /Font <<
+ /F1 3 0 R
+ >>
+ >>
+>>
+endobj
+{{object 7 0}} <<
+ {{streamlen}}
+>>
+stream
+BT
+/Document <</MCID 0 >>BDC
+0 i
+/F1 1 Tf
+12 0 0 12 43.073 771.625 Tm
+(2)Tj
+EMC
+ET
+endstream
+endobj
+{{object 8 0}} <<
+ /Type /StructTreeRoot
+ /K 10 0 R
+ /ParentTree 9 0 R
+ /ParentTreeNextKey 2
+>>
+endobj
+{{object 9 0}} <<
+ /Nums [
+ 0
+ [10 0 R]
+ 1
+ [10 0 R]
+ ]
+>>
+endobj
+{{object 10 0}} <<
+ /T ()
+ /S /Document
+ /P 8 0 R
+ /Pg 4 0 R
+ /K [
+ 0
+ <<
+ /MCID 0
+ /Pg 6 0 R
+ /Type /MCR
+ >>
+ ]
+>>
+%endobj
+{{xref}}
+{{trailer}}
+{{startxref}}
+%%EOF
diff --git a/testing/resources/tagged_mcr_multipage.pdf b/testing/resources/tagged_mcr_multipage.pdf
new file mode 100644
index 0000000..fcc5faf
--- /dev/null
+++ b/testing/resources/tagged_mcr_multipage.pdf
@@ -0,0 +1,136 @@
+%PDF-1.7
+% ò¤ô
+1 0 obj <<
+ /Type /Catalog
+ /MarkInfo <<
+ /Type /MarkInfo
+ /Marked true
+ >>
+ /Pages 2 0 R
+ /StructTreeRoot 8 0 R
+>>
+endobj
+2 0 obj <<
+ /Type /Pages
+ /CropBox [ 10.8197 8.459 605.705 801.639 ]
+ /MediaBox [ 0.0 0.0 616.721 809.902 ]
+ /Count 2
+ /Kids [
+ 4 0 R
+ 6 0 R
+ ]
+>>
+endobj
+3 0 obj <<
+ /Type /Font
+ /Subtype /Type1
+ /BaseFont /Times-Roman
+>>
+endobj
+4 0 obj <<
+ /Type /Page
+ /Tabs /S
+ /Parent 2 0 R
+ /StructParents 0
+ /Contents 5 0 R
+ /Resources <<
+ /ProcSet [/PDF /Text]
+ /Font <<
+ /F1 3 0 R
+ >>
+ >>
+>>
+endobj
+5 0 obj <<
+ /Length 83
+>>
+stream
+BT
+/Document <</MCID 0 >>BDC
+0 i
+/F1 1 Tf
+12 0 0 12 43.073 771.625 Tm
+(1)Tj
+EMC
+ET
+endstream
+endobj
+6 0 obj <<
+ /Type /Page
+ /Tabs /S
+ /Parent 2 0 R
+ /StructParents 1
+ /Contents 7 0 R
+ /Resources <<
+ /ProcSet [/PDF /Text]
+ /Font <<
+ /F1 3 0 R
+ >>
+ >>
+>>
+endobj
+7 0 obj <<
+ /Length 83
+>>
+stream
+BT
+/Document <</MCID 0 >>BDC
+0 i
+/F1 1 Tf
+12 0 0 12 43.073 771.625 Tm
+(2)Tj
+EMC
+ET
+endstream
+endobj
+8 0 obj <<
+ /Type /StructTreeRoot
+ /K 10 0 R
+ /ParentTree 9 0 R
+ /ParentTreeNextKey 2
+>>
+endobj
+9 0 obj <<
+ /Nums [
+ 0
+ [10 0 R]
+ 1
+ [10 0 R]
+ ]
+>>
+endobj
+10 0 obj <<
+ /T ()
+ /S /Document
+ /P 8 0 R
+ /Pg 4 0 R
+ /K [
+ 0
+ <<
+ /MCID 0
+ /Pg 6 0 R
+ /Type /MCR
+ >>
+ ]
+>>
+%endobj
+xref
+0 11
+0000000000 65535 f
+0000000015 00000 n
+0000000149 00000 n
+0000000315 00000 n
+0000000393 00000 n
+0000000575 00000 n
+0000000709 00000 n
+0000000891 00000 n
+0000001025 00000 n
+0000001125 00000 n
+0000001198 00000 n
+trailer <<
+ /Root 1 0 R
+ /Size 11
+>>
+startxref
+1345
+%%EOF