Add a FPDFStructTreeEmbedderTest for MCR and OBJR element types.

Provide test coverage and illustrate issues with the existing StructTree
code, which cannot retrieve inline objects.

The test PDF is derived from an HTML file with the following content:

<body>
<a name="top">
<p>hello world</p>
<p><a href="#top">link to top</a>
</body>

The HTML is converted to a tagged PDF using Chromium's headless mode
with the following flags, on the assumption that Chromium's tagged PDF
generator code works properly:

--print-to-pdf --export-tagged-pdf --print-to-pdf-no-header

The output PDF from Chromium is then minimized to produce
tagged_mcr_objr.in and tagged_mcr_objr.pdf.

Bug: pdfium:672,pdfium:1297
Change-Id: I0ba6e9a9a1bba57acf1ed052d4bec631e67d7677
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/68990
Commit-Queue: Lei Zhang <thestig@chromium.org>
Reviewed-by: Tom Sepez <tsepez@chromium.org>
diff --git a/fpdfsdk/fpdf_structtree_embeddertest.cpp b/fpdfsdk/fpdf_structtree_embeddertest.cpp
index 21cbffd..f527877 100644
--- a/fpdfsdk/fpdf_structtree_embeddertest.cpp
+++ b/fpdfsdk/fpdf_structtree_embeddertest.cpp
@@ -21,33 +21,33 @@
 
     FPDF_STRUCTELEMENT element =
         FPDF_StructTree_GetChildAtIndex(struct_tree.get(), -1);
-    EXPECT_EQ(nullptr, element);
+    EXPECT_FALSE(element);
     element = FPDF_StructTree_GetChildAtIndex(struct_tree.get(), 1);
-    EXPECT_EQ(nullptr, element);
+    EXPECT_FALSE(element);
     element = FPDF_StructTree_GetChildAtIndex(struct_tree.get(), 0);
-    ASSERT_NE(nullptr, element);
+    ASSERT_TRUE(element);
     EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentID(element));
     EXPECT_EQ(0U, FPDF_StructElement_GetAltText(element, nullptr, 0));
 
     ASSERT_EQ(1, FPDF_StructElement_CountChildren(element));
     FPDF_STRUCTELEMENT child_element =
         FPDF_StructElement_GetChildAtIndex(element, -1);
-    EXPECT_EQ(nullptr, child_element);
+    EXPECT_FALSE(child_element);
     child_element = FPDF_StructElement_GetChildAtIndex(element, 1);
-    EXPECT_EQ(nullptr, child_element);
+    EXPECT_FALSE(child_element);
     child_element = FPDF_StructElement_GetChildAtIndex(element, 0);
-    ASSERT_NE(nullptr, child_element);
+    ASSERT_TRUE(child_element);
     EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentID(child_element));
     EXPECT_EQ(0U, FPDF_StructElement_GetAltText(child_element, nullptr, 0));
 
     ASSERT_EQ(1, FPDF_StructElement_CountChildren(child_element));
     FPDF_STRUCTELEMENT gchild_element =
         FPDF_StructElement_GetChildAtIndex(child_element, -1);
-    EXPECT_EQ(nullptr, gchild_element);
+    EXPECT_FALSE(gchild_element);
     gchild_element = FPDF_StructElement_GetChildAtIndex(child_element, 1);
-    EXPECT_EQ(nullptr, gchild_element);
+    EXPECT_FALSE(gchild_element);
     gchild_element = FPDF_StructElement_GetChildAtIndex(child_element, 0);
-    ASSERT_NE(nullptr, gchild_element);
+    ASSERT_TRUE(gchild_element);
     EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentID(gchild_element));
     ASSERT_EQ(24U, FPDF_StructElement_GetAltText(gchild_element, nullptr, 0));
 
@@ -69,7 +69,7 @@
     ASSERT_EQ(1, FPDF_StructElement_CountChildren(gchild_element));
     FPDF_STRUCTELEMENT ggchild_element =
         FPDF_StructElement_GetChildAtIndex(gchild_element, 0);
-    EXPECT_EQ(nullptr, ggchild_element);
+    EXPECT_FALSE(ggchild_element);
   }
 
   UnloadPage(page);
@@ -105,7 +105,7 @@
 
     FPDF_STRUCTELEMENT element =
         FPDF_StructTree_GetChildAtIndex(struct_tree.get(), 0);
-    ASSERT_NE(nullptr, element);
+    ASSERT_TRUE(element);
 
     // test nullptr inputs
     unsigned short buffer[12];
@@ -141,7 +141,7 @@
 
     FPDF_STRUCTELEMENT element =
         FPDF_StructTree_GetChildAtIndex(struct_tree.get(), 0);
-    ASSERT_NE(nullptr, element);
+    ASSERT_TRUE(element);
 
     // test nullptr inputs
     unsigned short buffer[13];
@@ -166,7 +166,7 @@
     ASSERT_EQ(1, FPDF_StructElement_CountChildren(element));
     FPDF_STRUCTELEMENT child_element =
         FPDF_StructElement_GetChildAtIndex(element, 0);
-    ASSERT_NE(nullptr, element);
+    ASSERT_TRUE(child_element);  // child_element is what GetTitle() uses below.
 
     ASSERT_EQ(26U, FPDF_StructElement_GetTitle(child_element, buffer,
                                                sizeof(buffer)));
@@ -190,3 +190,109 @@
   }
   UnloadPage(page);
 }
+
+TEST_F(FPDFStructTreeEmbedderTest, MarkedContentReferenceAndObjectReference) {
+  ASSERT_TRUE(OpenDocument("tagged_mcr_objr.pdf"));
+  FPDF_PAGE page = LoadPage(0);
+  ASSERT_TRUE(page);
+  // Each objectN local is named after its object number in tagged_mcr_objr.in.
+  {  // Scope presumably ends struct_tree's lifetime before UnloadPage(page).
+    ScopedFPDFStructTree struct_tree(FPDF_StructTree_GetForPage(page));
+    ASSERT_TRUE(struct_tree);
+    ASSERT_EQ(1, FPDF_StructTree_CountChildren(struct_tree.get()));
+    // GetType() results below equal 2 * (type name length + 1); presumably bytes.
+    FPDF_STRUCTELEMENT object8 =
+        FPDF_StructTree_GetChildAtIndex(struct_tree.get(), 0);
+    ASSERT_TRUE(object8);
+    unsigned short buffer[12];  // Reused by every GetType() call below.
+    ASSERT_EQ(18U, FPDF_StructElement_GetType(object8, buffer, sizeof(buffer)));
+    const wchar_t kExpectedObject8Type[] = L"Document";
+    EXPECT_EQ(WideString(kExpectedObject8Type),
+              WideString::FromUTF16LE(buffer, FXSYS_len(kExpectedObject8Type)));
+    EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentID(object8));
+    ASSERT_EQ(2, FPDF_StructElement_CountChildren(object8));
+
+    // First branch. 10 -> 12 -> 13 -> Inline MCR dict (/MCID 0).
+    FPDF_STRUCTELEMENT object10 =
+        FPDF_StructElement_GetChildAtIndex(object8, 0);
+    ASSERT_TRUE(object10);
+    ASSERT_EQ(20U,
+              FPDF_StructElement_GetType(object10, buffer, sizeof(buffer)));
+    const wchar_t kExpectedObject10Type[] = L"NonStruct";
+    EXPECT_EQ(
+        WideString(kExpectedObject10Type),
+        WideString::FromUTF16LE(buffer, FXSYS_len(kExpectedObject10Type)));
+    EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentID(object10));
+    ASSERT_EQ(1, FPDF_StructElement_CountChildren(object10));
+
+    FPDF_STRUCTELEMENT object12 =
+        FPDF_StructElement_GetChildAtIndex(object10, 0);
+    ASSERT_TRUE(object12);
+    ASSERT_EQ(4U, FPDF_StructElement_GetType(object12, buffer, sizeof(buffer)));
+    const wchar_t kExpectedObject12Type[] = L"P";
+    EXPECT_EQ(
+        WideString(kExpectedObject12Type),
+        WideString::FromUTF16LE(buffer, FXSYS_len(kExpectedObject12Type)));
+    EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentID(object12));
+    ASSERT_EQ(1, FPDF_StructElement_CountChildren(object12));
+
+    FPDF_STRUCTELEMENT object13 =
+        FPDF_StructElement_GetChildAtIndex(object12, 0);
+    ASSERT_TRUE(object13);
+    ASSERT_EQ(20U,
+              FPDF_StructElement_GetType(object13, buffer, sizeof(buffer)));
+    const wchar_t kExpectedObject13Type[] = L"NonStruct";
+    EXPECT_EQ(
+        WideString(kExpectedObject13Type),
+        WideString::FromUTF16LE(buffer, FXSYS_len(kExpectedObject13Type)));
+    EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentID(object13));
+    ASSERT_EQ(1, FPDF_StructElement_CountChildren(object13));
+
+    // TODO(crbug.com/pdfium/672): Fetch this child element.
+    EXPECT_FALSE(FPDF_StructElement_GetChildAtIndex(object13, 0));
+
+    // Second branch. 11 -> 14 -> 15 -> Inline MCR dict (/MCID 1).
+    //                         -> Inline OBJR dict (/Obj 4 0 R).
+    FPDF_STRUCTELEMENT object11 =
+        FPDF_StructElement_GetChildAtIndex(object8, 1);
+    ASSERT_TRUE(object11);
+    ASSERT_EQ(4U, FPDF_StructElement_GetType(object11, buffer, sizeof(buffer)));
+    const wchar_t kExpectedObject11Type[] = L"P";
+    EXPECT_EQ(
+        WideString(kExpectedObject11Type),
+        WideString::FromUTF16LE(buffer, FXSYS_len(kExpectedObject11Type)));
+    EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentID(object11));
+    ASSERT_EQ(1, FPDF_StructElement_CountChildren(object11));
+
+    FPDF_STRUCTELEMENT object14 =
+        FPDF_StructElement_GetChildAtIndex(object11, 0);
+    ASSERT_TRUE(object14);
+    ASSERT_EQ(20U,
+              FPDF_StructElement_GetType(object14, buffer, sizeof(buffer)));
+    const wchar_t kExpectedObject14Type[] = L"NonStruct";
+    EXPECT_EQ(
+        WideString(kExpectedObject14Type),
+        WideString::FromUTF16LE(buffer, FXSYS_len(kExpectedObject14Type)));
+    EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentID(object14));
+    ASSERT_EQ(2, FPDF_StructElement_CountChildren(object14));
+    // NOTE(review): object 14's /K lists 15 0 R first; confirm the TODO below.
+    // TODO(crbug.com/pdfium/672): Object 15 should be at index 1.
+    EXPECT_FALSE(FPDF_StructElement_GetChildAtIndex(object14, 1));
+    FPDF_STRUCTELEMENT object15 =
+        FPDF_StructElement_GetChildAtIndex(object14, 0);
+    ASSERT_TRUE(object15);
+    ASSERT_EQ(20U,
+              FPDF_StructElement_GetType(object15, buffer, sizeof(buffer)));
+    const wchar_t kExpectedObject15Type[] = L"NonStruct";
+    EXPECT_EQ(
+        WideString(kExpectedObject15Type),
+        WideString::FromUTF16LE(buffer, FXSYS_len(kExpectedObject15Type)));
+    EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentID(object15));
+    ASSERT_EQ(1, FPDF_StructElement_CountChildren(object15));
+
+    // TODO(crbug.com/pdfium/672): Fetch this child element.
+    EXPECT_FALSE(FPDF_StructElement_GetChildAtIndex(object15, 0));
+  }
+
+  UnloadPage(page);
+}
diff --git a/testing/resources/tagged_mcr_objr.in b/testing/resources/tagged_mcr_objr.in
new file mode 100644
index 0000000..3394633
--- /dev/null
+++ b/testing/resources/tagged_mcr_objr.in
@@ -0,0 +1,160 @@
+{{header}}
+{{object 1 0}} <<
+  /Type /Catalog
+  /Pages 2 0 R
+  /StructTreeRoot 7 0 R
+  /MarkInfo <<
+    /Type /MarkInfo
+    /Marked true
+  >>
+>>
+endobj
+{{object 2 0}} <<
+  /Type /Pages
+  /Count 1
+  /Kids [3 0 R]
+>>
+endobj
+{{object 3 0}} <<
+  /Type /Page
+  /Parent 2 0 R
+  /StructParents 0
+  /Annots [4 0 R]
+  /Contents 5 0 R
+  /MediaBox [0 0 612 792]
+  /Resources <<
+    /ProcSet [/PDF /Text]
+    /Font <<
+      /F4 6 0 R
+    >>
+  >>
+>>
+endobj
+{{object 4 0}} <<
+  /Type /Annot
+  /Subtype /Link
+  /Border [0 0 0]
+  /Dest /top
+  /F 4
+  /Rect [20 46 68 61]
+>>
+endobj
+{{object 5 0}} <<
+  {{streamlen}}
+>>
+stream
+q
+BT
+/P <</MCID 0 >>BDC
+/F4 16 Tf
+20 650 Td
+(Hello, world!) Tj
+EMC
+ET
+BT
+/P <</MCID 1 >>BDC
+/F4 16 Tf
+20 50 Td
+(Link to top) Tj
+EMC
+ET
+Q
+endstream
+endobj
+{{object 6 0}} <<
+  /Type /Font
+  /Subtype /Type1
+  /BaseFont /Times-Roman
+>>
+endobj
+{{object 7 0}} <<
+  /Type /StructTreeRoot
+  /K 8 0 R
+  /ParentTree 9 0 R
+  /ParentTreeNextKey 1
+>>
+endobj
+{{object 8 0}} <<
+  /Type /StructElem
+  /S /Document
+  /P 7 0 R
+  /K [10 0 R 11 0 R]
+  /ID /2
+  /Lang (en-US)
+>>
+endobj
+{{object 9 0}} <<
+  /Type /ParentTree
+  /Nums [0 [13 0 R 15 0 R]]
+>>
+endobj
+{{object 10 0}} <<
+  /Type /StructElem
+  /S /NonStruct
+  /P 8 0 R
+  /K [12 0 R]
+  /ID /6
+>>
+endobj
+{{object 11 0}} <<
+  /Type /StructElem
+  /S /P
+  /P 8 0 R
+  /K [14 0 R]
+  /ID /4
+>>
+endobj
+{{object 12 0}} <<
+  /Type /StructElem
+  /S /P
+  /P 10 0 R
+  /K [13 0 R]
+  /ID /3
+>>
+endobj
+{{object 13 0}} <<
+  /Type /StructElem
+  /S /NonStruct
+  /P 12 0 R
+  /K [
+    <<
+      /Type /MCR
+      /MCID 0
+      /Pg 3 0 R
+    >>
+  ]
+  /ID /7
+>>
+endobj
+{{object 14 0}} <<
+  /Type /StructElem
+  /S /NonStruct
+  /P 11 0 R
+  /K [
+    15 0 R
+    <<
+      /Type /OBJR
+      /Obj 4 0 R
+    >>
+  ]
+  /ID /9
+>>
+endobj
+{{object 15 0}} <<
+  /Type /StructElem
+  /S /NonStruct
+  /P 14 0 R
+  /K [
+    <<
+      /Type /MCR
+      /Pg 3 0 R
+      /MCID 1
+    >>
+  ]
+  /ID /10
+>>
+endobj
+{{xref}}
+{{trailer}}
+{{startxref}}
+%%EOF
diff --git a/testing/resources/tagged_mcr_objr.pdf b/testing/resources/tagged_mcr_objr.pdf
new file mode 100644
index 0000000..a86faa7
--- /dev/null
+++ b/testing/resources/tagged_mcr_objr.pdf
@@ -0,0 +1,182 @@
+%PDF-1.7
+% ò¤ô
+1 0 obj <<
+  /Type /Catalog
+  /Pages 2 0 R
+  /StructTreeRoot 7 0 R
+  /MarkInfo <<
+    /Type /MarkInfo
+    /Marked true
+  >>
+>>
+endobj
+2 0 obj <<
+  /Type /Pages
+  /Count 1
+  /Kids [3 0 R]
+>>
+endobj
+3 0 obj <<
+  /Type /Page
+  /Parent 2 0 R
+  /StructParents 0
+  /Annots [4 0 R]
+  /Contents 5 0 R
+  /MediaBox [0 0 612 792]
+  /Resources <<
+    /ProcSet [/PDF /Text]
+    /Font <<
+      /F4 6 0 R
+    >>
+  >>
+>>
+endobj
+4 0 obj <<
+  /Type /Annot
+  /Subtype /Link
+  /Border [0 0 0]
+  /Dest /top
+  /F 4
+  /Rect [20 46 68 61]
+>>
+endobj
+5 0 obj <<
+  /Length 137
+>>
+stream
+q
+BT
+/P <</MCID 0 >>BDC
+/F4 16 Tf
+20 650 Td
+(Hello, world!) Tj
+EMC
+ET
+BT
+/P <</MCID 1 >>BDC
+/F4 16 Tf
+20 50 Td
+(Link to top) Tj
+EMC
+ET
+Q
+endstream
+endobj
+6 0 obj <<
+  /Type /Font
+  /Subtype /Type1
+  /BaseFont /Times-Roman
+>>
+endobj
+7 0 obj <<
+  /Type /StructTreeRoot
+  /K 8 0 R
+  /ParentTree 9 0 R
+  /ParentTreeNextKey 1
+>>
+endobj
+8 0 obj <<
+  /Type /StructElem
+  /S /Document
+  /P 7 0 R
+  /K [10 0 R 11 0 R]
+  /ID /2
+  /Lang (en-US)
+>>
+endobj
+9 0 obj <<
+  /Type /ParentTree
+  /Nums [0 [13 0 R 15 0 R]]
+>>
+endobj
+10 0 obj <<
+  /Type /StructElem
+  /S /NonStruct
+  /P 8 0 R
+  /K [12 0 R]
+  /ID /6
+>>
+endobj
+11 0 obj <<
+  /Type /StructElem
+  /S /P
+  /P 8 0 R
+  /K [14 0 R]
+  /ID /4
+>>
+endobj
+12 0 obj <<
+  /Type /StructElem
+  /S /P
+  /P 10 0 R
+  /K [13 0 R]
+  /ID /3
+>>
+endobj
+13 0 obj <<
+  /Type /StructElem
+  /S /NonStruct
+  /P 12 0 R
+  /K [
+    <<
+      /Type /MCR
+      /MCID 0
+      /Pg 3 0 R
+    >>
+  ]
+  /ID /7
+>>
+endobj
+14 0 obj <<
+  /Type /StructElem
+  /S /NonStruct
+  /P 11 0 R
+  /K [
+    15 0 R
+    <<
+      /Type /OBJR
+      /Obj 4 0 R
+    >>
+  ]
+  /ID /9
+>>
+endobj
+15 0 obj <<
+  /Type /StructElem
+  /S /NonStruct
+  /P 14 0 R
+  /K [
+    <<
+      /Type /MCR
+      /Pg 3 0 R
+      /MCID 1
+    >>
+  ]
+  /ID /10
+>>
+endobj
+xref
+0 16
+0000000000 65535 f 
+0000000015 00000 n 
+0000000149 00000 n 
+0000000212 00000 n 
+0000000427 00000 n 
+0000000540 00000 n 
+0000000729 00000 n 
+0000000807 00000 n 
+0000000906 00000 n 
+0000001019 00000 n 
+0000001088 00000 n 
+0000001180 00000 n 
+0000001264 00000 n 
+0000001349 00000 n 
+0000001500 00000 n 
+0000001650 00000 n 
+trailer <<
+  /Root 1 0 R
+  /Size 16
+>>
+startxref
+1802
+%%EOF