| // Copyright 2016 PDFium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "core/fxcrt/fx_string.h" |
| #include "public/fpdf_structtree.h" |
| #include "testing/embedder_test.h" |
| #include "testing/fx_string_testhelpers.h" |
| #include "third_party/base/cxx17_backports.h" |
| #include "third_party/base/optional.h" |
| |
| class FPDFStructTreeEmbedderTest : public EmbedderTest {}; |
| |
| TEST_F(FPDFStructTreeEmbedderTest, GetAltText) { |
| ASSERT_TRUE(OpenDocument("tagged_alt_text.pdf")); |
| FPDF_PAGE page = LoadPage(0); |
| ASSERT_TRUE(page); |
| |
| { |
| ScopedFPDFStructTree struct_tree(FPDF_StructTree_GetForPage(page)); |
| ASSERT_TRUE(struct_tree); |
| ASSERT_EQ(1, FPDF_StructTree_CountChildren(struct_tree.get())); |
| |
| FPDF_STRUCTELEMENT element = |
| FPDF_StructTree_GetChildAtIndex(struct_tree.get(), -1); |
| EXPECT_FALSE(element); |
| element = FPDF_StructTree_GetChildAtIndex(struct_tree.get(), 1); |
| EXPECT_FALSE(element); |
| element = FPDF_StructTree_GetChildAtIndex(struct_tree.get(), 0); |
| ASSERT_TRUE(element); |
| EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentID(element)); |
| EXPECT_EQ(0U, FPDF_StructElement_GetAltText(element, nullptr, 0)); |
| |
| ASSERT_EQ(1, FPDF_StructElement_CountChildren(element)); |
| FPDF_STRUCTELEMENT child_element = |
| FPDF_StructElement_GetChildAtIndex(element, -1); |
| EXPECT_FALSE(child_element); |
| child_element = FPDF_StructElement_GetChildAtIndex(element, 1); |
| EXPECT_FALSE(child_element); |
| child_element = FPDF_StructElement_GetChildAtIndex(element, 0); |
| ASSERT_TRUE(child_element); |
| EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentID(child_element)); |
| EXPECT_EQ(0U, FPDF_StructElement_GetAltText(child_element, nullptr, 0)); |
| |
| ASSERT_EQ(1, FPDF_StructElement_CountChildren(child_element)); |
| FPDF_STRUCTELEMENT gchild_element = |
| FPDF_StructElement_GetChildAtIndex(child_element, -1); |
| EXPECT_FALSE(gchild_element); |
| gchild_element = FPDF_StructElement_GetChildAtIndex(child_element, 1); |
| EXPECT_FALSE(gchild_element); |
| gchild_element = FPDF_StructElement_GetChildAtIndex(child_element, 0); |
| ASSERT_TRUE(gchild_element); |
| EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentID(gchild_element)); |
| ASSERT_EQ(24U, FPDF_StructElement_GetAltText(gchild_element, nullptr, 0)); |
| |
| unsigned short buffer[12]; |
| memset(buffer, 0, sizeof(buffer)); |
| // Deliberately pass in a small buffer size to make sure |buffer| remains |
| // untouched. |
| ASSERT_EQ(24U, FPDF_StructElement_GetAltText(gchild_element, buffer, 1)); |
| for (size_t i = 0; i < pdfium::size(buffer); ++i) |
| EXPECT_EQ(0U, buffer[i]); |
| |
| EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentID(gchild_element)); |
| ASSERT_EQ(24U, FPDF_StructElement_GetAltText(gchild_element, buffer, |
| sizeof(buffer))); |
| const wchar_t kExpected[] = L"Black Image"; |
| EXPECT_EQ(WideString(kExpected), |
| WideString::FromUTF16LE(buffer, FXSYS_len(kExpected))); |
| |
| ASSERT_EQ(1, FPDF_StructElement_CountChildren(gchild_element)); |
| FPDF_STRUCTELEMENT ggchild_element = |
| FPDF_StructElement_GetChildAtIndex(gchild_element, 0); |
| EXPECT_FALSE(ggchild_element); |
| } |
| |
| UnloadPage(page); |
| } |
| |
| TEST_F(FPDFStructTreeEmbedderTest, GetStringAttribute) { |
| ASSERT_TRUE(OpenDocument("tagged_table.pdf")); |
| FPDF_PAGE page = LoadPage(0); |
| ASSERT_TRUE(page); |
| |
| { |
| ScopedFPDFStructTree struct_tree(FPDF_StructTree_GetForPage(page)); |
| ASSERT_TRUE(struct_tree); |
| ASSERT_EQ(1, FPDF_StructTree_CountChildren(struct_tree.get())); |
| |
| FPDF_STRUCTELEMENT document = |
| FPDF_StructTree_GetChildAtIndex(struct_tree.get(), 0); |
| ASSERT_TRUE(document); |
| |
| constexpr int kBufLen = 100; |
| uint16_t buffer[kBufLen] = {0}; |
| EXPECT_EQ(18U, FPDF_StructElement_GetType(document, buffer, kBufLen)); |
| EXPECT_EQ("Document", GetPlatformString(buffer)); |
| |
| ASSERT_EQ(1, FPDF_StructElement_CountChildren(document)); |
| FPDF_STRUCTELEMENT table = FPDF_StructElement_GetChildAtIndex(document, 0); |
| ASSERT_TRUE(table); |
| |
| EXPECT_EQ(12U, FPDF_StructElement_GetType(table, buffer, kBufLen)); |
| EXPECT_EQ("Table", GetPlatformString(buffer)); |
| |
| // The table should have an attribute "Summary" set to the empty string. |
| EXPECT_EQ(2U, FPDF_StructElement_GetStringAttribute(table, "Summary", |
| buffer, kBufLen)); |
| |
| ASSERT_EQ(2, FPDF_StructElement_CountChildren(table)); |
| FPDF_STRUCTELEMENT row = FPDF_StructElement_GetChildAtIndex(table, 0); |
| ASSERT_TRUE(row); |
| |
| ASSERT_EQ(2, FPDF_StructElement_CountChildren(row)); |
| FPDF_STRUCTELEMENT header_cell = FPDF_StructElement_GetChildAtIndex(row, 0); |
| ASSERT_TRUE(header_cell); |
| |
| EXPECT_EQ(6U, FPDF_StructElement_GetType(header_cell, buffer, kBufLen)); |
| EXPECT_EQ("TH", GetPlatformString(buffer)); |
| |
| // The header should have an attribute "Scope" with a scope of "Row". |
| EXPECT_EQ(8U, FPDF_StructElement_GetStringAttribute(header_cell, "Scope", |
| buffer, kBufLen)); |
| EXPECT_EQ("Row", GetPlatformString(buffer)); |
| |
| // The header has an attribute "ColSpan", but it's not a string so it |
| // returns null. |
| EXPECT_EQ(0U, FPDF_StructElement_GetStringAttribute(header_cell, "ColSpan", |
| buffer, kBufLen)); |
| |
| // An unsupported attribute should return 0. |
| EXPECT_EQ(0U, FPDF_StructElement_GetStringAttribute(header_cell, "Other", |
| buffer, kBufLen)); |
| |
| // A null struct element should not crash. |
| EXPECT_EQ(0U, FPDF_StructElement_GetStringAttribute(nullptr, "Other", |
| buffer, kBufLen)); |
| } |
| |
| UnloadPage(page); |
| } |
| |
| TEST_F(FPDFStructTreeEmbedderTest, GetID) { |
| ASSERT_TRUE(OpenDocument("tagged_table.pdf")); |
| FPDF_PAGE page = LoadPage(0); |
| ASSERT_TRUE(page); |
| |
| { |
| ScopedFPDFStructTree struct_tree(FPDF_StructTree_GetForPage(page)); |
| ASSERT_TRUE(struct_tree); |
| ASSERT_EQ(1, FPDF_StructTree_CountChildren(struct_tree.get())); |
| |
| FPDF_STRUCTELEMENT document = |
| FPDF_StructTree_GetChildAtIndex(struct_tree.get(), 0); |
| ASSERT_TRUE(document); |
| |
| constexpr int kBufLen = 100; |
| uint16_t buffer[kBufLen] = {0}; |
| EXPECT_EQ(18U, FPDF_StructElement_GetType(document, buffer, kBufLen)); |
| EXPECT_EQ("Document", GetPlatformString(buffer)); |
| |
| // The document has no ID. |
| EXPECT_EQ(0U, FPDF_StructElement_GetID(document, buffer, kBufLen)); |
| |
| ASSERT_EQ(1, FPDF_StructElement_CountChildren(document)); |
| FPDF_STRUCTELEMENT table = FPDF_StructElement_GetChildAtIndex(document, 0); |
| ASSERT_TRUE(table); |
| |
| EXPECT_EQ(12U, FPDF_StructElement_GetType(table, buffer, kBufLen)); |
| EXPECT_EQ("Table", GetPlatformString(buffer)); |
| |
| // The table has an ID. |
| EXPECT_EQ(14U, FPDF_StructElement_GetID(table, buffer, kBufLen)); |
| EXPECT_EQ("node12", GetPlatformString(buffer)); |
| |
| // The first child of the table is a row, which has an empty ID. |
| // It returns 2U, the length of an empty string, instead of 0U, |
| // representing null. |
| ASSERT_EQ(2, FPDF_StructElement_CountChildren(table)); |
| FPDF_STRUCTELEMENT row = FPDF_StructElement_GetChildAtIndex(table, 0); |
| ASSERT_TRUE(row); |
| EXPECT_EQ(2U, FPDF_StructElement_GetID(row, buffer, kBufLen)); |
| } |
| |
| UnloadPage(page); |
| } |
| |
| TEST_F(FPDFStructTreeEmbedderTest, GetLang) { |
| ASSERT_TRUE(OpenDocument("tagged_table.pdf")); |
| FPDF_PAGE page = LoadPage(0); |
| ASSERT_TRUE(page); |
| |
| { |
| ScopedFPDFStructTree struct_tree(FPDF_StructTree_GetForPage(page)); |
| ASSERT_TRUE(struct_tree); |
| ASSERT_EQ(1, FPDF_StructTree_CountChildren(struct_tree.get())); |
| |
| FPDF_STRUCTELEMENT document = |
| FPDF_StructTree_GetChildAtIndex(struct_tree.get(), 0); |
| ASSERT_TRUE(document); |
| |
| constexpr int kBufLen = 100; |
| uint16_t buffer[kBufLen] = {0}; |
| EXPECT_EQ(18U, FPDF_StructElement_GetType(document, buffer, kBufLen)); |
| EXPECT_EQ("Document", GetPlatformString(buffer)); |
| |
| // The document has a language. |
| EXPECT_EQ(12U, FPDF_StructElement_GetLang(document, buffer, kBufLen)); |
| EXPECT_EQ("en-US", GetPlatformString(buffer)); |
| |
| ASSERT_EQ(1, FPDF_StructElement_CountChildren(document)); |
| FPDF_STRUCTELEMENT table = FPDF_StructElement_GetChildAtIndex(document, 0); |
| ASSERT_TRUE(table); |
| |
| // The first child is a table, with a language. |
| EXPECT_EQ(12U, FPDF_StructElement_GetType(table, buffer, kBufLen)); |
| EXPECT_EQ("Table", GetPlatformString(buffer)); |
| |
| EXPECT_EQ(6U, FPDF_StructElement_GetLang(table, buffer, kBufLen)); |
| EXPECT_EQ("hu", GetPlatformString(buffer)); |
| |
| // The first child of the table is a row, which doesn't have a |
| // language explicitly set on it. |
| ASSERT_EQ(2, FPDF_StructElement_CountChildren(table)); |
| FPDF_STRUCTELEMENT row = FPDF_StructElement_GetChildAtIndex(table, 0); |
| ASSERT_TRUE(row); |
| EXPECT_EQ(0U, FPDF_StructElement_GetLang(row, buffer, kBufLen)); |
| } |
| |
| UnloadPage(page); |
| } |
| |
| TEST_F(FPDFStructTreeEmbedderTest, GetMarkedContentID) { |
| ASSERT_TRUE(OpenDocument("marked_content_id.pdf")); |
| FPDF_PAGE page = LoadPage(0); |
| ASSERT_TRUE(page); |
| |
| { |
| ScopedFPDFStructTree struct_tree(FPDF_StructTree_GetForPage(page)); |
| ASSERT_TRUE(struct_tree); |
| ASSERT_EQ(1, FPDF_StructTree_CountChildren(struct_tree.get())); |
| |
| FPDF_STRUCTELEMENT element = |
| FPDF_StructTree_GetChildAtIndex(struct_tree.get(), 0); |
| EXPECT_EQ(0, FPDF_StructElement_GetMarkedContentID(element)); |
| } |
| |
| UnloadPage(page); |
| } |
| |
| TEST_F(FPDFStructTreeEmbedderTest, GetType) { |
| ASSERT_TRUE(OpenDocument("tagged_alt_text.pdf")); |
| FPDF_PAGE page = LoadPage(0); |
| ASSERT_TRUE(page); |
| |
| { |
| ScopedFPDFStructTree struct_tree(FPDF_StructTree_GetForPage(page)); |
| ASSERT_TRUE(struct_tree); |
| ASSERT_EQ(1, FPDF_StructTree_CountChildren(struct_tree.get())); |
| |
| FPDF_STRUCTELEMENT element = |
| FPDF_StructTree_GetChildAtIndex(struct_tree.get(), 0); |
| ASSERT_TRUE(element); |
| |
| // test nullptr inputs |
| unsigned short buffer[12]; |
| ASSERT_EQ(0U, FPDF_StructElement_GetType(nullptr, buffer, sizeof(buffer))); |
| ASSERT_EQ(0U, FPDF_StructElement_GetType(nullptr, nullptr, 0)); |
| ASSERT_EQ(18U, FPDF_StructElement_GetType(element, nullptr, 0)); |
| |
| memset(buffer, 0, sizeof(buffer)); |
| // Deliberately pass in a small buffer size to make sure |buffer| remains |
| // untouched. |
| ASSERT_EQ(18U, FPDF_StructElement_GetType(element, buffer, 1)); |
| for (size_t i = 0; i < pdfium::size(buffer); ++i) |
| EXPECT_EQ(0U, buffer[i]); |
| |
| ASSERT_EQ(18U, FPDF_StructElement_GetType(element, buffer, sizeof(buffer))); |
| const wchar_t kExpected[] = L"Document"; |
| EXPECT_EQ(WideString(kExpected), |
| WideString::FromUTF16LE(buffer, FXSYS_len(kExpected))); |
| } |
| |
| UnloadPage(page); |
| } |
| |
| TEST_F(FPDFStructTreeEmbedderTest, GetTitle) { |
| ASSERT_TRUE(OpenDocument("tagged_alt_text.pdf")); |
| FPDF_PAGE page = LoadPage(0); |
| ASSERT_TRUE(page); |
| |
| { |
| ScopedFPDFStructTree struct_tree(FPDF_StructTree_GetForPage(page)); |
| ASSERT_TRUE(struct_tree); |
| ASSERT_EQ(1, FPDF_StructTree_CountChildren(struct_tree.get())); |
| |
| FPDF_STRUCTELEMENT element = |
| FPDF_StructTree_GetChildAtIndex(struct_tree.get(), 0); |
| ASSERT_TRUE(element); |
| |
| // test nullptr inputs |
| unsigned short buffer[13]; |
| ASSERT_EQ(0U, FPDF_StructElement_GetTitle(nullptr, buffer, sizeof(buffer))); |
| ASSERT_EQ(0U, FPDF_StructElement_GetTitle(nullptr, nullptr, 0)); |
| ASSERT_EQ(20U, FPDF_StructElement_GetTitle(element, nullptr, 0)); |
| |
| memset(buffer, 0, sizeof(buffer)); |
| // Deliberately pass in a small buffer size to make sure |buffer| remains |
| // untouched. |
| ASSERT_EQ(20U, FPDF_StructElement_GetTitle(element, buffer, 1)); |
| for (size_t i = 0; i < pdfium::size(buffer); ++i) |
| EXPECT_EQ(0U, buffer[i]); |
| |
| ASSERT_EQ(20U, |
| FPDF_StructElement_GetTitle(element, buffer, sizeof(buffer))); |
| |
| const wchar_t kExpected[] = L"TitleText"; |
| EXPECT_EQ(WideString(kExpected), |
| WideString::FromUTF16LE(buffer, FXSYS_len(kExpected))); |
| |
| ASSERT_EQ(1, FPDF_StructElement_CountChildren(element)); |
| FPDF_STRUCTELEMENT child_element = |
| FPDF_StructElement_GetChildAtIndex(element, 0); |
| ASSERT_TRUE(element); |
| |
| ASSERT_EQ(26U, FPDF_StructElement_GetTitle(child_element, buffer, |
| sizeof(buffer))); |
| const wchar_t kChildExpected[] = L"symbol: 100k"; |
| EXPECT_EQ(WideString(kChildExpected), |
| WideString::FromUTF16LE(buffer, FXSYS_len(kChildExpected))); |
| } |
| |
| UnloadPage(page); |
| } |
| |
| TEST_F(FPDFStructTreeEmbedderTest, GetStructTreeForNestedTaggedPDF) { |
| ASSERT_TRUE(OpenDocument("tagged_nested.pdf")); |
| FPDF_PAGE page = LoadPage(0); |
| ASSERT_TRUE(page); |
| |
| { |
| // This call should not crash. https://crbug.com/pdfium/1480 |
| ScopedFPDFStructTree struct_tree(FPDF_StructTree_GetForPage(page)); |
| ASSERT_TRUE(struct_tree); |
| } |
| UnloadPage(page); |
| } |
| |
| TEST_F(FPDFStructTreeEmbedderTest, MarkedContentReferenceAndObjectReference) { |
| ASSERT_TRUE(OpenDocument("tagged_mcr_objr.pdf")); |
| FPDF_PAGE page = LoadPage(0); |
| ASSERT_TRUE(page); |
| |
| { |
| ScopedFPDFStructTree struct_tree(FPDF_StructTree_GetForPage(page)); |
| ASSERT_TRUE(struct_tree); |
| ASSERT_EQ(1, FPDF_StructTree_CountChildren(struct_tree.get())); |
| |
| FPDF_STRUCTELEMENT object8 = |
| FPDF_StructTree_GetChildAtIndex(struct_tree.get(), 0); |
| ASSERT_TRUE(object8); |
| unsigned short buffer[12]; |
| ASSERT_EQ(18U, FPDF_StructElement_GetType(object8, buffer, sizeof(buffer))); |
| const wchar_t kExpectedObject8Type[] = L"Document"; |
| EXPECT_EQ(WideString(kExpectedObject8Type), |
| WideString::FromUTF16LE(buffer, FXSYS_len(kExpectedObject8Type))); |
| EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentID(object8)); |
| ASSERT_EQ(2, FPDF_StructElement_CountChildren(object8)); |
| |
| // First branch. 10 -> 12 -> 13 -> Inline dict. |
| FPDF_STRUCTELEMENT object10 = |
| FPDF_StructElement_GetChildAtIndex(object8, 0); |
| ASSERT_TRUE(object10); |
| ASSERT_EQ(20U, |
| FPDF_StructElement_GetType(object10, buffer, sizeof(buffer))); |
| const wchar_t kExpectedObject10Type[] = L"NonStruct"; |
| EXPECT_EQ( |
| WideString(kExpectedObject10Type), |
| WideString::FromUTF16LE(buffer, FXSYS_len(kExpectedObject10Type))); |
| EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentID(object10)); |
| ASSERT_EQ(1, FPDF_StructElement_CountChildren(object10)); |
| |
| FPDF_STRUCTELEMENT object12 = |
| FPDF_StructElement_GetChildAtIndex(object10, 0); |
| ASSERT_TRUE(object12); |
| ASSERT_EQ(4U, FPDF_StructElement_GetType(object12, buffer, sizeof(buffer))); |
| const wchar_t kExpectedObject12Type[] = L"P"; |
| EXPECT_EQ( |
| WideString(kExpectedObject12Type), |
| WideString::FromUTF16LE(buffer, FXSYS_len(kExpectedObject12Type))); |
| EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentID(object12)); |
| ASSERT_EQ(1, FPDF_StructElement_CountChildren(object12)); |
| |
| FPDF_STRUCTELEMENT object13 = |
| FPDF_StructElement_GetChildAtIndex(object12, 0); |
| ASSERT_TRUE(object13); |
| ASSERT_EQ(20U, |
| FPDF_StructElement_GetType(object13, buffer, sizeof(buffer))); |
| const wchar_t kExpectedObject13Type[] = L"NonStruct"; |
| EXPECT_EQ( |
| WideString(kExpectedObject13Type), |
| WideString::FromUTF16LE(buffer, FXSYS_len(kExpectedObject13Type))); |
| EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentID(object13)); |
| ASSERT_EQ(1, FPDF_StructElement_CountChildren(object13)); |
| |
| // TODO(crbug.com/pdfium/672): Fetch this child element. |
| EXPECT_FALSE(FPDF_StructElement_GetChildAtIndex(object13, 0)); |
| |
| // Second branch. 11 -> 14 -> Inline dict. |
| // -> 15 -> Inline dict. |
| FPDF_STRUCTELEMENT object11 = |
| FPDF_StructElement_GetChildAtIndex(object8, 1); |
| ASSERT_TRUE(object11); |
| ASSERT_EQ(4U, FPDF_StructElement_GetType(object11, buffer, sizeof(buffer))); |
| const wchar_t kExpectedObject11Type[] = L"P"; |
| EXPECT_EQ( |
| WideString(kExpectedObject11Type), |
| WideString::FromUTF16LE(buffer, FXSYS_len(kExpectedObject11Type))); |
| EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentID(object11)); |
| ASSERT_EQ(1, FPDF_StructElement_CountChildren(object11)); |
| |
| FPDF_STRUCTELEMENT object14 = |
| FPDF_StructElement_GetChildAtIndex(object11, 0); |
| ASSERT_TRUE(object14); |
| ASSERT_EQ(20U, |
| FPDF_StructElement_GetType(object14, buffer, sizeof(buffer))); |
| const wchar_t kExpectedObject14Type[] = L"NonStruct"; |
| EXPECT_EQ( |
| WideString(kExpectedObject14Type), |
| WideString::FromUTF16LE(buffer, FXSYS_len(kExpectedObject14Type))); |
| EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentID(object14)); |
| ASSERT_EQ(2, FPDF_StructElement_CountChildren(object14)); |
| |
| // TODO(crbug.com/pdfium/672): Object 15 should be at index 1. |
| EXPECT_FALSE(FPDF_StructElement_GetChildAtIndex(object14, 1)); |
| FPDF_STRUCTELEMENT object15 = |
| FPDF_StructElement_GetChildAtIndex(object14, 0); |
| ASSERT_TRUE(object15); |
| ASSERT_EQ(20U, |
| FPDF_StructElement_GetType(object15, buffer, sizeof(buffer))); |
| const wchar_t kExpectedObject15Type[] = L"NonStruct"; |
| EXPECT_EQ( |
| WideString(kExpectedObject15Type), |
| WideString::FromUTF16LE(buffer, FXSYS_len(kExpectedObject15Type))); |
| EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentID(object15)); |
| ASSERT_EQ(1, FPDF_StructElement_CountChildren(object15)); |
| |
| // TODO(crbug.com/pdfium/672): Fetch this child element. |
| EXPECT_FALSE(FPDF_StructElement_GetChildAtIndex(object15, 0)); |
| } |
| |
| UnloadPage(page); |
| } |