// blob: 95ae8d581e814b8a4c3cd31f124eb3bcbdb053fd [file] [log] [blame]
// Copyright 2014 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
#include "../../include/fpdfapi/fpdf_parser.h"
#include "../../include/fpdfapi/fpdf_page.h"
#include "../../include/fpdfdoc/fpdf_tagged.h"
#include "tagged_int.h"
// Upper bound on nesting depth. The recursive helpers in this file
// (AddPageNode, FindAttrDict, GetAttr) compare their level argument against
// this value and give up with NULL once it is exceeded.
const int nMaxRecursion = 32;
// Reports whether the document catalog carries a /MarkInfo dictionary whose
// /Marked entry reads as a non-zero integer.
//
// pDoc - document to inspect; a NULL document, or a document whose catalog
//        is missing, is reported as not tagged instead of being dereferenced.
static FX_BOOL IsTagged(const CPDF_Document* pDoc)
{
    if (pDoc == NULL) {
        return FALSE;
    }
    CPDF_Dictionary* pCatalog = pDoc->GetRoot();
    if (pCatalog == NULL) {
        return FALSE;
    }
    CPDF_Dictionary* pMarkInfo = pCatalog->GetDict(FX_BSTRC("MarkInfo"));
    return pMarkInfo != NULL && pMarkInfo->GetInteger(FX_BSTRC("Marked"));
}
// Creates a structure tree for one page of a tagged document.
//
// Returns NULL when IsTagged() rejects the document or when the tree object
// could not be allocated. Otherwise LoadPageTree(pPageDict) has been run on
// the returned tree.
CPDF_StructTree* CPDF_StructTree::LoadPage(const CPDF_Document* pDoc, const CPDF_Dictionary* pPageDict)
{
    CPDF_StructTreeImpl* pPageTree = NULL;
    if (IsTagged(pDoc)) {
        pPageTree = FX_NEW CPDF_StructTreeImpl(pDoc);
        if (pPageTree != NULL) {
            pPageTree->LoadPageTree(pPageDict);
        }
    }
    return pPageTree;
}
// Creates a structure tree covering the whole tagged document.
//
// Returns NULL when IsTagged() rejects the document or when the tree object
// could not be allocated. Otherwise LoadDocTree() has been run on the
// returned tree.
CPDF_StructTree* CPDF_StructTree::LoadDoc(const CPDF_Document* pDoc)
{
    CPDF_StructTreeImpl* pDocTree = NULL;
    if (IsTagged(pDoc)) {
        pDocTree = FX_NEW CPDF_StructTreeImpl(pDoc);
        if (pDocTree != NULL) {
            pDocTree->LoadDocTree();
        }
    }
    return pDocTree;
}
// Caches the /StructTreeRoot dictionary of the document catalog and, when
// present, its /RoleMap dictionary.
//
// Every pointer member is given a defined value before anything can bail
// out: previously m_pRoleMap and m_pPage were left uninitialized when the
// catalog had no /StructTreeRoot. A missing catalog is tolerated as well.
CPDF_StructTreeImpl::CPDF_StructTreeImpl(const CPDF_Document* pDoc)
{
    m_pTreeRoot = NULL;
    m_pRoleMap = NULL;
    m_pPage = NULL;
    CPDF_Dictionary* pCatalog = pDoc->GetRoot();
    if (pCatalog == NULL) {
        return;
    }
    m_pTreeRoot = pCatalog->GetDict(FX_BSTRC("StructTreeRoot"));
    if (m_pTreeRoot == NULL) {
        return;
    }
    // Stays NULL when the tree root has no /RoleMap entry.
    m_pRoleMap = m_pTreeRoot->GetDict(FX_BSTRC("RoleMap"));
}
// Drops the tree's reference on every top-level element it holds. Slots that
// were never filled (NULL) are skipped.
CPDF_StructTreeImpl::~CPDF_StructTreeImpl()
{
    for (int idx = 0; idx < m_Kids.GetSize(); ++idx) {
        if (!m_Kids[idx]) {
            continue;
        }
        m_Kids[idx]->Release();
    }
}
// Loads the complete structure tree (no page filter).
//
// m_pPage is cleared, and then one CPDF_StructElementImpl is appended to
// m_Kids for each top-level structure element named by the /K entry of the
// tree root. /K may be a single dictionary or an array; any other type leaves
// the tree empty. Loading stops silently if an element cannot be allocated.
void CPDF_StructTreeImpl::LoadDocTree()
{
    m_pPage = NULL;
    if (m_pTreeRoot == NULL) {
        return;
    }
    CPDF_Object* pKids = m_pTreeRoot->GetElementValue(FX_BSTRC("K"));
    if (pKids == NULL) {
        return;
    }
    if (pKids->GetType() == PDFOBJ_DICTIONARY) {
        CPDF_StructElementImpl* pStructElementImpl = FX_NEW CPDF_StructElementImpl(this, NULL, (CPDF_Dictionary*)pKids);
        if (pStructElementImpl == NULL) {
            return;
        }
        m_Kids.Add(pStructElementImpl);
        return;
    }
    if (pKids->GetType() != PDFOBJ_ARRAY) {
        return;
    }
    CPDF_Array* pArray = (CPDF_Array*)pKids;
    for (FX_DWORD i = 0; i < pArray->GetCount(); i ++) {
        CPDF_Dictionary* pKid = pArray->GetDict(i);
        if (pKid == NULL) {
            // The array entry is not a dictionary. The element constructor
            // dereferences its dictionary unconditionally, so a malformed
            // entry must be skipped rather than handed over.
            continue;
        }
        CPDF_StructElementImpl* pStructElementImpl = FX_NEW CPDF_StructElementImpl(this, NULL, pKid);
        if (pStructElementImpl == NULL) {
            return;
        }
        m_Kids.Add(pStructElementImpl);
    }
}
void CPDF_StructTreeImpl::LoadPageTree(const CPDF_Dictionary* pPageDict)
{
m_pPage = pPageDict;
if (m_pTreeRoot == NULL) {
return;
}
CPDF_Object* pKids = m_pTreeRoot->GetElementValue(FX_BSTRC("K"));
if (pKids == NULL) {
return;
}
FX_DWORD dwKids = 0;
if (pKids->GetType() == PDFOBJ_DICTIONARY) {
dwKids = 1;
} else if (pKids->GetType() == PDFOBJ_ARRAY) {
dwKids = ((CPDF_Array*)pKids)->GetCount();
} else {
return;
}
FX_DWORD i;
m_Kids.SetSize(dwKids);
for (i = 0; i < dwKids; i ++) {
m_Kids[i] = NULL;
}
CFX_MapPtrToPtr element_map;
CPDF_Dictionary* pParentTree = m_pTreeRoot->GetDict(FX_BSTRC("ParentTree"));
if (pParentTree == NULL) {
return;
}
CPDF_NumberTree parent_tree(pParentTree);
int parents_id = pPageDict->GetInteger(FX_BSTRC("StructParents"), -1);
if (parents_id >= 0) {
CPDF_Object* pParents = parent_tree.LookupValue(parents_id);
if (pParents == NULL || pParents->GetType() != PDFOBJ_ARRAY) {
return;
}
CPDF_Array* pParentArray = (CPDF_Array*)pParents;
for (i = 0; i < pParentArray->GetCount(); i ++) {
CPDF_Dictionary* pParent = pParentArray->GetDict(i);
if (pParent == NULL) {
continue;
}
AddPageNode(pParent, element_map);
}
}
}
// Creates (or reuses) the element object for pDict and hooks it into the
// page tree, creating its ancestors on demand.
//
// pDict  - structure element dictionary; must not be NULL.
// map    - dictionary -> element cache shared by one LoadPageTree() run.
// nLevel - current ancestor depth; the call gives up past nMaxRecursion.
//
// Returns the element when it is attached to the tree (either as a top-level
// node or as a kid of its parent element), or NULL when the depth limit is
// hit, allocation fails, or the element could not be attached anywhere.
//
// Two defects of the earlier version are fixed here:
//  * the recursive call for the parent can return NULL, which used to be
//    dereferenced unconditionally;
//  * after a failed attach the element was released (and thereby deleted)
//    but its now-dangling pointer was still returned to the caller.
CPDF_StructElementImpl* CPDF_StructTreeImpl::AddPageNode(CPDF_Dictionary* pDict, CFX_MapPtrToPtr& map, int nLevel)
{
    if (nLevel > nMaxRecursion) {
        return NULL;
    }
    CPDF_StructElementImpl* pElement = NULL;
    if (map.Lookup(pDict, (FX_LPVOID&)pElement)) {
        return pElement;
    }
    pElement = FX_NEW CPDF_StructElementImpl(this, NULL, pDict);
    if (pElement == NULL) {
        return NULL;
    }
    // Registered before recursing so a parent chain that loops back to this
    // dictionary finds the existing element instead of creating another.
    map.SetAt(pDict, pElement);
    CPDF_Dictionary* pParent = pDict->GetDict(FX_BSTRC("P"));
    if (pParent == NULL || pParent->GetString(FX_BSTRC("Type")) == FX_BSTRC("StructTreeRoot")) {
        if (!AddTopLevelNode(pDict, pElement)) {
            pElement->Release();
            map.RemoveKey(pDict);
            return NULL;
        }
        return pElement;
    }
    CPDF_StructElementImpl* pParentElement = AddPageNode(pParent, map, nLevel + 1);
    FX_BOOL bSave = FALSE;
    if (pParentElement != NULL) {
        // Fill every kid slot of the parent that refers to this dictionary.
        for (int i = 0; i < pParentElement->m_Kids.GetSize(); i ++) {
            if (pParentElement->m_Kids[i].m_Type != CPDF_StructKid::Element) {
                continue;
            }
            if (pParentElement->m_Kids[i].m_Element.m_pDict != pDict) {
                continue;
            }
            pParentElement->m_Kids[i].m_Element.m_pElement = pElement->Retain();
            bSave = TRUE;
        }
    }
    if (!bSave) {
        // Nobody took a reference: drop the element and forget it so a
        // stale pointer can be neither looked up nor returned.
        pElement->Release();
        map.RemoveKey(pDict);
        return NULL;
    }
    return pElement;
}
// Stores pElement in the top-level slot(s) of m_Kids that correspond to
// pDict within the tree root's /K entry.
//
// When /K is a dictionary, slot 0 is used if the object numbers match. When
// /K is an array, every entry that is a reference to pDict's object number
// gets its same-index slot filled. A previously stored element in a slot is
// released before being replaced.
//
// Returns FALSE when /K is missing or no matching slot exists; TRUE
// otherwise (including when /K is neither a dictionary nor an array).
FX_BOOL CPDF_StructTreeImpl::AddTopLevelNode(CPDF_Dictionary* pDict, CPDF_StructElementImpl* pElement)
{
    CPDF_Object* pRootKids = m_pTreeRoot->GetElementValue(FX_BSTRC("K"));
    if (pRootKids == NULL) {
        return FALSE;
    }
    if (pRootKids->GetType() == PDFOBJ_DICTIONARY) {
        if (pRootKids->GetObjNum() != pDict->GetObjNum()) {
            return FALSE;
        }
        if (m_Kids[0]) {
            m_Kids[0]->Release();
        }
        m_Kids[0] = pElement->Retain();
        return TRUE;
    }
    if (pRootKids->GetType() != PDFOBJ_ARRAY) {
        return TRUE;
    }
    CPDF_Array* pTopKids = (CPDF_Array*)pRootKids;
    FX_BOOL bStored = FALSE;
    for (FX_DWORD idx = 0; idx < pTopKids->GetCount(); ++idx) {
        CPDF_Reference* pKidRef = (CPDF_Reference*)pTopKids->GetElement(idx);
        // The type is checked first so GetRefObjNum() only runs on entries
        // that really are references.
        if (pKidRef->GetType() != PDFOBJ_REFERENCE) {
            continue;
        }
        if (pKidRef->GetRefObjNum() != pDict->GetObjNum()) {
            continue;
        }
        if (m_Kids[idx]) {
            m_Kids[idx]->Release();
        }
        m_Kids[idx] = pElement->Retain();
        bStored = TRUE;
    }
    return bStored;
}
// Builds a structure element for pDict.
//
// pTree   - owning tree; supplies the role map and the page filter.
// pParent - parent element, or NULL.
// pDict   - structure element dictionary; must not be NULL.
//
// The element type is read from /S and, when the tree has a role map with a
// non-empty entry for that type, replaced by the mapped name. The kids are
// then loaded via LoadKids(). The reference count starts at 0.
CPDF_StructElementImpl::CPDF_StructElementImpl(CPDF_StructTreeImpl* pTree, CPDF_StructElementImpl* pParent, CPDF_Dictionary* pDict)
    : m_RefCount(0)
{
    m_pTree = pTree;
    m_pDict = pDict;
    m_Type = pDict->GetString(FX_BSTRC("S"));
    // m_pRoleMap is NULL when the tree root has no /RoleMap dictionary, so
    // it must be checked before use; in that case the raw /S value stands.
    if (pTree->m_pRoleMap != NULL) {
        CFX_ByteString mapped = pTree->m_pRoleMap->GetString(m_Type);
        if (!mapped.IsEmpty()) {
            m_Type = mapped;
        }
    }
    m_pParent = pParent;
    LoadKids(pDict);
}
// Releases every child element object held in the kid list. Kids of other
// kinds, and element kids whose object was never created, are left alone.
CPDF_StructElementImpl::~CPDF_StructElementImpl()
{
    for (int idx = 0; idx < m_Kids.GetSize(); ++idx) {
        if (m_Kids[idx].m_Type != CPDF_StructKid::Element) {
            continue;
        }
        if (!m_Kids[idx].m_Element.m_pElement) {
            continue;
        }
        ((CPDF_StructElementImpl*)m_Kids[idx].m_Element.m_pElement)->Release();
    }
}
// Adds one reference and returns this element so the call can be used
// directly in an assignment.
CPDF_StructElementImpl* CPDF_StructElementImpl::Retain()
{
    ++m_RefCount;
    return this;
}
// Drops one reference and deletes the element once the count falls below 1.
// Since the constructor starts the count at 0, releasing an element that was
// never retained deletes it as well.
void CPDF_StructElementImpl::Release()
{
    m_RefCount -= 1;
    if (m_RefCount >= 1) {
        return;
    }
    delete this;
}
// Populates m_Kids from the /K entry of pDict.
//
// The object number referenced by /Pg (0 when /Pg is absent or not a
// reference) is passed to each kid as its default page. /K may be an array,
// giving one kid slot per entry, or any single object, giving one slot.
// A missing /K leaves m_Kids untouched.
void CPDF_StructElementImpl::LoadKids(CPDF_Dictionary* pDict)
{
    FX_DWORD PageObjNum = 0;
    CPDF_Object* pPageRef = pDict->GetElement(FX_BSTRC("Pg"));
    if (pPageRef != NULL && pPageRef->GetType() == PDFOBJ_REFERENCE) {
        PageObjNum = ((CPDF_Reference*)pPageRef)->GetRefObjNum();
    }
    CPDF_Object* pKids = pDict->GetElementValue(FX_BSTRC("K"));
    if (!pKids) {
        return;
    }
    if (pKids->GetType() != PDFOBJ_ARRAY) {
        m_Kids.SetSize(1);
        LoadKid(PageObjNum, pKids, &m_Kids[0]);
        return;
    }
    CPDF_Array* pKidArray = (CPDF_Array*)pKids;
    m_Kids.SetSize(pKidArray->GetCount());
    for (FX_DWORD idx = 0; idx < pKidArray->GetCount(); ++idx) {
        CPDF_Object* pEntry = pKidArray->GetElementValue(idx);
        LoadKid(PageObjNum, pEntry, &m_Kids[idx]);
    }
}
// Classifies one entry of a /K list and fills in *pKid accordingly.
//
// PageObjNum - page object number inherited from the enclosing element; a
//              /Pg reference on a dictionary kid overrides it.
// pKidObj    - the kid object (may be NULL).
// pKid       - slot to fill; left as Invalid when the kid is NULL, of an
//              unsupported type, or filtered out by page.
//
// Kinds produced:
//   number           -> PageContent (content id)
//   /Type /MCR dict  -> StreamContent (/Stm reference, /MCID)
//   /Type /OBJR dict -> Object (/Obj reference)
//   any other dict   -> Element (child structure element)
//
// When the tree is restricted to a page (m_pPage set), content kids on other
// pages are dropped, and Element kids only record their dictionary; the
// element object is attached later. Without a page filter the child element
// is constructed right away.
// NOTE(review): that construction recurses through LoadKids() with no depth
// limit - confirm whether cyclic /K graphs need guarding.
void CPDF_StructElementImpl::LoadKid(FX_DWORD PageObjNum, CPDF_Object* pKidObj, CPDF_StructKid* pKid)
{
    pKid->m_Type = CPDF_StructKid::Invalid;
    if (!pKidObj) {
        return;
    }
    if (pKidObj->GetType() == PDFOBJ_NUMBER) {
        if (m_pTree->m_pPage && m_pTree->m_pPage->GetObjNum() != PageObjNum) {
            return;
        }
        pKid->m_Type = CPDF_StructKid::PageContent;
        pKid->m_PageContent.m_ContentId = pKidObj->GetInteger();
        pKid->m_PageContent.m_PageObjNum = PageObjNum;
        return;
    }
    if (pKidObj->GetType() != PDFOBJ_DICTIONARY) {
        return;
    }
    CPDF_Dictionary* pKidDict = (CPDF_Dictionary*)pKidObj;
    CPDF_Object* pOwnPage = pKidDict->GetElement(FX_BSTRC("Pg"));
    if (pOwnPage && pOwnPage->GetType() == PDFOBJ_REFERENCE) {
        PageObjNum = ((CPDF_Reference*)pOwnPage)->GetRefObjNum();
    }
    CFX_ByteString kidType = pKidDict->GetString(FX_BSTRC("Type"));
    if (kidType == FX_BSTRC("MCR")) {
        if (m_pTree->m_pPage && m_pTree->m_pPage->GetObjNum() != PageObjNum) {
            return;
        }
        pKid->m_Type = CPDF_StructKid::StreamContent;
        CPDF_Object* pStm = pKidDict->GetElement(FX_BSTRC("Stm"));
        if (pStm == NULL || pStm->GetType() != PDFOBJ_REFERENCE) {
            pKid->m_StreamContent.m_RefObjNum = 0;
        } else {
            pKid->m_StreamContent.m_RefObjNum = ((CPDF_Reference*)pStm)->GetRefObjNum();
        }
        pKid->m_StreamContent.m_PageObjNum = PageObjNum;
        pKid->m_StreamContent.m_ContentId = pKidDict->GetInteger(FX_BSTRC("MCID"));
        return;
    }
    if (kidType == FX_BSTRC("OBJR")) {
        if (m_pTree->m_pPage && m_pTree->m_pPage->GetObjNum() != PageObjNum) {
            return;
        }
        pKid->m_Type = CPDF_StructKid::Object;
        CPDF_Object* pTarget = pKidDict->GetElement(FX_BSTRC("Obj"));
        if (pTarget == NULL || pTarget->GetType() != PDFOBJ_REFERENCE) {
            pKid->m_Object.m_RefObjNum = 0;
        } else {
            pKid->m_Object.m_RefObjNum = ((CPDF_Reference*)pTarget)->GetRefObjNum();
        }
        pKid->m_Object.m_PageObjNum = PageObjNum;
        return;
    }
    pKid->m_Type = CPDF_StructKid::Element;
    pKid->m_Element.m_pDict = pKidDict;
    if (m_pTree->m_pPage != NULL) {
        pKid->m_Element.m_pElement = NULL;
    } else {
        pKid->m_Element.m_pElement = FX_NEW CPDF_StructElementImpl(m_pTree, this, pKidDict);
    }
}
// Searches an attribute object for the attribute dictionary whose /O (owner)
// entry equals `owner`.
//
// pAttrs - a dictionary, a stream (its dictionary is examined), or an array
//          whose entries are searched recursively in order; may be NULL.
// owner  - owner name to match.
// nLevel - current array nesting depth; the search gives up beyond
//          nMaxRecursion. This is a plain integer counter (it used to be
//          typed as a float, which served no purpose for a depth count).
//
// Returns the first matching dictionary, or NULL when none matches.
static CPDF_Dictionary* FindAttrDict(CPDF_Object* pAttrs, FX_BSTR owner, int nLevel = 0)
{
    if (nLevel > nMaxRecursion) {
        return NULL;
    }
    if (pAttrs == NULL) {
        return NULL;
    }
    CPDF_Dictionary* pDict = NULL;
    if (pAttrs->GetType() == PDFOBJ_DICTIONARY) {
        pDict = (CPDF_Dictionary*)pAttrs;
    } else if (pAttrs->GetType() == PDFOBJ_STREAM) {
        pDict = ((CPDF_Stream*)pAttrs)->GetDict();
    } else if (pAttrs->GetType() == PDFOBJ_ARRAY) {
        CPDF_Array* pArray = (CPDF_Array*)pAttrs;
        for (FX_DWORD i = 0; i < pArray->GetCount(); i ++) {
            CPDF_Object* pElement = pArray->GetElementValue(i);
            // A recursive hit has already passed the owner check.
            CPDF_Dictionary* pFound = FindAttrDict(pElement, owner, nLevel + 1);
            if (pFound) {
                return pFound;
            }
        }
        return NULL;
    }
    if (pDict && pDict->GetString(FX_BSTRC("O")) == owner) {
        return pDict;
    }
    return NULL;
}
// Looks up attribute `name` belonging to `owner` on this element.
//
// owner        - attribute owner name, matched against /O.
// name         - attribute key.
// bInheritable - when TRUE and this element has no value, the parent chain
//                is consulted.
// fLevel       - depth of the parent walk; past nMaxRecursion NULL is
//                returned.
//
// Lookup order on the element itself: the /A attribute object first (via
// FindAttrDict), then the classes named by /C resolved through the tree
// root's /ClassMap. For an array /C, the first class whose /O matches decides
// the result, even if it lacks the key. Returns NULL when nothing is found.
CPDF_Object* CPDF_StructElementImpl::GetAttr(FX_BSTR owner, FX_BSTR name, FX_BOOL bInheritable, FX_FLOAT fLevel)
{
    if (fLevel > nMaxRecursion) {
        return NULL;
    }
    if (bInheritable) {
        CPDF_Object* pOwnValue = GetAttr(owner, name, FALSE);
        if (pOwnValue) {
            return pOwnValue;
        }
        if (m_pParent) {
            return m_pParent->GetAttr(owner, name, TRUE, fLevel + 1);
        }
        return NULL;
    }
    CPDF_Object* pAttrObj = m_pDict->GetElementValue(FX_BSTRC("A"));
    if (pAttrObj) {
        CPDF_Dictionary* pOwnerDict = FindAttrDict(pAttrObj, owner);
        if (pOwnerDict) {
            CPDF_Object* pDirect = pOwnerDict->GetElementValue(name);
            if (pDirect) {
                return pDirect;
            }
        }
    }
    CPDF_Object* pClasses = m_pDict->GetElementValue(FX_BSTRC("C"));
    if (!pClasses) {
        return NULL;
    }
    CPDF_Dictionary* pClassMap = m_pTree->m_pTreeRoot->GetDict(FX_BSTRC("ClassMap"));
    if (!pClassMap) {
        return NULL;
    }
    if (pClasses->GetType() != PDFOBJ_ARRAY) {
        // Single class name.
        CFX_ByteString single_class = pClasses->GetString();
        CPDF_Dictionary* pSingleDict = pClassMap->GetDict(single_class);
        if (pSingleDict && pSingleDict->GetString(FX_BSTRC("O")) == owner) {
            return pSingleDict->GetElementValue(name);
        }
        return NULL;
    }
    CPDF_Array* pClassArray = (CPDF_Array*)pClasses;
    for (FX_DWORD idx = 0; idx < pClassArray->GetCount(); ++idx) {
        CFX_ByteString class_name = pClassArray->GetString(idx);
        CPDF_Dictionary* pClassDict = pClassMap->GetDict(class_name);
        if (pClassDict && pClassDict->GetString(FX_BSTRC("O")) == owner) {
            return pClassDict->GetElementValue(name);
        }
    }
    return NULL;
}
// Attribute lookup with optional selection of one array entry.
//
// The attribute is fetched as in the three-argument lookup. If it is an
// array, subindex is not -1, and subindex is below the array length, the
// entry at that position is returned; in every other case the attribute
// itself (possibly NULL) is returned unchanged.
CPDF_Object* CPDF_StructElementImpl::GetAttr(FX_BSTR owner, FX_BSTR name, FX_BOOL bInheritable, int subindex)
{
    CPDF_Object* pAttr = GetAttr(owner, name, bInheritable);
    if (pAttr == NULL) {
        return pAttr;
    }
    if (subindex == -1) {
        return pAttr;
    }
    if (pAttr->GetType() != PDFOBJ_ARRAY) {
        return pAttr;
    }
    CPDF_Array* pValues = (CPDF_Array*)pAttr;
    if (subindex < (int)pValues->GetCount()) {
        return pValues->GetElementValue(subindex);
    }
    return pAttr;
}
// Returns the attribute as a string when it exists and is a name object;
// otherwise returns default_value.
CFX_ByteString CPDF_StructElementImpl::GetName(FX_BSTR owner, FX_BSTR name, FX_BSTR default_value, FX_BOOL bInheritable, int subindex)
{
    CPDF_Object* pValue = GetAttr(owner, name, bInheritable, subindex);
    if (pValue != NULL && pValue->GetType() == PDFOBJ_NAME) {
        return pValue->GetString();
    }
    return default_value;
}
// Returns the attribute as an opaque ARGB value when it exists and is an
// array; otherwise returns default_value. Array entries 0, 1 and 2 are each
// scaled by 255 and truncated to int, then packed as red, green and blue
// under an alpha of 0xff.
FX_ARGB CPDF_StructElementImpl::GetColor(FX_BSTR owner, FX_BSTR name, FX_ARGB default_value, FX_BOOL bInheritable, int subindex)
{
    CPDF_Object* pValue = GetAttr(owner, name, bInheritable, subindex);
    if (pValue == NULL) {
        return default_value;
    }
    if (pValue->GetType() != PDFOBJ_ARRAY) {
        return default_value;
    }
    CPDF_Array* pComponents = (CPDF_Array*)pValue;
    const int red = (int)(pComponents->GetNumber(0) * 255);
    const int green = (int)(pComponents->GetNumber(1) * 255);
    const int blue = (int)(pComponents->GetNumber(2) * 255);
    return 0xff000000 | (red << 16) | (green << 8) | blue;
}
// Returns the attribute as a float when it exists and is a number object;
// otherwise returns default_value.
FX_FLOAT CPDF_StructElementImpl::GetNumber(FX_BSTR owner, FX_BSTR name, FX_FLOAT default_value, FX_BOOL bInheritable, int subindex)
{
    CPDF_Object* pValue = GetAttr(owner, name, bInheritable, subindex);
    if (pValue != NULL && pValue->GetType() == PDFOBJ_NUMBER) {
        return pValue->GetNumber();
    }
    return default_value;
}
// Returns the attribute as an integer when it exists and is a number object;
// otherwise returns default_value.
int CPDF_StructElementImpl::GetInteger(FX_BSTR owner, FX_BSTR name, int default_value, FX_BOOL bInheritable, int subindex)
{
    CPDF_Object* pValue = GetAttr(owner, name, bInheritable, subindex);
    if (pValue != NULL && pValue->GetType() == PDFOBJ_NUMBER) {
        return pValue->GetInteger();
    }
    return default_value;
}