blob: 07e172b6a9445d73892bd1cba9ae4c2a7d1b5669 [file] [log] [blame]
// Copyright 2016 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
#include "core/fpdfdoc/cpdf_nametree.h"
#include <utility>
#include <vector>
#include "core/fpdfapi/parser/cpdf_array.h"
#include "core/fpdfapi/parser/cpdf_dictionary.h"
#include "core/fpdfapi/parser/cpdf_document.h"
#include "core/fpdfapi/parser/cpdf_reference.h"
#include "core/fpdfapi/parser/cpdf_string.h"
#include "core/fpdfapi/parser/fpdf_parser_decode.h"
#include "third_party/base/ptr_util.h"
namespace {
constexpr int kNameTreeMaxRecursion = 32;
std::pair<WideString, WideString> GetNodeLimitsMaybeSwap(CPDF_Array* pLimits) {
ASSERT(pLimits);
WideString csLeft = pLimits->GetUnicodeTextAt(0);
WideString csRight = pLimits->GetUnicodeTextAt(1);
// If the lower limit is greater than the upper limit, swap them.
if (csLeft.Compare(csRight) > 0) {
pLimits->SetNewAt<CPDF_String>(0, csRight);
pLimits->SetNewAt<CPDF_String>(1, csLeft);
csLeft = pLimits->GetUnicodeTextAt(0);
csRight = pLimits->GetUnicodeTextAt(1);
}
return {csLeft, csRight};
}
// Get the limit arrays that leaf array |pFind| is under in the tree with root
// |pNode|. |pLimits| will hold all the limit arrays from the leaf up to before
// the root. Return true if successful.
bool GetNodeAncestorsLimits(CPDF_Dictionary* pNode,
const CPDF_Array* pFind,
int nLevel,
std::vector<CPDF_Array*>* pLimits) {
if (nLevel > kNameTreeMaxRecursion)
return false;
if (pNode->GetArrayFor("Names") == pFind) {
pLimits->push_back(pNode->GetArrayFor("Limits"));
return true;
}
CPDF_Array* pKids = pNode->GetArrayFor("Kids");
if (!pKids)
return false;
for (size_t i = 0; i < pKids->size(); ++i) {
CPDF_Dictionary* pKid = pKids->GetDictAt(i);
if (!pKid)
continue;
if (GetNodeAncestorsLimits(pKid, pFind, nLevel + 1, pLimits)) {
pLimits->push_back(pNode->GetArrayFor("Limits"));
return true;
}
}
return false;
}
// Upon the deletion of |csName| from leaf array |pFind|, update the ancestors
// of |pFind|. Specifically, the limits of |pFind|'s ancestors will be updated
// if needed, and any ancestors that are now empty will be removed.
bool UpdateNodesAndLimitsUponDeletion(CPDF_Dictionary* pNode,
const CPDF_Array* pFind,
const WideString& csName,
int nLevel) {
if (nLevel > kNameTreeMaxRecursion)
return false;
CPDF_Array* pLimits = pNode->GetArrayFor("Limits");
WideString csLeft;
WideString csRight;
if (pLimits)
std::tie(csLeft, csRight) = GetNodeLimitsMaybeSwap(pLimits);
CPDF_Array* pNames = pNode->GetArrayFor("Names");
if (pNames) {
if (pNames != pFind)
return false;
if (pNames->IsEmpty() || !pLimits)
return true;
if (csLeft != csName && csRight != csName)
return true;
// Since |csName| defines |pNode|'s limits, we need to loop through the
// names to find the new lower and upper limits.
WideString csNewLeft = csRight;
WideString csNewRight = csLeft;
for (size_t i = 0; i < pNames->size() / 2; ++i) {
WideString wsName = pNames->GetUnicodeTextAt(i * 2);
if (wsName.Compare(csNewLeft) < 0)
csNewLeft = wsName;
if (wsName.Compare(csNewRight) > 0)
csNewRight = wsName;
}
pLimits->SetNewAt<CPDF_String>(0, csNewLeft);
pLimits->SetNewAt<CPDF_String>(1, csNewRight);
return true;
}
CPDF_Array* pKids = pNode->GetArrayFor("Kids");
if (!pKids)
return false;
// Loop through the kids to find the leaf array |pFind|.
for (size_t i = 0; i < pKids->size(); ++i) {
CPDF_Dictionary* pKid = pKids->GetDictAt(i);
if (!pKid)
continue;
if (!UpdateNodesAndLimitsUponDeletion(pKid, pFind, csName, nLevel + 1))
continue;
// Remove this child node if it's empty.
if ((pKid->KeyExist("Names") && pKid->GetArrayFor("Names")->IsEmpty()) ||
(pKid->KeyExist("Kids") && pKid->GetArrayFor("Kids")->IsEmpty())) {
pKids->RemoveAt(i);
}
if (pKids->IsEmpty() || !pLimits)
return true;
if (csLeft != csName && csRight != csName)
return true;
// Since |csName| defines |pNode|'s limits, we need to loop through the
// kids to find the new lower and upper limits.
WideString csNewLeft = csRight;
WideString csNewRight = csLeft;
for (size_t j = 0; j < pKids->size(); ++j) {
CPDF_Array* pKidLimits = pKids->GetDictAt(j)->GetArrayFor("Limits");
ASSERT(pKidLimits);
if (pKidLimits->GetUnicodeTextAt(0).Compare(csNewLeft) < 0)
csNewLeft = pKidLimits->GetUnicodeTextAt(0);
if (pKidLimits->GetUnicodeTextAt(1).Compare(csNewRight) > 0)
csNewRight = pKidLimits->GetUnicodeTextAt(1);
}
pLimits->SetNewAt<CPDF_String>(0, csNewLeft);
pLimits->SetNewAt<CPDF_String>(1, csNewRight);
return true;
}
return false;
}
// Search for |csName| in the tree with root |pNode|. If successful, return the
// value that |csName| points to; |nIndex| will be the index of |csName|,
// |ppFind| will be the leaf array that |csName| is found in, and |pFindIndex|
// will be the index of |csName| in |ppFind|. If |csName| is not found, |ppFind|
// will be the leaf array that |csName| should be added to, and |pFindIndex|
// will be the index that it should be added at.
CPDF_Object* SearchNameNodeByName(CPDF_Dictionary* pNode,
const WideString& csName,
int nLevel,
size_t* nIndex,
CPDF_Array** ppFind,
int* pFindIndex) {
if (nLevel > kNameTreeMaxRecursion)
return nullptr;
CPDF_Array* pLimits = pNode->GetArrayFor("Limits");
CPDF_Array* pNames = pNode->GetArrayFor("Names");
if (pLimits) {
WideString csLeft;
WideString csRight;
std::tie(csLeft, csRight) = GetNodeLimitsMaybeSwap(pLimits);
// Skip this node if the name to look for is smaller than its lower limit.
if (csName.Compare(csLeft) < 0)
return nullptr;
// Skip this node if the name to look for is greater than its higher limit,
// and the node itself is a leaf node.
if (csName.Compare(csRight) > 0 && pNames) {
if (ppFind)
*ppFind = pNames;
if (pFindIndex)
*pFindIndex = pNames->size() / 2 - 1;
return nullptr;
}
}
// If the node is a leaf node, look for the name in its names array.
if (pNames) {
size_t dwCount = pNames->size() / 2;
for (size_t i = 0; i < dwCount; i++) {
WideString csValue = pNames->GetUnicodeTextAt(i * 2);
int32_t iCompare = csValue.Compare(csName);
if (iCompare > 0)
break;
if (ppFind)
*ppFind = pNames;
if (pFindIndex)
*pFindIndex = i;
if (iCompare < 0)
continue;
*nIndex += i;
return pNames->GetDirectObjectAt(i * 2 + 1);
}
*nIndex += dwCount;
return nullptr;
}
// Search through the node's children.
CPDF_Array* pKids = pNode->GetArrayFor("Kids");
if (!pKids)
return nullptr;
for (size_t i = 0; i < pKids->size(); i++) {
CPDF_Dictionary* pKid = pKids->GetDictAt(i);
if (!pKid)
continue;
CPDF_Object* pFound = SearchNameNodeByName(pKid, csName, nLevel + 1, nIndex,
ppFind, pFindIndex);
if (pFound)
return pFound;
}
return nullptr;
}
// Get the key-value pair at |nIndex| in the tree with root |pNode|. If
// successful, return the value object; |csName| will be the key, |ppFind|
// will be the leaf array that this pair is in, and |pFindIndex| will be the
// index of the pair in |pFind|.
CPDF_Object* SearchNameNodeByIndex(CPDF_Dictionary* pNode,
size_t nIndex,
int nLevel,
size_t* nCurIndex,
WideString* csName,
CPDF_Array** ppFind,
int* pFindIndex) {
if (nLevel > kNameTreeMaxRecursion)
return nullptr;
CPDF_Array* pNames = pNode->GetArrayFor("Names");
if (pNames) {
size_t nCount = pNames->size() / 2;
if (nIndex >= *nCurIndex + nCount) {
*nCurIndex += nCount;
return nullptr;
}
if (ppFind)
*ppFind = pNames;
if (pFindIndex)
*pFindIndex = nIndex - *nCurIndex;
*csName = pNames->GetUnicodeTextAt((nIndex - *nCurIndex) * 2);
return pNames->GetDirectObjectAt((nIndex - *nCurIndex) * 2 + 1);
}
CPDF_Array* pKids = pNode->GetArrayFor("Kids");
if (!pKids)
return nullptr;
for (size_t i = 0; i < pKids->size(); i++) {
CPDF_Dictionary* pKid = pKids->GetDictAt(i);
if (!pKid)
continue;
CPDF_Object* pFound = SearchNameNodeByIndex(
pKid, nIndex, nLevel + 1, nCurIndex, csName, ppFind, pFindIndex);
if (pFound)
return pFound;
}
return nullptr;
}
// Get the total number of key-value pairs in the tree with root |pNode|.
size_t CountNamesInternal(CPDF_Dictionary* pNode, int nLevel) {
if (nLevel > kNameTreeMaxRecursion)
return 0;
CPDF_Array* pNames = pNode->GetArrayFor("Names");
if (pNames)
return pNames->size() / 2;
CPDF_Array* pKids = pNode->GetArrayFor("Kids");
if (!pKids)
return 0;
size_t nCount = 0;
for (size_t i = 0; i < pKids->size(); i++) {
CPDF_Dictionary* pKid = pKids->GetDictAt(i);
if (!pKid)
continue;
nCount += CountNamesInternal(pKid, nLevel + 1);
}
return nCount;
}
CPDF_Array* GetNamedDestFromObject(CPDF_Object* obj) {
if (!obj)
return nullptr;
CPDF_Array* array = obj->AsArray();
if (array)
return array;
CPDF_Dictionary* dict = obj->AsDictionary();
if (dict)
return dict->GetArrayFor("D");
return nullptr;
}
CPDF_Array* LookupOldStyleNamedDest(CPDF_Document* pDoc,
const ByteString& name) {
CPDF_Dictionary* pDests = pDoc->GetRoot()->GetDictFor("Dests");
if (!pDests)
return nullptr;
return GetNamedDestFromObject(pDests->GetDirectObjectFor(name));
}
} // namespace
CPDF_NameTree::CPDF_NameTree(CPDF_Dictionary* pRoot) : m_pRoot(pRoot) {
ASSERT(m_pRoot);
}
CPDF_NameTree::~CPDF_NameTree() = default;
// static
std::unique_ptr<CPDF_NameTree> CPDF_NameTree::Create(
CPDF_Document* pDoc,
const ByteString& category) {
CPDF_Dictionary* pRoot = pDoc->GetRoot();
if (!pRoot)
return nullptr;
CPDF_Dictionary* pNames = pRoot->GetDictFor("Names");
if (!pNames)
return nullptr;
CPDF_Dictionary* pCategory = pNames->GetDictFor(category);
if (!pCategory)
return nullptr;
return pdfium::WrapUnique(new CPDF_NameTree(pCategory)); // Private ctor.
}
// static
std::unique_ptr<CPDF_NameTree> CPDF_NameTree::CreateWithRootNameArray(
CPDF_Document* pDoc,
const ByteString& category) {
CPDF_Dictionary* pRoot = pDoc->GetRoot();
if (!pRoot)
return nullptr;
// Retrieve the document's Names dictionary; create it if missing.
CPDF_Dictionary* pNames = pRoot->GetDictFor("Names");
if (!pNames) {
pNames = pDoc->NewIndirect<CPDF_Dictionary>();
pRoot->SetNewFor<CPDF_Reference>("Names", pDoc, pNames->GetObjNum());
}
// Create the |category| dictionary if missing.
CPDF_Dictionary* pCategory = pNames->GetDictFor(category);
if (!pCategory) {
pCategory = pDoc->NewIndirect<CPDF_Dictionary>();
pCategory->SetNewFor<CPDF_Array>("Names");
pNames->SetNewFor<CPDF_Reference>(category, pDoc, pCategory->GetObjNum());
}
return pdfium::WrapUnique(new CPDF_NameTree(pCategory)); // Private ctor.
}
// static
std::unique_ptr<CPDF_NameTree> CPDF_NameTree::CreateForTesting(
CPDF_Dictionary* pRoot) {
return pdfium::WrapUnique(new CPDF_NameTree(pRoot)); // Private ctor.
}
// static
CPDF_Array* CPDF_NameTree::LookupNamedDest(CPDF_Document* pDoc,
const ByteString& name) {
CPDF_Array* dest_array = nullptr;
std::unique_ptr<CPDF_NameTree> name_tree = Create(pDoc, "Dests");
if (name_tree)
dest_array = name_tree->LookupNewStyleNamedDest(name);
if (!dest_array)
dest_array = LookupOldStyleNamedDest(pDoc, name);
return dest_array;
}
size_t CPDF_NameTree::GetCount() const {
return CountNamesInternal(m_pRoot.Get(), 0);
}
bool CPDF_NameTree::AddValueAndName(RetainPtr<CPDF_Object> pObj,
const WideString& name) {
size_t nIndex = 0;
CPDF_Array* pFind = nullptr;
int nFindIndex = -1;
// Handle the corner case where the root node is empty. i.e. No kids and no
// names. In which case, just insert into it and skip all the searches.
CPDF_Array* pNames = m_pRoot->GetArrayFor("Names");
if (pNames && pNames->IsEmpty() && !m_pRoot->GetArrayFor("Kids"))
pFind = pNames;
if (!pFind) {
// Fail if the tree already contains this name or if the tree is too deep.
if (SearchNameNodeByName(m_pRoot.Get(), name, 0, &nIndex, &pFind,
&nFindIndex)) {
return false;
}
}
// If the returned |pFind| is a nullptr, then |name| is smaller than all
// existing entries in the tree, and we did not find a leaf array to place
// |name| into. We instead will find the leftmost leaf array in which to place
// |name| and |pObj|.
if (!pFind) {
size_t nCurIndex = 0;
WideString csName;
SearchNameNodeByIndex(m_pRoot.Get(), 0, 0, &nCurIndex, &csName, &pFind,
nullptr);
}
// Give up if that fails too.
if (!pFind)
return false;
// Insert the name and the object into the leaf array found. Note that the
// insertion position is right after the key-value pair returned by |index|.
size_t nNameIndex = (nFindIndex + 1) * 2;
size_t nValueIndex = nNameIndex + 1;
pFind->InsertNewAt<CPDF_String>(nNameIndex, name);
pFind->InsertAt(nValueIndex, std::move(pObj));
// Expand the limits that the newly added name is under, if the name falls
// outside of the limits of its leaf array or any arrays above it.
std::vector<CPDF_Array*> pLimits;
GetNodeAncestorsLimits(m_pRoot.Get(), pFind, 0, &pLimits);
for (auto* pLimit : pLimits) {
if (!pLimit)
continue;
if (name.Compare(pLimit->GetUnicodeTextAt(0)) < 0)
pLimit->SetNewAt<CPDF_String>(0, name);
if (name.Compare(pLimit->GetUnicodeTextAt(1)) > 0)
pLimit->SetNewAt<CPDF_String>(1, name);
}
return true;
}
bool CPDF_NameTree::DeleteValueAndName(int nIndex) {
size_t nCurIndex = 0;
WideString csName;
CPDF_Array* pFind = nullptr;
int nFindIndex = -1;
// Fail if the tree does not contain |nIndex|.
if (!SearchNameNodeByIndex(m_pRoot.Get(), nIndex, 0, &nCurIndex, &csName,
&pFind, &nFindIndex)) {
return false;
}
// Remove the name and the object from the leaf array |pFind|.
pFind->RemoveAt(nFindIndex * 2);
pFind->RemoveAt(nFindIndex * 2);
// Delete empty nodes and update the limits of |pFind|'s ancestors as needed.
UpdateNodesAndLimitsUponDeletion(m_pRoot.Get(), pFind, csName, 0);
return true;
}
CPDF_Object* CPDF_NameTree::LookupValueAndName(int nIndex,
WideString* csName) const {
csName->clear();
size_t nCurIndex = 0;
return SearchNameNodeByIndex(m_pRoot.Get(), nIndex, 0, &nCurIndex, csName,
nullptr, nullptr);
}
CPDF_Object* CPDF_NameTree::LookupValue(const WideString& csName) const {
size_t nIndex = 0;
return SearchNameNodeByName(m_pRoot.Get(), csName, 0, &nIndex, nullptr,
nullptr);
}
CPDF_Array* CPDF_NameTree::LookupNewStyleNamedDest(const ByteString& sName) {
return GetNamedDestFromObject(LookupValue(PDF_DecodeText(sName.raw_span())));
}