// blob: eb99020c15072c9abaff01b4a2f4bcc9cee8ba68 [file] [log] [blame]
// Copyright 2016 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
#include "core/fpdfapi/page/cpdf_contentparser.h"
#include "constants/page_object.h"
#include "core/fpdfapi/font/cpdf_type3char.h"
#include "core/fpdfapi/page/cpdf_allstates.h"
#include "core/fpdfapi/page/cpdf_form.h"
#include "core/fpdfapi/page/cpdf_page.h"
#include "core/fpdfapi/page/cpdf_pageobject.h"
#include "core/fpdfapi/page/cpdf_path.h"
#include "core/fpdfapi/parser/cpdf_array.h"
#include "core/fpdfapi/parser/cpdf_dictionary.h"
#include "core/fpdfapi/parser/cpdf_stream.h"
#include "core/fpdfapi/parser/cpdf_stream_acc.h"
#include "core/fxcrt/fx_safe_types.h"
#include "core/fxcrt/pauseindicator_iface.h"
#include "core/fxcrt/span_util.h"
#include "core/fxcrt/stl_util.h"
#include "core/fxge/cfx_fillrenderoptions.h"
#include "third_party/base/check.h"
#include "third_party/base/check_op.h"
// Creates a parser for the content of |pPage|.
//
// The constructor does no parsing itself. It looks up the page's contents
// entry (pdfium::page_object::kContents) and selects the stage that
// Continue() will start from:
//  - page without a document: the parser is immediately complete.
//  - contents is a stream: handled by HandlePageContentStream().
//  - contents is a non-empty array: handled by HandlePageContentArray(); the
//    stage stays at kGetContent as set in the initializer list.
//  - anything else (missing entry, empty array, other object type): handled
//    by HandlePageContentFailure().
CPDF_ContentParser::CPDF_ContentParser(CPDF_Page* pPage)
    : m_CurrentStage(Stage::kGetContent), m_pObjectHolder(pPage) {
  DCHECK(pPage);
  if (!pPage->GetDocument()) {
    m_CurrentStage = Stage::kComplete;
    return;
  }

  CPDF_Object* contents =
      pPage->GetDict()->GetDirectObjectFor(pdfium::page_object::kContents);
  if (contents) {
    // A single stream takes precedence over the array interpretation.
    if (CPDF_Stream* single_stream = contents->AsStream()) {
      HandlePageContentStream(single_stream);
      return;
    }

    CPDF_Array* stream_array = contents->AsArray();
    if (stream_array && HandlePageContentArray(stream_array))
      return;
  }

  // Every path that did not return above has nothing usable to parse.
  HandlePageContentFailure();
}
// Creates a parser for the content stream of |pForm|. Unlike the page
// constructor, this one starts directly in Stage::kParse: the stream parser
// is created and the form's stream data is loaded before returning.
//
// |pGraphicStates|  optional; when set, its m_CTM is concatenated onto the
//                   form's "Matrix" and it is forwarded to the stream parser.
// |pParentMatrix|   optional; when set, it is additionally applied to the
//                   form's "BBox" clip path and bounding rectangle.
// |pType3Char|      stored in m_pType3Char; consumed later by CheckClip().
// |pParsedSet|      forwarded unchanged to the stream parser.
CPDF_ContentParser::CPDF_ContentParser(CPDF_Form* pForm,
                                       const CPDF_AllStates* pGraphicStates,
                                       const CFX_Matrix* pParentMatrix,
                                       CPDF_Type3Char* pType3Char,
                                       std::set<const uint8_t*>* pParsedSet)
    : m_CurrentStage(Stage::kParse),
      m_pObjectHolder(pForm),
      m_pType3Char(pType3Char) {
  DCHECK(pForm);

  CFX_Matrix form_matrix = pForm->GetDict()->GetMatrixFor("Matrix");
  if (pGraphicStates)
    form_matrix.Concat(pGraphicStates->m_CTM);

  // When the form has a "BBox", derive both a clip path and a bounding
  // rectangle from it: form matrix first, then the parent matrix if given.
  CFX_FloatRect form_bbox;
  CPDF_Path bbox_clip;
  if (CPDF_Array* bbox_array = pForm->GetDict()->GetArrayFor("BBox")) {
    const CFX_FloatRect raw_bbox = bbox_array->GetRect();

    bbox_clip.Emplace();
    bbox_clip.AppendFloatRect(raw_bbox);
    bbox_clip.Transform(form_matrix);
    form_bbox = form_matrix.TransformRect(raw_bbox);

    if (pParentMatrix) {
      bbox_clip.Transform(*pParentMatrix);
      form_bbox = pParentMatrix->TransformRect(form_bbox);
    }
  }

  CPDF_Dictionary* form_resources =
      pForm->GetDict()->GetDictFor("Resources");
  m_pParser = std::make_unique<CPDF_StreamContentParser>(
      pForm->GetDocument(), pForm->GetPageResources(), pForm->GetResources(),
      pParentMatrix, pForm, form_resources, form_bbox, pGraphicStates,
      pParsedSet);

  CPDF_AllStates* states = m_pParser->GetCurStates();
  states->m_CTM = form_matrix;
  states->m_ParentMatrix = form_matrix;

  // The clip path only holds data if a "BBox" was found above.
  if (bbox_clip.HasRef()) {
    states->m_ClipPath.AppendPathWithAutoMerge(
        bbox_clip, CFX_FillRenderOptions::FillType::kWinding);
  }

  // For a transparency group, start from normal blending, fully opaque
  // stroke/fill alpha and no soft mask.
  if (pForm->GetTransparency().IsGroup()) {
    CPDF_GeneralState& general_state = states->m_GeneralState;
    general_state.SetBlendType(BlendMode::kNormal);
    general_state.SetStrokeAlpha(1.0f);
    general_state.SetFillAlpha(1.0f);
    general_state.SetSoftMask(nullptr);
  }

  // Load the filtered form stream and expose it as the buffer to parse.
  m_pSingleStream = pdfium::MakeRetain<CPDF_StreamAcc>(pForm->GetStream());
  m_pSingleStream->LoadAllDataFiltered();
  m_pData.Reset(m_pSingleStream->GetData());
  m_Size = m_pSingleStream->GetSize();
}
// Defaulted destructor, defined out of line in this translation unit; the
// members are destroyed by their own destructors.
CPDF_ContentParser::~CPDF_ContentParser() = default;
// Advances the parse through its stages: kGetContent (repeated),
// kPrepareContent (once), kParse (repeated), kCheckClip (once).
//
// |pPause| is optional. It is consulted after every kGetContent and kParse
// step; if it asks for a pause, the function returns early.
//
// Returns |true| when there is more content to be processed and Continue()
// should be called again. Returns |false| when the parse has finished and
// the stage is Stage::kComplete.
bool CPDF_ContentParser::Continue(PauseIndicatorIface* pPause) {
  const auto pause_requested = [pPause]() {
    return pPause && pPause->NeedToPauseNow();
  };

  while (m_CurrentStage == Stage::kGetContent) {
    m_CurrentStage = GetContent();
    if (pause_requested())
      return true;
  }

  if (m_CurrentStage == Stage::kPrepareContent)
    m_CurrentStage = PrepareContent();

  while (m_CurrentStage == Stage::kParse) {
    m_CurrentStage = Parse();
    if (pause_requested())
      return true;
  }

  if (m_CurrentStage == Stage::kCheckClip)
    m_CurrentStage = CheckClip();

  DCHECK_EQ(m_CurrentStage, Stage::kComplete);
  return false;
}
// Loads one entry of the page's contents array per call: the entry at index
// |m_CurrentOffset| is wrapped in a CPDF_StreamAcc, its filtered data is
// loaded, and the index is advanced.
//
// If the array is missing, the accessor is created from a null stream; if it
// is present, from whatever ToStream() yields for the entry.
//
// Returns Stage::kPrepareContent once |m_nStreams| entries have been
// handled, otherwise Stage::kGetContent so the caller invokes it again.
CPDF_ContentParser::Stage CPDF_ContentParser::GetContent() {
  DCHECK_EQ(m_CurrentStage, Stage::kGetContent);
  DCHECK(m_pObjectHolder->IsPage());

  CPDF_Array* contents_array =
      m_pObjectHolder->GetDict()->GetArrayFor(pdfium::page_object::kContents);
  CPDF_Object* entry = nullptr;
  if (contents_array)
    entry = contents_array->GetDirectObjectAt(m_CurrentOffset);
  const CPDF_Stream* entry_stream = ToStream(entry);

  auto& slot = m_StreamArray[m_CurrentOffset];
  slot = pdfium::MakeRetain<CPDF_StreamAcc>(entry_stream);
  slot->LoadAllDataFiltered();

  ++m_CurrentOffset;
  if (m_CurrentOffset == m_nStreams)
    return Stage::kPrepareContent;
  return Stage::kGetContent;
}
// Sets up |m_pData| / |m_Size| as the single buffer that Parse() consumes and
// resets |m_CurrentOffset| to the start of that buffer.
//
// With no entries in |m_StreamArray|, the buffer is the data of
// |m_pSingleStream|. Otherwise all streams in |m_StreamArray| are copied
// back to back into a newly allocated buffer, each followed by one ' ' byte,
// and the start offset of every stream is appended to
// |m_StreamSegmentOffsets|.
//
// Returns Stage::kParse on success. Returns Stage::kComplete if the combined
// size overflows the checked 32-bit counter or the allocation fails.
CPDF_ContentParser::Stage CPDF_ContentParser::PrepareContent() {
  m_CurrentOffset = 0;

  if (m_StreamArray.empty()) {
    m_pData.Reset(m_pSingleStream->GetData());
    m_Size = m_pSingleStream->GetSize();
    return Stage::kParse;
  }

  // First pass: record where each stream will start and compute the total
  // size, counting one separator byte per stream.
  FX_SAFE_UINT32 total_size = 0;
  for (const auto& stream_acc : m_StreamArray) {
    // |total_size| is known to be valid here: it starts at 0 and is checked
    // at the end of every iteration.
    m_StreamSegmentOffsets.push_back(total_size.ValueOrDie());
    total_size += stream_acc->GetSize();
    total_size += 1;
    if (!total_size.IsValid())
      return Stage::kComplete;
  }
  m_Size = total_size.ValueOrDie();

  m_pData.Reset(
      std::unique_ptr<uint8_t, FxFreeDeleter>(FX_TryAlloc(uint8_t, m_Size)));
  if (!m_pData)
    return Stage::kComplete;

  // Second pass: copy each stream into place and terminate it with a space.
  auto buffer = pdfium::make_span(m_pData.Get(), m_Size);
  size_t write_pos = 0;
  for (const auto& stream_acc : m_StreamArray) {
    fxcrt::spancpy(buffer.subspan(write_pos), stream_acc->GetSpan());
    write_pos += stream_acc->GetSize();
    buffer[write_pos] = ' ';
    ++write_pos;
  }

  // The per-stream accessors are no longer needed once merged.
  m_StreamArray.clear();
  return Stage::kParse;
}
// Performs one parsing step over the buffer in |m_pData|.
//
// On the first call without an existing |m_pParser| (the page case; the form
// constructor creates its parser up front), a CPDF_StreamContentParser is
// created for |m_pObjectHolder| and its colour state is set to default.
//
// Returns Stage::kCheckClip once |m_CurrentOffset| has reached |m_Size|;
// otherwise advances |m_CurrentOffset| by the value the stream parser
// returns and yields Stage::kParse so the caller invokes it again.
CPDF_ContentParser::Stage CPDF_ContentParser::Parse() {
  if (!m_pParser) {
    m_ParsedSet.clear();
    m_pParser = std::make_unique<CPDF_StreamContentParser>(
        m_pObjectHolder->GetDocument(), m_pObjectHolder->GetPageResources(),
        nullptr, nullptr, m_pObjectHolder.Get(),
        m_pObjectHolder->GetResources(), m_pObjectHolder->GetBBox(), nullptr,
        &m_ParsedSet);
    m_pParser->GetCurStates()->m_ColorState.SetDefault();
  }

  if (m_CurrentOffset >= m_Size)
    return Stage::kCheckClip;

  // A single-stream buffer has no recorded segments; treat it as one segment
  // starting at offset 0.
  if (m_StreamSegmentOffsets.empty())
    m_StreamSegmentOffsets.push_back(0);

  // Limit handed to the stream parser for this step; its exact unit is
  // defined by CPDF_StreamContentParser::Parse().
  static constexpr uint32_t kParseStepLimit = 100;
  const auto consumed =
      m_pParser->Parse({m_pData.Get(), m_Size}, m_CurrentOffset,
                       kParseStepLimit, m_StreamSegmentOffsets);
  m_CurrentOffset += consumed;
  return Stage::kParse;
}
// Final stage of the parse. Always returns Stage::kComplete.
//
// If a Type3 character was supplied at construction, it is initialized from
// the stream parser's results. Then every object in |m_pObjectHolder| has
// its clip path cleared when that clip is redundant: exactly one path, no
// text clips, the path is a rectangle, the object is not a shading, and the
// rectangle built from the path's points 0 and 2 contains the object's rect.
CPDF_ContentParser::Stage CPDF_ContentParser::CheckClip() {
  if (m_pType3Char) {
    m_pType3Char->InitializeFromStreamData(m_pParser->IsColored(),
                                           m_pParser->GetType3Data());
  }

  for (auto& pObj : *m_pObjectHolder) {
    auto& clip = pObj->m_ClipPath;
    if (!clip.HasRef() || clip.GetPathCount() != 1 ||
        clip.GetTextCount() > 0) {
      continue;
    }

    CPDF_Path only_path = clip.GetPath(0);
    if (!only_path.IsRect() || pObj->IsShading())
      continue;

    // Points 0 and 2 are used as the two corners defining the clip rectangle.
    const CFX_PointF corner_a = only_path.GetPoint(0);
    const CFX_PointF corner_b = only_path.GetPoint(2);
    CFX_FloatRect clip_rect(corner_a.x, corner_a.y, corner_b.x, corner_b.y);
    if (clip_rect.Contains(pObj->GetRect()))
      clip.SetNull();
  }
  return Stage::kComplete;
}
// Handles page contents given as a single stream: wraps |pStream| in
// |m_pSingleStream|, loads its filtered data, and moves the parser to
// Stage::kPrepareContent, skipping the per-array-entry kGetContent stage.
void CPDF_ContentParser::HandlePageContentStream(CPDF_Stream* pStream) {
  m_CurrentStage = Stage::kPrepareContent;
  m_pSingleStream = pdfium::MakeRetain<CPDF_StreamAcc>(pStream);
  m_pSingleStream->LoadAllDataFiltered();
}
// Handles page contents given as an array: records the entry count in
// |m_nStreams| and, if it is non-zero, sizes |m_StreamArray| to hold one
// accessor per entry.
//
// Returns |true| when the array has at least one entry, |false| when it is
// empty (in which case |m_StreamArray| is left untouched).
bool CPDF_ContentParser::HandlePageContentArray(CPDF_Array* pArray) {
  m_nStreams = fxcrt::CollectionSize<uint32_t>(*pArray);
  const bool has_entries = m_nStreams != 0;
  if (has_entries)
    m_StreamArray.resize(m_nStreams);
  return has_entries;
}
// Called when the page has no usable contents: marks the parser as complete
// so that Continue() performs no work and returns |false|.
void CPDF_ContentParser::HandlePageContentFailure() {
m_CurrentStage = Stage::kComplete;
}