| // Copyright 2016 PDFium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
| |
| #include "core/fpdfapi/page/cpdf_contentparser.h" |
| |
| #include "constants/page_object.h" |
| #include "core/fpdfapi/font/cpdf_type3char.h" |
| #include "core/fpdfapi/page/cpdf_allstates.h" |
| #include "core/fpdfapi/page/cpdf_form.h" |
| #include "core/fpdfapi/page/cpdf_page.h" |
| #include "core/fpdfapi/page/cpdf_pageobject.h" |
| #include "core/fpdfapi/page/cpdf_path.h" |
| #include "core/fpdfapi/parser/cpdf_array.h" |
| #include "core/fpdfapi/parser/cpdf_dictionary.h" |
| #include "core/fpdfapi/parser/cpdf_stream.h" |
| #include "core/fpdfapi/parser/cpdf_stream_acc.h" |
| #include "core/fxcrt/fx_safe_types.h" |
| #include "core/fxcrt/pauseindicator_iface.h" |
| #include "core/fxcrt/span_util.h" |
| #include "core/fxcrt/stl_util.h" |
| #include "core/fxge/cfx_fillrenderoptions.h" |
| #include "third_party/base/check.h" |
| #include "third_party/base/check_op.h" |
| |
| CPDF_ContentParser::CPDF_ContentParser(CPDF_Page* pPage) |
| : m_CurrentStage(Stage::kGetContent), m_pObjectHolder(pPage) { |
| DCHECK(pPage); |
| if (!pPage->GetDocument()) { |
| m_CurrentStage = Stage::kComplete; |
| return; |
| } |
| |
| CPDF_Object* pContent = |
| pPage->GetDict()->GetDirectObjectFor(pdfium::page_object::kContents); |
| if (!pContent) { |
| HandlePageContentFailure(); |
| return; |
| } |
| |
| CPDF_Stream* pStream = pContent->AsStream(); |
| if (pStream) { |
| HandlePageContentStream(pStream); |
| return; |
| } |
| |
| CPDF_Array* pArray = pContent->AsArray(); |
| if (pArray && HandlePageContentArray(pArray)) |
| return; |
| |
| HandlePageContentFailure(); |
| } |
| |
| CPDF_ContentParser::CPDF_ContentParser(CPDF_Form* pForm, |
| const CPDF_AllStates* pGraphicStates, |
| const CFX_Matrix* pParentMatrix, |
| CPDF_Type3Char* pType3Char, |
| std::set<const uint8_t*>* pParsedSet) |
| : m_CurrentStage(Stage::kParse), |
| m_pObjectHolder(pForm), |
| m_pType3Char(pType3Char) { |
| DCHECK(pForm); |
| CFX_Matrix form_matrix = pForm->GetDict()->GetMatrixFor("Matrix"); |
| if (pGraphicStates) |
| form_matrix.Concat(pGraphicStates->m_CTM); |
| |
| CPDF_Array* pBBox = pForm->GetDict()->GetArrayFor("BBox"); |
| CFX_FloatRect form_bbox; |
| CPDF_Path ClipPath; |
| if (pBBox) { |
| form_bbox = pBBox->GetRect(); |
| ClipPath.Emplace(); |
| ClipPath.AppendFloatRect(form_bbox); |
| ClipPath.Transform(form_matrix); |
| if (pParentMatrix) |
| ClipPath.Transform(*pParentMatrix); |
| |
| form_bbox = form_matrix.TransformRect(form_bbox); |
| if (pParentMatrix) |
| form_bbox = pParentMatrix->TransformRect(form_bbox); |
| } |
| |
| CPDF_Dictionary* pResources = pForm->GetDict()->GetDictFor("Resources"); |
| m_pParser = std::make_unique<CPDF_StreamContentParser>( |
| pForm->GetDocument(), pForm->GetPageResources(), pForm->GetResources(), |
| pParentMatrix, pForm, pResources, form_bbox, pGraphicStates, pParsedSet); |
| m_pParser->GetCurStates()->m_CTM = form_matrix; |
| m_pParser->GetCurStates()->m_ParentMatrix = form_matrix; |
| if (ClipPath.HasRef()) { |
| m_pParser->GetCurStates()->m_ClipPath.AppendPathWithAutoMerge( |
| ClipPath, CFX_FillRenderOptions::FillType::kWinding); |
| } |
| if (pForm->GetTransparency().IsGroup()) { |
| CPDF_GeneralState* pState = &m_pParser->GetCurStates()->m_GeneralState; |
| pState->SetBlendType(BlendMode::kNormal); |
| pState->SetStrokeAlpha(1.0f); |
| pState->SetFillAlpha(1.0f); |
| pState->SetSoftMask(nullptr); |
| } |
| m_pSingleStream = pdfium::MakeRetain<CPDF_StreamAcc>(pForm->GetStream()); |
| m_pSingleStream->LoadAllDataFiltered(); |
| m_pData.Reset(m_pSingleStream->GetData()); |
| m_Size = m_pSingleStream->GetSize(); |
| } |
| |
| CPDF_ContentParser::~CPDF_ContentParser() = default; |
| |
| // Returning |true| means that there is more content to be processed and |
| // Continue() should be called again. Returning |false| means that we've |
| // completed the parse and Continue() is complete. |
| bool CPDF_ContentParser::Continue(PauseIndicatorIface* pPause) { |
| while (m_CurrentStage == Stage::kGetContent) { |
| m_CurrentStage = GetContent(); |
| if (pPause && pPause->NeedToPauseNow()) |
| return true; |
| } |
| |
| if (m_CurrentStage == Stage::kPrepareContent) |
| m_CurrentStage = PrepareContent(); |
| |
| while (m_CurrentStage == Stage::kParse) { |
| m_CurrentStage = Parse(); |
| if (pPause && pPause->NeedToPauseNow()) |
| return true; |
| } |
| |
| if (m_CurrentStage == Stage::kCheckClip) |
| m_CurrentStage = CheckClip(); |
| |
| DCHECK_EQ(m_CurrentStage, Stage::kComplete); |
| return false; |
| } |
| |
| CPDF_ContentParser::Stage CPDF_ContentParser::GetContent() { |
| DCHECK_EQ(m_CurrentStage, Stage::kGetContent); |
| DCHECK(m_pObjectHolder->IsPage()); |
| CPDF_Array* pContent = |
| m_pObjectHolder->GetDict()->GetArrayFor(pdfium::page_object::kContents); |
| const CPDF_Stream* pStreamObj = ToStream( |
| pContent ? pContent->GetDirectObjectAt(m_CurrentOffset) : nullptr); |
| m_StreamArray[m_CurrentOffset] = |
| pdfium::MakeRetain<CPDF_StreamAcc>(pStreamObj); |
| m_StreamArray[m_CurrentOffset]->LoadAllDataFiltered(); |
| m_CurrentOffset++; |
| |
| return m_CurrentOffset == m_nStreams ? Stage::kPrepareContent |
| : Stage::kGetContent; |
| } |
| |
| CPDF_ContentParser::Stage CPDF_ContentParser::PrepareContent() { |
| m_CurrentOffset = 0; |
| |
| if (m_StreamArray.empty()) { |
| m_pData.Reset(m_pSingleStream->GetData()); |
| m_Size = m_pSingleStream->GetSize(); |
| return Stage::kParse; |
| } |
| |
| FX_SAFE_UINT32 safeSize = 0; |
| for (const auto& stream : m_StreamArray) { |
| m_StreamSegmentOffsets.push_back(safeSize.ValueOrDie()); |
| safeSize += stream->GetSize(); |
| safeSize += 1; |
| if (!safeSize.IsValid()) |
| return Stage::kComplete; |
| } |
| |
| m_Size = safeSize.ValueOrDie(); |
| m_pData.Reset( |
| std::unique_ptr<uint8_t, FxFreeDeleter>(FX_TryAlloc(uint8_t, m_Size))); |
| if (!m_pData) |
| return Stage::kComplete; |
| |
| size_t pos = 0; |
| auto data_span = pdfium::make_span(m_pData.Get(), m_Size); |
| for (const auto& stream : m_StreamArray) { |
| fxcrt::spancpy(data_span.subspan(pos), stream->GetSpan()); |
| pos += stream->GetSize(); |
| data_span[pos++] = ' '; |
| } |
| m_StreamArray.clear(); |
| return Stage::kParse; |
| } |
| |
| CPDF_ContentParser::Stage CPDF_ContentParser::Parse() { |
| if (!m_pParser) { |
| m_ParsedSet.clear(); |
| m_pParser = std::make_unique<CPDF_StreamContentParser>( |
| m_pObjectHolder->GetDocument(), m_pObjectHolder->GetPageResources(), |
| nullptr, nullptr, m_pObjectHolder.Get(), |
| m_pObjectHolder->GetResources(), m_pObjectHolder->GetBBox(), nullptr, |
| &m_ParsedSet); |
| m_pParser->GetCurStates()->m_ColorState.SetDefault(); |
| } |
| if (m_CurrentOffset >= m_Size) |
| return Stage::kCheckClip; |
| |
| if (m_StreamSegmentOffsets.empty()) |
| m_StreamSegmentOffsets.push_back(0); |
| |
| static constexpr uint32_t kParseStepLimit = 100; |
| m_CurrentOffset += m_pParser->Parse({m_pData.Get(), m_Size}, m_CurrentOffset, |
| kParseStepLimit, m_StreamSegmentOffsets); |
| return Stage::kParse; |
| } |
| |
| CPDF_ContentParser::Stage CPDF_ContentParser::CheckClip() { |
| if (m_pType3Char) { |
| m_pType3Char->InitializeFromStreamData(m_pParser->IsColored(), |
| m_pParser->GetType3Data()); |
| } |
| |
| for (auto& pObj : *m_pObjectHolder) { |
| if (!pObj->m_ClipPath.HasRef()) |
| continue; |
| if (pObj->m_ClipPath.GetPathCount() != 1) |
| continue; |
| if (pObj->m_ClipPath.GetTextCount() > 0) |
| continue; |
| |
| CPDF_Path ClipPath = pObj->m_ClipPath.GetPath(0); |
| if (!ClipPath.IsRect() || pObj->IsShading()) |
| continue; |
| |
| CFX_PointF point0 = ClipPath.GetPoint(0); |
| CFX_PointF point2 = ClipPath.GetPoint(2); |
| CFX_FloatRect old_rect(point0.x, point0.y, point2.x, point2.y); |
| if (old_rect.Contains(pObj->GetRect())) |
| pObj->m_ClipPath.SetNull(); |
| } |
| return Stage::kComplete; |
| } |
| |
| void CPDF_ContentParser::HandlePageContentStream(CPDF_Stream* pStream) { |
| m_pSingleStream = pdfium::MakeRetain<CPDF_StreamAcc>(pStream); |
| m_pSingleStream->LoadAllDataFiltered(); |
| m_CurrentStage = Stage::kPrepareContent; |
| } |
| |
| bool CPDF_ContentParser::HandlePageContentArray(CPDF_Array* pArray) { |
| m_nStreams = fxcrt::CollectionSize<uint32_t>(*pArray); |
| if (m_nStreams == 0) |
| return false; |
| |
| m_StreamArray.resize(m_nStreams); |
| return true; |
| } |
| |
| void CPDF_ContentParser::HandlePageContentFailure() { |
| m_CurrentStage = Stage::kComplete; |
| } |