Don't re-encode annotation /Contents Copies the /Contents string of an annotation unmodified. Bug: pdfium:1098 Change-Id: I33a20e012fefb8b8061885d224ae059dab0d8bf0 Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/106550 Commit-Queue: K. Moon <kmoon@chromium.org> Reviewed-by: Lei Zhang <thestig@chromium.org>
diff --git a/core/fpdfdoc/BUILD.gn b/core/fpdfdoc/BUILD.gn index 2e7a150..5e11adb 100644 --- a/core/fpdfdoc/BUILD.gn +++ b/core/fpdfdoc/BUILD.gn
@@ -97,6 +97,7 @@ sources = [ "cpdf_action_unittest.cpp", "cpdf_annot_unittest.cpp", + "cpdf_annotlist_unittest.cpp", "cpdf_bafontmap_unittest.cpp", "cpdf_defaultappearance_unittest.cpp", "cpdf_dest_unittest.cpp",
diff --git a/core/fpdfdoc/cpdf_annotlist.cpp b/core/fpdfdoc/cpdf_annotlist.cpp index c421e95..ff3ed81 100644 --- a/core/fpdfdoc/cpdf_annotlist.cpp +++ b/core/fpdfdoc/cpdf_annotlist.cpp
@@ -23,6 +23,7 @@ #include "core/fpdfapi/parser/cpdf_number.h" #include "core/fpdfapi/parser/cpdf_reference.h" #include "core/fpdfapi/parser/cpdf_string.h" +#include "core/fpdfapi/parser/fpdf_parser_decode.h" #include "core/fpdfapi/render/cpdf_renderoptions.h" #include "core/fpdfdoc/cpdf_annot.h" #include "core/fpdfdoc/cpdf_formfield.h" @@ -78,12 +79,13 @@ if (!pParentDict) return nullptr; - // TODO(jaepark): We shouldn't strip BOM for some strings and not for others. - // See pdfium:593. - WideString sContents = - pParentDict->GetUnicodeTextFor(pdfium::annotation::kContents); - if (sContents.IsEmpty()) + // TODO(crbug.com/pdfium/1098): Determine if we really need to check if + // /Contents is empty or not. If so, optimize decoding for empty check. + ByteString contents = + pParentDict->GetByteStringFor(pdfium::annotation::kContents); + if (PDF_DecodeText(contents.raw_span()).IsEmpty()) { return nullptr; + } auto pAnnotDict = pDocument->New<CPDF_Dictionary>(); pAnnotDict->SetNewFor<CPDF_Name>(pdfium::annotation::kType, "Annot"); @@ -91,8 +93,8 @@ pAnnotDict->SetNewFor<CPDF_String>( pdfium::form_fields::kT, pParentDict->GetByteStringFor(pdfium::form_fields::kT), false); - pAnnotDict->SetNewFor<CPDF_String>(pdfium::annotation::kContents, - sContents.ToUTF8(), false); + pAnnotDict->SetNewFor<CPDF_String>(pdfium::annotation::kContents, contents, + /*bHex=*/false); CFX_FloatRect rect = pParentDict->GetRectFor(pdfium::annotation::kRect); rect.Normalize();
diff --git a/core/fpdfdoc/cpdf_annotlist_unittest.cpp b/core/fpdfdoc/cpdf_annotlist_unittest.cpp new file mode 100644 index 0000000..5d7e1e2 --- /dev/null +++ b/core/fpdfdoc/cpdf_annotlist_unittest.cpp
@@ -0,0 +1,127 @@ +// Copyright 2023 The PDFium Authors +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "core/fpdfdoc/cpdf_annotlist.h" + +#include <stdint.h> + +#include <initializer_list> +#include <memory> + +#include "constants/annotation_common.h" +#include "core/fpdfapi/page/cpdf_page.h" +#include "core/fpdfapi/page/test_with_page_module.h" +#include "core/fpdfapi/parser/cpdf_array.h" +#include "core/fpdfapi/parser/cpdf_dictionary.h" +#include "core/fpdfapi/parser/cpdf_name.h" +#include "core/fpdfapi/parser/cpdf_string.h" +#include "core/fpdfapi/parser/cpdf_test_document.h" +#include "core/fpdfdoc/cpdf_annot.h" +#include "core/fxcrt/bytestring.h" +#include "core/fxcrt/retain_ptr.h" +#include "core/fxcrt/widestring.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace { + +class CPDFAnnotListTest : public TestWithPageModule { + public: + void SetUp() override { + TestWithPageModule::SetUp(); + + document_ = std::make_unique<CPDF_TestDocument>(); + document_->SetRoot(pdfium::MakeRetain<CPDF_Dictionary>()); + page_ = pdfium::MakeRetain<CPDF_Page>( + document_.get(), pdfium::MakeRetain<CPDF_Dictionary>()); + } + + void TearDown() override { + page_.Reset(); + document_.reset(); + + TestWithPageModule::TearDown(); + } + + protected: + void AddTextAnnotation(const ByteString& contents) { + RetainPtr<CPDF_Dictionary> annotation = + page_->GetOrCreateAnnotsArray()->AppendNew<CPDF_Dictionary>(); + annotation->SetNewFor<CPDF_Name>(pdfium::annotation::kSubtype, "Text"); + annotation->SetNewFor<CPDF_String>(pdfium::annotation::kContents, contents, + /*bHex=*/false); + } + + std::unique_ptr<CPDF_TestDocument> document_; + RetainPtr<CPDF_Page> page_; +}; + +ByteString MakeByteString(std::initializer_list<uint8_t> bytes) { + return ByteString(std::data(bytes), std::size(bytes)); +} + +ByteString GetRawContents(const CPDF_Annot* annotation) { + return annotation->GetAnnotDict()->GetByteStringFor( + pdfium::annotation::kContents); +} + +WideString GetDecodedContents(const CPDF_Annot* annotation) { + return annotation->GetAnnotDict()->GetUnicodeTextFor( + pdfium::annotation::kContents); +} + +} // namespace + +TEST_F(CPDFAnnotListTest, CreatePopupAnnotFromPdfEncoded) { + const ByteString kContents = MakeByteString({'A', 'a', 0xE4, 0xA0}); + AddTextAnnotation(kContents); + + CPDF_AnnotList list(page_); + + ASSERT_EQ(2u, list.Count()); + EXPECT_EQ(kContents, GetRawContents(list.GetAt(1))); + EXPECT_EQ(WideString::FromUTF8("Aaä€"), GetDecodedContents(list.GetAt(1))); +} + +TEST_F(CPDFAnnotListTest, CreatePopupAnnotFromUnicode) { + const ByteString kContents = + MakeByteString({0xFE, 0xFF, 0x00, 'A', 0x00, 'a', 0x00, 0xE4, 0x20, 0xAC, + 0xD8, 0x3D, 0xDC, 0xC4}); + AddTextAnnotation(kContents); + + CPDF_AnnotList list(page_); + + ASSERT_EQ(2u, list.Count()); + EXPECT_EQ(kContents, GetRawContents(list.GetAt(1))); + + // TODO(crbug.com/pdfium/2029): `WideString::FromUTF8()` mishandles '📄'. + EXPECT_EQ(WideString::FromUTF8("Aaä€\xED\xA0\xBD\xED\xB3\x84"), + GetDecodedContents(list.GetAt(1))); +} + +TEST_F(CPDFAnnotListTest, CreatePopupAnnotFromEmptyPdfEncoded) { + AddTextAnnotation(""); + + CPDF_AnnotList list(page_); + + EXPECT_EQ(1u, list.Count()); +} + +TEST_F(CPDFAnnotListTest, CreatePopupAnnotFromEmptyUnicode) { + const ByteString kContents = MakeByteString({0xFE, 0xFF}); + AddTextAnnotation(kContents); + + CPDF_AnnotList list(page_); + + EXPECT_EQ(1u, list.Count()); +} + +TEST_F(CPDFAnnotListTest, CreatePopupAnnotFromEmptyUnicodedWithEscape) { + const ByteString kContents = + MakeByteString({0xFE, 0xFF, 0x00, 0x1B, 'j', 'a', 0x00, 0x1B}); + AddTextAnnotation(kContents); + + CPDF_AnnotList list(page_); + + EXPECT_EQ(1u, list.Count()); +}