Don't re-encode annotation /Contents
Copies the /Contents string of an annotation unmodified.
Bug: pdfium:1098
Change-Id: I33a20e012fefb8b8061885d224ae059dab0d8bf0
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/106550
Commit-Queue: K. Moon <kmoon@chromium.org>
Reviewed-by: Lei Zhang <thestig@chromium.org>
diff --git a/core/fpdfdoc/BUILD.gn b/core/fpdfdoc/BUILD.gn
index 2e7a150..5e11adb 100644
--- a/core/fpdfdoc/BUILD.gn
+++ b/core/fpdfdoc/BUILD.gn
@@ -97,6 +97,7 @@
sources = [
"cpdf_action_unittest.cpp",
"cpdf_annot_unittest.cpp",
+ "cpdf_annotlist_unittest.cpp",
"cpdf_bafontmap_unittest.cpp",
"cpdf_defaultappearance_unittest.cpp",
"cpdf_dest_unittest.cpp",
diff --git a/core/fpdfdoc/cpdf_annotlist.cpp b/core/fpdfdoc/cpdf_annotlist.cpp
index c421e95..ff3ed81 100644
--- a/core/fpdfdoc/cpdf_annotlist.cpp
+++ b/core/fpdfdoc/cpdf_annotlist.cpp
@@ -23,6 +23,7 @@
#include "core/fpdfapi/parser/cpdf_number.h"
#include "core/fpdfapi/parser/cpdf_reference.h"
#include "core/fpdfapi/parser/cpdf_string.h"
+#include "core/fpdfapi/parser/fpdf_parser_decode.h"
#include "core/fpdfapi/render/cpdf_renderoptions.h"
#include "core/fpdfdoc/cpdf_annot.h"
#include "core/fpdfdoc/cpdf_formfield.h"
@@ -78,12 +79,13 @@
if (!pParentDict)
return nullptr;
- // TODO(jaepark): We shouldn't strip BOM for some strings and not for others.
- // See pdfium:593.
- WideString sContents =
- pParentDict->GetUnicodeTextFor(pdfium::annotation::kContents);
- if (sContents.IsEmpty())
+ // TODO(crbug.com/pdfium/1098): Determine if we really need to check if
+ // /Contents is empty or not. If so, optimize decoding for empty check.
+ ByteString contents =
+ pParentDict->GetByteStringFor(pdfium::annotation::kContents);
+ if (PDF_DecodeText(contents.raw_span()).IsEmpty()) {
return nullptr;
+ }
auto pAnnotDict = pDocument->New<CPDF_Dictionary>();
pAnnotDict->SetNewFor<CPDF_Name>(pdfium::annotation::kType, "Annot");
@@ -91,8 +93,8 @@
pAnnotDict->SetNewFor<CPDF_String>(
pdfium::form_fields::kT,
pParentDict->GetByteStringFor(pdfium::form_fields::kT), false);
- pAnnotDict->SetNewFor<CPDF_String>(pdfium::annotation::kContents,
- sContents.ToUTF8(), false);
+ pAnnotDict->SetNewFor<CPDF_String>(pdfium::annotation::kContents, contents,
+ /*bHex=*/false);
CFX_FloatRect rect = pParentDict->GetRectFor(pdfium::annotation::kRect);
rect.Normalize();
diff --git a/core/fpdfdoc/cpdf_annotlist_unittest.cpp b/core/fpdfdoc/cpdf_annotlist_unittest.cpp
new file mode 100644
index 0000000..5d7e1e2
--- /dev/null
+++ b/core/fpdfdoc/cpdf_annotlist_unittest.cpp
@@ -0,0 +1,127 @@
+// Copyright 2023 The PDFium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "core/fpdfdoc/cpdf_annotlist.h"
+
+#include <stdint.h>
+
+#include <initializer_list>
+#include <memory>
+
+#include "constants/annotation_common.h"
+#include "core/fpdfapi/page/cpdf_page.h"
+#include "core/fpdfapi/page/test_with_page_module.h"
+#include "core/fpdfapi/parser/cpdf_array.h"
+#include "core/fpdfapi/parser/cpdf_dictionary.h"
+#include "core/fpdfapi/parser/cpdf_name.h"
+#include "core/fpdfapi/parser/cpdf_string.h"
+#include "core/fpdfapi/parser/cpdf_test_document.h"
+#include "core/fpdfdoc/cpdf_annot.h"
+#include "core/fxcrt/bytestring.h"
+#include "core/fxcrt/retain_ptr.h"
+#include "core/fxcrt/widestring.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace {
+
+class CPDFAnnotListTest : public TestWithPageModule {
+ public:
+ void SetUp() override {
+ TestWithPageModule::SetUp();
+
+ document_ = std::make_unique<CPDF_TestDocument>();
+ document_->SetRoot(pdfium::MakeRetain<CPDF_Dictionary>());
+ page_ = pdfium::MakeRetain<CPDF_Page>(
+ document_.get(), pdfium::MakeRetain<CPDF_Dictionary>());
+ }
+
+ void TearDown() override {
+ page_.Reset();
+ document_.reset();
+
+ TestWithPageModule::TearDown();
+ }
+
+ protected:
+ void AddTextAnnotation(const ByteString& contents) {
+ RetainPtr<CPDF_Dictionary> annotation =
+ page_->GetOrCreateAnnotsArray()->AppendNew<CPDF_Dictionary>();
+ annotation->SetNewFor<CPDF_Name>(pdfium::annotation::kSubtype, "Text");
+ annotation->SetNewFor<CPDF_String>(pdfium::annotation::kContents, contents,
+ /*bHex=*/false);
+ }
+
+ std::unique_ptr<CPDF_TestDocument> document_;
+ RetainPtr<CPDF_Page> page_;
+};
+
+ByteString MakeByteString(std::initializer_list<uint8_t> bytes) {
+ return ByteString(std::data(bytes), std::size(bytes));
+}
+
+ByteString GetRawContents(const CPDF_Annot* annotation) {
+ return annotation->GetAnnotDict()->GetByteStringFor(
+ pdfium::annotation::kContents);
+}
+
+WideString GetDecodedContents(const CPDF_Annot* annotation) {
+ return annotation->GetAnnotDict()->GetUnicodeTextFor(
+ pdfium::annotation::kContents);
+}
+
+} // namespace
+
+TEST_F(CPDFAnnotListTest, CreatePopupAnnotFromPdfEncoded) {
+ const ByteString kContents = MakeByteString({'A', 'a', 0xE4, 0xA0});
+ AddTextAnnotation(kContents);
+
+ CPDF_AnnotList list(page_);
+
+ ASSERT_EQ(2u, list.Count());
+ EXPECT_EQ(kContents, GetRawContents(list.GetAt(1)));
+ EXPECT_EQ(WideString::FromUTF8("Aaä€"), GetDecodedContents(list.GetAt(1)));
+}
+
+TEST_F(CPDFAnnotListTest, CreatePopupAnnotFromUnicode) {
+ const ByteString kContents =
+ MakeByteString({0xFE, 0xFF, 0x00, 'A', 0x00, 'a', 0x00, 0xE4, 0x20, 0xAC,
+ 0xD8, 0x3D, 0xDC, 0xC4});
+ AddTextAnnotation(kContents);
+
+ CPDF_AnnotList list(page_);
+
+ ASSERT_EQ(2u, list.Count());
+ EXPECT_EQ(kContents, GetRawContents(list.GetAt(1)));
+
+ // TODO(crbug.com/pdfium/2029): `WideString::FromUTF8()` mishandles '📄'.
+ EXPECT_EQ(WideString::FromUTF8("Aaä€\xED\xA0\xBD\xED\xB3\x84"),
+ GetDecodedContents(list.GetAt(1)));
+}
+
+TEST_F(CPDFAnnotListTest, CreatePopupAnnotFromEmptyPdfEncoded) {
+ AddTextAnnotation("");
+
+ CPDF_AnnotList list(page_);
+
+ EXPECT_EQ(1u, list.Count());
+}
+
+TEST_F(CPDFAnnotListTest, CreatePopupAnnotFromEmptyUnicode) {
+ const ByteString kContents = MakeByteString({0xFE, 0xFF});
+ AddTextAnnotation(kContents);
+
+ CPDF_AnnotList list(page_);
+
+ EXPECT_EQ(1u, list.Count());
+}
+
+TEST_F(CPDFAnnotListTest, CreatePopupAnnotFromEmptyUnicodedWithEscape) {
+ const ByteString kContents =
+ MakeByteString({0xFE, 0xFF, 0x00, 0x1B, 'j', 'a', 0x00, 0x1B});
+ AddTextAnnotation(kContents);
+
+ CPDF_AnnotList list(page_);
+
+ EXPECT_EQ(1u, list.Count());
+}