M109: Undo check for invalid URIs in FPDFAction_GetURIPath().

This effectively reverts [1], but updates comments to explain what happened historically and asks the caller to validate the output.

[1] https://pdfium.googlesource.com/pdfium.git/+/d609e84cee2e14a18333247485af91df48a40592

Bug: chromium:1396248
Change-Id: Ia965603f47b90d1751b851189a2963deb10ca8a9
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/102291
Reviewed-by: Tom Sepez <tsepez@chromium.org>
Commit-Queue: Lei Zhang <thestig@chromium.org>
(cherry picked from commit ccf7cba44041d4124b4f1f4486737a7d3fdb782e)
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/102810

commit: 89aa9f9ebe803480ec697fb39470ed106869e272 [log] [tgz]
author: Lei Zhang <thestig@chromium.org> Mon Dec 19 19:17:47 2022 +0000
committer: Lei Zhang <thestig@chromium.org> Mon Dec 19 19:17:47 2022 +0000
tree: 6fa900b3bacb0799cbf896cb88a0f0ff40b5c530
parent: 1d94eaddb44f1e8c16387e5207ef23de70cb3cd8 [diff]
diff --git a/fpdfsdk/fpdf_doc.cpp b/fpdfsdk/fpdf_doc.cpp
index 746e45c..4f57144 100644
--- a/fpdfsdk/fpdf_doc.cpp
+++ b/fpdfsdk/fpdf_doc.cpp

@@ -235,11 +235,6 @@
   CPDF_Action cAction(pdfium::WrapRetain(CPDFDictionaryFromFPDFAction(action)));
   ByteString path = cAction.GetURI(pDoc);
 
-  // Table 206 in the ISO 32000-1:2008 spec states the type for the URI field is
-  // ASCII string. If the data is not 7-bit ASCII, consider that a failure.
-  if (!path.AsStringView().IsASCII())
-    return 0;
-
   const unsigned long len =
       pdfium::base::checked_cast<unsigned long>(path.GetLength() + 1);
   if (buffer && len <= buflen)

diff --git a/fpdfsdk/fpdf_doc_embeddertest.cpp b/fpdfsdk/fpdf_doc_embeddertest.cpp
index b4889b1..f06e596 100644
--- a/fpdfsdk/fpdf_doc_embeddertest.cpp
+++ b/fpdfsdk/fpdf_doc_embeddertest.cpp

@@ -355,8 +355,18 @@
   EXPECT_EQ(static_cast<unsigned long>(PDFACTION_URI),
             FPDFAction_GetType(action));
 
-  // Call fails because the URI embedded in the PDF is invalid.
-  EXPECT_EQ(0u, FPDFAction_GetURIPath(document(), action, nullptr, 0));
+  // FPDFAction_GetURIPath() may return data in any encoding, or even with bad
+  // encoding.
+  const char kExpectedResult[] =
+      "https://example.com/\xA5octal\xC7"
+      "chars";
+  const unsigned long kExpectedLength = sizeof(kExpectedResult);
+  unsigned long bufsize = FPDFAction_GetURIPath(document(), action, nullptr, 0);
+  ASSERT_EQ(kExpectedLength, bufsize);
+
+  char buf[1024];
+  EXPECT_EQ(bufsize, FPDFAction_GetURIPath(document(), action, buf, bufsize));
+  EXPECT_STREQ(kExpectedResult, buf);
 
   UnloadPage(page);
 }

diff --git a/public/fpdf_doc.h b/public/fpdf_doc.h
index d276376..b073f4d 100644
--- a/public/fpdf_doc.h
+++ b/public/fpdf_doc.h

@@ -189,8 +189,18 @@
 // character, or 0 on error, typically because the arguments were bad or the
 // action was of the wrong type.
 //
-// The |buffer| is always encoded in 7-bit ASCII. If |buflen| is less than the
-// returned length, or |buffer| is NULL, |buffer| will not be modified.
+// The |buffer| may contain badly encoded data. The caller should validate the
+// output. e.g. Check to see if it is UTF-8.
+//
+// If |buflen| is less than the returned length, or |buffer| is NULL, |buffer|
+// will not be modified.
+//
+// Historically, the documentation for this API claimed |buffer| is always
+// encoded in 7-bit ASCII, but did not actually enforce it.
+// https://pdfium.googlesource.com/pdfium.git/+/d609e84cee2e14a18333247485af91df48a40592
+// added that enforcement, but that did not work well for real world PDFs that
+// used UTF-8. As of this writing, this API reverted back to its original
+// behavior prior to commit d609e84cee.
 FPDF_EXPORT unsigned long FPDF_CALLCONV
 FPDFAction_GetURIPath(FPDF_DOCUMENT document,
                       FPDF_ACTION action,
commit	89aa9f9ebe803480ec697fb39470ed106869e272	[log] [tgz]
author	Lei Zhang <thestig@chromium.org>	Mon Dec 19 19:17:47 2022 +0000
committer	Lei Zhang <thestig@chromium.org>	Mon Dec 19 19:17:47 2022 +0000
tree	6fa900b3bacb0799cbf896cb88a0f0ff40b5c530
parent	1d94eaddb44f1e8c16387e5207ef23de70cb3cd8 [diff]