M109: Undo check for invalid URIs in FPDFAction_GetURIPath().
This effectively reverts [1], but updates comments to explain what
happened historically and asks the caller to validate the output.
[1] https://pdfium.googlesource.com/pdfium.git/+/d609e84cee2e14a18333247485af91df48a40592
Bug: chromium:1396248
Change-Id: Ia965603f47b90d1751b851189a2963deb10ca8a9
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/102291
Reviewed-by: Tom Sepez <tsepez@chromium.org>
Commit-Queue: Lei Zhang <thestig@chromium.org>
(cherry picked from commit ccf7cba44041d4124b4f1f4486737a7d3fdb782e)
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/102810
diff --git a/fpdfsdk/fpdf_doc.cpp b/fpdfsdk/fpdf_doc.cpp
index 746e45c..4f57144 100644
--- a/fpdfsdk/fpdf_doc.cpp
+++ b/fpdfsdk/fpdf_doc.cpp
@@ -235,11 +235,6 @@
CPDF_Action cAction(pdfium::WrapRetain(CPDFDictionaryFromFPDFAction(action)));
ByteString path = cAction.GetURI(pDoc);
- // Table 206 in the ISO 32000-1:2008 spec states the type for the URI field is
- // ASCII string. If the data is not 7-bit ASCII, consider that a failure.
- if (!path.AsStringView().IsASCII())
- return 0;
-
const unsigned long len =
pdfium::base::checked_cast<unsigned long>(path.GetLength() + 1);
if (buffer && len <= buflen)
diff --git a/fpdfsdk/fpdf_doc_embeddertest.cpp b/fpdfsdk/fpdf_doc_embeddertest.cpp
index b4889b1..f06e596 100644
--- a/fpdfsdk/fpdf_doc_embeddertest.cpp
+++ b/fpdfsdk/fpdf_doc_embeddertest.cpp
@@ -355,8 +355,18 @@
EXPECT_EQ(static_cast<unsigned long>(PDFACTION_URI),
FPDFAction_GetType(action));
- // Call fails because the URI embedded in the PDF is invalid.
- EXPECT_EQ(0u, FPDFAction_GetURIPath(document(), action, nullptr, 0));
+ // FPDFAction_GetURIPath() may return data in any encoding, or even with bad
+ // encoding.
+ const char kExpectedResult[] =
+ "https://example.com/\xA5octal\xC7"
+ "chars";
+ const unsigned long kExpectedLength = sizeof(kExpectedResult);
+ unsigned long bufsize = FPDFAction_GetURIPath(document(), action, nullptr, 0);
+ ASSERT_EQ(kExpectedLength, bufsize);
+
+ char buf[1024];
+ EXPECT_EQ(bufsize, FPDFAction_GetURIPath(document(), action, buf, bufsize));
+ EXPECT_STREQ(kExpectedResult, buf);
UnloadPage(page);
}
diff --git a/public/fpdf_doc.h b/public/fpdf_doc.h
index d276376..b073f4d 100644
--- a/public/fpdf_doc.h
+++ b/public/fpdf_doc.h
@@ -189,8 +189,18 @@
// character, or 0 on error, typically because the arguments were bad or the
// action was of the wrong type.
//
-// The |buffer| is always encoded in 7-bit ASCII. If |buflen| is less than the
-// returned length, or |buffer| is NULL, |buffer| will not be modified.
+// The |buffer| may contain badly encoded data. The caller should validate the
+// output, e.g. by checking whether it is valid UTF-8.
+//
+// If |buflen| is less than the returned length, or |buffer| is NULL, |buffer|
+// will not be modified.
+//
+// Historically, the documentation for this API claimed |buffer| is always
+// encoded in 7-bit ASCII, but did not actually enforce it.
+// https://pdfium.googlesource.com/pdfium.git/+/d609e84cee2e14a18333247485af91df48a40592
+// added that enforcement, but that did not work well for real world PDFs that
+// used UTF-8. As of this writing, this API has been reverted to its original
+// behavior prior to commit d609e84cee.
FPDF_EXPORT unsigned long FPDF_CALLCONV
FPDFAction_GetURIPath(FPDF_DOCUMENT document,
FPDF_ACTION action,