Add API to retrieve MIME type from PDF attachments
Adds a new API to retrieve a value from the embedded file stream
dictionary of PDF attachments:
FPDFAttachment_GetSubtype() - retrieves the MIME type (Subtype)
Bug: 408241034
Change-Id: Ia42813d0423dbdad3105f9ac1c8da4f3976e92c9
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/130970
Commit-Queue: Lei Zhang <thestig@chromium.org>
Reviewed-by: Tom Sepez <tsepez@chromium.org>
Reviewed-by: Lei Zhang <thestig@chromium.org>
diff --git a/fpdfsdk/fpdf_attachment.cpp b/fpdfsdk/fpdf_attachment.cpp
index c5f2515..eaba2a7 100644
--- a/fpdfsdk/fpdf_attachment.cpp
+++ b/fpdfsdk/fpdf_attachment.cpp
@@ -304,3 +304,30 @@
static_cast<size_t>(buflen))));
return true;
}
+
+FPDF_EXPORT unsigned long FPDF_CALLCONV
+FPDFAttachment_GetSubtype(FPDF_ATTACHMENT attachment,
+ FPDF_WCHAR* buffer,
+ unsigned long buflen) {
+ CPDF_Object* file = CPDFObjectFromFPDFAttachment(attachment);
+ if (!file) {
+ return 0;  // No object for |attachment|: return 0 before touching |buffer|.
+ }
+
+ // SAFETY: caller must guarantee |buffer| is valid for |buflen| bytes.
+ auto buffer_span = UNSAFE_BUFFERS(SpanFromFPDFApiArgs(buffer, buflen));
+ CPDF_FileSpec spec(pdfium::WrapRetain(file));
+ RetainPtr<const CPDF_Stream> file_stream = spec.GetFileStream();
+ if (!file_stream) {  // No embedded file stream: report an empty string.
+ return Utf16EncodeMaybeCopyAndReturnLength(WideString(), buffer_span);
+ }
+
+ ByteString subtype = file_stream->GetDict()->GetNameFor("Subtype");
+ if (subtype.IsEmpty()) {
+ // No usable "Subtype" name: per the API description, report an empty string.
+ return Utf16EncodeMaybeCopyAndReturnLength(WideString(), buffer_span);
+ }
+
+ return Utf16EncodeMaybeCopyAndReturnLength(
+ PDF_DecodeText(subtype.unsigned_span()), buffer_span);
+}
diff --git a/fpdfsdk/fpdf_attachment_embeddertest.cpp b/fpdfsdk/fpdf_attachment_embeddertest.cpp
index 5f969a3..72250e1 100644
--- a/fpdfsdk/fpdf_attachment_embeddertest.cpp
+++ b/fpdfsdk/fpdf_attachment_embeddertest.cpp
@@ -403,3 +403,39 @@
kExpectedLength));
EXPECT_EQ(L"", GetPlatformWString(buf.data()));
}
+
+TEST_F(FPDFAttachmentEmbedderTest, GetSubtype) {
+ ASSERT_TRUE(OpenDocument("embedded_attachments.pdf"));
+ FPDF_ATTACHMENT attachment = FPDFDoc_GetAttachment(document(), 0);
+ ASSERT_TRUE(attachment);
+
+ // Size query (null buffer): expect 2 bytes per character plus terminator.
+ constexpr char kExpectedSubtype[] = "text/plain";
+ unsigned long length = FPDFAttachment_GetSubtype(attachment, nullptr, 0);
+ ASSERT_EQ(2u * (strlen(kExpectedSubtype) + 1), length);
+
+ std::vector<FPDF_WCHAR> buf = GetFPDFWideStringBuffer(length);
+ EXPECT_EQ(length, FPDFAttachment_GetSubtype(attachment, buf.data(), length));
+ EXPECT_EQ(kExpectedSubtype, GetPlatformString(buf.data()));
+
+ // One byte short: the full length is still returned and nothing is written.
+ std::vector<FPDF_WCHAR> small_buf(length - 1);  // Sized in elements, not bytes.
+ const FPDF_WCHAR kPattern = 0xDEAD;  // Sentinel used to detect any write.
+ std::ranges::fill(small_buf, kPattern);
+ EXPECT_EQ(length, FPDFAttachment_GetSubtype(attachment, small_buf.data(),
+ length - 1));
+ EXPECT_THAT(small_buf, testing::Each(kPattern));
+}
+
+TEST_F(FPDFAttachmentEmbedderTest, GetSubtypeInvalid) {
+ ASSERT_TRUE(OpenDocument("embedded_attachments.pdf"));
+ FPDF_ATTACHMENT attachment = FPDFDoc_GetAttachment(document(), 0);
+ ASSERT_TRUE(attachment);
+
+ std::vector<FPDF_WCHAR> buf(1);  // Case 1: a null attachment handle yields 0.
+ EXPECT_EQ(0u, FPDFAttachment_GetSubtype(nullptr, buf.data(), 1));
+
+ constexpr char kExpectedSubtype[] = "text/plain";  // Case 2: null |buffer|,
+ EXPECT_EQ(2u * (strlen(kExpectedSubtype) + 1),  // full size still reported.
+ FPDFAttachment_GetSubtype(attachment, nullptr, 10));
+}
diff --git a/fpdfsdk/fpdf_view_c_api_test.c b/fpdfsdk/fpdf_view_c_api_test.c
index bb33a52..9b11e32 100644
--- a/fpdfsdk/fpdf_view_c_api_test.c
+++ b/fpdfsdk/fpdf_view_c_api_test.c
@@ -109,6 +109,7 @@
CHK(FPDFAttachment_GetFile);
CHK(FPDFAttachment_GetName);
CHK(FPDFAttachment_GetStringValue);
+ CHK(FPDFAttachment_GetSubtype);
CHK(FPDFAttachment_GetValueType);
CHK(FPDFAttachment_HasKey);
CHK(FPDFAttachment_SetFile);
diff --git a/public/fpdf_attachment.h b/public/fpdf_attachment.h
index d25bdda..a8a40b3 100644
--- a/public/fpdf_attachment.h
+++ b/public/fpdf_attachment.h
@@ -172,6 +172,23 @@
unsigned long buflen,
unsigned long* out_buflen);
+// Experimental API.
+// Get the MIME type (Subtype) of the embedded file |attachment|. |buffer| is
+// only modified if |buflen| is at least the returned length, which includes
+// the terminating NUL. If the Subtype is not found or if there is no file
+// stream, the result is an empty string and the return value is 2. On other
+// errors, e.g. a null |attachment|, |buffer| is untouched and 0 is returned.
+//
+// attachment - handle to an attachment.
+// buffer - buffer for holding the MIME type string encoded in UTF-16LE.
+// buflen - length of the buffer in bytes.
+//
+// Returns the byte length of the MIME type string, including the terminator.
+FPDF_EXPORT unsigned long FPDF_CALLCONV
+FPDFAttachment_GetSubtype(FPDF_ATTACHMENT attachment,
+ FPDF_WCHAR* buffer,
+ unsigned long buflen);
+
#ifdef __cplusplus
} // extern "C"
#endif // __cplusplus