Add API to retrieve MIME type from PDF attachments

Adds a new API to retrieve a value from the embedded file stream
dictionary of PDF attachments:

FPDFAttachment_GetSubtype() - Retrieves the MIME type (the Subtype entry).

Bug: 408241034
Change-Id: Ia42813d0423dbdad3105f9ac1c8da4f3976e92c9
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/130970
Commit-Queue: Lei Zhang <thestig@chromium.org>
Reviewed-by: Tom Sepez <tsepez@chromium.org>
Reviewed-by: Lei Zhang <thestig@chromium.org>
diff --git a/fpdfsdk/fpdf_attachment.cpp b/fpdfsdk/fpdf_attachment.cpp
index c5f2515..eaba2a7 100644
--- a/fpdfsdk/fpdf_attachment.cpp
+++ b/fpdfsdk/fpdf_attachment.cpp
@@ -304,3 +304,30 @@
                                        static_cast<size_t>(buflen))));
   return true;
 }
+
+FPDF_EXPORT unsigned long FPDF_CALLCONV
+FPDFAttachment_GetSubtype(FPDF_ATTACHMENT attachment,
+                          FPDF_WCHAR* buffer,
+                          unsigned long buflen) {
+  CPDF_Object* file = CPDFObjectFromFPDFAttachment(attachment);
+  if (!file) {
+    return 0;
+  }
+
+  // SAFETY: caller must ensure |buffer| is valid for |buflen| bytes.
+  auto buffer_span = UNSAFE_BUFFERS(SpanFromFPDFApiArgs(buffer, buflen));
+  CPDF_FileSpec spec(pdfium::WrapRetain(file));
+  RetainPtr<const CPDF_Stream> file_stream = spec.GetFileStream();
+  if (!file_stream) {
+    return Utf16EncodeMaybeCopyAndReturnLength(WideString(), buffer_span);
+  }
+
+  ByteString subtype = file_stream->GetDict()->GetNameFor("Subtype");
+  if (subtype.IsEmpty()) {
+    // Empty Subtype name: per the API description, return an empty string.
+    return Utf16EncodeMaybeCopyAndReturnLength(WideString(), buffer_span);
+  }
+
+  return Utf16EncodeMaybeCopyAndReturnLength(
+      PDF_DecodeText(subtype.unsigned_span()), buffer_span);
+}
diff --git a/fpdfsdk/fpdf_attachment_embeddertest.cpp b/fpdfsdk/fpdf_attachment_embeddertest.cpp
index 5f969a3..72250e1 100644
--- a/fpdfsdk/fpdf_attachment_embeddertest.cpp
+++ b/fpdfsdk/fpdf_attachment_embeddertest.cpp
@@ -403,3 +403,39 @@
                                           kExpectedLength));
   EXPECT_EQ(L"", GetPlatformWString(buf.data()));
 }
+
+TEST_F(FPDFAttachmentEmbedderTest, GetSubtype) {
+  ASSERT_TRUE(OpenDocument("embedded_attachments.pdf"));
+  FPDF_ATTACHMENT attachment = FPDFDoc_GetAttachment(document(), 0);
+  ASSERT_TRUE(attachment);
+
+  // First query the size: it is in bytes and counts the UTF-16 terminator.
+  constexpr char kExpectedSubtype[] = "text/plain";
+  unsigned long length = FPDFAttachment_GetSubtype(attachment, nullptr, 0);
+  ASSERT_EQ(2u * (strlen(kExpectedSubtype) + 1), length);
+
+  std::vector<FPDF_WCHAR> buf = GetFPDFWideStringBuffer(length);
+  EXPECT_EQ(length, FPDFAttachment_GetSubtype(attachment, buf.data(), length));
+  EXPECT_EQ(kExpectedSubtype, GetPlatformString(buf.data()));
+
+  // A buffer one byte short is left untouched; the length is still reported.
+  std::vector<FPDF_WCHAR> small_buf(length - 1);
+  const FPDF_WCHAR kPattern = 0xDEAD;
+  std::ranges::fill(small_buf, kPattern);
+  EXPECT_EQ(length, FPDFAttachment_GetSubtype(attachment, small_buf.data(),
+                                              length - 1));
+  EXPECT_THAT(small_buf, testing::Each(kPattern));
+}
+
+TEST_F(FPDFAttachmentEmbedderTest, GetSubtypeInvalid) {
+  ASSERT_TRUE(OpenDocument("embedded_attachments.pdf"));
+  FPDF_ATTACHMENT attachment = FPDFDoc_GetAttachment(document(), 0);
+  ASSERT_TRUE(attachment);
+  // A null attachment handle yields 0.
+  std::vector<FPDF_WCHAR> buf(1);
+  EXPECT_EQ(0u, FPDFAttachment_GetSubtype(nullptr, buf.data(), 1));
+  // A null |buffer| with non-zero |buflen| still reports the needed length.
+  constexpr char kExpectedSubtype[] = "text/plain";
+  EXPECT_EQ(2u * (strlen(kExpectedSubtype) + 1),
+            FPDFAttachment_GetSubtype(attachment, nullptr, 10));
+}
diff --git a/fpdfsdk/fpdf_view_c_api_test.c b/fpdfsdk/fpdf_view_c_api_test.c
index bb33a52..9b11e32 100644
--- a/fpdfsdk/fpdf_view_c_api_test.c
+++ b/fpdfsdk/fpdf_view_c_api_test.c
@@ -109,6 +109,7 @@
     CHK(FPDFAttachment_GetFile);
     CHK(FPDFAttachment_GetName);
     CHK(FPDFAttachment_GetStringValue);
+    CHK(FPDFAttachment_GetSubtype);
     CHK(FPDFAttachment_GetValueType);
     CHK(FPDFAttachment_HasKey);
     CHK(FPDFAttachment_SetFile);
diff --git a/public/fpdf_attachment.h b/public/fpdf_attachment.h
index d25bdda..a8a40b3 100644
--- a/public/fpdf_attachment.h
+++ b/public/fpdf_attachment.h
@@ -172,6 +172,23 @@
                        unsigned long buflen,
                        unsigned long* out_buflen);
 
+// Experimental API.
+// Get the MIME type (Subtype) of the embedded file |attachment|. |buffer| is
+// only modified if |buflen| is at least the returned length in bytes. If the
+// Subtype is not found or if there is no file stream, the result is an empty
+// string and the return value is 2 (the terminating NUL only). On other
+// errors, nothing is written to |buffer| and the return value is 0.
+//
+//   attachment - handle to an attachment.
+//   buffer     - buffer for holding the MIME type string encoded in UTF-16LE.
+//   buflen     - length of the buffer in bytes.
+//
+// Returns the length of the MIME type string in bytes, including the NUL.
+FPDF_EXPORT unsigned long FPDF_CALLCONV
+FPDFAttachment_GetSubtype(FPDF_ATTACHMENT attachment,
+                          FPDF_WCHAR* buffer,
+                          unsigned long buflen);
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif  // __cplusplus