Add experimental APIs to get XFA data.
Add a family of APIs to get the uncompressed data in the /XFA field.
FPDF_GetXFAPacketCount() returns how many valid XFA packets exist. For
each packet, FPDF_GetXFAPacketName() and FPDF_GetXFAPacketContent() get
the name and content of the packet, respectively.
Bug: pdfium:1568
Change-Id: Iec4ffdf80945161aba9584fe5b1ddf1d435008bd
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/71790
Reviewed-by: Tom Sepez <tsepez@chromium.org>
Commit-Queue: Lei Zhang <thestig@chromium.org>
diff --git a/fpdfsdk/fpdf_view.cpp b/fpdfsdk/fpdf_view.cpp
index c145839..22ef51a 100644
--- a/fpdfsdk/fpdf_view.cpp
+++ b/fpdfsdk/fpdf_view.cpp
@@ -21,6 +21,8 @@
#include "core/fpdfapi/parser/cpdf_document.h"
#include "core/fpdfapi/parser/cpdf_name.h"
#include "core/fpdfapi/parser/cpdf_parser.h"
+#include "core/fpdfapi/parser/cpdf_stream.h"
+#include "core/fpdfapi/parser/cpdf_string.h"
#include "core/fpdfapi/parser/fpdf_parser_decode.h"
#include "core/fpdfapi/render/cpdf_docrenderdata.h"
#include "core/fpdfapi/render/cpdf_pagerendercache.h"
@@ -46,6 +48,7 @@
#include "public/fpdf_formfill.h"
#include "third_party/base/ptr_util.h"
#include "third_party/base/span.h"
+#include "third_party/base/stl_util.h"
#ifdef PDF_ENABLE_V8
#include "fxjs/cfx_v8.h"
@@ -87,6 +90,58 @@
bool g_bLibraryInitialized = false;
+const CPDF_Object* GetXFAEntryFromDocument(FPDF_DOCUMENT document) {  // Returns root -> "AcroForm" -> "XFA", or nullptr if any link is missing.
+ const CPDF_Document* doc = CPDFDocumentFromFPDFDocument(document);
+ if (!doc)
+ return nullptr;
+ // "AcroForm" is looked up in the dictionary returned by GetRoot().
+ const CPDF_Dictionary* root = doc->GetRoot();
+ if (!root)
+ return nullptr;
+ // Without an "AcroForm" dictionary there is no XFA entry to return.
+ const CPDF_Dictionary* acro_form = root->GetDictFor("AcroForm");
+ return acro_form ? acro_form->GetObjectFor("XFA") : nullptr;
+}
+
+struct XFAPacket {  // One XFA packet: a name paired with the stream holding its content.
+ ByteString name;  // Empty when the XFA entry is a single stream (see GetXFAPackets()).
+ const CPDF_Stream* data;  // Never null for packets produced by GetXFAPackets().
+};
+
+std::vector<XFAPacket> GetXFAPackets(const CPDF_Object* xfa_object) {  // Lists the packets in |xfa_object|; empty if it is null or neither a stream nor an array.
+ std::vector<XFAPacket> packets;
+ // A null entry yields no packets.
+ if (!xfa_object)
+ return packets;
+ // A stream entry is reported as exactly one packet with an empty name.
+ const CPDF_Stream* xfa_stream = ToStream(xfa_object->GetDirect());
+ if (xfa_stream) {
+ packets.push_back({"", xfa_stream});
+ return packets;
+ }
+ // Otherwise the entry is walked as an array of (name, stream) pairs.
+ const CPDF_Array* xfa_array = ToArray(xfa_object->GetDirect());
+ if (!xfa_array)
+ return packets;
+ // Reserve enough room for every possible pair up front.
+ packets.reserve(1 + (xfa_array->size() / 2));
+ for (size_t i = 0; i < xfa_array->size(); i += 2) {
+ if (i + 1 == xfa_array->size())  // Odd trailing element: nothing to pair it with.
+ break;
+ // Pairs are skipped, not treated as errors, when ToString() yields null for element i.
+ const CPDF_String* name = ToString(xfa_array->GetObjectAt(i));
+ if (!name)
+ continue;
+ // Likewise when GetStreamAt() yields null for element i + 1.
+ const CPDF_Stream* data = xfa_array->GetStreamAt(i + 1);
+ if (!data)
+ continue;
+
+ packets.push_back({name->GetString(), data});
+ }
+ return packets;
+}
+
FPDF_DOCUMENT LoadDocumentImpl(
const RetainPtr<IFX_SeekableReadStream>& pFileAccess,
FPDF_BYTESTRING password) {
@@ -1124,3 +1179,47 @@
}
return FPDFDestFromCPDFArray(pDestObj->AsArray());
}
+
+FPDF_EXPORT int FPDF_CALLCONV FPDF_GetXFAPacketCount(FPDF_DOCUMENT document) {
+ if (!document)  // -1 is returned only for a NULL handle; a missing XFA entry yields 0 below.
+ return -1;
+ // The count is the size of the packet list built from the document's XFA entry.
+ return pdfium::CollectionSize<int>(
+ GetXFAPackets(GetXFAEntryFromDocument(document)));
+}
+
+FPDF_EXPORT unsigned long FPDF_CALLCONV
+FPDF_GetXFAPacketName(FPDF_DOCUMENT document,
+ int index,
+ void* buffer,
+ unsigned long buflen) {
+ if (!document || index < 0)  // Reject bad arguments before |index| is cast to size_t below.
+ return 0;
+ // The packet list is rebuilt from the document on every call.
+ std::vector<XFAPacket> xfa_packets =
+ GetXFAPackets(GetXFAEntryFromDocument(document));
+ if (static_cast<size_t>(index) >= xfa_packets.size())
+ return 0;
+ // NOTE(review): the helper presumably copies only when |buffer| is large enough and returns the length including the NUL - confirm against its definition.
+ return NulTerminateMaybeCopyAndReturnLength(xfa_packets[index].name, buffer,
+ buflen);
+}
+
+FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV
+FPDF_GetXFAPacketContent(FPDF_DOCUMENT document,
+ int index,
+ void* buffer,
+ unsigned long buflen,
+ unsigned long* out_buflen) {
+ if (!document || index < 0 || !out_buflen)  // On any failure path |*out_buflen| is left untouched.
+ return false;
+ // The packet list is rebuilt from the document on every call.
+ std::vector<XFAPacket> xfa_packets =
+ GetXFAPackets(GetXFAEntryFromDocument(document));
+ if (static_cast<size_t>(index) >= xfa_packets.size())
+ return false;
+ // Success is reported even if |buffer| was too small; callers compare |buflen| with |*out_buflen|.
+ *out_buflen = DecodeStreamMaybeCopyAndReturnLength(xfa_packets[index].data,
+ buffer, buflen);
+ return true;
+}
diff --git a/fpdfsdk/fpdf_view_c_api_test.c b/fpdfsdk/fpdf_view_c_api_test.c
index c82e22e..67617b5 100644
--- a/fpdfsdk/fpdf_view_c_api_test.c
+++ b/fpdfsdk/fpdf_view_c_api_test.c
@@ -446,6 +446,9 @@
CHK(FPDF_GetRecommendedV8Flags);
#endif
CHK(FPDF_GetSecurityHandlerRevision);
+ CHK(FPDF_GetXFAPacketContent);
+ CHK(FPDF_GetXFAPacketCount);
+ CHK(FPDF_GetXFAPacketName);
CHK(FPDF_InitLibrary);
CHK(FPDF_InitLibraryWithConfig);
CHK(FPDF_LoadCustomDocument);
diff --git a/fpdfsdk/fpdf_view_embeddertest.cpp b/fpdfsdk/fpdf_view_embeddertest.cpp
index 4abe282..292837d 100644
--- a/fpdfsdk/fpdf_view_embeddertest.cpp
+++ b/fpdfsdk/fpdf_view_embeddertest.cpp
@@ -19,7 +19,9 @@
#include "testing/fx_string_testhelpers.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "testing/utils/file_util.h"
+#include "testing/utils/hash.h"
#include "testing/utils/path_service.h"
+#include "third_party/base/stl_util.h"
using pdfium::kManyRectanglesChecksum;
@@ -1063,6 +1065,98 @@
UnloadPage(page);
}
+TEST_F(FPDFViewEmbedderTest, GetXFAArrayData) {  // Checks the XFA packet APIs against the five named packets expected in simple_xfa.pdf.
+ ASSERT_TRUE(OpenDocument("simple_xfa.pdf"));
+ // Expected packet names, content lengths and MD5 checksums, in packet order.
+ static constexpr struct {
+ const char* name;
+ size_t content_length;
+ const char* content_checksum;
+ } kExpectedResults[]{
+ {"preamble", 124u, "71be364e53292596412242bfcdb46eab"},
+ {"config", 642u, "bcd1ca1d420ee31a561273a54a06435f"},
+ {"template", 541u, "0f48cb2fa1bb9cbf9eee802d66e81bf4"},
+ {"localeSet", 3455u, "bb1f253d3e5c719ac0da87d055bc164e"},
+ {"postamble", 11u, "6b79e25da35d86634ea27c38f64cf243"},
+ };
+ // The packet count must match the table before the per-packet checks index into it.
+ ASSERT_EQ(static_cast<int>(pdfium::size(kExpectedResults)),
+ FPDF_GetXFAPacketCount(document()));
+ for (size_t i = 0; i < pdfium::size(kExpectedResults); ++i) {
+ char name_buffer[20] = {};
+ ASSERT_EQ(strlen(kExpectedResults[i].name) + 1,  // Returned length counts the terminating NUL.
+ FPDF_GetXFAPacketName(document(), i, nullptr, 0));
+ EXPECT_EQ(
+ strlen(kExpectedResults[i].name) + 1,
+ FPDF_GetXFAPacketName(document(), i, name_buffer, sizeof(name_buffer)));
+ EXPECT_STREQ(kExpectedResults[i].name, name_buffer);
+ // Query the content size with a NULL buffer, then fetch into a buffer of exactly that size.
+ unsigned long buflen;
+ ASSERT_TRUE(FPDF_GetXFAPacketContent(document(), i, nullptr, 0, &buflen));
+ ASSERT_EQ(kExpectedResults[i].content_length, buflen);
+ std::vector<uint8_t> data_buffer(buflen);
+ EXPECT_TRUE(FPDF_GetXFAPacketContent(document(), i, data_buffer.data(),
+ data_buffer.size(), &buflen));
+ EXPECT_EQ(kExpectedResults[i].content_length, buflen);
+ EXPECT_STREQ(
+ kExpectedResults[i].content_checksum,
+ GenerateMD5Base16(data_buffer.data(), data_buffer.size()).c_str());
+ }
+
+ // Test bad parameters.
+ EXPECT_EQ(-1, FPDF_GetXFAPacketCount(nullptr));
+
+ EXPECT_EQ(0u, FPDF_GetXFAPacketName(nullptr, 0, nullptr, 0));
+ EXPECT_EQ(0u, FPDF_GetXFAPacketName(document(), -1, nullptr, 0));
+ EXPECT_EQ(0u, FPDF_GetXFAPacketName(
+ document(), pdfium::size(kExpectedResults), nullptr, 0));
+ // Failed content calls must leave |buflen| at its sentinel value of 123.
+ unsigned long buflen = 123;
+ EXPECT_FALSE(FPDF_GetXFAPacketContent(nullptr, 0, nullptr, 0, &buflen));
+ EXPECT_EQ(123u, buflen);
+ EXPECT_FALSE(FPDF_GetXFAPacketContent(document(), -1, nullptr, 0, &buflen));
+ EXPECT_EQ(123u, buflen);
+ EXPECT_FALSE(FPDF_GetXFAPacketContent(
+ document(), pdfium::size(kExpectedResults), nullptr, 0, &buflen));
+ EXPECT_EQ(123u, buflen);
+ EXPECT_FALSE(FPDF_GetXFAPacketContent(document(), 0, nullptr, 0, nullptr));  // NULL |out_buflen| is rejected.
+}
+
+TEST_F(FPDFViewEmbedderTest, GetXFAStreamData) {  // Per the test name, bug_1265.pdf presumably stores XFA as a single stream.
+ ASSERT_TRUE(OpenDocument("bug_1265.pdf"));
+ // Exactly one packet is expected.
+ ASSERT_EQ(1, FPDF_GetXFAPacketCount(document()));
+ // Its name is empty, so the reported length is 1 (just the terminating NUL).
+ char name_buffer[20] = {};
+ ASSERT_EQ(1u, FPDF_GetXFAPacketName(document(), 0, nullptr, 0));
+ EXPECT_EQ(1u, FPDF_GetXFAPacketName(document(), 0, name_buffer,
+ sizeof(name_buffer)));
+ EXPECT_STREQ("", name_buffer);
+ // Query the content size with a NULL buffer, then fetch into a buffer of exactly that size.
+ unsigned long buflen;
+ ASSERT_TRUE(FPDF_GetXFAPacketContent(document(), 0, nullptr, 0, &buflen));
+ ASSERT_EQ(121u, buflen);
+ std::vector<uint8_t> data_buffer(buflen);
+ EXPECT_TRUE(FPDF_GetXFAPacketContent(document(), 0, data_buffer.data(),
+ data_buffer.size(), &buflen));
+ EXPECT_EQ(121u, buflen);
+ EXPECT_STREQ(
+ "8f912eaa1e66c9341cb3032ede71e147",
+ GenerateMD5Base16(data_buffer.data(), data_buffer.size()).c_str());
+}
+
+TEST_F(FPDFViewEmbedderTest, GetXFADataForNoForm) {
+ ASSERT_TRUE(OpenDocument("hello_world.pdf"));
+ // Per the test name this document has no form, so 0 packets (not -1) is expected.
+ EXPECT_EQ(0, FPDF_GetXFAPacketCount(document()));
+}
+
+TEST_F(FPDFViewEmbedderTest, GetXFADataForAcroForm) {
+ ASSERT_TRUE(OpenDocument("text_form.pdf"));
+ // Per the test name this document has an AcroForm but no XFA data, so 0 packets is expected.
+ EXPECT_EQ(0, FPDF_GetXFAPacketCount(document()));
+}
+
class RecordUnsupportedErrorDelegate final : public EmbedderTest::Delegate {
public:
RecordUnsupportedErrorDelegate() = default;
diff --git a/public/fpdfview.h b/public/fpdfview.h
index 3181df3..94bf776 100644
--- a/public/fpdfview.h
+++ b/public/fpdfview.h
@@ -1291,6 +1291,65 @@
void* buffer,
long* buflen);
+// Experimental API.
+// Function: FPDF_GetXFAPacketCount
+// Get the number of valid packets in the XFA entry.
+// Parameters:
+// document - Handle to the document.
+// Return value:
+// The number of valid packets (0 if there is no XFA entry), or -1 on error.
+FPDF_EXPORT int FPDF_CALLCONV FPDF_GetXFAPacketCount(FPDF_DOCUMENT document);
+
+// Experimental API.
+// Function: FPDF_GetXFAPacketName
+// Get the name of a packet in the XFA entry.
+// Parameters:
+// document - Handle to the document.
+// index - Index number of the packet. 0 for the first packet.
+// buffer - Buffer for holding the name of the XFA packet.
+// buflen - Length of |buffer| in bytes.
+// Return value:
+// The length of the packet name in bytes, including the NUL, or 0 on error.
+//
+// |document| must be valid and |index| must be in the range [0, N), where N is
+// the value returned by FPDF_GetXFAPacketCount().
+// |buffer| is only modified if it is non-NULL and |buflen| is greater than or
+// equal to the returned length, which includes the terminating NUL character.
+// |buffer| is unmodified on error.
+FPDF_EXPORT unsigned long FPDF_CALLCONV FPDF_GetXFAPacketName(
+ FPDF_DOCUMENT document,
+ int index,
+ void* buffer,
+ unsigned long buflen);
+
+// Experimental API.
+// Function: FPDF_GetXFAPacketContent
+// Get the content of a packet in the XFA entry.
+// Parameters:
+// document - Handle to the document.
+// index - Index number of the packet. 0 for the first packet.
+// buffer - Buffer for holding the content of the XFA packet.
+// buflen - Length of |buffer| in bytes.
+// out_buflen - Pointer to the variable that will receive the minimum
+// buffer size needed to contain the content of the XFA
+// packet.
+// Return value:
+// Whether the operation succeeded or not.
+//
+// |document| must be valid and |index| must be in the range [0, N), where N is
+// the value returned by FPDF_GetXFAPacketCount(). |out_buflen| must not be
+// NULL. When the aforementioned arguments are valid, the operation succeeds,
+// and |out_buflen| receives the content size. |buffer| is only modified if
+// |buffer| is non-NULL and long enough to contain the content. Callers must
+// check both the return value and that the input |buflen| is no less than the
+// returned |out_buflen| before using the data in |buffer|.
+FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV FPDF_GetXFAPacketContent(
+ FPDF_DOCUMENT document,
+ int index,
+ void* buffer,
+ unsigned long buflen,
+ unsigned long* out_buflen);
+
#ifdef PDF_ENABLE_V8
// Function: FPDF_GetRecommendedV8Flags
// Returns a space-separated string of command line flags that are