Add FPDF_GetFileIdentifier() to public API

Add an experimental function to the public API to read the file
identifier defined in the document trailer.

Bug: pdfium:1047
Change-Id: I47dc9b75aad510d8cf6fec956d088ae9c3201e84
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/68370
Commit-Queue: Daniel Hosseinian <dhoss@chromium.org>
Reviewed-by: Lei Zhang <thestig@chromium.org>
diff --git a/core/fpdfapi/parser/cpdf_document.cpp b/core/fpdfapi/parser/cpdf_document.cpp
index 05b5f47..76c65b5 100644
--- a/core/fpdfapi/parser/cpdf_document.cpp
+++ b/core/fpdfapi/parser/cpdf_document.cpp
@@ -465,6 +465,10 @@
   return m_pInfoDict.Get();
 }
 
+const CPDF_Array* CPDF_Document::GetFileIdentifier() const {
+  return m_pParser ? m_pParser->GetIDArray() : nullptr;
+}
+
 void CPDF_Document::DeletePage(int iPage) {
   CPDF_Dictionary* pPages = GetPagesDict();
   if (!pPages)
diff --git a/core/fpdfapi/parser/cpdf_document.h b/core/fpdfapi/parser/cpdf_document.h
index cf738f2..3506e59 100644
--- a/core/fpdfapi/parser/cpdf_document.h
+++ b/core/fpdfapi/parser/cpdf_document.h
@@ -90,6 +90,7 @@
   CPDF_Parser* GetParser() const { return m_pParser.get(); }
   CPDF_Dictionary* GetRoot() const { return m_pRootDict.Get(); }
   CPDF_Dictionary* GetInfo();
+  const CPDF_Array* GetFileIdentifier() const;
 
   void DeletePage(int iPage);
   int GetPageCount() const;
diff --git a/fpdfsdk/fpdf_doc.cpp b/fpdfsdk/fpdf_doc.cpp
index 0a5f593..350d1aa 100644
--- a/fpdfsdk/fpdf_doc.cpp
+++ b/fpdfsdk/fpdf_doc.cpp
@@ -15,6 +15,8 @@
 #include "core/fpdfapi/parser/cpdf_dictionary.h"
 #include "core/fpdfapi/parser/cpdf_document.h"
 #include "core/fpdfapi/parser/cpdf_number.h"
+#include "core/fpdfapi/parser/cpdf_string.h"
+#include "core/fpdfapi/parser/fpdf_parser_decode.h"
 #include "core/fpdfdoc/cpdf_bookmark.h"
 #include "core/fpdfdoc/cpdf_bookmarktree.h"
 #include "core/fpdfdoc/cpdf_dest.h"
@@ -391,6 +393,32 @@
                               quad_points);
 }
 
+FPDF_EXPORT unsigned long FPDF_CALLCONV
+FPDF_GetFileIdentifier(FPDF_DOCUMENT document,
+                       FPDF_FILEIDTYPE id_type,
+                       void* buffer,
+                       unsigned long buflen) {
+  CPDF_Document* pDoc = CPDFDocumentFromFPDFDocument(document);
+  if (!pDoc)
+    return 0;
+
+  // Check if |id_type| is valid.
+  if (id_type != FILEIDTYPE_PERMANENT && id_type != FILEIDTYPE_CHANGING)
+    return 0;
+
+  const CPDF_Array* pFileId = pDoc->GetFileIdentifier();
+  if (!pFileId)
+    return 0;
+
+  size_t nIndex = id_type == FILEIDTYPE_PERMANENT ? 0 : 1;
+  const CPDF_String* pValue = ToString(pFileId->GetDirectObjectAt(nIndex));
+  if (!pValue)
+    return 0;
+
+  return NulTerminateMaybeCopyAndReturnLength(pValue->GetString(), buffer,
+                                              buflen);
+}
+
 FPDF_EXPORT unsigned long FPDF_CALLCONV FPDF_GetMetaText(FPDF_DOCUMENT document,
                                                          FPDF_BYTESTRING tag,
                                                          void* buffer,
diff --git a/fpdfsdk/fpdf_doc_embeddertest.cpp b/fpdfsdk/fpdf_doc_embeddertest.cpp
index 49c226e..fc54344 100644
--- a/fpdfsdk/fpdf_doc_embeddertest.cpp
+++ b/fpdfsdk/fpdf_doc_embeddertest.cpp
@@ -482,6 +482,57 @@
   EXPECT_EQ(0, FPDF_GetPageCount(document()));
 }
 
+TEST_F(FPDFDocEmbedderTest, GetFileIdentifier) {
+  ASSERT_TRUE(OpenDocument("split_streams.pdf"));
+  constexpr size_t kMd5Length = 17;
+  char buf[kMd5Length];
+  EXPECT_EQ(0u,
+            FPDF_GetFileIdentifier(document(), static_cast<FPDF_FILEIDTYPE>(-1),
+                                   buf, sizeof(buf)));
+  EXPECT_EQ(0u,
+            FPDF_GetFileIdentifier(document(), static_cast<FPDF_FILEIDTYPE>(2),
+                                   buf, sizeof(buf)));
+  EXPECT_EQ(0u, FPDF_GetFileIdentifier(nullptr, FILEIDTYPE_PERMANENT, buf,
+                                       sizeof(buf)));
+  EXPECT_EQ(kMd5Length, FPDF_GetFileIdentifier(document(), FILEIDTYPE_PERMANENT,
+                                               nullptr, 0));
+
+  constexpr char kExpectedPermanent[] =
+      "\xF3\x41\xAE\x65\x4A\x77\xAC\xD5\x06\x5A\x76\x45\xE5\x96\xE6\xE6";
+  ASSERT_EQ(kMd5Length, FPDF_GetFileIdentifier(document(), FILEIDTYPE_PERMANENT,
+                                               buf, sizeof(buf)));
+  EXPECT_EQ(kExpectedPermanent, ByteString(buf));
+
+  constexpr char kExpectedChanging[] =
+      "\xBC\x37\x29\x8A\x3F\x87\xF4\x79\x22\x9B\xCE\x99\x7C\xA7\x91\xF7";
+  ASSERT_EQ(kMd5Length, FPDF_GetFileIdentifier(document(), FILEIDTYPE_CHANGING,
+                                               buf, sizeof(buf)));
+  EXPECT_EQ(kExpectedChanging, ByteString(buf));
+}
+
+TEST_F(FPDFDocEmbedderTest, GetNonHexFileIdentifier) {
+  ASSERT_TRUE(OpenDocument("non_hex_file_id.pdf"));
+  char buf[18];
+
+  constexpr char kPermanentNonHex[] = "permanent non-hex";
+  ASSERT_EQ(18u, FPDF_GetFileIdentifier(document(), FILEIDTYPE_PERMANENT, buf,
+                                        sizeof(buf)));
+  EXPECT_EQ(kPermanentNonHex, ByteString(buf));
+
+  constexpr char kChangingNonHex[] = "changing non-hex";
+  ASSERT_EQ(17u, FPDF_GetFileIdentifier(document(), FILEIDTYPE_CHANGING, buf,
+                                        sizeof(buf)));
+  EXPECT_EQ(kChangingNonHex, ByteString(buf));
+}
+
+TEST_F(FPDFDocEmbedderTest, GetNonexistentFileIdentifier) {
+  ASSERT_TRUE(OpenDocument("hello_world.pdf"));
+  EXPECT_EQ(
+      0u, FPDF_GetFileIdentifier(document(), FILEIDTYPE_PERMANENT, nullptr, 0));
+  EXPECT_EQ(
+      0u, FPDF_GetFileIdentifier(document(), FILEIDTYPE_CHANGING, nullptr, 0));
+}
+
 TEST_F(FPDFDocEmbedderTest, GetMetaText) {
   ASSERT_TRUE(OpenDocument("bug_601362.pdf"));
 
diff --git a/fpdfsdk/fpdf_view_c_api_test.c b/fpdfsdk/fpdf_view_c_api_test.c
index 8c48367..b7353aa 100644
--- a/fpdfsdk/fpdf_view_c_api_test.c
+++ b/fpdfsdk/fpdf_view_c_api_test.c
@@ -134,6 +134,7 @@
     CHK(FPDFLink_GetLinkAtPoint);
     CHK(FPDFLink_GetLinkZOrderAtPoint);
     CHK(FPDFLink_GetQuadPoints);
+    CHK(FPDF_GetFileIdentifier);
     CHK(FPDF_GetMetaText);
     CHK(FPDF_GetPageLabel);
 
diff --git a/public/fpdf_doc.h b/public/fpdf_doc.h
index 565314e..ab7abed 100644
--- a/public/fpdf_doc.h
+++ b/public/fpdf_doc.h
@@ -36,6 +36,13 @@
 #define PDFDEST_VIEW_FITBH 7
 #define PDFDEST_VIEW_FITBV 8
 
+// The file identifier entry type. See section 14.4 "File Identifiers" of the
+// ISO 32000-1 standard.
+typedef enum {
+  FILEIDTYPE_PERMANENT = 0,
+  FILEIDTYPE_CHANGING = 1
+} FPDF_FILEIDTYPE;
+
 typedef struct _FS_QUADPOINTSF {
   FS_FLOAT x1;
   FS_FLOAT y1;
@@ -312,6 +319,26 @@
                        int quad_index,
                        FS_QUADPOINTSF* quad_points);
 
+// Get the file identifier defined in the trailer of |document|.
+// Experimental API. Subject to change.
+//
+//   document - handle to the document.
+//   id_type  - the file identifier type to retrieve.
+//   buffer   - a buffer for the file identifier. May be NULL.
+//   buflen   - the length of the buffer, in bytes. May be 0.
+//
+// Returns the number of bytes in the file identifier, including the NUL
+// terminator. Returns 0 on failure, e.g. if the identifier does not exist.
+//
+// The |buffer| is always a byte string. The |buffer| is followed by a NUL
+// terminator.  If |buflen| is less than the returned length, or |buffer| is
+// NULL, |buffer| will not be modified.
+FPDF_EXPORT unsigned long FPDF_CALLCONV
+FPDF_GetFileIdentifier(FPDF_DOCUMENT document,
+                       FPDF_FILEIDTYPE id_type,
+                       void* buffer,
+                       unsigned long buflen);
+
 // Get meta-data |tag| content from |document|.
 //
 //   document - handle to the document.
diff --git a/testing/resources/non_hex_file_id.in b/testing/resources/non_hex_file_id.in
new file mode 100644
index 0000000..7a93439
--- /dev/null
+++ b/testing/resources/non_hex_file_id.in
@@ -0,0 +1,26 @@
+{{header}}
+{{object 1 0}} <<
+  /Type /Catalog
+  /Pages 2 0 R
+>>
+endobj
+{{object 2 0}} <<
+  /Type /Pages
+  /MediaBox [ 0 0 200 200 ]
+  /Count 1
+  /Kids [ 3 0 R ]
+>>
+endobj
+{{object 3 0}} <<
+  /Type /Page
+  /Parent 2 0 R
+>>
+endobj
+{{xref}}
+trailer <<
+  /Root 1 0 R
+  /ID [(permanent non-hex) (changing non-hex)]
+  {{trailersize}}
+>>
+{{startxref}}
+%%EOF
diff --git a/testing/resources/non_hex_file_id.pdf b/testing/resources/non_hex_file_id.pdf
new file mode 100644
index 0000000..6f11eb6
--- /dev/null
+++ b/testing/resources/non_hex_file_id.pdf
@@ -0,0 +1,33 @@
+%PDF-1.7
+% ò¤ô
+1 0 obj <<
+  /Type /Catalog
+  /Pages 2 0 R
+>>
+endobj
+2 0 obj <<
+  /Type /Pages
+  /MediaBox [ 0 0 200 200 ]
+  /Count 1
+  /Kids [ 3 0 R ]
+>>
+endobj
+3 0 obj <<
+  /Type /Page
+  /Parent 2 0 R
+>>
+endobj
+xref
+0 4
+0000000000 65535 f 
+0000000015 00000 n 
+0000000068 00000 n 
+0000000161 00000 n 
+trailer <<
+  /Root 1 0 R
+  /ID [(permanent non-hex) (changing non-hex)]
+  /Size 4
+>>
+startxref
+212
+%%EOF