Add support for ICC profile extraction

Introduce functionality to extract the ICC profile associated with
images embedded in PDF documents. Add a new public API to access the
decoded data of the ICC profile.

Bug: 42270471
Change-Id: I1bca935dc19cb24df990455e0ff1963a737403dd
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/126170
Reviewed-by: Lei Zhang <thestig@chromium.org>
Reviewed-by: Thomas Sepez <tsepez@google.com>
Reviewed-by: Tom Sepez <tsepez@chromium.org>
Commit-Queue: Lei Zhang <thestig@chromium.org>
diff --git a/AUTHORS b/AUTHORS
index 5b79420..3cd57a7 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -23,6 +23,7 @@
 Dorian Rudolph <dorianrudo97@gmail.com>
 Felix Kauselmann <licorn@gmail.com>
 GiWan Go <gogil@stealien.com>
+Haoran Tang <haoran.tang.personal@gmail.com>
 Helmut Januschka <helmut@januschka.com>
 Huy Ngo <huyna89@gmail.com>
 Ivan Odulo <ivanodulo@gmail.com>
diff --git a/core/fpdfapi/page/cpdf_colorspace.cpp b/core/fpdfapi/page/cpdf_colorspace.cpp
index 8cbd9c0..b9949f8 100644
--- a/core/fpdfapi/page/cpdf_colorspace.cpp
+++ b/core/fpdfapi/page/cpdf_colorspace.cpp
@@ -217,6 +217,7 @@
   // CPDF_ColorSpace:
   std::optional<FX_RGB_STRUCT<float>> GetRGB(
       pdfium::span<const float> pBuf) const override;
+  RetainPtr<CPDF_IccProfile> GetIccProfile() const override;
   void TranslateImageLine(pdfium::span<uint8_t> dest_span,
                           pdfium::span<const uint8_t> src_span,
                           int pixels,
@@ -602,6 +603,13 @@
   return m_nComponents;
 }
 
+// Default implementation for color spaces that carry no ICC profile: always
+// returns nullptr. CPDF_ICCBasedCS overrides this method to return its
+// profile.
+RetainPtr<CPDF_IccProfile> CPDF_ColorSpace::GetIccProfile() const {
+  return nullptr;
+}
+
 void CPDF_ColorSpace::GetDefaultValue(int iComponent,
                                       float* value,
                                       float* min,
@@ -954,6 +962,10 @@
   return FX_RGB_STRUCT<float>{};
 }
 
+RetainPtr<CPDF_IccProfile> CPDF_ICCBasedCS::GetIccProfile() const {
+  return profile_;
+}
+
 void CPDF_ICCBasedCS::TranslateImageLine(pdfium::span<uint8_t> dest_span,
                                          pdfium::span<const uint8_t> src_span,
                                          int pixels,
diff --git a/core/fpdfapi/page/cpdf_colorspace.h b/core/fpdfapi/page/cpdf_colorspace.h
index b63e0f1..6b3adcc 100644
--- a/core/fpdfapi/page/cpdf_colorspace.h
+++ b/core/fpdfapi/page/cpdf_colorspace.h
@@ -27,6 +27,7 @@
 #include "core/fxge/dib/fx_dib.h"
 
 class CPDF_Document;
+class CPDF_IccProfile;
 class CPDF_IndexedCS;
 class CPDF_PatternCS;
 
@@ -110,6 +111,8 @@
   virtual std::optional<FX_RGB_STRUCT<float>> GetRGB(
       pdfium::span<const float> pBuf) const = 0;
 
+  virtual RetainPtr<CPDF_IccProfile> GetIccProfile() const;
+
   virtual void GetDefaultValue(int iComponent,
                                float* value,
                                float* min,
diff --git a/core/fpdfapi/page/cpdf_iccprofile.cpp b/core/fpdfapi/page/cpdf_iccprofile.cpp
index 6207065..c469edd 100644
--- a/core/fpdfapi/page/cpdf_iccprofile.cpp
+++ b/core/fpdfapi/page/cpdf_iccprofile.cpp
@@ -60,3 +60,7 @@
                                         int pixels) {
   m_Transform->TranslateScanline(pDest, pSrc, pixels);
 }
+
+RetainPtr<const CPDF_StreamAcc> CPDF_IccProfile::GetStreamAcc() const {
+  return m_pStreamAcc;
+}
diff --git a/core/fpdfapi/page/cpdf_iccprofile.h b/core/fpdfapi/page/cpdf_iccprofile.h
index 8e364d2..287dabe 100644
--- a/core/fpdfapi/page/cpdf_iccprofile.h
+++ b/core/fpdfapi/page/cpdf_iccprofile.h
@@ -36,6 +36,8 @@
                          pdfium::span<const uint8_t> pSrc,
                          int pixels);
 
+  RetainPtr<const CPDF_StreamAcc> GetStreamAcc() const;
+
  private:
   CPDF_IccProfile(RetainPtr<const CPDF_StreamAcc> stream_acc,
                   uint32_t expected_components);
diff --git a/fpdfsdk/fpdf_edit_embeddertest.cpp b/fpdfsdk/fpdf_edit_embeddertest.cpp
index f57902a..c35e874 100644
--- a/fpdfsdk/fpdf_edit_embeddertest.cpp
+++ b/fpdfsdk/fpdf_edit_embeddertest.cpp
@@ -4812,6 +4812,66 @@
   EXPECT_EQ(106u, height);
 }
 
+TEST_F(FPDFEditEmbedderTest, GetIccProfileDataDecoded) {
+  ASSERT_TRUE(OpenDocument("bug_42270471.pdf"));
+  ScopedEmbedderTestPage page = LoadScopedPage(0);
+  ASSERT_TRUE(page);
+
+  EXPECT_EQ(1, FPDFPage_CountObjects(page.get()));
+
+  // Retrieve the image object and validate its type.
+  FPDF_PAGEOBJECT image_obj = FPDFPage_GetObject(page.get(), 0);
+  ASSERT_EQ(FPDF_PAGEOBJ_IMAGE, FPDFPageObj_GetType(image_obj));
+
+  // Validate failure cases for null parameters.
+  // 12345 is an arbitrary non-zero value to verify that the length is not
+  // modified.
+  size_t icc_length = 12345;
+  EXPECT_FALSE(FPDFImageObj_GetIccProfileDataDecoded(nullptr, page.get(),
+                                                     nullptr, 0, &icc_length));
+  EXPECT_EQ(12345u, icc_length);
+
+  EXPECT_FALSE(FPDFImageObj_GetIccProfileDataDecoded(image_obj, nullptr,
+                                                     nullptr, 0, &icc_length));
+  EXPECT_EQ(12345u, icc_length);
+
+  EXPECT_FALSE(FPDFImageObj_GetIccProfileDataDecoded(image_obj, page.get(),
+                                                     nullptr, 0, nullptr));
+  EXPECT_FALSE(FPDFImageObj_GetIccProfileDataDecoded(nullptr, nullptr, nullptr,
+                                                     0, nullptr));
+  // Query the decoded ICC profile data length without supplying a buffer.
+  icc_length = 0;
+  EXPECT_TRUE(FPDFImageObj_GetIccProfileDataDecoded(image_obj, page.get(),
+                                                    nullptr, 0, &icc_length));
+  EXPECT_EQ(525u, icc_length);
+
+  // Check that the copied ICC profile data has the expected length and MD5.
+  std::vector<uint8_t> icc_data(icc_length);
+  EXPECT_TRUE(FPDFImageObj_GetIccProfileDataDecoded(
+      image_obj, page.get(), icc_data.data(), icc_data.size(), &icc_length));
+  EXPECT_EQ(525u, icc_length);
+  EXPECT_EQ(icc_data.size(), icc_length);
+  EXPECT_EQ("6f10cf8865bf3ae7e49aa766f78bfba8", GenerateMD5Base16(icc_data));
+}
+
+TEST_F(FPDFEditEmbedderTest, GetIccProfileDataDecodedNoIccProfile) {
+  ASSERT_TRUE(OpenDocument("embedded_images.pdf"));
+  ScopedEmbedderTestPage page = LoadScopedPage(0);
+  ASSERT_TRUE(page);
+
+  // Retrieve an image object that has no ICC profile.
+  FPDF_PAGEOBJECT image_obj = FPDFPage_GetObject(page.get(), 37);
+  ASSERT_EQ(FPDF_PAGEOBJ_IMAGE, FPDFPageObj_GetType(image_obj));
+
+  // The call must fail for an image object without an ICC profile.
+  // 12345 is an arbitrary non-zero value to verify that the length is not
+  // modified.
+  size_t icc_length = 12345;
+  EXPECT_FALSE(FPDFImageObj_GetIccProfileDataDecoded(image_obj, page.get(),
+                                                     nullptr, 0, &icc_length));
+  EXPECT_EQ(12345u, icc_length);
+}
+
 TEST_F(FPDFEditEmbedderTest, GetRenderedBitmapForHelloWorldText) {
   ASSERT_TRUE(OpenDocument("hello_world.pdf"));
   ScopedEmbedderTestPage page = LoadScopedPage(0);
diff --git a/fpdfsdk/fpdf_editimg.cpp b/fpdfsdk/fpdf_editimg.cpp
index c864cde..656d0ed 100644
--- a/fpdfsdk/fpdf_editimg.cpp
+++ b/fpdfsdk/fpdf_editimg.cpp
@@ -12,6 +12,8 @@
 #include <utility>
 
 #include "core/fpdfapi/page/cpdf_dib.h"
+#include "core/fpdfapi/page/cpdf_docpagedata.h"
+#include "core/fpdfapi/page/cpdf_iccprofile.h"
 #include "core/fpdfapi/page/cpdf_image.h"
 #include "core/fpdfapi/page/cpdf_imageobject.h"
 #include "core/fpdfapi/page/cpdf_page.h"
@@ -510,3 +512,88 @@
   *height = pImg->GetPixelHeight();
   return true;
 }
+
+// Looks up the color space named by the /ColorSpace entry of the image stream
+// and, when that color space exposes a valid ICC profile, stores the size of
+// the profile data in |out_buflen|. The data is copied into |buffer| only when
+// |buffer| is non-null and |buflen| is at least that size.
+//
+// Returns false without touching |out_buflen| or |buffer| when |image_object|
+// or |page| cannot be resolved, when |out_buflen| is null, or when no valid
+// ICC profile can be found. Returns true once |out_buflen| has been set, even
+// if nothing was copied.
+FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV
+FPDFImageObj_GetIccProfileDataDecoded(FPDF_PAGEOBJECT image_object,
+                                      FPDF_PAGE page,
+                                      uint8_t* buffer,
+                                      size_t buflen,
+                                      size_t* out_buflen) {
+  CPDF_ImageObject* image_obj = CPDFImageObjectFromFPDFPageObject(image_object);
+  CPDF_Page* pdf_page = CPDFPageFromFPDFPage(page);
+  if (!image_obj || !pdf_page || !out_buflen) {
+    return false;
+  }
+
+  RetainPtr<CPDF_Image> image = image_obj->GetImage();
+  if (!image) {
+    return false;
+  }
+
+  // Hold a reference rather than a raw pointer, like every other object below.
+  RetainPtr<const CPDF_Stream> stream = image->GetStream();
+  if (!stream) {
+    return false;
+  }
+
+  RetainPtr<const CPDF_Dictionary> stream_dict = stream->GetDict();
+  if (!stream_dict) {
+    return false;
+  }
+
+  RetainPtr<const CPDF_Object> color_space_obj =
+      stream_dict->GetDirectObjectFor("ColorSpace");
+  RetainPtr<const CPDF_Dictionary> page_resources =
+      pdf_page->GetPageResources();
+  if (!page_resources) {
+    return false;
+  }
+
+  CPDF_Document* document = pdf_page->GetDocument();
+  if (!document) {
+    return false;
+  }
+
+  auto* doc_data = CPDF_DocPageData::FromDocument(document);
+  if (!doc_data) {
+    // |out_buflen| has not been written yet, so this must be a failure.
+    return false;
+  }
+
+  RetainPtr<CPDF_ColorSpace> color_space =
+      doc_data->GetColorSpace(color_space_obj.Get(), page_resources);
+  if (!color_space) {
+    return false;
+  }
+
+  RetainPtr<CPDF_IccProfile> icc_profile = color_space->GetIccProfile();
+  if (!icc_profile || !icc_profile->IsValid()) {
+    return false;
+  }
+
+  RetainPtr<const CPDF_StreamAcc> stream_acc = icc_profile->GetStreamAcc();
+  if (!stream_acc) {
+    return false;
+  }
+
+  pdfium::span<const uint8_t> data = stream_acc->GetSpan();
+  *out_buflen = data.size();
+  if (!buffer || buflen < *out_buflen) {
+    // Size query, or |buffer| is too small: report the size without copying.
+    return true;
+  }
+
+  // SAFETY: required from caller.
+  auto result_span = UNSAFE_BUFFERS(SpanFromFPDFApiArgs(buffer, buflen));
+  fxcrt::spancpy(result_span, data);
+  return true;
+}
diff --git a/fpdfsdk/fpdf_view_c_api_test.c b/fpdfsdk/fpdf_view_c_api_test.c
index 911cdcb..bb33a52 100644
--- a/fpdfsdk/fpdf_view_c_api_test.c
+++ b/fpdfsdk/fpdf_view_c_api_test.c
@@ -179,6 +179,7 @@
     CHK(FPDFGlyphPath_CountGlyphSegments);
     CHK(FPDFGlyphPath_GetGlyphPathSegment);
     CHK(FPDFImageObj_GetBitmap);
+    CHK(FPDFImageObj_GetIccProfileDataDecoded);
     CHK(FPDFImageObj_GetImageDataDecoded);
     CHK(FPDFImageObj_GetImageDataRaw);
     CHK(FPDFImageObj_GetImageFilter);
diff --git a/public/fpdf_edit.h b/public/fpdf_edit.h
index 3a068e0..5dc11c4 100644
--- a/public/fpdf_edit.h
+++ b/public/fpdf_edit.h
@@ -842,6 +842,31 @@
                                unsigned int* width,
                                unsigned int* height);
 
+// Experimental API.
+// Get ICC profile decoded data of |image_object|. If the |image_object| is not
+// an image object or if it does not have an image, then the return value will
+// be false. It also returns false if the |image_object| has no ICC profile.
+// |buffer| is only modified if ICC profile exists and |buflen| is at least
+// the length of the ICC profile decoded data.
+//
+//   image_object - handle to an image object; must not be NULL.
+//   page         - handle to the page containing |image_object|; must not be
+//                  NULL. Required for retrieving the image's colorspace.
+//   buffer       - Buffer to receive ICC profile data; may be NULL if querying
+//                  required size via |out_buflen|.
+//   buflen       - Length of the buffer in bytes. Ignored if |buffer| is NULL.
+//   out_buflen   - Pointer to receive the ICC profile data size in bytes; must
+//                  not be NULL. Will be set if this API returns true.
+//
+// Returns true if |page| and |out_buflen| are not NULL and a valid ICC profile
+// exists for the given |image_object|.
+FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV
+FPDFImageObj_GetIccProfileDataDecoded(FPDF_PAGEOBJECT image_object,
+                                      FPDF_PAGE page,
+                                      uint8_t* buffer,
+                                      size_t buflen,
+                                      size_t* out_buflen);
+
 // Create a new path object at an initial position.
 //
 //   x - initial horizontal position.
diff --git a/testing/resources/bug_42270471.in b/testing/resources/bug_42270471.in
new file mode 100644
index 0000000..1baff9f
--- /dev/null
+++ b/testing/resources/bug_42270471.in
@@ -0,0 +1,64 @@
+{{header}}
+{{object 1 0}} <<
+  /Type /Catalog
+  /Pages 2 0 R
+>>
+endobj
+{{object 2 0}} <<
+  /Type /Pages
+  /Count 1
+  /Kids [3 0 R]
+>>
+endobj
+{{object 3 0}} <<
+  /Type /Page
+  /Parent 2 0 R
+  /Contents [4 0 R]
+  /MediaBox [0 0 100 100]
+  /Resources <<
+    /XObject <<
+      /X0 6 0 R
+    >>
+  >>
+>>
+endobj
+{{object 4 0}} <<
+  {{streamlen}}
+>>
+stream
+50 0 0 50 0 0 cm
+/X0 Do
+endstream
+endobj
+{{object 5 0}} <<
+  /Alternate /DeviceGray
+  /Filter [/ASCII85Decode /FlateDecode]
+  /N 3
+  {{streamlen}}
+>>
+stream
+GhQY8@,aa/.*,h+?sq(#.k(gGG>a6QN)F%Jc+q7?`k]Wf`1W<]>[62(&E!Xi_>mKW^qdh.!!,b)lJ!$X
+YfLBm#1<Nb8nr?$OT#6uKge8M)&O2hJ8C?i#eL/V:4f]s5jY5,+D2gD!KbQm%Uj8UZ'-<s"!Dc48O3ZE
+I0$--\Y'5HIKF&\6gXpUECZ[/;ABV"PnZKt%PC>s91^[,6#miO'ncV7h4X7jE0@aK(ej`C(mkaD@_oJ]
+%-u+T(duOY@Leq;$o7,d(j7b_QPPiP9k%WqP?a!;*ta>`j0LR%:)MHPQ_[ZHJ4(*Ao*[%+1=J$O'HeH<
+cHe'j/P.ND<\Unf1h#;D!28(Hn,~>
+endstream
+endobj
+{{object 6 0}} <<
+  /Type /XObject
+  /Subtype /Image
+  /Width 50
+  /Height 50
+  /BitsPerComponent 8
+  /ColorSpace [/ICCBased 5 0 R]
+  /Filter [/ASCIIHexDecode /FlateDecode]
+  {{streamlen}}
+>>
+stream
+789cedc2310d00000c03a07f2aaab3ea7bcf03842655555555555555f5bf01cc7818dc
+endstream
+endobj
+{{xref}}
+{{trailer}}
+{{startxref}}
+%%EOF
diff --git a/testing/resources/bug_42270471.pdf b/testing/resources/bug_42270471.pdf
new file mode 100644
index 0000000..ca740a2
--- /dev/null
+++ b/testing/resources/bug_42270471.pdf
@@ -0,0 +1,77 @@
+%PDF-1.7
+%���
+1 0 obj <<
+  /Type /Catalog
+  /Pages 2 0 R
+>>
+endobj
+2 0 obj <<
+  /Type /Pages
+  /Count 1
+  /Kids [3 0 R]
+>>
+endobj
+3 0 obj <<
+  /Type /Page
+  /Parent 2 0 R
+  /Contents [4 0 R]
+  /MediaBox [0 0 100 100]
+  /Resources <<
+    /XObject <<
+      /X0 6 0 R
+    >>
+  >>
+>>
+endobj
+4 0 obj <<
+  /Length 24
+>>
+stream
+50 0 0 50 0 0 cm
+/X0 Do
+endstream
+endobj
+5 0 obj <<
+  /Alternate /DeviceGray
+  /Filter [/ASCII85Decode /FlateDecode]
+  /N 3
+  /Length 354
+>>
+stream
+GhQY8@,aa/.*,h+?sq(#.k(gGG>a6QN)F%Jc+q7?`k]Wf`1W<]>[62(&E!Xi_>mKW^qdh.!!,b)lJ!$X
+YfLBm#1<Nb8nr?$OT#6uKge8M)&O2hJ8C?i#eL/V:4f]s5jY5,+D2gD!KbQm%Uj8UZ'-<s"!Dc48O3ZE
+I0$--\Y'5HIKF&\6gXpUECZ[/;ABV"PnZKt%PC>s91^[,6#miO'ncV7h4X7jE0@aK(ej`C(mkaD@_oJ]
+%-u+T(duOY@Leq;$o7,d(j7b_QPPiP9k%WqP?a!;*ta>`j0LR%:)MHPQ_[ZHJ4(*Ao*[%+1=J$O'HeH<
+cHe'j/P.ND<\Unf1h#;D!28(Hn,~>
+endstream
+endobj
+6 0 obj <<
+  /Type /XObject
+  /Subtype /Image
+  /Width 50
+  /Height 50
+  /BitsPerComponent 8
+  /ColorSpace [/ICCBased 5 0 R]
+  /Filter [/ASCIIHexDecode /FlateDecode]
+  /Length 71
+>>
+stream
+789cedc2310d00000c03a07f2aaab3ea7bcf03842655555555555555f5bf01cc7818dc
+endstream
+endobj
+xref
+0 7
+0000000000 65535 f 
+0000000015 00000 n 
+0000000068 00000 n 
+0000000131 00000 n 
+0000000288 00000 n 
+0000000363 00000 n 
+0000000841 00000 n 
+trailer <<
+  /Root 1 0 R
+  /Size 7
+>>
+startxref
+1118
+%%EOF