Improve invalid ICC profile handling

Currently, when an ICC profile's component count does not match the
expected value in the ICC profile stream dictionary, CPDF_ICCBasedCS
will consider the entire ICC colorspace as invalid. This can result in
images not rendering.

Instead, plumb the expected component count into CPDF_IccProfile, and
let it detect this situation and mark the ICC profile as unsupported.
Then CPDF_ICCBasedCS will go through its unsupported ICC profile path
and try an alternate profile. This allows images to render correctly in
certain PDFs that do not fully follow the PDF spec.

Update CPDF_DocPageData to make its ICC caching code take the component
count into consideration to prevent rare cases where 2 ICC profile
stream dictionaries have the same ICC data, but different component
counts.

Test this scenario with a pixel test. The pixel test is a mashup of the
existing matte.in pixel test with ICC data from the
fx/color/color_icc_based.pdf corpus test.

Bug: chromium:1479436
Change-Id: If331ecf06dba88232dd6f2e13b0e1354dc405074
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/111731
Commit-Queue: Lei Zhang <thestig@chromium.org>
Reviewed-by: Tom Sepez <tsepez@chromium.org>
diff --git a/core/fpdfapi/page/cpdf_colorspace.cpp b/core/fpdfapi/page/cpdf_colorspace.cpp
index 6bc7033..c52cbe2 100644
--- a/core/fpdfapi/page/cpdf_colorspace.cpp
+++ b/core/fpdfapi/page/cpdf_colorspace.cpp
@@ -894,23 +894,17 @@
   // PDF viewers tolerate invalid values, Acrobat does not, so be consistent
   // with Acrobat and reject bad values.
   RetainPtr<const CPDF_Dictionary> pDict = pStream->GetDict();
-  int32_t nDictComponents = pDict ? pDict->GetIntegerFor("N") : 0;
+  const int32_t nDictComponents = pDict ? pDict->GetIntegerFor("N") : 0;
   if (!fxcodec::IccTransform::IsValidIccComponents(nDictComponents)) {
     return 0;
   }
 
-  uint32_t nComponents = static_cast<uint32_t>(nDictComponents);
+  // Safe to cast, as the value just got validated.
+  const uint32_t nComponents = static_cast<uint32_t>(nDictComponents);
   m_pProfile = CPDF_DocPageData::FromDocument(pDoc)->GetIccProfile(pStream);
   if (!m_pProfile)
     return 0;
 
-  // The PDF 1.7 spec also says the number of components in the ICC profile
-  // must match the N value. However, that assumes the viewer actually
-  // understands the ICC profile.
-  // If the valid ICC profile has a mismatch, fail.
-  if (m_pProfile->IsValid() && m_pProfile->GetComponents() != nComponents)
-    return 0;
-
   // If PDFium does not understand the ICC profile format at all, or if it's
   // SRGB, a profile PDFium recognizes but does not support well, then try the
   // alternate profile.
diff --git a/core/fpdfapi/page/cpdf_docpagedata.cpp b/core/fpdfapi/page/cpdf_docpagedata.cpp
index b00a616..001f2df 100644
--- a/core/fpdfapi/page/cpdf_docpagedata.cpp
+++ b/core/fpdfapi/page/cpdf_docpagedata.cpp
@@ -30,6 +30,7 @@
 #include "core/fpdfapi/parser/cpdf_stream.h"
 #include "core/fpdfapi/parser/cpdf_stream_acc.h"
 #include "core/fpdfapi/parser/cpdf_string.h"
+#include "core/fxcodec/icc/icc_transform.h"
 #include "core/fxcrt/fx_codepage.h"
 #include "core/fxcrt/fx_memory.h"
 #include "core/fxcrt/fx_safe_types.h"
@@ -173,6 +174,20 @@
   }
 }
 
+CPDF_DocPageData::HashIccProfileKey::HashIccProfileKey(ByteString digest,
+                                                       uint32_t components)
+    : digest(std::move(digest)), components(components) {}
+
+CPDF_DocPageData::HashIccProfileKey::~HashIccProfileKey() = default;
+
+bool CPDF_DocPageData::HashIccProfileKey::operator<(
+    const HashIccProfileKey& other) const {
+  if (components == other.components) {
+    return digest < other.digest;
+  }
+  return components < other.components;
+}
+
 void CPDF_DocPageData::ClearStockFont() {
   CPDF_PageModule::GetInstance()->ClearStockFont(GetDocument());
 }
@@ -394,8 +409,7 @@
 
 RetainPtr<CPDF_IccProfile> CPDF_DocPageData::GetIccProfile(
     RetainPtr<const CPDF_Stream> pProfileStream) {
-  if (!pProfileStream)
-    return nullptr;
+  CHECK(pProfileStream);
 
   auto it = m_IccProfileMap.find(pProfileStream);
   if (it != m_IccProfileMap.end() && it->second)
@@ -404,17 +418,25 @@
   auto pAccessor = pdfium::MakeRetain<CPDF_StreamAcc>(pProfileStream);
   pAccessor->LoadAllDataFiltered();
 
-  ByteString bsDigest = pAccessor->ComputeDigest();
-  auto hash_it = m_HashProfileMap.find(bsDigest);
-  if (hash_it != m_HashProfileMap.end()) {
+  // This should not fail, as the caller should have checked this already.
+  const int expected_components = pProfileStream->GetDict()->GetIntegerFor("N");
+  CHECK(fxcodec::IccTransform::IsValidIccComponents(expected_components));
+
+  // Since CPDF_IccProfile can behave differently depending on
+  // `expected_components`, `hash_profile_key` needs to take that into
+  // consideration, in addition to the digest value.
+  const HashIccProfileKey hash_profile_key(pAccessor->ComputeDigest(),
+                                           expected_components);
+  auto hash_it = m_HashIccProfileMap.find(hash_profile_key);
+  if (hash_it != m_HashIccProfileMap.end()) {
     auto it_copied_stream = m_IccProfileMap.find(hash_it->second);
     if (it_copied_stream != m_IccProfileMap.end() && it_copied_stream->second)
       return pdfium::WrapRetain(it_copied_stream->second.Get());
   }
-  auto pProfile =
-      pdfium::MakeRetain<CPDF_IccProfile>(pProfileStream, pAccessor->GetSpan());
+  auto pProfile = pdfium::MakeRetain<CPDF_IccProfile>(
+      pProfileStream, pAccessor->GetSpan(), expected_components);
   m_IccProfileMap[pProfileStream].Reset(pProfile.Get());
-  m_HashProfileMap[bsDigest] = std::move(pProfileStream);
+  m_HashIccProfileMap[hash_profile_key] = std::move(pProfileStream);
   return pProfile;
 }
 
diff --git a/core/fpdfapi/page/cpdf_docpagedata.h b/core/fpdfapi/page/cpdf_docpagedata.h
index 59c0354..85aa7f4 100644
--- a/core/fpdfapi/page/cpdf_docpagedata.h
+++ b/core/fpdfapi/page/cpdf_docpagedata.h
@@ -88,6 +88,16 @@
       RetainPtr<const CPDF_Stream> pProfileStream);
 
  private:
+  struct HashIccProfileKey {
+    HashIccProfileKey(ByteString digest, uint32_t components);
+    ~HashIccProfileKey();
+
+    bool operator<(const HashIccProfileKey& other) const;
+
+    ByteString digest;
+    uint32_t components;
+  };
+
   // Loads a colorspace in a context that might be while loading another
   // colorspace, or even in a recursive call from this method itself. |pVisited|
   // is passed recursively to avoid circular calls involving
@@ -109,7 +119,7 @@
   bool m_bForceClear = false;
 
   // Specific destruction order may be required between maps.
-  std::map<ByteString, RetainPtr<const CPDF_Stream>> m_HashProfileMap;
+  std::map<HashIccProfileKey, RetainPtr<const CPDF_Stream>> m_HashIccProfileMap;
   std::map<RetainPtr<const CPDF_Array>, ObservedPtr<CPDF_ColorSpace>>
       m_ColorSpaceMap;
   std::map<RetainPtr<const CPDF_Stream>, RetainPtr<CPDF_StreamAcc>>
diff --git a/core/fpdfapi/page/cpdf_iccprofile.cpp b/core/fpdfapi/page/cpdf_iccprofile.cpp
index 82bbe9b..67cb42a 100644
--- a/core/fpdfapi/page/cpdf_iccprofile.cpp
+++ b/core/fpdfapi/page/cpdf_iccprofile.cpp
@@ -21,16 +21,26 @@
 }  // namespace
 
 CPDF_IccProfile::CPDF_IccProfile(RetainPtr<const CPDF_Stream> pStream,
-                                 pdfium::span<const uint8_t> span)
+                                 pdfium::span<const uint8_t> span,
+                                 uint32_t expected_components)
     : m_bsRGB(DetectSRGB(span)), m_pStream(std::move(pStream)) {
   if (m_bsRGB) {
     m_nSrcComponents = 3;
     return;
   }
 
-  m_Transform = fxcodec::IccTransform::CreateTransformSRGB(span);
-  if (m_Transform)
-    m_nSrcComponents = m_Transform->components();
+  auto transform = fxcodec::IccTransform::CreateTransformSRGB(span);
+  if (!transform) {
+    return;
+  }
+
+  uint32_t components = transform->components();
+  if (components != expected_components) {
+    return;
+  }
+
+  m_nSrcComponents = components;
+  m_Transform = std::move(transform);
 }
 
 CPDF_IccProfile::~CPDF_IccProfile() = default;
diff --git a/core/fpdfapi/page/cpdf_iccprofile.h b/core/fpdfapi/page/cpdf_iccprofile.h
index 3dd9f14..c49e0e2 100644
--- a/core/fpdfapi/page/cpdf_iccprofile.h
+++ b/core/fpdfapi/page/cpdf_iccprofile.h
@@ -40,7 +40,8 @@
  private:
   // Keeps stream alive for the duration of the CPDF_IccProfile.
   CPDF_IccProfile(RetainPtr<const CPDF_Stream> pStream,
-                  pdfium::span<const uint8_t> span);
+                  pdfium::span<const uint8_t> span,
+                  uint32_t expected_components);
   ~CPDF_IccProfile() override;
 
   const bool m_bsRGB;
diff --git a/testing/resources/pixel/bug_1479436.in b/testing/resources/pixel/bug_1479436.in
new file mode 100644
index 0000000..736ba44
--- /dev/null
+++ b/testing/resources/pixel/bug_1479436.in
@@ -0,0 +1,64 @@
+{{header}}
+{{object 1 0}} <<
+  /Type /Catalog
+  /Pages 2 0 R
+>>
+endobj
+{{object 2 0}} <<
+  /Type /Pages
+  /Count 1
+  /Kids [3 0 R]
+>>
+endobj
+{{object 3 0}} <<
+  /Type /Page
+  /Parent 2 0 R
+  /Contents [4 0 R]
+  /MediaBox [0 0 100 100]
+  /Resources <<
+    /XObject <<
+      /X0 6 0 R
+    >>
+  >>
+>>
+endobj
+{{object 4 0}} <<
+  {{streamlen}}
+>>
+stream
+50 0 0 50 0 0 cm
+/X0 Do
+endstream
+endobj
+{{object 5 0}} <<
+  /Alternate /DeviceGray
+  /Filter [/ASCII85Decode /FlateDecode]
+  /N 1 % Should be 3. Deliberately wrong.
+  {{streamlen}}
+>>
+stream
+GhQY8@,aa/.*,h+?sq(#.k(gGG>a6QN)F%Jc+q7?`k]Wf`1W<]>[62(&E!Xi_>mKW^qdh.!!,b)lJ!$X
+YfLBm#1<Nb8nr?$OT#6uKge8M)&O2hJ8C?i#eL/V:4f]s5jY5,+D2gD!KbQm%Uj8UZ'-<s"!Dc48O3ZE
+I0$--\Y'5HIKF&\6gXpUECZ[/;ABV"PnZKt%PC>s91^[,6#miO'ncV7h4X7jE0@aK(ej`C(mkaD@_oJ]
+%-u+T(duOY@Leq;$o7,d(j7b_QPPiP9k%WqP?a!;*ta>`j0LR%:)MHPQ_[ZHJ4(*Ao*[%+1=J$O'HeH<
+cHe'j/P.ND<\Unf1h#;D!28(Hn,~>
+endstream
+endobj
+{{object 6 0}} <<
+  /Type /XObject
+  /Subtype /Image
+  /Width 50
+  /Height 50
+  /BitsPerComponent 8
+  /ColorSpace [/ICCBased 5 0 R]
+  /Filter [/ASCIIHexDecode /FlateDecode]
+  {{streamlen}}
+>>
+stream
+789cedc2310d00000c03a07f2aaab3ea7bcf03842655555555555555f5bf01cc7818dc
+endstream
+endobj
+{{xref}}
+{{trailer}}
+{{startxref}}
+%%EOF
diff --git a/testing/resources/pixel/bug_1479436_expected.pdf.0.png b/testing/resources/pixel/bug_1479436_expected.pdf.0.png
new file mode 100644
index 0000000..2bb8738
--- /dev/null
+++ b/testing/resources/pixel/bug_1479436_expected.pdf.0.png
Binary files differ