Add fallback color space check when choosing JpxDecodeAction

The component number in JPX image info and the component number in a
PDF's color space are two different concepts. The component number
from a JPX image info might have included extra info besides color
channels. Therefore the correct decode action should be based on the
color space type from the PDF entry and the JPX image info instead of
the the component number from the JPX image info.

This CL implements the following improvements:
(1) When a /ColorSpace entry is available in the PDF, use that PDF
    color space component number and JPX image color space type to
    determine the decode action.
(2) When /ColorSpace entry is not available, use JPX image color space
    type to determine the decode action.
(3) Since the component number in JPX image info doesn't stand for the
    correct color channel number, pass the correct color channel
    number into CJPX_Decoder::Decode().

Bug: pdfium:1747
Change-Id: I2c74ef1e15b4cc8353fd5b23de7eb10eca140e64
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/104031
Reviewed-by: Lei Zhang <thestig@chromium.org>
Commit-Queue: Nigi <nigi@chromium.org>
diff --git a/core/fpdfapi/page/cpdf_dib.cpp b/core/fpdfapi/page/cpdf_dib.cpp
index 37dbe59..3600788 100644
--- a/core/fpdfapi/page/cpdf_dib.cpp
+++ b/core/fpdfapi/page/cpdf_dib.cpp
@@ -40,6 +40,7 @@
 #include "third_party/base/check.h"
 #include "third_party/base/check_op.h"
 #include "third_party/base/cxx17_backports.h"
+#include "third_party/base/notreached.h"
 
 namespace {
 
@@ -110,45 +111,94 @@
 enum class JpxDecodeAction {
   kFail,
   kDoNothing,
+  kUseGray,
   kUseRgb,
   kUseCmyk,
   kConvertArgbToRgb,
 };
 
-JpxDecodeAction GetJpxDecodeAction(const CJPX_Decoder::JpxImageInfo& jpx_info,
-                                   const CPDF_ColorSpace* pdf_colorspace) {
-  if (pdf_colorspace) {
+// Decides which JpxDecodeAction to use based on the colorspace information from
+// the PDF and the JPX image. Called only when the PDF's image object contains a
+// "/ColorSpace" entry.
+JpxDecodeAction GetJpxDecodeActionFromColorSpaces(
+    const CJPX_Decoder::JpxImageInfo& jpx_info,
+    const CPDF_ColorSpace* pdf_colorspace) {
+  if (pdf_colorspace ==
+      CPDF_ColorSpace::GetStockCS(CPDF_ColorSpace::Family::kDeviceGray)) {
+    return JpxDecodeAction::kUseGray;
+  }
+
+  if (pdf_colorspace ==
+      CPDF_ColorSpace::GetStockCS(CPDF_ColorSpace::Family::kDeviceRGB)) {
     // The channel count of a JPX image can be different from the PDF color
     // space's component count.
-    if (jpx_info.channels != pdf_colorspace->CountComponents()) {
-      // Many PDFs generated by iOS meets this condition. See
-      // https://crbug.com/1012369 for example.
-      if (pdf_colorspace->CountComponents() == 3 && jpx_info.channels == 4 &&
-          jpx_info.colorspace == OPJ_CLRSPC_SRGB) {
+    if (jpx_info.channels > 3) {
+      return JpxDecodeAction::kConvertArgbToRgb;
+    }
+
+    return JpxDecodeAction::kUseRgb;
+  }
+
+  if (pdf_colorspace ==
+      CPDF_ColorSpace::GetStockCS(CPDF_ColorSpace::Family::kDeviceCMYK)) {
+    return JpxDecodeAction::kUseCmyk;
+  }
+
+  return JpxDecodeAction::kDoNothing;
+}
+
+JpxDecodeAction GetJpxDecodeActionFromImageColorSpace(
+    const CJPX_Decoder::JpxImageInfo& jpx_info) {
+  switch (jpx_info.colorspace) {
+    case OPJ_CLRSPC_SYCC:
+    case OPJ_CLRSPC_EYCC:
+    case OPJ_CLRSPC_UNKNOWN:
+    case OPJ_CLRSPC_UNSPECIFIED:
+      return JpxDecodeAction::kDoNothing;
+
+    case OPJ_CLRSPC_SRGB:
+      if (jpx_info.channels > 3) {
         return JpxDecodeAction::kConvertArgbToRgb;
       }
 
-      return JpxDecodeAction::kFail;
-    }
-
-    if (pdf_colorspace ==
-        CPDF_ColorSpace::GetStockCS(CPDF_ColorSpace::Family::kDeviceRGB)) {
-      return JpxDecodeAction::kUseRgb;
-    }
-    return JpxDecodeAction::kDoNothing;
-  }
-
-  // Cases where the PDF did not provide a colorspace.
-  // Choose how to decode based on the number of channels in the JPX image.
-  switch (jpx_info.channels) {
-    case 3:
       return JpxDecodeAction::kUseRgb;
 
-    case 4:
+    case OPJ_CLRSPC_GRAY:
+      return JpxDecodeAction::kUseGray;
+
+    case OPJ_CLRSPC_CMYK:
       return JpxDecodeAction::kUseCmyk;
 
     default:
-      return JpxDecodeAction::kDoNothing;
+      NOTREACHED_NORETURN();
+  }
+}
+
+JpxDecodeAction GetJpxDecodeAction(const CJPX_Decoder::JpxImageInfo& jpx_info,
+                                   const CPDF_ColorSpace* pdf_colorspace) {
+  if (pdf_colorspace) {
+    return GetJpxDecodeActionFromColorSpaces(jpx_info, pdf_colorspace);
+  }
+
+  // When PDF does not provide a color space, check the image color space.
+  return GetJpxDecodeActionFromImageColorSpace(jpx_info);
+}
+
+int GetComponentCountFromOpjColorSpace(OPJ_COLOR_SPACE colorspace) {
+  switch (colorspace) {
+    case OPJ_CLRSPC_GRAY:
+      return 1;
+
+    case OPJ_CLRSPC_SRGB:
+    case OPJ_CLRSPC_SYCC:
+    case OPJ_CLRSPC_EYCC:
+      return 3;
+
+    case OPJ_CLRSPC_CMYK:
+      return 4;
+
+    default:
+      return 0;
   }
 }
 
@@ -595,13 +645,19 @@
   RetainPtr<CPDF_ColorSpace> original_colorspace = m_pColorSpace;
   bool swap_rgb = false;
   bool convert_argb_to_rgb = false;
-  switch (GetJpxDecodeAction(image_info, m_pColorSpace.Get())) {
+  auto action = GetJpxDecodeAction(image_info, m_pColorSpace.Get());
+  switch (action) {
     case JpxDecodeAction::kFail:
       return nullptr;
 
     case JpxDecodeAction::kDoNothing:
       break;
 
+    case JpxDecodeAction::kUseGray:
+      m_pColorSpace =
+          CPDF_ColorSpace::GetStockCS(CPDF_ColorSpace::Family::kDeviceGray);
+      break;
+
     case JpxDecodeAction::kUseRgb:
       DCHECK(image_info.channels >= 3);
       swap_rgb = true;
@@ -625,15 +681,21 @@
     DCHECK_NE(0, m_nComponents);
   } else {
     DCHECK_EQ(0, m_nComponents);
-    m_nComponents = image_info.channels;
+    m_nComponents = GetComponentCountFromOpjColorSpace(image_info.colorspace);
+    if (m_nComponents == 0) {
+      return nullptr;
+    }
   }
 
   FXDIB_Format format;
-  if (image_info.channels == 1) {
+  if (action == JpxDecodeAction::kUseGray) {
     format = FXDIB_Format::k8bppRgb;
-  } else if (image_info.channels <= 3) {
+  } else if (action == JpxDecodeAction::kUseRgb && image_info.channels == 3) {
     format = FXDIB_Format::kRgb;
-  } else if (image_info.channels == 4) {
+  } else if (action == JpxDecodeAction::kConvertArgbToRgb &&
+             image_info.channels == 4) {
+    format = FXDIB_Format::kRgb32;
+  } else if (action == JpxDecodeAction::kUseRgb && image_info.channels == 4) {
     format = FXDIB_Format::kRgb32;
   } else {
     image_info.width = (image_info.width * image_info.channels + 2) / 3;
@@ -646,7 +708,7 @@
 
   result_bitmap->Clear(0xFFFFFFFF);
   if (!decoder->Decode(result_bitmap->GetBuffer(), result_bitmap->GetPitch(),
-                       swap_rgb)) {
+                       swap_rgb, m_nComponents)) {
     return nullptr;
   }
 
@@ -706,6 +768,10 @@
       }
     }
   }
+
+  // TODO(crbug.com/pdfium/1747): Handle SMaskInData entries for different
+  // color space types.
+
   m_bpc = 8;
   return result_bitmap;
 }
diff --git a/core/fxcodec/jpx/cjpx_decoder.cpp b/core/fxcodec/jpx/cjpx_decoder.cpp
index 5f193c7..9391d61 100644
--- a/core/fxcodec/jpx/cjpx_decoder.cpp
+++ b/core/fxcodec/jpx/cjpx_decoder.cpp
@@ -16,6 +16,7 @@
 #include "core/fxcodec/jpx/jpx_decode_utils.h"
 #include "core/fxcrt/fx_safe_types.h"
 #include "core/fxcrt/span_util.h"
+#include "core/fxge/calculate_pitch.h"
 #include "third_party/abseil-cpp/absl/types/optional.h"
 #include "third_party/base/cxx17_backports.h"
 #include "third_party/base/ptr_util.h"
@@ -514,13 +515,31 @@
 
 bool CJPX_Decoder::Decode(pdfium::span<uint8_t> dest_buf,
                           uint32_t pitch,
-                          bool swap_rgb) {
-  if (pitch < ((m_Image->comps[0].w * 8 * m_Image->numcomps + 31) >> 5) << 2)
-    return false;
+                          bool swap_rgb,
+                          uint32_t component_count) {
+  CHECK_LE(component_count, m_Image->numcomps);
+  uint32_t channel_count = component_count;
+  if (channel_count == 3 && m_Image->numcomps == 4) {
+    // When decoding for an ARGB image, include the alpha channel in the channel
+    // count.
+    channel_count = 4;
+  }
 
-  if (swap_rgb && m_Image->numcomps < 3)
+  absl::optional<uint32_t> calculated_pitch =
+      fxge::CalculatePitch32(8 * channel_count, m_Image->comps[0].w);
+  if (!calculated_pitch.has_value() || pitch < calculated_pitch.value()) {
     return false;
+  }
 
+  if (swap_rgb && channel_count < 3) {
+    return false;
+  }
+
+  // Initialize `channel_bufs` and `adjust_comps` to store information from all
+  // the channels of the JPX image. They will contain more information besides
+  // the color component data if `m_Image->numcomps` > `component_count`.
+  // Currently only the color component data is used for rendering.
+  // TODO(crbug.com/pdfium/1747): Make full use of the component information.
   fxcrt::spanset(dest_buf.first(m_Image->comps[0].h * pitch), 0xff);
   std::vector<uint8_t*> channel_bufs(m_Image->numcomps);
   std::vector<int> adjust_comps(m_Image->numcomps);
@@ -540,7 +559,7 @@
 
   uint32_t width = m_Image->comps[0].w;
   uint32_t height = m_Image->comps[0].h;
-  for (uint32_t channel = 0; channel < m_Image->numcomps; ++channel) {
+  for (uint32_t channel = 0; channel < channel_count; ++channel) {
     uint8_t* pChannel = channel_bufs[channel];
     const int adjust = adjust_comps[channel];
     const opj_image_comp_t& comps = m_Image->comps[channel];
@@ -554,7 +573,7 @@
       for (uint32_t row = 0; row < height; ++row) {
         uint8_t* pScanline = pChannel + row * pitch;
         for (uint32_t col = 0; col < width; ++col) {
-          uint8_t* pPixel = pScanline + col * m_Image->numcomps;
+          uint8_t* pPixel = pScanline + col * channel_count;
           int src = comps.data[row * width + col] + src_offset;
           *pPixel = static_cast<uint8_t>(src << -adjust);
         }
@@ -563,7 +582,7 @@
       for (uint32_t row = 0; row < height; ++row) {
         uint8_t* pScanline = pChannel + row * pitch;
         for (uint32_t col = 0; col < width; ++col) {
-          uint8_t* pPixel = pScanline + col * m_Image->numcomps;
+          uint8_t* pPixel = pScanline + col * channel_count;
           int src = comps.data[row * width + col] + src_offset;
           *pPixel = static_cast<uint8_t>(src);
         }
@@ -572,7 +591,7 @@
       for (uint32_t row = 0; row < height; ++row) {
         uint8_t* pScanline = pChannel + row * pitch;
         for (uint32_t col = 0; col < width; ++col) {
-          uint8_t* pPixel = pScanline + col * m_Image->numcomps;
+          uint8_t* pPixel = pScanline + col * channel_count;
           int src = comps.data[row * width + col] + src_offset;
           int pixel = (src >> adjust) + ((src >> (adjust - 1)) % 2);
           pixel = pdfium::clamp(pixel, 0, 255);
diff --git a/core/fxcodec/jpx/cjpx_decoder.h b/core/fxcodec/jpx/cjpx_decoder.h
index 915c57a..36116b3 100644
--- a/core/fxcodec/jpx/cjpx_decoder.h
+++ b/core/fxcodec/jpx/cjpx_decoder.h
@@ -55,8 +55,21 @@
   JpxImageInfo GetInfo() const;
   bool StartDecode();
 
-  // |swap_rgb| can only be set for images with 3 or more channels.
-  bool Decode(pdfium::span<uint8_t> dest_buf, uint32_t pitch, bool swap_rgb);
+  // `swap_rgb` can only be set when an image's color space type contains at
+  // least 3 color components. Note that this `component_count` is not
+  // equivalent to `JpxImageInfo::channels`. The JpxImageInfo channels can
+  // contain extra information for rendering the image besides the color
+  // component information. Therefore the `JpxImageInfo::channels` must be no
+  // less than the component count.
+  //
+  // Example: If a JPX image's color space type is OPJ_CLRSPC_SRGB, the
+  // component count for this color space is 3, and the channel count of its
+  // JpxImageInfo can be 4. This is because the extra channel might contain
+  // extra information, such as the transparency level of the image.
+  bool Decode(pdfium::span<uint8_t> dest_buf,
+              uint32_t pitch,
+              bool swap_rgb,
+              uint32_t component_count);
 
  private:
   // Use Create() to instantiate.
diff --git a/testing/SUPPRESSIONS b/testing/SUPPRESSIONS
index bf49297..b50d3ce 100644
--- a/testing/SUPPRESSIONS
+++ b/testing/SUPPRESSIONS
@@ -334,7 +334,7 @@
 #
 
 # TODO(pdfium:1747): Remove after associated bug is fixed
-bug_1258634.in * * * skia
+bug_1258634.in * * * *
 
 # TODO(pdfium:1331): Remove after associated bug is fixed
 bug_1331.in * * * *
@@ -384,7 +384,6 @@
 jpxdecode.in * * * *
 jpxdecode_without_bitspercomponent.in * * * *
 jpxdecode_without_colorspace.in * * * *
-jpxdecode_without_smaskindata.in * * * *
 
 # TODO(chromium:1028991): Remove after associated bug is fixed
 reset_button.in * * * *
diff --git a/testing/fuzzers/pdf_jpx_fuzzer.cc b/testing/fuzzers/pdf_jpx_fuzzer.cc
index 1a9e8f6..3021d76 100644
--- a/testing/fuzzers/pdf_jpx_fuzzer.cc
+++ b/testing/fuzzers/pdf_jpx_fuzzer.cc
@@ -69,8 +69,8 @@
           static_cast<uint32_t>(bitmap->GetHeight()))
     return 0;
 
-  decoder->Decode(bitmap->GetBuffer(), bitmap->GetPitch(),
-                  /*swap_rgb=*/false);
+  decoder->Decode(bitmap->GetBuffer(), bitmap->GetPitch(), /*swap_rgb=*/false,
+                  GetCompsFromFormat(format));
 
   return 0;
 }
diff --git a/testing/resources/pixel/bug_304_expected.pdf.0.png b/testing/resources/pixel/bug_304_expected.pdf.0.png
index e1b3d43..62de87b 100644
--- a/testing/resources/pixel/bug_304_expected.pdf.0.png
+++ b/testing/resources/pixel/bug_304_expected.pdf.0.png
Binary files differ