Improve some pixel movement loops in CPDF_ColorSpace.

Noticed while converting these to spans, confirmed we were getting
interleaved load/stores.

-- Took conditional out of loop in one spot (just because it might
   improve).

Change-Id: If6d6efc4e33bf9ddb70656c55d221deb81203596
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/86070
Reviewed-by: Lei Zhang <thestig@chromium.org>
Commit-Queue: Tom Sepez <tsepez@chromium.org>
diff --git a/core/fpdfapi/page/cpdf_colorspace.cpp b/core/fpdfapi/page/cpdf_colorspace.cpp
index f91df8f..7648782 100644
--- a/core/fpdfapi/page/cpdf_colorspace.cpp
+++ b/core/fpdfapi/page/cpdf_colorspace.cpp
@@ -698,9 +698,11 @@
   uint8_t* pDestBuf = dest_span.data();
   const uint8_t* pSrcBuf = src_span.data();
   for (int i = 0; i < pixels; i++) {
-    *pDestBuf++ = pSrcBuf[i];
-    *pDestBuf++ = pSrcBuf[i];
-    *pDestBuf++ = pSrcBuf[i];
+    // Compiler can not conclude that src/dest don't overlap.
+    const uint8_t pix = pSrcBuf[i];
+    *pDestBuf++ = pix;
+    *pDestBuf++ = pix;
+    *pDestBuf++ = pix;
   }
 }
 
diff --git a/core/fpdfapi/page/cpdf_devicecs.cpp b/core/fpdfapi/page/cpdf_devicecs.cpp
index be83369..494a2d0 100644
--- a/core/fpdfapi/page/cpdf_devicecs.cpp
+++ b/core/fpdfapi/page/cpdf_devicecs.cpp
@@ -88,9 +88,12 @@
   switch (m_Family) {
     case Family::kDeviceGray:
       for (int i = 0; i < pixels; i++) {
-        *pDestBuf++ = pSrcBuf[i];
-        *pDestBuf++ = pSrcBuf[i];
-        *pDestBuf++ = pSrcBuf[i];
+        // Compiler can not conclude that src/dest don't overlap, avoid
+        // duplicate loads.
+        const uint8_t pix = pSrcBuf[i];
+        *pDestBuf++ = pix;
+        *pDestBuf++ = pix;
+        *pDestBuf++ = pix;
       }
       break;
     case Family::kDeviceRGB:
@@ -99,27 +102,41 @@
     case Family::kDeviceCMYK:
       if (bTransMask) {
         for (int i = 0; i < pixels; i++) {
-          int k = 255 - pSrcBuf[3];
-          pDestBuf[0] = ((255 - pSrcBuf[0]) * k) / 255;
-          pDestBuf[1] = ((255 - pSrcBuf[1]) * k) / 255;
-          pDestBuf[2] = ((255 - pSrcBuf[2]) * k) / 255;
+          // Compiler can't conclude src/dest don't overlap, avoid interleaved
+          // loads and stores.
+          const uint8_t s0 = pSrcBuf[0];
+          const uint8_t s1 = pSrcBuf[1];
+          const uint8_t s2 = pSrcBuf[2];
+          const int k = 255 - pSrcBuf[3];
+          pDestBuf[0] = ((255 - s0) * k) / 255;
+          pDestBuf[1] = ((255 - s1) * k) / 255;
+          pDestBuf[2] = ((255 - s2) * k) / 255;
           pDestBuf += 3;
           pSrcBuf += 4;
         }
       } else {
-        for (int i = 0; i < pixels; i++) {
-          if (IsStdConversionEnabled()) {
-            uint8_t k = pSrcBuf[3];
-            pDestBuf[2] = 255 - std::min(255, pSrcBuf[0] + k);
-            pDestBuf[1] = 255 - std::min(255, pSrcBuf[1] + k);
-            pDestBuf[0] = 255 - std::min(255, pSrcBuf[2] + k);
-          } else {
+        if (IsStdConversionEnabled()) {
+          for (int i = 0; i < pixels; i++) {
+            // Compiler can't conclude src/dest don't overlap, avoid
+            // interleaved loads and stores.
+            const uint8_t s0 = pSrcBuf[0];
+            const uint8_t s1 = pSrcBuf[1];
+            const uint8_t s2 = pSrcBuf[2];
+            const uint8_t k = pSrcBuf[3];
+            pDestBuf[2] = 255 - std::min(255, s0 + k);
+            pDestBuf[1] = 255 - std::min(255, s1 + k);
+            pDestBuf[0] = 255 - std::min(255, s2 + k);
+            pSrcBuf += 4;
+            pDestBuf += 3;
+          }
+        } else {
+          for (int i = 0; i < pixels; i++) {
             std::tie(pDestBuf[2], pDestBuf[1], pDestBuf[0]) =
                 AdobeCMYK_to_sRGB1(pSrcBuf[0], pSrcBuf[1], pSrcBuf[2],
                                    pSrcBuf[3]);
+            pSrcBuf += 4;
+            pDestBuf += 3;
           }
-          pSrcBuf += 4;
-          pDestBuf += 3;
         }
       }
       break;