Switch to memcpy()/memset() in image conversion code.

In a microbenchmark, memcpy() is 36% faster than doing the same copy
manually. The overall rendering speedup for pdfium_test is about 1%.

Change-Id: I01e5821a1306727012c9dca98069af6c61eed99a
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/73013
Reviewed-by: Tom Sepez <tsepez@chromium.org>
Commit-Queue: Lei Zhang <thestig@chromium.org>
diff --git a/core/fxge/dib/cfx_dibbase.cpp b/core/fxge/dib/cfx_dibbase.cpp
index a2c1a9a..ca57bd3 100644
--- a/core/fxge/dib/cfx_dibbase.cpp
+++ b/core/fxge/dib/cfx_dibbase.cpp
@@ -384,15 +384,9 @@
     uint8_t* dest_scan = dest_buf + row * dest_pitch;
     const uint8_t* src_scan = pSrcBitmap->GetScanline(src_top + row);
     for (int col = src_left; col < src_left + width; ++col) {
-      if (src_scan[col / 8] & (1 << (7 - col % 8))) {
-        dest_scan[0] = kSetGray;
-        dest_scan[1] = kSetGray;
-        dest_scan[2] = kSetGray;
-      } else {
-        dest_scan[0] = kResetGray;
-        dest_scan[1] = kResetGray;
-        dest_scan[2] = kResetGray;
-      }
+      uint8_t value =
+          (src_scan[col / 8] & (1 << (7 - col % 8))) ? kSetGray : kResetGray;
+      memset(dest_scan, value, 3);
       dest_scan += comps;
     }
   }
@@ -410,13 +404,10 @@
   for (int row = 0; row < height; ++row) {
     uint8_t* dest_scan = dest_buf + row * dest_pitch;
     const uint8_t* src_scan = pSrcBitmap->GetScanline(src_top + row) + src_left;
-    uint8_t src_pixel;
     for (int col = 0; col < width; ++col) {
-      src_pixel = *src_scan++;
-      *dest_scan++ = src_pixel;
-      *dest_scan++ = src_pixel;
-      *dest_scan = src_pixel;
-      dest_scan += comps - 2;
+      memset(dest_scan, *src_scan, 3);
+      dest_scan += comps;
+      ++src_scan;
     }
   }
 }
@@ -458,16 +449,9 @@
     uint8_t* dest_scan = dest_buf + row * dest_pitch;
     const uint8_t* src_scan = pSrcBitmap->GetScanline(src_top + row);
     for (int col = src_left; col < src_left + width; ++col) {
-      if (src_scan[col / 8] & (1 << (7 - col % 8))) {
-        *dest_scan++ = bgr_ptr[3];
-        *dest_scan++ = bgr_ptr[4];
-        *dest_scan = bgr_ptr[5];
-      } else {
-        *dest_scan++ = bgr_ptr[0];
-        *dest_scan++ = bgr_ptr[1];
-        *dest_scan = bgr_ptr[2];
-      }
-      dest_scan += comps - 2;
+      size_t offset = (src_scan[col / 8] & (1 << (7 - col % 8))) ? 3 : 0;
+      memcpy(dest_scan, bgr_ptr + offset, 3);
+      dest_scan += comps;
     }
   }
 }
@@ -508,10 +492,8 @@
     const uint8_t* src_scan = pSrcBitmap->GetScanline(src_top + row) + src_left;
     for (int col = 0; col < width; ++col) {
       uint8_t* src_pixel = bgr_ptr + 3 * (*src_scan++);
-      *dest_scan++ = *src_pixel++;
-      *dest_scan++ = *src_pixel++;
-      *dest_scan = *src_pixel++;
-      dest_scan += comps - 2;
+      memcpy(dest_scan, src_pixel, 3);
+      dest_scan += comps;
     }
   }
 }
@@ -543,10 +525,9 @@
     const uint8_t* src_scan =
         pSrcBitmap->GetScanline(src_top + row) + src_left * 4;
     for (int col = 0; col < width; ++col) {
-      *dest_scan++ = *src_scan++;
-      *dest_scan++ = *src_scan++;
-      *dest_scan++ = *src_scan++;
-      ++src_scan;
+      memcpy(dest_scan, src_scan, 3);
+      dest_scan += 3;
+      src_scan += 4;
     }
   }
 }
@@ -564,11 +545,9 @@
     const uint8_t* src_scan =
         pSrcBitmap->GetScanline(src_top + row) + src_left * comps;
     for (int col = 0; col < width; ++col) {
-      *dest_scan++ = *src_scan++;
-      *dest_scan++ = *src_scan++;
-      *dest_scan++ = *src_scan++;
-      ++dest_scan;
-      src_scan += comps - 3;
+      memcpy(dest_scan, src_scan, 3);
+      dest_scan += 4;
+      src_scan += comps;
     }
   }
 }
@@ -1037,9 +1016,7 @@
       }
     } else if (Bpp == 3) {
       for (int col = 0; col < m_Width; ++col) {
-        dest_scan[0] = src_scan[0];
-        dest_scan[1] = src_scan[1];
-        dest_scan[2] = src_scan[2];
+        memcpy(dest_scan, src_scan, 3);
         dest_scan -= 3;
         src_scan += 3;
       }
@@ -1176,10 +1153,9 @@
           }
         } else {
           for (int col = col_start; col < col_end; ++col) {
-            *dest_scan++ = *src_scan++;
-            *dest_scan++ = *src_scan++;
-            *dest_scan = *src_scan++;
-            dest_scan += dest_step;
+            memcpy(dest_scan, src_scan, 3);
+            dest_scan += 2 + dest_step;
+            src_scan += 3;
           }
         }
       }
diff --git a/core/fxge/dib/cfx_scanlinecompositor.cpp b/core/fxge/dib/cfx_scanlinecompositor.cpp
index 3c90d96..5cf6a79 100644
--- a/core/fxge/dib/cfx_scanlinecompositor.cpp
+++ b/core/fxge/dib/cfx_scanlinecompositor.cpp
@@ -12,9 +12,6 @@
 
 #define FX_CCOLOR(val) (255 - (val))
 #define FXDIB_ALPHA_UNION(dest, src) ((dest) + (src) - (dest) * (src) / 255)
-#define FXARGB_COPY(dest, src)                    \
-  *(dest) = *(src), *((dest) + 1) = *((src) + 1), \
-  *((dest) + 2) = *((src) + 2), *((dest) + 3) = *((src) + 3)
 #define FXARGB_RGBORDERCOPY(dest, src)                  \
   *((dest) + 3) = *((src) + 3), *(dest) = *((src) + 2), \
              *((dest) + 1) = *((src) + 1), *((dest) + 2) = *((src))
@@ -407,14 +404,13 @@
           FXARGB_SETDIB(dest_scan, (FXARGB_GETDIB(src_scan) & 0xffffff) |
                                        (src_alpha << 24));
         } else {
-          FXARGB_COPY(dest_scan, src_scan);
+          memcpy(dest_scan, src_scan, 4);
         }
       } else if (has_dest) {
         *dest_alpha_scan = src_alpha;
-        for (int i = 0; i < 3; ++i) {
-          *dest_scan = *src_scan++;
-          ++dest_scan;
-        }
+        memcpy(dest_scan, src_scan, 3);
+        dest_scan += 3;
+        src_scan += 3;
         ++dest_alpha_scan;
         if (!has_src)
           ++src_scan;
@@ -479,10 +475,9 @@
     uint8_t back_alpha = *dest_alpha;
     if (back_alpha == 0) {
       if (dest_alpha_scan) {
-        for (int i = 0; i < 3; ++i) {
-          *dest_scan = *src_scan++;
-          ++dest_scan;
-        }
+        memcpy(dest_scan, src_scan, 3);
+        dest_scan += 3;
+        src_scan += 3;
         *dest_alpha_scan = 0xff;
         ++dest_alpha_scan;
       } else {
@@ -532,11 +527,9 @@
     int src_alpha = *clip_scan++;
     uint8_t back_alpha = has_dest ? *dest_alpha_scan : dest_scan[3];
     if (back_alpha == 0) {
-      for (int i = 0; i < 3; ++i) {
-        *dest_scan = *src_scan++;
-        ++dest_scan;
-      }
-      src_scan += src_gap;
+      memcpy(dest_scan, src_scan, 3);
+      dest_scan += 3;
+      src_scan += src_Bpp;
       if (has_dest)
         dest_alpha_scan++;
       else
@@ -585,11 +578,10 @@
     for (int col = 0; col < width; col++) {
       int src_alpha = clip_scan[col];
       if (src_alpha == 255) {
-        *dest_scan++ = *src_scan++;
-        *dest_scan++ = *src_scan++;
-        *dest_scan++ = *src_scan++;
+        memcpy(dest_scan, src_scan, 3);
+        dest_scan += 3;
+        src_scan += src_Bpp;
         *dest_alpha_scan++ = 255;
-        src_scan += src_gap;
         continue;
       }
       if (src_alpha == 0) {
@@ -614,11 +606,10 @@
     for (int col = 0; col < width; col++) {
       int src_alpha = clip_scan[col];
       if (src_alpha == 255) {
-        *dest_scan++ = *src_scan++;
-        *dest_scan++ = *src_scan++;
-        *dest_scan++ = *src_scan++;
+        memcpy(dest_scan, src_scan, 3);
+        dest_scan += 3;
         *dest_scan++ = 255;
-        src_scan += src_gap;
+        src_scan += src_Bpp;
         continue;
       }
       if (src_alpha == 0) {
@@ -648,13 +639,11 @@
                                           int src_Bpp,
                                           uint8_t* dest_alpha_scan) {
   if (dest_alpha_scan) {
-    int src_gap = src_Bpp - 3;
     for (int col = 0; col < width; col++) {
-      *dest_scan++ = *src_scan++;
-      *dest_scan++ = *src_scan++;
-      *dest_scan++ = *src_scan++;
-      *dest_alpha_scan++ = 0xff;
-      src_scan += src_gap;
+      memcpy(dest_scan, src_scan, 3);
+      dest_scan += 3;
+      src_scan += src_Bpp;
+      *dest_alpha_scan++ = 255;
     }
   } else {
     for (int col = 0; col < width; col++) {
@@ -754,10 +743,9 @@
         src_alpha = *src_alpha_scan++;
       }
       if (src_alpha == 255) {
-        *dest_scan++ = *src_scan++;
-        *dest_scan++ = *src_scan++;
-        *dest_scan++ = *src_scan++;
-        dest_scan += dest_gap;
+        memcpy(dest_scan, src_scan, 3);
+        dest_scan += dest_Bpp;
+        src_scan += 3;
         continue;
       }
       if (src_alpha == 0) {
@@ -781,11 +769,9 @@
         src_alpha = src_scan[3];
       }
       if (src_alpha == 255) {
-        *dest_scan++ = *src_scan++;
-        *dest_scan++ = *src_scan++;
-        *dest_scan++ = *src_scan++;
-        dest_scan += dest_gap;
-        src_scan++;
+        memcpy(dest_scan, src_scan, 3);
+        dest_scan += dest_Bpp;
+        src_scan += 4;
         continue;
       }
       if (src_alpha == 0) {
@@ -879,9 +865,7 @@
     return;
   }
   for (int col = 0; col < width; col++) {
-    dest_scan[0] = src_scan[0];
-    dest_scan[1] = src_scan[1];
-    dest_scan[2] = src_scan[2];
+    memcpy(dest_scan, src_scan, 3);
     dest_scan += dest_Bpp;
     src_scan += src_Bpp;
   }
@@ -896,9 +880,7 @@
   for (int col = 0; col < width; col++) {
     int src_alpha = clip_scan[col];
     if (src_alpha == 255) {
-      dest_scan[0] = src_scan[0];
-      dest_scan[1] = src_scan[1];
-      dest_scan[2] = src_scan[2];
+      memcpy(dest_scan, src_scan, 3);
     } else if (src_alpha) {
       *dest_scan = FXDIB_ALPHA_MERGE(*dest_scan, *src_scan, src_alpha);
       dest_scan++;