Avoid using std::function to improve performance.

In CFX_ImageTransformer, stop using std::function in places that just
need a lambda. Use a template optimization instead, to avoid
std::function which has some overhead. In loop heavy code like
CFX_ImageTransformer, this improves performance by up to 10% in release
builds.

Also shuffle CFX_ImageTransformer code so that the templates are not
exposed in the header file.

Change-Id: Ia833843ea4768eef82ce1986993cb1f1e86a8895
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/64210
Reviewed-by: Tom Sepez <tsepez@chromium.org>
Commit-Queue: Lei Zhang <thestig@chromium.org>
diff --git a/core/fxge/dib/cfx_imagetransformer.cpp b/core/fxge/dib/cfx_imagetransformer.cpp
index 6342b22..446b881 100644
--- a/core/fxge/dib/cfx_imagetransformer.cpp
+++ b/core/fxge/dib/cfx_imagetransformer.cpp
@@ -123,7 +123,10 @@
   }
 }
 
-void WriteColorResult(std::function<uint8_t(int offset)> func,
+// Let the compiler deduce the type for |func|, which cheaper than specifying it
+// with std::function.
+template <typename F>
+void WriteColorResult(const F& func,
                       bool bHasAlpha,
                       FXDIB_Format format,
                       uint8_t* dest) {
@@ -200,6 +203,109 @@
   }
 };
 
+bool InStretchBounds(const FX_RECT& clip_rect, int col, int row) {
+  return col >= 0 && col <= clip_rect.Width() && row >= 0 &&
+         row <= clip_rect.Height();
+}
+
+void AdjustCoords(const FX_RECT& clip_rect, int* col, int* row) {
+  int& src_col = *col;
+  int& src_row = *row;
+  if (src_col == clip_rect.Width())
+    src_col--;
+  if (src_row == clip_rect.Height())
+    src_row--;
+}
+
+// Let the compiler deduce the type for |func|, which cheaper than specifying it
+// with std::function.
+template <typename F>
+void DoBilinearLoop(const CFX_ImageTransformer::CalcData& cdata,
+                    const FX_RECT& result_rect,
+                    const FX_RECT& clip_rect,
+                    int increment,
+                    const F& func) {
+  CFX_BilinearMatrix matrix_fix(cdata.matrix);
+  for (int row = 0; row < result_rect.Height(); row++) {
+    uint8_t* dest = cdata.bitmap->GetWritableScanline(row);
+    for (int col = 0; col < result_rect.Width(); col++) {
+      CFX_ImageTransformer::BilinearData d;
+      d.res_x = 0;
+      d.res_y = 0;
+      d.src_col_l = 0;
+      d.src_row_l = 0;
+      matrix_fix.Transform(col, row, &d.src_col_l, &d.src_row_l, &d.res_x,
+                           &d.res_y);
+      if (LIKELY(InStretchBounds(clip_rect, d.src_col_l, d.src_row_l))) {
+        AdjustCoords(clip_rect, &d.src_col_l, &d.src_row_l);
+        d.src_col_r = d.src_col_l + 1;
+        d.src_row_r = d.src_row_l + 1;
+        AdjustCoords(clip_rect, &d.src_col_r, &d.src_row_r);
+        d.row_offset_l = d.src_row_l * cdata.pitch;
+        d.row_offset_r = d.src_row_r * cdata.pitch;
+        func(d, dest);
+      }
+      dest += increment;
+    }
+  }
+}
+
+// Let the compiler deduce the type for |func|, which cheaper than specifying it
+// with std::function.
+template <typename F>
+void DoBicubicLoop(const CFX_ImageTransformer::CalcData& cdata,
+                   const FX_RECT& result_rect,
+                   const FX_RECT& clip_rect,
+                   int increment,
+                   const F& func) {
+  CFX_BilinearMatrix matrix_fix(cdata.matrix);
+  for (int row = 0; row < result_rect.Height(); row++) {
+    uint8_t* dest = cdata.bitmap->GetWritableScanline(row);
+    for (int col = 0; col < result_rect.Width(); col++) {
+      CFX_ImageTransformer::BicubicData d;
+      d.res_x = 0;
+      d.res_y = 0;
+      d.src_col_l = 0;
+      d.src_row_l = 0;
+      matrix_fix.Transform(col, row, &d.src_col_l, &d.src_row_l, &d.res_x,
+                           &d.res_y);
+      if (LIKELY(InStretchBounds(clip_rect, d.src_col_l, d.src_row_l))) {
+        AdjustCoords(clip_rect, &d.src_col_l, &d.src_row_l);
+        bicubic_get_pos_weight(d.pos_pixel, d.u_w, d.v_w, d.src_col_l,
+                               d.src_row_l, d.res_x, d.res_y, clip_rect.Width(),
+                               clip_rect.Height());
+        func(d, dest);
+      }
+      dest += increment;
+    }
+  }
+}
+
+// Let the compiler deduce the type for |func|, which cheaper than specifying it
+// with std::function.
+template <typename F>
+void DoDownSampleLoop(const CFX_ImageTransformer::CalcData& cdata,
+                      const FX_RECT& result_rect,
+                      const FX_RECT& clip_rect,
+                      int increment,
+                      const F& func) {
+  CPDF_FixedMatrix matrix_fix(cdata.matrix);
+  for (int row = 0; row < result_rect.Height(); row++) {
+    uint8_t* dest = cdata.bitmap->GetWritableScanline(row);
+    for (int col = 0; col < result_rect.Width(); col++) {
+      CFX_ImageTransformer::DownSampleData d;
+      d.src_col = 0;
+      d.src_row = 0;
+      matrix_fix.Transform(col, row, &d.src_col, &d.src_row);
+      if (LIKELY(InStretchBounds(clip_rect, d.src_col, d.src_row))) {
+        AdjustCoords(clip_rect, &d.src_col, &d.src_row);
+        func(d, dest);
+      }
+      dest += increment;
+    }
+  }
+}
+
 }  // namespace
 
 CFX_ImageTransformer::CFX_ImageTransformer(const RetainPtr<CFX_DIBBase>& pSrc,
@@ -354,18 +460,18 @@
                                 data.src_col_l, data.src_col_r, data.res_x,
                                 data.res_y, 1, 0);
     };
-    DoBilinearLoop(cdata, 1, func);
+    DoBilinearLoop(cdata, m_result, m_StretchClip, 1, func);
   } else if (IsBiCubic()) {
     auto func = [&cdata](const BicubicData& data, uint8_t* dest) {
       *dest = bicubic_interpol(cdata.buf, cdata.pitch, data.pos_pixel, data.u_w,
                                data.v_w, data.res_x, data.res_y, 1, 0);
     };
-    DoBicubicLoop(cdata, 1, func);
+    DoBicubicLoop(cdata, m_result, m_StretchClip, 1, func);
   } else {
     auto func = [&cdata](const DownSampleData& data, uint8_t* dest) {
       *dest = cdata.buf[data.src_row * cdata.pitch + data.src_col];
     };
-    DoDownSampleLoop(cdata, 1, func);
+    DoDownSampleLoop(cdata, m_result, m_StretchClip, 1, func);
   }
 }
 
@@ -376,20 +482,20 @@
                                 data.src_col_l, data.src_col_r, data.res_x,
                                 data.res_y, 1, 0);
     };
-    DoBilinearLoop(cdata, 1, func);
+    DoBilinearLoop(cdata, m_result, m_StretchClip, 1, func);
   } else if (IsBiCubic()) {
     auto func = [&cdata](const BicubicData& data, uint8_t* dest) {
       *dest = bicubic_interpol(cdata.buf, cdata.pitch, data.pos_pixel, data.u_w,
                                data.v_w, data.res_x, data.res_y, 1, 0);
     };
-    DoBicubicLoop(cdata, 1, func);
+    DoBicubicLoop(cdata, m_result, m_StretchClip, 1, func);
   } else {
     auto func = [&cdata](const DownSampleData& data, uint8_t* dest) {
       const uint8_t* src_pixel =
           cdata.buf + cdata.pitch * data.src_row + data.src_col;
       *dest = *src_pixel;
     };
-    DoDownSampleLoop(cdata, 1, func);
+    DoDownSampleLoop(cdata, m_result, m_StretchClip, 1, func);
   }
 }
 
@@ -417,7 +523,7 @@
       uint32_t r_bgra_cmyk = argb[idx];
       WriteMonoResult(r_bgra_cmyk, format, dest);
     };
-    DoBilinearLoop(cdata, destBpp, func);
+    DoBilinearLoop(cdata, m_result, m_StretchClip, destBpp, func);
   } else if (IsBiCubic()) {
     auto func = [&cdata, format, &argb](const BicubicData& data,
                                         uint8_t* dest) {
@@ -426,7 +532,7 @@
           data.res_x, data.res_y, 1, 0)];
       WriteMonoResult(r_bgra_cmyk, format, dest);
     };
-    DoBicubicLoop(cdata, destBpp, func);
+    DoBicubicLoop(cdata, m_result, m_StretchClip, destBpp, func);
   } else {
     auto func = [&cdata, format, &argb](const DownSampleData& data,
                                         uint8_t* dest) {
@@ -434,7 +540,7 @@
           argb[cdata.buf[data.src_row * cdata.pitch + data.src_col]];
       WriteMonoResult(r_bgra_cmyk, format, dest);
     };
-    DoDownSampleLoop(cdata, destBpp, func);
+    DoDownSampleLoop(cdata, m_result, m_StretchClip, destBpp, func);
   }
 }
 
@@ -453,7 +559,7 @@
       };
       WriteColorResult(bilinear_interpol_func, bHasAlpha, format, dest);
     };
-    DoBilinearLoop(cdata, destBpp, func);
+    DoBilinearLoop(cdata, m_result, m_StretchClip, destBpp, func);
   } else if (IsBiCubic()) {
     auto func = [&cdata, format, Bpp, bHasAlpha](const BicubicData& data,
                                                  uint8_t* dest) {
@@ -464,7 +570,7 @@
       };
       WriteColorResult(bicubic_interpol_func, bHasAlpha, format, dest);
     };
-    DoBicubicLoop(cdata, destBpp, func);
+    DoBicubicLoop(cdata, m_result, m_StretchClip, destBpp, func);
   } else {
     auto func = [&cdata, format, bHasAlpha, Bpp](const DownSampleData& data,
                                                  uint8_t* dest) {
@@ -473,7 +579,7 @@
       auto sample_func = [src_pos](int offset) { return src_pos[offset]; };
       WriteColorResult(sample_func, bHasAlpha, format, dest);
     };
-    DoDownSampleLoop(cdata, destBpp, func);
+    DoDownSampleLoop(cdata, m_result, m_StretchClip, destBpp, func);
   }
 }
 
@@ -484,89 +590,3 @@
 bool CFX_ImageTransformer::IsBiCubic() const {
   return m_ResampleOptions.bInterpolateBicubic;
 }
-
-void CFX_ImageTransformer::AdjustCoords(int* col, int* row) const {
-  int& src_col = *col;
-  int& src_row = *row;
-  if (src_col == stretch_width())
-    src_col--;
-  if (src_row == stretch_height())
-    src_row--;
-}
-
-void CFX_ImageTransformer::DoBilinearLoop(
-    const CalcData& cdata,
-    int increment,
-    std::function<void(const BilinearData&, uint8_t*)> func) {
-  CFX_BilinearMatrix matrix_fix(cdata.matrix);
-  for (int row = 0; row < m_result.Height(); row++) {
-    uint8_t* dest = cdata.bitmap->GetWritableScanline(row);
-    for (int col = 0; col < m_result.Width(); col++) {
-      BilinearData d;
-      d.res_x = 0;
-      d.res_y = 0;
-      d.src_col_l = 0;
-      d.src_row_l = 0;
-      matrix_fix.Transform(col, row, &d.src_col_l, &d.src_row_l, &d.res_x,
-                           &d.res_y);
-      if (LIKELY(InStretchBounds(d.src_col_l, d.src_row_l))) {
-        AdjustCoords(&d.src_col_l, &d.src_row_l);
-        d.src_col_r = d.src_col_l + 1;
-        d.src_row_r = d.src_row_l + 1;
-        AdjustCoords(&d.src_col_r, &d.src_row_r);
-        d.row_offset_l = d.src_row_l * cdata.pitch;
-        d.row_offset_r = d.src_row_r * cdata.pitch;
-        func(d, dest);
-      }
-      dest += increment;
-    }
-  }
-}
-
-void CFX_ImageTransformer::DoBicubicLoop(
-    const CalcData& cdata,
-    int increment,
-    std::function<void(const BicubicData&, uint8_t*)> func) {
-  CFX_BilinearMatrix matrix_fix(cdata.matrix);
-  for (int row = 0; row < m_result.Height(); row++) {
-    uint8_t* dest = cdata.bitmap->GetWritableScanline(row);
-    for (int col = 0; col < m_result.Width(); col++) {
-      BicubicData d;
-      d.res_x = 0;
-      d.res_y = 0;
-      d.src_col_l = 0;
-      d.src_row_l = 0;
-      matrix_fix.Transform(col, row, &d.src_col_l, &d.src_row_l, &d.res_x,
-                           &d.res_y);
-      if (LIKELY(InStretchBounds(d.src_col_l, d.src_row_l))) {
-        AdjustCoords(&d.src_col_l, &d.src_row_l);
-        bicubic_get_pos_weight(d.pos_pixel, d.u_w, d.v_w, d.src_col_l,
-                               d.src_row_l, d.res_x, d.res_y, stretch_width(),
-                               stretch_height());
-        func(d, dest);
-      }
-      dest += increment;
-    }
-  }
-}
-
-void CFX_ImageTransformer::DoDownSampleLoop(
-    const CalcData& cdata,
-    int increment,
-    std::function<void(const DownSampleData&, uint8_t*)> func) {
-  CPDF_FixedMatrix matrix_fix(cdata.matrix);
-  for (int row = 0; row < m_result.Height(); row++) {
-    uint8_t* dest = cdata.bitmap->GetWritableScanline(row);
-    for (int col = 0; col < m_result.Width(); col++) {
-      DownSampleData d;
-      d.src_col = 0;
-      d.src_row = 0;
-      matrix_fix.Transform(col, row, &d.src_col, &d.src_row);
-      if (LIKELY(InStretchBounds(d.src_col, d.src_row))) {
-        AdjustCoords(&d.src_col, &d.src_row);
-        func(d, dest);
-      }
-      dest += increment;
-    }
-  }
-}
diff --git a/core/fxge/dib/cfx_imagetransformer.h b/core/fxge/dib/cfx_imagetransformer.h
index 1b38047..c95b441 100644
--- a/core/fxge/dib/cfx_imagetransformer.h
+++ b/core/fxge/dib/cfx_imagetransformer.h
@@ -20,18 +20,6 @@
 
 class CFX_ImageTransformer {
  public:
-  CFX_ImageTransformer(const RetainPtr<CFX_DIBBase>& pSrc,
-                       const CFX_Matrix& matrix,
-                       const FXDIB_ResampleOptions& options,
-                       const FX_RECT* pClip);
-  ~CFX_ImageTransformer();
-
-  bool Continue(PauseIndicatorIface* pPause);
-
-  const FX_RECT& result() const { return m_result; }
-  RetainPtr<CFX_DIBitmap> DetachBitmap();
-
- private:
   struct BilinearData {
     int res_x;
     int res_y;
@@ -67,6 +55,18 @@
     uint32_t pitch;
   };
 
+  CFX_ImageTransformer(const RetainPtr<CFX_DIBBase>& pSrc,
+                       const CFX_Matrix& matrix,
+                       const FXDIB_ResampleOptions& options,
+                       const FX_RECT* pClip);
+  ~CFX_ImageTransformer();
+
+  bool Continue(PauseIndicatorIface* pPause);
+
+  const FX_RECT& result() const { return m_result; }
+  RetainPtr<CFX_DIBitmap> DetachBitmap();
+
+ private:
   void CalcMask(const CalcData& cdata);
   void CalcAlpha(const CalcData& cdata);
   void CalcMono(const CalcData& cdata, FXDIB_Format format);
@@ -75,27 +75,6 @@
   bool IsBilinear() const;
   bool IsBiCubic() const;
 
-  int stretch_width() const { return m_StretchClip.Width(); }
-  int stretch_height() const { return m_StretchClip.Height(); }
-
-  bool InStretchBounds(int col, int row) const {
-    return col >= 0 && col <= stretch_width() && row >= 0 &&
-           row <= stretch_height();
-  }
-
-  void AdjustCoords(int* col, int* row) const;
-
-  void DoBilinearLoop(const CalcData& cdata,
-                      int increment,
-                      std::function<void(const BilinearData&, uint8_t*)> func);
-  void DoBicubicLoop(const CalcData& cdata,
-                     int increment,
-                     std::function<void(const BicubicData&, uint8_t*)> func);
-  void DoDownSampleLoop(
-      const CalcData& cdata,
-      int increment,
-      std::function<void(const DownSampleData&, uint8_t*)> func);
-
   RetainPtr<CFX_DIBBase> const m_pSrc;
   const CFX_Matrix m_matrix;
   FX_RECT m_StretchClip;