diff --git a/core/fpdfapi/fpdf_render/fpdf_render.cpp b/core/fpdfapi/fpdf_render/fpdf_render.cpp
index add2a8a..81a02cc 100644
--- a/core/fpdfapi/fpdf_render/fpdf_render.cpp
+++ b/core/fpdfapi/fpdf_render/fpdf_render.cpp
@@ -213,6 +213,9 @@
 void CPDF_RenderStatus::RenderObjectList(
     const CPDF_PageObjectHolder* pObjectHolder,
     const CFX_Matrix* pObj2Device) {
+#if defined _SKIA_SUPPORT_
+  DebugVerifyDeviceIsPreMultiplied();
+#endif
   CFX_FloatRect clip_rect(m_pDevice->GetClipBox());
   CFX_Matrix device2object;
   device2object.SetReverse(*pObj2Device);
@@ -236,9 +239,16 @@
     if (m_bStopped)
       return;
   }
+#if defined _SKIA_SUPPORT_
+  DebugVerifyDeviceIsPreMultiplied();
+#endif
 }
+
 void CPDF_RenderStatus::RenderSingleObject(const CPDF_PageObject* pObj,
                                            const CFX_Matrix* pObj2Device) {
+#if defined _SKIA_SUPPORT_
+  DebugVerifyDeviceIsPreMultiplied();
+#endif
   CFX_AutoRestorer<int> restorer(&s_CurrentRecursionDepth);
   if (++s_CurrentRecursionDepth > kRenderMaxRecursionDepth) {
     return;
@@ -254,6 +264,9 @@
     return;
   }
   ProcessObjectNoClip(pObj, pObj2Device);
+#if defined _SKIA_SUPPORT_
+  DebugVerifyDeviceIsPreMultiplied();
+#endif
 }
 
 FX_BOOL CPDF_RenderStatus::ContinueSingleObject(const CPDF_PageObject* pObj,
@@ -319,6 +332,9 @@
 
 void CPDF_RenderStatus::ProcessObjectNoClip(const CPDF_PageObject* pObj,
                                             const CFX_Matrix* pObj2Device) {
+#if defined _SKIA_SUPPORT_
+  DebugVerifyDeviceIsPreMultiplied();
+#endif
   FX_BOOL bRet = FALSE;
   switch (pObj->GetType()) {
     case CPDF_PageObject::TEXT:
@@ -339,6 +355,9 @@
   }
   if (!bRet)
     DrawObjWithBackground(pObj, pObj2Device);
+#if defined _SKIA_SUPPORT_
+  DebugVerifyDeviceIsPreMultiplied();
+#endif
 }
 
 FX_BOOL CPDF_RenderStatus::DrawObjWithBlend(const CPDF_PageObject* pObj,
@@ -396,8 +415,12 @@
   status.RenderSingleObject(pObj, &matrix);
   buffer.OutputToDevice();
 }
+
 FX_BOOL CPDF_RenderStatus::ProcessForm(const CPDF_FormObject* pFormObj,
                                        const CFX_Matrix* pObj2Device) {
+#if defined _SKIA_SUPPORT_
+  DebugVerifyDeviceIsPreMultiplied();
+#endif
   CPDF_Dictionary* pOC = pFormObj->m_pForm->m_pFormDict->GetDictBy("OC");
   if (pOC && m_Options.m_pOCContext &&
       !m_Options.m_pOCContext->CheckOCGVisible(pOC)) {
@@ -418,8 +441,12 @@
   status.RenderObjectList(pFormObj->m_pForm, &matrix);
   m_bStopped = status.m_bStopped;
   m_pDevice->RestoreState(false);
+#if defined _SKIA_SUPPORT_
+  DebugVerifyDeviceIsPreMultiplied();
+#endif
   return TRUE;
 }
+
 FX_BOOL IsAvailableMatrix(const CFX_Matrix& matrix) {
   if (matrix.a == 0 || matrix.d == 0) {
     return matrix.b != 0 && matrix.c != 0;
@@ -645,6 +672,9 @@
 }
 FX_BOOL CPDF_RenderStatus::ProcessTransparency(const CPDF_PageObject* pPageObj,
                                                const CFX_Matrix* pObj2Device) {
+#if defined _SKIA_SUPPORT_
+  DebugVerifyDeviceIsPreMultiplied();
+#endif
   const CPDF_GeneralStateData* pGeneralState =
       pPageObj->m_GeneralState.GetObject();
   int blend_type =
@@ -745,12 +775,10 @@
     oriDevice.reset(new CFX_DIBitmap);
     if (!m_pDevice->CreateCompatibleBitmap(oriDevice.get(), width, height))
       return TRUE;
-
     m_pDevice->GetDIBits(oriDevice.get(), rect.left, rect.top);
   }
   if (!bitmap_device.Create(width, height, FXDIB_Argb, oriDevice.get()))
     return TRUE;
-
   CFX_DIBitmap* bitmap = bitmap_device.GetBitmap();
   bitmap->Clear(0);
   CFX_Matrix new_matrix = *pObj2Device;
@@ -813,6 +841,9 @@
   }
   CompositeDIBitmap(bitmap, rect.left, rect.top, 0, blitAlpha, blend_type,
                     Transparency);
+#if defined _SKIA_SUPPORT_
+  DebugVerifyDeviceIsPreMultiplied();
+#endif
   return TRUE;
 }
 
@@ -1263,3 +1294,9 @@
                              m_Rect.top, m_Rect.Width(), m_Rect.Height());
   }
 }
+
+#if defined _SKIA_SUPPORT_
+void CPDF_RenderStatus::DebugVerifyDeviceIsPreMultiplied() const {
+  m_pDevice->DebugVerifyBitmapIsPreMultiplied();  // delegate to the device
+}
+#endif
diff --git a/core/fpdfapi/fpdf_render/fpdf_render_image.cpp b/core/fpdfapi/fpdf_render/fpdf_render_image.cpp
index 10fd5f3..b9826a9 100644
--- a/core/fpdfapi/fpdf_render/fpdf_render_image.cpp
+++ b/core/fpdfapi/fpdf_render/fpdf_render_image.cpp
@@ -63,8 +63,7 @@
 #endif
       }
 #ifdef _SKIA_SUPPORT_
-      static_cast<CFX_SkiaDeviceDriver*>(m_pDevice->GetDeviceDriver())
-          ->PreMultiply(pDIBitmap);
+      CFX_SkiaDeviceDriver::PreMultiply(pDIBitmap);
 #endif
       if (m_pDevice->SetDIBits(pDIBitmap, left, top)) {
         return;
@@ -717,7 +716,7 @@
     bitmap_device2.GetBitmap()->ConvertFormat(FXDIB_8bppMask);
     bitmap_device1.GetBitmap()->MultiplyAlpha(bitmap_device2.GetBitmap());
 #ifdef _SKIA_SUPPORT_
-    bitmap_device1.PreMultiply();  // convert unpremultiplied to premultiplied
+    CFX_SkiaDeviceDriver::PreMultiply(bitmap_device1.GetBitmap());
 #endif
     if (m_BitmapAlpha < 255) {
       bitmap_device1.GetBitmap()->MultiplyAlpha(m_BitmapAlpha);
@@ -737,6 +736,19 @@
       m_Flags |= RENDER_FORCE_DOWNSAMPLE;
     }
   }
+#ifdef _SKIA_SUPPORT_
+  CFX_DIBitmap* premultiplied = m_pDIBSource->Clone();
+  CFX_SkiaDeviceDriver::PreMultiply(premultiplied);
+  if (m_pRenderStatus->m_pDevice->StartDIBitsWithBlend(
+          premultiplied, m_BitmapAlpha, m_FillArgb, &m_ImageMatrix, m_Flags,
+          m_DeviceHandle, m_BlendType)) {
+    if (m_DeviceHandle) {
+      m_Status = 3;
+      return TRUE;
+    }
+    return FALSE;
+  }
+#else
   if (m_pRenderStatus->m_pDevice->StartDIBitsWithBlend(
           m_pDIBSource, m_BitmapAlpha, m_FillArgb, &m_ImageMatrix, m_Flags,
           m_DeviceHandle, m_BlendType)) {
@@ -746,6 +758,7 @@
     }
     return FALSE;
   }
+#endif
   CFX_FloatRect image_rect_f = m_ImageMatrix.GetUnitRect();
   FX_RECT image_rect = image_rect_f.GetOutterRect();
   int dest_width = image_rect.Width();
diff --git a/core/fpdfapi/fpdf_render/render_int.h b/core/fpdfapi/fpdf_render/render_int.h
index 67eae91..d86a62a 100644
--- a/core/fpdfapi/fpdf_render/render_int.h
+++ b/core/fpdfapi/fpdf_render/render_int.h
@@ -150,6 +150,10 @@
                                IFX_Pause* pPause);
   CPDF_RenderContext* GetContext() { return m_pContext; }
 
+#if defined _SKIA_SUPPORT_
+  void DebugVerifyDeviceIsPreMultiplied() const;
+#endif
+
   CPDF_RenderOptions m_Options;
   CPDF_Dictionary* m_pFormResource;
   CPDF_Dictionary* m_pPageResource;
diff --git a/core/fxge/ge/fx_ge_device.cpp b/core/fxge/ge/fx_ge_device.cpp
index 00cc541..9bb950b 100644
--- a/core/fxge/ge/fx_ge_device.cpp
+++ b/core/fxge/ge/fx_ge_device.cpp
@@ -7,6 +7,10 @@
 #include "core/fxge/include/fx_ge.h"
 #include "core/fxge/include/ifx_renderdevicedriver.h"
 
+#if defined _SKIA_SUPPORT_
+#include "third_party/skia/include/core/SkTypes.h"
+#endif
+
 CFX_RenderDevice::CFX_RenderDevice()
     : m_pBitmap(nullptr),
       m_Width(0),
@@ -20,6 +24,11 @@
   delete m_pDeviceDriver;
 }
 
+void CFX_RenderDevice::Flush() {
+  delete m_pDeviceDriver;     // destroys the attached driver, if any
+  m_pDeviceDriver = nullptr;  // so later deletes of this member are no-ops
+}
+
 void CFX_RenderDevice::SetDeviceDriver(IFX_RenderDeviceDriver* pDriver) {
   delete m_pDeviceDriver;
   m_pDeviceDriver = pDriver;
@@ -480,3 +489,10 @@
 void CFX_RenderDevice::CancelDIBits(void* handle) {
   m_pDeviceDriver->CancelDIBits(handle);
 }
+
+#ifdef _SKIA_SUPPORT_
+
+void CFX_RenderDevice::DebugVerifyBitmapIsPreMultiplied() const {
+  SkASSERT(0);  // base version always asserts; CFX_FxgeDevice overrides it
+}
+#endif
diff --git a/core/fxge/include/fx_dib.h b/core/fxge/include/fx_dib.h
index 5a31722..8d9f27e 100644
--- a/core/fxge/include/fx_dib.h
+++ b/core/fxge/include/fx_dib.h
@@ -247,6 +247,10 @@
                       int& src_top,
                       const CFX_ClipRgn* pClipRgn);
 
+#if defined _SKIA_SUPPORT_
+  void DebugVerifyBitmapIsPreMultiplied(void* buffer = nullptr) const;
+#endif
+
   CFX_DIBitmap* m_pAlphaMask;
 
  protected:
diff --git a/core/fxge/include/fx_ge.h b/core/fxge/include/fx_ge.h
index 5599a75..ea2adec 100644
--- a/core/fxge/include/fx_ge.h
+++ b/core/fxge/include/fx_ge.h
@@ -215,6 +215,7 @@
   CFX_RenderDevice();
   virtual ~CFX_RenderDevice();
 
+  void Flush();
   void SetDeviceDriver(IFX_RenderDeviceDriver* pDriver);
   IFX_RenderDeviceDriver* GetDeviceDriver() const { return m_pDeviceDriver; }
 
@@ -378,6 +379,10 @@
                                 CFX_PathData* pClippingPath,
                                 int nFlag);
 
+#ifdef _SKIA_SUPPORT_
+  virtual void DebugVerifyBitmapIsPreMultiplied() const;
+#endif
+
  private:
   void InitDeviceInfo();
   void UpdateClipBox();
@@ -416,7 +421,7 @@
 #ifdef _SKIA_SUPPORT_
   bool AttachRecorder(SkPictureRecorder* recorder);
   SkPictureRecorder* CreateRecorder(int size_x, int size_y);
-  void PreMultiply();
+  void DebugVerifyBitmapIsPreMultiplied() const override;
 #endif
 
  protected:
diff --git a/core/fxge/skia/fx_skia_device.cpp b/core/fxge/skia/fx_skia_device.cpp
index 148f623..222c846 100644
--- a/core/fxge/skia/fx_skia_device.cpp
+++ b/core/fxge/skia/fx_skia_device.cpp
@@ -86,41 +86,18 @@
 #undef SHOW_SKIA_PATH
 #undef DRAW_SKIA_CLIP
 
-static void DebugVerifyBitmapIsPreMultiplied(void* buffer,
-                                             int width,
-                                             int height) {
-#ifdef SK_DEBUG
-  // verify that input is really premultiplied
-  for (int y = 0; y < height; ++y) {
-    const uint32_t* srcRow = static_cast<const uint32_t*>(buffer) + y * width;
-    for (int x = 0; x < width; ++x) {
-      uint8_t a = SkGetPackedA32(srcRow[x]);
-      uint8_t r = SkGetPackedR32(srcRow[x]);
-      uint8_t g = SkGetPackedG32(srcRow[x]);
-      uint8_t b = SkGetPackedB32(srcRow[x]);
-      SkA32Assert(a);
-      SkASSERT(r <= a);
-      SkASSERT(g <= a);
-      SkASSERT(b <= a);
-    }
-  }
-#endif
-}
-
 static void DebugValidate(const CFX_DIBitmap* bitmap,
                           const CFX_DIBitmap* device) {
   if (bitmap) {
     SkASSERT(bitmap->GetBPP() == 8 || bitmap->GetBPP() == 32);
     if (bitmap->GetBPP() == 32) {
-      DebugVerifyBitmapIsPreMultiplied(bitmap->GetBuffer(), bitmap->GetWidth(),
-                                       bitmap->GetHeight());
+      bitmap->DebugVerifyBitmapIsPreMultiplied();  // callee asserts 32 bpp
     }
   }
   if (device) {
     SkASSERT(device->GetBPP() == 8 || device->GetBPP() == 32);
     if (device->GetBPP() == 32) {
-      DebugVerifyBitmapIsPreMultiplied(device->GetBuffer(), device->GetWidth(),
-                                       device->GetHeight());
+      device->DebugVerifyBitmapIsPreMultiplied();  // callee asserts 32 bpp
     }
   }
 }
@@ -291,98 +268,6 @@
   return true;
 }
 
-void RgbByteOrderTransferBitmap(CFX_DIBitmap* pBitmap,
-                                int dest_left,
-                                int dest_top,
-                                int width,
-                                int height,
-                                const CFX_DIBSource* pSrcBitmap,
-                                int src_left,
-                                int src_top) {
-  if (!pBitmap)
-    return;
-  pBitmap->GetOverlapRect(dest_left, dest_top, width, height,
-                          pSrcBitmap->GetWidth(), pSrcBitmap->GetHeight(),
-                          src_left, src_top, nullptr);
-  if (width == 0 || height == 0)
-    return;
-  int Bpp = pBitmap->GetBPP() / 8;
-  FXDIB_Format dest_format = pBitmap->GetFormat();
-  FXDIB_Format src_format = pSrcBitmap->GetFormat();
-  int pitch = pBitmap->GetPitch();
-  uint8_t* buffer = pBitmap->GetBuffer();
-  if (dest_format == src_format) {
-    for (int row = 0; row < height; row++) {
-      uint8_t* dest_scan = buffer + (dest_top + row) * pitch + dest_left * Bpp;
-      uint8_t* src_scan =
-          (uint8_t*)pSrcBitmap->GetScanline(src_top + row) + src_left * Bpp;
-      if (Bpp == 4) {
-        for (int col = 0; col < width; col++) {
-          FXARGB_SETDIB(dest_scan, FXARGB_MAKE(src_scan[3], src_scan[0],
-                                               src_scan[1], src_scan[2]));
-          dest_scan += 4;
-          src_scan += 4;
-        }
-      } else {
-        for (int col = 0; col < width; col++) {
-          *dest_scan++ = src_scan[2];
-          *dest_scan++ = src_scan[1];
-          *dest_scan++ = src_scan[0];
-          src_scan += 3;
-        }
-      }
-    }
-    return;
-  }
-  uint8_t* dest_buf = buffer + dest_top * pitch + dest_left * Bpp;
-  if (dest_format == FXDIB_Rgb) {
-    if (src_format == FXDIB_Rgb32) {
-      for (int row = 0; row < height; row++) {
-        uint8_t* dest_scan = dest_buf + row * pitch;
-        uint8_t* src_scan =
-            (uint8_t*)pSrcBitmap->GetScanline(src_top + row) + src_left * 4;
-        for (int col = 0; col < width; col++) {
-          *dest_scan++ = src_scan[2];
-          *dest_scan++ = src_scan[1];
-          *dest_scan++ = src_scan[0];
-          src_scan += 4;
-        }
-      }
-    } else {
-      ASSERT(FALSE);
-    }
-  } else if (dest_format == FXDIB_Argb || dest_format == FXDIB_Rgb32) {
-    if (src_format == FXDIB_Rgb) {
-      for (int row = 0; row < height; row++) {
-        uint8_t* dest_scan = (uint8_t*)(dest_buf + row * pitch);
-        uint8_t* src_scan =
-            (uint8_t*)pSrcBitmap->GetScanline(src_top + row) + src_left * 3;
-        for (int col = 0; col < width; col++) {
-          FXARGB_SETDIB(dest_scan, FXARGB_MAKE(0xff, src_scan[0], src_scan[1],
-                                               src_scan[2]));
-          dest_scan += 4;
-          src_scan += 3;
-        }
-      }
-    } else if (src_format == FXDIB_Rgb32) {
-      ASSERT(dest_format == FXDIB_Argb);
-      for (int row = 0; row < height; row++) {
-        uint8_t* dest_scan = dest_buf + row * pitch;
-        uint8_t* src_scan =
-            (uint8_t*)(pSrcBitmap->GetScanline(src_top + row) + src_left * 4);
-        for (int col = 0; col < width; col++) {
-          FXARGB_SETDIB(dest_scan, FXARGB_MAKE(0xff, src_scan[0], src_scan[1],
-                                               src_scan[2]));
-          src_scan += 4;
-          dest_scan += 4;
-        }
-      }
-    }
-  } else {
-    ASSERT(FALSE);
-  }
-}
-
 // see https://en.wikipedia.org/wiki/Distance_from_a_point_to_a_line
 SkScalar LineSide(const SkPoint line[2], const SkPoint& pt) {
   return (line[1].fY - line[0].fY) * pt.fX - (line[1].fX - line[0].fX) * pt.fY +
@@ -525,11 +410,13 @@
       m_skPath.setFillType((fill_mode & 3) == FXFILL_ALTERNATE
                                ? SkPath::kEvenOdd_FillType
                                : SkPath::kWinding_FillType);
-      m_drawState = *pDrawState;
+      if (pDrawState)
+        m_drawState.Copy(*pDrawState);
       m_fillColor = fill_color;
       m_strokeColor = stroke_color;
       m_blendType = blend_type;
-      m_drawMatrix = *pMatrix;
+      if (pMatrix)
+        m_drawMatrix = *pMatrix;
     }
     SkPath skPath = BuildPath(pPathData);
     SkPoint delta;
@@ -633,6 +520,7 @@
     skPaint.setTextEncoding(SkPaint::kGlyphID_TextEncoding);
     skPaint.setTextSize(m_fontSize);
     skPaint.setSubpixelText(true);
+    skPaint.setHinting(SkPaint::kNo_Hinting);
     SkCanvas* skCanvas = pDriver->SkiaCanvas();
     skCanvas->save();
     skCanvas->concat(skMatrix);
@@ -690,6 +578,9 @@
   }
 
   bool MatrixOffset(const CFX_Matrix* pMatrix, SkPoint* delta) {
+    CFX_Matrix identityMatrix;
+    if (!pMatrix)
+      pMatrix = &identityMatrix;
     delta->set(pMatrix->e - m_drawMatrix.e, pMatrix->f - m_drawMatrix.f);
     if (!delta->fX && !delta->fY)
       return true;
@@ -789,12 +680,18 @@
   }
 
   bool MatrixChanged(const CFX_Matrix* pMatrix, const CFX_Matrix& refMatrix) {
+    CFX_Matrix identityMatrix;  // default-constructed stand-in for null pMatrix
+    if (!pMatrix)
+      pMatrix = &identityMatrix;
     return pMatrix->a != refMatrix.a || pMatrix->b != refMatrix.b ||
            pMatrix->c != refMatrix.c || pMatrix->d != refMatrix.d;
   }
 
   bool StateChanged(const CFX_GraphStateData* pState,
                     const CFX_GraphStateData& refState) {
+    CFX_GraphStateData identityState;
+    if (!pState)
+      pState = &identityState;
     return pState->m_LineWidth != refState.m_LineWidth ||
            pState->m_LineCap != refState.m_LineCap ||
            pState->m_LineJoin != refState.m_LineJoin ||
@@ -804,9 +701,10 @@
 
   bool DashChanged(const CFX_GraphStateData* pState,
                    const CFX_GraphStateData& refState) {
-    if (!pState->m_DashArray && !refState.m_DashArray)
+    bool dashArray = pState && pState->m_DashArray;
+    if (!dashArray && !refState.m_DashArray)
       return false;
-    if (!pState->m_DashArray || !refState.m_DashArray)
+    if (!dashArray || !refState.m_DashArray)
       return true;
     if (pState->m_DashPhase != refState.m_DashPhase ||
         pState->m_DashCount != refState.m_DashCount) {
@@ -946,7 +844,6 @@
       m_pOriDevice(pOriDevice),
       m_pRecorder(nullptr),
       m_pCache(new SkiaState),
-      m_bRgbByteOrder(bRgbByteOrder),
       m_bGroupKnockout(bGroupKnockout) {
   SkBitmap skBitmap;
   SkASSERT(pBitmap->GetBPP() == 8 || pBitmap->GetBPP() == 32);
@@ -967,7 +864,6 @@
       m_pOriDevice(nullptr),
       m_pRecorder(new SkPictureRecorder),
       m_pCache(new SkiaState),
-      m_bRgbByteOrder(FALSE),
       m_bGroupKnockout(FALSE) {
   m_pRecorder->beginRecording(SkIntToScalar(size_x), SkIntToScalar(size_y));
   m_pCanvas = m_pRecorder->getRecordingCanvas();
@@ -978,7 +874,6 @@
       m_pOriDevice(nullptr),
       m_pRecorder(recorder),
       m_pCache(new SkiaState),
-      m_bRgbByteOrder(FALSE),
       m_bGroupKnockout(FALSE) {
   m_pCanvas = m_pRecorder->getRecordingCanvas();
 }
@@ -1012,6 +907,7 @@
   paint.setColor(color);
   paint.setTypeface(typeface);
   paint.setTextEncoding(SkPaint::kGlyphID_TextEncoding);
+  paint.setHinting(SkPaint::kNo_Hinting);
   paint.setTextSize(font_size);
   paint.setSubpixelText(true);
   m_pCanvas->save();
@@ -1354,35 +1250,33 @@
 FX_BOOL CFX_SkiaDeviceDriver::GetDIBits(CFX_DIBitmap* pBitmap,
                                         int left,
                                         int top) {
-  if (!m_pBitmap || !m_pBitmap->GetBuffer())
+  if (!m_pBitmap)  // no backing bitmap: nothing to copy, report success
     return TRUE;
-
-  FX_RECT rect(left, top, left + pBitmap->GetWidth(),
-               top + pBitmap->GetHeight());
-  std::unique_ptr<CFX_DIBitmap> pBack;
-  if (m_pOriDevice) {
-    pBack.reset(m_pOriDevice->Clone(&rect));
-    if (!pBack)
-      return TRUE;
-
-    pBack->CompositeBitmap(0, 0, pBack->GetWidth(), pBack->GetHeight(),
-                           m_pBitmap, 0, 0);
-  } else {
-    pBack.reset(m_pBitmap->Clone(&rect));
-    if (!pBack)
-      return TRUE;
-  }
-
-  left = std::min(left, 0);
-  top = std::min(top, 0);
-  if (m_bRgbByteOrder) {
-    RgbByteOrderTransferBitmap(pBitmap, 0, 0, rect.Width(), rect.Height(),
-                               pBack.get(), left, top);
+  uint8_t* srcBuffer = m_pBitmap->GetBuffer();
+  if (!srcBuffer)  // bitmap without pixel storage: also treated as success
     return TRUE;
-  }
-
-  return pBitmap->TransferBitmap(0, 0, rect.Width(), rect.Height(), pBack.get(),
-                                 left, top);
+  int srcWidth = m_pBitmap->GetWidth();
+  int srcHeight = m_pBitmap->GetHeight();
+  int srcRowBytes = m_pBitmap->GetPitch();  // true stride of the source rows
+  SkImageInfo srcImageInfo = SkImageInfo::Make(
+      srcWidth, srcHeight, SkColorType::kN32_SkColorType, kPremul_SkAlphaType);
+  SkBitmap skSrcBitmap;  // wraps srcBuffer in place; pixels are not copied
+  skSrcBitmap.installPixels(srcImageInfo, srcBuffer, srcRowBytes, nullptr,
+                            nullptr, nullptr);
+  SkASSERT(pBitmap);
+  uint8_t* dstBuffer = pBitmap->GetBuffer();
+  SkASSERT(dstBuffer);
+  int dstWidth = pBitmap->GetWidth();
+  int dstHeight = pBitmap->GetHeight();
+  int dstRowBytes = pBitmap->GetPitch();  // true stride of the dest rows
+  SkImageInfo dstImageInfo = SkImageInfo::Make(
+      dstWidth, dstHeight, SkColorType::kN32_SkColorType, kPremul_SkAlphaType);
+  SkBitmap skDstBitmap;  // wraps dstBuffer so the canvas draws into pBitmap
+  skDstBitmap.installPixels(dstImageInfo, dstBuffer, dstRowBytes, nullptr,
+                            nullptr, nullptr);
+  SkCanvas canvas(skDstBitmap);
+  canvas.drawBitmap(skSrcBitmap, left, top, nullptr);  // blit at (left, top)
+  return TRUE;  // every path through this function reports success
 }
 
 CFX_DIBitmap* CFX_SkiaDeviceDriver::GetBackDrop() {
@@ -1491,7 +1385,7 @@
     case 32:
       colorType = SkColorType::kN32_SkColorType;
       alphaType = kPremul_SkAlphaType;
-      DebugVerifyBitmapIsPreMultiplied(buffer, width, height);
+      pSource->DebugVerifyBitmapIsPreMultiplied(buffer);
       break;
     default:
       SkASSERT(0);  // TODO(caryclark) ensure that all cases are covered
@@ -1528,10 +1422,6 @@
   return FALSE;
 }
 
-void CFX_SkiaDeviceDriver::PreMultiply() {
-  PreMultiply(m_pBitmap);
-}
-
 void CFX_SkiaDeviceDriver::PreMultiply(CFX_DIBitmap* pDIBitmap) {
   void* buffer = pDIBitmap->GetBuffer();
   if (!buffer)
@@ -1549,7 +1439,7 @@
       SkImageInfo::Make(width, height, kN32_SkColorType, kPremul_SkAlphaType);
   SkPixmap premultiplied(premultipliedInfo, buffer, rowBytes);
   unpremultiplied.readPixels(premultiplied);
-  DebugVerifyBitmapIsPreMultiplied(buffer, width, height);
+  pDIBitmap->DebugVerifyBitmapIsPreMultiplied();
 }
 
 void CFX_SkiaDeviceDriver::Dump() const {
@@ -1559,6 +1449,11 @@
 #endif
 }
 
+void CFX_SkiaDeviceDriver::DebugVerifyBitmapIsPreMultiplied() const {
+  if (m_pOriDevice)  // only m_pOriDevice is checked; skipped when it is null
+    m_pOriDevice->DebugVerifyBitmapIsPreMultiplied();
+}
+
 CFX_FxgeDevice::CFX_FxgeDevice() {
   m_bOwnedBitmap = FALSE;
 }
@@ -1606,12 +1501,42 @@
 }
 
 CFX_FxgeDevice::~CFX_FxgeDevice() {
+  Flush();
+  // The driver is now deleted (by Flush), before the owned bitmap is freed.
   if (m_bOwnedBitmap && GetBitmap())
     delete GetBitmap();
 }
 
-void CFX_FxgeDevice::PreMultiply() {
-  (static_cast<CFX_SkiaDeviceDriver*>(GetDeviceDriver()))->PreMultiply();
+// Debug-only: forwards the premultiplied-alpha check to the Skia driver.
+void CFX_FxgeDevice::DebugVerifyBitmapIsPreMultiplied() const {
+#ifdef SK_DEBUG
+  // Nothing is checked when no device driver is attached.
+  if (auto* driver = static_cast<CFX_SkiaDeviceDriver*>(GetDeviceDriver()))
+    driver->DebugVerifyBitmapIsPreMultiplied();
+#endif
+}
+
+// Debug-only: asserts r, g, b <= a per pixel in |opt| (or GetBuffer() if null).
+void CFX_DIBSource::DebugVerifyBitmapIsPreMultiplied(void* opt) const {
+#ifdef SK_DEBUG
+  SkASSERT(32 == GetBPP());
+  const auto* buffer = static_cast<const uint32_t*>(opt ? opt : GetBuffer());
+  if (!buffer)
+    return;  // no pixel storage to inspect
+  int width = GetWidth();
+  int height = GetHeight();
+  for (int y = 0; y < height; ++y) {
+    const uint32_t* srcRow = buffer + y * width;
+    for (int x = 0; x < width; ++x) {
+      const uint32_t pixel = srcRow[x];
+      const uint8_t a = SkGetPackedA32(pixel);
+      SkA32Assert(a);
+      SkASSERT(SkGetPackedR32(pixel) <= a);
+      SkASSERT(SkGetPackedG32(pixel) <= a);
+      SkASSERT(SkGetPackedB32(pixel) <= a);
+    }
+  }
+#endif
 }
 
 #endif
diff --git a/core/fxge/skia/fx_skia_device.h b/core/fxge/skia/fx_skia_device.h
index f7e5306..a7a5f85 100644
--- a/core/fxge/skia/fx_skia_device.h
+++ b/core/fxge/skia/fx_skia_device.h
@@ -125,9 +125,9 @@
                    const SkMatrix& matrix);
   void Flush();
   SkPictureRecorder* GetRecorder() const { return m_pRecorder; }
-  void PreMultiply();
   static void PreMultiply(CFX_DIBitmap* pDIBitmap);
   SkCanvas* SkiaCanvas() { return m_pCanvas; }
+  void DebugVerifyBitmapIsPreMultiplied() const;
   void Dump() const;
 
  private:
@@ -138,7 +138,6 @@
   SkCanvas* m_pCanvas;
   SkPictureRecorder* const m_pRecorder;
   std::unique_ptr<SkiaState> m_pCache;
-  FX_BOOL m_bRgbByteOrder;
   FX_BOOL m_bGroupKnockout;
 };
 #endif  // defined(_SKIA_SUPPORT_)
