APIs and tests for extracting bitmaps from image objects

Added FPDFImageObj_GetBitmap(), which returns the bitmap of an image
object, and FPDFBitmap_GetFormat(), which returns the format of a
bitmap. Also:
    * Fixed a small bitmap conversion bug in cfx_dibsource.cpp: 1-bpp sources
      now convert to white (255) / black (0) bytes instead of 0 / 1.
    * Enabled EmbedderTest::CompareBitmap() to support different formats
      of bitmaps.
    * Added an embedder test and a test PDF file with images of many
      different formats.

Bug=pdfium:677

Change-Id: I6a72f9d969cf5f3577db9400ca33197c213622ed
Reviewed-on: https://pdfium-review.googlesource.com/9690
Reviewed-by: Lei Zhang <thestig@chromium.org>
Commit-Queue: Jane Liu <janeliulwq@google.com>
diff --git a/core/fxge/dib/cfx_dibsource.cpp b/core/fxge/dib/cfx_dibsource.cpp
index a0ed950..3a21796 100644
--- a/core/fxge/dib/cfx_dibsource.cpp
+++ b/core/fxge/dib/cfx_dibsource.cpp
@@ -271,12 +271,14 @@
   if (pSrcBitmap->GetBPP() == 1) {
     for (int row = 0; row < height; row++) {
       uint8_t* dest_scan = dest_buf + row * dest_pitch;
-      memset(dest_scan, 0, width);
+      // Set all destination pixels to be white initially.
+      memset(dest_scan, 255, width);
       const uint8_t* src_scan = pSrcBitmap->GetScanline(src_top + row);
       for (int col = src_left; col < src_left + width; col++) {
-        if (src_scan[col / 8] & (1 << (7 - col % 8))) {
-          *dest_scan = 1;
-        }
+        // If the source bit is set, then set the destination pixel to be black.
+        if (src_scan[col / 8] & (1 << (7 - col % 8)))
+          *dest_scan = 0;
+
         dest_scan++;
       }
     }
diff --git a/fpdfsdk/fpdfedit_embeddertest.cpp b/fpdfsdk/fpdfedit_embeddertest.cpp
index cc3e50b..dcaeb94 100644
--- a/fpdfsdk/fpdfedit_embeddertest.cpp
+++ b/fpdfsdk/fpdfedit_embeddertest.cpp
@@ -925,3 +925,57 @@
   }
   TestAndCloseSaved(612, 792, md5);
 }
+
+TEST_F(FPDFEditEmbeddertest, ExtractImageBitmap) {
+  ASSERT_TRUE(OpenDocument("embedded_images.pdf"));
+  FPDF_PAGE page = LoadPage(0);
+  ASSERT_TRUE(page);
+  ASSERT_EQ(39, FPDFPage_CountObject(page));
+
+  FPDF_PAGEOBJECT obj = FPDFPage_GetObject(page, 32);
+  EXPECT_NE(FPDF_PAGEOBJ_IMAGE, FPDFPageObj_GetType(obj));
+  EXPECT_FALSE(FPDFImageObj_GetBitmap(obj));
+
+  obj = FPDFPage_GetObject(page, 33);
+  ASSERT_EQ(FPDF_PAGEOBJ_IMAGE, FPDFPageObj_GetType(obj));
+  FPDF_BITMAP bitmap = FPDFImageObj_GetBitmap(obj);
+  EXPECT_EQ(FPDFBitmap_BGR, FPDFBitmap_GetFormat(bitmap));
+  CompareBitmap(bitmap, 109, 88, "d65e98d968d196abf13f78aec655ffae");
+  FPDFBitmap_Destroy(bitmap);
+
+  obj = FPDFPage_GetObject(page, 34);
+  ASSERT_EQ(FPDF_PAGEOBJ_IMAGE, FPDFPageObj_GetType(obj));
+  bitmap = FPDFImageObj_GetBitmap(obj);
+  EXPECT_EQ(FPDFBitmap_BGR, FPDFBitmap_GetFormat(bitmap));
+  CompareBitmap(bitmap, 103, 75, "1287711c84dbef767c435d11697661d6");
+  FPDFBitmap_Destroy(bitmap);
+
+  obj = FPDFPage_GetObject(page, 35);
+  ASSERT_EQ(FPDF_PAGEOBJ_IMAGE, FPDFPageObj_GetType(obj));
+  bitmap = FPDFImageObj_GetBitmap(obj);
+  EXPECT_EQ(FPDFBitmap_Gray, FPDFBitmap_GetFormat(bitmap));
+  CompareBitmap(bitmap, 92, 68, "9c6d76cb1e37ef8514f9455d759391f3");
+  FPDFBitmap_Destroy(bitmap);
+
+  obj = FPDFPage_GetObject(page, 36);
+  ASSERT_EQ(FPDF_PAGEOBJ_IMAGE, FPDFPageObj_GetType(obj));
+  bitmap = FPDFImageObj_GetBitmap(obj);
+  EXPECT_EQ(FPDFBitmap_BGR, FPDFBitmap_GetFormat(bitmap));
+  CompareBitmap(bitmap, 79, 60, "15cb6a49a2e354ed0e9f45dd34e3da1a");
+  FPDFBitmap_Destroy(bitmap);
+
+  obj = FPDFPage_GetObject(page, 37);
+  ASSERT_EQ(FPDF_PAGEOBJ_IMAGE, FPDFPageObj_GetType(obj));
+  bitmap = FPDFImageObj_GetBitmap(obj);
+  EXPECT_EQ(FPDFBitmap_BGR, FPDFBitmap_GetFormat(bitmap));
+  CompareBitmap(bitmap, 126, 106, "be5a64ba7890d2657522af6524118534");
+  FPDFBitmap_Destroy(bitmap);
+
+  obj = FPDFPage_GetObject(page, 38);
+  ASSERT_EQ(FPDF_PAGEOBJ_IMAGE, FPDFPageObj_GetType(obj));
+  bitmap = FPDFImageObj_GetBitmap(obj);
+  EXPECT_EQ(FPDFBitmap_BGR, FPDFBitmap_GetFormat(bitmap));
+  CompareBitmap(bitmap, 194, 119, "f9e24207ee1bc0db6c543d33a5f12ec5");
+  FPDFBitmap_Destroy(bitmap);
+  UnloadPage(page);
+}
diff --git a/fpdfsdk/fpdfeditimg.cpp b/fpdfsdk/fpdfeditimg.cpp
index fdc98e0..bfd12b2 100644
--- a/fpdfsdk/fpdfeditimg.cpp
+++ b/fpdfsdk/fpdfeditimg.cpp
@@ -110,3 +110,30 @@
   pImgObj->SetDirty(true);
   return true;
 }
+
+DLLEXPORT FPDF_BITMAP STDCALL
+FPDFImageObj_GetBitmap(FPDF_PAGEOBJECT image_object) {
+  CPDF_PageObject* pObj = CPDFPageObjectFromFPDFPageObject(image_object);
+  if (!pObj || !pObj->IsImage())
+    return nullptr;
+
+  CFX_RetainPtr<CPDF_Image> pImg = pObj->AsImage()->GetImage();
+  if (!pImg)
+    return nullptr;
+
+  CFX_RetainPtr<CFX_DIBSource> pSource = pImg->LoadDIBSource();
+  if (!pSource)
+    return nullptr;
+
+  CFX_RetainPtr<CFX_DIBitmap> pBitmap;
+  // If the source image has a representation of 1 bit per pixel, then convert
+  // it to a grayscale bitmap having 1 byte per pixel, since the public bitmap
+  // formats have no 1-bit-per-pixel variant. Otherwise, convert the source
+  // image to a bitmap directly, retaining its color representation.
+  if (pSource->GetBPP() == 1)
+    pBitmap = pSource->CloneConvert(FXDIB_8bppRgb);
+  else
+    pBitmap = pSource->Clone(nullptr);
+
+  return pBitmap.Leak();
+}
diff --git a/fpdfsdk/fpdfview.cpp b/fpdfsdk/fpdfview.cpp
index 355fcb3..5aa8013 100644
--- a/fpdfsdk/fpdfview.cpp
+++ b/fpdfsdk/fpdfview.cpp
@@ -1126,6 +1126,26 @@
   return pBitmap.Leak();
 }
 
+DLLEXPORT int STDCALL FPDFBitmap_GetFormat(FPDF_BITMAP bitmap) {
+  if (!bitmap)
+    return FPDFBitmap_Unknown;
+
+  FXDIB_Format format = CFXBitmapFromFPDFBitmap(bitmap)->GetFormat();
+  switch (format) {
+    case FXDIB_8bppRgb:
+    case FXDIB_8bppMask:
+      return FPDFBitmap_Gray;
+    case FXDIB_Rgb:
+      return FPDFBitmap_BGR;
+    case FXDIB_Rgb32:
+      return FPDFBitmap_BGRx;
+    case FXDIB_Argb:
+      return FPDFBitmap_BGRA;
+    default:
+      return FPDFBitmap_Unknown;
+  }
+}
+
 DLLEXPORT void STDCALL FPDFBitmap_FillRect(FPDF_BITMAP bitmap,
                                            int left,
                                            int top,
diff --git a/fpdfsdk/fpdfview_c_api_test.c b/fpdfsdk/fpdfview_c_api_test.c
index 7d42be8..e47f4d1 100644
--- a/fpdfsdk/fpdfview_c_api_test.c
+++ b/fpdfsdk/fpdfview_c_api_test.c
@@ -132,6 +132,7 @@
     CHK(FPDFImageObj_LoadJpegFileInline);
     CHK(FPDFImageObj_SetMatrix);
     CHK(FPDFImageObj_SetBitmap);
+    CHK(FPDFImageObj_GetBitmap);
     CHK(FPDFPageObj_CreateNewPath);
     CHK(FPDFPageObj_CreateNewRect);
     CHK(FPDFPath_SetStrokeColor);
@@ -302,6 +303,7 @@
     CHK(FPDF_PageToDevice);
     CHK(FPDFBitmap_Create);
     CHK(FPDFBitmap_CreateEx);
+    CHK(FPDFBitmap_GetFormat);
     CHK(FPDFBitmap_FillRect);
     CHK(FPDFBitmap_GetBuffer);
     CHK(FPDFBitmap_GetWidth);
diff --git a/public/fpdf_edit.h b/public/fpdf_edit.h
index cfed931..e5607d1 100644
--- a/public/fpdf_edit.h
+++ b/public/fpdf_edit.h
@@ -288,6 +288,16 @@
                                                    FPDF_PAGEOBJECT image_object,
                                                    FPDF_BITMAP bitmap);
 
+// Get a bitmap rasterisation of |image_object|. The returned bitmap will be
+// owned by the caller, and FPDFBitmap_Destroy() must be called on the returned
+// bitmap when it is no longer needed.
+//
+//   image_object - handle to an image object.
+//
+// Returns the bitmap, or NULL on failure.
+DLLEXPORT FPDF_BITMAP STDCALL
+FPDFImageObj_GetBitmap(FPDF_PAGEOBJECT image_object);
+
 // Create a new path object at an initial position.
 //
 //   x - initial horizontal position.
diff --git a/public/fpdfview.h b/public/fpdfview.h
index 55897eb..4e2e698 100644
--- a/public/fpdfview.h
+++ b/public/fpdfview.h
@@ -626,7 +626,8 @@
 // Parameters:
 //          bitmap      -   Handle to the device independent bitmap (as the
 //                          output buffer). The bitmap handle can be created
-//                          by FPDFBitmap_Create.
+//                          by FPDFBitmap_Create or retrieved from an image
+//                          object by FPDFImageObj_GetBitmap.
 //          page        -   Handle to the page. Returned by FPDF_LoadPage
 //          start_x     -   Left pixel position of the display area in
 //                          bitmap coordinates.
@@ -660,7 +661,8 @@
 // Parameters:
 //          bitmap      -   Handle to the device independent bitmap (as the
 //                          output buffer). The bitmap handle can be created
-//                          by FPDFBitmap_Create.
+//                          by FPDFBitmap_Create or retrieved by
+//                          FPDFImageObj_GetBitmap.
 //          page        -   Handle to the page. Returned by FPDF_LoadPage
 //          matrix      -   The transform matrix.
 //          clipping    -   The rect to clip to.
@@ -820,6 +822,8 @@
                                                 int alpha);
 
 // More DIB formats
+// Unknown or unsupported format.
+#define FPDFBitmap_Unknown 0
 // Gray scale bitmap, one byte per pixel.
 #define FPDFBitmap_Gray 1
 // 3 bytes per pixel, byte order: blue, green, red.
@@ -860,6 +864,18 @@
                                                   void* first_scan,
                                                   int stride);
 
+// Function: FPDFBitmap_GetFormat
+//          Get the format of the bitmap.
+// Parameters:
+//          bitmap      -   Handle to the bitmap. Returned by FPDFBitmap_Create
+//                          or FPDFImageObj_GetBitmap.
+// Return value:
+//          The format of the bitmap, or FPDFBitmap_Unknown on failure.
+// Comments:
+//          Only formats supported by FPDFBitmap_CreateEx are supported by this
+//          function; see the list of such formats above.
+DLLEXPORT int STDCALL FPDFBitmap_GetFormat(FPDF_BITMAP bitmap);
+
 // Function: FPDFBitmap_FillRect
 //          Fill a rectangle in a bitmap.
 // Parameters:
@@ -894,7 +910,8 @@
 // Function: FPDFBitmap_GetBuffer
 //          Get data buffer of a bitmap.
 // Parameters:
-//          bitmap      -   Handle to the bitmap. Returned by FPDFBitmap_Create.
+//          bitmap      -   Handle to the bitmap. Returned by FPDFBitmap_Create
+//                          or FPDFImageObj_GetBitmap.
 // Return value:
 //          The pointer to the first byte of the bitmap buffer.
 // Comments:
@@ -911,7 +928,8 @@
 // Function: FPDFBitmap_GetWidth
 //          Get width of a bitmap.
 // Parameters:
-//          bitmap      -   Handle to the bitmap. Returned by FPDFBitmap_Create.
+//          bitmap      -   Handle to the bitmap. Returned by FPDFBitmap_Create
+//                          or FPDFImageObj_GetBitmap.
 // Return value:
 //          The width of the bitmap in pixels.
 DLLEXPORT int STDCALL FPDFBitmap_GetWidth(FPDF_BITMAP bitmap);
@@ -919,7 +937,8 @@
 // Function: FPDFBitmap_GetHeight
 //          Get height of a bitmap.
 // Parameters:
-//          bitmap      -   Handle to the bitmap. Returned by FPDFBitmap_Create.
+//          bitmap      -   Handle to the bitmap. Returned by FPDFBitmap_Create
+//                          or FPDFImageObj_GetBitmap.
 // Return value:
 //          The height of the bitmap in pixels.
 DLLEXPORT int STDCALL FPDFBitmap_GetHeight(FPDF_BITMAP bitmap);
@@ -927,7 +946,8 @@
 // Function: FPDFBitmap_GetStride
 //          Get number of bytes for each line in the bitmap buffer.
 // Parameters:
-//          bitmap      -   Handle to the bitmap. Returned by FPDFBitmap_Create.
+//          bitmap      -   Handle to the bitmap. Returned by FPDFBitmap_Create
+//                          or FPDFImageObj_GetBitmap.
 // Return value:
 //          The number of bytes for each line in the bitmap buffer.
 // Comments:
@@ -937,7 +957,8 @@
 // Function: FPDFBitmap_Destroy
 //          Destroy a bitmap and release all related buffers.
 // Parameters:
-//          bitmap      -   Handle to the bitmap. Returned by FPDFBitmap_Create.
+//          bitmap      -   Handle to the bitmap. Returned by FPDFBitmap_Create
+//                          or FPDFImageObj_GetBitmap.
 // Return value:
 //          None.
 // Comments:
diff --git a/testing/embedder_test.cpp b/testing/embedder_test.cpp
index 0846d8c..82ffb3b 100644
--- a/testing/embedder_test.cpp
+++ b/testing/embedder_test.cpp
@@ -42,6 +42,22 @@
 
 void Add_Segment(FX_DOWNLOADHINTS* pThis, size_t offset, size_t size) {}
 
+int GetBitmapBytesPerPixel(FPDF_BITMAP bitmap) {
+  const int format = FPDFBitmap_GetFormat(bitmap);
+  switch (format) {
+    case FPDFBitmap_Gray:
+      return 1;
+    case FPDFBitmap_BGR:
+      return 3;
+    case FPDFBitmap_BGRx:
+    case FPDFBitmap_BGRA:
+      return 4;
+    default:
+      ASSERT(false);
+      return 0;
+  }
+}
+
 }  // namespace
 
 EmbedderTest::EmbedderTest()
@@ -381,8 +397,10 @@
                                      int expected_width,
                                      int expected_height) {
   uint8_t digest[16];
-  CRYPT_MD5Generate(static_cast<uint8_t*>(FPDFBitmap_GetBuffer(bitmap)),
-                    expected_width * 4 * expected_height, digest);
+  CRYPT_MD5Generate(
+      static_cast<uint8_t*>(FPDFBitmap_GetBuffer(bitmap)),
+      expected_width * GetBitmapBytesPerPixel(bitmap) * expected_height,
+      digest);
   return CryptToBase16(digest);
 }
 
@@ -393,7 +411,11 @@
                                  const char* expected_md5sum) {
   ASSERT_EQ(expected_width, FPDFBitmap_GetWidth(bitmap));
   ASSERT_EQ(expected_height, FPDFBitmap_GetHeight(bitmap));
-  const int expected_stride = expected_width * 4;
+
+  // The expected stride is calculated using the same formula as in
+  // CFX_DIBitmap::CalculatePitchAndSize(), which sets the bitmap stride.
+  const int expected_stride =
+      (expected_width * GetBitmapBytesPerPixel(bitmap) * 8 + 31) / 32 * 4;
   ASSERT_EQ(expected_stride, FPDFBitmap_GetStride(bitmap));
 
   if (!expected_md5sum)
diff --git a/testing/resources/embedded_images.pdf b/testing/resources/embedded_images.pdf
new file mode 100644
index 0000000..8184582
--- /dev/null
+++ b/testing/resources/embedded_images.pdf
Binary files differ