Added testing flag --save-images that saves embedded images

1. Added --save-images flag in pdfium_test to save embedded images using
the FPDFImageObj_GetBitmap() API and the bitmap-to-png conversion tool.
    * Added support for BGR and grayscale bitmaps in the
    bitmap-to-png conversion tool.

Bug=pdfium:677

Change-Id: Ide29f51021695af0a1afb5205355f4b78b918d35
Reviewed-on: https://pdfium-review.googlesource.com/9710
Commit-Queue: Jane Liu <janeliulwq@google.com>
Reviewed-by: Lei Zhang <thestig@chromium.org>
diff --git a/samples/pdfium_test.cc b/samples/pdfium_test.cc
index ee95e2f..f8096ab 100644
--- a/samples/pdfium_test.cc
+++ b/samples/pdfium_test.cc
@@ -88,6 +88,7 @@
         send_events(false),
         render_oneshot(false),
         save_attachments(false),
+        save_images(false),
 #ifdef ENABLE_CALLGRIND
         callgrind_delimiters(false),
 #endif  // ENABLE_CALLGRIND
@@ -101,6 +102,7 @@
   bool send_events;
   bool render_oneshot;
   bool save_attachments;
+  bool save_images;
 #ifdef ENABLE_CALLGRIND
   bool callgrind_delimiters;
 #endif  // ENABLE_CALLGRIND
@@ -727,6 +729,8 @@
       options->render_oneshot = true;
     } else if (cur_arg == "--save-attachments") {
       options->save_attachments = true;
+    } else if (cur_arg == "--save-images") {
+      options->save_images = true;
 #ifdef ENABLE_CALLGRIND
     } else if (cur_arg == "--callgrind-delim") {
       options->callgrind_delimiters = true;
@@ -1117,6 +1121,88 @@
   }
 }
 
+// Saves each image object on |page| as <pdf_name>.<page_num>.<index>.png,
+// with a 0-based object index. Failures are reported on stderr and skipped.
+void SaveImages(FPDF_PAGE page, const char* pdf_name, int page_num) {
+  for (int i = 0, count = FPDFPage_CountObject(page); i < count; ++i) {
+    FPDF_PAGEOBJECT obj = FPDFPage_GetObject(page, i);
+    if (FPDFPageObj_GetType(obj) != FPDF_PAGEOBJ_IMAGE)
+      continue;
+
+    std::unique_ptr<void, FPDFBitmapDeleter> bitmap(
+        FPDFImageObj_GetBitmap(obj));
+    if (!bitmap) {
+      fprintf(stderr, "Image object #%d on page #%d has an empty bitmap.\n",
+              i + 1, page_num + 1);
+      continue;
+    }
+
+    int format = FPDFBitmap_GetFormat(bitmap.get());
+    if (format == FPDFBitmap_Unknown) {
+      fprintf(stderr,
+              "Image object #%d on page #%d has a bitmap of unknown format.\n",
+              i + 1, page_num + 1);
+      continue;
+    }
+
+    std::vector<unsigned char> png_encoding;
+    const unsigned char* buffer =
+        static_cast<const unsigned char*>(FPDFBitmap_GetBuffer(bitmap.get()));
+    int width = FPDFBitmap_GetWidth(bitmap.get());
+    int height = FPDFBitmap_GetHeight(bitmap.get());
+    int stride = FPDFBitmap_GetStride(bitmap.get());
+    bool ret = false;
+    switch (format) {
+      case FPDFBitmap_Gray:
+        ret = image_diff_png::EncodeGrayPNG(buffer, width, height, stride,
+                                            &png_encoding);
+        break;
+      case FPDFBitmap_BGR:
+        ret = image_diff_png::EncodeBGRPNG(buffer, width, height, stride,
+                                           &png_encoding);
+        break;
+      case FPDFBitmap_BGRx:
+        ret = image_diff_png::EncodeBGRAPNG(buffer, width, height, stride, true,
+                                            &png_encoding);
+        break;
+      case FPDFBitmap_BGRA:
+        ret = image_diff_png::EncodeBGRAPNG(buffer, width, height, stride,
+                                            false, &png_encoding);
+        break;
+      default:
+        NOTREACHED();
+    }
+    if (!ret) {
+      fprintf(stderr,
+              "Failed to convert image object #%d on page #%d to png.\n", i + 1,
+              page_num + 1);
+      continue;
+    }
+
+    char filename[256];
+    int chars_formatted = snprintf(filename, sizeof(filename), "%s.%d.%d.png",
+                                   pdf_name, page_num, i);
+    if (chars_formatted < 0 ||
+        static_cast<size_t>(chars_formatted) >= sizeof(filename)) {
+      fprintf(stderr, "Filename %s for saving image is too long\n", filename);
+      continue;
+    }
+
+    FILE* fp = fopen(filename, "wb");
+    if (!fp) {
+      fprintf(stderr, "Failed to open %s for saving image.\n", filename);
+      continue;
+    }
+    // Check fclose() too: buffered data can fail to flush only at close.
+    size_t bytes_written =
+        fwrite(png_encoding.data(), 1, png_encoding.size(), fp);
+    if (fclose(fp) != 0 || bytes_written != png_encoding.size())
+      fprintf(stderr, "Failed to write to %s.\n", filename);
+    else
+      fprintf(stderr, "Successfully wrote embedded image %s.\n", filename);
+  }
+}
+
 // Note, for a client using progressive rendering you'd want to determine if you
 // need the rendering to pause instead of always saying |true|. This is for
 // testing to force the renderer to break whenever possible.
@@ -1136,6 +1222,8 @@
     return false;
   if (options.send_events)
     SendPageEvents(form, page, events);
+  if (options.save_images)
+    SaveImages(page, name.c_str(), page_index);
   if (options.output_format == OUTPUT_STRUCTURE) {
     DumpPageStructure(page, page_index);
     return true;
@@ -1413,6 +1501,8 @@
     "  --render-oneshot    - render image without using progressive renderer\n"
     "  --save-attachments  - write embedded attachments "
     "<pdf-name>.attachment.<attachment-name>\n"
+    "  --save-images       - write embedded images "
+    "<pdf-name>.<page-number>.<object-number>.png\n"
 #ifdef ENABLE_CALLGRIND
     "  --callgrind-delim   - delimit interesting section when using callgrind\n"
 #endif  // ENABLE_CALLGRIND
diff --git a/testing/image_diff/image_diff_png.cpp b/testing/image_diff/image_diff_png.cpp
index a5e8cdb..56be539 100644
--- a/testing/image_diff/image_diff_png.cpp
+++ b/testing/image_diff/image_diff_png.cpp
@@ -29,12 +29,18 @@
   // This is the native JPEG format.
   FORMAT_RGB,
 
+  // 3 bytes per pixel, in BGR order regardless of endianness.
+  FORMAT_BGR,
+
   // 4 bytes per pixel, in RGBA order in memory regardless of endianness.
   FORMAT_RGBA,
 
   // 4 bytes per pixel, in BGRA order in memory regardless of endianness.
   // This is the default Windows DIB order.
   FORMAT_BGRA,
+
+  // 1 byte per pixel.
+  FORMAT_GRAY,
 };
 
 // Represents a comment in the tEXt ancillary chunk of the png.
@@ -58,6 +64,19 @@
   }
 }
 
+// Copies |pixel_width| 3-byte pixels from |bgr| to |rgb|, swapping bytes 0 and
+// 2 of each. |is_opaque| is unused; it keeps the FormatConverter signature.
+void ConvertBGRtoRGB(const unsigned char* bgr,
+                     int pixel_width,
+                     unsigned char* rgb,
+                     bool* is_opaque) {
+  for (int i = 0; i < pixel_width * 3; i += 3) {
+    rgb[i] = bgr[i + 2];
+    rgb[i + 1] = bgr[i + 1];
+    rgb[i + 2] = bgr[i];
+  }
+}
+
 void ConvertRGBAtoRGB(const unsigned char* rgba,
                       int pixel_width,
                       unsigned char* rgb,
@@ -93,7 +112,7 @@
         output_channels(0),
         is_opaque(true),
         output(o),
-        row_converter(NULL),
+        row_converter(nullptr),
         width(0),
         height(0),
         done(false) {}
@@ -217,7 +236,7 @@
   if (channels == 3) {
     switch (state->output_format) {
       case FORMAT_RGB:
-        state->row_converter = NULL;  // no conversion necessary
+        state->row_converter = nullptr;  // no conversion necessary
         state->output_channels = 3;
         break;
       case FORMAT_RGBA:
@@ -228,6 +247,10 @@
         state->row_converter = &ConvertRGBtoBGRA;
         state->output_channels = 4;
         break;
+      case FORMAT_GRAY:
+        state->row_converter = nullptr;
+        state->output_channels = 1;
+        break;
       default:
         NOTREACHED();
         break;
@@ -239,7 +262,7 @@
         state->output_channels = 3;
         break;
       case FORMAT_RGBA:
-        state->row_converter = NULL;  // no conversion necessary
+        state->row_converter = nullptr;  // no conversion necessary
         state->output_channels = 4;
         break;
       case FORMAT_BGRA:
@@ -546,11 +569,14 @@
                                 std::vector<unsigned char>* output) {
   // Run to convert an input row into the output row format, NULL means no
   // conversion is necessary.
-  FormatConverter converter = NULL;
+  FormatConverter converter = nullptr;
 
   int input_color_components, output_color_components;
   int png_output_color_type;
   switch (format) {
+    case FORMAT_BGR:
+      converter = ConvertBGRtoRGB;
+
     case FORMAT_RGB:
       input_color_components = 3;
       output_color_components = 3;
@@ -567,7 +593,7 @@
       } else {
         output_color_components = 4;
         png_output_color_type = PNG_COLOR_TYPE_RGB_ALPHA;
-        converter = NULL;
+        converter = nullptr;
       }
       break;
 
@@ -584,13 +610,20 @@
       }
       break;
 
+    case FORMAT_GRAY:
+      input_color_components = 1;
+      output_color_components = 1;
+      png_output_color_type = PNG_COLOR_TYPE_GRAY;
+      discard_transparency = false;
+      break;
+
     default:
       NOTREACHED();
       return false;
   }
 
   // Row stride should be at least as long as the length of the data.
-  if (input_color_components * width < row_byte_width)
+  if (row_byte_width < input_color_components * width)
     return false;
 
   png_struct* png_ptr =
@@ -636,6 +669,16 @@
   return Decode(input, input_size, FORMAT_RGBA, output, width, height);
 }
 
+// Encodes BGR pixel rows (3 bytes per pixel) into |output| as a PNG.
+bool EncodeBGRPNG(const unsigned char* input,
+                  int width,
+                  int height,
+                  int row_byte_width,
+                  std::vector<unsigned char>* output) {
+  return Encode(input, FORMAT_BGR, width, height, row_byte_width,
+                /*discard_transparency=*/false, std::vector<Comment>(), output);
+}
+
 // Encode an RGBA pixel array into a PNG.
 bool EncodeRGBAPNG(const unsigned char* input,
                    int width,
@@ -657,4 +700,14 @@
                 discard_transparency, std::vector<Comment>(), output);
 }
 
+// Encodes grayscale pixel rows (1 byte per pixel) into |output| as a PNG.
+bool EncodeGrayPNG(const unsigned char* input,
+                   int width,
+                   int height,
+                   int row_byte_width,
+                   std::vector<unsigned char>* output) {
+  return Encode(input, FORMAT_GRAY, width, height, row_byte_width,
+                /*discard_transparency=*/false, std::vector<Comment>(), output);
+}
+
 }  // namespace image_diff_png
diff --git a/testing/image_diff/image_diff_png.h b/testing/image_diff/image_diff_png.h
index 4d87aa1..b334b20 100644
--- a/testing/image_diff/image_diff_png.h
+++ b/testing/image_diff/image_diff_png.h
@@ -18,6 +18,13 @@
                int* width,
                int* height);
 
+// Encode a BGR (3 bytes per pixel) array into a PNG; returns false on failure.
+bool EncodeBGRPNG(const unsigned char* input,
+                  int width,
+                  int height,
+                  int row_byte_width,
+                  std::vector<unsigned char>* output);
+
 // Encode an RGBA pixel array into a PNG.
 bool EncodeRGBAPNG(const unsigned char* input,
                    int width,
@@ -33,6 +40,13 @@
                    bool discard_transparency,
                    std::vector<unsigned char>* output);
 
+// Encode a grayscale (1 byte per pixel) array into a PNG; false on failure.
+bool EncodeGrayPNG(const unsigned char* input,
+                   int width,
+                   int height,
+                   int row_byte_width,
+                   std::vector<unsigned char>* output);
+
 }  // namespace image_diff_png
 
 #endif  // TESTING_IMAGE_DIFF_IMAGE_DIFF_PNG_H_