safetynet_compare.py generates pngs to compare visually

Change-Id: Ief6853cbfdb5dca0c81d76978075a2fd04989f41
Reviewed-on: https://pdfium-review.googlesource.com/8830
Reviewed-by: dsinclair <dsinclair@chromium.org>
Commit-Queue: Henrique Nakashima <hnakashima@chromium.org>
diff --git a/testing/tools/safetynet_compare.py b/testing/tools/safetynet_compare.py
index 0cf1aec..3c5cff8 100755
--- a/testing/tools/safetynet_compare.py
+++ b/testing/tools/safetynet_compare.py
@@ -7,6 +7,7 @@
 
 import argparse
 import functools
+import glob
 import json
 import multiprocessing
 import os
@@ -24,6 +25,7 @@
 from safetynet_conclusions import PrintConclusionsDictHumanReadable
 from safetynet_conclusions import RATING_IMPROVEMENT
 from safetynet_conclusions import RATING_REGRESSION
+from safetynet_image import ImageComparison
 
 
 def RunSingleTestCaseParallel(this, run_label, build_dir, test_case):
@@ -107,6 +109,15 @@
 
     self._CleanUp(conclusions)
 
+    if self.args.png_dir:
+      image_comparison = ImageComparison(
+          self.after_build_dir,
+          self.args.png_dir,
+          ('before', 'after'),
+          self.args.num_workers,
+          self.args.png_threshold)
+      image_comparison.Run()
+
     return 0
 
   def _FreezeMeasureScript(self):
@@ -484,11 +495,20 @@
     if profile_file_path:
       command.append('--output-path=%s' % profile_file_path)
 
+    if self.args.png_dir:
+      command.append('--png')
+
+    if self.args.pages:
+      command.extend(['--pages', self.args.pages])
+
     output = RunCommandPropagateErr(command)
 
     if output is None:
       return None
 
+    if self.args.png_dir:
+      self._MoveImages(test_case, run_label)
+
     # Get the time number as output, making sure it's just a number
     output = output.strip()
     if re.match('^[0-9]+$', output):
@@ -496,6 +516,17 @@
 
     return None
 
+  def _MoveImages(self, test_case, run_label):
+    png_dir = os.path.join(self.args.png_dir, run_label)
+    if not os.path.exists(png_dir):
+      os.makedirs(png_dir)
+
+    test_case_dir, test_case_filename = os.path.split(test_case)
+    test_case_png_matcher = '%s.*.png' % test_case_filename
+    for output_png in glob.glob(os.path.join(test_case_dir,
+                                             test_case_png_matcher)):
+      shutil.move(output_png, png_dir)
+
   def _GetProfileFilePath(self, run_label, test_case):
     if self.args.output_dir:
       output_filename = ('callgrind.out.%s.%s'
@@ -620,10 +651,22 @@
                            'the whole test harness. Limiting to only the '
                            'interesting section does not work on Release since '
                            'the delimiters are optimized out')
+  parser.add_argument('--pages',
+                      help='selects some pages to be rendered. Page numbers '
+                           'are 0-based. "--pages A" will render only page A. '
+                           '"--pages A-B" will render pages A to B '
+                           '(inclusive).')
   parser.add_argument('--num-workers', default=multiprocessing.cpu_count(),
                       type=int, help='run NUM_WORKERS jobs in parallel')
   parser.add_argument('--output-dir',
                       help='directory to write the profile data output files')
+  parser.add_argument('--png-dir', default=None,
+                      help='outputs pngs to the specified directory that can '
+                           'be compared with a static html generated. Will '
+                           'affect performance measurements.')
+  parser.add_argument('--png-threshold', default=0.0, type=float,
+                      help='Requires --png-dir. Threshold above which a png '
+                           'is considered to have changed.')
   parser.add_argument('--threshold-significant', default=0.02, type=float,
                       help='variations in performance above this factor are '
                            'considered significant')
@@ -668,10 +711,31 @@
       PrintErr('"%s" is not a directory' % args.output_dir)
       return 1
 
+  if args.png_dir:
+    args.png_dir = os.path.expanduser(args.png_dir)
+    if not os.path.isdir(args.png_dir):
+      PrintErr('"%s" is not a directory' % args.png_dir)
+      return 1
+
   if args.threshold_significant <= 0.0:
     PrintErr('--threshold-significant should receive a positive float')
     return 1
 
+  if args.png_threshold:
+    if not args.png_dir:
+      PrintErr('--png-threshold requires --png-dir to be specified.')
+      return 1
+
+    if args.png_threshold <= 0.0:
+      PrintErr('--png-threshold should receive a positive float')
+      return 1
+
+  if args.pages:
+    if not re.match(r'^\d+(-\d+)?$', args.pages):
+      PrintErr('Supported formats for --pages are "--pages 7" and '
+               '"--pages 3-6"')
+      return 1
+
   run = CompareRun(args)
   return run.Run()
 
diff --git a/testing/tools/safetynet_image.py b/testing/tools/safetynet_image.py
new file mode 100644
index 0000000..28df3c5
--- /dev/null
+++ b/testing/tools/safetynet_image.py
@@ -0,0 +1,300 @@
+# Copyright 2017 The PDFium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Compares pairs of page images and generates an HTML to look at differences.
+"""
+
+import functools
+import glob
+import multiprocessing
+import os
+import re
+import subprocess
+import sys
+import webbrowser
+
+from common import DirectoryFinder
+
+
+def GenerateOneDiffParallel(image_comparison, image):
+  return image_comparison.GenerateOneDiff(image)
+
+
+class ImageComparison(object):
+  """Compares pairs of page images and generates an HTML to look at differences.
+
+  The images are all assumed to have the same name and be in two directories:
+  [output_path]/[two_labels[0]] and [output_path]/[two_labels[1]]. For example,
+  if output_path is "/tmp/images" and two_labels is ("before", "after"),
+  images in /tmp/images/before will be compared to /tmp/images/after. The HTML
+  produced will be in /tmp/images/compare.html and have relative links to these
+  images, so /tmp/images is self-contained and can be moved around or shared.
+  """
+
+  def __init__(self, build_dir, output_path, two_labels,
+               num_workers, threshold_fraction):
+    """Constructor.
+
+    Args:
+      build_dir: Path to the build directory.
+      output_path: Path with the pngs and where the html will be created.
+      two_labels: Tuple of two strings that name the subdirectories in
+          output_path containing the images.
+      num_workers: Number of worker threads to start.
+      threshold_fraction: Minimum percentage (0.0 to 1.0) of pixels below which
+          an image is considered to have only small changes. They will not be
+          displayed on the HTML, only listed.
+    """
+    self.build_dir = build_dir
+    self.output_path = output_path
+    self.two_labels = two_labels
+    self.num_workers = num_workers
+    self.threshold = threshold_fraction * 100
+
+  def Run(self):
+    """Runs the comparison and generates an HTML with the results.
+
+    Returns:
+        Exit status.
+    """
+
+    if len(self.two_labels) != 2:
+      print >> sys.stderr, 'two_labels must be a tuple of length 2'
+      return 1
+
+    finder = DirectoryFinder(self.build_dir)
+    self.img_diff_bin = finder.ExecutablePath('pdfium_diff')
+
+    html_path = os.path.join(self.output_path, 'compare.html')
+
+    self.diff_path = os.path.join(self.output_path, 'diff')
+    if not os.path.exists(self.diff_path):
+      os.makedirs(self.diff_path)
+
+    self.image_locations = ImageLocations(
+        self.output_path, self.diff_path, self.two_labels)
+
+    difference = self._GenerateDiffs()
+
+    small_changes = []
+
+    with open(html_path, 'w') as f:
+      f.write('<html><body>')
+      f.write('<table>')
+      for image in self.image_locations.Images():
+        diff = difference[image]
+        if diff is None:
+          print >> sys.stderr, 'Failed to compare image %s' % image
+        elif diff > self.threshold:
+          self._WriteImageRows(f, image, diff)
+        else:
+          small_changes.append((image, diff))
+      self._WriteSmallChanges(f, small_changes)
+      f.write('</table>')
+      f.write('</body></html>')
+
+    webbrowser.open(html_path)
+    return 0
+
+  def _GenerateDiffs(self):
+    """Runs a diff over all pairs of page images, producing diff images.
+
+    As a side effect, the diff images will be saved to [output_path]/diff
+    with the same image name.
+
+    Returns:
+      A dict mapping image names to percentage of pixels changes.
+    """
+    difference = {}
+    pool = multiprocessing.Pool(self.num_workers)
+    worker_func = functools.partial(GenerateOneDiffParallel, self)
+
+    try:
+      # The timeout is a workaround for http://bugs.python.org/issue8296
+      # which prevents KeyboardInterrupt from working.
+      one_year_in_seconds = 3600 * 24 * 365
+      worker_results = (pool.map_async(worker_func,
+                                       self.image_locations.Images())
+                        .get(one_year_in_seconds))
+      for worker_result in worker_results:
+        image, result = worker_result
+        difference[image] = result
+    except KeyboardInterrupt:
+      pool.terminate()
+      sys.exit(1)
+    else:
+      pool.close()
+
+    pool.join()
+
+    return difference
+
+  def GenerateOneDiff(self, image):
+    """Runs a diff over one pair of images, producing a diff image.
+
+    As a side effect, the diff image will be saved to [output_path]/diff
+    with the same image name.
+
+    Args:
+      image: Page image to compare.
+
+    Returns:
+      A tuple (image, diff), where image is the parameter and diff is the
+      percentage of pixels changed.
+    """
+    try:
+      subprocess.check_output(
+          [self.img_diff_bin,
+           self.image_locations.Left(image),
+           self.image_locations.Right(image)])
+    except subprocess.CalledProcessError as e:
+      percentage_change = float(re.findall(r'\d+\.\d+', e.output)[0])
+    else:
+      return image, 0
+
+    try:
+      subprocess.check_output(
+          [self.img_diff_bin,
+           '--diff',
+           self.image_locations.Left(image),
+           self.image_locations.Right(image),
+           self.image_locations.Diff(image)])
+    except subprocess.CalledProcessError as e:
+      return image, percentage_change
+    else:
+      print >> sys.stderr, 'Warning: Should have failed the previous diff.'
+      return image, 0
+
+  def _GetRelativePath(self, absolute_path):
+    return os.path.relpath(absolute_path, start=self.output_path)
+
+  def _WriteImageRows(self, f, image, diff):
+    """Write table rows for a page image comparing its two versions.
+
+    Args:
+      f: Open HTML file to write to.
+      image: Image file name.
+      diff: Percentage of different pixels.
+    """
+    f.write('<tr><td colspan="2">')
+    f.write('%s (%.4f%% changed)' % (image, diff))
+    f.write('</td></tr>')
+
+    f.write('<tr>')
+    self._WritePageCompareTd(
+        f,
+        self._GetRelativePath(self.image_locations.Left(image)),
+        self._GetRelativePath(self.image_locations.Right(image)))
+    self._WritePageTd(
+        f,
+        self._GetRelativePath(self.image_locations.Diff(image)))
+    f.write('</tr>')
+
+  def _WritePageTd(self, f, image_path):
+    """Write table column with a single image.
+
+    Args:
+      f: Open HTML file to write to.
+      image_path: Path to image file.
+    """
+    f.write('<td>')
+    f.write('<img src="%s">' % image_path)
+    f.write('</td>')
+
+  def _WritePageCompareTd(self, f, normal_image_path, hover_image_path):
+    """Write table column for an image comparing its two versions.
+
+    Args:
+      f: Open HTML file to write to.
+      normal_image_path: Path to image to be used in the "normal" state.
+      hover_image_path: Path to image to be used in the "hover" state.
+    """
+    f.write('<td>')
+    f.write('<img src="%s" '
+            'onmouseover="this.src=\'%s\';" '
+            'onmouseout="this.src=\'%s\';">' % (normal_image_path,
+                                                hover_image_path,
+                                                normal_image_path))
+    f.write('</td>')
+
+  def _WriteSmallChanges(self, f, small_changes):
+    """Write table rows for all images considered to have only small changes.
+
+    Args:
+      f: Open HTML file to write to.
+      small_changes: List of (image, change) tuples, where image is the page
+          image and change is the percentage of pixels changed.
+    """
+    for image, change in small_changes:
+      f.write('<tr><td colspan="2">')
+      if not change:
+        f.write('No change for: %s' % image)
+      else:
+        f.write('Small change of %.4f%% for: %s' % (change, image))
+      f.write('</td></tr>')
+
+
+class ImageLocations(object):
+  """Contains the locations of input and output image files.
+  """
+
+  def __init__(self, output_path, diff_path, two_labels):
+    """Constructor.
+
+    Args:
+      output_path: Path to directory with the pngs.
+      diff_path: Path to directory where the diffs will be generated.
+      two_labels: Tuple of two strings that name the subdirectories in
+          output_path containing the images.
+    """
+    self.output_path = output_path
+    self.diff_path = diff_path
+    self.two_labels = two_labels
+
+    self.left = self._FindImages(self.two_labels[0])
+    self.right = self._FindImages(self.two_labels[1])
+
+    self.images = list(self.left.viewkeys() & self.right.viewkeys())
+
+    # Sort by pdf filename, then page number
+    def KeyFn(s):
+      pieces = s.rsplit('.', 2)
+      return (pieces[0], int(pieces[1]))
+
+    self.images.sort(key=KeyFn)
+    self.diff = {image: os.path.join(self.diff_path, image)
+                 for image in self.images}
+
+  def _FindImages(self, label):
+    """Traverses a dir and builds a dict of all page images to compare in it.
+
+    Args:
+      label: name of subdirectory of output_path to traverse.
+
+    Returns:
+      Dict mapping page image names to the path of the image file.
+    """
+    image_path_matcher = os.path.join(self.output_path, label, '*.*.png')
+    image_paths = glob.glob(image_path_matcher)
+
+    image_dict = {os.path.split(image_path)[1]: image_path
+                  for image_path in image_paths}
+
+    return image_dict
+
+  def Images(self):
+    """Returns a list of all page images present in both directories."""
+    return self.images
+
+  def Left(self, test_case):
+    """Returns the path for a page image in the first subdirectory."""
+    return self.left[test_case]
+
+  def Right(self, test_case):
+    """Returns the path for a page image in the second subdirectory."""
+    return self.right[test_case]
+
+  def Diff(self, test_case):
+    """Returns the path for a page diff image."""
+    return self.diff[test_case]
diff --git a/testing/tools/safetynet_measure.py b/testing/tools/safetynet_measure.py
index 7b5bd5f..ac362b2 100755
--- a/testing/tools/safetynet_measure.py
+++ b/testing/tools/safetynet_measure.py
@@ -105,8 +105,14 @@
   def _BuildTestHarnessCommand(self):
     """Builds command to run the test harness."""
     cmd = [self.pdfium_test_path, '--send-events']
+
     if self.args.interesting_section:
       cmd.append('--callgrind-delim')
+    if self.args.png:
+      cmd.append('--png')
+    if self.args.pages:
+      cmd.append('--pages=%s' % self.args.pages)
+
     cmd.append(self.args.pdf_path)
     return cmd
 
@@ -140,6 +146,14 @@
                            'Limiting to only the interesting section does not '
                            'work on Release since the delimiters are optimized '
                            'out. Callgrind only.')
+  parser.add_argument('--png', action='store_true',
+                      help='outputs a png image on the same location as the '
+                           'pdf file')
+  parser.add_argument('--pages',
+                      help='selects some pages to be rendered. Page numbers '
+                           'are 0-based. "--pages A" will render only page A. '
+                           '"--pages A-B" will render pages A to B '
+                           '(inclusive).')
   parser.add_argument('--output-path',
                       help='where to write the profile data output file')
   args = parser.parse_args()