blob: 31611780e0a5a1ed91f48b6309e30d660e9a1661 [file] [log] [blame]
# Copyright 2017 The PDFium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Compares pairs of page images and generates an HTML to look at differences.
"""
from __future__ import print_function
import functools
import glob
import multiprocessing
import os
import re
import subprocess
import sys
import webbrowser
# pylint: disable=relative-import
from common import DirectoryFinder
def GenerateOneDiffParallel(image_comparison, image):
  """Forwards one image to image_comparison.GenerateOneDiff().

  ImageComparison._GenerateDiffs binds this module-level function with
  functools.partial and hands it to a multiprocessing.Pool as the worker
  callable.

  Args:
    image_comparison: Object providing a GenerateOneDiff(image) method.
    image: Page image name to compare.

  Returns:
    Whatever image_comparison.GenerateOneDiff(image) returns.
  """
  diff_result = image_comparison.GenerateOneDiff(image)
  return diff_result
class ImageComparison(object):
  """Compares pairs of page images and generates an HTML page of differences.

  The images are all assumed to have the same name and be in two directories:
  [output_path]/[two_labels[0]] and [output_path]/[two_labels[1]]. For example,
  if output_path is "/tmp/images" and two_labels is ("before", "after"),
  images in /tmp/images/before will be compared to /tmp/images/after. The HTML
  produced will be in /tmp/images/compare.html and have relative links to these
  images, so /tmp/images is self-contained and can be moved around or shared.
  """

  def __init__(self, build_dir, output_path, two_labels, num_workers,
               threshold_fraction):
    """Constructor.

    Args:
      build_dir: Path to the build directory.
      output_path: Path with the pngs and where the html will be created.
      two_labels: Tuple of two strings that name the subdirectories in
          output_path containing the images.
      num_workers: Number of worker processes to start.
      threshold_fraction: Fraction (0.0 to 1.0) of changed pixels at or below
          which an image is considered to have only small changes. Such
          images are not displayed in the HTML, only listed.
    """
    self.build_dir = build_dir
    self.output_path = output_path
    self.two_labels = two_labels
    self.num_workers = num_workers
    # Diffs are handled as percentages, so keep the threshold in that unit.
    self.threshold = threshold_fraction * 100

  def Run(self, open_in_browser):
    """Runs the comparison and generates an HTML with the results.

    Args:
      open_in_browser: Whether to open the generated HTML in a web browser.

    Returns:
      Exit status: 1 if two_labels does not have exactly two elements,
      0 otherwise.
    """
    # Running a test defines a number of attributes on the fly.
    # pylint: disable=attribute-defined-outside-init

    if len(self.two_labels) != 2:
      print('two_labels must be a tuple of length 2', file=sys.stderr)
      return 1

    finder = DirectoryFinder(self.build_dir)
    self.img_diff_bin = finder.ExecutablePath('pdfium_diff')

    html_path = os.path.join(self.output_path, 'compare.html')

    self.diff_path = os.path.join(self.output_path, 'diff')
    if not os.path.exists(self.diff_path):
      os.makedirs(self.diff_path)

    self.image_locations = ImageLocations(self.output_path, self.diff_path,
                                          self.two_labels)

    difference = self._GenerateDiffs()

    small_changes = []

    with open(html_path, 'w') as f:
      f.write('<html><body>')
      f.write('<table>')
      for image in self.image_locations.Images():
        diff = difference[image]
        if diff is None:
          # GenerateOneDiff could not obtain a percentage for this image.
          print('Failed to compare image %s' % image, file=sys.stderr)
        elif diff > self.threshold:
          self._WriteImageRows(f, image, diff)
        else:
          small_changes.append((image, diff))
      # Small changes are listed together after all the full comparisons.
      self._WriteSmallChanges(f, small_changes)
      f.write('</table>')
      f.write('</body></html>')

    if open_in_browser:
      webbrowser.open(html_path)

    return 0

  def _GenerateDiffs(self):
    """Runs a diff over all pairs of page images, producing diff images.

    As a side effect, the diff images will be saved to [output_path]/diff
    with the same image name.

    Returns:
      A dict mapping image names to percentage of pixels changed, or to None
      for images whose comparison failed.
    """
    pool = multiprocessing.Pool(self.num_workers)
    worker_func = functools.partial(GenerateOneDiffParallel, self)

    try:
      # The timeout is a workaround for http://bugs.python.org/issue8296
      # which prevents KeyboardInterrupt from working.
      one_year_in_seconds = 3600 * 24 * 365
      worker_results = pool.map_async(
          worker_func, self.image_locations.Images()).get(one_year_in_seconds)
    except KeyboardInterrupt:
      pool.terminate()
      sys.exit(1)
    except Exception:
      # Do not leave worker processes running when a comparison raises.
      pool.terminate()
      raise
    else:
      pool.close()
      pool.join()

    # Each worker result is an (image, diff) pair.
    return dict(worker_results)

  def GenerateOneDiff(self, image):
    """Runs a diff over one pair of images, producing a diff image.

    As a side effect, the diff image will be saved to [output_path]/diff
    with the same image name.

    Args:
      image: Page image to compare.

    Returns:
      A tuple (image, diff), where image is the parameter and diff is the
      percentage of pixels changed. diff is 0 when the diff binary exits
      successfully, and None when it fails without printing a percentage.
    """
    left = self.image_locations.Left(image)
    right = self.image_locations.Right(image)

    try:
      # universal_newlines makes the captured output text rather than bytes
      # on Python 3, so the str regex below can be applied to it.
      subprocess.check_output([self.img_diff_bin, left, right],
                              universal_newlines=True)
    except subprocess.CalledProcessError as e:
      percentages = re.findall(r'\d+\.\d+', e.output or '')
      if not percentages:
        # Failure without a percentage in the output; Run() reports a None
        # diff as a failed comparison.
        return image, None
      percentage_change = float(percentages[0])
    else:
      # A zero exit status is treated as "no difference".
      return image, 0

    try:
      subprocess.check_output([
          self.img_diff_bin, '--diff', left, right,
          self.image_locations.Diff(image)
      ])
    except subprocess.CalledProcessError:
      # Expected path: the first run already failed for this pair.
      return image, percentage_change
    else:
      print('Warning: Should have failed the previous diff.', file=sys.stderr)
      return image, 0

  def _GetRelativePath(self, absolute_path):
    """Returns absolute_path expressed relative to output_path."""
    return os.path.relpath(absolute_path, start=self.output_path)

  def _WriteImageRows(self, f, image, diff):
    """Write table rows for a page image comparing its two versions.

    Args:
      f: Open HTML file to write to.
      image: Image file name.
      diff: Percentage of different pixels.
    """
    f.write('<tr><td colspan="2">')
    f.write('%s (%.4f%% changed)' % (image, diff))
    f.write('</td></tr>')

    f.write('<tr>')
    self._WritePageCompareTd(
        f, self._GetRelativePath(self.image_locations.Left(image)),
        self._GetRelativePath(self.image_locations.Right(image)))
    self._WritePageTd(f, self._GetRelativePath(
        self.image_locations.Diff(image)))
    f.write('</tr>')

  def _WritePageTd(self, f, image_path):
    """Write table column with a single image.

    Args:
      f: Open HTML file to write to.
      image_path: Path to image file.
    """
    f.write('<td>')
    f.write('<img src="%s">' % image_path)
    f.write('</td>')

  def _WritePageCompareTd(self, f, normal_image_path, hover_image_path):
    """Write table column for an image comparing its two versions.

    Args:
      f: Open HTML file to write to.
      normal_image_path: Path to image to be used in the "normal" state.
      hover_image_path: Path to image to be used in the "hover" state.
    """
    f.write('<td>')
    f.write('<img src="%s" '
            'onmouseover="this.src=\'%s\';" '
            'onmouseout="this.src=\'%s\';">' %
            (normal_image_path, hover_image_path, normal_image_path))
    f.write('</td>')

  def _WriteSmallChanges(self, f, small_changes):
    """Write table rows for all images considered to have only small changes.

    Args:
      f: Open HTML file to write to.
      small_changes: List of (image, change) tuples, where image is the page
          image and change is the percentage of pixels changed.
    """
    for image, change in small_changes:
      f.write('<tr><td colspan="2">')
      if not change:
        f.write('No change for: %s' % image)
      else:
        f.write('Small change of %.4f%% for: %s' % (change, image))
      f.write('</td></tr>')
class ImageLocations(object):
  """Contains the locations of input and output image files."""

  def __init__(self, output_path, diff_path, two_labels):
    """Constructor.

    Args:
      output_path: Path to directory with the pngs.
      diff_path: Path to directory where the diffs will be generated.
      two_labels: Tuple of two strings that name the subdirectories in
          output_path containing the images.
    """
    self.output_path = output_path
    self.diff_path = diff_path
    self.two_labels = two_labels

    self.left = self._FindImages(self.two_labels[0])
    self.right = self._FindImages(self.two_labels[1])

    # Only images present under both labels can be compared. A plain set
    # intersection works on both Python 2 and 3, unlike dict.viewkeys().
    common_images = set(self.left) & set(self.right)
    self.images = sorted(common_images, key=self._SortKey)

    self.diff = {
        image: os.path.join(self.diff_path, image) for image in self.images
    }

  @staticmethod
  def _SortKey(image_name):
    """Returns a key sorting by pdf filename, then by page number.

    Args:
      image_name: File name matching '*.*.png', e.g. 'doc.pdf.3.png'.

    Returns:
      A tuple usable as a sort key. Names whose second-to-last component is
      not an integer sort after the numbered pages of the same pdf, ordered
      by that component as a string. The full name is the final tie-breaker
      so that the resulting order is deterministic.
    """
    pdf_name, page, _ = image_name.rsplit('.', 2)
    try:
      return (pdf_name, 0, int(page), image_name)
    except ValueError:
      # The 0/1 marker keeps ints and strings from being compared directly.
      return (pdf_name, 1, page, image_name)

  def _FindImages(self, label):
    """Lists the page images found directly inside one labelled directory.

    Args:
      label: name of subdirectory of output_path to search.

    Returns:
      Dict mapping page image names to the path of the image file.
    """
    image_path_matcher = os.path.join(self.output_path, label, '*.*.png')
    return {
        os.path.basename(image_path): image_path
        for image_path in glob.glob(image_path_matcher)
    }

  def Images(self):
    """Returns a sorted list of all page images present in both directories."""
    return self.images

  def Left(self, test_case):
    """Returns the path for a page image in the first subdirectory."""
    return self.left[test_case]

  def Right(self, test_case):
    """Returns the path for a page image in the second subdirectory."""
    return self.right[test_case]

  def Diff(self, test_case):
    """Returns the path for a page diff image."""
    return self.diff[test_case]