Add script to compare performance of two versions of pdfium.

Run from the pdfium root:

$ testing/tools/safetynet_compare.py testing
This compares the current branch with and without local changes.

$ testing/tools/safetynet_compare.py testing/corpus --branch-before x
This compares the current branch + local changes against branch x.
It runs only the corpus tests.

$ testing/tools/safetynet_compare.py testing --branch-before x
    --branch-after y --output-dir=~/output_compare
This compares branch x and branch y. x and y can be revision hashes.
The callgrind.out files of cases with significant changes will be
created in ~/output_compare.
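
$ testing/tools/safetynet_compare.py testing --machine-readable
This prints the results as JSON in the format described in
ComparisonConclusions.GetOutputDict(), so other tools can consume them.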

$ testing/tools/safetynet_compare.py -h
Print all options.

Change-Id: I43aaf5fe890745db611fb3bc00a656ef799fdfef
Reviewed-on: https://pdfium-review.googlesource.com/7390
Reviewed-by: Lei Zhang <thestig@chromium.org>
Commit-Queue: Henrique Nakashima <hnakashima@chromium.org>
diff --git a/testing/tools/common.py b/testing/tools/common.py
index b6e4a7d..c3bc218 100755
--- a/testing/tools/common.py
+++ b/testing/tools/common.py
@@ -5,6 +5,7 @@
 
 import glob
 import os
+import re
 import subprocess
 import sys
 
@@ -96,3 +97,14 @@
     if other_components:
       result = os.path.join(result, other_components)
     return result
+
+
+def GetBooleanGnArg(arg_name, build_dir):
+  """Extracts the value of a boolean flag in args.gn."""
+  cwd = os.getcwd()
+  os.chdir(build_dir)
+  gn_args_output = subprocess.check_output(
+      ['gn', 'args', '.', '--list=%s' % arg_name, '--short'])
+  os.chdir(cwd)
+  arg_match = re.search('%s = (.*)' % arg_name, gn_args_output)
+  return arg_match.group(1) == 'true'
diff --git a/testing/tools/githelper.py b/testing/tools/githelper.py
new file mode 100644
index 0000000..42cc57d
--- /dev/null
+++ b/testing/tools/githelper.py
@@ -0,0 +1,50 @@
+# Copyright 2017 The PDFium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Classes for dealing with git."""
+
+import subprocess
+
+
+class GitHelper(object):
+  """Issues git commands. Stateful."""
+
+  def __init__(self):
+    self.stashed = 0
+
+  def Checkout(self, branch):
+    """Checks out a branch."""
+    subprocess.check_output(['git', 'checkout', branch])
+
+  def StashPush(self):
+    """Stashes uncommitted changes."""
+    output = subprocess.check_output(['git', 'stash', '--include-untracked'])
+    if 'No local changes to save' in output:
+      return False
+
+    self.stashed += 1
+    return True
+
+  def StashPopAll(self):
+    """Pops as many changes as this instance stashed."""
+    while self.stashed > 0:
+      subprocess.check_output(['git', 'stash', 'pop'])
+      self.stashed -= 1
+
+  def GetCurrentBranchName(self):
+    """Returns a string with the current branch name."""
+    return subprocess.check_output(
+        ['git', 'rev-parse', '--abbrev-ref', 'HEAD']).strip()
+
+  def BranchExists(self, branch_name):
+    """Return whether a branch with the given name exists."""
+    try:
+      subprocess.check_output(['git', 'rev-parse', '--verify',
+                               branch_name])
+      return True
+    except subprocess.CalledProcessError:
+      return False
+
+  def CloneLocal(self, source_repo, new_repo):
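+    """Clones the repository at |source_repo| into |new_repo|."""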
+    subprocess.check_call(['git', 'clone', source_repo, new_repo])
diff --git a/testing/tools/safetynet_compare.py b/testing/tools/safetynet_compare.py
new file mode 100755
index 0000000..e54bec5
--- /dev/null
+++ b/testing/tools/safetynet_compare.py
@@ -0,0 +1,680 @@
+#!/usr/bin/env python
+# Copyright 2017 The PDFium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Compares the performance of two versions of the pdfium code."""
+
+import argparse
+import functools
+import json
+import multiprocessing
+import os
+import re
+import shutil
+import subprocess
+import sys
+import tempfile
+
+from common import GetBooleanGnArg
+from githelper import GitHelper
+from safetynet_conclusions import ComparisonConclusions
+from safetynet_conclusions import PrintConclusionsDictHumanReadable
+from safetynet_conclusions import RATING_IMPROVEMENT
+from safetynet_conclusions import RATING_REGRESSION
+
+
+def PrintErr(s):
+  """Prints s to stderr."""
+  print >> sys.stderr, s
+
+
+def RunSingleTestCaseParallel(this, run_label, build_dir, test_case):
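+  """Runs one test case; module-level so multiprocessing can use it."""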
+  result = this.RunSingleTestCase(run_label, build_dir, test_case)
+  return (test_case, result)
+
+
+class CompareRun(object):
+  """A comparison between two branches of pdfium."""
+
+  def __init__(self, args):
+    self.git = GitHelper()
+    self.args = args
+    self._InitPaths()
+
+  def _InitPaths(self):
+    if self.args.this_repo:
+      measure_script_path = os.path.join(self.args.build_dir,
+                                         'safetynet_measure_current.py')
+    else:
+      measure_script_path = 'testing/tools/safetynet_measure.py'
+    self.safe_measure_script_path = os.path.abspath(measure_script_path)
+
+    input_file_re = re.compile('^.+[.]pdf$')
+    self.test_cases = []
+    for input_path in self.args.input_paths:
+      if os.path.isfile(input_path):
+        self.test_cases.append(input_path)
+      elif os.path.isdir(input_path):
+        for file_dir, _, filename_list in os.walk(input_path):
+          for input_filename in filename_list:
+            if input_file_re.match(input_filename):
+              file_path = os.path.join(file_dir, input_filename)
+              if os.path.isfile(file_path):
+                self.test_cases.append(file_path)
+
+    self.after_build_dir = self.args.build_dir
+    if self.args.build_dir_before:
+      self.before_build_dir = self.args.build_dir_before
+    else:
+      self.before_build_dir = self.after_build_dir
+
+  def Run(self):
+    """Runs comparison by checking out branches, building and measuring them.
+
+    Returns:
+      Exit code for the script.
+    """
+    if self.args.this_repo:
+      self._FreezeMeasureScript()
+
+    if self.args.branch_after:
+      if self.args.this_repo:
+        before, after = self._ProfileTwoOtherBranchesInThisRepo(
+            self.args.branch_before,
+            self.args.branch_after)
+      else:
+        before, after = self._ProfileTwoOtherBranches(
+            self.args.branch_before,
+            self.args.branch_after)
+    elif self.args.branch_before:
+      if self.args.this_repo:
+        before, after = self._ProfileCurrentAndOtherBranchInThisRepo(
+            self.args.branch_before)
+      else:
+        before, after = self._ProfileCurrentAndOtherBranch(
+            self.args.branch_before)
+    else:
+      if self.args.this_repo:
+        before, after = self._ProfileLocalChangesAndCurrentBranchInThisRepo()
+      else:
+        before, after = self._ProfileLocalChangesAndCurrentBranch()
+
+    conclusions = self._DrawConclusions(before, after)
+    conclusions_dict = conclusions.GetOutputDict()
+
+    self._PrintConclusions(conclusions_dict)
+
+    self._CleanUp(conclusions)
+
+    return 0
+
+  def _FreezeMeasureScript(self):
+    """Freezes a version of the measuring script.
+
+    This is needed to make sure we are comparing the pdfium library changes and
+    not script changes that may happen between the two branches.
+    """
+    subprocess.check_output(['cp', 'testing/tools/safetynet_measure.py',
+                             self.safe_measure_script_path])
+
+  def _ProfileTwoOtherBranchesInThisRepo(self, before_branch, after_branch):
+    """Profiles two branches that are not the current branch.
+
+    This is done in the local repository and changes may not be restored if the
+    script fails or is interrupted.
+
+    after_branch does not need to descend from before_branch; they will be
+    measured the same way.
+
+    Args:
+      before_branch: One branch to profile.
+      after_branch: Other branch to profile.
+
+    Returns:
+      A tuple (before, after), where each of before and after is a dict
+      mapping a test case name to the profiling values for that test case
+      in the given branch.
+    """
+    branch_to_restore = self.git.GetCurrentBranchName()
+
+    self._StashLocalChanges()
+
+    self._CheckoutBranch(after_branch)
+    self._BuildCurrentBranch(self.after_build_dir)
+    after = self._MeasureCurrentBranch('after', self.after_build_dir)
+
+    self._CheckoutBranch(before_branch)
+    self._BuildCurrentBranch(self.before_build_dir)
+    before = self._MeasureCurrentBranch('before', self.before_build_dir)
+
+    self._CheckoutBranch(branch_to_restore)
+    self._RestoreLocalChanges()
+
+    return before, after
+
+  def _ProfileTwoOtherBranches(self, before_branch, after_branch):
+    """Profiles two branches that are not the current branch.
+
+    This is done in new, cloned repositories, so it is safer but slower and
+    requires downloads.
+
+    after_branch does not need to descend from before_branch; they will be
+    measured the same way.
+
+    Args:
+      before_branch: One branch to profile.
+      after_branch: Other branch to profile.
+
+    Returns:
+      A tuple (before, after), where each of before and after is a dict
+      mapping a test case name to the profiling values for that test case
+      in the given branch.
+    """
+    after = self._ProfileSeparateRepo('after',
+                                      self.after_build_dir,
+                                      after_branch)
+    before = self._ProfileSeparateRepo('before',
+                                       self.before_build_dir,
+                                       before_branch)
+    return before, after
+
+  def _ProfileCurrentAndOtherBranchInThisRepo(self, other_branch):
+    """Profiles the current branch (with uncommitted changes) and another one.
+
+    This is done in the local repository and changes may not be restored if the
+    script fails or is interrupted.
+
+    The current branch does not need to descend from other_branch.
+
+    Args:
+      other_branch: Other branch to profile that is not the current.
+
+    Returns:
+      A tuple (before, after), where each of before and after is a dict
+      mapping a test case name to the profiling values for that test case
+      in the given branch. The current branch is considered to be "after" and
+      the other branch is considered to be "before".
+    """
+    branch_to_restore = self.git.GetCurrentBranchName()
+
+    self._BuildCurrentBranch(self.after_build_dir)
+    after = self._MeasureCurrentBranch('after', self.after_build_dir)
+
+    self._StashLocalChanges()
+
+    self._CheckoutBranch(other_branch)
+    self._BuildCurrentBranch(self.before_build_dir)
+    before = self._MeasureCurrentBranch('before', self.before_build_dir)
+
+    self._CheckoutBranch(branch_to_restore)
+    self._RestoreLocalChanges()
+
+    return before, after
+
+  def _ProfileCurrentAndOtherBranch(self, other_branch):
+    """Profiles the current branch (with uncommitted changes) and another one.
+
+    This is done in new, cloned repositories, so it is safer but slower and
+    requires downloads.
+
+    The current branch does not need to descend from other_branch.
+
+    Args:
+      other_branch: Other branch to profile that is not the current one. If
+          None, the same branch will be used as the baseline.
+
+    Returns:
+      A tuple (before, after), where each of before and after is a dict
+      mapping a test case name to the profiling values for that test case
+      in the given branch. The current branch is considered to be "after" and
+      the other branch is considered to be "before".
+    """
+    self._BuildCurrentBranch(self.after_build_dir)
+    after = self._MeasureCurrentBranch('after', self.after_build_dir)
+
+    before = self._ProfileSeparateRepo('before',
+                                       self.before_build_dir,
+                                       other_branch)
+
+    return before, after
+
+  def _ProfileLocalChangesAndCurrentBranchInThisRepo(self):
+    """Profiles the current branch with and without uncommitted changes.
+
+    This is done in the local repository and changes may not be restored if the
+    script fails or is interrupted.
+
+    Returns:
+      A tuple (before, after), where each of before and after is a dict
+      mapping a test case name to the profiling values for that test case
+      using the given version. The current branch without uncommitted changes is
+      considered to be "before" and with uncommitted changes is considered to be
+      "after".
+    """
+    self._BuildCurrentBranch(self.after_build_dir)
+    after = self._MeasureCurrentBranch('after', self.after_build_dir)
+
+    pushed = self._StashLocalChanges()
+    if not pushed and not self.args.build_dir_before:
+      PrintErr('Warning: No local changes to compare')
+
+    before_build_dir = self.before_build_dir
+
+    self._BuildCurrentBranch(before_build_dir)
+    before = self._MeasureCurrentBranch('before', before_build_dir)
+
+    self._RestoreLocalChanges()
+
+    return before, after
+
+  def _ProfileLocalChangesAndCurrentBranch(self):
+    """Profiles the current branch with and without uncommitted changes.
+
+    This is done in new, cloned repositories, so it is safer but slower and
+    requires downloads.
+
+    Returns:
+      A tuple (before, after), where each of before and after is a dict
+      mapping a test case name to the profiling values for that test case
+      using the given version. The current branch without uncommitted changes is
+      considered to be "before" and with uncommitted changes is considered to be
+      "after".
+    """
+    return self._ProfileCurrentAndOtherBranch(other_branch=None)
+
+  def _ProfileSeparateRepo(self, run_label, relative_build_dir, branch):
+    """Profiles a branch in a a temporary git repository.
+
+    Args:
+      run_label: String to differentiate this version of the code in output
+          files from other versions.
+      relative_build_dir: Path to the build dir in the current working dir to
+          clone build args from.
+      branch: Branch to checkout in the new repository. None will
+          profile the same branch checked out in the original repo.
+    Returns:
+      A dict mapping each test case name to the profiling values for that
+      test case.
+    """
+    build_dir = self._CreateTempRepo('repo_%s' % run_label,
+                                     relative_build_dir,
+                                     branch)
+
+    self._BuildCurrentBranch(build_dir)
+    return self._MeasureCurrentBranch(run_label, build_dir)
+
+  def _CreateTempRepo(self, dir_name, relative_build_dir, branch):
+    """Clones a temporary git repository out of the current working dir.
+
+    Args:
+      dir_name: Name for the temporary repository directory
+      relative_build_dir: Path to the build dir in the current working dir to
+          clone build args from.
+      branch: Branch to checkout in the new repository. If None, the branch
+          checked out in the local repo is kept.
+    Returns:
+      Path to the build directory of the new repository.
+    """
+    cwd = os.getcwd()
+
+    repo_dir = tempfile.mkdtemp(suffix='-%s' % dir_name)
+    src_dir = os.path.join(repo_dir, 'pdfium')
+
+    self.git.CloneLocal(os.getcwd(), src_dir)
+
+    if branch is not None:
+      os.chdir(src_dir)
+      self.git.Checkout(branch)
+
+    os.chdir(repo_dir)
+    PrintErr('Syncing...')
+
+    cmd = ['gclient', 'config', '--unmanaged',
+           'https://pdfium.googlesource.com/pdfium.git']
+    if self.args.cache_dir:
+      cmd.append('--cache-dir=%s' % self.args.cache_dir)
+    subprocess.check_output(cmd)
+
+    subprocess.check_output(['gclient', 'sync'])
+    PrintErr('Done.')
+
+    build_dir = os.path.join(src_dir, relative_build_dir)
+    os.makedirs(build_dir)
+    os.chdir(src_dir)
+
+    source_gn_args = os.path.join(cwd, relative_build_dir, 'args.gn')
+    dest_gn_args = os.path.join(build_dir, 'args.gn')
+    shutil.copy(source_gn_args, dest_gn_args)
+
+    subprocess.check_output(['gn', 'gen', relative_build_dir])
+
+    os.chdir(cwd)
+
+    return build_dir
+
+  def _CheckoutBranch(self, branch):
+    PrintErr("Checking out branch '%s'" % branch)
+    self.git.Checkout(branch)
+
+  def _StashLocalChanges(self):
+    PrintErr('Stashing local changes')
+    return self.git.StashPush()
+
+  def _RestoreLocalChanges(self):
+    PrintErr('Restoring local changes')
+    self.git.StashPopAll()
+
+  def _BuildCurrentBranch(self, build_dir):
+    """Synchronizes and builds the current version of pdfium.
+
+    Args:
+      build_dir: String with path to build directory
+    """
+    PrintErr('Syncing...')
+    subprocess.check_output(['gclient', 'sync'])
+    PrintErr('Done.')
+
+    cmd = ['ninja', '-C', build_dir, 'pdfium_test']
+
+    if GetBooleanGnArg('use_goma', build_dir):
+      cmd.extend(['-j', '250'])
+
+    PrintErr('Building...')
+    subprocess.check_output(cmd)
+    PrintErr('Done.')
+
+  def _MeasureCurrentBranch(self, run_label, build_dir):
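+    """Measures all test cases, in parallel when multiple workers are set."""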
+    PrintErr('Measuring...')
+    if self.args.num_workers > 1 and len(self.test_cases) > 1:
+      results = self._RunAsync(run_label, build_dir)
+    else:
+      results = self._RunSync(run_label, build_dir)
+    PrintErr('Done.')
+
+    return results
+
+  def _RunSync(self, run_label, build_dir):
+    """Profiles the test cases synchronously.
+
+    Args:
+      run_label: String to differentiate this version of the code in output
+          files from other versions.
+      build_dir: String with path to build directory
+
+    Returns:
+      A dict mapping each test case name to the profiling values for that
+      test case.
+    """
+    results = {}
+
+    for test_case in self.test_cases:
+      result = self.RunSingleTestCase(run_label, build_dir, test_case)
+      if result is not None:
+        results[test_case] = result
+
+    return results
+
+  def _RunAsync(self, run_label, build_dir):
+    """Profiles the test cases asynchronously.
+
+    Uses as many workers as configured by --num-workers.
+
+    Args:
+      run_label: String to differentiate this version of the code in output
+          files from other versions.
+      build_dir: String with path to build directory
+
+    Returns:
+      A dict mapping each test case name to the profiling values for that
+      test case.
+    """
+    results = {}
+    pool = multiprocessing.Pool(self.args.num_workers)
+    worker_func = functools.partial(
+        RunSingleTestCaseParallel, self, run_label, build_dir)
+
+    try:
+      # The timeout is a workaround for http://bugs.python.org/issue8296
+      # which prevents KeyboardInterrupt from working.
+      one_year_in_seconds = 3600 * 24 * 365
+      worker_results = (pool.map_async(worker_func, self.test_cases)
+                        .get(one_year_in_seconds))
+      for worker_result in worker_results:
+        test_case, result = worker_result
+        if result is not None:
+          results[test_case] = result
+    except KeyboardInterrupt:
+      pool.terminate()
+      sys.exit(1)
+    else:
+      pool.close()
+
+    pool.join()
+
+    return results
+
+  def RunSingleTestCase(self, run_label, build_dir, test_case):
+    """Profiles a single test case.
+
+    Args:
+      run_label: String to differentiate this version of the code in output
+          files from other versions.
+      build_dir: String with path to build directory
+      test_case: Path to the test case.
+
+    Returns:
+      The measured profiling value for that test case.
+    """
+    command = [self.safe_measure_script_path, test_case,
+               '--build-dir=%s' % build_dir]
+
+    if self.args.interesting_section:
+      command.append('--interesting-section')
+
+    if self.args.profiler:
+      command.append('--profiler=%s' % self.args.profiler)
+
+    profile_file_path = self._GetProfileFilePath(run_label, test_case)
+    if profile_file_path:
+      command.append('--output-path=%s' % profile_file_path)
+
+    try:
+      output = subprocess.check_output(command)
+    except subprocess.CalledProcessError as e:
+      PrintErr(e)
+      PrintErr(35 * '=' + '  Output:  ' + 34 * '=')
+      PrintErr(e.output)
+      PrintErr(80 * '=')
+      return None
+
+    # Get the time number as output, making sure it's just a number
+    output = output.strip()
+    if re.match('^[0-9]+$', output):
+      return int(output)
+
+    return None
+
+  def _GetProfileFilePath(self, run_label, test_case):
+    if self.args.output_dir:
+      output_filename = ('callgrind.out.%s.%s'
+                         % (test_case.replace('/', '_'),
+                            run_label))
+      return os.path.join(self.args.output_dir, output_filename)
+    else:
+      return None
+
+  def _DrawConclusions(self, times_before_branch, times_after_branch):
+    """Draws conclusions comparing results of test runs in two branches.
+
+    Args:
+      times_before_branch: A dict mapping each test case name to the
+          profiling values for that test case in the branch to be considered
+          as the baseline.
+      times_after_branch: A dict mapping each test case name to the
+          profiling values for that test case in the branch to be considered
+          as the new version.
+
+    Returns:
+      ComparisonConclusions with all test cases processed.
+    """
+    conclusions = ComparisonConclusions(self.args.threshold_significant)
+
+    for test_case in sorted(self.test_cases):
+      before = times_before_branch.get(test_case)
+      after = times_after_branch.get(test_case)
+      conclusions.ProcessCase(test_case, before, after)
+
+    return conclusions
+
+  def _PrintConclusions(self, conclusions_dict):
+    """Prints the conclusions as the script output.
+
+    Depending on the script args, this can output a human-readable or a
+    machine-readable version of the conclusions.
+
+    Args:
+      conclusions_dict: Dict to print returned from
+          ComparisonConclusions.GetOutputDict().
+    """
+    if self.args.machine_readable:
+      print json.dumps(conclusions_dict)
+    else:
+      PrintConclusionsDictHumanReadable(
+          conclusions_dict, colored=True, key=self.args.case_order)
+
+  def _CleanUp(self, conclusions):
+    """Removes profile output files for uninteresting cases.
+
+    Cases without significant regressions or improvements are considered
+    uninteresting.
+
+    Args:
+      conclusions: A ComparisonConclusions.
+    """
+    if not self.args.output_dir:
+      return
+
+    if self.args.profiler != 'callgrind':
+      return
+
+    for case_result in conclusions.GetCaseResults().values():
+      if case_result.rating not in [RATING_REGRESSION, RATING_IMPROVEMENT]:
+        self._CleanUpOutputFile('before', case_result.case_name)
+        self._CleanUpOutputFile('after', case_result.case_name)
+
+  def _CleanUpOutputFile(self, run_label, case_name):
+    """Removes one profile output file.
+
+    If the output file does not exist, fails silently.
+
+    Args:
+      run_label: String to differentiate a version of the code in output
+          files from other versions.
+      case_name: String identifying test case for which to remove the output
+          file.
+    """
+    try:
+      os.remove(self._GetProfileFilePath(run_label, case_name))
+    except OSError:
+      pass
+
+
+def main():
+  parser = argparse.ArgumentParser()
+  parser.add_argument('input_paths', nargs='+',
+                      help='pdf files or directories to search for pdf files '
+                           'to run as test cases')
+  parser.add_argument('--branch-before',
+                      help='git branch to use as "before" for comparison. '
+                           'Omitting this will use the current branch '
+                           'without uncommitted changes as the baseline.')
+  parser.add_argument('--branch-after',
+                      help='git branch to use as "after" for comparison. '
+                           'Omitting this will use the current branch '
+                           'with uncommitted changes.')
+  parser.add_argument('--build-dir', default=os.path.join('out', 'Release'),
+                      help='relative path from the base source directory '
+                           'to the build directory')
+  parser.add_argument('--build-dir-before',
+                      help='relative path from the base source directory '
+                           'to the build directory for the "before" branch, if '
+                           'different from the build directory for the '
+                           '"after" branch')
+  parser.add_argument('--cache-dir', default=None,
+                      help='directory with a new or preexisting cache for '
+                           'downloads. Default is to not use a cache.')
+  parser.add_argument('--this-repo', action='store_true',
+                      help='use the repository where the script is instead of '
+                           'checking out a temporary one. This is faster and '
+                           'does not require downloads. The script restores '
+                           'the state of the local repo, but if it is killed '
+                           'or crashes, changes can remain stashed and you '
+                           'may be left on another branch.')
+  parser.add_argument('--profiler', default='callgrind',
+                      help='which profiler to use. Supports callgrind and '
+                           'perfstat for now. Default is callgrind.')
+  parser.add_argument('--interesting-section', action='store_true',
+                      help='whether to measure just the interesting section or '
+                           'the whole test harness. Limiting to only the '
+                           'interesting section does not work on Release since '
+                           'the delimiters are optimized out')
+  parser.add_argument('--num-workers', default=multiprocessing.cpu_count(),
+                      type=int, help='run NUM_WORKERS jobs in parallel')
+  parser.add_argument('--output-dir',
+                      help='directory to write the profile data output files')
+  parser.add_argument('--threshold-significant', default=0.02, type=float,
+                      help='variations in performance above this factor are '
+                           'considered significant')
+  parser.add_argument('--machine-readable', action='store_true',
+                      help='whether to produce machine-readable output. If '
+                           'enabled, the output is JSON in the format given '
+                           'by ComparisonConclusions.GetOutputDict(). Default '
+                           'is human-readable.')
+  parser.add_argument('--case-order', default=None,
+                      help='what key to use when sorting test cases in the '
+                           'output. Accepted values are "after", "before", '
+                           '"ratio" and "rating". Default is sorting by test '
+                           'case path.')
+
+  args = parser.parse_args()
+
+  # Always start at the pdfium src dir, which is assumed to be two levels above
+  # this script.
+  pdfium_src_dir = os.path.join(
+      os.path.dirname(__file__),
+      os.path.pardir,
+      os.path.pardir)
+  os.chdir(pdfium_src_dir)
+
+  git = GitHelper()
+
+  if args.branch_after and not args.branch_before:
+    PrintErr('--branch-after requires --branch-before to be specified.')
+    return 1
+
+  if args.branch_after and not git.BranchExists(args.branch_after):
+    PrintErr('Branch "%s" does not exist' % args.branch_after)
+    return 1
+
+  if args.branch_before and not git.BranchExists(args.branch_before):
+    PrintErr('Branch "%s" does not exist' % args.branch_before)
+    return 1
+
+  if args.output_dir:
+    args.output_dir = os.path.expanduser(args.output_dir)
+    if not os.path.isdir(args.output_dir):
+      PrintErr('"%s" is not a directory' % args.output_dir)
+      return 1
+
+  if args.threshold_significant <= 0.0:
+    PrintErr('--threshold-significant should receive a positive float')
+    return 1
+
+  run = CompareRun(args)
+  return run.Run()
+
+
+if __name__ == '__main__':
+  sys.exit(main())
diff --git a/testing/tools/safetynet_conclusions.py b/testing/tools/safetynet_conclusions.py
new file mode 100644
index 0000000..112274e
--- /dev/null
+++ b/testing/tools/safetynet_conclusions.py
@@ -0,0 +1,297 @@
+# Copyright 2017 The PDFium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Classes that draw conclusions out of a comparison and represent them."""
+
+from collections import Counter
+
+
+FORMAT_RED = '\033[01;31m{0}\033[00m'
+FORMAT_GREEN = '\033[01;32m{0}\033[00m'
+FORMAT_MAGENTA = '\033[01;35m{0}\033[00m'
+FORMAT_CYAN = '\033[01;36m{0}\033[00m'
+FORMAT_NORMAL = '{0}'
+
+RATING_FAILURE = 'failure'
+RATING_REGRESSION = 'regression'
+RATING_IMPROVEMENT = 'improvement'
+RATING_NO_CHANGE = 'no_change'
+RATING_SMALL_CHANGE = 'small_change'
+
+RATINGS = [
+    RATING_FAILURE,
+    RATING_REGRESSION,
+    RATING_IMPROVEMENT,
+    RATING_NO_CHANGE,
+    RATING_SMALL_CHANGE
+]
+
+RATING_TO_COLOR = {
+    RATING_FAILURE: FORMAT_MAGENTA,
+    RATING_REGRESSION: FORMAT_RED,
+    RATING_IMPROVEMENT: FORMAT_CYAN,
+    RATING_NO_CHANGE: FORMAT_GREEN,
+    RATING_SMALL_CHANGE: FORMAT_NORMAL,
+}
+
+
+class ComparisonConclusions(object):
+  """All conclusions drawn from a comparison.
+
+  This is initialized empty and then processes pairs of results for each test
+  case, determining the rating for that case, which can be:
+  "failure" if either or both runs for the case failed.
+  "regression" if there is a significant increase in time for the test case.
+  "improvement" if there is a significant decrease in time for the test case.
+  "no_change" if the time for the test case did not change at all.
+  "small_change" if the time for the test case changed but within the threshold.
+  """
+
+  def __init__(self, threshold_significant):
+    """Initializes an empty ComparisonConclusions.
+
+    Args:
+      threshold_significant: Float with the tolerance beyond which changes in
+          measurements are considered significant.
+
+          The change is measured as a multiplicative factor rather than an
+          additive fraction of the previous measurement; that is, a
+          threshold_significant of 1.0 will flag test cases that became over
+          100% slower (> 200% of the previous time measured) or over 100% faster
+          (< 50% of the previous time measured).
+
+          threshold_significant 0.02 -> 98.04% to 102% is not significant
+          threshold_significant 0.1 -> 90.9% to 110% is not significant
+          threshold_significant 0.25 -> 80% to 125% is not significant
+          threshold_significant 1 -> 50% to 200% is not significant
+          threshold_significant 4 -> 20% to 500% is not significant
+
+    """
+    self.threshold_significant = threshold_significant
+    self.threshold_significant_negative = (1 / (1 + threshold_significant)) - 1
+
+    self.params = {'threshold': threshold_significant}
+    self.summary = ComparisonSummary()
+    self.case_results = {}
+
+  def ProcessCase(self, case_name, before, after):
+    """Feeds a test case results to the ComparisonConclusions.
+
+    Args:
+      case_name: String identifying the case.
+      before: Measurement for the "before" version of the code.
+      after: Measurement for the "after" version of the code.
+    """
+
+    # Switch 0 to None to simplify the json dict output. All zeros are
+    # considered failed runs, so they will be represented by "null".
+    if not before:
+      before = None
+    if not after:
+      after = None
+
+    if not before or not after:
+      ratio = None
+      rating = RATING_FAILURE
+    else:
+      ratio = (float(after) / before) - 1.0
+      if ratio > self.threshold_significant:
+        rating = RATING_REGRESSION
+      elif ratio < self.threshold_significant_negative:
+        rating = RATING_IMPROVEMENT
+      elif ratio == 0:
+        rating = RATING_NO_CHANGE
+      else:
+        rating = RATING_SMALL_CHANGE
+
+    case_result = CaseResult(case_name, before, after, ratio, rating)
+
+    self.summary.ProcessCaseResult(case_result)
+    self.case_results[case_name] = case_result
+
+  def GetSummary(self):
+    """Gets the ComparisonSummary with consolidated totals."""
+    return self.summary
+
+  def GetCaseResults(self):
+    """Gets a dict mapping each test case identifier to its CaseResult."""
+    return self.case_results
+
+  def GetOutputDict(self):
+    """Returns a conclusions dict with all the conclusions drawn.
+
+    Returns:
+      A serializable dict with the format illustrated below:
+      {
+        "params": {
+          "threshold": 0.02
+        },
+        "summary": {
+          "total": 123,
+          "failure": 1,
+          "regression": 2,
+          "improvement": 1,
+          "no_change": 100,
+          "small_change": 19
+        },
+        "comparison_by_case": {
+          "testing/resources/new_test.pdf": {
+            "before": None,
+            "after": 1000,
+            "ratio": None,
+            "rating": "failure"
+          },
+          "testing/resources/test1.pdf": {
+            "before": 100,
+            "after": 120,
+            "ratio": 0.2,
+            "rating": "regression"
+          },
+          "testing/resources/test2.pdf": {
+            "before": 100,
+            "after": 2000,
+            "ratio": 19.0,
+            "rating": "regression"
+          },
+          "testing/resources/test3.pdf": {
+            "before": 1000,
+            "after": 1005,
+            "ratio": 0.005,
+            "rating": "small_change"
+          },
+          "testing/resources/test4.pdf": {
+            "before": 1000,
+            "after": 1000,
+            "ratio": 0.0,
+            "rating": "no_change"
+          },
+          "testing/resources/test5.pdf": {
+            "before": 1000,
+            "after": 600,
+            "ratio": -0.4,
+            "rating": "improvement"
+          }
+        }
+      }
+    """
+    output_dict = {}
+    output_dict['params'] = {'threshold': self.threshold_significant}
+    output_dict['summary'] = self.summary.GetOutputDict()
+    output_dict['comparison_by_case'] = {
+        cr.case_name: cr.GetOutputDict()
+        for cr in self.GetCaseResults().values()
+    }
+    return output_dict
+
+
+class ComparisonSummary(object):
+  """Totals computed for a comparison."""
+
+  def __init__(self):
+    self.rating_counter = Counter()
+
+  def ProcessCaseResult(self, case_result):
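+    """Accounts for one CaseResult in the totals."""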
+    self.rating_counter[case_result.rating] += 1
+
+  def GetTotal(self):
+    """Gets the number of test cases processed."""
+    return sum(self.rating_counter.values())
+
+  def GetCount(self, rating):
+    """Gets the number of test cases processed with a given rating."""
+    return self.rating_counter[rating]
+
+  def GetOutputDict(self):
+    """Returns a dict that can be serialized with all the totals."""
+    result = {'total': self.GetTotal()}
+    for rating in RATINGS:
+      result[rating] = self.GetCount(rating)
+    return result
+
+
+class CaseResult(object):
+  """The conclusion for the comparison of a single test case."""
+
+  def __init__(self, case_name, before, after, ratio, rating):
+    """Initializes an empty ComparisonConclusions.
+
+    Args:
+      case_name: String identifying the case.
+      before: Measurement for the "before" version of the code.
+      after: Measurement for the "after" version of the code.
+      ratio: Difference between |after| and |before| as a fraction of |before|.
+      rating: Rating for this test case.
+    """
+    self.case_name = case_name
+    self.before = before
+    self.after = after
+    self.ratio = ratio
+    self.rating = rating
+
+  def GetOutputDict(self):
+    """Returns a dict with the test case's conclusions."""
+    return {'before': self.before,
+            'after': self.after,
+            'ratio': self.ratio,
+            'rating': self.rating}
+
+
+def PrintConclusionsDictHumanReadable(conclusions_dict, colored, key=None):
+  """Prints a conclusions dict in a human-readable way.
+
+  Args:
+    conclusions_dict: Dict to print.
+    colored: Whether to color the output to highlight significant changes.
+    key: String with the CaseResult dictionary key to sort the cases.
+  """
+  # Print header
+  print '=' * 80
+  print '{0:>11s} {1:>15s}  {2}'.format(
+      '% Change',
+      'Time after',
+      'Test case')
+  print '-' * 80
+
+  color = FORMAT_NORMAL
+
+  # Print cases
+  if key is not None:
+    case_pairs = sorted(conclusions_dict['comparison_by_case'].iteritems(),
+                        key=lambda kv: kv[1][key])
+  else:
+    case_pairs = sorted(conclusions_dict['comparison_by_case'].iteritems())
+
+  for case_name, case_dict in case_pairs:
+    if case_dict['rating'] == RATING_FAILURE:
+      print '%s to measure time for %s' % (
+          RATING_TO_COLOR[RATING_FAILURE].format('Failed'), case_name)
+      continue
+
+    if colored:
+      color = RATING_TO_COLOR[case_dict['rating']]
+
+    print '{0} {1:15,d}  {2}'.format(
+        color.format('{:+11.4%}'.format(case_dict['ratio'])),
+        case_dict['after'],
+        case_name)
+
+  # Print totals
+  totals = conclusions_dict['summary']
+  print '=' * 80
+  print 'Test cases run: %d' % totals['total']
+
+  if colored:
+    color = FORMAT_MAGENTA if totals[RATING_FAILURE] else FORMAT_GREEN
+  print ('Failed to measure: %s'
+         % color.format(totals[RATING_FAILURE]))
+
+  if colored:
+    color = FORMAT_RED if totals[RATING_REGRESSION] else FORMAT_GREEN
+  print ('Regressions: %s'
+         % color.format(totals[RATING_REGRESSION]))
+
+  if colored:
+    color = FORMAT_CYAN if totals[RATING_IMPROVEMENT] else FORMAT_GREEN
+  print ('Improvements: %s'
+         % color.format(totals[RATING_IMPROVEMENT]))