Add flag to ignore images by their MD5 digest

BUG=

Review-Url: https://codereview.chromium.org/2649313005
diff --git a/testing/tools/gold.py b/testing/tools/gold.py
index 7598caf..db3bf81 100644
--- a/testing/tools/gold.py
+++ b/testing/tools/gold.py
@@ -51,7 +51,8 @@
 # }
 #
 class GoldResults(object):
-  def __init__(self, source_type, outputDir, propertiesStr, keyStr):
+  def __init__(self, source_type, outputDir, propertiesStr, keyStr,
+               ignore_hashes_file):
     """
     source_type is the source_type (=corpus) field used for all results.
     output_dir is the directory where the resulting images are copied and
@@ -60,6 +61,8 @@
                is used to set the top level fields in the output JSON file.
     keyStr is a string with space separated key/value pairs that
                is used to set the 'key' field in the output JSON file.
+    ignore_hashes_file is the path to a file that lists the image hashes
+               to ignore, one per line (blank lines are skipped).
     """
     self._source_type = source_type
     self._properties = self._parseKeyValuePairs(propertiesStr)
@@ -71,13 +74,22 @@
     if not os.path.exists(outputDir):
       os.makedirs(outputDir)
 
+    self._ignore_hashes = set()
+    if ignore_hashes_file:
+      with open(ignore_hashes_file, 'r') as ig_file:
+        hashes=[x.strip() for x in ig_file.readlines() if x.strip()]
+        self._ignore_hashes = set(hashes)
+
   def AddTestResult(self, testName, md5Hash, outputImagePath):
-    # Copy the image to <output_dir>/<md5Hash>.<image_extension>
+    # If the hash is in the list of hashes to ignore then we don't
+    # make a copy, but still add it to the result.
     imgExt = os.path.splitext(outputImagePath)[1].lstrip(".")
-    if not imgExt:
-      raise ValueError("File %s does not have an extension" % outputImagePath)
-    newFilePath = os.path.join(self._outputDir, md5Hash + '.' + imgExt)
-    shutil.copy2(outputImagePath, newFilePath)
+    if md5Hash not in self._ignore_hashes:
+      # Copy the image to <output_dir>/<md5Hash>.<image_extension>
+      if not imgExt:
+        raise ValueError("File %s does not have an extension" % outputImagePath)
+      newFilePath = os.path.join(self._outputDir, md5Hash + '.' + imgExt)
+      shutil.copy2(outputImagePath, newFilePath)
 
     # Add an entry to the list of test results
     self._results.append({
@@ -123,7 +135,11 @@
 
   keyStr = "arch arm64 compiler Clang configuration Debug"
 
-  gr = GoldResults("pdfium", testDir, propStr, keyStr)
+  hash_file = os.path.join(testDir, "ignore_hashes.txt")
+  with open(hash_file, 'wb') as f:
+    f.write("\n".join(["hash-1","hash-4"]) + "\n")
+
+  gr = GoldResults("pdfium", testDir, propStr, keyStr, hash_file)
   gr.AddTestResult("test-1", "hash-1", os.path.join(testDir, "image1.png"))
   gr.AddTestResult("test-2", "hash-2", os.path.join(testDir, "image2.png"))
   gr.AddTestResult("test-3", "hash-3", os.path.join(testDir, "image3.png"))
diff --git a/testing/tools/test_runner.py b/testing/tools/test_runner.py
index 92db911..3a31709 100644
--- a/testing/tools/test_runner.py
+++ b/testing/tools/test_runner.py
@@ -163,6 +163,9 @@
     parser.add_option('--gold_output_dir', default='', dest="gold_output_dir",
                       help='Path of where to write the JSON output to be uploaded to Gold.')
 
+    parser.add_option('--gold_ignore_hashes', default='', dest="gold_ignore_hashes",
+                      help='Path to a file with MD5 hashes we wish to ignore.')
+
     parser.add_option('--ignore_errors', action="store_true", dest="ignore_errors",
                       help='Prevents the return value from being non-zero when image comparison fails.')
 
@@ -227,7 +230,8 @@
       self.gold_results = gold.GoldResults("pdfium",
                                            options.gold_output_dir,
                                            options.gold_properties,
-                                           options.gold_key)
+                                           options.gold_key,
+                                           options.gold_ignore_hashes)
 
     if options.num_workers > 1 and len(test_cases) > 1:
       try: