Reuse expectations in --regenerate_expected Improves --regenerate_expected heuristics as follows: 1. Reuses the least specific expected image that exists. 2. Removes non-matching or redundant expected images. 3. Detects and removes expected images for excess pages, rather than stopping at the last "actual" page. These improvements should reduce manual work when a change reduces differences with less specific expected images. Removals still must be reviewed manually, since the heuristics decide without global knowledge about other test configurations which might still need a given image. Fixed: pdfium:1987 Change-Id: I32393cefe95509c18cd9cc42aa5fa43b61524ec0 Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/103770 Commit-Queue: K. Moon <kmoon@chromium.org> Reviewed-by: Lei Zhang <thestig@chromium.org>

commit: a29e1163ad823aaa52ffb6247a6479412296039e [log] [tgz]
author: K. Moon <kmoon@chromium.org> Wed Feb 15 18:02:45 2023 +0000
committer: Pdfium LUCI CQ <pdfium-scoped@luci-project-accounts.iam.gserviceaccount.com> Wed Feb 15 18:02:45 2023 +0000
tree: 2f2b513e054385467e1970fff2220755b1118302
parent: c5387b9b172508a4acb54f5323bdb460ece72db0 [diff]
diff --git a/testing/tools/pngdiffer.py b/testing/tools/pngdiffer.py
index b2e70d2..91468bb 100755
--- a/testing/tools/pngdiffer.py
+++ b/testing/tools/pngdiffer.py

@@ -133,15 +133,50 @@
     path_templates = _PathTemplates(input_filename, source_dir, working_dir,
                                     self.os_name, self.suffix_order)
     for page in itertools.count():
+      expected_paths = path_templates.GetExpectedPaths(page)
+
+      first_match = None
+      last_match = None
       page_diff = ImageDiff(actual_path=path_templates.GetActualPath(page))
-      if not os.path.isfile(page_diff.actual_path):
-        # No more actual pages.
+      if os.path.exists(page_diff.actual_path):
+        # Match against all expected page images.
+        for index, expected_path in enumerate(expected_paths):
+          page_diff.expected_path = expected_path
+          if not self._RunImageCompareCommand(page_diff):
+            if first_match is None:
+              first_match = index
+            last_match = index
+
+        if last_match == 0:
+          # Regeneration not needed. This case may be reached if only some, but
+          # not all, pages need to be regenerated.
+          continue
+      elif expected_paths:
+        # Remove all expected page images.
+        print(f'WARNING: {input_filename} has extra expected page {page}')
+        first_match = 0
+        last_match = len(expected_paths)
+      else:
+        # No more expected or actual pages.
         break
 
+      # Try to reuse expectations by removing intervening non-matches.
+      #
+      # TODO(crbug.com/pdfium/1988): This can make mistakes due to a lack of
+      # global knowledge about other test configurations, which is why it just
+      # creates backup files rather than immediately removing files.
+      if last_match is not None:
+        if first_match > 1:
+          print(f'WARNING: {input_filename}.{page} has non-adjacent match')
+        if first_match != last_match:
+          print(f'WARNING: {input_filename}.{page} has redundant matches')
+
+        for expected_path in expected_paths[:last_match]:
+          os.rename(expected_path, expected_path + '.bak')
+        continue
+
       # Regenerate the most specific expected path that exists. If there are no
       # existing expectations, regenerate the base case.
-      #
-      # TODO(crbug.com/pdfium/1987): Clean up redundant expectations.
       expected_path = path_templates.GetExpectedPath(page)
       shutil.copyfile(page_diff.actual_path, expected_path)
       self._RunCommand([_PNG_OPTIMIZER, expected_path])

diff --git a/testing/tools/test_runner.py b/testing/tools/test_runner.py
index d72ca33..8ff288d 100644
--- a/testing/tools/test_runner.py
+++ b/testing/tools/test_runner.py

@@ -217,7 +217,11 @@
         '--regenerate_expected',
         action='store_true',
         help='Regenerates expected images. For each failing image diff, this '
-        'will regenerate the most specific expected image file that exists.')
+        'will regenerate the most specific expected image file that exists. '
+        'This also will suggest removals of unnecessary expected image files '
+        'by renaming them with an additional ".bak" extension, although these '
+        'removals should be reviewed manually. Use "git clean" to quickly deal '
+        'with any ".bak" files.')
 
     parser.add_argument(
         '--reverse-byte-order',
commit	a29e1163ad823aaa52ffb6247a6479412296039e	[log] [tgz]
author	K. Moon <kmoon@chromium.org>	Wed Feb 15 18:02:45 2023 +0000
committer	Pdfium LUCI CQ <pdfium-scoped@luci-project-accounts.iam.gserviceaccount.com>	Wed Feb 15 18:02:45 2023 +0000
tree	2f2b513e054385467e1970fff2220755b1118302
parent	c5387b9b172508a4acb54f5323bdb460ece72db0 [diff]