Reuse expectations in --regenerate_expected
Improves --regenerate_expected heuristics as follows:
1. Reuses the least specific expected image that exists.
2. Removes non-matching or redundant expected images.
3. Detects and removes expected images for excess pages, rather than
stopping at the last "actual" page.
These improvements should reduce manual work when a change reduces
differences with less specific expected images. Removals still must be
reviewed manually, since the heuristics decide without global knowledge
about other test configurations which might still need a given image.
Fixed: pdfium:1987
Change-Id: I32393cefe95509c18cd9cc42aa5fa43b61524ec0
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/103770
Commit-Queue: K. Moon <kmoon@chromium.org>
Reviewed-by: Lei Zhang <thestig@chromium.org>
diff --git a/testing/tools/pngdiffer.py b/testing/tools/pngdiffer.py
index b2e70d2..91468bb 100755
--- a/testing/tools/pngdiffer.py
+++ b/testing/tools/pngdiffer.py
@@ -133,15 +133,50 @@
path_templates = _PathTemplates(input_filename, source_dir, working_dir,
self.os_name, self.suffix_order)
for page in itertools.count():
+ expected_paths = path_templates.GetExpectedPaths(page)
+
+ first_match = None
+ last_match = None
page_diff = ImageDiff(actual_path=path_templates.GetActualPath(page))
- if not os.path.isfile(page_diff.actual_path):
- # No more actual pages.
+ if os.path.exists(page_diff.actual_path):
+ # Match against all expected page images.
+ for index, expected_path in enumerate(expected_paths):
+ page_diff.expected_path = expected_path
+ if not self._RunImageCompareCommand(page_diff):
+ if first_match is None:
+ first_match = index
+ last_match = index
+
+ if last_match == 0:
+ # Regeneration not needed. This case may be reached if only some, but
+ # not all, pages need to be regenerated.
+ continue
+ elif expected_paths:
+ # Remove all expected page images.
+ print(f'WARNING: {input_filename} has extra expected page {page}')
+ first_match = 0
+ last_match = len(expected_paths)
+ else:
+ # No more expected or actual pages.
break
+ # Try to reuse expectations by removing intervening non-matches.
+ #
+ # TODO(crbug.com/pdfium/1988): This can make mistakes due to a lack of
+ # global knowledge about other test configurations, which is why it just
+ # creates backup files rather than immediately removing files.
+ if last_match is not None:
+ if first_match > 1:
+ print(f'WARNING: {input_filename}.{page} has non-adjacent match')
+ if first_match != last_match:
+ print(f'WARNING: {input_filename}.{page} has redundant matches')
+
+ for expected_path in expected_paths[:last_match]:
+ os.rename(expected_path, expected_path + '.bak')
+ continue
+
# Regenerate the most specific expected path that exists. If there are no
# existing expectations, regenerate the base case.
- #
- # TODO(crbug.com/pdfium/1987): Clean up redundant expectations.
expected_path = path_templates.GetExpectedPath(page)
shutil.copyfile(page_diff.actual_path, expected_path)
self._RunCommand([_PNG_OPTIMIZER, expected_path])
diff --git a/testing/tools/test_runner.py b/testing/tools/test_runner.py
index d72ca33..8ff288d 100644
--- a/testing/tools/test_runner.py
+++ b/testing/tools/test_runner.py
@@ -217,7 +217,11 @@
'--regenerate_expected',
action='store_true',
help='Regenerates expected images. For each failing image diff, this '
- 'will regenerate the most specific expected image file that exists.')
+ 'will regenerate the most specific expected image file that exists. '
+ 'This also will suggest removals of unnecessary expected image files '
+ 'by renaming them with an additional ".bak" extension, although these '
+ 'removals should be reviewed manually. Use "git clean" to quickly deal '
+ 'with any ".bak" files.')
parser.add_argument(
'--reverse-byte-order',