| #!/usr/bin/env python3 |
| # Copyright 2015 The PDFium Authors |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
from dataclasses import dataclass
import itertools
import os
import shutil
import subprocess
import sys
from typing import Optional
| |
# Values for the `image_matching_algorithm` argument. FUZZY_MATCHING adds
# `--fuzzy` to the image compare command.
EXACT_MATCHING = 'exact'
FUZZY_MATCHING = 'fuzzy'

# Executable looked up on PATH and run on regenerated expected images.
_PNG_OPTIMIZER = 'optipng'

# Each suffix order acts like a path along a tree, with the leaves being the
# most specific, and the root being the least specific. `{os}` is filled in
# with the OS name when the expected-path templates are built.
_COMMON_SUFFIX_ORDER = ('_{os}', '')
_AGG_SUFFIX_ORDER = ('_agg_{os}', '_agg', *_COMMON_SUFFIX_ORDER)
_GDI_AGG_SUFFIX_ORDER = (
    '_gdi_agg_{os}',
    '_gdi_agg',
    '_gdi_{os}',
    '_gdi',
    *_COMMON_SUFFIX_ORDER,
)
_GDI_SKIA_SUFFIX_ORDER = (
    '_gdi_skia_{os}',
    '_gdi_skia',
    '_gdi_{os}',
    '_gdi',
    *_COMMON_SUFFIX_ORDER,
)
_SKIA_SUFFIX_ORDER = ('_skia_{os}', '_skia', *_COMMON_SUFFIX_ORDER)
| |
| |
@dataclass
class ImageDiff:
    """Details about an image diff.

    Attributes:
      actual_path: Path to the actual image file.
      expected_path: Path to the expected image file, or `None` if no matches.
      diff_path: Path to the diff image file, or `None` if no diff.
      reason: Optional reason for the diff; `None` when no reason was recorded.
    """
    actual_path: str
    # These fields default to `None`, so they are annotated as optional.
    expected_path: Optional[str] = None
    diff_path: Optional[str] = None
    reason: Optional[str] = None
| |
class PNGDiffer:
    """Compares actual page images against expected images.

    Comparisons are delegated to the external `pdfium_diff` executable.
    Expected images are looked up through a suffix order chosen from the
    rendering configuration, from most specific to least specific.
    """

    def __init__(self, finder, reverse_byte_order, rendering_option,
                 default_renderer):
        """Selects the expected-image suffix order for a configuration.

        Args:
          finder: Object providing `ExecutablePath(name)` and `os_name`.
          reverse_byte_order: If true, `--reverse-byte-order` is passed to the
            image compare command.
          rendering_option: One of 'agg', 'gdi', or 'skia'.
          default_renderer: 'agg' or 'skia'. Only consulted when
            `rendering_option` is 'gdi'.

        Raises:
          ValueError: If the options do not select a suffix order.
        """
        self.pdfium_diff_path = finder.ExecutablePath('pdfium_diff')
        self.os_name = finder.os_name
        self.reverse_byte_order = reverse_byte_order

        self.suffix_order = None
        if rendering_option == 'gdi':
            if default_renderer == 'agg':
                self.suffix_order = _GDI_AGG_SUFFIX_ORDER
            elif default_renderer == 'skia':
                self.suffix_order = _GDI_SKIA_SUFFIX_ORDER
        elif rendering_option == 'agg':
            self.suffix_order = _AGG_SUFFIX_ORDER
        elif rendering_option == 'skia':
            self.suffix_order = _SKIA_SUFFIX_ORDER

        if not self.suffix_order:
            # Report both options: with 'gdi', the unsupported value is
            # `default_renderer`, not `rendering_option`.
            raise ValueError(f'rendering_option={rendering_option}, '
                             f'default_renderer={default_renderer}')

    def CheckMissingTools(self, regenerate_expected):
        """Checks that the tools needed for this run are installed.

        Args:
          regenerate_expected: Whether expected images will be regenerated.

        Returns:
          An error message if regeneration is requested but the PNG optimizer
          is not found on the PATH, otherwise `None`.
        """
        if regenerate_expected and not shutil.which(_PNG_OPTIMIZER):
            return (f'Please install "{_PNG_OPTIMIZER}" to regenerate '
                    'expected images.')
        return None

    def _MakePathTemplates(self, input_filename, source_dir, working_dir):
        """Builds the path templates for one input file."""
        return _PathTemplates(input_filename, source_dir, working_dir,
                              self.os_name, self.suffix_order)

    def GetActualFiles(self, input_filename, source_dir, working_dir):
        """Lists actual image paths for pages that have an expected image.

        Pages are visited from 0 upwards and the walk stops at the first page
        with no existing expected image. The actual files themselves are not
        checked for existence.

        Returns:
          A list of actual image paths, one per page.
        """
        actual_paths = []
        path_templates = self._MakePathTemplates(input_filename, source_dir,
                                                 working_dir)

        for page in itertools.count():
            if not path_templates.GetExpectedPath(page,
                                                  default_to_base=False):
                break
            actual_paths.append(path_templates.GetActualPath(page))
        return actual_paths

    def _RunCommand(self, cmd):
        """Runs `cmd`, capturing its output.

        Returns:
          `None` if the command exits with status 0, otherwise the
          `subprocess.CalledProcessError` describing the failure.
        """
        try:
            subprocess.run(cmd, capture_output=True, check=True)
        except subprocess.CalledProcessError as e:
            return e
        return None

    def _RunImageCompareCommand(self, image_diff, image_matching_algorithm):
        """Runs `pdfium_diff` on the actual and expected images.

        Returns:
          `None` if the command exits with status 0 (callers treat this as a
          match), otherwise the `subprocess.CalledProcessError`.
        """
        cmd = [self.pdfium_diff_path]
        if self.reverse_byte_order:
            cmd.append('--reverse-byte-order')
        if image_matching_algorithm == FUZZY_MATCHING:
            cmd.append('--fuzzy')
        cmd.extend([image_diff.actual_path, image_diff.expected_path])
        return self._RunCommand(cmd)

    def _RunImageDiffCommand(self, image_diff):
        """Runs `pdfium_diff --subtract`, writing to `image_diff.diff_path`.

        Returns:
          `None` if the command exits with status 0, otherwise the
          `subprocess.CalledProcessError`.
        """
        # TODO(crbug.com/pdfium/1925): Diff mode ignores --reverse-byte-order.
        return self._RunCommand([
            self.pdfium_diff_path, '--subtract', image_diff.actual_path,
            image_diff.expected_path, image_diff.diff_path
        ])

    def ComputeDifferences(self, input_filename, source_dir, working_dir,
                           image_matching_algorithm):
        """Computes differences between actual and expected image files.

        Args:
          input_filename: Name of the input file the pages were rendered from.
          source_dir: Directory containing the expected images.
          working_dir: Directory containing the actual images; diff images
            are written here too.
          image_matching_algorithm: `FUZZY_MATCHING` enables fuzzy comparison.

        Returns:
          A list of `ImageDiff` instances, one per differing page.
        """
        image_diffs = []

        path_templates = self._MakePathTemplates(input_filename, source_dir,
                                                 working_dir)
        for page in itertools.count():
            page_diff = ImageDiff(
                actual_path=path_templates.GetActualPath(page))
            if not os.path.exists(page_diff.actual_path):
                # No more actual pages.
                break

            expected_path = path_templates.GetExpectedPath(page)
            if os.path.exists(expected_path):
                page_diff.expected_path = expected_path

                compare_error = self._RunImageCompareCommand(
                    page_diff, image_matching_algorithm)
                if compare_error:
                    page_diff.reason = str(compare_error)

                    # TODO(crbug.com/pdfium/1925): Compare and diff
                    # simultaneously.
                    page_diff.diff_path = path_templates.GetDiffPath(page)
                    # A falsy result means the subtract command exited with
                    # status 0, which is treated as "no diff image".
                    if not self._RunImageDiffCommand(page_diff):
                        print(f'WARNING: No diff for {page_diff.actual_path}')
                        page_diff.diff_path = None
                else:
                    # Validate that no other paths match.
                    other_paths = path_templates.GetExpectedPaths(page)[1:]
                    for unexpected_path in other_paths:
                        page_diff.expected_path = unexpected_path
                        if not self._RunImageCompareCommand(
                                page_diff, image_matching_algorithm):
                            page_diff.reason = (
                                f'Also matches {unexpected_path}')
                            break
                    # Report against the most specific expectation.
                    page_diff.expected_path = expected_path
            else:
                if page == 0:
                    print('WARNING: no expected results files for '
                          f'{input_filename}')
                page_diff.reason = f'{expected_path} does not exist'

            if page_diff.reason:
                image_diffs.append(page_diff)

        return image_diffs

    def Regenerate(self, input_filename, source_dir, working_dir,
                   image_matching_algorithm):
        """Updates expected images so they match the actual images.

        For each page, one of the following happens:
          * The most specific expected image already matches: nothing is done.
          * A less specific expected image matches: the more specific,
            non-matching images before the last match are renamed to `.bak`.
          * No expected image matches: the actual image is copied over the
            most specific existing expected path (or the base path if none
            exists) and then optimized.
          * There is no actual image but expected images exist: all of them
            are renamed to `.bak`.

        Processing stops at the first page with neither an actual nor an
        expected image.
        """
        path_templates = self._MakePathTemplates(input_filename, source_dir,
                                                 working_dir)
        for page in itertools.count():
            expected_paths = path_templates.GetExpectedPaths(page)

            first_match = None
            last_match = None
            page_diff = ImageDiff(
                actual_path=path_templates.GetActualPath(page))
            if os.path.exists(page_diff.actual_path):
                # Match against all expected page images.
                for index, expected_path in enumerate(expected_paths):
                    page_diff.expected_path = expected_path
                    if not self._RunImageCompareCommand(
                            page_diff, image_matching_algorithm):
                        if first_match is None:
                            first_match = index
                        last_match = index

                if last_match == 0:
                    # Regeneration not needed. This case may be reached if
                    # only some, but not all, pages need to be regenerated.
                    continue
            elif expected_paths:
                # Remove all expected page images: the slice below then
                # covers the whole list.
                print(f'WARNING: {input_filename} has extra expected page '
                      f'{page}')
                first_match = 0
                last_match = len(expected_paths)
            else:
                # No more expected or actual pages.
                break

            # Try to reuse expectations by removing intervening non-matches.
            #
            # TODO(crbug.com/pdfium/1988): This can make mistakes due to a
            # lack of global knowledge about other test configurations, which
            # is why it just creates backup files rather than immediately
            # removing files.
            if last_match is not None:
                if first_match > 1:
                    print(f'WARNING: {input_filename}.{page} has '
                          'non-adjacent match')
                if first_match != last_match:
                    print(f'WARNING: {input_filename}.{page} has '
                          'redundant matches')

                for expected_path in expected_paths[:last_match]:
                    # `os.replace` overwrites a stale backup on every
                    # platform; `os.rename` raises on Windows if the
                    # destination already exists.
                    os.replace(expected_path, expected_path + '.bak')
                continue

            # Regenerate the most specific expected path that exists. If
            # there are no existing expectations, regenerate the base case.
            expected_path = path_templates.GetExpectedPath(page)
            shutil.copyfile(page_diff.actual_path, expected_path)
            # Optimization is best-effort, but a failure should be visible.
            optimize_error = self._RunCommand([_PNG_OPTIMIZER, expected_path])
            if optimize_error:
                print(f'WARNING: {_PNG_OPTIMIZER} failed for '
                      f'{expected_path}: {optimize_error}')
| |
| |
# Suffixes appended to the input root; `%d` is replaced by the page number.
_ACTUAL_TEMPLATE = '.pdf.%d.png'
_DIFF_TEMPLATE = '.pdf.%d.diff.png'


def _EscapePercent(path):
    """Doubles `%` in `path` so later `%`-formatting leaves it unchanged."""
    return os.fspath(path).replace('%', '%%')


class _PathTemplates:
    """Builds per-page actual, diff, and expected image paths for one input.

    Templates are stored as `%`-format strings whose only placeholder is the
    page number.
    """

    def __init__(self, input_filename, source_dir, working_dir, os_name,
                 suffix_order):
        """Pre-computes the path templates.

        Args:
          input_filename: Input file name; its extension is dropped.
          source_dir: Directory holding the expected images.
          working_dir: Directory holding the actual and diff images.
          os_name: Value substituted for `{os}` in each suffix.
          suffix_order: Expected-image suffixes, most specific first.
        """
        input_root, _ = os.path.splitext(input_filename)

        # Literal percent signs in caller-supplied names must be escaped, or
        # the later `template % page` would misread them as format specifiers.
        escaped_root = _EscapePercent(input_root)
        escaped_working_dir = _EscapePercent(working_dir)
        escaped_source_dir = _EscapePercent(source_dir)

        self.actual_path_template = os.path.join(
            escaped_working_dir, escaped_root + _ACTUAL_TEMPLATE)
        self.diff_path_template = os.path.join(
            escaped_working_dir, escaped_root + _DIFF_TEMPLATE)

        # Pre-create the available templates from most to least specific. We
        # generally expect the most specific case to match first.
        self.expected_templates = []
        for suffix in suffix_order:
            formatted_suffix = _EscapePercent(suffix.format(os=os_name))
            self.expected_templates.append(
                os.path.join(
                    escaped_source_dir,
                    f'{escaped_root}_expected{formatted_suffix}'
                    f'{_ACTUAL_TEMPLATE}'))
        assert self.expected_templates

    def GetActualPath(self, page):
        """Returns the actual image path for `page`."""
        return self.actual_path_template % page

    def GetDiffPath(self, page):
        """Returns the diff image path for `page`."""
        return self.diff_path_template % page

    def _GetPossibleExpectedPaths(self, page):
        """Returns all candidate expected paths, most specific first."""
        return [template % page for template in self.expected_templates]

    def GetExpectedPaths(self, page):
        """Returns the candidate expected paths that exist on disk."""
        return [
            path for path in self._GetPossibleExpectedPaths(page)
            if os.path.exists(path)
        ]

    def GetExpectedPath(self, page, default_to_base=True):
        """Returns the most specific expected path that exists.

        If no candidate exists, returns the least specific candidate when
        `default_to_base` is true, otherwise `None`.
        """
        last_not_found_expected_path = None
        for expected_path in self._GetPossibleExpectedPaths(page):
            if os.path.exists(expected_path):
                return expected_path
            last_not_found_expected_path = expected_path
        return last_not_found_expected_path if default_to_base else None