| # Copyright 2015 The PDFium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| |
| import json |
| import os |
| import shlex |
| import shutil |
| import ssl |
| import urllib2 |
| |
| |
| def _ParseKeyValuePairs(kv_str): |
| """ |
| Parses a string of the type 'key1 value1 key2 value2' into a dict. |
| """ |
| kv_pairs = shlex.split(kv_str) |
| if len(kv_pairs) % 2: |
| raise ValueError('Uneven number of key/value pairs. Got %s' % kv_str) |
| return { kv_pairs[i]:kv_pairs[i + 1] for i in xrange(0, len(kv_pairs), 2) } |
| |
| |
| # This module downloads a json provided by Skia Gold with the expected baselines |
| # for each test file. |
| # |
| # The expected format for the json is: |
| # { |
| # "commit": { |
| # "author": "John Doe (jdoe@chromium.org)", |
| # "commit_time": 1510598123, |
| # "hash": "cee39e6e90c219cc91f2c94a912a06977f4461a0" |
| # }, |
| # "master": { |
| # "abc.pdf.1": { |
| # "0ec3d86f545052acd7c9a16fde8ca9d4": 1, |
| # "80455b71673becc9fbc100d6da56ca65": 1, |
| # "b68e2ecb80090b4502ec89ad1be2322c": 1 |
| # }, |
| # "defgh.pdf.0": { |
| # "01e020cd4cd05c6738e479a46a506044": 1, |
| # "b68e2ecb80090b4502ec89ad1be2322c": 1 |
| # } |
| # }, |
| # "changeLists": { |
| # "18499" : { |
| # "abc.pdf.1": { |
| # "d5dd649124cf1779152253dc8fb239c5": 1, |
| # "42a270581930579cdb0f28674972fb1a": 1 |
| # } |
| # } |
| # } |
| # } |
class GoldBaseline(object):
  """
  Expected image hashes per test case, downloaded from Skia Gold.

  The baseline json is fetched once, when the instance is created. If it
  cannot be downloaded or parsed, MatchLocalResult() answers
  BASELINE_DOWNLOAD_FAILED for every test case.
  """

  # Return values for MatchLocalResult().
  MATCH = 'match'
  MISMATCH = 'mismatch'
  NO_BASELINE = 'no_baseline'
  BASELINE_DOWNLOAD_FAILED = 'baseline_download_failed'

  def __init__(self, properties_str):
    """
    properties_str is a string with space separated key/value pairs. If it
    has an 'issue' key, its value is the cl number that is appended to the
    baseline URL.
    """
    self._properties = _ParseKeyValuePairs(properties_str)
    self._baselines = self._LoadSkiaGoldBaselines()

  def _LoadSkiaGoldBaselines(self):
    """
    Download the baseline json and return its 'master' section: a dict that
    maps each test case name to a dict with the expected hashes as keys.

    Returns an empty dict if the json has no 'master' section, and None if the
    json could not be downloaded or parsed.

    Raises ValueError if the response content type is not 'application/json'.
    """
    GOLD_BASELINE_URL = 'https://pdfium-gold.skia.org/json/baseline'
    EXPECTED_CONTENT_TYPE = 'application/json'
    MAX_TIMEOUT = 33  # 5 tries. (2, 4, 8, 16, 32)

    # If we have an issue number add it to the baseline URL
    cl_number_str = self._properties.get('issue', None)
    url = GOLD_BASELINE_URL + ('/' + cl_number_str if cl_number_str else '')

    json_data = ''
    timeout = 2
    while True:
      try:
        response = urllib2.urlopen(url, timeout=timeout)
        try:
          c_type = response.headers.get('Content-type', '')
          if c_type != EXPECTED_CONTENT_TYPE:
            # Not a network error, so it is deliberately not retried: it
            # propagates to the caller.
            raise ValueError('Invalid content type. Got %s instead of %s' % (
                c_type, EXPECTED_CONTENT_TYPE))
          json_data = response.read()
        finally:
          # Release the connection on every path, including the ValueError.
          response.close()
        break  # If this line is reached, then no exception occurred.
      except (ssl.SSLError, urllib2.HTTPError, urllib2.URLError) as e:
        # Retry with twice the timeout until the limit is exceeded.
        timeout *= 2
        if timeout < MAX_TIMEOUT:
          continue
        print('Error: Unable to read skia gold json from %s: %s' % (url, e))
        return None

    try:
      data = json.loads(json_data)
    except ValueError as e:
      print('Error: Malformed json read from %s: %s' % (url, e))
      return None

    # Valid json whose top level is not an object (e.g. a list or null) has no
    # 'master' section to look up; report it like any other unusable download.
    if not isinstance(data, dict):
      print('Error: Unexpected json read from %s: top level is not an object'
            % url)
      return None

    return data.get('master', {})

  def MatchLocalResult(self, test_name, md5_hash):
    """
    Match a locally generated hash of a test cases rendered image with the
    expected hashes downloaded in the baseline json.

    The baseline is a dict mapping the test case name to a dict with the
    expected hashes as keys. The test case name is looked up first, then the
    hash is matched with the options in that dict.

    Returns MATCH if the md5 provided matches the ones in the baseline json,
    MISMATCH if it does not, NO_BASELINE if the test case has no baseline, or
    BASELINE_DOWNLOAD_FAILED if the baseline could not be downloaded and parsed.
    """
    if self._baselines is None:
      return GoldBaseline.BASELINE_DOWNLOAD_FAILED

    if test_name not in self._baselines:
      return GoldBaseline.NO_BASELINE

    if md5_hash in self._baselines[test_name]:
      return GoldBaseline.MATCH

    return GoldBaseline.MISMATCH
| |
| |
| # This module collects and writes output in a format expected by the |
| # Gold baseline tool. Based on meta data provided explicitly and by |
| # adding a series of test results it can be used to produce |
| # a JSON file that is uploaded to Google Storage and ingested by Gold. |
| # |
| # The output will look similar to this: |
| # |
| # { |
| # "build_number" : "2", |
| # "gitHash" : "a4a338179013b029d6dd55e737b5bd648a9fb68c", |
| # "key" : { |
| # "arch" : "arm64", |
| # "compiler" : "Clang", |
| # }, |
| # "results" : [ |
| # { |
| # "key" : { |
| # "config" : "vk", |
| # "name" : "yuv_nv12_to_rgb_effect", |
| # "source_type" : "gm" |
| # }, |
| # "md5" : "7db34da246868d50ab9ddd776ce6d779", |
| # "options" : { |
| # "ext" : "png", |
| # "gamma_correct" : "no" |
| # } |
| # }, |
| # { |
| # "key" : { |
| # "config" : "vk", |
| # "name" : "yuv_to_rgb_effect", |
| # "source_type" : "gm" |
| # }, |
| # "md5" : "0b955f387740c66eb23bf0e253c80d64", |
| # "options" : { |
| # "ext" : "png", |
| # "gamma_correct" : "no" |
| # } |
| # } |
| # ], |
| # } |
| # |
class GoldResults(object):
  """
  Collects test results and writes them, together with the rendered images,
  into an output directory as dm.json and passfail.json.
  """

  def __init__(self, source_type, output_dir, properties_str, key_str,
               ignore_hashes_file):
    """
    source_type is the source_type (=corpus) field used for all results.
    output_dir is the directory where the resulting images are copied and
               the dm.json file is written. If the directory exists it will
               be removed and recreated.
    properties_str is a string with space separated key/value pairs that
                   is used to set the top level fields in the output JSON file.
    key_str is a string with space separated key/value pairs that
            is used to set the 'key' field in the output JSON file.
    ignore_hashes_file is a file that contains a list of image hashes
                       that should be ignored, one per line. May be empty or
                       None if there is nothing to ignore.
    """
    self._source_type = source_type
    self._properties = _ParseKeyValuePairs(properties_str)
    self._properties['key'] = _ParseKeyValuePairs(key_str)
    self._results = []
    self._passfail = []
    self._output_dir = output_dir

    # make sure the output directory exists and is empty.
    # NOTE: removal errors are ignored here, so if the directory could not be
    # removed the os.makedirs() call below is what raises.
    if os.path.exists(output_dir):
      shutil.rmtree(output_dir, ignore_errors=True)
    os.makedirs(output_dir)

    self._ignore_hashes = set()
    if ignore_hashes_file:
      with open(ignore_hashes_file, 'r') as ig_file:
        # One hash per line; surrounding whitespace and blank lines are dropped.
        stripped_lines = (line.strip() for line in ig_file)
        self._ignore_hashes = set(h for h in stripped_lines if h)

  def AddTestResult(self, testName, md5Hash, outputImagePath, matchResult):
    """
    Records the result of one test case.

    testName is stored as the 'name' of the result.
    md5Hash is the hash of the rendered image.
    outputImagePath is the path of the rendered image. Its extension is
                    recorded in the result's options.
    matchResult is stored next to testName in passfail.json.

    Unless md5Hash is one of the hashes to ignore, the image is copied to
    <output_dir>/<md5Hash>.<image_extension>; ValueError is raised if such an
    image has no extension.
    """
    # If the hash is in the list of hashes to ignore then we don't
    # make a copy, but add it to the result.
    imgExt = os.path.splitext(outputImagePath)[1].lstrip('.')
    if md5Hash not in self._ignore_hashes:
      # Copy the image to <output_dir>/<md5Hash>.<image_extension>
      if not imgExt:
        raise ValueError('File %s does not have an extension' % outputImagePath)
      newFilePath = os.path.join(self._output_dir, md5Hash + '.' + imgExt)
      shutil.copy2(outputImagePath, newFilePath)

    # Add an entry to the list of test results
    self._results.append({
        'key': {
            'name': testName,
            'source_type': self._source_type,
        },
        'md5': md5Hash,
        'options': {
            'ext': imgExt,
            'gamma_correct': 'no'
        }
    })

    self._passfail.append((testName, matchResult))

  def _WriteJsonFile(self, file_name, data):
    """
    Writes data as indented json, followed by a newline, to file_name inside
    the output directory.
    """
    serialized = json.dumps(data, indent=1) + '\n'
    output_file_name = os.path.join(self._output_dir, file_name)
    with open(output_file_name, 'wb') as outfile:
      # json.dumps() escapes non-ASCII characters by default, so the text is
      # plain ASCII; encoding it explicitly lets the binary-mode write work on
      # both Python 2 and Python 3 with the same bytes on disk.
      outfile.write(serialized.encode('ascii'))

  def WriteResults(self):
    """
    Writes dm.json (the properties plus all added results) and passfail.json
    (the list of (test name, match result) pairs) to the output directory.
    """
    self._properties['results'] = self._results
    self._WriteJsonFile('dm.json', self._properties)
    self._WriteJsonFile('passfail.json', self._passfail)
| |
# Manual smoke test: running this file as a script produces example output.
if __name__ == '__main__':
  # Set up a scratch directory holding three empty stand-in 'image' files.
  test_dir = './testdirectory'
  if not os.path.exists(test_dir):
    os.makedirs(test_dir)
  for image_name in ('image1.png', 'image2.png', 'image3.png'):
    open(os.path.join(test_dir, image_name), 'wb').close()

  # Top level properties and key fields for the example run.
  prop_str = ('build_number 2 "builder name" Builder-Name gitHash '
              'a4a338179013b029d6dd55e737b5bd648a9fb68c')
  key_str = 'arch arm64 compiler Clang configuration Debug'

  # Ignore list: one hash that is used below and one that is not.
  hash_file = os.path.join(test_dir, 'ignore_hashes.txt')
  with open(hash_file, 'wb') as f:
    f.write('\n'.join(['hash-1', 'hash-4']) + '\n')

  # Collect three results and write the output files.
  output_dir = './output_directory'
  gr = GoldResults('pdfium', output_dir, prop_str, key_str, hash_file)
  example_results = (
      ('test-1', 'hash-1', 'image1.png', GoldBaseline.MATCH),
      ('test-2', 'hash-2', 'image2.png', GoldBaseline.MATCH),
      ('test-3', 'hash-3', 'image3.png', GoldBaseline.MISMATCH),
  )
  for test_name, md5_hash, image_name, match_result in example_results:
    gr.AddTestResult(test_name, md5_hash, os.path.join(test_dir, image_name),
                     match_result)
  gr.WriteResults()