|  | # Copyright 2015 The PDFium Authors. All rights reserved. | 
|  | # Use of this source code is governed by a BSD-style license that can be | 
|  | # found in the LICENSE file. | 
|  |  | 
|  | import json | 
|  | import os | 
|  | import shlex | 
|  | import shutil | 
|  | import ssl | 
|  | import urllib2 | 
|  |  | 
|  |  | 
|  | def _ParseKeyValuePairs(kv_str): | 
|  | """ | 
|  | Parses a string of the type 'key1 value1 key2 value2' into a dict. | 
|  | """ | 
|  | kv_pairs = shlex.split(kv_str) | 
|  | if len(kv_pairs) % 2: | 
|  | raise ValueError('Uneven number of key/value pairs. Got %s' % kv_str) | 
|  | return {kv_pairs[i]: kv_pairs[i + 1] for i in xrange(0, len(kv_pairs), 2)} | 
|  |  | 
|  |  | 
|  | # This module downloads a json provided by Skia Gold with the expected baselines | 
|  | # for each test file. | 
|  | # | 
|  | # The expected format for the json is: | 
|  | # { | 
|  | #   "commit": { | 
|  | #     "author": "John Doe (jdoe@chromium.org)", | 
|  | #     "commit_time": 1510598123, | 
|  | #     "hash": "cee39e6e90c219cc91f2c94a912a06977f4461a0" | 
|  | #   }, | 
|  | #   "master": { | 
|  | #     "abc.pdf.1": { | 
|  | #       "0ec3d86f545052acd7c9a16fde8ca9d4": 1, | 
|  | #       "80455b71673becc9fbc100d6da56ca65": 1, | 
|  | #       "b68e2ecb80090b4502ec89ad1be2322c": 1 | 
|  | #      }, | 
|  | #     "defgh.pdf.0": { | 
|  | #       "01e020cd4cd05c6738e479a46a506044": 1, | 
|  | #       "b68e2ecb80090b4502ec89ad1be2322c": 1 | 
|  | #     } | 
|  | #   }, | 
|  | #   "changeLists": { | 
|  | #     "18499" : { | 
|  | #       "abc.pdf.1": { | 
|  | #         "d5dd649124cf1779152253dc8fb239c5": 1, | 
|  | #         "42a270581930579cdb0f28674972fb1a": 1 | 
|  | #       } | 
|  | #     } | 
|  | #   } | 
|  | # } | 
class GoldBaseline(object):
  """
  Expected image hashes per test case, downloaded from Skia Gold.

  The baseline json is fetched once, when the object is constructed, and is
  then queried through MatchLocalResult().
  """

  # Return values for MatchLocalResult().
  MATCH = 'match'
  MISMATCH = 'mismatch'
  NO_BASELINE = 'no_baseline'
  BASELINE_DOWNLOAD_FAILED = 'baseline_download_failed'

  def __init__(self, properties_str):
    """
    properties_str is a string with space separated key/value pairs that
    is used to find the cl number for which to baseline
    """
    self._properties = _ParseKeyValuePairs(properties_str)
    self._baselines = self._LoadSkiaGoldBaselines()

  def _LoadSkiaGoldBaselines(self):
    """
    Download the baseline json and return its 'master' section.

    If the properties contain an 'issue' entry, its value is appended to the
    baseline URL. Requests that fail with an SSL, HTTP or URL error are
    retried with a doubling timeout (2, 4, 8, 16 and 32 seconds).

    Returns:
      The value stored under 'master' in the downloaded json (an empty dict
      if that key is absent), or None, after printing an error, when every
      attempt failed, the payload is not valid json, or the payload is not a
      json object.

    Raises:
      ValueError: if the response content type is not 'application/json'.
    """
    GOLD_BASELINE_URL = 'https://pdfium-gold.skia.org/json/baseline'
    EXPECTED_CONTENT_TYPE = 'application/json'
    MAX_TIMEOUT = 33  # 5 tries. (2, 4, 8, 16, 32)

    # If we have an issue number add it to the baseline URL
    cl_number_str = self._properties.get('issue', None)
    url = GOLD_BASELINE_URL + ('/' + cl_number_str if cl_number_str else '')

    json_data = ''
    timeout = 2
    while True:
      try:
        response = urllib2.urlopen(url, timeout=timeout)
        try:
          c_type = response.headers.get('Content-type', '')
          if c_type != EXPECTED_CONTENT_TYPE:
            raise ValueError('Invalid content type. Got %s instead of %s' %
                             (c_type, EXPECTED_CONTENT_TYPE))
          json_data = response.read()
        finally:
          # Release the connection on every path, including the raise above.
          response.close()
        break  # If this line is reached, then no exception occurred.
      except (ssl.SSLError, urllib2.HTTPError, urllib2.URLError) as e:
        timeout *= 2
        if timeout < MAX_TIMEOUT:
          continue
        print('Error: Unable to read skia gold json from %s: %s' % (url, e))
        return None

    try:
      data = json.loads(json_data)
    except ValueError as e:
      print('Error: Malformed json read from %s: %s' % (url, e))
      return None

    # Valid json that is not an object (e.g. null or a list) has no 'master'
    # section to look up; report it like any other unusable download.
    if not isinstance(data, dict):
      print('Error: Unexpected json structure read from %s' % url)
      return None

    return data.get('master', {})

  def MatchLocalResult(self, test_name, md5_hash):
    """
    Match a locally generated hash of a test case's rendered image with the
    expected hashes downloaded in the baseline json.

    The baseline is a dict mapping the test case name to a dict with the
    expected hashes as keys. The test case is looked up by name and the hash
    is then checked against the keys of its entry.

    Returns MATCH if the md5 provided matches the ones in the baseline json,
    MISMATCH if it does not, NO_BASELINE if the test case has no baseline, or
    BASELINE_DOWNLOAD_FAILED if the baseline could not be downloaded and parsed.
    """
    if self._baselines is None:
      return GoldBaseline.BASELINE_DOWNLOAD_FAILED

    if test_name not in self._baselines:
      return GoldBaseline.NO_BASELINE

    if md5_hash in self._baselines[test_name]:
      return GoldBaseline.MATCH
    return GoldBaseline.MISMATCH
|  |  | 
|  |  | 
|  | # This module collects and writes output in a format expected by the | 
|  | # Gold baseline tool. Based on meta data provided explicitly and by | 
|  | # adding a series of test results it can be used to produce | 
|  | # a JSON file that is uploaded to Google Storage and ingested by Gold. | 
|  | # | 
|  | # The output will look similar to this: | 
|  | # | 
|  | # { | 
|  | #    "build_number" : "2", | 
|  | #    "gitHash" : "a4a338179013b029d6dd55e737b5bd648a9fb68c", | 
|  | #    "key" : { | 
|  | #       "arch" : "arm64", | 
|  | #       "compiler" : "Clang", | 
|  | #    }, | 
|  | #    "results" : [ | 
|  | #       { | 
|  | #          "key" : { | 
|  | #             "config" : "vk", | 
|  | #             "name" : "yuv_nv12_to_rgb_effect", | 
|  | #             "source_type" : "gm" | 
|  | #          }, | 
|  | #          "md5" : "7db34da246868d50ab9ddd776ce6d779", | 
|  | #          "options" : { | 
|  | #             "ext" : "png", | 
|  | #             "gamma_correct" : "no" | 
|  | #          } | 
|  | #       }, | 
|  | #       { | 
|  | #          "key" : { | 
|  | #             "config" : "vk", | 
|  | #             "name" : "yuv_to_rgb_effect", | 
|  | #             "source_type" : "gm" | 
|  | #          }, | 
|  | #          "md5" : "0b955f387740c66eb23bf0e253c80d64", | 
|  | #          "options" : { | 
|  | #             "ext" : "png", | 
|  | #             "gamma_correct" : "no" | 
|  | #          } | 
|  | #       } | 
|  | #    ], | 
|  | # } | 
|  | # | 
class GoldResults(object):
  """
  Collects test results and writes them, together with the copied images,
  into an output directory as dm.json and passfail.json.
  """

  def __init__(self, source_type, output_dir, properties_str, key_str,
               ignore_hashes_file):
    """
    source_type is the source_type (=corpus) field used for all results.
    output_dir is the directory where the resulting images are copied and
               the dm.json file is written. If the directory exists it will
               be removed and recreated.
    properties_str is a string with space separated key/value pairs that
               is used to set the top level fields in the output JSON file.
    key_str is a string with space separated key/value pairs that
               is used to set the 'key' field in the output JSON file.
    ignore_hashes_file is a file that contains a list of image hashes
               that should be ignored.
    """
    self._source_type = source_type
    top_level_fields = _ParseKeyValuePairs(properties_str)
    top_level_fields['key'] = _ParseKeyValuePairs(key_str)
    self._properties = top_level_fields
    self._results = []
    self._passfail = []
    self._output_dir = output_dir

    # Start from an empty output directory: drop a previous one, then create
    # it afresh.
    if os.path.exists(output_dir):
      shutil.rmtree(output_dir, ignore_errors=True)
    os.makedirs(output_dir)

    # One hash per line; surrounding whitespace and blank lines are dropped.
    self._ignore_hashes = set()
    if ignore_hashes_file:
      with open(ignore_hashes_file, 'r') as ig_file:
        stripped_lines = [line.strip() for line in ig_file.readlines()]
      self._ignore_hashes = set(entry for entry in stripped_lines if entry)

  def AddTestResult(self, testName, md5Hash, outputImagePath, matchResult):
    """
    Record the outcome of one test.

    Unless md5Hash is in the set of ignored hashes, the image at
    outputImagePath is copied to <output_dir>/<md5Hash>.<image_extension>.
    The result entry and the (testName, matchResult) pair are recorded in
    both cases.

    Raises ValueError if the image has to be copied but its path has no
    file extension.
    """
    extension = os.path.splitext(outputImagePath)[1].lstrip('.')

    # Ignored hashes are still reported below; only the copy is skipped.
    needs_copy = md5Hash not in self._ignore_hashes
    if needs_copy and not extension:
      raise ValueError('File %s does not have an extension' % outputImagePath)
    if needs_copy:
      copied_name = md5Hash + '.' + extension
      shutil.copy2(outputImagePath,
                   os.path.join(self._output_dir, copied_name))

    # Add an entry to the list of test results
    result_key = {'name': testName, 'source_type': self._source_type}
    result_options = {'ext': extension, 'gamma_correct': 'no'}
    self._results.append(
        {'key': result_key, 'md5': md5Hash, 'options': result_options})

    self._passfail.append((testName, matchResult))

  def WriteResults(self):
    """
    Write the collected data into the output directory: dm.json holds the
    properties with the results stored under 'results', passfail.json holds
    the list of (test name, match result) pairs.
    """
    self._properties['results'] = self._results

    outputs = (
        ('dm.json', self._properties),
        ('passfail.json', self._passfail),
    )
    for file_name, payload in outputs:
      with open(os.path.join(self._output_dir, file_name), 'wb') as outfile:
        json.dump(payload, outfile, indent=1)
        outfile.write('\n')
|  |  | 
|  |  | 
|  | # Produce example output for manual testing. | 
def _Example():
  """
  Produce example output for manual testing.

  Makes sure ./testdirectory holds three 'image' files and an
  ignore_hashes.txt, records three results in a GoldResults instance and
  writes its output to ./output_directory.
  """
  test_dir = './testdirectory'
  if not os.path.exists(test_dir):
    os.makedirs(test_dir)

  # Create each placeholder image that is missing, so the example also works
  # when the directory is left over from an earlier run. Files that already
  # exist are left untouched.
  image_paths = [
      os.path.join(test_dir, image_name)
      for image_name in ('image1.png', 'image2.png', 'image3.png')
  ]
  for image_path in image_paths:
    if not os.path.exists(image_path):
      open(image_path, 'wb').close()

  # Create an instance and add results.
  prop_str = ('build_number 2 "builder name" Builder-Name gitHash '
              'a4a338179013b029d6dd55e737b5bd648a9fb68c')

  key_str = 'arch arm64 compiler Clang configuration Debug'

  # 'hash-1' is listed as ignored, so the image of test-1 is reported but
  # not copied.
  hash_file = os.path.join(test_dir, 'ignore_hashes.txt')
  with open(hash_file, 'wb') as f:
    f.write('\n'.join(['hash-1', 'hash-4']) + '\n')

  output_dir = './output_directory'
  gr = GoldResults('pdfium', output_dir, prop_str, key_str, hash_file)
  gr.AddTestResult('test-1', 'hash-1', image_paths[0], GoldBaseline.MATCH)
  gr.AddTestResult('test-2', 'hash-2', image_paths[1], GoldBaseline.MATCH)
  gr.AddTestResult('test-3', 'hash-3', image_paths[2], GoldBaseline.MISMATCH)
  gr.WriteResults()
|  |  | 
|  |  | 
# Run the manual example only when this file is executed as a script, not
# when it is imported as a module.
if __name__ == '__main__':
  _Example()