| # Copyright 2017 The PDFium Authors. All rights reserved. | 
 | # Use of this source code is governed by a BSD-style license that can be | 
 | # found in the LICENSE file. | 
 | """Classes that draw conclusions out of a comparison and represent them.""" | 
 |  | 
 | from collections import Counter | 
 |  | 
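# ANSI escape sequences used to color terminal output when requested;
# FORMAT_NORMAL leaves the text unchanged.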
 | FORMAT_RED = '\033[01;31m{0}\033[00m' | 
 | FORMAT_GREEN = '\033[01;32m{0}\033[00m' | 
 | FORMAT_MAGENTA = '\033[01;35m{0}\033[00m' | 
 | FORMAT_CYAN = '\033[01;36m{0}\033[00m' | 
 | FORMAT_NORMAL = '{0}' | 
 |  | 
 | RATING_FAILURE = 'failure' | 
 | RATING_REGRESSION = 'regression' | 
 | RATING_IMPROVEMENT = 'improvement' | 
 | RATING_NO_CHANGE = 'no_change' | 
 | RATING_SMALL_CHANGE = 'small_change' | 
 |  | 
 | RATINGS = [ | 
 |     RATING_FAILURE, RATING_REGRESSION, RATING_IMPROVEMENT, RATING_NO_CHANGE, | 
 |     RATING_SMALL_CHANGE | 
 | ] | 
 |  | 
 | RATING_TO_COLOR = { | 
 |     RATING_FAILURE: FORMAT_MAGENTA, | 
 |     RATING_REGRESSION: FORMAT_RED, | 
 |     RATING_IMPROVEMENT: FORMAT_CYAN, | 
 |     RATING_NO_CHANGE: FORMAT_GREEN, | 
 |     RATING_SMALL_CHANGE: FORMAT_NORMAL, | 
 | } | 
 |  | 
 |  | 
 | class ComparisonConclusions(object): | 
 |   """All conclusions drawn from a comparison. | 
 |  | 
  This is initialized empty and then processes pairs of results for each test
  case, determining the rating for that case, which can be:
  "failure" if either or both runs for the case failed.
  "regression" if there is a significant increase in time for the test case.
  "improvement" if there is a significant decrease in time for the test case.
  "no_change" if the time for the test case did not change at all.
  "small_change" if the time for the test case changed, but the change is
      within the threshold.
 |   """ | 
 |  | 
 |   def __init__(self, threshold_significant): | 
 |     """Initializes an empty ComparisonConclusions. | 
 |  | 
 |     Args: | 
 |       threshold_significant: Float with the tolerance beyond which changes in | 
 |           measurements are considered significant. | 
 |  | 
          The change is measured as a multiplicative factor rather than an
          additive fraction of the previous measurement; that is, a
          threshold_significant of 1.0 will flag test cases that became over
          100% slower (> 200% of the previous time measured) or over 100%
          faster (< 50% of the previous time measured).
 |  | 
 |           threshold_significant 0.02 -> 98.04% to 102% is not significant | 
 |           threshold_significant 0.1 -> 90.9% to 110% is not significant | 
 |           threshold_significant 0.25 -> 80% to 125% is not significant | 
 |           threshold_significant 1 -> 50% to 200% is not significant | 
 |           threshold_significant 4 -> 20% to 500% is not significant | 
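
          For example, with threshold_significant = 0.25 the negative bound
          is (1 / 1.25) - 1 = -0.2, so only ratios above +0.25 (more than
          125% of the previous time) or below -0.2 (less than 80% of the
          previous time) are rated significant.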
 |  | 
 |     """ | 
 |     self.threshold_significant = threshold_significant | 
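    # Significance is symmetric multiplicatively: a slowdown past
    # (1 + threshold) times the previous time and a speedup to below
    # 1 / (1 + threshold) of the previous time are both rated significant.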
 |     self.threshold_significant_negative = (1 / (1 + threshold_significant)) - 1 | 
 |  | 
 |     self.params = {'threshold': threshold_significant} | 
 |     self.summary = ComparisonSummary() | 
 |     self.case_results = {} | 
 |  | 
  def ProcessCase(self, case_name, before, after):
    """Feeds the results of one test case to the ComparisonConclusions.
 |  | 
 |     Args: | 
 |       case_name: String identifying the case. | 
 |       before: Measurement for the "before" version of the code. | 
 |       after: Measurement for the "after" version of the code. | 
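
    Example (the measurements below are only illustrative):
      conclusions = ComparisonConclusions(threshold_significant=0.02)
      conclusions.ProcessCase('testing/resources/test1.pdf', 100, 120)
      conclusions.GetSummary().GetCount(RATING_REGRESSION)  # Now 1.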
 |     """ | 
 |  | 
 |     # Switch 0 to None to simplify the json dict output. All zeros are | 
 |     # considered failed runs, so they will be represented by "null". | 
 |     if not before: | 
 |       before = None | 
 |     if not after: | 
 |       after = None | 
 |  | 
 |     if not before or not after: | 
 |       ratio = None | 
 |       rating = RATING_FAILURE | 
 |     else: | 
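      # Relative change in time: 0.0 means no change, 0.2 means 20% slower,
      # and -0.4 means the case now takes 60% of the previous time.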
 |       ratio = (float(after) / before) - 1.0 | 
 |       if ratio > self.threshold_significant: | 
 |         rating = RATING_REGRESSION | 
 |       elif ratio < self.threshold_significant_negative: | 
 |         rating = RATING_IMPROVEMENT | 
 |       elif ratio == 0: | 
 |         rating = RATING_NO_CHANGE | 
 |       else: | 
 |         rating = RATING_SMALL_CHANGE | 
 |  | 
 |     case_result = CaseResult(case_name, before, after, ratio, rating) | 
 |  | 
 |     self.summary.ProcessCaseResult(case_result) | 
 |     self.case_results[case_name] = case_result | 
 |  | 
 |   def GetSummary(self): | 
 |     """Gets the ComparisonSummary with consolidated totals.""" | 
 |     return self.summary | 
 |  | 
 |   def GetCaseResults(self): | 
 |     """Gets a dict mapping each test case identifier to its CaseResult.""" | 
 |     return self.case_results | 
 |  | 
 |   def GetOutputDict(self): | 
 |     """Returns a conclusions dict with all the conclusions drawn. | 
 |  | 
 |     Returns: | 
 |       A serializable dict with the format illustrated below: | 
 |       { | 
 |         "version": 1, | 
 |         "params": { | 
 |           "threshold": 0.02 | 
 |         }, | 
 |         "summary": { | 
 |           "total": 123, | 
 |           "failure": 1, | 
 |           "regression": 2, | 
 |           "improvement": 1, | 
 |           "no_change": 100, | 
 |           "small_change": 19 | 
 |         }, | 
 |         "comparison_by_case": { | 
 |           "testing/resources/new_test.pdf": { | 
 |             "before": None, | 
 |             "after": 1000, | 
 |             "ratio": None, | 
 |             "rating": "failure" | 
 |           }, | 
 |           "testing/resources/test1.pdf": { | 
 |             "before": 100, | 
 |             "after": 120, | 
 |             "ratio": 0.2, | 
 |             "rating": "regression" | 
 |           }, | 
 |           "testing/resources/test2.pdf": { | 
 |             "before": 100, | 
 |             "after": 2000, | 
 |             "ratio": 19.0, | 
 |             "rating": "regression" | 
 |           }, | 
 |           "testing/resources/test3.pdf": { | 
 |             "before": 1000, | 
 |             "after": 1005, | 
 |             "ratio": 0.005, | 
 |             "rating": "small_change" | 
 |           }, | 
 |           "testing/resources/test4.pdf": { | 
 |             "before": 1000, | 
 |             "after": 1000, | 
 |             "ratio": 0.0, | 
 |             "rating": "no_change" | 
 |           }, | 
 |           "testing/resources/test5.pdf": { | 
 |             "before": 1000, | 
 |             "after": 600, | 
 |             "ratio": -0.4, | 
 |             "rating": "improvement" | 
 |           } | 
 |         } | 
 |       } | 
 |     """ | 
 |     output_dict = {} | 
 |     output_dict['version'] = 1 | 
 |     output_dict['params'] = {'threshold': self.threshold_significant} | 
 |     output_dict['summary'] = self.summary.GetOutputDict() | 
 |     output_dict['comparison_by_case'] = { | 
 |         cr.case_name.decode('utf-8'): cr.GetOutputDict() | 
 |         for cr in self.GetCaseResults().values() | 
 |     } | 
 |     return output_dict | 
 |  | 
 |  | 
 | class ComparisonSummary(object): | 
 |   """Totals computed for a comparison.""" | 
 |  | 
 |   def __init__(self): | 
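    # Number of cases seen for each rating, keyed by the strings in RATINGS.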
 |     self.rating_counter = Counter() | 
 |  | 
  def ProcessCaseResult(self, case_result):
    """Accumulates a single CaseResult into the rating totals."""
    self.rating_counter[case_result.rating] += 1
 |  | 
 |   def GetTotal(self): | 
 |     """Gets the number of test cases processed.""" | 
 |     return sum(self.rating_counter.values()) | 
 |  | 
 |   def GetCount(self, rating): | 
 |     """Gets the number of test cases processed with a given rating.""" | 
 |     return self.rating_counter[rating] | 
 |  | 
 |   def GetOutputDict(self): | 
 |     """Returns a dict that can be serialized with all the totals.""" | 
 |     result = {'total': self.GetTotal()} | 
 |     for rating in RATINGS: | 
 |       result[rating] = self.GetCount(rating) | 
 |     return result | 
 |  | 
 |  | 
 | class CaseResult(object): | 
 |   """The conclusion for the comparison of a single test case.""" | 
 |  | 
  def __init__(self, case_name, before, after, ratio, rating):
    """Initializes a CaseResult.
 |  | 
 |     Args: | 
 |       case_name: String identifying the case. | 
 |       before: Measurement for the "before" version of the code. | 
 |       after: Measurement for the "after" version of the code. | 
 |       ratio: Difference between |after| and |before| as a fraction of |before|. | 
 |       rating: Rating for this test case. | 
 |     """ | 
 |     self.case_name = case_name | 
 |     self.before = before | 
 |     self.after = after | 
 |     self.ratio = ratio | 
 |     self.rating = rating | 
 |  | 
 |   def GetOutputDict(self): | 
 |     """Returns a dict with the test case's conclusions.""" | 
 |     return { | 
 |         'before': self.before, | 
 |         'after': self.after, | 
 |         'ratio': self.ratio, | 
 |         'rating': self.rating | 
 |     } | 
 |  | 
 |  | 
 | def PrintConclusionsDictHumanReadable(conclusions_dict, colored, key=None): | 
 |   """Prints a conclusions dict in a human-readable way. | 
 |  | 
 |   Args: | 
 |     conclusions_dict: Dict to print. | 
 |     colored: Whether to color the output to highlight significant changes. | 
    key: String with the CaseResult dict key to sort the cases by, or None
        to sort the cases by name.
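
  For a regression with ratio 0.2 and time-after 120, a case line looks
  roughly like (illustrative values):

       % Change      Time after  Test case
      +20.0000%             120  testing/resources/test1.pdf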
 |   """ | 
 |   # Print header | 
 |   print '=' * 80 | 
 |   print '{0:>11s} {1:>15s}  {2}'.format('% Change', 'Time after', 'Test case') | 
 |   print '-' * 80 | 
 |  | 
 |   color = FORMAT_NORMAL | 
 |  | 
 |   # Print cases | 
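  # Sort by the requested CaseResult dict key, or by case name if none.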
 |   if key is not None: | 
 |     case_pairs = sorted( | 
 |         conclusions_dict['comparison_by_case'].iteritems(), | 
 |         key=lambda kv: kv[1][key]) | 
 |   else: | 
 |     case_pairs = sorted(conclusions_dict['comparison_by_case'].iteritems()) | 
 |  | 
 |   for case_name, case_dict in case_pairs: | 
 |     if colored: | 
 |       color = RATING_TO_COLOR[case_dict['rating']] | 
 |  | 
 |     if case_dict['rating'] == RATING_FAILURE: | 
 |       print u'{} to measure time for {}'.format( | 
 |           color.format('Failed'), case_name).encode('utf-8') | 
 |       continue | 
 |  | 
 |     print u'{0} {1:15,d}  {2}'.format( | 
 |         color.format('{:+11.4%}'.format(case_dict['ratio'])), | 
 |         case_dict['after'], case_name).encode('utf-8') | 
 |  | 
 |   # Print totals | 
 |   totals = conclusions_dict['summary'] | 
 |   print '=' * 80 | 
 |   print 'Test cases run: %d' % totals['total'] | 
 |  | 
 |   if colored: | 
 |     color = FORMAT_MAGENTA if totals[RATING_FAILURE] else FORMAT_GREEN | 
 |   print('Failed to measure: %s' % color.format(totals[RATING_FAILURE])) | 
 |  | 
 |   if colored: | 
 |     color = FORMAT_RED if totals[RATING_REGRESSION] else FORMAT_GREEN | 
 |   print('Regressions: %s' % color.format(totals[RATING_REGRESSION])) | 
 |  | 
 |   if colored: | 
 |     color = FORMAT_CYAN if totals[RATING_IMPROVEMENT] else FORMAT_GREEN | 
 |   print('Improvements: %s' % color.format(totals[RATING_IMPROVEMENT])) |