Make fixup_pdf_template.py Python 3 compatible.
Consistently use bytes everywhere.
Bug: pdfium:1674
Change-Id: I2ed14680954088ee3240bac7d957fbc8ae931dcc
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/79492
Commit-Queue: Lei Zhang <thestig@chromium.org>
Reviewed-by: Tom Sepez <tsepez@chromium.org>
diff --git a/testing/tools/fixup_pdf_template.py b/testing/tools/fixup_pdf_template.py
index 91d9cf0..84c19d4 100755
--- a/testing/tools/fixup_pdf_template.py
+++ b/testing/tools/fixup_pdf_template.py
@@ -19,8 +19,7 @@
from __future__ import print_function
-# TODO(thestig): Figure out what to do with cStringIO.
-import cStringIO
+import io
import optparse
import os
import re
@@ -46,31 +45,31 @@
class TemplateProcessor:
- HEADER_TOKEN = '{{header}}'
- HEADER_REPLACEMENT = '%PDF-1.7\n%\xa0\xf2\xa4\xf4'
+ HEADER_TOKEN = b'{{header}}'
+ HEADER_REPLACEMENT = b'%PDF-1.7\n%\xa0\xf2\xa4\xf4'
- XREF_TOKEN = '{{xref}}'
- XREF_REPLACEMENT = 'xref\n%d %d\n'
+ XREF_TOKEN = b'{{xref}}'
+ XREF_REPLACEMENT = b'xref\n%d %d\n'
- XREF_REPLACEMENT_N = '%010d %05d n \n'
- XREF_REPLACEMENT_F = '0000000000 65535 f \n'
+ XREF_REPLACEMENT_N = b'%010d %05d n \n'
+ XREF_REPLACEMENT_F = b'0000000000 65535 f \n'
# XREF rows must be exactly 20 bytes - space required.
assert len(XREF_REPLACEMENT_F) == 20
- TRAILER_TOKEN = '{{trailer}}'
- TRAILER_REPLACEMENT = 'trailer <<\n /Root 1 0 R\n /Size %d\n>>'
+ TRAILER_TOKEN = b'{{trailer}}'
+ TRAILER_REPLACEMENT = b'trailer <<\n /Root 1 0 R\n /Size %d\n>>'
- TRAILERSIZE_TOKEN = '{{trailersize}}'
- TRAILERSIZE_REPLACEMENT = '/Size %d'
+ TRAILERSIZE_TOKEN = b'{{trailersize}}'
+ TRAILERSIZE_REPLACEMENT = b'/Size %d'
- STARTXREF_TOKEN = '{{startxref}}'
- STARTXREF_REPLACEMENT = 'startxref\n%d'
+ STARTXREF_TOKEN = b'{{startxref}}'
+ STARTXREF_REPLACEMENT = b'startxref\n%d'
- OBJECT_PATTERN = r'\{\{object\s+(\d+)\s+(\d+)\}\}'
- OBJECT_REPLACEMENT = r'\1 \2 obj'
+ OBJECT_PATTERN = br'\{\{object\s+(\d+)\s+(\d+)\}\}'
+ OBJECT_REPLACEMENT = br'\g<1> \g<2> obj'
- STREAMLEN_TOKEN = '{{streamlen}}'
- STREAMLEN_REPLACEMENT = '/Length %d'
+ STREAMLEN_TOKEN = b'{{streamlen}}'
+ STREAMLEN_REPLACEMENT = b'/Length %d'
def __init__(self):
self.streamlen_state = StreamLenState.START
@@ -101,12 +100,12 @@
return
if (self.streamlen_state == StreamLenState.FIND_STREAM and
- line.rstrip() == 'stream'):
+ line.rstrip() == b'stream'):
self.streamlen_state = StreamLenState.FIND_ENDSTREAM
return
if self.streamlen_state == StreamLenState.FIND_ENDSTREAM:
- if line.rstrip() == 'endstream':
+ if line.rstrip() == b'endstream':
self.streamlen_state = StreamLenState.START
else:
self.streamlens[-1] += len(line)
@@ -141,7 +140,7 @@
processor = TemplateProcessor()
try:
with open(output_path, 'wb') as outfile:
- preprocessed = cStringIO.StringIO()
+ preprocessed = io.BytesIO()
for line in infile:
preprocessed.write(line)
processor.preprocess_line(line)
@@ -161,16 +160,18 @@
try:
with open(input_path, 'rb') as infile:
for line in infile:
- match = re.match(r'\s*\{\{include\s+(.+)\}\}', line)
+ match = re.match(br'\s*\{\{include\s+(.+)\}\}', line)
if match:
insert_includes(
- os.path.join(os.path.dirname(input_path), match.group(1)),
- output_file, visited_set)
+ os.path.join(
+ os.path.dirname(input_path),
+ match.group(1).decode('utf-8')), output_file, visited_set)
else:
# Replace CRLF with LF line endings for .in files.
_, file_extension = os.path.splitext(input_path)
- if file_extension in EXTENSION_OVERRIDE_LINE_ENDINGS:
- line = line.replace(WINDOWS_LINE_ENDING, UNIX_LINE_ENDING)
+ if (file_extension in EXTENSION_OVERRIDE_LINE_ENDINGS and
+ line.endswith(WINDOWS_LINE_ENDING)):
+ line = line[:-len(WINDOWS_LINE_ENDING)] + UNIX_LINE_ENDING
output_file.write(line)
except IOError:
print('failed to include %s' % input_path, file=sys.stderr)
@@ -188,7 +189,7 @@
output_dir = os.path.dirname(testcase_path)
if options.output_dir:
output_dir = options.output_dir
- intermediate_stream = cStringIO.StringIO()
+ intermediate_stream = io.BytesIO()
insert_includes(testcase_path, intermediate_stream, set())
intermediate_stream.seek(0)
output_path = os.path.join(output_dir, testcase_root + '.pdf')