|  | #!/usr/bin/env python | 
|  | # Copyright 2014 The PDFium Authors. All rights reserved. | 
|  | # Use of this source code is governed by a BSD-style license that can be | 
|  | # found in the LICENSE file. | 
|  | """Expands a hand-written PDF testcase (template) into a valid PDF file. | 
|  |  | 
|  | There are several places in a PDF file where byte-offsets are required. This | 
|  | script replaces {{name}}-style variables in the input with calculated results: | 
|  |  | 
|  | {{include path/to/file}} - inserts file's contents into stream. | 
|  | {{header}} - expands to the header comment required for PDF files. | 
|  | {{xref}} - expands to a generated xref table, noting the offset. | 
|  | {{trailer}} - expands to a standard trailer with "1 0 R" as the /Root. | 
|  | {{startxref}} - expands to a startxref directive followed by correct offset. | 
|  | {{object x y}} - expands to |x y obj| declaration, noting the offset. | 
|  | {{streamlen}} - expands to |/Length n|. | 
|  | """ | 
|  |  | 
|  | import cStringIO | 
|  | import optparse | 
|  | import os | 
|  | import re | 
|  | import sys | 
|  |  | 
|  |  | 
class StreamLenState:
    """States used by TemplateProcessor while measuring stream lengths.

    START          - initial state; also restored once an 'endstream' line is
                     seen.
    FIND_STREAM    - a {{streamlen}} token was seen; waiting for the 'stream'
                     line.
    FIND_ENDSTREAM - the 'stream' line was seen; lines are being counted until
                     the 'endstream' line.
    """
    START, FIND_STREAM, FIND_ENDSTREAM = 1, 2, 3
|  |  | 
|  |  | 
class TemplateProcessor:
    """Expands {{...}} template tokens line by line, tracking byte offsets.

    Intended use is two passes over the same sequence of lines: first call
    preprocess_line() on every line so that the length of each stream that
    follows a {{streamlen}} token is known, then call process_line() on every
    line, in the same order, and emit what it returns.
    """

    HEADER_TOKEN = '{{header}}'
    HEADER_REPLACEMENT = '%PDF-1.7\n%\xa0\xf2\xa4\xf4'

    XREF_TOKEN = '{{xref}}'
    # Filled with (first object number, number of entries).
    XREF_REPLACEMENT = 'xref\n%d %d\n'

    # Filled with (byte offset, generation number) of an in-use object.
    XREF_REPLACEMENT_N = '%010d %05d n \n'
    XREF_REPLACEMENT_F = '0000000000 65535 f \n'
    # XREF rows must be exactly 20 bytes - space required.
    assert len(XREF_REPLACEMENT_F) == 20

    TRAILER_TOKEN = '{{trailer}}'
    # Filled with the /Size value (highest object number seen, plus one).
    TRAILER_REPLACEMENT = 'trailer <<\n  /Root 1 0 R\n  /Size %d\n>>'

    STARTXREF_TOKEN = '{{startxref}}'
    # Filled with the output offset at which the xref table was emitted.
    STARTXREF_REPLACEMENT = 'startxref\n%d'

    OBJECT_PATTERN = r'\{\{object\s+(\d+)\s+(\d+)\}\}'
    OBJECT_REPLACEMENT = r'\1 \2 obj'

    STREAMLEN_TOKEN = '{{streamlen}}'
    STREAMLEN_REPLACEMENT = '/Length %d'

    def __init__(self):
        # Scanner state for preprocess_line().
        self.streamlen_state = StreamLenState.START
        # One length per {{streamlen}} token, in order of appearance.
        self.streamlens = []
        # Total length of all output lines produced by process_line() so far.
        self.offset = 0
        # Value of |offset| when the {{xref}} line was processed.
        self.xref_offset = 0
        self.max_object_number = 0
        # Maps object number -> (offset, generation number).
        self.objects = {}

    def insert_xref_entry(self, object_number, generation_number):
        """Records that |object_number| starts at the current output offset."""
        self.objects[object_number] = (self.offset, generation_number)
        self.max_object_number = max(self.max_object_number, object_number)

    def generate_xref_table(self):
        """Returns the xref section text for objects 0..max_object_number.

        Object numbers that were recorded get an in-use ('n') row holding their
        offset and generation; every other number gets the fixed free ('f')
        row.
        """
        entry_count = self.max_object_number + 1
        rows = [self.XREF_REPLACEMENT % (0, entry_count)]
        for object_number in range(entry_count):
            entry = self.objects.get(object_number)
            if entry is None:
                rows.append(self.XREF_REPLACEMENT_F)
            else:
                rows.append(self.XREF_REPLACEMENT_N % entry)
        return ''.join(rows)

    def preprocess_line(self, line):
        """First pass: measures the stream following each {{streamlen}} token.

        A line containing {{streamlen}} starts a new measurement. Once a line
        that is exactly 'stream' (ignoring trailing whitespace) is seen, the
        full length of every following line, terminator included, is added to
        that measurement until a line that is exactly 'endstream'.
        """
        if self.STREAMLEN_TOKEN in line:
            assert self.streamlen_state == StreamLenState.START, (
                '{{streamlen}} found while a previous one is still unresolved')
            self.streamlen_state = StreamLenState.FIND_STREAM
            self.streamlens.append(0)
            return

        if (self.streamlen_state == StreamLenState.FIND_STREAM and
                line.rstrip() == 'stream'):
            self.streamlen_state = StreamLenState.FIND_ENDSTREAM
            return

        if self.streamlen_state == StreamLenState.FIND_ENDSTREAM:
            if line.rstrip() == 'endstream':
                self.streamlen_state = StreamLenState.START
            else:
                self.streamlens[-1] += len(line)

    def process_line(self, line):
        """Second pass: returns |line| with its tokens expanded.

        Also advances the running output offset by the length of the returned
        text, so that later {{object}}, {{xref}} and {{startxref}} expansions
        see correct offsets.
        """
        if self.HEADER_TOKEN in line:
            line = line.replace(self.HEADER_TOKEN, self.HEADER_REPLACEMENT)
        if self.STREAMLEN_TOKEN in line:
            sub = self.STREAMLEN_REPLACEMENT % self.streamlens.pop(0)
            # Plain substring replacement, like every other token; the token is
            # not a regular expression and must not be interpreted as one.
            line = line.replace(self.STREAMLEN_TOKEN, sub)
        if self.XREF_TOKEN in line:
            self.xref_offset = self.offset
            # The generated table replaces the whole line, not just the token.
            line = self.generate_xref_table()
        if self.TRAILER_TOKEN in line:
            replacement = self.TRAILER_REPLACEMENT % (self.max_object_number + 1)
            line = line.replace(self.TRAILER_TOKEN, replacement)
        if self.STARTXREF_TOKEN in line:
            replacement = self.STARTXREF_REPLACEMENT % self.xref_offset
            line = line.replace(self.STARTXREF_TOKEN, replacement)
        # re.match anchors at the start of the line, so the recorded offset is
        # exactly where the expanded "x y obj" text begins.
        match = re.match(self.OBJECT_PATTERN, line)
        if match:
            self.insert_xref_entry(int(match.group(1)), int(match.group(2)))
            line = re.sub(self.OBJECT_PATTERN, self.OBJECT_REPLACEMENT, line)
        self.offset += len(line)
        return line
|  |  | 
|  |  | 
def expand_file(infile, output_path):
    """Expands the template lines from |infile| into the file |output_path|.

    Args:
      infile: iterable of template lines (includes already resolved).
      output_path: path of the file to create; opened in binary write mode.

    Every line is run through TemplateProcessor.preprocess_line() before any
    line is run through process_line(), because stream lengths must be known
    before the first {{streamlen}} token is expanded. An IOError is reported
    on stderr and not re-raised.
    """
    processor = TemplateProcessor()
    try:
        with open(output_path, 'wb') as outfile:
            # Keep the lines so both passes see exactly the same sequence.
            lines = list(infile)
            for line in lines:
                processor.preprocess_line(line)
            for line in lines:
                outfile.write(processor.process_line(line))
    except IOError:
        # Report the path this function actually knows about; the previous
        # message formatted an undefined name and raised NameError instead.
        sys.stderr.write('failed to process %s\n' % output_path)
|  |  | 
|  |  | 
def insert_includes(input_path, output_file, visited_set):
    """Copies |input_path| to |output_file|, expanding {{include ...}} lines.

    Args:
      input_path: path of the file to read; normalized before use.
      output_file: writable file-like object receiving the expanded lines.
      visited_set: set of normalized paths currently being expanded, used to
          detect circular inclusion.

    A line that starts (after optional whitespace) with {{include path}} is
    replaced by the expanded contents of |path|, resolved relative to the
    directory of |input_path|. A path that is already in |visited_set| is
    reported on stderr and skipped. An IOError is reported on stderr and then
    re-raised.
    """
    input_path = os.path.normpath(input_path)
    if input_path in visited_set:
        sys.stderr.write('Circular inclusion %s, ignoring' % input_path + '\n')
        return

    visited_set.add(input_path)
    include_re = re.compile(r'\s*\{\{include\s+(.+)\}\}')
    base_dir = os.path.dirname(input_path)
    try:
        with open(input_path, 'rb') as infile:
            for line in infile:
                found = include_re.match(line)
                if not found:
                    output_file.write(line)
                    continue
                included_path = os.path.join(base_dir, found.group(1))
                insert_includes(included_path, output_file, visited_set)
    except IOError:
        sys.stderr.write('failed to include %s' % input_path + '\n')
        raise
    # Only reached on success: the file may legitimately be included again
    # from a sibling branch.
    visited_set.discard(input_path)
|  |  | 
|  |  | 
def main():
    """Expands each template named on the command line into a .pdf file.

    The output file takes the template's base name with a '.pdf' extension and
    is written to --output-dir when that option is non-empty, otherwise next
    to the template itself.

    Returns:
      0, always.
    """
    parser = optparse.OptionParser()
    parser.add_option('--output-dir', default='')
    options, testcase_paths = parser.parse_args()

    for testcase_path in testcase_paths:
        source_dir, source_name = os.path.split(testcase_path)
        stem = os.path.splitext(source_name)[0]
        # An explicit --output-dir wins over the template's own directory.
        target_dir = options.output_dir or source_dir
        output_path = os.path.join(target_dir, stem + '.pdf')

        # Resolve includes into memory first, then expand from the start.
        expanded = cStringIO.StringIO()
        insert_includes(testcase_path, expanded, set())
        expanded.seek(0)
        expand_file(expanded, output_path)
    return 0
|  |  | 
|  |  | 
if __name__ == '__main__':
    # Executed as a script: main()'s return value becomes the exit status.
    exit_status = main()
    sys.exit(exit_status)