| #!/usr/bin/env python3 |
| # Copyright 2023 The PDFium Authors |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| """Strips comments from a JP2 file. |
| |
| This is a simple filter script to strip comments from a JP2 file, in order to |
| save a few bytes from the final file size. |
| """ |
| |
| import struct |
| import sys |
| |
# Size in bytes of a box header: a 4-byte big-endian length followed by a
# 4-byte type tag (the '>I4s' layout used when parsing and writing boxes).
BOX_HEADER_SIZE = 8
# Type tag of the box that holds the codestream.
BOX_TAG_JP2C = b'jp2c'

# A codestream marker is always 2 bytes: MARKER_START followed by a tag byte.
MARKER_SIZE = 2
# First byte of every marker.
MARKER_START = 0xff
# Tag byte meaning the preceding MARKER_START did not begin a real marker.
MARKER_TAG_IGNORE = 0x00
# Tag byte of the comment marker, which this script strips.
MARKER_TAG_COMMENT = 0x64
# Tag byte that may be a fill byte; it is reparsed as a new MARKER_START.
MARKER_TAG_FILL = 0xff
| |
| |
def parse_box(buffer, offset):
    """Parses the next box in a JP2 file.

    A box starts with an 8-byte header: a big-endian 32-bit length, which
    counts the header itself, followed by a 4-byte type tag.

    Args:
      buffer: A buffer containing the JP2 file contents.
      offset: The starting offset into the buffer.

    Returns:
      A tuple (next_offset, tag) where next_offset is the ending offset, and
      tag is the type tag. The box contents will be
      buffer[offset + 8:next_offset].

    Raises:
      struct.error: Fewer than 8 bytes remain in the buffer at offset.
      ValueError: The length field is 1 (extended 64-bit length, which is not
        supported) or is otherwise too small to cover the box header.
    """
    header_format = '>I4s'
    header_size = struct.calcsize(header_format)

    length, tag = struct.unpack_from(header_format, buffer, offset)
    if length == 0:
        # A zero length marks the last box, which extends to the end of the
        # file. Returning `offset` here would make callers loop forever.
        return len(buffer), tag
    if length < header_size:
        # Lengths 1-7 cannot describe a box with an 8-byte header; continuing
        # would desynchronize the scan and emit corrupt output.
        raise ValueError(
            'unsupported box length %d at offset %d' % (length, offset))
    return offset + length, tag
| |
| |
def parse_marker(buffer, offset):
    """Parses the next marker in a codestream.

    Scans forward from offset for a MARKER_START byte followed by a tag byte.
    A MARKER_TAG_FILL tag is treated as the start of a new marker, and a
    MARKER_TAG_IGNORE tag means the pair was not a real marker; in both cases
    the scan resumes past the bytes already examined.

    Args:
      buffer: A buffer containing the codestream.
      offset: The starting offset into the buffer.

    Returns:
      A tuple (next_offset, tag) where next_offset is the offset after the
      marker, and tag is the type tag. If no marker was found, next_offset
      will point to the end of the buffer, and tag will be None. A marker is
      always 2 bytes.
    """
    end = len(buffer)
    while True:
        # Search for start of marker.
        start = buffer.find(MARKER_START, offset)
        if start == -1:
            return end, None

        tag_offset = start + 1
        if tag_offset == end:
            # A lone start byte at the very end has no tag to go with it.
            return end, None

        tag = buffer[tag_offset]
        if tag == MARKER_TAG_FILL:
            # Possible fill byte, reparse as start of marker. The search must
            # resume at this byte; restarting from the old offset would find
            # the same start byte again and never terminate.
            offset = tag_offset
            continue
        if tag == MARKER_TAG_IGNORE:
            # Not a real marker; resume the search after both bytes.
            offset = tag_offset + 1
            continue
        return tag_offset + 1, tag
| |
| |
def rewrite_jp2c(buffer):
    """Builds a "jp2c" box from a codestream with its comments removed.

    Each comment marker is dropped together with every byte up to the next
    marker. If no marker follows a comment, the rest of the codestream is
    dropped with it.

    Args:
      buffer: The codestream, i.e. the contents of the original "jp2c" box
        without its box header.

    Returns:
      A bytearray holding the complete replacement box: a box header with the
      updated length, followed by the filtered codestream.
    """
    kept = []
    keep_from = 0
    cursor = 0
    total = len(buffer)

    while cursor < total:
        cursor, tag = parse_marker(buffer, cursor)
        if tag != MARKER_TAG_COMMENT:
            # Any other marker stays part of the run being kept.
            continue

        # Keep everything before the comment marker itself.
        kept.append(buffer[keep_from:cursor - MARKER_SIZE])

        # Skip ahead to whatever marker follows the comment.
        cursor, following = parse_marker(buffer, cursor)
        if following is not None:
            # Step back so the main loop parses that marker again.
            cursor -= MARKER_SIZE
        keep_from = cursor

    # Whatever remains after the last comment is kept as-is.
    kept.append(buffer[keep_from:])

    # Reserve the header, append the payload, then fill in the final length.
    box = bytearray(BOX_HEADER_SIZE)
    box += b''.join(kept)
    struct.pack_into('>I4s', box, 0, len(box), BOX_TAG_JP2C)
    return box
| |
| |
def main(in_file, out_file):
    """Copies a JP2 file from in_file to out_file, stripping comments.

    The whole input is read into memory and walked box by box. "jp2c"
    (codestream) boxes are rewritten without comments; every other box is
    written out unchanged.

    Args:
      in_file: A binary file object to read the JP2 file from.
      out_file: A binary file object to write the result to.
    """
    data = in_file.read()
    total = len(data)

    position = 0
    while position < total:
        box_end, box_tag = parse_box(data, position)
        if box_tag != BOX_TAG_JP2C:
            # Not the codestream: copy the box, header included.
            chunk = data[position:box_end]
        else:
            # Codestream: hand over the contents only; the rewrite supplies
            # a fresh box header.
            chunk = rewrite_jp2c(data[position + BOX_HEADER_SIZE:box_end])
        out_file.write(chunk)
        position = box_end

    out_file.flush()
| |
| |
# When run as a script, act as a filter: read the JP2 file from stdin and
# write the result to stdout, using the underlying binary streams.
if __name__ == '__main__':
    main(sys.stdin.buffer, sys.stdout.buffer)