Add basic /JPXDecode test cases
Adds test cases for the /JPXDecode filter that exercise all the basic
color spaces (gray, RGB, and CMYK), both with and without opacity. The
test file renders correctly in Adobe Acrobat Reader, but has issues in
most other PDF readers. The expectation file was created manually, based
on the PDF specification, and may not reflect our final implementation.
The image files are included in the .in template using the {{include}}
mechanism, without additional filters. The gray and RGB files are in the
basic JP2 (.jp2) format, while the CMYK files are in JPX (.jpf) format,
because CMYK support is an extension.
This change includes a simple utility script, strip_jp2_comments.py, for
removing comment segments from a JPEG 2000 codestream.
Bug: pdfium:2000
Change-Id: I246ab1573b15089af9f2812d62b49ae67680d249
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/104710
Commit-Queue: K. Moon <kmoon@chromium.org>
Auto-Submit: K. Moon <kmoon@chromium.org>
Reviewed-by: Nigi <nigi@chromium.org>
diff --git a/testing/SUPPRESSIONS b/testing/SUPPRESSIONS
index b3409bf..a7a4fce 100644
--- a/testing/SUPPRESSIONS
+++ b/testing/SUPPRESSIONS
@@ -376,6 +376,9 @@
# TODO(chromium:983289): Remove after associated bug is fixed
bug_983289.in * * * agg
+# TODO(pdfium:1747): Remove after associated bug is fixed
+jpxdecode.in * * * *
+
# TODO(chromium:1028991): Remove after associated bug is fixed
reset_button.in * * * *
diff --git a/testing/resources/CMYK-alpha.jpf b/testing/resources/CMYK-alpha.jpf
new file mode 100644
index 0000000..538e6d6
--- /dev/null
+++ b/testing/resources/CMYK-alpha.jpf
Binary files differ
diff --git a/testing/resources/CMYK.jpf b/testing/resources/CMYK.jpf
new file mode 100644
index 0000000..00f2ef9
--- /dev/null
+++ b/testing/resources/CMYK.jpf
Binary files differ
diff --git a/testing/resources/RGB-alpha.jp2 b/testing/resources/RGB-alpha.jp2
new file mode 100644
index 0000000..4ab41da
--- /dev/null
+++ b/testing/resources/RGB-alpha.jp2
Binary files differ
diff --git a/testing/resources/RGB.jp2 b/testing/resources/RGB.jp2
new file mode 100644
index 0000000..7b7d428
--- /dev/null
+++ b/testing/resources/RGB.jp2
Binary files differ
diff --git a/testing/resources/gray-alpha.jp2 b/testing/resources/gray-alpha.jp2
new file mode 100644
index 0000000..f5e3f34
--- /dev/null
+++ b/testing/resources/gray-alpha.jp2
Binary files differ
diff --git a/testing/resources/gray.jp2 b/testing/resources/gray.jp2
new file mode 100644
index 0000000..53293bd
--- /dev/null
+++ b/testing/resources/gray.jp2
Binary files differ
diff --git a/testing/resources/pixel/jpxdecode.in b/testing/resources/pixel/jpxdecode.in
new file mode 100644
index 0000000..d69b058
--- /dev/null
+++ b/testing/resources/pixel/jpxdecode.in
@@ -0,0 +1,177 @@
+{{header}}
+{{object 1 0}} <<
+ /Type /Catalog
+ /Pages 2 0 R
+>>
+endobj
+{{object 2 0}} <<
+ /Type /Pages
+ /Count 1
+ /Kids [3 0 R]
+>>
+endobj
+{{object 3 0}} <<
+ /Type /Page
+ /Parent 2 0 R
+ /Contents 4 0 R
+ /MediaBox [0 0 88 128]
+ /Resources <<
+ /XObject <<
+ /ImGray 5 0 R
+ /ImGrayAlpha 6 0 R
+ /ImRGB 7 0 R
+ /ImRGBAlpha 8 0 R
+ /ImCMYK 9 0 R
+ /ImCMYKAlpha 10 0 R
+ >>
+ >>
+>>
+endobj
+{{object 4 0}} <<
+ {{streamlen}}
+>>
+stream
+
+% 50% gray background rectangle
+q
+ 0.5 0.5 0.5 rg
+ 0 0 88 128 re
+ f
+Q
+
+% grayscale, grayscale with alpha
+q
+ 32 0 0 32 8 88 cm
+ /ImGray Do
+Q
+q
+ 32 0 0 32 48 88 cm
+ /ImGrayAlpha Do
+Q
+
+% RGB, RGB with alpha
+q
+ 32 0 0 32 8 48 cm
+ /ImRGB Do
+Q
+q
+ 32 0 0 32 48 48 cm
+ /ImRGBAlpha Do
+Q
+
+% CMYK, CMYK with alpha
+q
+ 32 0 0 32 8 8 cm
+ /ImCMYK Do
+Q
+q
+ 32 0 0 32 48 8 cm
+ /ImCMYKAlpha Do
+Q
+
+endstream
+endobj
+
+% grayscale
+{{object 5 0}} <<
+ /Type /XObject
+ /Subtype /Image
+ /BitsPerComponent 8
+ /ColorSpace /DeviceGray
+ /Filter /JPXDecode
+ /Height 4
+ /Width 4
+ {{streamlen}}
+>>
+stream
+{{include ../gray.jp2}}
+endstream
+endobj
+
+% grayscale with opacity
+{{object 6 0}} <<
+ /Type /XObject
+ /Subtype /Image
+ /BitsPerComponent 8
+ /ColorSpace /DeviceGray
+ /Filter /JPXDecode
+ /Height 4
+ /SMaskInData 1
+ /Width 4
+ {{streamlen}}
+>>
+stream
+{{include ../gray-alpha.jp2}}
+endstream
+endobj
+
+% RGB
+{{object 7 0}} <<
+ /Type /XObject
+ /Subtype /Image
+ /BitsPerComponent 8
+ /ColorSpace /DeviceRGB
+ /Filter /JPXDecode
+ /Height 4
+ /Width 4
+ {{streamlen}}
+>>
+stream
+{{include ../RGB.jp2}}
+endstream
+endobj
+
+% RGB with opacity
+{{object 8 0}} <<
+ /Type /XObject
+ /Subtype /Image
+ /BitsPerComponent 8
+ /ColorSpace /DeviceRGB
+ /Filter /JPXDecode
+ /Height 4
+ /SMaskInData 1
+ /Width 4
+ {{streamlen}}
+>>
+stream
+{{include ../RGB-alpha.jp2}}
+endstream
+endobj
+
+% CMYK
+{{object 9 0}} <<
+ /Type /XObject
+ /Subtype /Image
+ /BitsPerComponent 8
+ /ColorSpace /DeviceCMYK
+ /Filter /JPXDecode
+ /Height 4
+ /Width 4
+ {{streamlen}}
+>>
+stream
+{{include ../CMYK.jpf}}
+endstream
+endobj
+
+% CMYK with opacity
+{{object 10 0}} <<
+ /Type /XObject
+ /Subtype /Image
+ /BitsPerComponent 8
+ /ColorSpace /DeviceCMYK
+ /Filter /JPXDecode
+ /Height 4
+ /SMaskInData 1
+ /Width 4
+ {{streamlen}}
+>>
+stream
+{{include ../CMYK-alpha.jpf}}
+endstream
+endobj
+
+{{xref}}
+{{trailer}}
+{{startxref}}
+%%EOF
diff --git a/testing/resources/pixel/jpxdecode_expected.pdf.0.png b/testing/resources/pixel/jpxdecode_expected.pdf.0.png
new file mode 100644
index 0000000..9051cb0
--- /dev/null
+++ b/testing/resources/pixel/jpxdecode_expected.pdf.0.png
Binary files differ
diff --git a/testing/tools/strip_jp2_comments.py b/testing/tools/strip_jp2_comments.py
new file mode 100755
index 0000000..eb03cdc
--- /dev/null
+++ b/testing/tools/strip_jp2_comments.py
@@ -0,0 +1,162 @@
+#!/usr/bin/env python3
+# Copyright 2023 The PDFium Authors
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+"""Strips comments from a JP2 file.
+
+This is a simple filter script to strip comments from a JP2 file, in order to
+save a few bytes from the final file size.
+
+The JP2 file is read from stdin and the stripped file is written to stdout.
+"""
+
+import struct
+import sys
+
+BOX_HEADER_SIZE = 8
+BOX_TAG_JP2C = b'jp2c'
+
+MARKER_SIZE = 2
+MARKER_START = 0xff
+MARKER_TAG_IGNORE = 0x00
+MARKER_TAG_COMMENT = 0x64
+MARKER_TAG_FILL = 0xff
+
+
+def parse_box(buffer, offset):
+  """Parses the next box in a JP2 file.
+
+  Args:
+    buffer: A buffer containing the JP2 file contents.
+    offset: The starting offset into the buffer.
+
+  Returns:
+    A tuple (next_offset, tag) where next_offset is the ending offset, and tag
+    is the type tag. The box contents will be buffer[offset + 8:next_offset].
+
+  Raises:
+    ValueError: If the box header is truncated, or the length field is 1
+      (64-bit extended length, not supported) or otherwise smaller than the
+      box header itself.
+  """
+  if len(buffer) - offset < BOX_HEADER_SIZE:
+    raise ValueError('Truncated box header at offset %d' % offset)
+
+  length, tag = struct.unpack_from('>I4s', buffer, offset)
+  if length == 0:
+    # A zero length means the box extends to the end of the file.
+    return len(buffer), tag
+  if length < BOX_HEADER_SIZE:
+    # A length of 1 selects a 64-bit extended length, which this script does
+    # not handle; 2-7 cannot hold the header. Either would stall the caller.
+    raise ValueError('Unsupported box length %d at offset %d' %
+                     (length, offset))
+  return offset + length, tag
+
+
+def parse_marker(buffer, offset):
+  """Parses the next marker in a codestream.
+
+  Args:
+    buffer: A buffer containing the codestream.
+    offset: The starting offset into the buffer.
+
+  Returns:
+    A tuple (next_offset, tag) where next_offset is the offset after the marker,
+    and tag is the type tag. If no marker was found, next_offset will point to
+    the end of the buffer, and tag will be None. A marker is always 2 bytes.
+  """
+  while True:
+    # Search for start of marker.
+    next_offset = buffer.find(MARKER_START, offset)
+    if next_offset == -1:
+      break
+    next_offset += 1
+
+    # Parse marker.
+    if next_offset == len(buffer):
+      break
+    tag = buffer[next_offset]
+    if tag == MARKER_TAG_FILL:
+      # Possible fill byte, reparse as start of marker. Resume at the fill
+      # byte so that the search makes progress.
+      offset = next_offset
+      continue
+    next_offset += 1
+
+    if tag == MARKER_TAG_IGNORE:
+      # Not a real marker. Resume the search after it.
+      offset = next_offset
+      continue
+    return next_offset, tag
+
+  # Always return a tuple so that callers can unpack the result.
+  return len(buffer), None
+
+
+def rewrite_jp2c(buffer):
+  """Rewrites a codestream box with its comment segments removed.
+
+  A comment is taken to run from its marker up to the next marker found by
+  parse_marker(), or to the end of the codestream if there is none.
+
+  Args:
+    buffer: The contents of a "jp2c" box, without the box header.
+
+  Returns:
+    A bytearray holding a complete "jp2c" box: a new header with the updated
+    length, followed by the codestream without comments.
+  """
+  # Reserve space for the box header, which is filled in at the end.
+  rewrite_buffer = bytearray(BOX_HEADER_SIZE)
+
+  offset = 0
+  start_offset = offset
+  while offset < len(buffer):
+    next_offset, marker = parse_marker(buffer, offset)
+    if marker == MARKER_TAG_COMMENT:
+      # Flush the codestream before the comment.
+      rewrite_buffer.extend(buffer[start_offset:next_offset - MARKER_SIZE])
+
+      # Find the next marker, skipping the comment.
+      next_offset, marker = parse_marker(buffer, next_offset)
+      if marker is not None:
+        # Reparse the marker.
+        next_offset -= MARKER_SIZE
+      start_offset = next_offset
+    # Other markers pass through as part of the next flush.
+    offset = next_offset
+
+  # Flush the tail of the codestream.
+  rewrite_buffer.extend(buffer[start_offset:])
+
+  struct.pack_into('>I4s', rewrite_buffer, 0, len(rewrite_buffer), BOX_TAG_JP2C)
+  return rewrite_buffer
+
+
+def main(in_file, out_file):
+  """Copies a JP2 file from in_file to out_file, stripping comments.
+
+  Args:
+    in_file: A binary file object to read the JP2 file from.
+    out_file: A binary file object to write the stripped JP2 file to.
+  """
+  buffer = in_file.read()
+
+  # Scan through JP2 boxes.
+  offset = 0
+  while offset < len(buffer):
+    next_offset, tag = parse_box(buffer, offset)
+    if tag == BOX_TAG_JP2C:
+      # Rewrite "jp2c" (codestream) box.
+      out_file.write(rewrite_jp2c(buffer[offset + BOX_HEADER_SIZE:next_offset]))
+    else:
+      # Pass through other boxes.
+      out_file.write(buffer[offset:next_offset])
+    offset = next_offset
+
+  out_file.flush()
+
+
+if __name__ == '__main__':
+  main(sys.stdin.buffer, sys.stdout.buffer)