Update OpenJPEG to 2.5.3
Change-Id: Ib25f76a918592adc76cce1ac2ee4290c0bb3c7d5
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/127130
Commit-Queue: Lei Zhang <thestig@chromium.org>
Reviewed-by: Tom Sepez <tsepez@chromium.org>
Reviewed-by: Thomas Sepez <tsepez@google.com>
diff --git a/third_party/libopenjpeg/0003-dwt-decode.patch b/third_party/libopenjpeg/0003-dwt-decode.patch
index 3d43a89..5c76809 100644
--- a/third_party/libopenjpeg/0003-dwt-decode.patch
+++ b/third_party/libopenjpeg/0003-dwt-decode.patch
@@ -1,5 +1,5 @@
diff --git a/third_party/libopenjpeg/dwt.c b/third_party/libopenjpeg/dwt.c
-index 4164ba090..a36b7ed10 100644
+index 11aae472d..731e7b2c9 100644
--- a/third_party/libopenjpeg/dwt.c
+++ b/third_party/libopenjpeg/dwt.c
@@ -63,9 +63,6 @@
@@ -9,10 +9,10 @@
-#define OPJ_WS(i) v->mem[(i)*2]
-#define OPJ_WD(i) v->mem[(1+(i)*2)]
-
- #ifdef __AVX2__
- /** Number of int32 values in a AVX2 register */
- #define VREG_INT_COUNT 8
-@@ -82,6 +79,7 @@
+ #if defined(__AVX512F__)
+ /** Number of int32 values in a AVX512 register */
+ #define VREG_INT_COUNT 16
+@@ -85,6 +82,7 @@
typedef struct dwt_local {
OPJ_INT32* mem;
@@ -20,7 +20,7 @@
OPJ_INT32 dn; /* number of elements in high pass band */
OPJ_INT32 sn; /* number of elements in low pass band */
OPJ_INT32 cas; /* 0 = start on even coord, 1 = start on odd coord */
-@@ -140,7 +138,7 @@ static void opj_dwt_encode_stepsize(OPJ_INT32 stepsize, OPJ_INT32 numbps,
+@@ -143,7 +141,7 @@ static void opj_dwt_encode_stepsize(OPJ_INT32 stepsize, OPJ_INT32 numbps,
Inverse wavelet transform in 2-D.
*/
static OPJ_BOOL opj_dwt_decode_tile(opj_thread_pool_t* tp,
@@ -29,7 +29,7 @@
static OPJ_BOOL opj_dwt_decode_partial_tile(
opj_tcd_tilecomp_t* tilec,
-@@ -181,13 +179,20 @@ static OPJ_UINT32 opj_dwt_max_resolution(opj_tcd_resolution_t* OPJ_RESTRICT r,
+@@ -184,13 +182,20 @@ static OPJ_UINT32 opj_dwt_max_resolution(opj_tcd_resolution_t* OPJ_RESTRICT r,
/*@}*/
@@ -56,7 +56,7 @@
/* <summary> */
/* This table contains the norms of the 5-3 wavelets for different bands. */
-@@ -296,8 +301,8 @@ static void opj_dwt_interleave_v(const opj_dwt_t* v, OPJ_INT32 *a, OPJ_INT32 x)
+@@ -299,8 +304,8 @@ static void opj_dwt_interleave_v(const opj_dwt_t* v, OPJ_INT32 *a, OPJ_INT32 x)
/* <summary> */
/* Inverse 5-3 wavelet transform in 1-D. */
/* </summary> */
@@ -67,7 +67,7 @@
{
OPJ_INT32 i;
-@@ -326,7 +331,7 @@ static void opj_dwt_decode_1_(OPJ_INT32 *a, OPJ_INT32 dn, OPJ_INT32 sn,
+@@ -329,7 +334,7 @@ static void opj_dwt_decode_1_(OPJ_INT32 *a, OPJ_INT32 dn, OPJ_INT32 sn,
static void opj_dwt_decode_1(const opj_dwt_t *v)
{
@@ -76,7 +76,7 @@
}
#endif /* STANDARD_SLOW_VERSION */
-@@ -2062,7 +2067,7 @@ static void opj_dwt_decode_v_func(void* user_data, opj_tls_t* tls)
+@@ -2275,7 +2280,7 @@ static void opj_dwt_decode_v_func(void* user_data, opj_tls_t* tls)
/* Inverse wavelet transform in 2-D. */
/* </summary> */
static OPJ_BOOL opj_dwt_decode_tile(opj_thread_pool_t* tp,
@@ -85,7 +85,7 @@
{
opj_dwt_t h;
opj_dwt_t v;
-@@ -2084,22 +2089,23 @@ static OPJ_BOOL opj_dwt_decode_tile(opj_thread_pool_t* tp,
+@@ -2299,22 +2304,23 @@ static OPJ_BOOL opj_dwt_decode_tile(opj_thread_pool_t* tp,
return OPJ_TRUE;
}
num_threads = opj_thread_pool_get_thread_count(tp);
@@ -112,7 +112,7 @@
v.mem = h.mem;
while (--numres) {
-@@ -2277,7 +2283,8 @@ static void opj_dwt_interleave_partial_v(OPJ_INT32 *dest,
+@@ -2492,7 +2498,8 @@ static void opj_dwt_interleave_partial_v(OPJ_INT32 *dest,
OPJ_UNUSED(ret);
}
@@ -122,7 +122,7 @@
OPJ_INT32 cas,
OPJ_INT32 win_l_x0,
OPJ_INT32 win_l_x1,
-@@ -2657,16 +2664,16 @@ static OPJ_BOOL opj_dwt_decode_partial_tile(
+@@ -2880,16 +2887,16 @@ static OPJ_BOOL opj_dwt_decode_partial_tile(
opj_sparse_array_int32_free(sa);
return OPJ_TRUE;
}
@@ -142,7 +142,7 @@
h.mem = (OPJ_INT32*)opj_aligned_32_malloc(h_mem_size);
if (! h.mem) {
/* FIXME event manager error callback */
-@@ -2674,6 +2681,7 @@ static OPJ_BOOL opj_dwt_decode_partial_tile(
+@@ -2897,6 +2904,7 @@ static OPJ_BOOL opj_dwt_decode_partial_tile(
return OPJ_FALSE;
}
@@ -150,7 +150,7 @@
v.mem = h.mem;
for (resno = 1; resno < numres; resno ++) {
-@@ -2784,7 +2792,7 @@ static OPJ_BOOL opj_dwt_decode_partial_tile(
+@@ -3007,7 +3015,7 @@ static OPJ_BOOL opj_dwt_decode_partial_tile(
win_ll_x1,
win_hl_x0,
win_hl_x1);
diff --git a/third_party/libopenjpeg/0046-func-ptr-mixup.patch b/third_party/libopenjpeg/0046-func-ptr-mixup.patch
index cdd2288..2df25f1 100644
--- a/third_party/libopenjpeg/0046-func-ptr-mixup.patch
+++ b/third_party/libopenjpeg/0046-func-ptr-mixup.patch
@@ -1,8 +1,8 @@
diff --git a/third_party/libopenjpeg/j2k.c b/third_party/libopenjpeg/j2k.c
-index 9b06e7ec8..e2e048760 100644
+index c56ec72c1..5e9d75076 100644
--- a/third_party/libopenjpeg/j2k.c
+++ b/third_party/libopenjpeg/j2k.c
-@@ -6685,8 +6685,9 @@ static OPJ_BOOL opj_j2k_read_cpf(opj_j2k_t *p_j2k,
+@@ -6749,8 +6749,9 @@ static OPJ_BOOL opj_j2k_read_cpf(opj_j2k_t *p_j2k,
/* J2K / JPT decoder interface */
/* ----------------------------------------------------------------------- */
@@ -13,7 +13,7 @@
if (j2k && parameters) {
j2k->m_cp.m_specific_param.m_dec.m_layer = parameters->cp_layer;
j2k->m_cp.m_specific_param.m_dec.m_reduce = parameters->cp_reduce;
-@@ -6700,15 +6701,17 @@ void opj_j2k_setup_decoder(opj_j2k_t *j2k, opj_dparameters_t *parameters)
+@@ -6764,8 +6765,9 @@ void opj_j2k_setup_decoder(opj_j2k_t *j2k, opj_dparameters_t *parameters)
}
}
@@ -23,6 +23,8 @@
+ opj_j2k_t* j2k = (opj_j2k_t*)p_j2k;
if (j2k) {
j2k->m_cp.strict = strict;
+ if (strict) {
+@@ -6774,8 +6776,9 @@ void opj_j2k_decoder_set_strict_mode(opj_j2k_t *j2k, OPJ_BOOL strict)
}
}
@@ -33,7 +35,7 @@
/* Currently we pass the thread-pool to the tcd, so we cannot re-set it */
/* afterwards */
if (opj_has_thread_support() && j2k->m_tcd == NULL) {
-@@ -7613,11 +7616,12 @@ static OPJ_BOOL opj_j2k_is_imf_compliant(opj_cparameters_t *parameters,
+@@ -7680,11 +7683,12 @@ static OPJ_BOOL opj_j2k_is_imf_compliant(opj_cparameters_t *parameters,
}
@@ -47,8 +49,8 @@
OPJ_UINT32 i, j, tileno, numpocs_tile;
opj_cp_t *cp = 00;
OPJ_UINT32 cblkw, cblkh;
-@@ -7666,10 +7670,10 @@ OPJ_BOOL opj_j2k_setup_encoder(opj_j2k_t *p_j2k,
- return OPJ_FALSE;
+@@ -7754,10 +7758,10 @@ OPJ_BOOL opj_j2k_setup_encoder(opj_j2k_t *p_j2k,
+ }
}
- p_j2k->m_specific_param.m_encoder.m_nb_comps = image->numcomps;
@@ -60,7 +62,7 @@
/* set default values for cp */
cp->tw = 1;
-@@ -7834,7 +7838,7 @@ OPJ_BOOL opj_j2k_setup_encoder(opj_j2k_t *p_j2k,
+@@ -7922,7 +7926,7 @@ OPJ_BOOL opj_j2k_setup_encoder(opj_j2k_t *p_j2k,
}
if (OPJ_IS_CINEMA(parameters->rsiz) || OPJ_IS_IMF(parameters->rsiz)) {
@@ -69,7 +71,7 @@
}
/* Manage profiles and applications and set RSIZ */
-@@ -8379,7 +8383,7 @@ static OPJ_BOOL opj_j2k_add_tlmarker(OPJ_UINT32 tileno,
+@@ -8478,7 +8482,7 @@ static OPJ_BOOL opj_j2k_add_tlmarker(OPJ_UINT32 tileno,
* -----------------------------------------------------------------------
*/
@@ -78,7 +80,7 @@
opj_stream_private_t *p_stream,
opj_event_mgr_t * p_manager
)
-@@ -8391,10 +8395,11 @@ OPJ_BOOL opj_j2k_end_decompress(opj_j2k_t *p_j2k,
+@@ -8490,10 +8494,11 @@ OPJ_BOOL opj_j2k_end_decompress(opj_j2k_t *p_j2k,
}
OPJ_BOOL opj_j2k_read_header(opj_stream_private_t *p_stream,
@@ -91,7 +93,7 @@
/* preconditions */
assert(p_j2k != 00);
assert(p_stream != 00);
-@@ -9178,8 +9183,9 @@ static const opj_dec_memory_marker_handler_t * opj_j2k_get_marker_handler(
+@@ -9354,8 +9359,9 @@ static const opj_dec_memory_marker_handler_t * opj_j2k_get_marker_handler(
return e;
}
@@ -102,7 +104,7 @@
if (p_j2k == 00) {
return;
}
-@@ -9518,7 +9524,7 @@ static OPJ_BOOL opj_j2k_need_nb_tile_parts_correction(opj_stream_private_t
+@@ -9700,7 +9706,7 @@ static OPJ_BOOL opj_j2k_need_nb_tile_parts_correction(opj_stream_private_t
return OPJ_TRUE;
}
@@ -111,7 +113,7 @@
OPJ_UINT32 * p_tile_index,
OPJ_UINT32 * p_data_size,
OPJ_INT32 * p_tile_x0, OPJ_INT32 * p_tile_y0,
-@@ -9528,6 +9534,7 @@ OPJ_BOOL opj_j2k_read_tile_header(opj_j2k_t * p_j2k,
+@@ -9710,6 +9716,7 @@ OPJ_BOOL opj_j2k_read_tile_header(opj_j2k_t * p_j2k,
opj_stream_private_t *p_stream,
opj_event_mgr_t * p_manager)
{
@@ -119,7 +121,7 @@
OPJ_UINT32 l_current_marker = J2K_MS_SOT;
OPJ_UINT32 l_marker_size;
const opj_dec_memory_marker_handler_t * l_marker_handler = 00;
-@@ -9827,13 +9834,14 @@ OPJ_BOOL opj_j2k_read_tile_header(opj_j2k_t * p_j2k,
+@@ -10112,13 +10119,14 @@ OPJ_BOOL opj_j2k_read_tile_header(opj_j2k_t * p_j2k,
return OPJ_TRUE;
}
@@ -135,7 +137,7 @@
OPJ_UINT32 l_current_marker;
OPJ_BYTE l_data [2];
opj_tcp_t * l_tcp;
-@@ -10200,11 +10208,12 @@ static OPJ_BOOL opj_j2k_update_image_dimensions(opj_image_t* p_image,
+@@ -10485,11 +10493,12 @@ static OPJ_BOOL opj_j2k_update_image_dimensions(opj_image_t* p_image,
return OPJ_TRUE;
}
@@ -149,7 +151,7 @@
OPJ_UINT32 i;
OPJ_BOOL* already_mapped;
-@@ -10260,12 +10269,13 @@ OPJ_BOOL opj_j2k_set_decoded_components(opj_j2k_t *p_j2k,
+@@ -10545,12 +10554,13 @@ OPJ_BOOL opj_j2k_set_decoded_components(opj_j2k_t *p_j2k,
}
@@ -164,7 +166,7 @@
opj_cp_t * l_cp = &(p_j2k->m_cp);
opj_image_t * l_image = p_j2k->m_private_image;
OPJ_BOOL ret;
-@@ -11200,8 +11210,9 @@ static void opj_j2k_dump_tile_info(opj_tcp_t * l_default_tile,
+@@ -11493,8 +11503,9 @@ static void opj_j2k_dump_tile_info(opj_tcp_t * l_default_tile,
}
}
@@ -175,7 +177,7 @@
/* Check if the flag is compatible with j2k file*/
if ((flag & OPJ_JP2_INFO) || (flag & OPJ_JP2_IND)) {
fprintf(out_stream, "Wrong flag\n");
-@@ -11391,8 +11402,9 @@ void j2k_dump_image_comp_header(opj_image_comp_t* comp_header,
+@@ -11698,8 +11709,9 @@ void j2k_dump_image_comp_header(opj_image_comp_t* comp_header,
}
}
@@ -186,7 +188,7 @@
OPJ_UINT32 compno;
OPJ_UINT32 numcomps = p_j2k->m_private_image->numcomps;
opj_tcp_t *l_default_tile;
-@@ -11467,8 +11479,9 @@ opj_codestream_info_v2_t* j2k_get_cstr_info(opj_j2k_t* p_j2k)
+@@ -11774,8 +11786,9 @@ opj_codestream_info_v2_t* j2k_get_cstr_info(opj_j2k_t* p_j2k)
return cstr_info;
}
@@ -197,7 +199,7 @@
opj_codestream_index_t* l_cstr_index = (opj_codestream_index_t*)
opj_calloc(1, sizeof(opj_codestream_index_t));
if (!l_cstr_index) {
-@@ -11972,11 +11985,12 @@ static OPJ_BOOL opj_j2k_move_data_from_codec_to_output_image(opj_j2k_t * p_j2k,
+@@ -12390,11 +12403,12 @@ static OPJ_BOOL opj_j2k_move_data_from_codec_to_output_image(opj_j2k_t * p_j2k,
return OPJ_TRUE;
}
@@ -211,7 +213,7 @@
if (!p_image) {
return OPJ_FALSE;
}
-@@ -12030,12 +12044,13 @@ OPJ_BOOL opj_j2k_decode(opj_j2k_t * p_j2k,
+@@ -12448,12 +12462,13 @@ OPJ_BOOL opj_j2k_decode(opj_j2k_t * p_j2k,
return opj_j2k_move_data_from_codec_to_output_image(p_j2k, p_image);
}
@@ -226,7 +228,7 @@
OPJ_UINT32 compno;
OPJ_UINT32 l_tile_x, l_tile_y;
opj_image_comp_t* l_img_comp;
-@@ -12143,10 +12158,11 @@ OPJ_BOOL opj_j2k_get_tile(opj_j2k_t *p_j2k,
+@@ -12561,10 +12576,11 @@ OPJ_BOOL opj_j2k_get_tile(opj_j2k_t *p_j2k,
return opj_j2k_move_data_from_codec_to_output_image(p_j2k, p_image);
}
@@ -239,7 +241,7 @@
OPJ_UINT32 it_comp;
p_j2k->m_cp.m_specific_param.m_dec.m_reduce = res_factor;
-@@ -12177,10 +12193,11 @@ OPJ_BOOL opj_j2k_set_decoded_resolution_factor(opj_j2k_t *p_j2k,
+@@ -12595,10 +12611,11 @@ OPJ_BOOL opj_j2k_set_decoded_resolution_factor(opj_j2k_t *p_j2k,
/* ----------------------------------------------------------------------- */
OPJ_BOOL opj_j2k_encoder_set_extra_options(
@@ -252,7 +254,7 @@
const char* const* p_option_iter;
if (p_options == NULL) {
-@@ -12239,10 +12256,11 @@ OPJ_BOOL opj_j2k_encoder_set_extra_options(
+@@ -12657,10 +12674,11 @@ OPJ_BOOL opj_j2k_encoder_set_extra_options(
/* ----------------------------------------------------------------------- */
@@ -265,7 +267,7 @@
OPJ_UINT32 i, j;
OPJ_UINT32 l_nb_tiles;
OPJ_SIZE_T l_max_tile_size = 0, l_current_tile_size;
-@@ -12347,10 +12365,11 @@ OPJ_BOOL opj_j2k_encode(opj_j2k_t * p_j2k,
+@@ -12765,10 +12783,11 @@ OPJ_BOOL opj_j2k_encode(opj_j2k_t * p_j2k,
return OPJ_TRUE;
}
@@ -278,7 +280,7 @@
/* customization of the encoding */
if (! opj_j2k_setup_end_compress(p_j2k, p_manager)) {
return OPJ_FALSE;
-@@ -12363,11 +12382,12 @@ OPJ_BOOL opj_j2k_end_compress(opj_j2k_t *p_j2k,
+@@ -12781,11 +12800,12 @@ OPJ_BOOL opj_j2k_end_compress(opj_j2k_t *p_j2k,
return OPJ_TRUE;
}
@@ -292,7 +294,7 @@
/* preconditions */
assert(p_j2k != 00);
assert(p_stream != 00);
-@@ -13154,13 +13174,14 @@ static OPJ_BOOL opj_j2k_create_tcd(opj_j2k_t *p_j2k,
+@@ -13571,13 +13591,14 @@ static OPJ_BOOL opj_j2k_create_tcd(opj_j2k_t *p_j2k,
return OPJ_TRUE;
}
@@ -309,10 +311,10 @@
opj_event_msg(p_manager, EVT_ERROR,
"Error while opj_j2k_pre_write_tile with tile index = %d\n", p_tile_index);
diff --git a/third_party/libopenjpeg/j2k.h b/third_party/libopenjpeg/j2k.h
-index 04fba645a..1d824c019 100644
+index bcf70a419..01baf4750 100644
--- a/third_party/libopenjpeg/j2k.h
+++ b/third_party/libopenjpeg/j2k.h
-@@ -621,15 +621,15 @@ opj_j2k_t;
+@@ -658,15 +658,15 @@ opj_j2k_t;
/**
Setup the decoder decoding parameters using user parameters.
@@ -333,7 +335,7 @@
/**
* Creates a J2K compression structure
-@@ -639,7 +639,7 @@ OPJ_BOOL opj_j2k_set_threads(opj_j2k_t *j2k, OPJ_UINT32 num_threads);
+@@ -676,7 +676,7 @@ OPJ_BOOL opj_j2k_set_threads(opj_j2k_t *j2k, OPJ_UINT32 num_threads);
opj_j2k_t* opj_j2k_create_compress(void);
@@ -342,7 +344,7 @@
opj_cparameters_t *parameters,
opj_image_t *image,
opj_event_mgr_t * p_manager);
-@@ -658,7 +658,7 @@ const char *opj_j2k_convert_progression_order(OPJ_PROG_ORDER prg_order);
+@@ -695,7 +695,7 @@ const char *opj_j2k_convert_progression_order(OPJ_PROG_ORDER prg_order);
* Ends the decompression procedures and possibiliy add data to be read after the
* codestream.
*/
@@ -351,7 +353,7 @@
opj_stream_private_t *p_stream,
opj_event_mgr_t * p_manager);
-@@ -666,14 +666,14 @@ OPJ_BOOL opj_j2k_end_decompress(opj_j2k_t *j2k,
+@@ -703,14 +703,14 @@ OPJ_BOOL opj_j2k_end_decompress(opj_j2k_t *j2k,
* Reads a jpeg2000 codestream header structure.
*
* @param p_stream the stream to read data from.
@@ -368,7 +370,7 @@
opj_image_t** p_image,
opj_event_mgr_t* p_manager);
-@@ -681,9 +681,9 @@ OPJ_BOOL opj_j2k_read_header(opj_stream_private_t *p_stream,
+@@ -718,9 +718,9 @@ OPJ_BOOL opj_j2k_read_header(opj_stream_private_t *p_stream,
/**
* Destroys a jpeg2000 codec.
*
@@ -380,7 +382,7 @@
/**
* Destroys a codestream index structure.
-@@ -694,14 +694,14 @@ void j2k_destroy_cstr_index(opj_codestream_index_t *p_cstr_ind);
+@@ -731,14 +731,14 @@ void j2k_destroy_cstr_index(opj_codestream_index_t *p_cstr_ind);
/**
* Decode tile data.
@@ -397,7 +399,7 @@
OPJ_UINT32 p_tile_index,
OPJ_BYTE * p_data,
OPJ_UINT32 p_data_size,
-@@ -710,7 +710,7 @@ OPJ_BOOL opj_j2k_decode_tile(opj_j2k_t * p_j2k,
+@@ -747,7 +747,7 @@ OPJ_BOOL opj_j2k_decode_tile(opj_j2k_t * p_j2k,
/**
* Reads a tile header.
@@ -406,7 +408,7 @@
* @param p_tile_index FIXME DOC
* @param p_data_size FIXME DOC
* @param p_tile_x0 FIXME DOC
-@@ -722,7 +722,7 @@ OPJ_BOOL opj_j2k_decode_tile(opj_j2k_t * p_j2k,
+@@ -759,7 +759,7 @@ OPJ_BOOL opj_j2k_decode_tile(opj_j2k_t * p_j2k,
* @param p_stream the stream to write data to.
* @param p_manager the user event manager.
*/
@@ -415,7 +417,7 @@
OPJ_UINT32 * p_tile_index,
OPJ_UINT32 * p_data_size,
OPJ_INT32 * p_tile_x0,
-@@ -737,7 +737,7 @@ OPJ_BOOL opj_j2k_read_tile_header(opj_j2k_t * p_j2k,
+@@ -774,7 +774,7 @@ OPJ_BOOL opj_j2k_read_tile_header(opj_j2k_t * p_j2k,
/** Sets the indices of the components to decode.
*
@@ -424,7 +426,7 @@
* @param numcomps Number of components to decode.
* @param comps_indices Array of num_compts indices (numbering starting at 0)
* corresponding to the components to decode.
-@@ -745,7 +745,7 @@ OPJ_BOOL opj_j2k_read_tile_header(opj_j2k_t * p_j2k,
+@@ -782,7 +782,7 @@ OPJ_BOOL opj_j2k_read_tile_header(opj_j2k_t * p_j2k,
*
* @return OPJ_TRUE in case of success.
*/
@@ -433,7 +435,7 @@
OPJ_UINT32 numcomps,
const OPJ_UINT32* comps_indices,
opj_event_mgr_t * p_manager);
-@@ -753,7 +753,7 @@ OPJ_BOOL opj_j2k_set_decoded_components(opj_j2k_t *p_j2k,
+@@ -790,7 +790,7 @@ OPJ_BOOL opj_j2k_set_decoded_components(opj_j2k_t *p_j2k,
/**
* Sets the given area to be decoded. This function should be called right after opj_read_header and before any tile header reading.
*
@@ -442,7 +444,7 @@
* @param p_image FIXME DOC
* @param p_start_x the left position of the rectangle to decode (in image coordinates).
* @param p_start_y the up position of the rectangle to decode (in image coordinates).
-@@ -763,7 +763,7 @@ OPJ_BOOL opj_j2k_set_decoded_components(opj_j2k_t *p_j2k,
+@@ -800,7 +800,7 @@ OPJ_BOOL opj_j2k_set_decoded_components(opj_j2k_t *p_j2k,
*
* @return true if the area could be set.
*/
@@ -451,7 +453,7 @@
opj_image_t* p_image,
OPJ_INT32 p_start_x, OPJ_INT32 p_start_y,
OPJ_INT32 p_end_x, OPJ_INT32 p_end_y,
-@@ -780,12 +780,12 @@ opj_j2k_t* opj_j2k_create_decompress(void);
+@@ -817,12 +817,12 @@ opj_j2k_t* opj_j2k_create_decompress(void);
/**
* Dump some elements from the J2K decompression structure .
*
@@ -466,7 +468,7 @@
-@@ -812,20 +812,20 @@ void j2k_dump_image_comp_header(opj_image_comp_t* comp, OPJ_BOOL dev_dump_flag,
+@@ -849,20 +849,20 @@ void j2k_dump_image_comp_header(opj_image_comp_t* comp, OPJ_BOOL dev_dump_flag,
/**
* Get the codestream info from a JPEG2000 codec.
*
@@ -491,7 +493,7 @@
/**
* Decode an image from a JPEG-2000 codestream
-@@ -835,46 +835,46 @@ opj_codestream_index_t* j2k_get_cstr_index(opj_j2k_t* p_j2k);
+@@ -872,46 +872,46 @@ opj_codestream_index_t* j2k_get_cstr_index(opj_j2k_t* p_j2k);
* @param p_manager FIXME DOC
* @return FIXME DOC
*/
@@ -545,7 +547,7 @@
OPJ_UINT32 p_tile_index,
OPJ_BYTE * p_data,
OPJ_UINT32 p_data_size,
-@@ -884,21 +884,21 @@ OPJ_BOOL opj_j2k_write_tile(opj_j2k_t * p_j2k,
+@@ -921,21 +921,21 @@ OPJ_BOOL opj_j2k_write_tile(opj_j2k_t * p_j2k,
/**
* Encodes an image into a JPEG-2000 codestream
*/
@@ -570,7 +572,7 @@
opj_stream_private_t *p_stream,
opj_image_t * p_image,
opj_event_mgr_t * p_manager);
-@@ -907,7 +907,7 @@ OPJ_BOOL opj_j2k_start_compress(opj_j2k_t *p_j2k,
+@@ -944,7 +944,7 @@ OPJ_BOOL opj_j2k_start_compress(opj_j2k_t *p_j2k,
* Ends the compression procedures and possibiliy add data to be read after the
* codestream.
*/
@@ -580,10 +582,10 @@
opj_event_mgr_t * p_manager);
diff --git a/third_party/libopenjpeg/jp2.c b/third_party/libopenjpeg/jp2.c
-index 44d0c98e5..6db728d18 100644
+index 9007c9736..6b4d5dbaa 100644
--- a/third_party/libopenjpeg/jp2.c
+++ b/third_party/libopenjpeg/jp2.c
-@@ -1609,11 +1609,12 @@ static OPJ_BOOL opj_jp2_read_colr(opj_jp2_t *jp2,
+@@ -1643,11 +1643,12 @@ static OPJ_BOOL opj_jp2_apply_color_postprocessing(opj_jp2_t *jp2,
return OPJ_TRUE;
}
@@ -597,7 +599,7 @@
if (!p_image) {
return OPJ_FALSE;
}
-@@ -1905,8 +1906,9 @@ static OPJ_BOOL opj_jp2_write_jp(opj_jp2_t *jp2,
+@@ -1891,8 +1892,9 @@ static OPJ_BOOL opj_jp2_write_jp(opj_jp2_t *jp2,
/* JP2 decoder interface */
/* ----------------------------------------------------------------------- */
@@ -608,7 +610,7 @@
/* setup the J2K codec */
opj_j2k_setup_decoder(jp2->j2k, parameters);
-@@ -1917,13 +1919,15 @@ void opj_jp2_setup_decoder(opj_jp2_t *jp2, opj_dparameters_t *parameters)
+@@ -1903,13 +1905,15 @@ void opj_jp2_setup_decoder(opj_jp2_t *jp2, opj_dparameters_t *parameters)
OPJ_DPARAMETERS_IGNORE_PCLR_CMAP_CDEF_FLAG;
}
@@ -626,7 +628,7 @@
return opj_j2k_set_threads(jp2->j2k, num_threads);
}
-@@ -1931,11 +1935,12 @@ OPJ_BOOL opj_jp2_set_threads(opj_jp2_t *jp2, OPJ_UINT32 num_threads)
+@@ -1917,11 +1921,12 @@ OPJ_BOOL opj_jp2_set_threads(opj_jp2_t *jp2, OPJ_UINT32 num_threads)
/* JP2 encoder interface */
/* ----------------------------------------------------------------------- */
@@ -640,7 +642,7 @@
OPJ_UINT32 i;
OPJ_UINT32 depth_0;
OPJ_UINT32 sign;
-@@ -2118,18 +2123,20 @@ OPJ_BOOL opj_jp2_setup_encoder(opj_jp2_t *jp2,
+@@ -2108,18 +2113,20 @@ OPJ_BOOL opj_jp2_setup_encoder(opj_jp2_t *jp2,
return OPJ_TRUE;
}
@@ -663,7 +665,7 @@
/* preconditions */
assert(jp2 != 00);
assert(cio != 00);
-@@ -2148,11 +2155,12 @@ OPJ_BOOL opj_jp2_end_decompress(opj_jp2_t *jp2,
+@@ -2138,11 +2145,12 @@ OPJ_BOOL opj_jp2_end_decompress(opj_jp2_t *jp2,
return opj_j2k_end_decompress(jp2->j2k, cio, p_manager);
}
@@ -677,7 +679,7 @@
/* preconditions */
assert(jp2 != 00);
assert(cio != 00);
-@@ -2476,12 +2484,13 @@ static OPJ_BOOL opj_jp2_exec(opj_jp2_t * jp2,
+@@ -2466,12 +2474,13 @@ static OPJ_BOOL opj_jp2_exec(opj_jp2_t * jp2,
return l_result;
}
@@ -692,7 +694,7 @@
/* preconditions */
assert(jp2 != 00);
assert(stream != 00);
-@@ -2849,11 +2849,12 @@ static OPJ_BOOL opj_jp2_read_boxhdr_char(opj_jp2_box_t *box,
+@@ -2844,11 +2853,12 @@ static OPJ_BOOL opj_jp2_read_boxhdr_char(opj_jp2_box_t *box,
}
OPJ_BOOL opj_jp2_read_header(opj_stream_private_t *p_stream,
@@ -706,7 +708,7 @@
int ret;
/* preconditions */
-@@ -2981,7 +2991,7 @@ static OPJ_BOOL opj_jp2_setup_header_reading(opj_jp2_t *jp2,
+@@ -2997,7 +3007,7 @@ static OPJ_BOOL opj_jp2_setup_header_reading(opj_jp2_t *jp2,
return OPJ_TRUE;
}
@@ -715,7 +717,7 @@
OPJ_UINT32 * p_tile_index,
OPJ_UINT32 * p_data_size,
OPJ_INT32 * p_tile_x0,
-@@ -2994,7 +3004,8 @@ OPJ_BOOL opj_jp2_read_tile_header(opj_jp2_t * p_jp2,
+@@ -3010,7 +3020,8 @@ OPJ_BOOL opj_jp2_read_tile_header(opj_jp2_t * p_jp2,
opj_event_mgr_t * p_manager
)
{
@@ -725,7 +727,7 @@
p_tile_index,
p_data_size,
p_tile_x0, p_tile_y0,
-@@ -3005,7 +3016,7 @@ OPJ_BOOL opj_jp2_read_tile_header(opj_jp2_t * p_jp2,
+@@ -3021,7 +3032,7 @@ OPJ_BOOL opj_jp2_read_tile_header(opj_jp2_t * p_jp2,
p_manager);
}
@@ -734,7 +736,7 @@
OPJ_UINT32 p_tile_index,
OPJ_BYTE * p_data,
OPJ_UINT32 p_data_size,
-@@ -3014,11 +3025,12 @@ OPJ_BOOL opj_jp2_write_tile(opj_jp2_t *p_jp2,
+@@ -3030,11 +3041,12 @@ OPJ_BOOL opj_jp2_write_tile(opj_jp2_t *p_jp2,
)
{
@@ -749,7 +751,7 @@
OPJ_UINT32 p_tile_index,
OPJ_BYTE * p_data,
OPJ_UINT32 p_data_size,
-@@ -3026,12 +3038,14 @@ OPJ_BOOL opj_jp2_decode_tile(opj_jp2_t * p_jp2,
+@@ -3042,12 +3054,14 @@ OPJ_BOOL opj_jp2_decode_tile(opj_jp2_t * p_jp2,
opj_event_mgr_t * p_manager
)
{
@@ -766,7 +768,7 @@
if (jp2) {
/* destroy the J2K codec */
opj_j2k_destroy(jp2->j2k);
-@@ -3098,34 +3112,37 @@ void opj_jp2_destroy(opj_jp2_t *jp2)
+@@ -3114,34 +3128,37 @@ void opj_jp2_destroy(opj_jp2_t *jp2)
}
}
@@ -809,7 +811,7 @@
if (!p_image) {
return OPJ_FALSE;
}
-@@ -3234,41 +3251,46 @@ opj_jp2_t* opj_jp2_create(OPJ_BOOL p_is_decoder)
+@@ -3204,41 +3221,46 @@ opj_jp2_t* opj_jp2_create(OPJ_BOOL p_is_decoder)
return jp2;
}
@@ -1118,10 +1120,10 @@
/*@}*/
diff --git a/third_party/libopenjpeg/openjpeg.c b/third_party/libopenjpeg/openjpeg.c
-index 29d3ee528..9dd4256d7 100644
+index 382d8f4f0..b8a4bfb0d 100644
--- a/third_party/libopenjpeg/openjpeg.c
+++ b/third_party/libopenjpeg/openjpeg.c
-@@ -189,85 +189,48 @@ opj_codec_t* OPJ_CALLCONV opj_create_decompress(OPJ_CODEC_FORMAT p_format)
+@@ -194,85 +194,48 @@ opj_codec_t* OPJ_CALLCONV opj_create_decompress(OPJ_CODEC_FORMAT p_format)
switch (p_format) {
case OPJ_CODEC_J2K:
@@ -1223,7 +1225,7 @@
l_codec->m_codec = opj_j2k_create_decompress();
-@@ -280,85 +243,47 @@ opj_codec_t* OPJ_CALLCONV opj_create_decompress(OPJ_CODEC_FORMAT p_format)
+@@ -285,85 +248,47 @@ opj_codec_t* OPJ_CALLCONV opj_create_decompress(OPJ_CODEC_FORMAT p_format)
case OPJ_CODEC_JP2:
/* get a JP2 decoder handle */
@@ -1325,7 +1327,7 @@
l_codec->m_codec = opj_jp2_create(OPJ_TRUE);
-@@ -662,41 +587,25 @@ opj_codec_t* OPJ_CALLCONV opj_create_compress(OPJ_CODEC_FORMAT p_format)
+@@ -667,41 +592,25 @@ opj_codec_t* OPJ_CALLCONV opj_create_compress(OPJ_CODEC_FORMAT p_format)
switch (p_format) {
case OPJ_CODEC_J2K:
@@ -1386,7 +1388,7 @@
l_codec->m_codec = opj_j2k_create_compress();
if (! l_codec->m_codec) {
-@@ -708,41 +617,25 @@ opj_codec_t* OPJ_CALLCONV opj_create_compress(OPJ_CODEC_FORMAT p_format)
+@@ -713,41 +622,25 @@ opj_codec_t* OPJ_CALLCONV opj_create_compress(OPJ_CODEC_FORMAT p_format)
case OPJ_CODEC_JP2:
/* get a JP2 decoder handle */
diff --git a/third_party/libopenjpeg/0047-validate_opj_stream_read_data.patch b/third_party/libopenjpeg/0047-validate_opj_stream_read_data.patch
deleted file mode 100644
index 25c3933..0000000
--- a/third_party/libopenjpeg/0047-validate_opj_stream_read_data.patch
+++ /dev/null
@@ -1,24 +0,0 @@
-commit dea92eea8b6ab55f7eb542ea229b2c2124aa2124
-Author: Even Rouault <even.rouault@spatialys.com>
-Date: Fri Jun 21 15:08:24 2024 +0200
-
- opj_j2k_read_sod(): validate opj_stream_read_data() return to avoid potential later heap-buffer-overflow in in opj_t1_decode_cblk when disabling strict mode
-
- Fixes #1533
-
-diff --git a/src/lib/openjp2/j2k.c b/src/lib/openjp2/j2k.c
-index 293f7579..7cdeffd7 100644
---- a/src/lib/openjp2/j2k.c
-+++ b/src/lib/openjp2/j2k.c
-@@ -5059,6 +5059,11 @@ static OPJ_BOOL opj_j2k_read_sod(opj_j2k_t *p_j2k,
- }
-
- if (l_current_read_size != p_j2k->m_specific_param.m_decoder.m_sot_length) {
-+ if (l_current_read_size == (OPJ_SIZE_T)(-1)) {
-+ /* Avoid issue of https://github.com/uclouvain/openjpeg/issues/1533 */
-+ opj_event_msg(p_manager, EVT_ERROR, "Stream too short\n");
-+ return OPJ_FALSE;
-+ }
- p_j2k->m_specific_param.m_decoder.m_state = J2K_STATE_NEOC;
- } else {
- p_j2k->m_specific_param.m_decoder.m_state = J2K_STATE_TPHSOT;
diff --git a/third_party/libopenjpeg/0048-check_corruption_non_strict_mode.patch b/third_party/libopenjpeg/0048-check_corruption_non_strict_mode.patch
deleted file mode 100644
index f7e597f..0000000
--- a/third_party/libopenjpeg/0048-check_corruption_non_strict_mode.patch
+++ /dev/null
@@ -1,118 +0,0 @@
-commit f3b28c5ee417df9f23ca590b0e949d8a309408a0
-Author: Even Rouault <even.rouault@spatialys.com>
-Date: Mon Jun 24 21:11:21 2024 +0200
-
- Avoid heap-buffer-overflow read on corrupted image in non-strict mode
-
- Fixes #1535
-
-diff --git a/src/lib/openjp2/t1.c b/src/lib/openjp2/t1.c
-index df14ffc1..b5adbf2f 100644
---- a/src/lib/openjp2/t1.c
-+++ b/src/lib/openjp2/t1.c
-@@ -2006,10 +2006,16 @@ static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1,
- opj_mqc_setstate(mqc, T1_CTXNO_AGG, 0, 3);
- opj_mqc_setstate(mqc, T1_CTXNO_ZC, 0, 4);
-
-+ if (cblk->corrupted) {
-+ assert(cblk->numchunks == 0);
-+ return OPJ_TRUE;
-+ }
-+
- /* Even if we have a single chunk, in multi-threaded decoding */
- /* the insertion of our synthetic marker might potentially override */
- /* valid codestream of other codeblocks decoded in parallel. */
-- if (cblk->numchunks > 1 || t1->mustuse_cblkdatabuffer) {
-+ if (cblk->numchunks > 1 || (t1->mustuse_cblkdatabuffer &&
-+ cblk->numchunks > 0)) {
- OPJ_UINT32 i;
- OPJ_UINT32 cblk_len;
-
-diff --git a/src/lib/openjp2/t2.c b/src/lib/openjp2/t2.c
-index 57353bf1..22f2e623 100644
---- a/src/lib/openjp2/t2.c
-+++ b/src/lib/openjp2/t2.c
-@@ -1407,18 +1407,21 @@ static OPJ_BOOL opj_t2_read_packet_data(opj_t2_t* p_t2,
- l_nb_code_blocks = l_prc->cw * l_prc->ch;
- l_cblk = l_prc->cblks.dec;
-
-- for (cblkno = 0; cblkno < l_nb_code_blocks; ++cblkno) {
-+ for (cblkno = 0; cblkno < l_nb_code_blocks; ++cblkno, ++l_cblk) {
- opj_tcd_seg_t *l_seg = 00;
-
-- // if we have a partial data stream, set numchunks to zero
-- // since we have no data to actually decode.
-- if (partial_buffer) {
-- l_cblk->numchunks = 0;
-- }
--
- if (!l_cblk->numnewpasses) {
- /* nothing to do */
-- ++l_cblk;
-+ continue;
-+ }
-+
-+ if (partial_buffer || l_cblk->corrupted) {
-+ /* if a previous segment in this packet couldn't be decoded,
-+ * or if this code block was corrupted in a previous layer,
-+ * then mark it as corrupted.
-+ */
-+ l_cblk->numchunks = 0;
-+ l_cblk->corrupted = OPJ_TRUE;
- continue;
- }
-
-@@ -1451,18 +1454,13 @@ static OPJ_BOOL opj_t2_read_packet_data(opj_t2_t* p_t2,
- "read: segment too long (%d) with max (%d) for codeblock %d (p=%d, b=%d, r=%d, c=%d)\n",
- l_seg->newlen, p_max_length, cblkno, p_pi->precno, bandno, p_pi->resno,
- p_pi->compno);
-- // skip this codeblock since it is a partial read
-+ /* skip this codeblock (and following ones in this
-+ * packet) since it is a partial read
-+ */
- partial_buffer = OPJ_TRUE;
-+ l_cblk->corrupted = OPJ_TRUE;
- l_cblk->numchunks = 0;
--
-- l_seg->numpasses += l_seg->numnewpasses;
-- l_cblk->numnewpasses -= l_seg->numnewpasses;
-- if (l_cblk->numnewpasses > 0) {
-- ++l_seg;
-- ++l_cblk->numsegs;
-- break;
-- }
-- continue;
-+ break;
- }
- }
-
-@@ -1519,7 +1517,7 @@ static OPJ_BOOL opj_t2_read_packet_data(opj_t2_t* p_t2,
- } while (l_cblk->numnewpasses > 0);
-
- l_cblk->real_num_segs = l_cblk->numsegs;
-- ++l_cblk;
-+
- } /* next code_block */
-
- ++l_band;
-@@ -1603,6 +1601,8 @@ static OPJ_BOOL opj_t2_skip_packet_data(opj_t2_t* p_t2,
- "skip: segment too long (%d) with max (%d) for codeblock %d (p=%d, b=%d, r=%d, c=%d)\n",
- l_seg->newlen, p_max_length, cblkno, p_pi->precno, bandno, p_pi->resno,
- p_pi->compno);
-+
-+ *p_data_read = p_max_length;
- return OPJ_TRUE;
- }
- }
-diff --git a/src/lib/openjp2/tcd.h b/src/lib/openjp2/tcd.h
-index cf4e0082..3371b08c 100644
---- a/src/lib/openjp2/tcd.h
-+++ b/src/lib/openjp2/tcd.h
-@@ -141,6 +141,7 @@ typedef struct opj_tcd_cblk_dec {
- OPJ_UINT32 numchunksalloc; /* Number of chunks item allocated */
- /* Decoded code-block. Only used for subtile decoding. Otherwise tilec->data is directly updated */
- OPJ_INT32* decoded_data;
-+ OPJ_BOOL corrupted; /* whether the code block data is corrupted */
- } opj_tcd_cblk_dec_t;
-
- /** Precinct structure */
diff --git a/third_party/libopenjpeg/README.pdfium b/third_party/libopenjpeg/README.pdfium
index 726574e..2bef41a 100644
--- a/third_party/libopenjpeg/README.pdfium
+++ b/third_party/libopenjpeg/README.pdfium
@@ -1,12 +1,12 @@
Name: OpenJPEG
URL: http://www.openjpeg.org/
-Version: 2.5.2 (also update in opj_config*)
-Revision: 39e8c50a2f9bdcf36810ee3d41bcbf1cc78968ae
+Version: 2.5.3 (also update in opj_config*)
+Revision: 210a8a5690d0da66f02d49420d7176a21ef409dc
Security Critical: yes
Shipped: yes
License: 2-clause BSD
License File: LICENSE
-CPEPrefix: cpe:/a:uclouvain:openjpeg:2.5.2
+CPEPrefix: cpe:/a:uclouvain:openjpeg:2.5.3
Description:
JPEG 2000 library.
@@ -32,5 +32,3 @@
0039-opj_mqc_renorme.patch: Remove unused opj_mqc_renorme().
0041-remove_opj_clock.patch: Remove unused opj_clock.h include.
0046-func-ptr-mixup.patch: Prevent mixing up function pointer types.
-0047-validate_opj_stream_read_data.patch: Avoid potential heap-buffer-overflow in non-strict mode.
-0048-check_corruption_non_strict_mode.patch: Avoid heap-buffer-overflow in non-strict mode.
diff --git a/third_party/libopenjpeg/dwt.c b/third_party/libopenjpeg/dwt.c
index b6f0e48..731e7b2 100644
--- a/third_party/libopenjpeg/dwt.c
+++ b/third_party/libopenjpeg/dwt.c
@@ -52,7 +52,7 @@
#ifdef __SSSE3__
#include <tmmintrin.h>
#endif
-#ifdef __AVX2__
+#if (defined(__AVX2__) || defined(__AVX512F__))
#include <immintrin.h>
#endif
@@ -63,7 +63,10 @@
/** @defgroup DWT DWT - Implementation of a discrete wavelet transform */
/*@{*/
-#ifdef __AVX2__
+#if defined(__AVX512F__)
+/** Number of int32 values in a AVX512 register */
+#define VREG_INT_COUNT 16
+#elif defined(__AVX2__)
/** Number of int32 values in a AVX2 register */
#define VREG_INT_COUNT 8
#else
@@ -336,6 +339,51 @@
#endif /* STANDARD_SLOW_VERSION */
+#if defined(__AVX512F__)
+static int32_t loop_short_sse(int32_t len, const int32_t** lf_ptr,
+ const int32_t** hf_ptr, int32_t** out_ptr,
+ int32_t* prev_even)
+{
+ int32_t next_even;
+ __m128i odd, even_m1, unpack1, unpack2;
+ const int32_t batch = (len - 2) / 8; /* each iteration below emits 8 output samples */
+ const __m128i two = _mm_set1_epi32(2); /* rounding term added before the >> 2 */
+ /* 128-bit loop: per pass reads lf[1..4] and hf[0..4], writes 8 interleaved even/odd values to *out_ptr and advances the caller's three pointers; returns the pass count. */
+ for (int32_t i = 0; i < batch; i++) {
+ const __m128i lf_ = _mm_loadu_si128((__m128i*)(*lf_ptr + 1)); /* lf[1..4], unaligned load */
+ const __m128i hf1_ = _mm_loadu_si128((__m128i*)(*hf_ptr)); /* hf[0..3] */
+ const __m128i hf2_ = _mm_loadu_si128((__m128i*)(*hf_ptr + 1)); /* hf[1..4] */
+
+ __m128i even = _mm_add_epi32(hf1_, hf2_); /* even[k] = lf[k+1] - ((hf[k] + hf[k+1] + 2) >> 2) */
+ even = _mm_add_epi32(even, two);
+ even = _mm_srai_epi32(even, 2);
+ even = _mm_sub_epi32(lf_, even);
+
+ next_even = _mm_extract_epi32(even, 3); /* top lane, carried into the next pass via *prev_even */
+ even_m1 = _mm_bslli_si128(even, 4); /* shift up by one 32-bit lane; lane 0 becomes zero */
+ even_m1 = _mm_insert_epi32(even_m1, *prev_even, 0); /* even_m1 = { *prev_even, even[0], even[1], even[2] } */
+
+ /* odd[k] = hf[k] + ((even_m1[k] + even[k]) >> 1), i.e. built from the two neighbouring outputs out[0] + out[2] */
+ odd = _mm_add_epi32(even_m1, even);
+ odd = _mm_srai_epi32(odd, 1);
+ odd = _mm_add_epi32(odd, hf1_);
+
+ unpack1 = _mm_unpacklo_epi32(even_m1, odd); /* { even_m1[0], odd[0], even_m1[1], odd[1] } */
+ unpack2 = _mm_unpackhi_epi32(even_m1, odd); /* { even_m1[2], odd[2], even_m1[3], odd[3] } */
+
+ _mm_storeu_si128((__m128i*)(*out_ptr + 0), unpack1); /* unaligned stores of the 8 interleaved results */
+ _mm_storeu_si128((__m128i*)(*out_ptr + 4), unpack2);
+
+ *prev_even = next_even; /* even[3] is not written in this pass; it is emitted as lane 0 of the next one */
+
+ *out_ptr += 8; /* 8 outputs written ... */
+ *lf_ptr += 4; /* ... from 4 new lf values ... */
+ *hf_ptr += 4; /* ... and 4 new hf values */
+ }
+ return batch; /* number of 8-sample passes performed (caller's use not visible here) */
+}
+#endif
+
#if !defined(STANDARD_SLOW_VERSION)
static void opj_idwt53_h_cas0(OPJ_INT32* tmp,
const OPJ_INT32 sn,
@@ -369,6 +417,145 @@
tmp[len - 1] = in_odd[(len - 1) / 2] + tmp[len - 2];
}
#else
+#if defined(__AVX512F__)
+ OPJ_INT32* out_ptr = tmp;
+ int32_t prev_even = in_even[0] - ((in_odd[0] + 1) >> 1);
+
+ const __m512i permutevar_mask = _mm512_setr_epi32(
+ 0x10, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b,
+ 0x0c, 0x0d, 0x0e);
+ const __m512i store1_perm = _mm512_setr_epi64(0x00, 0x01, 0x08, 0x09, 0x02,
+ 0x03, 0x0a, 0x0b);
+ const __m512i store2_perm = _mm512_setr_epi64(0x04, 0x05, 0x0c, 0x0d, 0x06,
+ 0x07, 0x0e, 0x0f);
+
+ const __m512i two = _mm512_set1_epi32(2);
+
+ int32_t simd_batch_512 = (len - 2) / 32;
+ int32_t leftover;
+
+ for (i = 0; i < simd_batch_512; i++) {
+ const __m512i lf_avx2 = _mm512_loadu_si512((__m512i*)(in_even + 1));
+ const __m512i hf1_avx2 = _mm512_loadu_si512((__m512i*)(in_odd));
+ const __m512i hf2_avx2 = _mm512_loadu_si512((__m512i*)(in_odd + 1));
+ int32_t next_even;
+ __m512i duplicate, even_m1, odd, unpack1, unpack2, store1, store2;
+
+ __m512i even = _mm512_add_epi32(hf1_avx2, hf2_avx2);
+ even = _mm512_add_epi32(even, two);
+ even = _mm512_srai_epi32(even, 2);
+ even = _mm512_sub_epi32(lf_avx2, even);
+
+ next_even = _mm_extract_epi32(_mm512_extracti32x4_epi32(even, 3), 3);
+
+ duplicate = _mm512_set1_epi32(prev_even);
+ even_m1 = _mm512_permutex2var_epi32(even, permutevar_mask, duplicate);
+
+ //out[0] + out[2]
+ odd = _mm512_add_epi32(even_m1, even);
+ odd = _mm512_srai_epi32(odd, 1);
+ odd = _mm512_add_epi32(odd, hf1_avx2);
+
+ unpack1 = _mm512_unpacklo_epi32(even_m1, odd);
+ unpack2 = _mm512_unpackhi_epi32(even_m1, odd);
+
+ store1 = _mm512_permutex2var_epi64(unpack1, store1_perm, unpack2);
+ store2 = _mm512_permutex2var_epi64(unpack1, store2_perm, unpack2);
+
+ _mm512_storeu_si512(out_ptr, store1);
+ _mm512_storeu_si512(out_ptr + 16, store2);
+
+ prev_even = next_even;
+
+ out_ptr += 32;
+ in_even += 16;
+ in_odd += 16;
+ }
+
+ leftover = len - simd_batch_512 * 32;
+ if (leftover > 8) {
+ leftover -= 8 * loop_short_sse(leftover, &in_even, &in_odd, &out_ptr,
+ &prev_even);
+ }
+ out_ptr[0] = prev_even;
+
+ for (j = 1; j < (leftover - 2); j += 2) {
+ out_ptr[2] = in_even[1] - ((in_odd[0] + (in_odd[1]) + 2) >> 2);
+ out_ptr[1] = in_odd[0] + ((out_ptr[0] + out_ptr[2]) >> 1);
+ in_even++;
+ in_odd++;
+ out_ptr += 2;
+ }
+
+ if (len & 1) {
+ out_ptr[2] = in_even[1] - ((in_odd[0] + 1) >> 1);
+ out_ptr[1] = in_odd[0] + ((out_ptr[0] + out_ptr[2]) >> 1);
+ } else { //!(len & 1)
+ out_ptr[1] = in_odd[0] + out_ptr[0];
+ }
+#elif defined(__AVX2__)
+ OPJ_INT32* out_ptr = tmp;
+ int32_t prev_even = in_even[0] - ((in_odd[0] + 1) >> 1);
+
+ const __m256i reg_permutevar_mask_move_right = _mm256_setr_epi32(0x00, 0x00,
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06);
+ const __m256i two = _mm256_set1_epi32(2);
+
+ int32_t simd_batch = (len - 2) / 16;
+ int32_t next_even;
+ __m256i even_m1, odd, unpack1_avx2, unpack2_avx2;
+
+ for (i = 0; i < simd_batch; i++) {
+ const __m256i lf_avx2 = _mm256_loadu_si256((__m256i*)(in_even + 1));
+ const __m256i hf1_avx2 = _mm256_loadu_si256((__m256i*)(in_odd));
+ const __m256i hf2_avx2 = _mm256_loadu_si256((__m256i*)(in_odd + 1));
+
+ __m256i even = _mm256_add_epi32(hf1_avx2, hf2_avx2);
+ even = _mm256_add_epi32(even, two);
+ even = _mm256_srai_epi32(even, 2);
+ even = _mm256_sub_epi32(lf_avx2, even);
+
+ next_even = _mm_extract_epi32(_mm256_extracti128_si256(even, 1), 3);
+ even_m1 = _mm256_permutevar8x32_epi32(even, reg_permutevar_mask_move_right);
+ even_m1 = _mm256_blend_epi32(even_m1, _mm256_set1_epi32(prev_even), (1 << 0));
+
+ //out[0] + out[2]
+ odd = _mm256_add_epi32(even_m1, even);
+ odd = _mm256_srai_epi32(odd, 1);
+ odd = _mm256_add_epi32(odd, hf1_avx2);
+
+ unpack1_avx2 = _mm256_unpacklo_epi32(even_m1, odd);
+ unpack2_avx2 = _mm256_unpackhi_epi32(even_m1, odd);
+
+ _mm_storeu_si128((__m128i*)(out_ptr + 0), _mm256_castsi256_si128(unpack1_avx2));
+ _mm_storeu_si128((__m128i*)(out_ptr + 4), _mm256_castsi256_si128(unpack2_avx2));
+ _mm_storeu_si128((__m128i*)(out_ptr + 8), _mm256_extracti128_si256(unpack1_avx2,
+ 0x1));
+ _mm_storeu_si128((__m128i*)(out_ptr + 12),
+ _mm256_extracti128_si256(unpack2_avx2, 0x1));
+
+ prev_even = next_even;
+
+ out_ptr += 16;
+ in_even += 8;
+ in_odd += 8;
+ }
+ out_ptr[0] = prev_even;
+ for (j = simd_batch * 16 + 1; j < (len - 2); j += 2) {
+ out_ptr[2] = in_even[1] - ((in_odd[0] + in_odd[1] + 2) >> 2);
+ out_ptr[1] = in_odd[0] + ((out_ptr[0] + out_ptr[2]) >> 1);
+ in_even++;
+ in_odd++;
+ out_ptr += 2;
+ }
+
+ if (len & 1) {
+ out_ptr[2] = in_even[1] - ((in_odd[0] + 1) >> 1);
+ out_ptr[1] = in_odd[0] + ((out_ptr[0] + out_ptr[2]) >> 1);
+ } else { //!(len & 1)
+ out_ptr[1] = in_odd[0] + out_ptr[0];
+ }
+#else
OPJ_INT32 d1c, d1n, s1n, s0c, s0n;
assert(len > 1);
@@ -402,7 +589,8 @@
} else {
tmp[len - 1] = d1n + s0n;
}
-#endif
+#endif /*(__AVX512F__ || __AVX2__)*/
+#endif /*TWO_PASS_VERSION*/
memcpy(tiledp, tmp, (OPJ_UINT32)len * sizeof(OPJ_INT32));
}
@@ -516,10 +704,20 @@
#endif
}
-#if (defined(__SSE2__) || defined(__AVX2__)) && !defined(STANDARD_SLOW_VERSION)
+#if (defined(__SSE2__) || defined(__AVX2__) || defined(__AVX512F__)) && !defined(STANDARD_SLOW_VERSION)
/* Conveniency macros to improve the readability of the formulas */
-#if __AVX2__
+#if defined(__AVX512F__)
+#define VREG __m512i
+#define LOAD_CST(x) _mm512_set1_epi32(x)
+#define LOAD(x) _mm512_loadu_si512((const VREG*)(x))
+#define LOADU(x) _mm512_loadu_si512((const VREG*)(x))
+#define STORE(x,y) _mm512_storeu_si512((VREG*)(x),(y))
+#define STOREU(x,y) _mm512_storeu_si512((VREG*)(x),(y))
+#define ADD(x,y) _mm512_add_epi32((x),(y))
+#define SUB(x,y) _mm512_sub_epi32((x),(y))
+#define SAR(x,y) _mm512_srai_epi32((x),(y))
+#elif defined(__AVX2__)
#define VREG __m256i
#define LOAD_CST(x) _mm256_set1_epi32(x)
#define LOAD(x) _mm256_load_si256((const VREG*)(x))
@@ -581,7 +779,10 @@
const VREG two = LOAD_CST(2);
assert(len > 1);
-#if __AVX2__
+#if defined(__AVX512F__)
+ assert(PARALLEL_COLS_53 == 32);
+ assert(VREG_INT_COUNT == 16);
+#elif defined(__AVX2__)
assert(PARALLEL_COLS_53 == 16);
assert(VREG_INT_COUNT == 8);
#else
@@ -589,10 +790,13 @@
assert(VREG_INT_COUNT == 4);
#endif
+// For AVX512 builds, the aligned load/store macros are defined as their unaligned equivalents
+#if !defined(__AVX512F__)
/* Note: loads of input even/odd values must be done in a unaligned */
/* fashion. But stores in tmp can be done with aligned store, since */
/* the temporary buffer is properly aligned */
assert((OPJ_SIZE_T)tmp % (sizeof(OPJ_INT32) * VREG_INT_COUNT) == 0);
+#endif
s1n_0 = LOADU(in_even + 0);
s1n_1 = LOADU(in_even + VREG_INT_COUNT);
@@ -683,7 +887,10 @@
const OPJ_INT32* in_odd = &tiledp_col[0];
assert(len > 2);
-#if __AVX2__
+#if defined(__AVX512F__)
+ assert(PARALLEL_COLS_53 == 32);
+ assert(VREG_INT_COUNT == 16);
+#elif defined(__AVX2__)
assert(PARALLEL_COLS_53 == 16);
assert(VREG_INT_COUNT == 8);
#else
@@ -691,10 +898,13 @@
assert(VREG_INT_COUNT == 4);
#endif
+// For AVX512 builds, the aligned load/store macros are defined as their unaligned equivalents
+#if !defined(__AVX512F__)
/* Note: loads of input even/odd values must be done in a unaligned */
/* fashion. But stores in tmp can be done with aligned store, since */
/* the temporary buffer is properly aligned */
assert((OPJ_SIZE_T)tmp % (sizeof(OPJ_INT32) * VREG_INT_COUNT) == 0);
+#endif
s1_0 = LOADU(in_even + stride);
/* in_odd[0] - ((in_even[0] + s1 + 2) >> 2); */
diff --git a/third_party/libopenjpeg/ht_dec.c b/third_party/libopenjpeg/ht_dec.c
index a554b24..2984f56 100644
--- a/third_party/libopenjpeg/ht_dec.c
+++ b/third_party/libopenjpeg/ht_dec.c
@@ -901,7 +901,7 @@
* X controls this value.
*
* Unstuffing prevent sequences that are more than 0xFF7F from appearing
- * in the conpressed sequence. So whenever a value of 0xFF is coded, the
+ * in the compressed sequence. So whenever a value of 0xFF is coded, the
* MSB of the next byte is set 0 and must be ignored during decoding.
*
* Reading can go beyond the end of buffer by up to 3 bytes.
@@ -1032,7 +1032,7 @@
//************************************************************************/
/** @brief Allocates T1 buffers
*
- * @param [in, out] t1 is codeblock cofficients storage
+ * @param [in, out] t1 is codeblock coefficients storage
* @param [in] w is codeblock width
* @param [in] h is codeblock height
*/
@@ -1120,7 +1120,7 @@
/** @brief Decodes one codeblock, processing the cleanup, siginificance
* propagation, and magnitude refinement pass
*
- * @param [in, out] t1 is codeblock cofficients storage
+ * @param [in, out] t1 is codeblock coefficients storage
* @param [in] cblk is codeblock properties
* @param [in] orient is the subband to which the codeblock belongs (not needed)
* @param [in] roishift is region of interest shift
diff --git a/third_party/libopenjpeg/j2k.c b/third_party/libopenjpeg/j2k.c
index 8b077d0..5e9d750 100644
--- a/third_party/libopenjpeg/j2k.c
+++ b/third_party/libopenjpeg/j2k.c
@@ -2484,6 +2484,11 @@
++l_current_tile_param;
}
+ /* Allocate and initialize some elements of the codestream index */
+ if (!opj_j2k_allocate_tile_element_cstr_index(p_j2k)) {
+ return OPJ_FALSE;
+ }
+
p_j2k->m_specific_param.m_decoder.m_state = J2K_STATE_MH;
opj_image_comp_header_update(l_image, l_cp);
@@ -3657,21 +3662,29 @@
opj_event_mgr_t * p_manager
)
{
- OPJ_UINT32 l_Ztlm, l_Stlm, l_ST, l_SP, l_tot_num_tp_remaining, l_quotient,
- l_Ptlm_size;
+ OPJ_UINT32 l_Ztlm, l_Stlm, l_ST, l_SP,
+ l_Ptlm_size, l_entry_size, l_num_tileparts;
+ OPJ_UINT32 i;
+ opj_j2k_tlm_tile_part_info_t* l_tile_part_infos;
+ opj_j2k_tlm_info_t* l_tlm;
+
/* preconditions */
assert(p_header_data != 00);
assert(p_j2k != 00);
assert(p_manager != 00);
- OPJ_UNUSED(p_j2k);
+ l_tlm = &(p_j2k->m_specific_param.m_decoder.m_tlm);
if (p_header_size < 2) {
- opj_event_msg(p_manager, EVT_ERROR, "Error reading TLM marker\n");
+ opj_event_msg(p_manager, EVT_ERROR, "Error reading TLM marker.\n");
return OPJ_FALSE;
}
p_header_size -= 2;
+ if (l_tlm->m_is_invalid) {
+ return OPJ_TRUE;
+ }
+
opj_read_bytes(p_header_data, &l_Ztlm,
1); /* Ztlm */
++p_header_data;
@@ -3680,27 +3693,83 @@
++p_header_data;
l_ST = ((l_Stlm >> 4) & 0x3);
+ if (l_ST == 3) {
+ l_tlm->m_is_invalid = OPJ_TRUE;
+ opj_event_msg(p_manager, EVT_WARNING,
+ "opj_j2k_read_tlm(): ST = 3 is invalid.\n");
+ return OPJ_TRUE;
+ }
l_SP = (l_Stlm >> 6) & 0x1;
l_Ptlm_size = (l_SP + 1) * 2;
- l_quotient = l_Ptlm_size + l_ST;
+ l_entry_size = l_Ptlm_size + l_ST;
- l_tot_num_tp_remaining = p_header_size % l_quotient;
-
- if (l_tot_num_tp_remaining != 0) {
- opj_event_msg(p_manager, EVT_ERROR, "Error reading TLM marker\n");
- return OPJ_FALSE;
+ if ((p_header_size % l_entry_size) != 0) {
+ l_tlm->m_is_invalid = OPJ_TRUE;
+ opj_event_msg(p_manager, EVT_WARNING,
+ "opj_j2k_read_tlm(): TLM marker not of expected size.\n");
+ return OPJ_TRUE;
}
- /* FIXME Do not care of this at the moment since only local variables are set here */
- /*
- for
- (i = 0; i < l_tot_num_tp; ++i)
- {
- opj_read_bytes(p_header_data,&l_Ttlm_i,l_ST); // Ttlm_i
+
+ l_num_tileparts = p_header_size / l_entry_size;
+ if (l_num_tileparts == 0) {
+ /* not totally sure if this is valid... */
+ return OPJ_TRUE;
+ }
+
+ /* Highly unlikely, unless there are gazillions of TLM markers */
+ if (l_tlm->m_entries_count > UINT32_MAX - l_num_tileparts ||
+ l_tlm->m_entries_count + l_num_tileparts > UINT32_MAX / sizeof(
+ opj_j2k_tlm_tile_part_info_t)) {
+ l_tlm->m_is_invalid = OPJ_TRUE;
+ opj_event_msg(p_manager, EVT_WARNING,
+ "opj_j2k_read_tlm(): too many TLM markers.\n");
+ return OPJ_TRUE;
+ }
+
+ l_tile_part_infos = (opj_j2k_tlm_tile_part_info_t*)opj_realloc(
+ l_tlm->m_tile_part_infos,
+ (l_tlm->m_entries_count + l_num_tileparts) * sizeof(
+ opj_j2k_tlm_tile_part_info_t));
+ if (!l_tile_part_infos) {
+ l_tlm->m_is_invalid = OPJ_TRUE;
+ opj_event_msg(p_manager, EVT_WARNING,
+ "opj_j2k_read_tlm(): cannot allocate m_tile_part_infos.\n");
+ return OPJ_TRUE;
+ }
+
+ l_tlm->m_tile_part_infos = l_tile_part_infos;
+
+ for (i = 0; i < l_num_tileparts; ++ i) {
+ OPJ_UINT32 l_tile_index;
+ OPJ_UINT32 l_length;
+
+ /* Read Ttlm_i */
+ if (l_ST == 0) {
+ l_tile_index = l_tlm->m_entries_count;
+ } else {
+ opj_read_bytes(p_header_data, &l_tile_index, l_ST);
p_header_data += l_ST;
- opj_read_bytes(p_header_data,&l_Ptlm_i,l_Ptlm_size); // Ptlm_i
- p_header_data += l_Ptlm_size;
- }*/
+ }
+
+ if (l_tile_index >= p_j2k->m_cp.tw * p_j2k->m_cp.th) {
+ l_tlm->m_is_invalid = OPJ_TRUE;
+ opj_event_msg(p_manager, EVT_WARNING,
+ "opj_j2k_read_tlm(): invalid tile number %d\n",
+ l_tile_index);
+ return OPJ_TRUE;
+ }
+
+ /* Read Ptlm_i */
+ opj_read_bytes(p_header_data, &l_length, l_Ptlm_size);
+ p_header_data += l_Ptlm_size;
+
+ l_tile_part_infos[l_tlm->m_entries_count].m_tile_index =
+ (OPJ_UINT16)l_tile_index;
+ l_tile_part_infos[l_tlm->m_entries_count].m_length = l_length;
+ ++l_tlm->m_entries_count;
+ }
+
return OPJ_TRUE;
}
@@ -4583,14 +4652,26 @@
}
/* Index */
- if (p_j2k->cstr_index) {
+ {
assert(p_j2k->cstr_index->tile_index != 00);
p_j2k->cstr_index->tile_index[p_j2k->m_current_tile_number].tileno =
p_j2k->m_current_tile_number;
p_j2k->cstr_index->tile_index[p_j2k->m_current_tile_number].current_tpsno =
l_current_part;
- if (l_num_parts != 0) {
+ if (!p_j2k->m_specific_param.m_decoder.m_tlm.m_is_invalid &&
+ l_num_parts >
+ p_j2k->cstr_index->tile_index[p_j2k->m_current_tile_number].nb_tps) {
+ opj_event_msg(p_manager, EVT_WARNING,
+ "SOT marker for tile %u declares more tile-parts than found in TLM marker.",
+ p_j2k->m_current_tile_number);
+ p_j2k->m_specific_param.m_decoder.m_tlm.m_is_invalid = OPJ_TRUE;
+ }
+
+ if (!p_j2k->m_specific_param.m_decoder.m_tlm.m_is_invalid) {
+ /* do nothing */
+ } else if (l_num_parts != 0) {
+
p_j2k->cstr_index->tile_index[p_j2k->m_current_tile_number].nb_tps =
l_num_parts;
p_j2k->cstr_index->tile_index[p_j2k->m_current_tile_number].current_nb_tps =
@@ -4661,33 +4742,6 @@
}
- /* FIXME move this onto a separate method to call before reading any SOT, remove part about main_end header, use a index struct inside p_j2k */
- /* if (p_j2k->cstr_info) {
- if (l_tcp->first) {
- if (tileno == 0) {
- p_j2k->cstr_info->main_head_end = p_stream_tell(p_stream) - 13;
- }
-
- p_j2k->cstr_info->tile[tileno].tileno = tileno;
- p_j2k->cstr_info->tile[tileno].start_pos = p_stream_tell(p_stream) - 12;
- p_j2k->cstr_info->tile[tileno].end_pos = p_j2k->cstr_info->tile[tileno].start_pos + totlen - 1;
- p_j2k->cstr_info->tile[tileno].num_tps = numparts;
-
- if (numparts) {
- p_j2k->cstr_info->tile[tileno].tp = (opj_tp_info_t *) opj_malloc(numparts * sizeof(opj_tp_info_t));
- }
- else {
- p_j2k->cstr_info->tile[tileno].tp = (opj_tp_info_t *) opj_malloc(10 * sizeof(opj_tp_info_t)); // Fixme (10)
- }
- }
- else {
- p_j2k->cstr_info->tile[tileno].end_pos += totlen;
- }
-
- p_j2k->cstr_info->tile[tileno].tp[partno].tp_start_pos = p_stream_tell(p_stream) - 12;
- p_j2k->cstr_info->tile[tileno].tp[partno].tp_end_pos =
- p_j2k->cstr_info->tile[tileno].tp[partno].tp_start_pos + totlen - 1;
- }*/
return OPJ_TRUE;
}
@@ -5023,7 +5077,7 @@
/* Index */
l_cstr_index = p_j2k->cstr_index;
- if (l_cstr_index) {
+ {
OPJ_OFF_T l_current_pos = opj_stream_tell(p_stream) - 2;
OPJ_UINT32 l_current_tile_part =
@@ -6716,6 +6770,9 @@
opj_j2k_t* j2k = (opj_j2k_t*)p_j2k;
if (j2k) {
j2k->m_cp.strict = strict;
+ if (strict) {
+ j2k->m_specific_param.m_decoder.m_nb_tile_parts_correction_checked = 1;
+ }
}
}
@@ -8264,7 +8321,14 @@
tccp->qmfbid = parameters->irreversible ? 0 : 1;
tccp->qntsty = parameters->irreversible ? J2K_CCP_QNTSTY_SEQNT :
J2K_CCP_QNTSTY_NOQNT;
- tccp->numgbits = 2;
+
+ if (OPJ_IS_CINEMA(parameters->rsiz) &&
+ parameters->rsiz == OPJ_PROFILE_CINEMA_2K) {
+ /* From https://github.com/uclouvain/openjpeg/issues/1340 */
+ tccp->numgbits = 1;
+ } else {
+ tccp->numgbits = 2;
+ }
if ((OPJ_INT32)i == parameters->roi_compno) {
tccp->roishift = parameters->roi_shift;
@@ -8403,7 +8467,8 @@
if (type == J2K_MS_SOT) {
OPJ_UINT32 l_current_tile_part = cstr_index->tile_index[tileno].current_tpsno;
- if (cstr_index->tile_index[tileno].tp_index) {
+ if (cstr_index->tile_index[tileno].tp_index &&
+ l_current_tile_part < cstr_index->tile_index[tileno].nb_tps) {
cstr_index->tile_index[tileno].tp_index[l_current_tile_part].start_pos = pos;
}
@@ -8481,13 +8546,6 @@
/* Copy codestream image information to the output image */
opj_copy_image_header(p_j2k->m_private_image, *p_image);
- /*Allocate and initialize some elements of codestrem index*/
- if (!opj_j2k_allocate_tile_element_cstr_index(p_j2k)) {
- opj_image_destroy(*p_image);
- *p_image = NULL;
- return OPJ_FALSE;
- }
-
return OPJ_TRUE;
}
@@ -8839,6 +8897,87 @@
return l_is_valid;
}
+/** Build cstr_index->tile_index[].tp_index[] (start_pos/end_pos) from the TLM
+ * entries, laying tile-parts out back to back starting at main_head_end.
+ * p_j2k: decoder whose index is filled. p_manager: receives error messages.
+ * On failure the TLM info is flagged invalid and the partial index is undone. */
+static void opj_j2k_build_tp_index_from_tlm(opj_j2k_t* p_j2k,
+ opj_event_mgr_t * p_manager)
+{
+ opj_j2k_tlm_info_t* l_tlm;
+ OPJ_UINT32 i;
+ OPJ_OFF_T l_cur_offset;
+
+ assert(p_j2k->cstr_index->main_head_end > 0);
+ assert(p_j2k->cstr_index->nb_of_tiles > 0);
+ assert(p_j2k->cstr_index->tile_index != NULL);
+ l_tlm = &(p_j2k->m_specific_param.m_decoder.m_tlm);
+
+ if (l_tlm->m_entries_count == 0) {
+ l_tlm->m_is_invalid = OPJ_TRUE;
+ }
+ if (l_tlm->m_is_invalid) {
+ return;
+ }
+
+ /* Initial pass to count the number of tile-parts per tile */
+ for (i = 0; i < l_tlm->m_entries_count; ++i) {
+ OPJ_UINT32 l_tile_index_no = l_tlm->m_tile_part_infos[i].m_tile_index;
+ assert(l_tile_index_no < p_j2k->cstr_index->nb_of_tiles);
+ p_j2k->cstr_index->tile_index[l_tile_index_no].tileno = l_tile_index_no;
+ ++p_j2k->cstr_index->tile_index[l_tile_index_no].current_nb_tps;
+ }
+
+ /* Now check that all tiles have at least one tile-part */
+ for (i = 0; i < p_j2k->cstr_index->nb_of_tiles; ++i) {
+ if (p_j2k->cstr_index->tile_index[i].current_nb_tps == 0) {
+ opj_event_msg(p_manager, EVT_ERROR,
+ "opj_j2k_build_tp_index_from_tlm(): tile %u has no "
+ "registered tile-part in TLM marker segments.\n", i);
+ goto error;
+ }
+ }
+
+ /* Final pass to fill p_j2k->cstr_index */
+ l_cur_offset = p_j2k->cstr_index->main_head_end;
+ for (i = 0; i < l_tlm->m_entries_count; ++i) {
+ OPJ_UINT32 l_tile_index_no = l_tlm->m_tile_part_infos[i].m_tile_index;
+ opj_tile_index_t* l_tile_index = &
+ (p_j2k->cstr_index->tile_index[l_tile_index_no]);
+ if (!l_tile_index->tp_index) {
+ l_tile_index->tp_index = (opj_tp_index_t *) opj_calloc(
+ l_tile_index->current_nb_tps, sizeof(opj_tp_index_t));
+ if (! l_tile_index->tp_index) {
+ opj_event_msg(p_manager, EVT_ERROR,
+ "opj_j2k_build_tp_index_from_tlm(): tile index allocation failed\n");
+ goto error;
+ }
+ }
+
+ assert(l_tile_index->nb_tps < l_tile_index->current_nb_tps);
+ l_tile_index->tp_index[l_tile_index->nb_tps].start_pos = l_cur_offset;
+ /* tp_index[].end_header is not set here: TLM does not tell where SOD is. */
+ /* (It would be l_cur_offset + 12 only with no marker between SOT and SOD.) */
+ l_tile_index->tp_index[l_tile_index->nb_tps].end_pos = l_cur_offset +
+ l_tlm->m_tile_part_infos[i].m_length;
+ ++l_tile_index->nb_tps;
+ l_cur_offset += l_tlm->m_tile_part_infos[i].m_length;
+ }
+
+ return;
+
+error:
+ l_tlm->m_is_invalid = OPJ_TRUE;
+ for (i = 0; i < l_tlm->m_entries_count; ++i) {
+ OPJ_UINT32 l_tile_index = l_tlm->m_tile_part_infos[i].m_tile_index;
+ /* Reset both counters: a stale nb_tps must not outlive the freed tp_index. */
+ p_j2k->cstr_index->tile_index[l_tile_index].nb_tps = 0;
+ p_j2k->cstr_index->tile_index[l_tile_index].current_nb_tps = 0;
+ opj_free(p_j2k->cstr_index->tile_index[l_tile_index].tp_index);
+ p_j2k->cstr_index->tile_index[l_tile_index].tp_index = NULL;
+ }
+}
+
static OPJ_BOOL opj_j2k_read_header_procedure(opj_j2k_t *p_j2k,
opj_stream_private_t *p_stream,
opj_event_mgr_t * p_manager)
@@ -9018,6 +9157,9 @@
/* Position of the last element if the main header */
p_j2k->cstr_index->main_head_end = (OPJ_UINT32) opj_stream_tell(p_stream) - 2;
+ /* Build tile-part index from TLM information */
+ opj_j2k_build_tp_index_from_tlm(p_j2k, p_manager);
+
/* Next step: read a tile-part header */
p_j2k->m_specific_param.m_decoder.m_state = J2K_STATE_TPHSOT;
@@ -9242,6 +9384,12 @@
p_j2k->m_specific_param.m_decoder.m_comps_indices_to_decode = 00;
p_j2k->m_specific_param.m_decoder.m_numcomps_to_decode = 0;
+ opj_free(p_j2k->m_specific_param.m_decoder.m_tlm.m_tile_part_infos);
+ p_j2k->m_specific_param.m_decoder.m_tlm.m_tile_part_infos = NULL;
+
+ opj_free(p_j2k->m_specific_param.m_decoder.m_intersecting_tile_parts_offset);
+ p_j2k->m_specific_param.m_decoder.m_intersecting_tile_parts_offset = NULL;
+
} else {
if (p_j2k->m_specific_param.m_encoder.m_encoded_tile_data) {
@@ -9593,6 +9741,39 @@
while ((!p_j2k->m_specific_param.m_decoder.m_can_decode) &&
(l_current_marker != J2K_MS_EOC)) {
+ if (p_j2k->m_specific_param.m_decoder.m_num_intersecting_tile_parts > 0 &&
+ p_j2k->m_specific_param.m_decoder.m_idx_intersecting_tile_parts <
+ p_j2k->m_specific_param.m_decoder.m_num_intersecting_tile_parts) {
+ OPJ_OFF_T next_tp_sot_pos;
+
+ next_tp_sot_pos =
+ p_j2k->m_specific_param.m_decoder.m_intersecting_tile_parts_offset[p_j2k->m_specific_param.m_decoder.m_idx_intersecting_tile_parts];
+ ++p_j2k->m_specific_param.m_decoder.m_idx_intersecting_tile_parts;
+ if (!(opj_stream_read_seek(p_stream,
+ next_tp_sot_pos,
+ p_manager))) {
+ opj_event_msg(p_manager, EVT_ERROR, "Problem with seek function\n");
+ return OPJ_FALSE;
+ }
+
+ /* Try to read 2 bytes (the marker ID) from stream and copy them into the buffer */
+ if (opj_stream_read_data(p_stream,
+ p_j2k->m_specific_param.m_decoder.m_header_data, 2, p_manager) != 2) {
+ opj_event_msg(p_manager, EVT_ERROR, "Stream too short\n");
+ return OPJ_FALSE;
+ }
+
+ /* Read 2 bytes from the buffer as the marker ID */
+ opj_read_bytes(p_j2k->m_specific_param.m_decoder.m_header_data,
+ &l_current_marker,
+ 2);
+
+ if (l_current_marker != J2K_MS_SOT) {
+ opj_event_msg(p_manager, EVT_ERROR, "Did not get expected SOT marker\n");
+ return OPJ_FALSE;
+ }
+ }
+
/* Try to read until the Start Of Data is detected */
while (l_current_marker != J2K_MS_SOD) {
@@ -9626,7 +9807,13 @@
}
/* Why this condition? FIXME */
- if (p_j2k->m_specific_param.m_decoder.m_state & J2K_STATE_TPH) {
+ if ((p_j2k->m_specific_param.m_decoder.m_state & J2K_STATE_TPH) &&
+ p_j2k->m_specific_param.m_decoder.m_sot_length != 0) {
+ if (p_j2k->m_specific_param.m_decoder.m_sot_length < l_marker_size + 2) {
+ opj_event_msg(p_manager, EVT_ERROR,
+ "Sot length is less than marker size + marker ID\n");
+ return OPJ_FALSE;
+ }
p_j2k->m_specific_param.m_decoder.m_sot_length -= (l_marker_size + 2);
}
l_marker_size -= 2; /* Subtract the size of the marker ID already read */
@@ -9736,14 +9923,78 @@
if (! opj_j2k_read_sod(p_j2k, p_stream, p_manager)) {
return OPJ_FALSE;
}
+
+ /* Check if we can use the TLM index to access the next tile-part */
+ if (!p_j2k->m_specific_param.m_decoder.m_can_decode &&
+ p_j2k->m_specific_param.m_decoder.m_tile_ind_to_dec >= 0 &&
+ p_j2k->m_current_tile_number == (OPJ_UINT32)
+ p_j2k->m_specific_param.m_decoder.m_tile_ind_to_dec &&
+ !p_j2k->m_specific_param.m_decoder.m_tlm.m_is_invalid &&
+ opj_stream_has_seek(p_stream)) {
+ l_tcp = p_j2k->m_cp.tcps + p_j2k->m_current_tile_number;
+ if (l_tcp->m_nb_tile_parts ==
+ p_j2k->cstr_index->tile_index[p_j2k->m_current_tile_number].nb_tps &&
+ (OPJ_UINT32)l_tcp->m_current_tile_part_number + 1 < l_tcp->m_nb_tile_parts) {
+ const OPJ_OFF_T next_tp_sot_pos = p_j2k->cstr_index->tile_index[
+ p_j2k->m_current_tile_number].tp_index[l_tcp->m_current_tile_part_number +
+ 1].start_pos;
+
+ if (next_tp_sot_pos != opj_stream_tell(p_stream)) {
+#if 0
+ opj_event_msg(p_manager, EVT_INFO,
+ "opj_j2k_read_tile_header(tile=%u): seek to tile part %u at %" PRId64 "\n",
+ p_j2k->m_current_tile_number,
+ l_tcp->m_current_tile_part_number + 1,
+ next_tp_sot_pos);
+#endif
+
+ if (!(opj_stream_read_seek(p_stream,
+ next_tp_sot_pos,
+ p_manager))) {
+ opj_event_msg(p_manager, EVT_ERROR, "Problem with seek function\n");
+ return OPJ_FALSE;
+ }
+ }
+
+ /* Try to read 2 bytes (the marker ID) from stream and copy them into the buffer */
+ if (opj_stream_read_data(p_stream,
+ p_j2k->m_specific_param.m_decoder.m_header_data, 2, p_manager) != 2) {
+ opj_event_msg(p_manager, EVT_ERROR, "Stream too short\n");
+ return OPJ_FALSE;
+ }
+
+ /* Read 2 bytes from the buffer as the marker ID */
+ opj_read_bytes(p_j2k->m_specific_param.m_decoder.m_header_data,
+ &l_current_marker,
+ 2);
+
+ if (l_current_marker != J2K_MS_SOT) {
+ opj_event_msg(p_manager, EVT_ERROR, "Did not get expected SOT marker\n");
+ return OPJ_FALSE;
+ }
+
+ continue;
+ }
+ }
+
if (p_j2k->m_specific_param.m_decoder.m_can_decode &&
!p_j2k->m_specific_param.m_decoder.m_nb_tile_parts_correction_checked) {
/* Issue 254 */
- OPJ_BOOL l_correction_needed;
+ OPJ_BOOL l_correction_needed = OPJ_FALSE;
p_j2k->m_specific_param.m_decoder.m_nb_tile_parts_correction_checked = 1;
- if (!opj_j2k_need_nb_tile_parts_correction(p_stream,
- p_j2k->m_current_tile_number, &l_correction_needed, p_manager)) {
+ if (p_j2k->m_cp.tcps[p_j2k->m_current_tile_number].m_nb_tile_parts == 1) {
+ /* Skip opj_j2k_need_nb_tile_parts_correction() if there is
+ * only a single tile part declared. The
+ * opj_j2k_need_nb_tile_parts_correction() hack was needed
+ * for files with 5 declared tileparts (where they were
+ * actually 6).
+ * Doing it systematically hurts performance when reading
+ * Sentinel2 L1C JPEG2000 files as explained in
+ * https://lists.osgeo.org/pipermail/gdal-dev/2024-November/059805.html
+ */
+ } else if (!opj_j2k_need_nb_tile_parts_correction(p_stream,
+ p_j2k->m_current_tile_number, &l_correction_needed, p_manager)) {
opj_event_msg(p_manager, EVT_ERROR,
"opj_j2k_apply_nb_tile_parts_correction error\n");
return OPJ_FALSE;
@@ -11335,6 +11586,17 @@
OPJ_UINT32 l_acc_nb_of_tile_part = 0;
for (it_tile = 0; it_tile < cstr_index->nb_of_tiles ; it_tile++) {
l_acc_nb_of_tile_part += cstr_index->tile_index[it_tile].nb_tps;
+
+ /* To avoid regenerating expected opj_dump results from the test */
+ /* suite when there is a TLM marker present */
+ if (cstr_index->tile_index[it_tile].nb_tps &&
+ cstr_index->tile_index[it_tile].tp_index &&
+ cstr_index->tile_index[it_tile].tp_index[0].start_pos > 0 &&
+ cstr_index->tile_index[it_tile].tp_index[0].end_header == 0 &&
+ getenv("OJP_DO_NOT_DISPLAY_TILE_INDEX_IF_TLM") != NULL) {
+ l_acc_nb_of_tile_part = 0;
+ break;
+ }
}
if (l_acc_nb_of_tile_part) {
@@ -11700,6 +11962,18 @@
return OPJ_TRUE;
}
+/* qsort() comparator: orders two OPJ_OFF_T values by increasing offset. */
+static int CompareOffT(const void* a, const void* b)
+{
+ const OPJ_OFF_T lhs = *(const OPJ_OFF_T*)a;
+ const OPJ_OFF_T rhs = *(const OPJ_OFF_T*)b;
+
+ if (lhs == rhs) {
+ return 0;
+ }
+ /* Explicit comparison rather than a subtraction, whose result may not fit in an int. */
+ return (lhs < rhs) ? -1 : 1;
+}
static OPJ_BOOL opj_j2k_decode_tiles(opj_j2k_t *p_j2k,
opj_stream_private_t *p_stream,
@@ -11710,6 +11984,7 @@
OPJ_INT32 l_tile_x0, l_tile_y0, l_tile_x1, l_tile_y1;
OPJ_UINT32 l_nb_comps;
OPJ_UINT32 nr_tiles = 0;
+ OPJ_OFF_T end_pos = 0;
/* Particular case for whole single tile decoding */
/* We can avoid allocating intermediate tile buffers */
@@ -11732,8 +12007,9 @@
return OPJ_FALSE;
}
- if (! opj_j2k_decode_tile(p_j2k, l_current_tile_no, NULL, 0,
- p_stream, p_manager)) {
+ if (!l_go_on ||
+ ! opj_j2k_decode_tile(p_j2k, l_current_tile_no, NULL, 0,
+ p_stream, p_manager)) {
opj_event_msg(p_manager, EVT_ERROR, "Failed to decode tile 1/1\n");
return OPJ_FALSE;
}
@@ -11751,6 +12027,77 @@
return OPJ_TRUE;
}
+ p_j2k->m_specific_param.m_decoder.m_num_intersecting_tile_parts = 0;
+ p_j2k->m_specific_param.m_decoder.m_idx_intersecting_tile_parts = 0;
+ opj_free(p_j2k->m_specific_param.m_decoder.m_intersecting_tile_parts_offset);
+ p_j2k->m_specific_param.m_decoder.m_intersecting_tile_parts_offset = NULL;
+
+ /* If the area to decode only intersects a subset of tiles, and we have
+ * valid TLM information, then use it to plan the tilepart offsets to
+ * seek to.
+ */
+ if (!(p_j2k->m_specific_param.m_decoder.m_start_tile_x == 0 &&
+ p_j2k->m_specific_param.m_decoder.m_start_tile_y == 0 &&
+ p_j2k->m_specific_param.m_decoder.m_end_tile_x == p_j2k->m_cp.tw &&
+ p_j2k->m_specific_param.m_decoder.m_end_tile_y == p_j2k->m_cp.th) &&
+ !p_j2k->m_specific_param.m_decoder.m_tlm.m_is_invalid &&
+ opj_stream_has_seek(p_stream)) {
+ OPJ_UINT32 m_num_intersecting_tile_parts = 0;
+
+ OPJ_UINT32 j;
+ for (j = 0; j < p_j2k->m_cp.tw * p_j2k->m_cp.th; ++j) {
+ if (p_j2k->cstr_index->tile_index[j].nb_tps > 0 &&
+ p_j2k->cstr_index->tile_index[j].tp_index[
+ p_j2k->cstr_index->tile_index[j].nb_tps - 1].end_pos > end_pos) {
+ end_pos = p_j2k->cstr_index->tile_index[j].tp_index[
+ p_j2k->cstr_index->tile_index[j].nb_tps - 1].end_pos;
+ }
+ }
+
+ for (j = p_j2k->m_specific_param.m_decoder.m_start_tile_y;
+ j < p_j2k->m_specific_param.m_decoder.m_end_tile_y; ++j) {
+ OPJ_UINT32 i;
+ for (i = p_j2k->m_specific_param.m_decoder.m_start_tile_x;
+ i < p_j2k->m_specific_param.m_decoder.m_end_tile_x; ++i) {
+ const OPJ_UINT32 tile_number = j * p_j2k->m_cp.tw + i;
+ m_num_intersecting_tile_parts +=
+ p_j2k->cstr_index->tile_index[tile_number].nb_tps;
+ }
+ }
+
+ p_j2k->m_specific_param.m_decoder.m_intersecting_tile_parts_offset =
+ (OPJ_OFF_T*)
+ opj_malloc(m_num_intersecting_tile_parts * sizeof(OPJ_OFF_T));
+ if (m_num_intersecting_tile_parts > 0 &&
+ p_j2k->m_specific_param.m_decoder.m_intersecting_tile_parts_offset) {
+ OPJ_UINT32 idx = 0;
+ for (j = p_j2k->m_specific_param.m_decoder.m_start_tile_y;
+ j < p_j2k->m_specific_param.m_decoder.m_end_tile_y; ++j) {
+ OPJ_UINT32 i;
+ for (i = p_j2k->m_specific_param.m_decoder.m_start_tile_x;
+ i < p_j2k->m_specific_param.m_decoder.m_end_tile_x; ++i) {
+ const OPJ_UINT32 tile_number = j * p_j2k->m_cp.tw + i;
+ OPJ_UINT32 k;
+ for (k = 0; k < p_j2k->cstr_index->tile_index[tile_number].nb_tps; ++k) {
+ const OPJ_OFF_T next_tp_sot_pos =
+ p_j2k->cstr_index->tile_index[tile_number].tp_index[k].start_pos;
+ p_j2k->m_specific_param.m_decoder.m_intersecting_tile_parts_offset[idx] =
+ next_tp_sot_pos;
+ ++idx;
+ }
+ }
+ }
+
+ p_j2k->m_specific_param.m_decoder.m_num_intersecting_tile_parts = idx;
+
+ /* Sort by increasing offset */
+ qsort(p_j2k->m_specific_param.m_decoder.m_intersecting_tile_parts_offset,
+ p_j2k->m_specific_param.m_decoder.m_num_intersecting_tile_parts,
+ sizeof(OPJ_OFF_T),
+ CompareOffT);
+ }
+ }
+
for (;;) {
if (p_j2k->m_cp.tw == 1 && p_j2k->m_cp.th == 1 &&
p_j2k->m_cp.tcps[0].m_data != NULL) {
@@ -11810,6 +12157,12 @@
if (++nr_tiles == p_j2k->m_cp.th * p_j2k->m_cp.tw) {
break;
}
+ if (p_j2k->m_specific_param.m_decoder.m_num_intersecting_tile_parts > 0 &&
+ p_j2k->m_specific_param.m_decoder.m_idx_intersecting_tile_parts ==
+ p_j2k->m_specific_param.m_decoder.m_num_intersecting_tile_parts) {
+ opj_stream_seek(p_stream, end_pos + 2, p_manager);
+ break;
+ }
}
if (! opj_j2k_are_all_used_components_decoded(p_j2k, p_manager)) {
@@ -11853,12 +12206,6 @@
OPJ_UINT32 l_nb_tiles;
OPJ_UINT32 i;
- /*Allocate and initialize some elements of codestrem index if not already done*/
- if (!p_j2k->cstr_index->tile_index) {
- if (!opj_j2k_allocate_tile_element_cstr_index(p_j2k)) {
- return OPJ_FALSE;
- }
- }
/* Move into the codestream to the first SOT used to decode the desired tile */
l_tile_no_to_dec = (OPJ_UINT32)
p_j2k->m_specific_param.m_decoder.m_tile_ind_to_dec;
@@ -11873,12 +12220,38 @@
return OPJ_FALSE;
}
} else {
+ OPJ_OFF_T sot_pos =
+ p_j2k->cstr_index->tile_index[l_tile_no_to_dec].tp_index[0].start_pos;
+ OPJ_UINT32 l_marker;
+
+#if 0
+ opj_event_msg(p_manager, EVT_INFO,
+ "opj_j2k_decode_one_tile(%u): seek to %" PRId64 "\n",
+ l_tile_no_to_dec,
+ sot_pos);
+#endif
if (!(opj_stream_read_seek(p_stream,
- p_j2k->cstr_index->tile_index[l_tile_no_to_dec].tp_index[0].start_pos + 2,
+ sot_pos,
p_manager))) {
opj_event_msg(p_manager, EVT_ERROR, "Problem with seek function\n");
return OPJ_FALSE;
}
+
+ /* Try to read 2 bytes (the marker ID) from stream and copy them into the buffer */
+ if (opj_stream_read_data(p_stream,
+ p_j2k->m_specific_param.m_decoder.m_header_data, 2, p_manager) != 2) {
+ opj_event_msg(p_manager, EVT_ERROR, "Stream too short\n");
+ return OPJ_FALSE;
+ }
+
+ /* Read 2 bytes from the buffer as the marker ID */
+ opj_read_bytes(p_j2k->m_specific_param.m_decoder.m_header_data, &l_marker,
+ 2);
+
+ if (l_marker != J2K_MS_SOT) {
+ opj_event_msg(p_manager, EVT_ERROR, "Did not get expected SOT marker\n");
+ return OPJ_FALSE;
+ }
}
/* Special case if we have previously read the EOC marker (if the previous tile getted is the last ) */
if (p_j2k->m_specific_param.m_decoder.m_state == J2K_STATE_EOC) {
diff --git a/third_party/libopenjpeg/j2k.h b/third_party/libopenjpeg/j2k.h
index 08dc35a..01baf47 100644
--- a/third_party/libopenjpeg/j2k.h
+++ b/third_party/libopenjpeg/j2k.h
@@ -466,6 +466,24 @@
/* <<UniPG */
} opj_cp_t;
+/** Entry of a TLM marker segment */
+typedef struct opj_j2k_tlm_tile_part_info {
+ /** Tile index of the tile part. Ttlmi field */
+ OPJ_UINT16 m_tile_index;
+ /** Length in bytes, from the beginning of the SOT marker to the end of
+ * the bit stream data for that tile-part. Ptlmi field */
+ OPJ_UINT32 m_length;
+} opj_j2k_tlm_tile_part_info_t;
+
+/** Information obtained from the concatenation of TLM marker segments. */
+typedef struct opj_j2k_tlm_info {
+ /** Number of entries in m_tile_part_infos. */
+ OPJ_UINT32 m_entries_count;
+ /** Array of m_entries_count values. */
+ opj_j2k_tlm_tile_part_info_t* m_tile_part_infos;
+
+ OPJ_BOOL m_is_invalid;
+} opj_j2k_tlm_info_t;
typedef struct opj_j2k_dec {
/** locate in which part of the codestream the decoder is (main header, tile header, end) */
@@ -499,6 +517,18 @@
OPJ_UINT32 m_numcomps_to_decode;
OPJ_UINT32 *m_comps_indices_to_decode;
+ opj_j2k_tlm_info_t m_tlm;
+
+ /** Below is used when there's TLM information available and we use
+ * opj_set_decoded_area() to a subset of all tiles.
+ */
+ /* Current index in m_intersecting_tile_parts_offset[] to seek to */
+ OPJ_UINT32 m_idx_intersecting_tile_parts;
+ /* Number of elements of m_intersecting_tile_parts_offset[] */
+ OPJ_UINT32 m_num_intersecting_tile_parts;
+ /* Start offset of contributing tile parts */
+ OPJ_OFF_T* m_intersecting_tile_parts_offset;
+
/** to tell that a tile can be decoded. */
OPJ_BITFIELD m_can_decode : 1;
OPJ_BITFIELD m_discard_tiles : 1;
diff --git a/third_party/libopenjpeg/jp2.c b/third_party/libopenjpeg/jp2.c
index d69178b..6b4d5db 100644
--- a/third_party/libopenjpeg/jp2.c
+++ b/third_party/libopenjpeg/jp2.c
@@ -2010,12 +2010,16 @@
jp2->enumcs = 0;
} else {
jp2->meth = 1;
- if (image->color_space == 1) {
+ if (image->color_space == OPJ_CLRSPC_SRGB) {
jp2->enumcs = 16; /* sRGB as defined by IEC 61966-2-1 */
- } else if (image->color_space == 2) {
- jp2->enumcs = 17; /* greyscale */
- } else if (image->color_space == 3) {
+ } else if (image->color_space == OPJ_CLRSPC_GRAY) {
+ jp2->enumcs = 17;
+ } else if (image->color_space == OPJ_CLRSPC_SYCC) {
jp2->enumcs = 18; /* YUV */
+ } else if (image->color_space == OPJ_CLRSPC_EYCC) {
+ jp2->enumcs = 24;
+ } else if (image->color_space == OPJ_CLRSPC_CMYK) {
+ jp2->enumcs = 12;
}
}
diff --git a/third_party/libopenjpeg/openjpeg.h b/third_party/libopenjpeg/openjpeg.h
index 67d168b..59abd32 100644
--- a/third_party/libopenjpeg/openjpeg.h
+++ b/third_party/libopenjpeg/openjpeg.h
@@ -546,7 +546,7 @@
} opj_cparameters_t;
#define OPJ_DPARAMETERS_IGNORE_PCLR_CMAP_CDEF_FLAG 0x0001
-#define OPJ_DPARAMETERS_DUMP_FLAG 0x0002
+#define OPJ_DPARAMETERS_DUMP_FLAG 0x0002
/**
* Decompression parameters
@@ -772,7 +772,7 @@
OPJ_OFF_T end_ph_pos;
/** packet end position */
OPJ_OFF_T end_pos;
- /** packet distorsion */
+ /** packet distortion */
double disto;
} opj_packet_info_t;
@@ -1348,9 +1348,13 @@
opj_dparameters_t *parameters);
/**
- * Set strict decoding parameter for this decoder. If strict decoding is enabled, partial bit
- * streams will fail to decode. If strict decoding is disabled, the decoder will decode partial
- * bitstreams as much as possible without erroring
+ * Set strict decoding parameter for this decoder.
+ * If strict decoding is enabled, partial bit streams will fail to decode, and
+ * the check for invalid TPSOT values added in https://github.com/uclouvain/openjpeg/pull/514
+ * will be disabled.
+ * If strict decoding is disabled, the decoder will decode partial
+ * bitstreams as much as possible without erroring, and the TPSOT fixing logic
+ * will be enabled.
*
* @param p_codec decompressor handler
* @param strict OPJ_TRUE to enable strict decoding, OPJ_FALSE to disable
diff --git a/third_party/libopenjpeg/opj_common.h b/third_party/libopenjpeg/opj_common.h
index ee8adf4..2923a35 100644
--- a/third_party/libopenjpeg/opj_common.h
+++ b/third_party/libopenjpeg/opj_common.h
@@ -28,8 +28,8 @@
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
-#ifndef OPJ_COMMMON_H
-#define OPJ_COMMMON_H
+#ifndef OPJ_COMMON_H
+#define OPJ_COMMON_H
/*
==========================================================
@@ -44,4 +44,4 @@
#define OPJ_COMP_PARAM_DEFAULT_PROG_ORDER OPJ_LRCP
#define OPJ_COMP_PARAM_DEFAULT_NUMRESOLUTION 6
-#endif /* OPJ_COMMMON_H */
+#endif /* OPJ_COMMON_H */
diff --git a/third_party/libopenjpeg/opj_config.h b/third_party/libopenjpeg/opj_config.h
index b0c616f..5aefbfe 100644
--- a/third_party/libopenjpeg/opj_config.h
+++ b/third_party/libopenjpeg/opj_config.h
@@ -13,4 +13,4 @@
/* Version number. */
#define OPJ_VERSION_MAJOR 2
#define OPJ_VERSION_MINOR 5
-#define OPJ_VERSION_BUILD 2
+#define OPJ_VERSION_BUILD 3
diff --git a/third_party/libopenjpeg/opj_config_private.h b/third_party/libopenjpeg/opj_config_private.h
index 85a2969..74a8845 100644
--- a/third_party/libopenjpeg/opj_config_private.h
+++ b/third_party/libopenjpeg/opj_config_private.h
@@ -7,7 +7,7 @@
/* create opj_config_private.h for CMake */
#define OPJ_HAVE_INTTYPES_H 1
-#define OPJ_PACKAGE_VERSION "2.5.2"
+#define OPJ_PACKAGE_VERSION "2.5.3"
/* Not used by openjp2*/
/*#define HAVE_MEMORY_H 1*/
diff --git a/third_party/libopenjpeg/t1.c b/third_party/libopenjpeg/t1.c
index ecc3355..98dce47 100644
--- a/third_party/libopenjpeg/t1.c
+++ b/third_party/libopenjpeg/t1.c
@@ -47,6 +47,9 @@
#ifdef __SSE2__
#include <emmintrin.h>
#endif
+#if (defined(__AVX2__) || defined(__AVX512F__))
+#include <immintrin.h>
+#endif
#if defined(__GNUC__)
#pragma GCC poison malloc calloc realloc free
@@ -1796,6 +1799,39 @@
OPJ_INT32* OPJ_RESTRICT tiledp = &tilec->data[(OPJ_SIZE_T)y * tile_w +
(OPJ_SIZE_T)x];
for (j = 0; j < cblk_h; ++j) {
+ //positive -> round down aka. (83)/2 = 41.5 -> 41
+ //negative -> round up aka. (-83)/2 = -41.5 -> -41
+#if defined(__AVX512F__)
+ OPJ_INT32* ptr_in = datap + (j * cblk_w);
+ OPJ_INT32* ptr_out = tiledp + (j * (OPJ_SIZE_T)tile_w);
+ for (i = 0; i < cblk_w / 16; ++i) {
+ __m512i in_avx = _mm512_loadu_si512((__m512i*)(ptr_in));
+ const __m512i add_avx = _mm512_srli_epi32(in_avx, 31);
+ in_avx = _mm512_add_epi32(in_avx, add_avx);
+ _mm512_storeu_si512((__m512i*)(ptr_out), _mm512_srai_epi32(in_avx, 1));
+ ptr_in += 16;
+ ptr_out += 16;
+ }
+
+ for (i = 0; i < cblk_w % 16; ++i) {
+ ptr_out[i] = ptr_in[i] / 2;
+ }
+#elif defined(__AVX2__)
+ OPJ_INT32* ptr_in = datap + (j * cblk_w);
+ OPJ_INT32* ptr_out = tiledp + (j * (OPJ_SIZE_T)tile_w);
+ for (i = 0; i < cblk_w / 8; ++i) {
+ __m256i in_avx = _mm256_loadu_si256((__m256i*)(ptr_in));
+ const __m256i add_avx = _mm256_srli_epi32(in_avx, 31);
+ in_avx = _mm256_add_epi32(in_avx, add_avx);
+ _mm256_storeu_si256((__m256i*)(ptr_out), _mm256_srai_epi32(in_avx, 1));
+ ptr_in += 8;
+ ptr_out += 8;
+ }
+
+ for (i = 0; i < cblk_w % 8; ++i) {
+ ptr_out[i] = ptr_in[i] / 2;
+ }
+#else
i = 0;
for (; i < (cblk_w & ~(OPJ_UINT32)3U); i += 4U) {
OPJ_INT32 tmp0 = datap[(j * cblk_w) + i + 0U];
@@ -1811,6 +1847,7 @@
OPJ_INT32 tmp = datap[(j * cblk_w) + i];
((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i] = tmp / 2;
}
+#endif
}
} else { /* if (tccp->qmfbid == 0) */
const float stepsize = 0.5f * band->stepsize;
@@ -2130,7 +2167,7 @@
opj_mutex_lock(p_manager_mutex);
}
opj_event_msg(p_manager, EVT_WARNING,
- "PTERM check failure: %d synthetized 0xFF markers read\n",
+ "PTERM check failure: %d synthesized 0xFF markers read\n",
mqc->end_of_byte_stream_counter);
if (p_manager_mutex) {
opj_mutex_unlock(p_manager_mutex);
@@ -2233,6 +2270,111 @@
OPJ_UINT32* OPJ_RESTRICT t1data = (OPJ_UINT32*) t1->data;
/* Change from "natural" order to "zigzag" order of T1 passes */
for (j = 0; j < (cblk_h & ~3U); j += 4) {
+#if defined(__AVX512F__)
+ const __m512i perm1 = _mm512_setr_epi64(2, 3, 10, 11, 4, 5, 12, 13);
+ const __m512i perm2 = _mm512_setr_epi64(6, 7, 14, 15, 0, 0, 0, 0);
+ OPJ_UINT32* ptr = tiledp_u;
+ for (i = 0; i < cblk_w / 16; ++i) {
+ // INPUT OUTPUT
+ // 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F 00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33
+ // 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F 04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37
+ // 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F 08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B
+ // 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F 0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F
+ __m512i in1 = _mm512_slli_epi32(_mm512_loadu_si512((__m512i*)(ptr +
+ (j + 0) * tile_w)), T1_NMSEDEC_FRACBITS);
+ __m512i in2 = _mm512_slli_epi32(_mm512_loadu_si512((__m512i*)(ptr +
+ (j + 1) * tile_w)), T1_NMSEDEC_FRACBITS);
+ __m512i in3 = _mm512_slli_epi32(_mm512_loadu_si512((__m512i*)(ptr +
+ (j + 2) * tile_w)), T1_NMSEDEC_FRACBITS);
+ __m512i in4 = _mm512_slli_epi32(_mm512_loadu_si512((__m512i*)(ptr +
+ (j + 3) * tile_w)), T1_NMSEDEC_FRACBITS);
+
+ __m512i tmp1 = _mm512_unpacklo_epi32(in1, in2);
+ __m512i tmp2 = _mm512_unpacklo_epi32(in3, in4);
+ __m512i tmp3 = _mm512_unpackhi_epi32(in1, in2);
+ __m512i tmp4 = _mm512_unpackhi_epi32(in3, in4);
+
+ in1 = _mm512_unpacklo_epi64(tmp1, tmp2);
+ in2 = _mm512_unpacklo_epi64(tmp3, tmp4);
+ in3 = _mm512_unpackhi_epi64(tmp1, tmp2);
+ in4 = _mm512_unpackhi_epi64(tmp3, tmp4);
+
+ _mm_storeu_si128((__m128i*)(t1data + 0), _mm512_castsi512_si128(in1));
+ _mm_storeu_si128((__m128i*)(t1data + 4), _mm512_castsi512_si128(in3));
+ _mm_storeu_si128((__m128i*)(t1data + 8), _mm512_castsi512_si128(in2));
+ _mm_storeu_si128((__m128i*)(t1data + 12), _mm512_castsi512_si128(in4));
+
+ tmp1 = _mm512_permutex2var_epi64(in1, perm1, in3);
+ tmp2 = _mm512_permutex2var_epi64(in2, perm1, in4);
+
+ _mm256_storeu_si256((__m256i*)(t1data + 16), _mm512_castsi512_si256(tmp1));
+ _mm256_storeu_si256((__m256i*)(t1data + 24), _mm512_castsi512_si256(tmp2));
+ _mm256_storeu_si256((__m256i*)(t1data + 32), _mm512_extracti64x4_epi64(tmp1,
+ 0x1));
+ _mm256_storeu_si256((__m256i*)(t1data + 40), _mm512_extracti64x4_epi64(tmp2,
+ 0x1));
+ _mm256_storeu_si256((__m256i*)(t1data + 48),
+ _mm512_castsi512_si256(_mm512_permutex2var_epi64(in1, perm2, in3)));
+ _mm256_storeu_si256((__m256i*)(t1data + 56),
+ _mm512_castsi512_si256(_mm512_permutex2var_epi64(in2, perm2, in4)));
+ t1data += 64;
+ ptr += 16;
+ }
+ for (i = 0; i < cblk_w % 16; ++i) {
+ t1data[0] = ptr[(j + 0) * tile_w] << T1_NMSEDEC_FRACBITS;
+ t1data[1] = ptr[(j + 1) * tile_w] << T1_NMSEDEC_FRACBITS;
+ t1data[2] = ptr[(j + 2) * tile_w] << T1_NMSEDEC_FRACBITS;
+ t1data[3] = ptr[(j + 3) * tile_w] << T1_NMSEDEC_FRACBITS;
+ t1data += 4;
+ ptr += 1;
+ }
+#elif defined(__AVX2__)
+ OPJ_UINT32* ptr = tiledp_u;
+ for (i = 0; i < cblk_w / 8; ++i) {
+ // INPUT OUTPUT
+ // 00 01 02 03 04 05 06 07 00 10 20 30 01 11 21 31
+ // 10 11 12 13 14 15 16 17 02 12 22 32 03 13 23 33
+ // 20 21 22 23 24 25 26 27 04 14 24 34 05 15 25 35
+ // 30 31 32 33 34 35 36 37 06 16 26 36 07 17 27 37
+ __m256i in1 = _mm256_slli_epi32(_mm256_loadu_si256((__m256i*)(ptr +
+ (j + 0) * tile_w)), T1_NMSEDEC_FRACBITS);
+ __m256i in2 = _mm256_slli_epi32(_mm256_loadu_si256((__m256i*)(ptr +
+ (j + 1) * tile_w)), T1_NMSEDEC_FRACBITS);
+ __m256i in3 = _mm256_slli_epi32(_mm256_loadu_si256((__m256i*)(ptr +
+ (j + 2) * tile_w)), T1_NMSEDEC_FRACBITS);
+ __m256i in4 = _mm256_slli_epi32(_mm256_loadu_si256((__m256i*)(ptr +
+ (j + 3) * tile_w)), T1_NMSEDEC_FRACBITS);
+
+ __m256i tmp1 = _mm256_unpacklo_epi32(in1, in2);
+ __m256i tmp2 = _mm256_unpacklo_epi32(in3, in4);
+ __m256i tmp3 = _mm256_unpackhi_epi32(in1, in2);
+ __m256i tmp4 = _mm256_unpackhi_epi32(in3, in4);
+
+ in1 = _mm256_unpacklo_epi64(tmp1, tmp2);
+ in2 = _mm256_unpacklo_epi64(tmp3, tmp4);
+ in3 = _mm256_unpackhi_epi64(tmp1, tmp2);
+ in4 = _mm256_unpackhi_epi64(tmp3, tmp4);
+
+ _mm_storeu_si128((__m128i*)(t1data + 0), _mm256_castsi256_si128(in1));
+ _mm_storeu_si128((__m128i*)(t1data + 4), _mm256_castsi256_si128(in3));
+ _mm_storeu_si128((__m128i*)(t1data + 8), _mm256_castsi256_si128(in2));
+ _mm_storeu_si128((__m128i*)(t1data + 12), _mm256_castsi256_si128(in4));
+ _mm256_storeu_si256((__m256i*)(t1data + 16), _mm256_permute2x128_si256(in1, in3,
+ 0x31));
+ _mm256_storeu_si256((__m256i*)(t1data + 24), _mm256_permute2x128_si256(in2, in4,
+ 0x31));
+ t1data += 32;
+ ptr += 8;
+ }
+ for (i = 0; i < cblk_w % 8; ++i) {
+ t1data[0] = ptr[(j + 0) * tile_w] << T1_NMSEDEC_FRACBITS;
+ t1data[1] = ptr[(j + 1) * tile_w] << T1_NMSEDEC_FRACBITS;
+ t1data[2] = ptr[(j + 2) * tile_w] << T1_NMSEDEC_FRACBITS;
+ t1data[3] = ptr[(j + 3) * tile_w] << T1_NMSEDEC_FRACBITS;
+ t1data += 4;
+ ptr += 1;
+ }
+#else
for (i = 0; i < cblk_w; ++i) {
t1data[0] = tiledp_u[(j + 0) * tile_w + i] << T1_NMSEDEC_FRACBITS;
t1data[1] = tiledp_u[(j + 1) * tile_w + i] << T1_NMSEDEC_FRACBITS;
@@ -2240,6 +2382,7 @@
t1data[3] = tiledp_u[(j + 3) * tile_w + i] << T1_NMSEDEC_FRACBITS;
t1data += 4;
}
+#endif
}
if (j < cblk_h) {
for (i = 0; i < cblk_w; ++i) {
diff --git a/third_party/libopenjpeg/t2.c b/third_party/libopenjpeg/t2.c
index 657ded7..4e8cf60 100644
--- a/third_party/libopenjpeg/t2.c
+++ b/third_party/libopenjpeg/t2.c
@@ -1111,6 +1111,7 @@
/* SOP markers */
if (p_tcp->csty & J2K_CP_CSTY_SOP) {
+ /* SOP markers are allowed (i.e. optional), just warn */
if (p_max_length < 6) {
opj_event_msg(p_manager, EVT_WARNING,
"Not enough space for expected SOP marker\n");
@@ -1163,12 +1164,15 @@
/* EPH markers */
if (p_tcp->csty & J2K_CP_CSTY_EPH) {
+ /* EPH markers are required */
if ((*l_modified_length_ptr - (OPJ_UINT32)(l_header_data -
*l_header_data_start)) < 2U) {
- opj_event_msg(p_manager, EVT_WARNING,
- "Not enough space for expected EPH marker\n");
+ opj_event_msg(p_manager, EVT_ERROR,
+ "Not enough space for required EPH marker\n");
+ return OPJ_FALSE;
} else if ((*l_header_data) != 0xff || (*(l_header_data + 1) != 0x92)) {
- opj_event_msg(p_manager, EVT_WARNING, "Expected EPH marker\n");
+ opj_event_msg(p_manager, EVT_ERROR, "Expected EPH marker\n");
+ return OPJ_FALSE;
} else {
l_header_data += 2;
}
@@ -1340,12 +1344,15 @@
/* EPH markers */
if (p_tcp->csty & J2K_CP_CSTY_EPH) {
+ /* EPH markers are required */
if ((*l_modified_length_ptr - (OPJ_UINT32)(l_header_data -
*l_header_data_start)) < 2U) {
- opj_event_msg(p_manager, EVT_WARNING,
- "Not enough space for expected EPH marker\n");
+ opj_event_msg(p_manager, EVT_ERROR,
+ "Not enough space for required EPH marker\n");
+ return OPJ_FALSE;
} else if ((*l_header_data) != 0xff || (*(l_header_data + 1) != 0x92)) {
- opj_event_msg(p_manager, EVT_WARNING, "Expected EPH marker\n");
+ opj_event_msg(p_manager, EVT_ERROR, "Expected EPH marker\n");
+ return OPJ_FALSE;
} else {
l_header_data += 2;
}
@@ -1353,6 +1360,9 @@
l_header_length = (OPJ_UINT32)(l_header_data - *l_header_data_start);
JAS_FPRINTF(stderr, "hdrlen=%d \n", l_header_length);
+ if (!l_header_length) {
+ return OPJ_FALSE;
+ }
JAS_FPRINTF(stderr, "packet body\n");
*l_modified_length_ptr -= l_header_length;
*l_header_data_start += l_header_length;
diff --git a/third_party/libopenjpeg/tcd.c b/third_party/libopenjpeg/tcd.c
index 981d90c..0957b6e 100644
--- a/third_party/libopenjpeg/tcd.c
+++ b/third_party/libopenjpeg/tcd.c
@@ -243,7 +243,7 @@
/* ----------------------------------------------------------------------- */
/** Returns OPJ_TRUE if the layer allocation is unchanged w.r.t to the previous
- * invokation with a different threshold */
+ * invocation with a different threshold */
static
OPJ_BOOL opj_tcd_makelayer(opj_tcd_t *tcd,
OPJ_UINT32 layno,
@@ -2869,12 +2869,12 @@
return intersects;
}
-/** Returns whether a tile componenent is fully decoded, taking into account
+/** Returns whether a tile component is fully decoded, taking into account
* p_tcd->win_* members.
*
* @param p_tcd TCD handle.
* @param compno Component number
- * @return OPJ_TRUE whether the tile componenent is fully decoded
+ * @return OPJ_TRUE whether the tile component is fully decoded
*/
static OPJ_BOOL opj_tcd_is_whole_tilecomp_decoding(opj_tcd_t *p_tcd,
OPJ_UINT32 compno)
diff --git a/third_party/libopenjpeg/tcd.h b/third_party/libopenjpeg/tcd.h
index 4ab2271..3371b08 100644
--- a/third_party/libopenjpeg/tcd.h
+++ b/third_party/libopenjpeg/tcd.h
@@ -313,7 +313,7 @@
/**
Dump the content of a tcd structure
*/
-/*void tcd_dump(FILE *fd, opj_tcd_t *tcd, opj_tcd_image_t *img);*/ /* TODO MSD shoul use the new v2 structures */
+/*void tcd_dump(FILE *fd, opj_tcd_t *tcd, opj_tcd_image_t *img);*/ /* TODO MSD should use the new v2 structures */
/**
Create a new TCD handle
@@ -444,7 +444,7 @@
OPJ_SIZE_T opj_tcd_get_encoder_input_buffer_size(opj_tcd_t *p_tcd);
/**
- * Initialize the tile coder and may reuse some meory.
+ * Initialize the tile coder and may reuse some memory.
*
* @param p_tcd TCD handle.
* @param p_tile_no current tile index to encode.
@@ -492,7 +492,7 @@
* @param y0 Upper left y in subband coordinates
* @param x1 Lower right x in subband coordinates
* @param y1 Lower right y in subband coordinates
- * @return OPJ_TRUE whether the sub-band region contributs to the area of
+ * @return OPJ_TRUE whether the sub-band region contributes to the area of
* interest.
*/
OPJ_BOOL opj_tcd_is_subband_area_of_interest(opj_tcd_t *tcd,