Upgrade OpenJPEG to 2.5.0.

- Apply changes and add new files from upstream.
- Delete obsolete patches and a patch hunk that no longer applies.

Change-Id: I9beb97d0ec21aafcdc89eef44e067cb5a298bacb
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/93773
Commit-Queue: Lei Zhang <thestig@chromium.org>
Reviewed-by: Tom Sepez <tsepez@chromium.org>
diff --git a/third_party/BUILD.gn b/third_party/BUILD.gn
index 5ddfdd6..cb8c356 100644
--- a/third_party/BUILD.gn
+++ b/third_party/BUILD.gn
@@ -406,6 +406,7 @@
     "libopenjpeg20/dwt.c",
     "libopenjpeg20/event.c",
     "libopenjpeg20/function_list.c",
+    "libopenjpeg20/ht_dec.c",
     "libopenjpeg20/image.c",
     "libopenjpeg20/invert.c",
     "libopenjpeg20/j2k.c",
@@ -421,6 +422,7 @@
     "libopenjpeg20/sparse_array.c",
     "libopenjpeg20/sparse_array.h",
     "libopenjpeg20/t1.c",
+    "libopenjpeg20/t1_ht_luts.h",
     "libopenjpeg20/t2.c",
     "libopenjpeg20/tcd.c",
     "libopenjpeg20/tgt.c",
diff --git a/third_party/libopenjpeg20/0026-use_opj_uint_ceildiv.patch b/third_party/libopenjpeg20/0026-use_opj_uint_ceildiv.patch
index 0eb3654..90aabdb 100644
--- a/third_party/libopenjpeg20/0026-use_opj_uint_ceildiv.patch
+++ b/third_party/libopenjpeg20/0026-use_opj_uint_ceildiv.patch
@@ -32,19 +32,6 @@
      } else {
          p_j2k->m_specific_param.m_decoder.m_start_tile_x = 0;
          p_j2k->m_specific_param.m_decoder.m_start_tile_y = 0;
-@@ -7839,10 +7835,8 @@ OPJ_BOOL opj_j2k_setup_encoder(opj_j2k_t *p_j2k,
-             opj_event_msg(p_manager, EVT_ERROR, "Invalid tile height\n");
-             return OPJ_FALSE;
-         }
--        cp->tw = (OPJ_UINT32)opj_int_ceildiv((OPJ_INT32)(image->x1 - cp->tx0),
--                                             (OPJ_INT32)cp->tdx);
--        cp->th = (OPJ_UINT32)opj_int_ceildiv((OPJ_INT32)(image->y1 - cp->ty0),
--                                             (OPJ_INT32)cp->tdy);
-+        cp->tw = opj_uint_ceildiv(image->x1 - cp->tx0, cp->tdx);
-+        cp->th = opj_uint_ceildiv(image->y1 - cp->ty0, cp->tdy);
-     } else {
-         cp->tdx = image->x1 - cp->tx0;
-         cp->tdy = image->y1 - cp->ty0;
 @@ -10035,10 +10029,8 @@ static OPJ_BOOL opj_j2k_update_image_dimensions(opj_image_t* p_image,
              return OPJ_FALSE;
          }
diff --git a/third_party/libopenjpeg20/0040-dwt_overflows.patch b/third_party/libopenjpeg20/0040-dwt_overflows.patch
deleted file mode 100644
index 588cd3f..0000000
--- a/third_party/libopenjpeg20/0040-dwt_overflows.patch
+++ /dev/null
@@ -1,124 +0,0 @@
-commit badbd93af92836c7a38ef069d410a829e2575ce2
-Author: Even Rouault <even.rouault@spatialys.com>
-Date:   Fri Sep 3 15:17:56 2021 +0200
-
-    Avoid integer overflows in DWT. Fixes https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=11700 and https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=30646
-
-diff --git a/src/lib/openjp2/dwt.c b/src/lib/openjp2/dwt.c
-index 4164ba09..824a9fc4 100644
---- a/src/lib/openjp2/dwt.c
-+++ b/src/lib/openjp2/dwt.c
-@@ -385,7 +385,8 @@ static void  opj_idwt53_h_cas0(OPJ_INT32* tmp,
-         s0n = s1n - ((d1c + d1n + 2) >> 2);
- 
-         tmp[i  ] = s0c;
--        tmp[i + 1] = d1c + ((s0c + s0n) >> 1);
-+        tmp[i + 1] = opj_int_add_no_overflow(d1c, opj_int_add_no_overflow(s0c,
-+                                             s0n) >> 1);
-     }
- 
-     tmp[i] = s0n;
-@@ -450,7 +451,7 @@ static void  opj_idwt53_h_cas1(OPJ_INT32* tmp,
- 
-         dn = in_odd[j] - ((s1 + s2 + 2) >> 2);
-         tmp[i  ] = dc;
--        tmp[i + 1] = s1 + ((dn + dc) >> 1);
-+        tmp[i + 1] = opj_int_add_no_overflow(s1, opj_int_add_no_overflow(dn, dc) >> 1);
- 
-         dc = dn;
-         s1 = s2;
-@@ -796,7 +797,8 @@ static void opj_idwt3_v_cas0(OPJ_INT32* tmp,
-         s1n = tiledp_col[(OPJ_SIZE_T)(j + 1) * stride];
-         d1n = tiledp_col[(OPJ_SIZE_T)(sn + j + 1) * stride];
- 
--        s0n = s1n - ((d1c + d1n + 2) >> 2);
-+        s0n = opj_int_sub_no_overflow(s1n,
-+                                      opj_int_add_no_overflow(opj_int_add_no_overflow(d1c, d1n), 2) >> 2);
- 
-         tmp[i  ] = s0c;
-         tmp[i + 1] = d1c + ((s0c + s0n) >> 1);
-@@ -2343,10 +2345,13 @@ static void opj_dwt_decode_partial_1(OPJ_INT32 *a, OPJ_INT32 dn, OPJ_INT32 sn,
-             OPJ_S(0) /= 2;
-         } else {
-             for (i = win_l_x0; i < win_l_x1; i++) {
--                OPJ_D(i) -= (OPJ_SS_(i) + OPJ_SS_(i + 1) + 2) >> 2;
-+                OPJ_D(i) = opj_int_sub_no_overflow(OPJ_D(i),
-+                                                   opj_int_add_no_overflow(opj_int_add_no_overflow(OPJ_SS_(i), OPJ_SS_(i + 1)),
-+                                                           2) >> 2);
-             }
-             for (i = win_h_x0; i < win_h_x1; i++) {
--                OPJ_S(i) += (OPJ_DD_(i) + OPJ_DD_(i - 1)) >> 1;
-+                OPJ_S(i) = opj_int_add_no_overflow(OPJ_S(i),
-+                                                   opj_int_add_no_overflow(OPJ_DD_(i), OPJ_DD_(i - 1)) >> 1);
-             }
-         }
-     }
-@@ -2484,12 +2489,17 @@ static void opj_dwt_decode_partial_1_parallel(OPJ_INT32 *a,
-         } else {
-             for (i = win_l_x0; i < win_l_x1; i++) {
-                 for (off = 0; off < 4; off++) {
--                    OPJ_D_off(i, off) -= (OPJ_SS__off(i, off) + OPJ_SS__off(i + 1, off) + 2) >> 2;
-+                    OPJ_D_off(i, off) = opj_int_sub_no_overflow(
-+                                            OPJ_D_off(i, off),
-+                                            opj_int_add_no_overflow(
-+                                                opj_int_add_no_overflow(OPJ_SS__off(i, off), OPJ_SS__off(i + 1, off)), 2) >> 2);
-                 }
-             }
-             for (i = win_h_x0; i < win_h_x1; i++) {
-                 for (off = 0; off < 4; off++) {
--                    OPJ_S_off(i, off) += (OPJ_DD__off(i, off) + OPJ_DD__off(i - 1, off)) >> 1;
-+                    OPJ_S_off(i, off) = opj_int_add_no_overflow(
-+                                            OPJ_S_off(i, off),
-+                                            opj_int_add_no_overflow(OPJ_DD__off(i, off), OPJ_DD__off(i - 1, off)) >> 1);
-                 }
-             }
-         }
-diff --git a/src/lib/openjp2/opj_intmath.h b/src/lib/openjp2/opj_intmath.h
-index afe69d90..f8cc6139 100644
---- a/src/lib/openjp2/opj_intmath.h
-+++ b/src/lib/openjp2/opj_intmath.h
-@@ -276,6 +276,44 @@ static INLINE OPJ_INT32 opj_int_fix_mul_t1(OPJ_INT32 a, OPJ_INT32 b)
-     return (OPJ_INT32)(temp >> (13 + 11 - T1_NMSEDEC_FRACBITS)) ;
- }
- 
-+/**
-+Addtion two signed integers with a wrap-around behaviour.
-+Assumes complement-to-two signed integers.
-+@param a
-+@param b
-+@return Returns a + b
-+*/
-+static INLINE OPJ_INT32 opj_int_add_no_overflow(OPJ_INT32 a, OPJ_INT32 b)
-+{
-+    void* pa = &a;
-+    void* pb = &b;
-+    OPJ_UINT32* upa = (OPJ_UINT32*)pa;
-+    OPJ_UINT32* upb = (OPJ_UINT32*)pb;
-+    OPJ_UINT32 ures = *upa + *upb;
-+    void* pures = &ures;
-+    OPJ_INT32* ipres = (OPJ_INT32*)pures;
-+    return *ipres;
-+}
-+
-+/**
-+Subtract two signed integers with a wrap-around behaviour.
-+Assumes complement-to-two signed integers.
-+@param a
-+@param b
-+@return Returns a - b
-+*/
-+static INLINE OPJ_INT32 opj_int_sub_no_overflow(OPJ_INT32 a, OPJ_INT32 b)
-+{
-+    void* pa = &a;
-+    void* pb = &b;
-+    OPJ_UINT32* upa = (OPJ_UINT32*)pa;
-+    OPJ_UINT32* upb = (OPJ_UINT32*)pb;
-+    OPJ_UINT32 ures = *upa - *upb;
-+    void* pures = &ures;
-+    OPJ_INT32* ipres = (OPJ_INT32*)pures;
-+    return *ipres;
-+}
-+
- /* ----------------------------------------------------------------------- */
- /*@}*/
- 
diff --git a/third_party/libopenjpeg20/0042-dwt_overflows.patch b/third_party/libopenjpeg20/0042-dwt_overflows.patch
deleted file mode 100644
index 9ad27ae..0000000
--- a/third_party/libopenjpeg20/0042-dwt_overflows.patch
+++ /dev/null
@@ -1,20 +0,0 @@
-commit 1462e9403fb7d1186e999701dfe72980262a089c
-Author: Even Rouault <even.rouault@spatialys.com>
-Date:   Thu Feb 10 14:30:13 2022 +0100
-
-    Avoid integer overflows in DWT. Fixes https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=44544
-
-diff --git a/src/lib/openjp2/dwt.c b/src/lib/openjp2/dwt.c
-index 2b9b9e92..abc500ec 100644
---- a/src/lib/openjp2/dwt.c
-+++ b/src/lib/openjp2/dwt.c
-@@ -801,7 +801,8 @@ static void opj_idwt3_v_cas0(OPJ_INT32* tmp,
-                                       opj_int_add_no_overflow(opj_int_add_no_overflow(d1c, d1n), 2) >> 2);
- 
-         tmp[i  ] = s0c;
--        tmp[i + 1] = d1c + ((s0c + s0n) >> 1);
-+        tmp[i + 1] = opj_int_add_no_overflow(d1c, opj_int_add_no_overflow(s0c,
-+                                             s0n) >> 1);
-     }
- 
-     tmp[i] = s0n;
diff --git a/third_party/libopenjpeg20/README.pdfium b/third_party/libopenjpeg20/README.pdfium
index d6127b0..43692e1 100644
--- a/third_party/libopenjpeg20/README.pdfium
+++ b/third_party/libopenjpeg20/README.pdfium
@@ -1,9 +1,9 @@
 Name: OpenJPEG
 URL: http://www.openjpeg.org/
-Version: 2.4.0 (also update in opj_config*)
+Version: 2.5.0 (also update in opj_config*)
 Security Critical: yes
 License: 2-clause BSD
-CPEPrefix: cpe:/a:uclouvain:openjpeg:2.4.0
+CPEPrefix: cpe:/a:uclouvain:openjpeg:2.5.0
 
 Description:
 JPEG 2000 library.
@@ -28,6 +28,4 @@
 0034-opj_malloc.patch: PDFium changes in opj_malloc.
 0035-opj_image_data_free.patch: Use the right free function in opj_jp2_apply_pclr.
 0039-opj_mqc_renorme.patch: Remove unused opj_mqc_renorme().
-0040-dwt_overflows.patch: Avoid integer overflows in DWT.
 0041-remove_opj_clock.patch: Remove unused opj_clock.h include.
-0042-dwt_overflows.patch: Avoid integer overflows in DWT.
diff --git a/third_party/libopenjpeg20/cio.h b/third_party/libopenjpeg20/cio.h
index 6996a9a..7caee30 100644
--- a/third_party/libopenjpeg20/cio.h
+++ b/third_party/libopenjpeg20/cio.h
@@ -118,7 +118,7 @@
     opj_stream_seek_fn      m_seek_fn;
 
     /**
-     * Actual data stored into the stream if readed from. Data is read by chunk of fixed size.
+     * Actual data stored into the stream if read from. Data is read by chunk of fixed size.
      * you should never access this data directly.
      */
     OPJ_BYTE *                  m_stored_data;
diff --git a/third_party/libopenjpeg20/dwt.c b/third_party/libopenjpeg20/dwt.c
index 063a689..0184aa1 100644
--- a/third_party/libopenjpeg20/dwt.c
+++ b/third_party/libopenjpeg20/dwt.c
@@ -518,7 +518,7 @@
 
 #if (defined(__SSE2__) || defined(__AVX2__)) && !defined(STANDARD_SLOW_VERSION)
 
-/* Conveniency macros to improve the readabilty of the formulas */
+/* Conveniency macros to improve the readability of the formulas */
 #if __AVX2__
 #define VREG        __m256i
 #define LOAD_CST(x) _mm256_set1_epi32(x)
diff --git a/third_party/libopenjpeg20/ht_dec.c b/third_party/libopenjpeg20/ht_dec.c
new file mode 100644
index 0000000..1eb4d52
--- /dev/null
+++ b/third_party/libopenjpeg20/ht_dec.c
@@ -0,0 +1,2640 @@
+//***************************************************************************/
+// This software is released under the 2-Clause BSD license, included
+// below.
+//
+// Copyright (c) 2021, Aous Naman
+// Copyright (c) 2021, Kakadu Software Pty Ltd, Australia
+// Copyright (c) 2021, The University of New South Wales, Australia
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//***************************************************************************/
+// This file is part of the OpenJpeg software implementation.
+// File: ht_dec.c
+// Author: Aous Naman
+// Date: 01 September 2021
+//***************************************************************************/
+
+//***************************************************************************/
+/** @file ht_dec.c
+ *  @brief implements HTJ2K block decoder
+ */
+
+#include <assert.h>
+#include <string.h>
+#include "opj_includes.h"
+
+#include "t1_ht_luts.h"
+
+/////////////////////////////////////////////////////////////////////////////
+// compiler detection
+/////////////////////////////////////////////////////////////////////////////
+#ifdef _MSC_VER
+#define OPJ_COMPILER_MSVC
+#elif (defined __GNUC__)
+#define OPJ_COMPILER_GNUC
+#endif
+
+//************************************************************************/
+/** @brief Displays the error message for disabling the decoding of SPP and
+  * MRP passes
+  */
+static OPJ_BOOL only_cleanup_pass_is_decoded = OPJ_FALSE;
+
+//************************************************************************/
+/** @brief Generates population count (i.e., the number of set bits)
+  *
+  *   @param [in]  val is the value for which population count is sought
+  */
+static INLINE
+OPJ_UINT32 population_count(OPJ_UINT32 val)
+{
+#ifdef OPJ_COMPILER_MSVC
+    return (OPJ_UINT32)__popcnt(val);
+#elif (defined OPJ_COMPILER_GNUC)
+    return (OPJ_UINT32)__builtin_popcount(val);
+#else
+    val -= ((val >> 1) & 0x55555555);
+    val = (((val >> 2) & 0x33333333) + (val & 0x33333333));
+    val = (((val >> 4) + val) & 0x0f0f0f0f);
+    val += (val >> 8);
+    val += (val >> 16);
+    return (OPJ_UINT32)(val & 0x0000003f);
+#endif
+}
+
+//************************************************************************/
+/** @brief Counts the number of leading zeros
+  *
+  *   @param [in]  val is the value for which leading zero count is sought
+  */
+#ifdef OPJ_COMPILER_MSVC
+#pragma intrinsic(_BitScanReverse)
+#endif
+static INLINE
+OPJ_UINT32 count_leading_zeros(OPJ_UINT32 val)
+{
+#ifdef OPJ_COMPILER_MSVC
+    unsigned long result = 0;
+    _BitScanReverse(&result, val);
+    return 31U ^ (OPJ_UINT32)result;
+#elif (defined OPJ_COMPILER_GNUC)
+    return (OPJ_UINT32)__builtin_clz(val);
+#else
+    val |= (val >> 1);
+    val |= (val >> 2);
+    val |= (val >> 4);
+    val |= (val >> 8);
+    val |= (val >> 16);
+    return 32U - population_count(val);
+#endif
+}
+
+//************************************************************************/
+/** @brief Read a little-endian serialized UINT32.
+  *
+  *   @param [in]  dataIn pointer to byte stream to read from
+  */
+static INLINE OPJ_UINT32 read_le_uint32(const void* dataIn)
+{
+#if defined(OPJ_BIG_ENDIAN)
+    const OPJ_UINT8* data = (const OPJ_UINT8*)dataIn;
+    return ((OPJ_UINT32)data[0]) | (OPJ_UINT32)(data[1] << 8) | (OPJ_UINT32)(
+               data[2] << 16) | (((
+                                      OPJ_UINT32)data[3]) <<
+                                 24U);
+#else
+    return *(OPJ_UINT32*)dataIn;
+#endif
+}
+
+//************************************************************************/
+/** @brief MEL state structure for reading and decoding the MEL bitstream
+  *
+  *  A number of events are decoded from the MEL bitstream ahead of time
+  *  and stored in runs/num_runs.
+  *  Each run represents the number of zero events before a one event.
+  */
+typedef struct dec_mel {
+    // data decoding machinery
+    OPJ_UINT8* data;  //!<the address of data (or bitstream)
+    OPJ_UINT64 tmp;   //!<temporary buffer for read data
+    int bits;         //!<number of bits stored in tmp
+    int size;         //!<number of bytes in MEL code
+    OPJ_BOOL unstuff; //!<true if the next bit needs to be unstuffed
+    int k;            //!<state of MEL decoder
+
+    // queue of decoded runs
+    int num_runs;    //!<number of decoded runs left in runs (maximum 8)
+    OPJ_UINT64 runs; //!<runs of decoded MEL codewords (7 bits/run)
+} dec_mel_t;
+
+//************************************************************************/
+/** @brief Reads and unstuffs the MEL bitstream
+  *
+  *  This design needs more bytes in the codeblock buffer than the length
+  *  of the cleanup pass by up to 2 bytes.
+  *
+  *  Unstuffing removes the MSB of the byte following a byte whose
+  *  value is 0xFF; this prevents sequences larger than 0xFF7F in value
+  *  from appearing in the bitstream.
+  *
+  *  @param [in]  melp is a pointer to dec_mel_t structure
+  */
+static INLINE
+void mel_read(dec_mel_t *melp)
+{
+    OPJ_UINT32 val;
+    int bits;
+    OPJ_UINT32 t;
+    OPJ_BOOL unstuff;
+
+    if (melp->bits > 32) { //there are enough bits in the tmp variable
+        return;    // return without reading new data
+    }
+
+    val = 0xFFFFFFFF;      // feed in 0xFF if buffer is exhausted
+    if (melp->size > 4) {  // if more than 4 bytes are left in the MEL segment
+        val = read_le_uint32(melp->data);  // read 32 bits from MEL data
+        melp->data += 4;           // advance pointer
+        melp->size -= 4;           // reduce counter
+    } else if (melp->size > 0) { // 4 or less
+        OPJ_UINT32 m, v;
+        int i = 0;
+        while (melp->size > 1) {
+            OPJ_UINT32 v = *melp->data++; // read one byte at a time
+            OPJ_UINT32 m = ~(0xFFu << i); // mask of location
+            val = (val & m) | (v << i);   // put byte in its correct location
+            --melp->size;
+            i += 8;
+        }
+        // size equal to 1
+        v = *melp->data++;  // the one before the last is different
+        v |= 0xF;                         // MEL and VLC segments can overlap
+        m = ~(0xFFu << i);
+        val = (val & m) | (v << i);
+        --melp->size;
+    }
+
+    // next we unstuff them before adding them to the buffer
+    bits = 32 - melp->unstuff;      // number of bits in val, subtract 1 if
+    // the previously read byte requires
+    // unstuffing
+
+    // data is unstuffed and accumulated in t
+    // bits has the number of bits in t
+    t = val & 0xFF;
+    unstuff = ((val & 0xFF) == 0xFF); // true if the byte needs unstuffing
+    bits -= unstuff; // there is one less bit in t if unstuffing is needed
+    t = t << (8 - unstuff); // move up to make room for the next byte
+
+    //this is a repeat of the above
+    t |= (val >> 8) & 0xFF;
+    unstuff = (((val >> 8) & 0xFF) == 0xFF);
+    bits -= unstuff;
+    t = t << (8 - unstuff);
+
+    t |= (val >> 16) & 0xFF;
+    unstuff = (((val >> 16) & 0xFF) == 0xFF);
+    bits -= unstuff;
+    t = t << (8 - unstuff);
+
+    t |= (val >> 24) & 0xFF;
+    melp->unstuff = (((val >> 24) & 0xFF) == 0xFF);
+
+    // move t to tmp, and push the result all the way up, so we read from
+    // the MSB
+    melp->tmp |= ((OPJ_UINT64)t) << (64 - bits - melp->bits);
+    melp->bits += bits; //increment the number of bits in tmp
+}
+
+//************************************************************************/
+/** @brief Decodes unstuffed MEL segment bits stored in tmp to runs
+  *
+  *  Runs are stored in "runs" and the number of runs in "num_runs".
+  *  Each run represents a number of zero events that may or may not
+  *  terminate in a 1 event.
+  *  Each run is stored in 7 bits.  The LSB is 1 if the run terminates in
+  *  a 1 event, 0 otherwise.  The upper 6 bits hold, for a run terminating
+  *  with 1, the number of consecutive zero events; for a run terminating
+  *  with 0, they hold (number of consecutive zero events - 1).  Taken as
+  *  a whole, the stored 7-bit value is therefore that count * 2 + LSB.
+  *  A total of 6 bits (made up of 1 + 5) should have been enough.
+  *
+  *  @param [in]  melp is a pointer to dec_mel_t structure
+  */
+static INLINE
+void mel_decode(dec_mel_t *melp)
+{
+    static const int mel_exp[13] = { //MEL exponents
+        0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 5
+    };
+
+    if (melp->bits < 6) { // if there are less than 6 bits in tmp
+        mel_read(melp);    // then read from the MEL bitstream
+    }
+    // 6 bits is the largest decodable MEL cwd
+
+    //repeat as long as there are enough decodable bits in tmp,
+    // and the runs store is not full (num_runs < 8)
+    while (melp->bits >= 6 && melp->num_runs < 8) {
+        int eval = mel_exp[melp->k]; // number of bits associated with state
+        int run = 0;
+        if (melp->tmp & (1ull << 63)) { //The next bit to decode (stored in MSB)
+            //one is found
+            run = 1 << eval;
+            run--; // consecutive runs of 0 events - 1
+            melp->k = melp->k + 1 < 12 ? melp->k + 1 : 12;//increment, max is 12
+            melp->tmp <<= 1; // consume one bit from tmp
+            melp->bits -= 1;
+            run = run << 1; // a stretch of zeros not terminating in one
+        } else {
+            //0 is found
+            run = (int)(melp->tmp >> (63 - eval)) & ((1 << eval) - 1);
+            melp->k = melp->k - 1 > 0 ? melp->k - 1 : 0; //decrement, min is 0
+            melp->tmp <<= eval + 1; //consume eval + 1 bits (max is 6)
+            melp->bits -= eval + 1;
+            run = (run << 1) + 1; // a stretch of zeros terminating with one
+        }
+        eval = melp->num_runs * 7;                 // 7 bits per run
+        melp->runs &= ~((OPJ_UINT64)0x3F << eval); // 6 bits are sufficient
+        melp->runs |= ((OPJ_UINT64)run) << eval;   // store the value in runs
+        melp->num_runs++;                          // increment count
+    }
+}
+
+//************************************************************************/
+/** @brief Initializes a dec_mel_t structure for MEL decoding and reads
+  *         some bytes in order to get the read address to a multiple
+  *         of 4
+  *
+  *  @param [in]  melp is a pointer to dec_mel_t structure
+  *  @param [in]  bbuf is a pointer to byte buffer
+  *  @param [in]  lcup is the length of MagSgn+MEL+VLC segments
+  *  @param [in]  scup is the length of MEL+VLC segments
+  */
+static INLINE
+void mel_init(dec_mel_t *melp, OPJ_UINT8* bbuf, int lcup, int scup)
+{
+    int num;
+    int i;
+
+    melp->data = bbuf + lcup - scup; // move the pointer to the start of MEL
+    melp->bits = 0;                  // 0 bits in tmp
+    melp->tmp = 0;                   //
+    melp->unstuff = OPJ_FALSE;       // no unstuffing
+    melp->size = scup - 1;           // size is the length of MEL+VLC-1
+    melp->k = 0;                     // 0 for state
+    melp->num_runs = 0;              // num_runs is 0
+    melp->runs = 0;                  //
+
+    //This code is borrowed; original is for a different architecture
+    //These few lines take care of the case where data is not at a multiple
+    // of 4 boundary.  It reads 1,2,3 up to 4 bytes from the MEL segment
+    num = 4 - (int)((intptr_t)(melp->data) & 0x3);
+    for (i = 0; i < num; ++i) { // this code is similar to mel_read
+        OPJ_UINT64 d;
+        int d_bits;
+
+        assert(melp->unstuff == OPJ_FALSE || melp->data[0] <= 0x8F);
+        d = (melp->size > 0) ? *melp->data : 0xFF; // if buffer is consumed
+        // set data to 0xFF
+        if (melp->size == 1) {
+            d |= 0xF;    //if this is MEL+VLC-1, set LSBs to 0xF
+        }
+        // see the standard
+        melp->data += melp->size-- > 0; //increment if the end is not reached
+        d_bits = 8 - melp->unstuff; //if unstuffing is needed, reduce by 1
+        melp->tmp = (melp->tmp << d_bits) | d; //store bits in tmp
+        melp->bits += d_bits;  //increment tmp by number of bits
+        melp->unstuff = ((d & 0xFF) == 0xFF); //true if next byte needs
+        //unstuffing
+    }
+    melp->tmp <<= (64 - melp->bits); //push all the way up so the first bit
+    // is the MSB
+}
+
+//************************************************************************/
+/** @brief Retrieves one run from dec_mel_t; if no runs are stored, more
+  *         of the MEL segment is decoded first
+  *
+  * @param [in]  melp is a pointer to dec_mel_t structure
+  */
+static INLINE
+int mel_get_run(dec_mel_t *melp)
+{
+    int t;
+    if (melp->num_runs == 0) { //if no runs, decode more bit from MEL segment
+        mel_decode(melp);
+    }
+
+    t = melp->runs & 0x7F; //retrieve one run
+    melp->runs >>= 7;  // remove the retrieved run
+    melp->num_runs--;
+    return t; // return run
+}
+
+//************************************************************************/
+/** @brief A structure for reading and unstuffing a segment that grows
+  *         backward, such as VLC and MRP
+  */
+typedef struct rev_struct {
+    //storage
+    OPJ_UINT8* data;  //!<pointer to where to read data
+    OPJ_UINT64 tmp;     //!<temporary buffer of read data
+    OPJ_UINT32 bits;  //!<number of bits stored in tmp
+    int size;         //!<number of bytes left
+    OPJ_BOOL unstuff; //!<true if the last byte is more than 0x8F
+    //!<then the current byte is unstuffed if it is 0x7F
+} rev_struct_t;
+
+//************************************************************************/
+/** @brief Read and unstuff data from a backwardly-growing segment
+  *
+  *  This reader can read up to 8 bytes from before the VLC segment.
+  *  Care must be taken not to read from unreadable memory, causing a
+  *  segmentation fault.
+  *
+  *  Note that there is another subroutine rev_read_mrp that is slightly
+  *  different.  The other one fills zeros when the buffer is exhausted.
+  *  This one basically does not care if the bytes are consumed, because
+  *  any extra data should not be used in the actual decoding.
+  *
+  *  Unstuffing is needed to prevent sequences larger than 0xFF8F from
+  *  appearing in the bitstream; since we are reading backward, we keep
+  *  watch when a value larger than 0x8F appears in the bitstream.
+  *  If the byte following this is 0x7F, we unstuff this byte (ignore the
+  *  MSB of that byte, which should be 0).
+  *
+  *  @param [in]  vlcp is a pointer to rev_struct_t structure
+  */
+static INLINE
+void rev_read(rev_struct_t *vlcp)
+{
+    OPJ_UINT32 val;
+    OPJ_UINT32 tmp;
+    OPJ_UINT32 bits;
+    OPJ_BOOL unstuff;
+
+    //process 4 bytes at a time
+    if (vlcp->bits > 32) { // if there are more than 32 bits in tmp, then
+        return;    // reading 32 bits can overflow vlcp->tmp
+    }
+    val = 0;
+    //the next line (the if statement) needs to be tested first
+    if (vlcp->size > 3) { // if there are more than 3 bytes left in VLC
+        // (vlcp->data - 3) move pointer back to read 32 bits at once
+        val = read_le_uint32(vlcp->data - 3); // then read 32 bits
+        vlcp->data -= 4;                // move data pointer back by 4
+        vlcp->size -= 4;                // reduce available byte by 4
+    } else if (vlcp->size > 0) { // 3 or less
+        int i = 24;
+        while (vlcp->size > 0) {
+            OPJ_UINT32 v = *vlcp->data--; // read one byte at a time
+            val |= (v << i);              // put byte in its correct location
+            --vlcp->size;
+            i -= 8;
+        }
+    }
+
+    //accumulate in tmp, number of bits in tmp are stored in bits
+    tmp = val >> 24;  //start with the MSB byte
+
+    // test unstuff (previous byte is >0x8F), and this byte is 0x7F
+    bits = 8u - ((vlcp->unstuff && (((val >> 24) & 0x7F) == 0x7F)) ? 1u : 0u);
+    unstuff = (val >> 24) > 0x8F; //this is for the next byte
+
+    tmp |= ((val >> 16) & 0xFF) << bits; //process the next byte
+    bits += 8u - ((unstuff && (((val >> 16) & 0x7F) == 0x7F)) ? 1u : 0u);
+    unstuff = ((val >> 16) & 0xFF) > 0x8F;
+
+    tmp |= ((val >> 8) & 0xFF) << bits;
+    bits += 8u - ((unstuff && (((val >> 8) & 0x7F) == 0x7F)) ? 1u : 0u);
+    unstuff = ((val >> 8) & 0xFF) > 0x8F;
+
+    tmp |= (val & 0xFF) << bits;
+    bits += 8u - ((unstuff && ((val & 0x7F) == 0x7F)) ? 1u : 0u);
+    unstuff = (val & 0xFF) > 0x8F;
+
+    // now move the read and unstuffed bits into vlcp->tmp
+    vlcp->tmp |= (OPJ_UINT64)tmp << vlcp->bits;
+    vlcp->bits += bits;
+    vlcp->unstuff = unstuff; // this for the next read
+}
+
+//************************************************************************/
+/** @brief Initializes the rev_struct_t structure and reads a few bytes to
+  *         move the read address to a multiple of 4
+  *
+  *  There is another similar rev_init_mrp subroutine.  The difference is
+  *  that this one, rev_init, discards the first 12 bits (they have the
+  *  sum of the lengths of VLC and MEL segments), and first unstuff depends
+  *  on first 4 bits.
+  *
+  *  @param [in]  vlcp is a pointer to rev_struct_t structure
+  *  @param [in]  data is a pointer to byte at the start of the cleanup pass
+  *  @param [in]  lcup is the length of MagSgn+MEL+VLC segments
+  *  @param [in]  scup is the length of MEL+VLC segments
+  */
+static INLINE
+void rev_init(rev_struct_t *vlcp, OPJ_UINT8* data, int lcup, int scup)
+{
+    OPJ_UINT32 d;
+    int num, tnum, i;
+
+    //first byte has only the upper 4 bits
+    vlcp->data = data + lcup - 2;
+
+    //size can not be larger than this, in fact it should be smaller
+    vlcp->size = scup - 2;
+
+    d = *vlcp->data--;            // read one byte (this is a half byte)
+    vlcp->tmp = d >> 4;           // both initialize and set
+    vlcp->bits = 4 - ((vlcp->tmp & 7) == 7); //check standard
+    vlcp->unstuff = (d | 0xF) > 0x8F; //this is useful for the next byte
+
+    //This code is designed for an architecture where the read address should
+    // be aligned to the read size (address multiple of 4 if read size is 4)
+    //These few lines take care of the case where data is not at a multiple
+    // of 4 boundary. It reads 1,2,3 up to 4 bytes from the VLC bitstream.
+    // To read 32 bits, read from (vlcp->data - 3)
+    num = 1 + (int)((intptr_t)(vlcp->data) & 0x3);
+    tnum = num < vlcp->size ? num : vlcp->size;
+    for (i = 0; i < tnum; ++i) {
+        OPJ_UINT64 d;
+        OPJ_UINT32 d_bits;
+        d = *vlcp->data--;  // read one byte and move read pointer
+        //check if the last byte was >0x8F (unstuff == true) and this is 0x7F
+        d_bits = 8u - ((vlcp->unstuff && ((d & 0x7F) == 0x7F)) ? 1u : 0u);
+        vlcp->tmp |= d << vlcp->bits; // move data to vlcp->tmp
+        vlcp->bits += d_bits;
+        vlcp->unstuff = d > 0x8F; // for next byte
+    }
+    vlcp->size -= tnum;
+    rev_read(vlcp);  // read another 32 bits
+}
+
+//************************************************************************/
+/** @brief Retrieves 32 bits from the head of a rev_struct structure
+  *
+  *  By the end of this call, vlcp->tmp must have no less than 32 bits
+  *
+  *  @param [in]  vlcp is a pointer to rev_struct structure
+  */
+static INLINE
+OPJ_UINT32 rev_fetch(rev_struct_t *vlcp)
+{
+    if (vlcp->bits < 32) { // if there are less than 32 bits, read more
+        rev_read(vlcp);     // read 32 bits, but unstuffing might reduce this
+        if (vlcp->bits < 32) { // if there is still space in vlcp->tmp for 32 bits
+            rev_read(vlcp);    // read another 32
+        }
+    }
+    return (OPJ_UINT32)vlcp->tmp; // return the head (bottom-most) of vlcp->tmp
+}
+
+//************************************************************************/
+/** @brief Consumes num_bits from a rev_struct structure
+  *         and returns the low 32 bits that remain in vlcp->tmp
+  *  @param [in]  vlcp is a pointer to rev_struct structure
+  *  @param [in]  num_bits is the number of bits to be removed (<= vlcp->bits)
+  */
+static INLINE
+OPJ_UINT32 rev_advance(rev_struct_t *vlcp, OPJ_UINT32 num_bits)
+{
+    assert(num_bits <= vlcp->bits); // cannot consume more bits than are buffered
+    vlcp->tmp >>= num_bits;         // drop the consumed low-order bits
+    vlcp->bits -= num_bits;         // keep the bit count in step with tmp
+    return (OPJ_UINT32)vlcp->tmp;
+}
+
+//************************************************************************/
+/** @brief Reads up to 4 bytes, walking backwards, and unstuffs them into mrp->tmp
+  *
+  *  This differs from rev_read in that it feeds in zeros once the
+  *  available data is consumed.  The other does not care about the
+  *  values when all data is consumed.  Does nothing if mrp->bits > 32.
+  *
+  *  See rev_read for more information about unstuffing
+  *
+  *  @param [in]  mrp is a pointer to rev_struct structure
+  */
+static INLINE
+void rev_read_mrp(rev_struct_t *mrp)
+{
+    OPJ_UINT32 val;
+    OPJ_UINT32 tmp;
+    OPJ_UINT32 bits;
+    OPJ_BOOL unstuff;
+
+    //skip the refill while more than 32 bits are still buffered
+    if (mrp->bits > 32) {
+        return;
+    }
+    val = 0;
+    if (mrp->size > 3) { // at least 4 bytes remain
+        // mrp->data is the last unread byte, so the 32-bit word starts at data - 3
+        val = read_le_uint32(mrp->data - 3); // read 32 bits
+        mrp->data -= 4;                      // move back pointer
+        mrp->size -= 4;                      // reduce count
+    } else if (mrp->size > 0) {
+        int i = 24;
+        while (mrp->size > 0) {
+            OPJ_UINT32 v = *mrp->data--; // read one byte at a time
+            val |= (v << i);             // first byte read goes to the top of val
+            --mrp->size;
+            i -= 8;
+        }
+    }
+
+
+    //unstuff from the top byte of val downwards; tmp collects, bits counts
+    tmp = val >> 24;
+
+    //7 bits instead of 8 if the previous byte was > 0x8F and this one has low 7 bits 0x7F
+    bits = 8u - ((mrp->unstuff && (((val >> 24) & 0x7F) == 0x7F)) ? 1u : 0u);
+    unstuff = (val >> 24) > 0x8F;
+
+    //same steps for the next byte down, placed above the bits gathered so far
+    tmp |= ((val >> 16) & 0xFF) << bits;
+    bits += 8u - ((unstuff && (((val >> 16) & 0x7F) == 0x7F)) ? 1u : 0u);
+    unstuff = ((val >> 16) & 0xFF) > 0x8F;
+
+    tmp |= ((val >> 8) & 0xFF) << bits;
+    bits += 8u - ((unstuff && (((val >> 8) & 0x7F) == 0x7F)) ? 1u : 0u);
+    unstuff = ((val >> 8) & 0xFF) > 0x8F;
+
+    tmp |= (val & 0xFF) << bits;
+    bits += 8u - ((unstuff && ((val & 0x7F) == 0x7F)) ? 1u : 0u);
+    unstuff = (val & 0xFF) > 0x8F;
+
+    mrp->tmp |= (OPJ_UINT64)tmp << mrp->bits; // append above the bits already held
+    mrp->bits += bits;
+    mrp->unstuff = unstuff;                   // carried over to the next byte read
+}
+
+//************************************************************************/
+/** @brief Initializes rev_struct structure for MRP segment, and reads
+  *         a number of bytes such that the next 32 bits read are from
+  *         an address that is a multiple of 4. Note this is designed for
+  *         an architecture where the read size must be compatible with the
+  *         alignment of the read address
+  *
+  *  There is another similar subroutine rev_init.  This subroutine does
+  *  NOT skip the first 12 bits, and starts with unstuff set to true.
+  *
+  *  @param [in]  mrp is a pointer to rev_struct structure
+  *  @param [in]  data is a pointer to byte at the start of the cleanup pass
+  *  @param [in]  lcup is the length of MagSgn+MEL+VLC segments
+  *  @param [in]  len2 is the length of SPP+MRP segments
+  */
+static INLINE
+void rev_init_mrp(rev_struct_t *mrp, OPJ_UINT8* data, int lcup, int len2)
+{
+    int num, i;
+
+    mrp->data = data + lcup + len2 - 1;
+    mrp->size = len2;
+    mrp->unstuff = OPJ_TRUE;
+    mrp->bits = 0;
+    mrp->tmp = 0;
+
+    //This code is designed for an architecture where the read address should
+    // align to the read size (address multiple of 4 if read size is 4)
+    //These few lines take care of the case where data is not at a multiple
+    // of 4 boundary.  It reads 1, 2, 3 or 4 bytes from the end of the MRP stream
+    num = 1 + (int)((intptr_t)(mrp->data) & 0x3);
+    for (i = 0; i < num; ++i) {
+        OPJ_UINT64 d;
+        OPJ_UINT32 d_bits;
+
+        //read a byte, 0 if no more data (size is decremented either way)
+        d = (mrp->size-- > 0) ? *mrp->data-- : 0;
+        //7 bits if the previous byte was > 0x8F and this one has low 7 bits 0x7F
+        d_bits = 8u - ((mrp->unstuff && ((d & 0x7F) == 0x7F)) ? 1u : 0u);
+        mrp->tmp |= d << mrp->bits; // move data to mrp->tmp
+        mrp->bits += d_bits;
+        mrp->unstuff = d > 0x8F; // for next byte
+    }
+    rev_read_mrp(mrp);
+}
+
+//************************************************************************/
+/** @brief Returns the low 32 bits of mrp->tmp, refilling it first if needed
+  *
+  *  By the end of this call, mrp->tmp holds no less than 32 bits
+  *  (each rev_read_mrp call that runs adds between 28 and 32 bits).
+  *  @param [in]  mrp is a pointer to rev_struct structure
+  */
+static INLINE
+OPJ_UINT32 rev_fetch_mrp(rev_struct_t *mrp)
+{
+    if (mrp->bits < 32) { // if there are less than 32 bits in mrp->tmp
+        rev_read_mrp(mrp);    // adds up to 32 bits; unstuffing can make it fewer
+        if (mrp->bits < 32) { // still short of 32 bits after the first read
+            rev_read_mrp(mrp);    // read more
+        }
+    }
+    return (OPJ_UINT32)mrp->tmp;  // the cast keeps the bottom-most 32 bits
+}
+
+//************************************************************************/
+/** @brief Consumes num_bits from a rev_struct structure
+  *         and returns the low 32 bits that remain in mrp->tmp
+  *  @param [in]  mrp is a pointer to rev_struct structure
+  *  @param [in]  num_bits is the number of bits to be removed (<= mrp->bits)
+  */
+static INLINE
+OPJ_UINT32 rev_advance_mrp(rev_struct_t *mrp, OPJ_UINT32 num_bits)
+{
+    assert(num_bits <= mrp->bits); // we must not consume more than mrp->bits
+    mrp->tmp >>= num_bits;         // discard the lowest num_bits bits
+    mrp->bits -= num_bits;         // keep the bit count in step with tmp
+    return (OPJ_UINT32)mrp->tmp;   // return data after consumption
+}
+
+//************************************************************************/
+/** @brief Decode initial UVLC to get the u value (or u_q)
+  *  @return the number of bits of vlc taken up by the decoded codewords
+  *  @param [in]  vlc is the head of the VLC bitstream
+  *  @param [in]  mode is 0, 1, 2, 3, or 4. Values in 0 to 3 are composed of
+  *               u_off of 1st quad and 2nd quad of a quad pair.  The value
+  *               4 occurs when both bits are 1, and the event decoded
+  *               from MEL bitstream is also 1.
+  *  @param [out] u is a 2-entry array, one per quad.  Modes 0 to 3 store
+  *               u + 1 (u + kappa, kappa = 1); mode 4 stores u + 3.
+  */
+static INLINE
+OPJ_UINT32 decode_init_uvlc(OPJ_UINT32 vlc, OPJ_UINT32 mode, OPJ_UINT32 *u)
+{
+    //lookup table indexed by the 3 least significant bits of vlc
+    // there are 8 entries for xx1, x10, 100, 000, where x means do not care
+    // each table value is made up of
+    // 2 bits in the LSB for prefix length
+    // 3 bits for suffix length
+    // 3 bits in the MSB for prefix value (u_pfx in Table 3 of ITU T.814)
+    static const OPJ_UINT8 dec[8] = { // the index is the prefix codeword
+        3 | (5 << 2) | (5 << 5),        //000 == 000, prefix codeword "000"
+        1 | (0 << 2) | (1 << 5),        //001 == xx1, prefix codeword "1"
+        2 | (0 << 2) | (2 << 5),        //010 == x10, prefix codeword "01"
+        1 | (0 << 2) | (1 << 5),        //011 == xx1, prefix codeword "1"
+        3 | (1 << 2) | (3 << 5),        //100 == 100, prefix codeword "001"
+        1 | (0 << 2) | (1 << 5),        //101 == xx1, prefix codeword "1"
+        2 | (0 << 2) | (2 << 5),        //110 == x10, prefix codeword "01"
+        1 | (0 << 2) | (1 << 5)         //111 == xx1, prefix codeword "1"
+    };
+
+    OPJ_UINT32 consumed_bits = 0;
+    if (mode == 0) { // both u_off are 0
+        u[0] = u[1] = 1; //Kappa is 1 for initial line
+    } else if (mode <= 2) { // u_off are either 01 or 10
+        OPJ_UINT32 d;
+        OPJ_UINT32 suffix_len;
+
+        d = dec[vlc & 0x7];   //look at the least significant 3 bits
+        vlc >>= d & 0x3;                 //prefix length
+        consumed_bits += d & 0x3;
+
+        suffix_len = ((d >> 2) & 0x7);
+        consumed_bits += suffix_len;
+
+        d = (d >> 5) + (vlc & ((1U << suffix_len) - 1)); // prefix value + suffix
+        u[0] = (mode == 1) ? d + 1 : 1; // kappa is 1 for initial line
+        u[1] = (mode == 1) ? 1 : d + 1; // kappa is 1 for initial line
+    } else if (mode == 3) { // both u_off are 1, and MEL event is 0
+        OPJ_UINT32 d1 = dec[vlc & 0x7];  // LSBs of VLC are prefix codeword
+        vlc >>= d1 & 0x3;                // Consume bits
+        consumed_bits += d1 & 0x3;
+
+        if ((d1 & 0x3) > 2) {
+            OPJ_UINT32 suffix_len;
+
+            //first prefix was 3 bits long: second quad's u comes from one bit
+            u[1] = (vlc & 1) + 1 + 1; //Kappa is 1 for initial line
+            ++consumed_bits;
+            vlc >>= 1;
+
+            suffix_len = ((d1 >> 2) & 0x7);
+            consumed_bits += suffix_len;
+            d1 = (d1 >> 5) + (vlc & ((1U << suffix_len) - 1)); // u value
+            u[0] = d1 + 1; //Kappa is 1 for initial line
+        } else {
+            OPJ_UINT32 d2;
+            OPJ_UINT32 suffix_len;
+
+            d2 = dec[vlc & 0x7];  // LSBs of VLC are prefix codeword
+            vlc >>= d2 & 0x3;                // Consume bits
+            consumed_bits += d2 & 0x3;
+
+            suffix_len = ((d1 >> 2) & 0x7);  // both prefixes read; now 1st suffix
+            consumed_bits += suffix_len;
+
+            d1 = (d1 >> 5) + (vlc & ((1U << suffix_len) - 1)); // u value
+            u[0] = d1 + 1; //Kappa is 1 for initial line
+            vlc >>= suffix_len;
+
+            suffix_len = ((d2 >> 2) & 0x7);
+            consumed_bits += suffix_len;
+
+            d2 = (d2 >> 5) + (vlc & ((1U << suffix_len) - 1)); // u value
+            u[1] = d2 + 1; //Kappa is 1 for initial line
+        }
+    } else if (mode == 4) { // both u_off are 1, and MEL event is 1
+        OPJ_UINT32 d1;
+        OPJ_UINT32 d2;
+        OPJ_UINT32 suffix_len;
+
+        d1 = dec[vlc & 0x7];  // LSBs of VLC are prefix codeword
+        vlc >>= d1 & 0x3;                // Consume bits
+        consumed_bits += d1 & 0x3;
+
+        d2 = dec[vlc & 0x7];  // LSBs of VLC are prefix codeword
+        vlc >>= d2 & 0x3;                // Consume bits
+        consumed_bits += d2 & 0x3;
+
+        suffix_len = ((d1 >> 2) & 0x7);  // both prefixes read; now 1st suffix
+        consumed_bits += suffix_len;
+
+        d1 = (d1 >> 5) + (vlc & ((1U << suffix_len) - 1)); // u value
+        u[0] = d1 + 3; // add 2+kappa
+        vlc >>= suffix_len;
+
+        suffix_len = ((d2 >> 2) & 0x7);
+        consumed_bits += suffix_len;
+
+        d2 = (d2 >> 5) + (vlc & ((1U << suffix_len) - 1)); // u value
+        u[1] = d2 + 3; // add 2+kappa
+    }
+    return consumed_bits;
+}
+
+//************************************************************************/
+/** @brief Decode non-initial UVLC to get the u value (or u_q)
+  *  @return the number of bits of vlc taken up by the decoded codewords
+  *  @param [in]  vlc is the head of the VLC bitstream
+  *  @param [in]  mode is 0, 1, 2, or 3. The 1st bit is u_off of 1st quad
+  *               and 2nd for 2nd quad of a quad pair
+  *  @param [out] u is a 2-entry array, one per quad, that receives the u
+  *               value (or u_q) + 1: a partial calculation of u + kappa.
+  */
+static INLINE
+OPJ_UINT32 decode_noninit_uvlc(OPJ_UINT32 vlc, OPJ_UINT32 mode, OPJ_UINT32 *u)
+{
+    //lookup table indexed by the 3 least significant bits of vlc
+    // there are 8 entries for xx1, x10, 100, 000, where x means do not care
+    // each table value is made up of
+    // 2 bits in the LSB for prefix length
+    // 3 bits for suffix length
+    // 3 bits in the MSB for prefix value (u_pfx in Table 3 of ITU T.814)
+    static const OPJ_UINT8 dec[8] = {
+        3 | (5 << 2) | (5 << 5), //000 == 000, prefix codeword "000"
+        1 | (0 << 2) | (1 << 5), //001 == xx1, prefix codeword "1"
+        2 | (0 << 2) | (2 << 5), //010 == x10, prefix codeword "01"
+        1 | (0 << 2) | (1 << 5), //011 == xx1, prefix codeword "1"
+        3 | (1 << 2) | (3 << 5), //100 == 100, prefix codeword "001"
+        1 | (0 << 2) | (1 << 5), //101 == xx1, prefix codeword "1"
+        2 | (0 << 2) | (2 << 5), //110 == x10, prefix codeword "01"
+        1 | (0 << 2) | (1 << 5)  //111 == xx1, prefix codeword "1"
+    };
+
+    OPJ_UINT32 consumed_bits = 0;
+    if (mode == 0) {
+        u[0] = u[1] = 1; //for kappa
+    } else if (mode <= 2) { //u_off are either 01 or 10
+        OPJ_UINT32 d;
+        OPJ_UINT32 suffix_len;
+
+        d = dec[vlc & 0x7];  //look at the least significant 3 bits
+        vlc >>= d & 0x3;                //prefix length
+        consumed_bits += d & 0x3;
+
+        suffix_len = ((d >> 2) & 0x7);
+        consumed_bits += suffix_len;
+
+        d = (d >> 5) + (vlc & ((1U << suffix_len) - 1)); // prefix value + suffix
+        u[0] = (mode == 1) ? d + 1 : 1; //for kappa
+        u[1] = (mode == 1) ? 1 : d + 1; //for kappa
+    } else if (mode == 3) { // both u_off are 1
+        OPJ_UINT32 d1;
+        OPJ_UINT32 d2;
+        OPJ_UINT32 suffix_len;
+
+        d1 = dec[vlc & 0x7];  // LSBs of VLC are prefix codeword
+        vlc >>= d1 & 0x3;                // Consume bits
+        consumed_bits += d1 & 0x3;
+
+        d2 = dec[vlc & 0x7];  // LSBs of VLC are prefix codeword
+        vlc >>= d2 & 0x3;                // Consume bits
+        consumed_bits += d2 & 0x3;
+
+        suffix_len = ((d1 >> 2) & 0x7);  // both prefixes read; now 1st suffix
+        consumed_bits += suffix_len;
+
+        d1 = (d1 >> 5) + (vlc & ((1U << suffix_len) - 1)); // u value
+        u[0] = d1 + 1;  //1 for kappa
+        vlc >>= suffix_len;
+
+        suffix_len = ((d2 >> 2) & 0x7);
+        consumed_bits += suffix_len;
+
+        d2 = (d2 >> 5) + (vlc & ((1U << suffix_len) - 1)); // u value
+        u[1] = d2 + 1;  //1 for kappa
+    }
+    return consumed_bits;
+}
+
+//************************************************************************/
+/** @brief State structure for reading and unstuffing of forward-growing
+  *         bitstreams; these are: MagSgn and SPP bitstreams
+  */
+typedef struct frwd_struct {
+    const OPJ_UINT8* data; //!<pointer to the next unread byte of the bitstream
+    OPJ_UINT64 tmp;        //!<unstuffed bits not yet consumed, next bit in the LSB
+    OPJ_UINT32 bits;       //!<number of bits stored in tmp
+    OPJ_BOOL unstuff;      //!<true if a bit needs to be unstuffed from next byte
+    int size;              //!<bytes left to read; frwd_init can take it below 0
+    OPJ_UINT32 X;          //!<0 or 0xFF, filler fed in once the bitstream ends
+} frwd_struct_t;
+
+//************************************************************************/
+/** @brief Reads and unstuffs 32 bits from forward-growing bitstream
+  *
+  *  A subroutine to read from both the MagSgn or SPP bitstreams;
+  *  in particular, when MagSgn bitstream is consumed, 0xFF's are fed,
+  *  while when SPP is exhausted 0's are fed in.
+  *  X controls this value.
+  *
+  *  Unstuffing prevents sequences that are more than 0xFF7F from appearing
+  *  in the compressed sequence.  So whenever a value of 0xFF is coded, the
+  *  MSB of the next byte is set 0 and must be ignored during decoding.
+  *
+  *  NOTE(review): this comment used to say reads can overrun the buffer by up
+  *  to 3 bytes; the code below only reads bytes counted by msp->size - confirm.
+  *  @param  [in]  msp is a pointer to frwd_struct_t structure
+  *
+  */
+static INLINE
+void frwd_read(frwd_struct_t *msp)
+{
+    OPJ_UINT32 val;
+    OPJ_UINT32 bits;
+    OPJ_UINT32 t;
+    OPJ_BOOL unstuff;
+
+    assert(msp->bits <= 32); // assert that there is a space for 32 bits
+
+    val = 0u;
+    if (msp->size > 3) {
+        val = read_le_uint32(msp->data);  // read 32 bits
+        msp->data += 4;           // increment pointer
+        msp->size -= 4;           // reduce size
+    } else if (msp->size > 0) {
+        int i = 0;
+        val = msp->X != 0 ? 0xFFFFFFFFu : 0;
+        while (msp->size > 0) {
+            OPJ_UINT32 v = *msp->data++;  // read one byte at a time
+            OPJ_UINT32 m = ~(0xFFu << i); // clears the byte lane being filled
+            val = (val & m) | (v << i);   // real byte replaces the filler there
+            --msp->size;
+            i += 8;
+        }
+    } else {
+        val = msp->X != 0 ? 0xFFFFFFFFu : 0;
+    }
+
+    // unstuff from the lowest byte of val upwards; t collects, bits counts
+    bits = 8u - (msp->unstuff ? 1u : 0u);
+    t = val & 0xFF;
+    unstuff = ((val & 0xFF) == 0xFF);  // Do we need unstuffing next?
+
+    t |= ((val >> 8) & 0xFF) << bits;
+    bits += 8u - (unstuff ? 1u : 0u);
+    unstuff = (((val >> 8) & 0xFF) == 0xFF);
+
+    t |= ((val >> 16) & 0xFF) << bits;
+    bits += 8u - (unstuff ? 1u : 0u);
+    unstuff = (((val >> 16) & 0xFF) == 0xFF);
+
+    t |= ((val >> 24) & 0xFF) << bits;
+    bits += 8u - (unstuff ? 1u : 0u);
+    msp->unstuff = (((val >> 24) & 0xFF) == 0xFF); // for next byte
+
+    msp->tmp |= ((OPJ_UINT64)t) << msp->bits;  // append above the bits already held
+    msp->bits += bits;
+}
+
+//************************************************************************/
+/** @brief Initializes frwd_struct_t struct and reads some bytes
+  *
+  *  @param [in]  msp is a pointer to frwd_struct_t
+  *  @param [in]  data is a pointer to the start of data
+  *  @param [in]  size is the number of bytes in the bitstream
+  *  @param [in]  X is the value fed in when the bitstream is exhausted;
+  *               it must be 0 or 0xFF.  See frwd_read.
+  */
+static INLINE
+void frwd_init(frwd_struct_t *msp, const OPJ_UINT8* data, int size,
+               OPJ_UINT32 X)
+{
+    int num, i;
+
+    msp->data = data;
+    msp->tmp = 0;
+    msp->bits = 0;
+    msp->unstuff = OPJ_FALSE;
+    msp->size = size;
+    msp->X = X;
+    assert(msp->X == 0 || msp->X == 0xFF);
+
+    //This code is designed for an architecture where the read address should
+    // align to the read size (address multiple of 4 if read size is 4)
+    //These few lines take care of the case where data is not at a multiple
+    // of 4 boundary.  It reads 1, 2, 3 or 4 bytes from the bitstream
+    num = 4 - (int)((intptr_t)(msp->data) & 0x3);
+    for (i = 0; i < num; ++i) {
+        OPJ_UINT64 d;
+        //read a byte if any is left, otherwise use X (size drops either way)
+        d = msp->size-- > 0 ? *msp->data++ : msp->X;
+        msp->tmp |= (d << msp->bits);      // store data in msp->tmp
+        msp->bits += 8u - (msp->unstuff ? 1u : 0u); // number of bits added to msp->tmp
+        msp->unstuff = ((d & 0xFF) == 0xFF); // unstuffing for next byte
+    }
+    frwd_read(msp); // read 32 bits more
+}
+
+//************************************************************************/
+/** @brief Consumes num_bits bits from the bitstream of frwd_struct_t
+  *
+  *  @param [in]  msp is a pointer to frwd_struct_t
+  *  @param [in]  num_bits is the number of bits to consume (<= msp->bits)
+  */
+static INLINE
+void frwd_advance(frwd_struct_t *msp, OPJ_UINT32 num_bits)
+{
+    assert(num_bits <= msp->bits);
+    msp->tmp >>= num_bits;  // drop the consumed low-order bits
+    msp->bits -= num_bits;
+}
+
+//************************************************************************/
+/** @brief Fetches 32 bits from the frwd_struct_t bitstream
+  *         without consuming them (see frwd_advance); refills msp->tmp first
+  *  @param [in]  msp is a pointer to frwd_struct_t
+  */
+static INLINE
+OPJ_UINT32 frwd_fetch(frwd_struct_t *msp)
+{
+    if (msp->bits < 32) {
+        frwd_read(msp);
+        if (msp->bits < 32) { // unstuffing can leave one read short of 32 bits
+            frwd_read(msp);
+        }
+    }
+    return (OPJ_UINT32)msp->tmp;
+}
+
+//************************************************************************/
+/** @brief Allocates (or reuses) and zeroes the T1 buffers for one codeblock
+  *  On allocation failure the matching size field is reset to 0.
+  *  @param [in, out]  t1 is codeblock coefficients storage
+  *  @param [in]       w is codeblock width
+  *  @param [in]       h is codeblock height
+  *  @return OPJ_TRUE on success, OPJ_FALSE if an allocation fails
+  */
+static OPJ_BOOL opj_t1_allocate_buffers(
+    opj_t1_t *t1,
+    OPJ_UINT32 w,
+    OPJ_UINT32 h)
+{
+    OPJ_UINT32 flagssize;
+
+    /* No overflow risk: prior checks ensure these per-specification bounds */
+    assert(w <= 1024);
+    assert(h <= 1024);
+    assert(w * h <= 4096);
+
+    /* encoder uses tile buffer, so no need to allocate */
+    {
+        OPJ_UINT32 datasize = w * h;
+
+        if (datasize > t1->datasize) {
+            opj_aligned_free(t1->data);
+            t1->data = (OPJ_INT32*)
+                       opj_aligned_malloc(datasize * sizeof(OPJ_INT32));
+            if (!t1->data) {
+                /* FIXME event manager error callback */
+                t1->datasize = 0; /* else a later smaller request "succeeds" with NULL data */
+                return OPJ_FALSE;
+            }
+            t1->datasize = datasize;
+        }
+        /* memset first arg is declared to never be null by gcc */
+        if (t1->data != NULL) {
+            memset(t1->data, 0, datasize * sizeof(OPJ_INT32));
+        }
+    }
+
+    // We expand these buffers to multiples of 16 bytes.
+    // We need 4 buffers of 129 integers each, expanded to 132 integers each
+    // We also need 514 bytes of buffer, expanded to 528 bytes
+    flagssize = 132U * sizeof(OPJ_UINT32) * 4U; // expanded to multiple of 16
+    flagssize += 528U; // 514 expanded to multiples of 16
+
+    {
+        if (flagssize > t1->flagssize) {
+            opj_aligned_free(t1->flags);
+            t1->flags = (opj_flag_t*) opj_aligned_malloc(flagssize);
+            if (!t1->flags) {
+                /* FIXME event manager error callback */
+                t1->flagssize = 0; /* keep the size consistent with the NULL pointer */
+                return OPJ_FALSE;
+            }
+        }
+        t1->flagssize = flagssize;
+        memset(t1->flags, 0, flagssize);
+    }
+
+    t1->w = w;
+    t1->h = h;
+
+    return OPJ_TRUE;
+}
+
+//************************************************************************/
+/** @brief Decodes one codeblock, processing the cleanup, significance
+  *         propagation, and magnitude refinement pass
+  *
+  *  @param [in, out]  t1 is codeblock coefficients storage
+  *  @param [in]       cblk is codeblock properties
+  *  @param [in]       orient is the subband to which the codeblock belongs (not needed)
+  *  @param [in]       roishift is region of interest shift
+  *  @param [in]       cblksty is codeblock style
+  *  @param [in]       p_manager is events print manager
+  *  @param [in]       p_manager_mutex a mutex to control access to p_manager
+  *  @param [in]       check_pterm: check termination (not used)
+  */
+OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
+                               opj_tcd_cblk_dec_t* cblk,
+                               OPJ_UINT32 orient,
+                               OPJ_UINT32 roishift,
+                               OPJ_UINT32 cblksty,
+                               opj_event_mgr_t *p_manager,
+                               opj_mutex_t* p_manager_mutex,
+                               OPJ_BOOL check_pterm)
+{
+    OPJ_BYTE* cblkdata = NULL;
+    OPJ_UINT8* coded_data;
+    OPJ_UINT32* decoded_data;
+    OPJ_UINT32 zero_bplanes;
+    OPJ_UINT32 num_passes;
+    OPJ_UINT32 lengths1;
+    OPJ_UINT32 lengths2;
+    OPJ_INT32 width;
+    OPJ_INT32 height;
+    OPJ_INT32 stride;
+    OPJ_UINT32 *pflags, *sigma1, *sigma2, *mbr1, *mbr2, *sip, sip_shift;
+    OPJ_UINT32 p;
+    OPJ_UINT32 zero_bplanes_p1;
+    int lcup, scup;
+    dec_mel_t mel;
+    rev_struct_t vlc;
+    frwd_struct_t magsgn;
+    frwd_struct_t sigprop;
+    rev_struct_t magref;
+    OPJ_UINT8 *lsp, *line_state;
+    int run;
+    OPJ_UINT32 vlc_val;              // fetched data from VLC bitstream
+    OPJ_UINT32 qinf[2];
+    OPJ_UINT32 c_q;
+    OPJ_UINT32* sp;
+    OPJ_INT32 x, y; // loop indices
+    OPJ_BOOL stripe_causal = (cblksty & J2K_CCP_CBLKSTY_VSC) != 0;
+    OPJ_UINT32 cblk_len = 0;
+
+    (void)(orient);      // stops unused parameter message
+    (void)(check_pterm); // stops unused parameter message
+
+    // We ignor orient, because the same decoder is used for all subbands
+    // We also ignore check_pterm, because I am not sure how it applies
+    if (roishift != 0) {
+        if (p_manager_mutex) {
+            opj_mutex_lock(p_manager_mutex);
+        }
+        opj_event_msg(p_manager, EVT_ERROR, "We do not support ROI in decoding "
+                      "HT codeblocks\n");
+        if (p_manager_mutex) {
+            opj_mutex_unlock(p_manager_mutex);
+        }
+        return OPJ_FALSE;
+    }
+
+    if (!opj_t1_allocate_buffers(
+                t1,
+                (OPJ_UINT32)(cblk->x1 - cblk->x0),
+                (OPJ_UINT32)(cblk->y1 - cblk->y0))) {
+        return OPJ_FALSE;
+    }
+
+    if (cblk->Mb == 0) {
+        return OPJ_TRUE;
+    }
+
+    /* numbps = Mb + 1 - zero_bplanes, Mb = Kmax, zero_bplanes = missing_msbs */
+    zero_bplanes = (cblk->Mb + 1) - cblk->numbps;
+
+    /* Compute whole codeblock length from chunk lengths */
+    cblk_len = 0;
+    {
+        OPJ_UINT32 i;
+        for (i = 0; i < cblk->numchunks; i++) {
+            cblk_len += cblk->chunks[i].len;
+        }
+    }
+
+    if (cblk->numchunks > 1 || t1->mustuse_cblkdatabuffer) {
+        OPJ_UINT32 i;
+
+        /* Allocate temporary memory if needed */
+        if (cblk_len > t1->cblkdatabuffersize) {
+            cblkdata = (OPJ_BYTE*)opj_realloc(
+                           t1->cblkdatabuffer, cblk_len);
+            if (cblkdata == NULL) {
+                return OPJ_FALSE;
+            }
+            t1->cblkdatabuffer = cblkdata;
+            t1->cblkdatabuffersize = cblk_len;
+        }
+
+        /* Concatenate all chunks */
+        cblkdata = t1->cblkdatabuffer;
+        cblk_len = 0;
+        for (i = 0; i < cblk->numchunks; i++) {
+            memcpy(cblkdata + cblk_len, cblk->chunks[i].data, cblk->chunks[i].len);
+            cblk_len += cblk->chunks[i].len;
+        }
+    } else if (cblk->numchunks == 1) {
+        cblkdata = cblk->chunks[0].data;
+    } else {
+        /* Not sure if that can happen in practice, but avoid Coverity to */
+        /* think we will dereference a null cblkdta pointer */
+        return OPJ_TRUE;
+    }
+
+    // OPJ_BYTE* coded_data is a pointer to bitstream
+    coded_data = cblkdata;
+    // OPJ_UINT32* decoded_data is a pointer to decoded codeblock data buf.
+    decoded_data = (OPJ_UINT32*)t1->data;
+    // OPJ_UINT32 num_passes is the number of passes: 1 if CUP only, 2 for
+    // CUP+SPP, and 3 for CUP+SPP+MRP
+    num_passes = cblk->numsegs > 0 ? cblk->segs[0].real_num_passes : 0;
+    num_passes += cblk->numsegs > 1 ? cblk->segs[1].real_num_passes : 0;
+    // OPJ_UINT32 lengths1 is the length of cleanup pass
+    lengths1 = num_passes > 0 ? cblk->segs[0].len : 0;
+    // OPJ_UINT32 lengths2 is the length of refinement passes (either SPP only or SPP+MRP)
+    lengths2 = num_passes > 1 ? cblk->segs[1].len : 0;
+    // OPJ_INT32 width is the decoded codeblock width
+    width = cblk->x1 - cblk->x0;
+    // OPJ_INT32 height is the decoded codeblock height
+    height = cblk->y1 - cblk->y0;
+    // OPJ_INT32 stride is the decoded codeblock buffer stride
+    stride = width;
+
+    /*  sigma1 and sigma2 contains significant (i.e., non-zero) pixel
+     *  locations.  The buffers are used interchangeably, because we need
+     *  more than 4 rows of significance information at a given time.
+     *  Each 32 bits contain significance information for 4 rows of 8
+     *  columns each.  If we denote 32 bits by 0xaaaaaaaa, the each "a" is
+     *  called a nibble and has significance information for 4 rows.
+     *  The least significant nibble has information for the first column,
+     *  and so on. The nibble's LSB is for the first row, and so on.
+     *  Since, at most, we can have 1024 columns in a quad, we need 128
+     *  entries; we added 1 for convenience when propagation of signifcance
+     *  goes outside the structure
+     *  To work in OpenJPEG these buffers has been expanded to 132.
+     */
+    // OPJ_UINT32 *pflags, *sigma1, *sigma2, *mbr1, *mbr2, *sip, sip_shift;
+    pflags = (OPJ_UINT32 *)t1->flags;
+    sigma1 = pflags;
+    sigma2 = sigma1 + 132;
+    // mbr arrangement is similar to sigma; mbr contains locations
+    // that become significant during significance propagation pass
+    mbr1 = sigma2 + 132;
+    mbr2 = mbr1 + 132;
+    //a pointer to sigma
+    sip = sigma1;  //pointers to arrays to be used interchangeably
+    sip_shift = 0; //the amount of shift needed for sigma
+
+    if (num_passes > 1 && lengths2 == 0) {
+        if (p_manager_mutex) {
+            opj_mutex_lock(p_manager_mutex);
+        }
+        opj_event_msg(p_manager, EVT_WARNING, "A malformed codeblock that has "
+                      "more than one coding pass, but zero length for "
+                      "2nd and potentially the 3rd pass in an HT codeblock.\n");
+        if (p_manager_mutex) {
+            opj_mutex_unlock(p_manager_mutex);
+        }
+        num_passes = 1;
+    }
+    if (num_passes > 3) {
+        if (p_manager_mutex) {
+            opj_mutex_lock(p_manager_mutex);
+        }
+        opj_event_msg(p_manager, EVT_ERROR, "We do not support more than 3 "
+                      "coding passes in an HT codeblock; This codeblocks has "
+                      "%d passes.\n", num_passes);
+        if (p_manager_mutex) {
+            opj_mutex_unlock(p_manager_mutex);
+        }
+        return OPJ_FALSE;
+    }
+
+    if (cblk->Mb > 30) {
+        /* This check is better moved to opj_t2_read_packet_header() in t2.c
+           We do not have enough precision to decode any passes
+           The design of openjpeg assumes that the bits of a 32-bit integer are
+           assigned as follows:
+           bit 31 is for sign
+           bits 30-1 are for magnitude
+           bit 0 is for the center of the quantization bin
+           Therefore we can only do values of cblk->Mb <= 30
+         */
+        if (p_manager_mutex) {
+            opj_mutex_lock(p_manager_mutex);
+        }
+        opj_event_msg(p_manager, EVT_ERROR, "32 bits are not enough to "
+                      "decode this codeblock, since the number of "
+                      "bitplane, %d, is larger than 30.\n", cblk->Mb);
+        if (p_manager_mutex) {
+            opj_mutex_unlock(p_manager_mutex);
+        }
+        return OPJ_FALSE;
+    }
+    if (zero_bplanes > cblk->Mb) {
+        /* This check is better moved to opj_t2_read_packet_header() in t2.c,
+           in the line "l_cblk->numbps = (OPJ_UINT32)l_band->numbps + 1 - i;"
+           where i is the zero bitplanes, and should be no larger than cblk->Mb
+           We cannot have more zero bitplanes than there are planes. */
+        if (p_manager_mutex) {
+            opj_mutex_lock(p_manager_mutex);
+        }
+        opj_event_msg(p_manager, EVT_ERROR, "Malformed HT codeblock. "
+                      "Decoding this codeblock is stopped. There are "
+                      "%d zero bitplanes in %d bitplanes.\n",
+                      zero_bplanes, cblk->Mb);
+
+        if (p_manager_mutex) {
+            opj_mutex_unlock(p_manager_mutex);
+        }
+        return OPJ_FALSE;
+    } else if (zero_bplanes == cblk->Mb && num_passes > 1) {
+        /* When the number of zero bitplanes is equal to the number of bitplanes,
+           only the cleanup pass makes sense*/
+        if (only_cleanup_pass_is_decoded == OPJ_FALSE) {
+            if (p_manager_mutex) {
+                opj_mutex_lock(p_manager_mutex);
+            }
+            /* We have a second check to prevent the possibility of an overrun condition,
+               in the very unlikely event of a second thread discovering that
+               only_cleanup_pass_is_decoded is false before the first thread changing
+               the condition. */
+            if (only_cleanup_pass_is_decoded == OPJ_FALSE) {
+                only_cleanup_pass_is_decoded = OPJ_TRUE;
+                opj_event_msg(p_manager, EVT_WARNING, "Malformed HT codeblock. "
+                              "When the number of zero planes bitplanes is "
+                              "equal to the number of bitplanes, only the cleanup "
+                              "pass makes sense, but we have %d passes in this "
+                              "codeblock. Therefore, only the cleanup pass will be "
+                              "decoded. This message will not be displayed again.\n",
+                              num_passes);
+            }
+            if (p_manager_mutex) {
+                opj_mutex_unlock(p_manager_mutex);
+            }
+        }
+        num_passes = 1;
+    }
+
+    /* OPJ_UINT32 */
+    p = cblk->numbps;
+
+    // OPJ_UINT32 zero planes plus 1
+    zero_bplanes_p1 = zero_bplanes + 1;
+
+    if (lengths1 < 2 || (OPJ_UINT32)lengths1 > cblk_len ||
+            (OPJ_UINT32)(lengths1 + lengths2) > cblk_len) {
+        if (p_manager_mutex) {
+            opj_mutex_lock(p_manager_mutex);
+        }
+        opj_event_msg(p_manager, EVT_ERROR, "Malformed HT codeblock. "
+                      "Invalid codeblock length values.\n");
+
+        if (p_manager_mutex) {
+            opj_mutex_unlock(p_manager_mutex);
+        }
+        return OPJ_FALSE;
+    }
+    // read scup and fix the bytes there
+    lcup = (int)lengths1;  // length of CUP
+    //scup is the length of MEL + VLC
+    scup = (((int)coded_data[lcup - 1]) << 4) + (coded_data[lcup - 2] & 0xF);
+    if (scup < 2 || scup > lcup || scup > 4079) { //something is wrong
+        /* The standard stipulates 2 <= Scup <= min(Lcup, 4079) */
+        if (p_manager_mutex) {
+            opj_mutex_lock(p_manager_mutex);
+        }
+        opj_event_msg(p_manager, EVT_ERROR, "Malformed HT codeblock. "
+                      "One of the following condition is not met: "
+                      "2 <= Scup <= min(Lcup, 4079)\n");
+
+        if (p_manager_mutex) {
+            opj_mutex_unlock(p_manager_mutex);
+        }
+        return OPJ_FALSE;
+    }
+
+    // init structures
+    mel_init(&mel, coded_data, lcup, scup);
+    rev_init(&vlc, coded_data, lcup, scup);
+    frwd_init(&magsgn, coded_data, lcup - scup, 0xFF);
+    if (num_passes > 1) { // needs to be tested
+        frwd_init(&sigprop, coded_data + lengths1, (int)lengths2, 0);
+    }
+    if (num_passes > 2) {
+        rev_init_mrp(&magref, coded_data, (int)lengths1, (int)lengths2);
+    }
+
+    /** State storage
+      *  One byte per quad; for 1024 columns, or 512 quads, we need
+      *  512 bytes. We are using 2 extra bytes one on the left and one on
+      *  the right for convenience.
+      *
+      *  The MSB bit in each byte is (\sigma^nw | \sigma^n), and the 7 LSBs
+      *  contain max(E^nw | E^n)
+      */
+
+    // 514 is enough for a block width of 1024, +2 extra
+    // here expanded to 528
+    line_state = (OPJ_UINT8 *)(mbr2 + 132);
+
+    //initial 2 lines
+    /////////////////
+    lsp = line_state;              // point to line state
+    lsp[0] = 0;                    // for initial row of quad, we set to 0
+    run = mel_get_run(&mel);    // decode runs of events from MEL bitstrm
+    // data represented as runs of 0 events
+    // See mel_decode description
+    qinf[0] = qinf[1] = 0;      // quad info decoded from VLC bitstream
+    c_q = 0;                    // context for quad q
+    sp = decoded_data;          // decoded codeblock samples
+    // vlc_val;                 // fetched data from VLC bitstream
+
+    for (x = 0; x < width; x += 4) { // one iteration per quad pair
+        OPJ_UINT32 U_q[2]; // u values for the quad pair
+        OPJ_UINT32 uvlc_mode;
+        OPJ_UINT32 consumed_bits;
+        OPJ_UINT32 m_n, v_n;
+        OPJ_UINT32 ms_val;
+        OPJ_UINT32 locs;
+
+        // decode VLC
+        /////////////
+
+        //first quad
+        // Get the head of the VLC bitstream. One fetch is enough for two
+        // quads, since the largest VLC code is 7 bits, and maximum number of
+        // bits used for u is 8.  Therefore for two quads we need 30 bits
+        // (if we include unstuffing, then 32 bits are enough, since we have
+        // a maximum of one stuffing per two bytes)
+        vlc_val = rev_fetch(&vlc);
+
+        //decode VLC using the context c_q and the head of the VLC bitstream
+        qinf[0] = vlc_tbl0[(c_q << 7) | (vlc_val & 0x7F) ];
+
+        if (c_q == 0) { // if zero context, we need to use one MEL event
+            run -= 2; //the number of 0 events is multiplied by 2, so subtract 2
+
+            // Is the run terminated in 1? if so, use decoded VLC code,
+            // otherwise, discard decoded data, since we will decode again
+            // using a different context
+            qinf[0] = (run == -1) ? qinf[0] : 0;
+
+            // is run -1 or -2? this means a run has been consumed
+            if (run < 0) {
+                run = mel_get_run(&mel);    // get another run
+            }
+        }
+
+        // prepare context for the next quad; eqn. 1 in ITU T.814
+        c_q = ((qinf[0] & 0x10) >> 4) | ((qinf[0] & 0xE0) >> 5);
+
+        //remove data from vlc stream (0 bits are removed if qinf is not used)
+        vlc_val = rev_advance(&vlc, qinf[0] & 0x7);
+
+        //update sigma
+        // The update depends on the value of x; consider one OPJ_UINT32
+        // if x is 0, 8, 16 and so on, then this line update c locations
+        //      nibble (4 bits) number   0 1 2 3 4 5 6 7
+        //                         LSB   c c 0 0 0 0 0 0
+        //                               c c 0 0 0 0 0 0
+        //                               0 0 0 0 0 0 0 0
+        //                               0 0 0 0 0 0 0 0
+        // if x is 4, 12, 20, then this line update locations c
+        //      nibble (4 bits) number   0 1 2 3 4 5 6 7
+        //                         LSB   0 0 0 0 c c 0 0
+        //                               0 0 0 0 c c 0 0
+        //                               0 0 0 0 0 0 0 0
+        //                               0 0 0 0 0 0 0 0
+        *sip |= (((qinf[0] & 0x30) >> 4) | ((qinf[0] & 0xC0) >> 2)) << sip_shift;
+
+        //second quad
+        qinf[1] = 0;
+        if (x + 2 < width) { // do not run if codeblock is narrower
+            //decode VLC using the context c_q and the head of the VLC bitstream
+            qinf[1] = vlc_tbl0[(c_q << 7) | (vlc_val & 0x7F)];
+
+            // if context is zero, use one MEL event
+            if (c_q == 0) { //zero context
+                run -= 2; //subtract 2, since events number if multiplied by 2
+
+                // if event is 0, discard decoded qinf
+                qinf[1] = (run == -1) ? qinf[1] : 0;
+
+                if (run < 0) { // have we consumed all events in a run
+                    run = mel_get_run(&mel);    // if yes, then get another run
+                }
+            }
+
+            //prepare context for the next quad, eqn. 1 in ITU T.814
+            c_q = ((qinf[1] & 0x10) >> 4) | ((qinf[1] & 0xE0) >> 5);
+
+            //remove data from vlc stream, if qinf is not used, cwdlen is 0
+            vlc_val = rev_advance(&vlc, qinf[1] & 0x7);
+        }
+
+        //update sigma
+        // The update depends on the value of x; consider one OPJ_UINT32
+        // if x is 0, 8, 16 and so on, then this line update c locations
+        //      nibble (4 bits) number   0 1 2 3 4 5 6 7
+        //                         LSB   0 0 c c 0 0 0 0
+        //                               0 0 c c 0 0 0 0
+        //                               0 0 0 0 0 0 0 0
+        //                               0 0 0 0 0 0 0 0
+        // if x is 4, 12, 20, then this line update locations c
+        //      nibble (4 bits) number   0 1 2 3 4 5 6 7
+        //                         LSB   0 0 0 0 0 0 c c
+        //                               0 0 0 0 0 0 c c
+        //                               0 0 0 0 0 0 0 0
+        //                               0 0 0 0 0 0 0 0
+        *sip |= (((qinf[1] & 0x30) | ((qinf[1] & 0xC0) << 2))) << (4 + sip_shift);
+
+        sip += x & 0x7 ? 1 : 0; // move sigma pointer to next entry
+        sip_shift ^= 0x10;      // increment/decrement sip_shift by 16
+
+        // retrieve u
+        /////////////
+
+        // uvlc_mode is made up of u_offset bits from the quad pair
+        uvlc_mode = ((qinf[0] & 0x8) >> 3) | ((qinf[1] & 0x8) >> 2);
+        if (uvlc_mode == 3) { // if both u_offset are set, get an event from
+            // the MEL run of events
+            run -= 2; //subtract 2, since events number if multiplied by 2
+            uvlc_mode += (run == -1) ? 1 : 0; //increment uvlc_mode if event is 1
+            if (run < 0) { // if run is consumed (run is -1 or -2), get another run
+                run = mel_get_run(&mel);
+            }
+        }
+        //decode uvlc_mode to get u for both quads
+        consumed_bits = decode_init_uvlc(vlc_val, uvlc_mode, U_q);
+        if (U_q[0] > zero_bplanes_p1 || U_q[1] > zero_bplanes_p1) {
+            if (p_manager_mutex) {
+                opj_mutex_lock(p_manager_mutex);
+            }
+            opj_event_msg(p_manager, EVT_ERROR, "Malformed HT codeblock. Decoding "
+                          "this codeblock is stopped. U_q is larger than zero "
+                          "bitplanes + 1 \n");
+            if (p_manager_mutex) {
+                opj_mutex_unlock(p_manager_mutex);
+            }
+            return OPJ_FALSE;
+        }
+
+        //consume u bits in the VLC code
+        vlc_val = rev_advance(&vlc, consumed_bits);
+
+        //decode magsgn and update line_state
+        /////////////////////////////////////
+
+        //We obtain a mask for the sample locations that need evaluation
+        locs = 0xFF;
+        if (x + 4 > width) {
+            locs >>= (x + 4 - width) << 1;    // limits width
+        }
+        locs = height > 1 ? locs : (locs & 0x55);         // limits height
+
+        if ((((qinf[0] & 0xF0) >> 4) | (qinf[1] & 0xF0)) & ~locs) {
+            if (p_manager_mutex) {
+                opj_mutex_lock(p_manager_mutex);
+            }
+            opj_event_msg(p_manager, EVT_ERROR, "Malformed HT codeblock. "
+                          "VLC code produces significant samples outside "
+                          "the codeblock area.\n");
+            if (p_manager_mutex) {
+                opj_mutex_unlock(p_manager_mutex);
+            }
+            return OPJ_FALSE;
+        }
+
+        //first quad, starting at first sample in quad and moving on
+        if (qinf[0] & 0x10) { //is it significant? (sigma_n)
+            OPJ_UINT32 val;
+
+            ms_val = frwd_fetch(&magsgn);         //get 32 bits of magsgn data
+            m_n = U_q[0] - ((qinf[0] >> 12) & 1); //evaluate m_n (number of bits
+            // to read from bitstream), using EMB e_k
+            frwd_advance(&magsgn, m_n);         //consume m_n
+            val = ms_val << 31;                 //get sign bit
+            v_n = ms_val & ((1U << m_n) - 1);   //keep only m_n bits
+            v_n |= ((qinf[0] & 0x100) >> 8) << m_n;  //add EMB e_1 as MSB
+            v_n |= 1;                                //add center of bin
+            //v_n now has 2 * (\mu - 1) + 0.5 with correct sign bit
+            //add 2 to make it 2*\mu+0.5, shift it up to missing MSBs
+            sp[0] = val | ((v_n + 2) << (p - 1));
+        } else if (locs & 0x1) { // if this is inside the codeblock, set the
+            sp[0] = 0;           // sample to zero
+        }
+
+        if (qinf[0] & 0x20) { //sigma_n
+            OPJ_UINT32 val, t;
+
+            ms_val = frwd_fetch(&magsgn);         //get 32 bits
+            m_n = U_q[0] - ((qinf[0] >> 13) & 1); //m_n, uses EMB e_k
+            frwd_advance(&magsgn, m_n);           //consume m_n
+            val = ms_val << 31;                   //get sign bit
+            v_n = ms_val & ((1U << m_n) - 1);     //keep only m_n bits
+            v_n |= ((qinf[0] & 0x200) >> 9) << m_n; //add EMB e_1
+            v_n |= 1;                               //bin center
+            //v_n now has 2 * (\mu - 1) + 0.5 with correct sign bit
+            //add 2 to make it 2*\mu+0.5, shift it up to missing MSBs
+            sp[stride] = val | ((v_n + 2) << (p - 1));
+
+            //update line_state: bit 7 (\sigma^N), and E^N
+            t = lsp[0] & 0x7F;       // keep E^NW
+            v_n = 32 - count_leading_zeros(v_n);
+            lsp[0] = (OPJ_UINT8)(0x80 | (t > v_n ? t : v_n)); //max(E^NW, E^N) | s
+        } else if (locs & 0x2) { // if this is inside the codeblock, set the
+            sp[stride] = 0;      // sample to zero
+        }
+
+        ++lsp; // move to next quad information
+        ++sp;  // move to next column of samples
+
+        //this is similar to the above two samples
+        if (qinf[0] & 0x40) {
+            OPJ_UINT32 val;
+
+            ms_val = frwd_fetch(&magsgn);
+            m_n = U_q[0] - ((qinf[0] >> 14) & 1);
+            frwd_advance(&magsgn, m_n);
+            val = ms_val << 31;
+            v_n = ms_val & ((1U << m_n) - 1);
+            v_n |= (((qinf[0] & 0x400) >> 10) << m_n);
+            v_n |= 1;
+            sp[0] = val | ((v_n + 2) << (p - 1));
+        } else if (locs & 0x4) {
+            sp[0] = 0;
+        }
+
+        lsp[0] = 0;
+        if (qinf[0] & 0x80) {
+            OPJ_UINT32 val;
+            ms_val = frwd_fetch(&magsgn);
+            m_n = U_q[0] - ((qinf[0] >> 15) & 1); //m_n
+            frwd_advance(&magsgn, m_n);
+            val = ms_val << 31;
+            v_n = ms_val & ((1U << m_n) - 1);
+            v_n |= ((qinf[0] & 0x800) >> 11) << m_n;
+            v_n |= 1; //center of bin
+            sp[stride] = val | ((v_n + 2) << (p - 1));
+
+            //line_state: bit 7 (\sigma^NW), and E^NW for next quad
+            lsp[0] = (OPJ_UINT8)(0x80 | (32 - count_leading_zeros(v_n)));
+        } else if (locs & 0x8) { //if outside set to 0
+            sp[stride] = 0;
+        }
+
+        ++sp; //move to next column
+
+        //second quad
+        if (qinf[1] & 0x10) {
+            OPJ_UINT32 val;
+
+            ms_val = frwd_fetch(&magsgn);
+            m_n = U_q[1] - ((qinf[1] >> 12) & 1); //m_n
+            frwd_advance(&magsgn, m_n);
+            val = ms_val << 31;
+            v_n = ms_val & ((1U << m_n) - 1);
+            v_n |= (((qinf[1] & 0x100) >> 8) << m_n);
+            v_n |= 1;
+            sp[0] = val | ((v_n + 2) << (p - 1));
+        } else if (locs & 0x10) {
+            sp[0] = 0;
+        }
+
+        if (qinf[1] & 0x20) {
+            OPJ_UINT32 val, t;
+
+            ms_val = frwd_fetch(&magsgn);
+            m_n = U_q[1] - ((qinf[1] >> 13) & 1); //m_n
+            frwd_advance(&magsgn, m_n);
+            val = ms_val << 31;
+            v_n = ms_val & ((1U << m_n) - 1);
+            v_n |= (((qinf[1] & 0x200) >> 9) << m_n);
+            v_n |= 1;
+            sp[stride] = val | ((v_n + 2) << (p - 1));
+
+            //update line_state: bit 7 (\sigma^N), and E^N
+            t = lsp[0] & 0x7F;            //E^NW
+            v_n = 32 - count_leading_zeros(v_n);     //E^N
+            lsp[0] = (OPJ_UINT8)(0x80 | (t > v_n ? t : v_n)); //max(E^NW, E^N) | s
+        } else if (locs & 0x20) {
+            sp[stride] = 0;    //no need to update line_state
+        }
+
+        ++lsp; //move line state to next quad
+        ++sp;  //move to next sample
+
+        if (qinf[1] & 0x40) {
+            OPJ_UINT32 val;
+
+            ms_val = frwd_fetch(&magsgn);
+            m_n = U_q[1] - ((qinf[1] >> 14) & 1); //m_n
+            frwd_advance(&magsgn, m_n);
+            val = ms_val << 31;
+            v_n = ms_val & ((1U << m_n) - 1);
+            v_n |= (((qinf[1] & 0x400) >> 10) << m_n);
+            v_n |= 1;
+            sp[0] = val | ((v_n + 2) << (p - 1));
+        } else if (locs & 0x40) {
+            sp[0] = 0;
+        }
+
+        lsp[0] = 0;
+        if (qinf[1] & 0x80) {
+            OPJ_UINT32 val;
+
+            ms_val = frwd_fetch(&magsgn);
+            m_n = U_q[1] - ((qinf[1] >> 15) & 1); //m_n
+            frwd_advance(&magsgn, m_n);
+            val = ms_val << 31;
+            v_n = ms_val & ((1U << m_n) - 1);
+            v_n |= (((qinf[1] & 0x800) >> 11) << m_n);
+            v_n |= 1; //center of bin
+            sp[stride] = val | ((v_n + 2) << (p - 1));
+
+            //line_state: bit 7 (\sigma^NW), and E^NW for next quad
+            lsp[0] = (OPJ_UINT8)(0x80 | (32 - count_leading_zeros(v_n)));
+        } else if (locs & 0x80) {
+            sp[stride] = 0;
+        }
+
+        ++sp;
+    }
+
+    //non-initial lines
+    //////////////////////////
+    for (y = 2; y < height; /*done at the end of loop*/) {
+        OPJ_UINT32 *sip;
+        OPJ_UINT8 ls0;
+        OPJ_INT32 x;
+
+        sip_shift ^= 0x2;  // shift sigma to the upper half od the nibble
+        sip_shift &= 0xFFFFFFEFU; //move back to 0 (it might have been at 0x10)
+        sip = y & 0x4 ? sigma2 : sigma1; //choose sigma array
+
+        lsp = line_state;
+        ls0 = lsp[0];                   // read the line state value
+        lsp[0] = 0;                     // and set it to zero
+        sp = decoded_data + y * stride; // generated samples
+        c_q = 0;                        // context
+        for (x = 0; x < width; x += 4) {
+            OPJ_UINT32 U_q[2];
+            OPJ_UINT32 uvlc_mode, consumed_bits;
+            OPJ_UINT32 m_n, v_n;
+            OPJ_UINT32 ms_val;
+            OPJ_UINT32 locs;
+
+            // decode vlc
+            /////////////
+
+            //first quad
+            // get context, eqn. 2 ITU T.814
+            // c_q has \sigma^W | \sigma^SW
+            c_q |= (ls0 >> 7);          //\sigma^NW | \sigma^N
+            c_q |= (lsp[1] >> 5) & 0x4; //\sigma^NE | \sigma^NF
+
+            //the following is very similar to previous code, so please refer to
+            // that
+            vlc_val = rev_fetch(&vlc);
+            qinf[0] = vlc_tbl1[(c_q << 7) | (vlc_val & 0x7F)];
+            if (c_q == 0) { //zero context
+                run -= 2;
+                qinf[0] = (run == -1) ? qinf[0] : 0;
+                if (run < 0) {
+                    run = mel_get_run(&mel);
+                }
+            }
+            //prepare context for the next quad, \sigma^W | \sigma^SW
+            c_q = ((qinf[0] & 0x40) >> 5) | ((qinf[0] & 0x80) >> 6);
+
+            //remove data from vlc stream
+            vlc_val = rev_advance(&vlc, qinf[0] & 0x7);
+
+            //update sigma
+            // The update depends on the value of x and y; consider one OPJ_UINT32
+            // if x is 0, 8, 16 and so on, and y is 2, 6, etc., then this
+            // line update c locations
+            //      nibble (4 bits) number   0 1 2 3 4 5 6 7
+            //                         LSB   0 0 0 0 0 0 0 0
+            //                               0 0 0 0 0 0 0 0
+            //                               c c 0 0 0 0 0 0
+            //                               c c 0 0 0 0 0 0
+            *sip |= (((qinf[0] & 0x30) >> 4) | ((qinf[0] & 0xC0) >> 2)) << sip_shift;
+
+            //second quad
+            qinf[1] = 0;
+            if (x + 2 < width) {
+                c_q |= (lsp[1] >> 7);
+                c_q |= (lsp[2] >> 5) & 0x4;
+                qinf[1] = vlc_tbl1[(c_q << 7) | (vlc_val & 0x7F)];
+                if (c_q == 0) { //zero context
+                    run -= 2;
+                    qinf[1] = (run == -1) ? qinf[1] : 0;
+                    if (run < 0) {
+                        run = mel_get_run(&mel);
+                    }
+                }
+                //prepare context for the next quad
+                c_q = ((qinf[1] & 0x40) >> 5) | ((qinf[1] & 0x80) >> 6);
+                //remove data from vlc stream
+                vlc_val = rev_advance(&vlc, qinf[1] & 0x7);
+            }
+
+            //update sigma
+            *sip |= (((qinf[1] & 0x30) | ((qinf[1] & 0xC0) << 2))) << (4 + sip_shift);
+
+            sip += x & 0x7 ? 1 : 0;
+            sip_shift ^= 0x10;
+
+            //retrieve u
+            ////////////
+            uvlc_mode = ((qinf[0] & 0x8) >> 3) | ((qinf[1] & 0x8) >> 2);
+            consumed_bits = decode_noninit_uvlc(vlc_val, uvlc_mode, U_q);
+            vlc_val = rev_advance(&vlc, consumed_bits);
+
+            //calculate E^max and add it to U_q, eqns 5 and 6 in ITU T.814
+            if ((qinf[0] & 0xF0) & ((qinf[0] & 0xF0) - 1)) { // is \gamma_q 1?
+                OPJ_UINT32 E = (ls0 & 0x7Fu);
+                E = E > (lsp[1] & 0x7Fu) ? E : (lsp[1] & 0x7Fu); //max(E, E^NE, E^NF)
+                //since U_q already has u_q + 1, we subtract 2 instead of 1
+                U_q[0] += E > 2 ? E - 2 : 0;
+            }
+
+            if ((qinf[1] & 0xF0) & ((qinf[1] & 0xF0) - 1)) { //is \gamma_q 1?
+                OPJ_UINT32 E = (lsp[1] & 0x7Fu);
+                E = E > (lsp[2] & 0x7Fu) ? E : (lsp[2] & 0x7Fu); //max(E, E^NE, E^NF)
+                //since U_q already has u_q + 1, we subtract 2 instead of 1
+                U_q[1] += E > 2 ? E - 2 : 0;
+            }
+
+            if (U_q[0] > zero_bplanes_p1 || U_q[1] > zero_bplanes_p1) {
+                if (p_manager_mutex) {
+                    opj_mutex_lock(p_manager_mutex);
+                }
+                opj_event_msg(p_manager, EVT_ERROR, "Malformed HT codeblock. "
+                              "Decoding this codeblock is stopped. U_q is"
+                              "larger than bitplanes + 1 \n");
+                if (p_manager_mutex) {
+                    opj_mutex_unlock(p_manager_mutex);
+                }
+                return OPJ_FALSE;
+            }
+
+            ls0 = lsp[2]; //for next double quad
+            lsp[1] = lsp[2] = 0;
+
+            //decode magsgn and update line_state
+            /////////////////////////////////////
+
+            //locations where samples need update
+            locs = 0xFF;
+            if (x + 4 > width) {
+                locs >>= (x + 4 - width) << 1;
+            }
+            locs = y + 2 <= height ? locs : (locs & 0x55);
+
+            if ((((qinf[0] & 0xF0) >> 4) | (qinf[1] & 0xF0)) & ~locs) {
+                if (p_manager_mutex) {
+                    opj_mutex_lock(p_manager_mutex);
+                }
+                opj_event_msg(p_manager, EVT_ERROR, "Malformed HT codeblock. "
+                              "VLC code produces significant samples outside "
+                              "the codeblock area.\n");
+                if (p_manager_mutex) {
+                    opj_mutex_unlock(p_manager_mutex);
+                }
+                return OPJ_FALSE;
+            }
+
+
+
+            if (qinf[0] & 0x10) { //sigma_n
+                OPJ_UINT32 val;
+
+                ms_val = frwd_fetch(&magsgn);
+                m_n = U_q[0] - ((qinf[0] >> 12) & 1); //m_n
+                frwd_advance(&magsgn, m_n);
+                val = ms_val << 31;
+                v_n = ms_val & ((1U << m_n) - 1);
+                v_n |= ((qinf[0] & 0x100) >> 8) << m_n;
+                v_n |= 1; //center of bin
+                sp[0] = val | ((v_n + 2) << (p - 1));
+            } else if (locs & 0x1) {
+                sp[0] = 0;
+            }
+
+            if (qinf[0] & 0x20) { //sigma_n
+                OPJ_UINT32 val, t;
+
+                ms_val = frwd_fetch(&magsgn);
+                m_n = U_q[0] - ((qinf[0] >> 13) & 1); //m_n
+                frwd_advance(&magsgn, m_n);
+                val = ms_val << 31;
+                v_n = ms_val & ((1U << m_n) - 1);
+                v_n |= ((qinf[0] & 0x200) >> 9) << m_n;
+                v_n |= 1; //center of bin
+                sp[stride] = val | ((v_n + 2) << (p - 1));
+
+                //update line_state: bit 7 (\sigma^N), and E^N
+                t = lsp[0] & 0x7F;          //E^NW
+                v_n = 32 - count_leading_zeros(v_n);
+                lsp[0] = (OPJ_UINT8)(0x80 | (t > v_n ? t : v_n));
+            } else if (locs & 0x2) {
+                sp[stride] = 0;    //no need to update line_state
+            }
+
+            ++lsp;
+            ++sp;
+
+            if (qinf[0] & 0x40) { //sigma_n
+                OPJ_UINT32 val;
+
+                ms_val = frwd_fetch(&magsgn);
+                m_n = U_q[0] - ((qinf[0] >> 14) & 1); //m_n
+                frwd_advance(&magsgn, m_n);
+                val = ms_val << 31;
+                v_n = ms_val & ((1U << m_n) - 1);
+                v_n |= (((qinf[0] & 0x400) >> 10) << m_n);
+                v_n |= 1;                            //center of bin
+                sp[0] = val | ((v_n + 2) << (p - 1));
+            } else if (locs & 0x4) {
+                sp[0] = 0;
+            }
+
+            if (qinf[0] & 0x80) { //sigma_n
+                OPJ_UINT32 val;
+
+                ms_val = frwd_fetch(&magsgn);
+                m_n = U_q[0] - ((qinf[0] >> 15) & 1); //m_n
+                frwd_advance(&magsgn, m_n);
+                val = ms_val << 31;
+                v_n = ms_val & ((1U << m_n) - 1);
+                v_n |= ((qinf[0] & 0x800) >> 11) << m_n;
+                v_n |= 1; //center of bin
+                sp[stride] = val | ((v_n + 2) << (p - 1));
+
+                //update line_state: bit 7 (\sigma^NW), and E^NW for next quad
+                lsp[0] = (OPJ_UINT8)(0x80 | (32 - count_leading_zeros(v_n)));
+            } else if (locs & 0x8) {
+                sp[stride] = 0;
+            }
+
+            ++sp;
+
+            if (qinf[1] & 0x10) { //sigma_n
+                OPJ_UINT32 val;
+
+                ms_val = frwd_fetch(&magsgn);
+                m_n = U_q[1] - ((qinf[1] >> 12) & 1); //m_n
+                frwd_advance(&magsgn, m_n);
+                val = ms_val << 31;
+                v_n = ms_val & ((1U << m_n) - 1);
+                v_n |= (((qinf[1] & 0x100) >> 8) << m_n);
+                v_n |= 1;                            //center of bin
+                sp[0] = val | ((v_n + 2) << (p - 1));
+            } else if (locs & 0x10) {
+                sp[0] = 0;
+            }
+
+            if (qinf[1] & 0x20) { //sigma_n
+                OPJ_UINT32 val, t;
+
+                ms_val = frwd_fetch(&magsgn);
+                m_n = U_q[1] - ((qinf[1] >> 13) & 1); //m_n
+                frwd_advance(&magsgn, m_n);
+                val = ms_val << 31;
+                v_n = ms_val & ((1U << m_n) - 1);
+                v_n |= (((qinf[1] & 0x200) >> 9) << m_n);
+                v_n |= 1; //center of bin
+                sp[stride] = val | ((v_n + 2) << (p - 1));
+
+                //update line_state: bit 7 (\sigma^N), and E^N
+                t = lsp[0] & 0x7F;          //E^NW
+                v_n = 32 - count_leading_zeros(v_n);
+                lsp[0] = (OPJ_UINT8)(0x80 | (t > v_n ? t : v_n));
+            } else if (locs & 0x20) {
+                sp[stride] = 0;    //no need to update line_state
+            }
+
+            ++lsp;
+            ++sp;
+
+            if (qinf[1] & 0x40) { //sigma_n
+                OPJ_UINT32 val;
+
+                ms_val = frwd_fetch(&magsgn);
+                m_n = U_q[1] - ((qinf[1] >> 14) & 1); //m_n
+                frwd_advance(&magsgn, m_n);
+                val = ms_val << 31;
+                v_n = ms_val & ((1U << m_n) - 1);
+                v_n |= (((qinf[1] & 0x400) >> 10) << m_n);
+                v_n |= 1;                            //center of bin
+                sp[0] = val | ((v_n + 2) << (p - 1));
+            } else if (locs & 0x40) {
+                sp[0] = 0;
+            }
+
+            if (qinf[1] & 0x80) { //sigma_n
+                OPJ_UINT32 val;
+
+                ms_val = frwd_fetch(&magsgn);
+                m_n = U_q[1] - ((qinf[1] >> 15) & 1); //m_n
+                frwd_advance(&magsgn, m_n);
+                val = ms_val << 31;
+                v_n = ms_val & ((1U << m_n) - 1);
+                v_n |= (((qinf[1] & 0x800) >> 11) << m_n);
+                v_n |= 1; //center of bin
+                sp[stride] = val | ((v_n + 2) << (p - 1));
+
+                //update line_state: bit 7 (\sigma^NW), and E^NW for next quad
+                lsp[0] = (OPJ_UINT8)(0x80 | (32 - count_leading_zeros(v_n)));
+            } else if (locs & 0x80) {
+                sp[stride] = 0;
+            }
+
+            ++sp;
+        }
+
+        y += 2;
+        if (num_passes > 1 && (y & 3) == 0) { //executed at multiples of 4
+            // This is for SPP and potentially MRP
+
+            if (num_passes > 2) { //do MRP
+                // select the current stripe
+                OPJ_UINT32 *cur_sig = y & 0x4 ? sigma1 : sigma2;
+                // the address of the data that needs updating
+                OPJ_UINT32 *dpp = decoded_data + (y - 4) * stride;
+                OPJ_UINT32 half = 1u << (p - 2); // half the center of the bin
+                OPJ_INT32 i;
+                for (i = 0; i < width; i += 8) {
+                    //Process one entry from sigma array at a time
+                    // Each nibble (4 bits) in the sigma array represents 4 rows,
+                    // and the 32 bits contain 8 columns
+                    OPJ_UINT32 cwd = rev_fetch_mrp(&magref); // get 32 bit data
+                    OPJ_UINT32 sig = *cur_sig++; // 32 bit that will be processed now
+                    OPJ_UINT32 col_mask = 0xFu;  // a mask for a column in sig
+                    OPJ_UINT32 *dp = dpp + i;    // next column in decode samples
+                    if (sig) { // if any of the 32 bits are set
+                        int j;
+                        for (j = 0; j < 8; ++j, dp++) { //one column at a time
+                            if (sig & col_mask) { // lowest nibble
+                                OPJ_UINT32 sample_mask = 0x11111111u & col_mask; //LSB
+
+                                if (sig & sample_mask) { //if LSB is set
+                                    OPJ_UINT32 sym;
+
+                                    assert(dp[0] != 0); // decoded value cannot be zero
+                                    sym = cwd & 1; // get it value
+                                    // remove center of bin if sym is 0
+                                    dp[0] ^= (1 - sym) << (p - 1);
+                                    dp[0] |= half;      // put half the center of bin
+                                    cwd >>= 1;          //consume word
+                                }
+                                sample_mask += sample_mask; //next row
+
+                                if (sig & sample_mask) {
+                                    OPJ_UINT32 sym;
+
+                                    assert(dp[stride] != 0);
+                                    sym = cwd & 1;
+                                    dp[stride] ^= (1 - sym) << (p - 1);
+                                    dp[stride] |= half;
+                                    cwd >>= 1;
+                                }
+                                sample_mask += sample_mask;
+
+                                if (sig & sample_mask) {
+                                    OPJ_UINT32 sym;
+
+                                    assert(dp[2 * stride] != 0);
+                                    sym = cwd & 1;
+                                    dp[2 * stride] ^= (1 - sym) << (p - 1);
+                                    dp[2 * stride] |= half;
+                                    cwd >>= 1;
+                                }
+                                sample_mask += sample_mask;
+
+                                if (sig & sample_mask) {
+                                    OPJ_UINT32 sym;
+
+                                    assert(dp[3 * stride] != 0);
+                                    sym = cwd & 1;
+                                    dp[3 * stride] ^= (1 - sym) << (p - 1);
+                                    dp[3 * stride] |= half;
+                                    cwd >>= 1;
+                                }
+                                sample_mask += sample_mask;
+                            }
+                            col_mask <<= 4; //next column
+                        }
+                    }
+                    // consume data according to the number of bits set
+                    rev_advance_mrp(&magref, population_count(sig));
+                }
+            }
+
+            if (y >= 4) { // update mbr array at the end of each stripe
+                //generate mbr corresponding to a stripe
+                OPJ_UINT32 *sig = y & 0x4 ? sigma1 : sigma2;
+                OPJ_UINT32 *mbr = y & 0x4 ? mbr1 : mbr2;
+
+                //data is processed in patches of 8 columns;
+                // each 32 bits in sigma1 or mbr1 represent 4 rows
+
+                //integrate horizontally
+                OPJ_UINT32 prev = 0; // previous columns
+                OPJ_INT32 i;
+                for (i = 0; i < width; i += 8, mbr++, sig++) {
+                    OPJ_UINT32 t, z;
+
+                    mbr[0] = sig[0];         //start with significant samples
+                    mbr[0] |= prev >> 28;    //for first column, left neighbors
+                    mbr[0] |= sig[0] << 4;   //left neighbors
+                    mbr[0] |= sig[0] >> 4;   //right neighbors
+                    mbr[0] |= sig[1] << 28;  //for last column, right neighbors
+                    prev = sig[0];           // for next group of columns
+
+                    //integrate vertically
+                    t = mbr[0], z = mbr[0];
+                    z |= (t & 0x77777777) << 1; //above neighbors
+                    z |= (t & 0xEEEEEEEE) >> 1; //below neighbors
+                    mbr[0] = z & ~sig[0]; //remove already significant samples
+                }
+            }
+
+            if (y >= 8) { //wait until 8 rows have been processed
+                OPJ_UINT32 *cur_sig, *cur_mbr, *nxt_sig, *nxt_mbr;
+                OPJ_UINT32 prev;
+                OPJ_UINT32 val;
+                OPJ_INT32 i;
+
+                // add membership from the next stripe, obtained above
+                cur_sig = y & 0x4 ? sigma2 : sigma1;
+                cur_mbr = y & 0x4 ? mbr2 : mbr1;
+                nxt_sig = y & 0x4 ? sigma1 : sigma2;  //future samples
+                prev = 0; // the columns before this group of 8 columns
+                for (i = 0; i < width; i += 8, cur_mbr++, cur_sig++, nxt_sig++) {
+                    OPJ_UINT32 t = nxt_sig[0];
+                    t |= prev >> 28;        //for first column, left neighbors
+                    t |= nxt_sig[0] << 4;   //left neighbors
+                    t |= nxt_sig[0] >> 4;   //right neighbors
+                    t |= nxt_sig[1] << 28;  //for last column, right neighbors
+                    prev = nxt_sig[0];      // for next group of columns
+
+                    if (!stripe_causal) {
+                        cur_mbr[0] |= (t & 0x11111111u) << 3; //propagate up to cur_mbr
+                    }
+                    cur_mbr[0] &= ~cur_sig[0]; //remove already significant samples
+                }
+
+                //find new locations and get signs
+                cur_sig = y & 0x4 ? sigma2 : sigma1;
+                cur_mbr = y & 0x4 ? mbr2 : mbr1;
+                nxt_sig = y & 0x4 ? sigma1 : sigma2; //future samples
+                nxt_mbr = y & 0x4 ? mbr1 : mbr2;     //future samples
+                val = 3u << (p - 2); // sample values for newly discovered
+                // significant samples including the bin center
+                for (i = 0; i < width;
+                        i += 8, cur_sig++, cur_mbr++, nxt_sig++, nxt_mbr++) {
+                    OPJ_UINT32 ux, tx;
+                    OPJ_UINT32 mbr = *cur_mbr;
+                    OPJ_UINT32 new_sig = 0;
+                    if (mbr) { //are there any samples that might be significant
+                        OPJ_INT32 n;
+                        for (n = 0; n < 8; n += 4) {
+                            OPJ_UINT32 col_mask;
+                            OPJ_UINT32 inv_sig;
+                            OPJ_INT32 end;
+                            OPJ_INT32 j;
+
+                            OPJ_UINT32 cwd = frwd_fetch(&sigprop); //get 32 bits
+                            OPJ_UINT32 cnt = 0;
+
+                            OPJ_UINT32 *dp = decoded_data + (y - 8) * stride;
+                            dp += i + n; //address for decoded samples
+
+                            col_mask = 0xFu << (4 * n); //a mask to select a column
+
+                            inv_sig = ~cur_sig[0]; // insignificant samples
+
+                            //find the last sample we operate on
+                            end = n + 4 + i < width ? n + 4 : width - i;
+
+                            for (j = n; j < end; ++j, ++dp, col_mask <<= 4) {
+                                OPJ_UINT32 sample_mask;
+
+                                if ((col_mask & mbr) == 0) { //no samples need checking
+                                    continue;
+                                }
+
+                                //scan mbr to find a new significant sample
+                                sample_mask = 0x11111111u & col_mask; // LSB
+                                if (mbr & sample_mask) {
+                                    assert(dp[0] == 0); // the sample must have been 0
+                                    if (cwd & 1) { //if this sample has become significant
+                                        // must propagate it to nearby samples
+                                        OPJ_UINT32 t;
+                                        new_sig |= sample_mask;  // new significant samples
+                                        t = 0x32u << (j * 4);// propagation to neighbors
+                                        mbr |= t & inv_sig; //remove already significant samples
+                                    }
+                                    cwd >>= 1;
+                                    ++cnt; //consume bit and increment number of
+                                    //consumed bits
+                                }
+
+                                sample_mask += sample_mask;  // next row
+                                if (mbr & sample_mask) {
+                                    assert(dp[stride] == 0);
+                                    if (cwd & 1) {
+                                        OPJ_UINT32 t;
+                                        new_sig |= sample_mask;
+                                        t = 0x74u << (j * 4);
+                                        mbr |= t & inv_sig;
+                                    }
+                                    cwd >>= 1;
+                                    ++cnt;
+                                }
+
+                                sample_mask += sample_mask;
+                                if (mbr & sample_mask) {
+                                    assert(dp[2 * stride] == 0);
+                                    if (cwd & 1) {
+                                        OPJ_UINT32 t;
+                                        new_sig |= sample_mask;
+                                        t = 0xE8u << (j * 4);
+                                        mbr |= t & inv_sig;
+                                    }
+                                    cwd >>= 1;
+                                    ++cnt;
+                                }
+
+                                sample_mask += sample_mask;
+                                if (mbr & sample_mask) {
+                                    assert(dp[3 * stride] == 0);
+                                    if (cwd & 1) {
+                                        OPJ_UINT32 t;
+                                        new_sig |= sample_mask;
+                                        t = 0xC0u << (j * 4);
+                                        mbr |= t & inv_sig;
+                                    }
+                                    cwd >>= 1;
+                                    ++cnt;
+                                }
+                            }
+
+                            //obtain signs here
+                            if (new_sig & (0xFFFFu << (4 * n))) { //if any
+                                OPJ_UINT32 col_mask;
+                                OPJ_INT32 j;
+                                OPJ_UINT32 *dp = decoded_data + (y - 8) * stride;
+                                dp += i + n; // decoded samples address
+                                col_mask = 0xFu << (4 * n); //mask to select a column
+
+                                for (j = n; j < end; ++j, ++dp, col_mask <<= 4) {
+                                    OPJ_UINT32 sample_mask;
+
+                                    if ((col_mask & new_sig) == 0) { //if none is significant
+                                        continue;
+                                    }
+
+                                    //scan 4 signs
+                                    sample_mask = 0x11111111u & col_mask;
+                                    if (new_sig & sample_mask) {
+                                        assert(dp[0] == 0);
+                                        dp[0] |= ((cwd & 1) << 31) | val; //put value and sign
+                                        cwd >>= 1;
+                                        ++cnt; //consume bit and increment number
+                                        //of consumed bits
+                                    }
+
+                                    sample_mask += sample_mask;
+                                    if (new_sig & sample_mask) {
+                                        assert(dp[stride] == 0);
+                                        dp[stride] |= ((cwd & 1) << 31) | val;
+                                        cwd >>= 1;
+                                        ++cnt;
+                                    }
+
+                                    sample_mask += sample_mask;
+                                    if (new_sig & sample_mask) {
+                                        assert(dp[2 * stride] == 0);
+                                        dp[2 * stride] |= ((cwd & 1) << 31) | val;
+                                        cwd >>= 1;
+                                        ++cnt;
+                                    }
+
+                                    sample_mask += sample_mask;
+                                    if (new_sig & sample_mask) {
+                                        assert(dp[3 * stride] == 0);
+                                        dp[3 * stride] |= ((cwd & 1) << 31) | val;
+                                        cwd >>= 1;
+                                        ++cnt;
+                                    }
+                                }
+
+                            }
+                            frwd_advance(&sigprop, cnt); //consume the bits from bitstrm
+                            cnt = 0;
+
+                            //update the next 8 columns
+                            if (n == 4) {
+                                //horizontally
+                                OPJ_UINT32 t = new_sig >> 28;
+                                t |= ((t & 0xE) >> 1) | ((t & 7) << 1);
+                                cur_mbr[1] |= t & ~cur_sig[1];
+                            }
+                        }
+                    }
+                    //update the next stripe (vertically propagation)
+                    new_sig |= cur_sig[0];
+                    ux = (new_sig & 0x88888888) >> 3;
+                    tx = ux | (ux << 4) | (ux >> 4); //left and right neighbors
+                    if (i > 0) {
+                        nxt_mbr[-1] |= (ux << 28) & ~nxt_sig[-1];
+                    }
+                    nxt_mbr[0] |= tx & ~nxt_sig[0];
+                    nxt_mbr[1] |= (ux >> 28) & ~nxt_sig[1];
+                }
+
+                //clear current sigma
+                //mbr need not be cleared because it is overwritten
+                cur_sig = y & 0x4 ? sigma2 : sigma1;
+                memset(cur_sig, 0, ((((OPJ_UINT32)width + 7u) >> 3) + 1u) << 2);
+            }
+        }
+    }
+
+    //terminating
+    if (num_passes > 1) {
+        OPJ_INT32 st, y;
+
+        if (num_passes > 2 && ((height & 3) == 1 || (height & 3) == 2)) {
+            //do magref
+            OPJ_UINT32 *cur_sig = height & 0x4 ? sigma2 : sigma1; //reversed
+            OPJ_UINT32 *dpp = decoded_data + (height & 0xFFFFFC) * stride;
+            OPJ_UINT32 half = 1u << (p - 2);
+            OPJ_INT32 i;
+            for (i = 0; i < width; i += 8) {
+                OPJ_UINT32 cwd = rev_fetch_mrp(&magref);
+                OPJ_UINT32 sig = *cur_sig++;
+                OPJ_UINT32 col_mask = 0xF;
+                OPJ_UINT32 *dp = dpp + i;
+                if (sig) {
+                    int j;
+                    for (j = 0; j < 8; ++j, dp++) {
+                        if (sig & col_mask) {
+                            OPJ_UINT32 sample_mask = 0x11111111 & col_mask;
+
+                            if (sig & sample_mask) {
+                                OPJ_UINT32 sym;
+                                assert(dp[0] != 0);
+                                sym = cwd & 1;
+                                dp[0] ^= (1 - sym) << (p - 1);
+                                dp[0] |= half;
+                                cwd >>= 1;
+                            }
+                            sample_mask += sample_mask;
+
+                            if (sig & sample_mask) {
+                                OPJ_UINT32 sym;
+                                assert(dp[stride] != 0);
+                                sym = cwd & 1;
+                                dp[stride] ^= (1 - sym) << (p - 1);
+                                dp[stride] |= half;
+                                cwd >>= 1;
+                            }
+                            sample_mask += sample_mask;
+
+                            if (sig & sample_mask) {
+                                OPJ_UINT32 sym;
+                                assert(dp[2 * stride] != 0);
+                                sym = cwd & 1;
+                                dp[2 * stride] ^= (1 - sym) << (p - 1);
+                                dp[2 * stride] |= half;
+                                cwd >>= 1;
+                            }
+                            sample_mask += sample_mask;
+
+                            if (sig & sample_mask) {
+                                OPJ_UINT32 sym;
+                                assert(dp[3 * stride] != 0);
+                                sym = cwd & 1;
+                                dp[3 * stride] ^= (1 - sym) << (p - 1);
+                                dp[3 * stride] |= half;
+                                cwd >>= 1;
+                            }
+                            sample_mask += sample_mask;
+                        }
+                        col_mask <<= 4;
+                    }
+                }
+                rev_advance_mrp(&magref, population_count(sig));
+            }
+        }
+
+        //do the last incomplete stripe
+        // for cases of (height & 3) == 0 and 3
+        // they should have been processed previously
+        if ((height & 3) == 1 || (height & 3) == 2) {
+            //generate mbr of first stripe
+            OPJ_UINT32 *sig = height & 0x4 ? sigma2 : sigma1;
+            OPJ_UINT32 *mbr = height & 0x4 ? mbr2 : mbr1;
+            //integrate horizontally
+            OPJ_UINT32 prev = 0;
+            OPJ_INT32 i;
+            for (i = 0; i < width; i += 8, mbr++, sig++) {
+                OPJ_UINT32 t, z;
+
+                mbr[0] = sig[0];
+                mbr[0] |= prev >> 28;    //for first column, left neighbors
+                mbr[0] |= sig[0] << 4;   //left neighbors
+                mbr[0] |= sig[0] >> 4;   //right neighbors
+                mbr[0] |= sig[1] << 28;  //for last column, right neighbors
+                prev = sig[0];
+
+                //integrate vertically
+                t = mbr[0], z = mbr[0];
+                z |= (t & 0x77777777) << 1; //above neighbors
+                z |= (t & 0xEEEEEEEE) >> 1; //below neighbors
+                mbr[0] = z & ~sig[0]; //remove already significant samples
+            }
+        }
+
+        st = height;
+        st -= height > 6 ? (((height + 1) & 3) + 3) : height;
+        for (y = st; y < height; y += 4) {
+            OPJ_UINT32 *cur_sig, *cur_mbr, *nxt_sig, *nxt_mbr;
+            OPJ_UINT32 val;
+            OPJ_INT32 i;
+
+            OPJ_UINT32 pattern = 0xFFFFFFFFu; // a pattern selecting needed samples
+            if (height - y == 3) {
+                pattern = 0x77777777u;
+            } else if (height - y == 2) {
+                pattern = 0x33333333u;
+            } else if (height - y == 1) {
+                pattern = 0x11111111u;
+            }
+
+            //add membership from the next stripe, obtained above
+            if (height - y > 4) {
+                OPJ_UINT32 prev = 0;
+                OPJ_INT32 i;
+                cur_sig = y & 0x4 ? sigma2 : sigma1;
+                cur_mbr = y & 0x4 ? mbr2 : mbr1;
+                nxt_sig = y & 0x4 ? sigma1 : sigma2;
+                for (i = 0; i < width; i += 8, cur_mbr++, cur_sig++, nxt_sig++) {
+                    OPJ_UINT32 t = nxt_sig[0];
+                    t |= prev >> 28;     //for first column, left neighbors
+                    t |= nxt_sig[0] << 4;   //left neighbors
+                    t |= nxt_sig[0] >> 4;   //right neighbors
+                    t |= nxt_sig[1] << 28;  //for last column, right neighbors
+                    prev = nxt_sig[0];
+
+                    if (!stripe_causal) {
+                        cur_mbr[0] |= (t & 0x11111111u) << 3;
+                    }
+                    //remove already significant samples
+                    cur_mbr[0] &= ~cur_sig[0];
+                }
+            }
+
+            //find new locations and get signs
+            cur_sig = y & 0x4 ? sigma2 : sigma1;
+            cur_mbr = y & 0x4 ? mbr2 : mbr1;
+            nxt_sig = y & 0x4 ? sigma1 : sigma2;
+            nxt_mbr = y & 0x4 ? mbr1 : mbr2;
+            val = 3u << (p - 2);
+            for (i = 0; i < width; i += 8,
+                    cur_sig++, cur_mbr++, nxt_sig++, nxt_mbr++) {
+                OPJ_UINT32 mbr = *cur_mbr & pattern; //skip unneeded samples
+                OPJ_UINT32 new_sig = 0;
+                OPJ_UINT32 ux, tx;
+                if (mbr) {
+                    OPJ_INT32 n;
+                    for (n = 0; n < 8; n += 4) {
+                        OPJ_UINT32 col_mask;
+                        OPJ_UINT32 inv_sig;
+                        OPJ_INT32 end;
+                        OPJ_INT32 j;
+
+                        OPJ_UINT32 cwd = frwd_fetch(&sigprop);
+                        OPJ_UINT32 cnt = 0;
+
+                        OPJ_UINT32 *dp = decoded_data + y * stride;
+                        dp += i + n;
+
+                        col_mask = 0xFu << (4 * n);
+
+                        inv_sig = ~cur_sig[0] & pattern;
+
+                        end = n + 4 + i < width ? n + 4 : width - i;
+                        for (j = n; j < end; ++j, ++dp, col_mask <<= 4) {
+                            OPJ_UINT32 sample_mask;
+
+                            if ((col_mask & mbr) == 0) {
+                                continue;
+                            }
+
+                            //scan 4 mbr
+                            sample_mask = 0x11111111u & col_mask;
+                            if (mbr & sample_mask) {
+                                assert(dp[0] == 0);
+                                if (cwd & 1) {
+                                    OPJ_UINT32 t;
+                                    new_sig |= sample_mask;
+                                    t = 0x32u << (j * 4);
+                                    mbr |= t & inv_sig;
+                                }
+                                cwd >>= 1;
+                                ++cnt;
+                            }
+
+                            sample_mask += sample_mask;
+                            if (mbr & sample_mask) {
+                                assert(dp[stride] == 0);
+                                if (cwd & 1) {
+                                    OPJ_UINT32 t;
+                                    new_sig |= sample_mask;
+                                    t = 0x74u << (j * 4);
+                                    mbr |= t & inv_sig;
+                                }
+                                cwd >>= 1;
+                                ++cnt;
+                            }
+
+                            sample_mask += sample_mask;
+                            if (mbr & sample_mask) {
+                                assert(dp[2 * stride] == 0);
+                                if (cwd & 1) {
+                                    OPJ_UINT32 t;
+                                    new_sig |= sample_mask;
+                                    t = 0xE8u << (j * 4);
+                                    mbr |= t & inv_sig;
+                                }
+                                cwd >>= 1;
+                                ++cnt;
+                            }
+
+                            sample_mask += sample_mask;
+                            if (mbr & sample_mask) {
+                                assert(dp[3 * stride] == 0);
+                                if (cwd & 1) {
+                                    OPJ_UINT32 t;
+                                    new_sig |= sample_mask;
+                                    t = 0xC0u << (j * 4);
+                                    mbr |= t & inv_sig;
+                                }
+                                cwd >>= 1;
+                                ++cnt;
+                            }
+                        }
+
+                        //signs here
+                        if (new_sig & (0xFFFFu << (4 * n))) {
+                            OPJ_UINT32 col_mask;
+                            OPJ_INT32 j;
+                            OPJ_UINT32 *dp = decoded_data + y * stride;
+                            dp += i + n;
+                            col_mask = 0xFu << (4 * n);
+
+                            for (j = n; j < end; ++j, ++dp, col_mask <<= 4) {
+                                OPJ_UINT32 sample_mask;
+                                if ((col_mask & new_sig) == 0) {
+                                    continue;
+                                }
+
+                                //scan 4 signs
+                                sample_mask = 0x11111111u & col_mask;
+                                if (new_sig & sample_mask) {
+                                    assert(dp[0] == 0);
+                                    dp[0] |= ((cwd & 1) << 31) | val;
+                                    cwd >>= 1;
+                                    ++cnt;
+                                }
+
+                                sample_mask += sample_mask;
+                                if (new_sig & sample_mask) {
+                                    assert(dp[stride] == 0);
+                                    dp[stride] |= ((cwd & 1) << 31) | val;
+                                    cwd >>= 1;
+                                    ++cnt;
+                                }
+
+                                sample_mask += sample_mask;
+                                if (new_sig & sample_mask) {
+                                    assert(dp[2 * stride] == 0);
+                                    dp[2 * stride] |= ((cwd & 1) << 31) | val;
+                                    cwd >>= 1;
+                                    ++cnt;
+                                }
+
+                                sample_mask += sample_mask;
+                                if (new_sig & sample_mask) {
+                                    assert(dp[3 * stride] == 0);
+                                    dp[3 * stride] |= ((cwd & 1) << 31) | val;
+                                    cwd >>= 1;
+                                    ++cnt;
+                                }
+                            }
+
+                        }
+                        frwd_advance(&sigprop, cnt);
+                        cnt = 0;
+
+                        //update next columns
+                        if (n == 4) {
+                            //horizontally
+                            OPJ_UINT32 t = new_sig >> 28;
+                            t |= ((t & 0xE) >> 1) | ((t & 7) << 1);
+                            cur_mbr[1] |= t & ~cur_sig[1];
+                        }
+                    }
+                }
+                //propagate down (vertically propagation)
+                new_sig |= cur_sig[0];
+                ux = (new_sig & 0x88888888) >> 3;
+                tx = ux | (ux << 4) | (ux >> 4);
+                if (i > 0) {
+                    nxt_mbr[-1] |= (ux << 28) & ~nxt_sig[-1];
+                }
+                nxt_mbr[0] |= tx & ~nxt_sig[0];
+                nxt_mbr[1] |= (ux >> 28) & ~nxt_sig[1];
+            }
+        }
+    }
+
+    {
+        OPJ_INT32 x, y;
+        for (y = 0; y < height; ++y) {
+            OPJ_INT32* sp = (OPJ_INT32*)decoded_data + y * stride;
+            for (x = 0; x < width; ++x, ++sp) {
+                OPJ_INT32 val = (*sp & 0x7FFFFFFF);
+                *sp = ((OPJ_UINT32) * sp & 0x80000000) ? -val : val;
+            }
+        }
+    }
+
+    return OPJ_TRUE;
+}
diff --git a/third_party/libopenjpeg20/image.c b/third_party/libopenjpeg20/image.c
index fe37390..017201a 100644
--- a/third_party/libopenjpeg20/image.c
+++ b/third_party/libopenjpeg20/image.c
@@ -66,7 +66,6 @@
             comp->x0 = cmptparms[compno].x0;
             comp->y0 = cmptparms[compno].y0;
             comp->prec = cmptparms[compno].prec;
-            comp->bpp = cmptparms[compno].bpp;
             comp->sgnd = cmptparms[compno].sgnd;
             if (comp->h != 0 &&
                     (OPJ_SIZE_T)comp->w > SIZE_MAX / comp->h / sizeof(OPJ_INT32)) {
diff --git a/third_party/libopenjpeg20/j2k.c b/third_party/libopenjpeg20/j2k.c
index 711dd73..fce12e4 100644
--- a/third_party/libopenjpeg20/j2k.c
+++ b/third_party/libopenjpeg20/j2k.c
@@ -50,7 +50,7 @@
 /*@{*/
 
 /**
- * Sets up the procedures to do on reading header. Developpers wanting to extend the library can add their own reading procedures.
+ * Sets up the procedures to do on reading header. Developers wanting to extend the library can add their own reading procedures.
  */
 static OPJ_BOOL opj_j2k_setup_header_reading(opj_j2k_t *p_j2k,
         opj_event_mgr_t * p_manager);
@@ -89,22 +89,22 @@
         opj_event_mgr_t * p_manager);
 
 /**
- * Sets up the validation ,i.e. adds the procedures to lauch to make sure the codec parameters
- * are valid. Developpers wanting to extend the library can add their own validation procedures.
+ * Sets up the validation ,i.e. adds the procedures to launch to make sure the codec parameters
+ * are valid. Developers wanting to extend the library can add their own validation procedures.
  */
 static OPJ_BOOL opj_j2k_setup_encoding_validation(opj_j2k_t *p_j2k,
         opj_event_mgr_t * p_manager);
 
 /**
- * Sets up the validation ,i.e. adds the procedures to lauch to make sure the codec parameters
- * are valid. Developpers wanting to extend the library can add their own validation procedures.
+ * Sets up the validation ,i.e. adds the procedures to launch to make sure the codec parameters
+ * are valid. Developers wanting to extend the library can add their own validation procedures.
  */
 static OPJ_BOOL opj_j2k_setup_decoding_validation(opj_j2k_t *p_j2k,
         opj_event_mgr_t * p_manager);
 
 /**
- * Sets up the validation ,i.e. adds the procedures to lauch to make sure the codec parameters
- * are valid. Developpers wanting to extend the library can add their own validation procedures.
+ * Sets up the validation ,i.e. adds the procedures to launch to make sure the codec parameters
+ * are valid. Developers wanting to extend the library can add their own validation procedures.
  */
 static OPJ_BOOL opj_j2k_setup_end_compress(opj_j2k_t *p_j2k,
         opj_event_mgr_t * p_manager);
@@ -147,7 +147,7 @@
                                    opj_event_mgr_t * p_manager);
 
 /**
- * Excutes the given procedures on the given codec.
+ * Executes the given procedures on the given codec.
  *
  * @param       p_procedure_list        the list of procedures to execute
  * @param       p_j2k                           the jpeg2000 codec to execute the procedures on.
@@ -910,9 +910,15 @@
 
 static void opj_j2k_update_tlm(opj_j2k_t * p_j2k, OPJ_UINT32 p_tile_part_size)
 {
-    opj_write_bytes(p_j2k->m_specific_param.m_encoder.m_tlm_sot_offsets_current,
-                    p_j2k->m_current_tile_number, 1);           /* PSOT */
-    ++p_j2k->m_specific_param.m_encoder.m_tlm_sot_offsets_current;
+    if (p_j2k->m_specific_param.m_encoder.m_Ttlmi_is_byte) {
+        opj_write_bytes(p_j2k->m_specific_param.m_encoder.m_tlm_sot_offsets_current,
+                        p_j2k->m_current_tile_number, 1);
+        p_j2k->m_specific_param.m_encoder.m_tlm_sot_offsets_current += 1;
+    } else {
+        opj_write_bytes(p_j2k->m_specific_param.m_encoder.m_tlm_sot_offsets_current,
+                        p_j2k->m_current_tile_number, 2);
+        p_j2k->m_specific_param.m_encoder.m_tlm_sot_offsets_current += 2;
+    }
 
     opj_write_bytes(p_j2k->m_specific_param.m_encoder.m_tlm_sot_offsets_current,
                     p_tile_part_size, 4);                                       /* PSOT */
@@ -1171,6 +1177,32 @@
                                  OPJ_UINT32 p_header_size,
                                  opj_event_mgr_t * p_manager);
 
+/**
+ * Reads a CAP marker (extended capabilities definition). Empty implementation.
+ * Found in HTJ2K files
+ *
+ * @param       p_header_data   the data contained in the CAP marker.
+ * @param       p_j2k                   the jpeg2000 codec.
+ * @param       p_header_size   the size of the data contained in the CAP marker.
+ * @param       p_manager               the user event manager.
+*/
+static OPJ_BOOL opj_j2k_read_cap(opj_j2k_t *p_j2k,
+                                 OPJ_BYTE * p_header_data,
+                                 OPJ_UINT32 p_header_size,
+                                 opj_event_mgr_t * p_manager);
+
+/**
+ * Reads a CPF marker (corresponding profile). Empty implementation. Found in HTJ2K files
+ * @param       p_header_data   the data contained in the CPF marker.
+ * @param       p_j2k                   the jpeg2000 codec.
+ * @param       p_header_size   the size of the data contained in the CPF marker.
+ * @param       p_manager               the user event manager.
+*/
+static OPJ_BOOL opj_j2k_read_cpf(opj_j2k_t *p_j2k,
+                                 OPJ_BYTE * p_header_data,
+                                 OPJ_UINT32 p_header_size,
+                                 opj_event_mgr_t * p_manager);
+
 
 /**
  * Writes COC marker for each component.
@@ -1393,10 +1425,12 @@
     {J2K_MS_COM, J2K_STATE_MH | J2K_STATE_TPH, opj_j2k_read_com},
     {J2K_MS_MCT, J2K_STATE_MH | J2K_STATE_TPH, opj_j2k_read_mct},
     {J2K_MS_CBD, J2K_STATE_MH, opj_j2k_read_cbd},
+    {J2K_MS_CAP, J2K_STATE_MH, opj_j2k_read_cap},
+    {J2K_MS_CPF, J2K_STATE_MH, opj_j2k_read_cpf},
     {J2K_MS_MCC, J2K_STATE_MH | J2K_STATE_TPH, opj_j2k_read_mcc},
     {J2K_MS_MCO, J2K_STATE_MH | J2K_STATE_TPH, opj_j2k_read_mco},
 #ifdef USE_JPWL
-#ifdef TODO_MS /* remove these functions which are not commpatible with the v2 API */
+#ifdef TODO_MS /* remove these functions which are not compatible with the v2 API */
     {J2K_MS_EPC, J2K_STATE_MH | J2K_STATE_TPH, j2k_read_epc},
     {J2K_MS_EPB, J2K_STATE_MH | J2K_STATE_TPH, j2k_read_epb},
     {J2K_MS_ESD, J2K_STATE_MH | J2K_STATE_TPH, j2k_read_esd},
@@ -1643,7 +1677,7 @@
 
     assert(p_nb_pocs > 0);
 
-    packet_array = (OPJ_UINT32*) opj_calloc(step_l * p_num_layers,
+    packet_array = (OPJ_UINT32*) opj_calloc((size_t)step_l * p_num_layers,
                                             sizeof(OPJ_UINT32));
     if (packet_array == 00) {
         opj_event_msg(p_manager, EVT_ERROR,
@@ -1756,7 +1790,7 @@
                 tpnum *= l_current_poc->layE;
                 break;
             }
-            /* whould we split here ? */
+            /* would we split here ? */
             if (cp->m_specific_param.m_enc.m_tp_flag == prog[i]) {
                 cp->m_specific_param.m_enc.m_tp_pos = i;
                 break;
@@ -4166,13 +4200,33 @@
 {
     OPJ_BYTE * l_current_data = 00;
     OPJ_UINT32 l_tlm_size;
+    OPJ_UINT32 size_per_tile_part;
 
     /* preconditions */
     assert(p_j2k != 00);
     assert(p_manager != 00);
     assert(p_stream != 00);
 
-    l_tlm_size = 6 + (5 * p_j2k->m_specific_param.m_encoder.m_total_tile_parts);
+    /* 10921 = (65535 - header_size) / size_per_tile_part where */
+    /* header_size = 4 and size_per_tile_part = 6 */
+    if (p_j2k->m_specific_param.m_encoder.m_total_tile_parts > 10921) {
+        /* We could do more but it would require writing several TLM markers */
+        opj_event_msg(p_manager, EVT_ERROR,
+                      "A maximum of 10921 tile-parts are supported currently "
+                      "when writing TLM marker\n");
+        return OPJ_FALSE;
+    }
+
+    if (p_j2k->m_specific_param.m_encoder.m_total_tile_parts <= 255) {
+        size_per_tile_part = 5;
+        p_j2k->m_specific_param.m_encoder.m_Ttlmi_is_byte = OPJ_TRUE;
+    } else {
+        size_per_tile_part = 6;
+        p_j2k->m_specific_param.m_encoder.m_Ttlmi_is_byte = OPJ_FALSE;
+    }
+
+    l_tlm_size = 2 + 4 + (size_per_tile_part *
+                          p_j2k->m_specific_param.m_encoder.m_total_tile_parts);
 
     if (l_tlm_size > p_j2k->m_specific_param.m_encoder.m_header_tile_data_size) {
         OPJ_BYTE *new_header_tile_data = (OPJ_BYTE *) opj_realloc(
@@ -4187,6 +4241,7 @@
         p_j2k->m_specific_param.m_encoder.m_header_tile_data = new_header_tile_data;
         p_j2k->m_specific_param.m_encoder.m_header_tile_data_size = l_tlm_size;
     }
+    memset(p_j2k->m_specific_param.m_encoder.m_header_tile_data, 0, l_tlm_size);
 
     l_current_data = p_j2k->m_specific_param.m_encoder.m_header_tile_data;
 
@@ -4206,11 +4261,14 @@
                     1);                                                    /* Ztlm=0*/
     ++l_current_data;
 
-    opj_write_bytes(l_current_data, 0x50,
-                    1);                                                 /* Stlm ST=1(8bits-255 tiles max),SP=1(Ptlm=32bits) */
+    /* Stlm 0x50= ST=1(8bits-255 tiles max),SP=1(Ptlm=32bits) */
+    /* Stlm 0x60= ST=2(16bits-65535 tiles max),SP=1(Ptlm=32bits) */
+    opj_write_bytes(l_current_data,
+                    size_per_tile_part == 5 ? 0x50 : 0x60,
+                    1);
     ++l_current_data;
 
-    /* do nothing on the 5 * l_j2k->m_specific_param.m_encoder.m_total_tile_parts remaining data */
+    /* do nothing on the size_per_tile_part * p_j2k->m_specific_param.m_encoder.m_total_tile_parts remaining data */
     if (opj_stream_write_data(p_stream,
                               p_j2k->m_specific_param.m_encoder.m_header_tile_data, l_tlm_size,
                               p_manager) != l_tlm_size) {
@@ -4902,9 +4960,14 @@
         /* Check enough bytes left in stream before allocation */
         if ((OPJ_OFF_T)p_j2k->m_specific_param.m_decoder.m_sot_length >
                 opj_stream_get_number_byte_left(p_stream)) {
-            opj_event_msg(p_manager, EVT_ERROR,
-                          "Tile part length size inconsistent with stream length\n");
-            return OPJ_FALSE;
+            if (p_j2k->m_cp.strict) {
+                opj_event_msg(p_manager, EVT_ERROR,
+                              "Tile part length size inconsistent with stream length\n");
+                return OPJ_FALSE;
+            } else {
+                opj_event_msg(p_manager, EVT_WARNING,
+                              "Tile part length size inconsistent with stream length\n");
+            }
         }
         if (p_j2k->m_specific_param.m_decoder.m_sot_length >
                 UINT_MAX - OPJ_COMMON_CBLK_DATA_EXTRA) {
@@ -5350,9 +5413,9 @@
         return OPJ_FALSE;
     }
 
-    if (OPJ_IS_CINEMA(l_cp->rsiz) || OPJ_IS_IMF(l_cp->rsiz)) {
+    if (p_j2k->m_specific_param.m_encoder.m_TLM) {
         p_j2k->m_specific_param.m_encoder.m_tlm_sot_offsets_buffer =
-            (OPJ_BYTE *) opj_malloc(5 *
+            (OPJ_BYTE *) opj_malloc(6 *
                                     p_j2k->m_specific_param.m_encoder.m_total_tile_parts);
         if (! p_j2k->m_specific_param.m_encoder.m_tlm_sot_offsets_buffer) {
             return OPJ_FALSE;
@@ -6564,6 +6627,60 @@
     return OPJ_TRUE;
 }
 
+/**
+ * Reads a CAP marker (extended capabilities definition). Empty implementation:
+ * the marker is accepted and its content is ignored. Found in HTJ2K files.
+ * @param       p_header_data   the data contained in the CAP marker.
+ * @param       p_j2k           the jpeg2000 codec.
+ * @param       p_header_size   the size of the data contained in the CAP marker.
+ * @param       p_manager       the user event manager.
+ * @return      always OPJ_TRUE.
+*/
+static OPJ_BOOL opj_j2k_read_cap(opj_j2k_t *p_j2k,
+                                 OPJ_BYTE * p_header_data,
+                                 OPJ_UINT32 p_header_size,
+                                 opj_event_mgr_t * p_manager
+                                )
+{
+    /* preconditions */
+    assert(p_header_data != 00);
+    assert(p_j2k != 00);
+    assert(p_manager != 00);
+
+    (void)p_j2k;
+    (void)p_header_data;
+    (void)p_header_size;
+    (void)p_manager;
+
+    return OPJ_TRUE;
+}
+
+/**
+ * Reads a CPF marker (corresponding profile), found in HTJ2K files. Empty implementation: content is ignored, always returns OPJ_TRUE.
+ * @param       p_header_data   the data contained in the CPF marker.
+ * @param       p_j2k           the jpeg2000 codec.
+ * @param       p_header_size   the size of the data contained in the CPF marker.
+ * @param       p_manager       the user event manager.
+*/
+static OPJ_BOOL opj_j2k_read_cpf(opj_j2k_t *p_j2k,
+                                 OPJ_BYTE * p_header_data,
+                                 OPJ_UINT32 p_header_size,
+                                 opj_event_mgr_t * p_manager
+                                )
+{
+    /* preconditions */
+    assert(p_header_data != 00);
+    assert(p_j2k != 00);
+    assert(p_manager != 00);
+
+    (void)p_j2k;
+    (void)p_header_data;
+    (void)p_header_size;
+    (void)p_manager;
+
+    return OPJ_TRUE;
+}
+
 /* ----------------------------------------------------------------------- */
 /* J2K / JPT decoder interface                                             */
 /* ----------------------------------------------------------------------- */
@@ -6583,6 +6700,13 @@
     }
 }
 
+void opj_j2k_decoder_set_strict_mode(opj_j2k_t *j2k, OPJ_BOOL strict)
+{
+    if (j2k) { /* a NULL codec is silently ignored */
+        j2k->m_cp.strict = strict; /* the flag lives in the coding parameters */
+    }
+}
+
 OPJ_BOOL opj_j2k_set_threads(opj_j2k_t *j2k, OPJ_UINT32 num_threads)
 {
     /* Currently we pass the thread-pool to the tcd, so we cannot re-set it */
@@ -6863,7 +6987,7 @@
 
     /* Bitdepth */
     for (i = 0; i < image->numcomps; i++) {
-        if ((image->comps[i].bpp != 12) | (image->comps[i].sgnd)) {
+        if ((image->comps[i].prec != 12) | (image->comps[i].sgnd)) {
             char signed_str[] = "signed";
             char unsigned_str[] = "unsigned";
             char *tmp_str = image->comps[i].sgnd ? signed_str : unsigned_str;
@@ -6872,7 +6996,7 @@
                           "Precision of each component shall be 12 bits unsigned"
                           "-> At least component %d of input image (%d bits, %s) is not compliant\n"
                           "-> Non-profile-3 codestream will be generated\n",
-                          i, image->comps[i].bpp, tmp_str);
+                          i, image->comps[i].prec, tmp_str);
             return OPJ_FALSE;
         }
     }
@@ -7070,20 +7194,20 @@
                       "-> Non-IMF codestream will be generated\n",
                       mainlevel);
         ret = OPJ_FALSE;
-    }
-
-    /* Validate sublevel */
-    assert(sizeof(tabMaxSubLevelFromMainLevel) ==
-           (OPJ_IMF_MAINLEVEL_MAX + 1) * sizeof(tabMaxSubLevelFromMainLevel[0]));
-    if (sublevel > tabMaxSubLevelFromMainLevel[mainlevel]) {
-        opj_event_msg(p_manager, EVT_WARNING,
-                      "IMF profile require sublevel <= %d for mainlevel = %d.\n"
-                      "-> %d is thus not compliant\n"
-                      "-> Non-IMF codestream will be generated\n",
-                      tabMaxSubLevelFromMainLevel[mainlevel],
-                      mainlevel,
-                      sublevel);
-        ret = OPJ_FALSE;
+    } else {
+        /* Validate sublevel */
+        assert(sizeof(tabMaxSubLevelFromMainLevel) ==
+               (OPJ_IMF_MAINLEVEL_MAX + 1) * sizeof(tabMaxSubLevelFromMainLevel[0]));
+        if (sublevel > tabMaxSubLevelFromMainLevel[mainlevel]) {
+            opj_event_msg(p_manager, EVT_WARNING,
+                          "IMF profile require sublevel <= %d for mainlevel = %d.\n"
+                          "-> %d is thus not compliant\n"
+                          "-> Non-IMF codestream will be generated\n",
+                          tabMaxSubLevelFromMainLevel[mainlevel],
+                          mainlevel,
+                          sublevel);
+            ret = OPJ_FALSE;
+        }
     }
 
     /* Number of components */
@@ -7163,7 +7287,7 @@
 
     /* Bitdepth */
     for (i = 0; i < image->numcomps; i++) {
-        if (!(image->comps[i].bpp >= 8 && image->comps[i].bpp <= 16) ||
+        if (!(image->comps[i].prec >= 8 && image->comps[i].prec <= 16) ||
                 (image->comps[i].sgnd)) {
             char signed_str[] = "signed";
             char unsigned_str[] = "unsigned";
@@ -7172,7 +7296,7 @@
                           "IMF profiles require precision of each component to b in [8-16] bits unsigned"
                           "-> At least component %d of input image (%d bits, %s) is not compliant\n"
                           "-> Non-IMF codestream will be generated\n",
-                          i, image->comps[i].bpp, tmp_str);
+                          i, image->comps[i].prec, tmp_str);
             ret = OPJ_FALSE;
         }
     }
@@ -7204,7 +7328,7 @@
         if (image->comps[i].dy != 1) {
             opj_event_msg(p_manager, EVT_WARNING,
                           "IMF profiles require YRsiz == 1. "
-                          "Here it is set to %d for component i.\n"
+                          "Here it is set to %d for component %d.\n"
                           "-> Non-IMF codestream will be generated\n",
                           image->comps[i].dy, i);
             ret = OPJ_FALSE;
@@ -7468,8 +7592,7 @@
             opj_event_msg(p_manager, EVT_WARNING,
                           "IMF profiles require PPx = PPy = 7 for NLLL band, else 8.\n"
                           "-> Supplied values are different from that.\n"
-                          "-> Non-IMF codestream will be generated\n",
-                          NL);
+                          "-> Non-IMF codestream will be generated\n");
             ret = OPJ_FALSE;
         }
     } else {
@@ -7480,8 +7603,7 @@
                 opj_event_msg(p_manager, EVT_WARNING,
                               "IMF profiles require PPx = PPy = 7 for NLLL band, else 8.\n"
                               "-> Supplied values are different from that.\n"
-                              "-> Non-IMF codestream will be generated\n",
-                              NL);
+                              "-> Non-IMF codestream will be generated\n");
                 ret = OPJ_FALSE;
             }
         }
@@ -7544,6 +7666,8 @@
         return OPJ_FALSE;
     }
 
+    p_j2k->m_specific_param.m_encoder.m_nb_comps = image->numcomps;
+
     /* keep a link to cp so that we can destroy it later in j2k_destroy_compress */
     cp = &(p_j2k->m_cp);
 
@@ -7709,6 +7833,10 @@
         }
     }
 
+    if (OPJ_IS_CINEMA(parameters->rsiz) || OPJ_IS_IMF(parameters->rsiz)) {
+        p_j2k->m_specific_param.m_encoder.m_TLM = OPJ_TRUE;
+    }
+
     /* Manage profiles and applications and set RSIZ */
     /* set cinema parameters if required */
     if (OPJ_IS_CINEMA(parameters->rsiz)) {
@@ -7835,8 +7963,17 @@
             opj_event_msg(p_manager, EVT_ERROR, "Invalid tile height\n");
             return OPJ_FALSE;
         }
-        cp->tw = opj_uint_ceildiv(image->x1 - cp->tx0, cp->tdx);
-        cp->th = opj_uint_ceildiv(image->y1 - cp->ty0, cp->tdy);
+        /* Use unsigned ceiling division: the OPJ_INT32 casts required by */
+        /* opj_int_ceildiv() are lossy for extents larger than INT_MAX. */
+        cp->tw = opj_uint_ceildiv(image->x1 - cp->tx0, cp->tdx);
+        cp->th = opj_uint_ceildiv(image->y1 - cp->ty0, cp->tdy);
+        /* Check that the number of tiles is valid (non-zero and at most 65535) */
+        if (cp->tw == 0U || cp->th == 0U || cp->tw > 65535 / cp->th) {
+            opj_event_msg(p_manager, EVT_ERROR,
+                          "Invalid number of tiles : %u x %u (maximum fixed by jpeg2000 norm is 65535 tiles)\n",
+                          cp->tw, cp->th);
+            return OPJ_FALSE;
+        }
     } else {
         cp->tdx = image->x1 - cp->tx0;
         cp->tdy = image->y1 - cp->ty0;
@@ -7903,7 +8040,7 @@
     }
 #endif /* USE_JPWL */
 
-    /* initialize the mutiple tiles */
+    /* initialize the multiple tiles */
     /* ---------------------------- */
     cp->tcps = (opj_tcp_t*) opj_calloc(cp->tw * cp->th, sizeof(opj_tcp_t));
     if (!cp->tcps) {
@@ -8713,7 +8850,7 @@
         if (l_marker_handler->id == J2K_MS_UNK) {
             if (! opj_j2k_read_unk(p_j2k, p_stream, &l_current_marker, p_manager)) {
                 opj_event_msg(p_manager, EVT_ERROR,
-                              "Unknow marker have been detected and generated error.\n");
+                              "Unknown marker has been detected and generated error.\n");
                 return OPJ_FALSE;
             }
 
@@ -9718,7 +9855,7 @@
     /* When using the opj_read_tile_header / opj_decode_tile_data API */
     /* such as in test_tile_decoder, m_output_image is NULL, so fall back */
     /* to the full image dimension. This is a bit surprising that */
-    /* opj_set_decode_area() is only used to determinte intersecting tiles, */
+    /* opj_set_decode_area() is only used to determine intersecting tiles, */
     /* but full tile decoding is done */
     l_image_for_bounds = p_j2k->m_output_image ? p_j2k->m_output_image :
                          p_j2k->m_private_image;
@@ -9764,10 +9901,10 @@
 
     if (p_j2k->m_specific_param.m_decoder.m_state != J2K_STATE_EOC) {
         if (opj_stream_read_data(p_stream, l_data, 2, p_manager) != 2) {
-            opj_event_msg(p_manager, EVT_ERROR, "Stream too short\n");
-            return OPJ_FALSE;
+            opj_event_msg(p_manager, p_j2k->m_cp.strict ? EVT_ERROR : EVT_WARNING,
+                          "Stream too short\n");
+            return p_j2k->m_cp.strict ? OPJ_FALSE : OPJ_TRUE;
         }
-
         opj_read_bytes(l_data, &l_current_marker, 2);
 
         if (l_current_marker == J2K_MS_EOC) {
@@ -10288,6 +10425,9 @@
     /* per component is allowed */
     l_j2k->m_cp.allow_different_bit_depth_sign = 1;
 
+    /* Default to using strict mode. */
+    l_j2k->m_cp.strict = OPJ_TRUE;
+
 #ifdef OPJ_DISABLE_TPSOT_FIX
     l_j2k->m_specific_param.m_decoder.m_nb_tile_parts_correction_checked = 1;
 #endif
@@ -10591,9 +10731,10 @@
     /* SPcod (G) / SPcoc (D) */
     opj_read_bytes(l_current_ptr, &l_tccp->cblksty, 1);
     ++l_current_ptr;
-    if (l_tccp->cblksty & 0xC0U) { /* 2 msb are reserved, assume we can't read */
+    if ((l_tccp->cblksty & J2K_CCP_CBLKSTY_HTMIXED) != 0) {
+        /* We do not support HT mixed mode yet.  For conformance, it should be supported.*/
         opj_event_msg(p_manager, EVT_ERROR,
-                      "Error reading SPCod SPCoc element, Invalid code-block style found\n");
+                      "Error reading SPCod SPCoc element. Unsupported Mixed HT code-block style found\n");
         return OPJ_FALSE;
     }
 
@@ -11618,7 +11759,7 @@
 }
 
 /**
- * Sets up the procedures to do on decoding data. Developpers wanting to extend the library can add their own reading procedures.
+ * Sets up the procedures to do on decoding data. Developers wanting to extend the library can add their own reading procedures.
  */
 static OPJ_BOOL opj_j2k_setup_decoding(opj_j2k_t *p_j2k,
                                        opj_event_mgr_t * p_manager)
@@ -11750,7 +11891,7 @@
 }
 
 /**
- * Sets up the procedures to do on decoding one tile. Developpers wanting to extend the library can add their own reading procedures.
+ * Sets up the procedures to do on decoding one tile. Developers wanting to extend the library can add their own reading procedures.
  */
 static OPJ_BOOL opj_j2k_setup_decoding_tile(opj_j2k_t *p_j2k,
         opj_event_mgr_t * p_manager)
@@ -11974,7 +12115,7 @@
         opj_image_destroy(p_j2k->m_output_image);
     }
 
-    /* Create the ouput image from the information previously computed*/
+    /* Create the output image from the information previously computed*/
     p_j2k->m_output_image = opj_image_create0();
     if (!(p_j2k->m_output_image)) {
         return OPJ_FALSE;
@@ -12054,6 +12195,35 @@
                               "Invalid value for option: %s.\n", *p_option_iter);
                 return OPJ_FALSE;
             }
+        } else if (strncmp(*p_option_iter, "TLM=", 4) == 0) {
+            if (strcmp(*p_option_iter, "TLM=YES") == 0) {
+                p_j2k->m_specific_param.m_encoder.m_TLM = OPJ_TRUE;
+            } else if (strcmp(*p_option_iter, "TLM=NO") == 0) {
+                p_j2k->m_specific_param.m_encoder.m_TLM = OPJ_FALSE;
+            } else {
+                opj_event_msg(p_manager, EVT_ERROR,
+                              "Invalid value for option: %s.\n", *p_option_iter);
+                return OPJ_FALSE;
+            }
+        } else if (strncmp(*p_option_iter, "GUARD_BITS=", strlen("GUARD_BITS=")) == 0) {
+            OPJ_UINT32 tileno;
+            opj_cp_t *cp = &(p_j2k->m_cp);
+            /* The text after the '=' is the requested number of guard bits. */
+            int numgbits = atoi(*p_option_iter + strlen("GUARD_BITS="));
+            if (numgbits < 0 || numgbits > 7) {
+                opj_event_msg(p_manager, EVT_ERROR,
+                              "Invalid value for option: %s. Should be in [0,7]\n", *p_option_iter);
+                return OPJ_FALSE;
+            }
+            /* Apply the value to every component of every tile. */
+            for (tileno = 0; tileno < cp->tw * cp->th; tileno++) {
+                OPJ_UINT32 i;
+                opj_tcp_t *tcp = &cp->tcps[tileno];
+                for (i = 0; i < p_j2k->m_specific_param.m_encoder.m_nb_comps; i++) {
+                    opj_tccp_t *tccp = &tcp->tccps[i];
+                    tccp->numgbits = (OPJ_UINT32)numgbits;
+                }
+            }
         } else {
             opj_event_msg(p_manager, EVT_ERROR,
                           "Invalid option: %s.\n", *p_option_iter);
@@ -12449,7 +12619,7 @@
         return OPJ_FALSE;
     }
 
-    if (OPJ_IS_CINEMA(p_j2k->m_cp.rsiz) || OPJ_IS_IMF(p_j2k->m_cp.rsiz)) {
+    if (p_j2k->m_specific_param.m_encoder.m_TLM) {
         if (! opj_procedure_list_add_procedure(p_j2k->m_procedure_list,
                                                (opj_procedure)opj_j2k_write_updated_tlm, p_manager)) {
             return OPJ_FALSE;
@@ -12532,7 +12702,7 @@
         return OPJ_FALSE;
     }
 
-    if (OPJ_IS_CINEMA(p_j2k->m_cp.rsiz) || OPJ_IS_IMF(p_j2k->m_cp.rsiz)) {
+    if (p_j2k->m_specific_param.m_encoder.m_TLM) {
         if (! opj_procedure_list_add_procedure(p_j2k->m_procedure_list,
                                                (opj_procedure)opj_j2k_write_tlm, p_manager)) {
             return OPJ_FALSE;
@@ -12665,7 +12835,7 @@
     opj_write_bytes(l_begin_data + 6, l_nb_bytes_written,
                     4);                                 /* PSOT */
 
-    if (OPJ_IS_CINEMA(l_cp->rsiz) || OPJ_IS_IMF(l_cp->rsiz)) {
+    if (p_j2k->m_specific_param.m_encoder.m_TLM) {
         opj_j2k_update_tlm(p_j2k, l_nb_bytes_written);
     }
 
@@ -12735,7 +12905,7 @@
         opj_write_bytes(l_begin_data + 6, l_part_tile_size,
                         4);                                   /* PSOT */
 
-        if (OPJ_IS_CINEMA(l_cp->rsiz) || OPJ_IS_IMF(l_cp->rsiz)) {
+        if (p_j2k->m_specific_param.m_encoder.m_TLM) {
             opj_j2k_update_tlm(p_j2k, l_part_tile_size);
         }
 
@@ -12781,7 +12951,7 @@
             opj_write_bytes(l_begin_data + 6, l_part_tile_size,
                             4);                                   /* PSOT */
 
-            if (OPJ_IS_CINEMA(l_cp->rsiz) || OPJ_IS_IMF(l_cp->rsiz)) {
+            if (p_j2k->m_specific_param.m_encoder.m_TLM) {
                 opj_j2k_update_tlm(p_j2k, l_part_tile_size);
             }
 
@@ -12800,13 +12970,16 @@
 {
     OPJ_UINT32 l_tlm_size;
     OPJ_OFF_T l_tlm_position, l_current_position;
+    OPJ_UINT32 size_per_tile_part;
 
     /* preconditions */
     assert(p_j2k != 00);
     assert(p_manager != 00);
     assert(p_stream != 00);
 
-    l_tlm_size = 5 * p_j2k->m_specific_param.m_encoder.m_total_tile_parts;
+    size_per_tile_part = p_j2k->m_specific_param.m_encoder.m_Ttlmi_is_byte ? 5 : 6;
+    l_tlm_size = size_per_tile_part *
+                 p_j2k->m_specific_param.m_encoder.m_total_tile_parts;
     l_tlm_position = 6 + p_j2k->m_specific_param.m_encoder.m_tlm_start;
     l_current_position = opj_stream_tell(p_stream);
 
diff --git a/third_party/libopenjpeg20/j2k.h b/third_party/libopenjpeg20/j2k.h
index 9eb50b5..04fba64 100644
--- a/third_party/libopenjpeg20/j2k.h
+++ b/third_party/libopenjpeg20/j2k.h
@@ -61,6 +61,8 @@
 #define J2K_CCP_CBLKSTY_VSC 0x08      /**< Vertically stripe causal context */
 #define J2K_CCP_CBLKSTY_PTERM 0x10    /**< Predictable termination */
 #define J2K_CCP_CBLKSTY_SEGSYM 0x20   /**< Segmentation symbols are used */
+#define J2K_CCP_CBLKSTY_HT 0x40       /**< (high throughput) HT codeblocks */
+#define J2K_CCP_CBLKSTY_HTMIXED 0x80  /**< MIXED mode HT codeblocks */
 #define J2K_CCP_QNTSTY_NOQNT 0
 #define J2K_CCP_QNTSTY_SIQNT 1
 #define J2K_CCP_QNTSTY_SEQNT 2
@@ -71,9 +73,11 @@
 #define J2K_MS_SOT 0xff90   /**< SOT marker value */
 #define J2K_MS_SOD 0xff93   /**< SOD marker value */
 #define J2K_MS_EOC 0xffd9   /**< EOC marker value */
+#define J2K_MS_CAP 0xff50   /**< CAP marker value */
 #define J2K_MS_SIZ 0xff51   /**< SIZ marker value */
 #define J2K_MS_COD 0xff52   /**< COD marker value */
 #define J2K_MS_COC 0xff53   /**< COC marker value */
+#define J2K_MS_CPF 0xff59   /**< CPF marker value */
 #define J2K_MS_RGN 0xff5e   /**< RGN marker value */
 #define J2K_MS_QCD 0xff5c   /**< QCD marker value */
 #define J2K_MS_QCC 0xff5d   /**< QCC marker value */
@@ -398,6 +402,8 @@
     }
     m_specific_param;
 
+    /** OPJ_TRUE if the entire bitstream must be decoded, OPJ_FALSE if partial bitstream decoding is allowed */
+    OPJ_BOOL strict;
 
     /* UniPG>> */
 #ifdef USE_JPWL
@@ -503,6 +509,12 @@
     /** Tile part number currently coding, taking into account POC. m_current_tile_part_number holds the total number of tile parts while encoding the last tile part.*/
     OPJ_UINT32 m_current_tile_part_number; /*cur_tp_num */
 
+    /* whether to generate TLM markers */
+    OPJ_BOOL   m_TLM;
+
+    /* whether the Ttlmi field in a TLM marker is a byte (otherwise a uint16) */
+    OPJ_BOOL   m_Ttlmi_is_byte;
+
     /**
     locate the start position of the TLM marker
     after encoding the tilepart, a jump (in j2k_write_sod) is done to the TLM marker to store the value of its length.
@@ -540,6 +552,9 @@
     /* reserved bytes in m_encoded_tile_size for PLT markers */
     OPJ_UINT32 m_reserved_bytes_for_PLT;
 
+    /** Number of components */
+    OPJ_UINT32 m_nb_comps;
+
 } opj_j2k_enc_t;
 
 
@@ -612,6 +627,8 @@
 */
 void opj_j2k_setup_decoder(opj_j2k_t *j2k, opj_dparameters_t *parameters);
 
+void opj_j2k_decoder_set_strict_mode(opj_j2k_t *j2k, OPJ_BOOL strict);
+
 OPJ_BOOL opj_j2k_set_threads(opj_j2k_t *j2k, OPJ_UINT32 num_threads);
 
 /**
diff --git a/third_party/libopenjpeg20/jp2.c b/third_party/libopenjpeg20/jp2.c
index 02f3d04..44d0c98 100644
--- a/third_party/libopenjpeg20/jp2.c
+++ b/third_party/libopenjpeg20/jp2.c
@@ -331,14 +331,14 @@
 
 /**
  * Sets up the procedures to do on writing header after the codestream.
- * Developpers wanting to extend the library can add their own writing procedures.
+ * Developers wanting to extend the library can add their own writing procedures.
  */
 static OPJ_BOOL opj_jp2_setup_end_header_writing(opj_jp2_t *jp2,
         opj_event_mgr_t * p_manager);
 
 /**
  * Sets up the procedures to do on reading header after the codestream.
- * Developpers wanting to extend the library can add their own writing procedures.
+ * Developers wanting to extend the library can add their own reading procedures.
  */
 static OPJ_BOOL opj_jp2_setup_end_header_reading(opj_jp2_t *jp2,
         opj_event_mgr_t * p_manager);
@@ -388,13 +388,13 @@
 
 /**
  * Sets up the validation ,i.e. adds the procedures to launch to make sure the codec parameters
- * are valid. Developpers wanting to extend the library can add their own validation procedures.
+ * are valid. Developers wanting to extend the library can add their own validation procedures.
  */
 static OPJ_BOOL opj_jp2_setup_encoding_validation(opj_jp2_t *jp2,
         opj_event_mgr_t * p_manager);
 
 /**
- * Sets up the procedures to do on writing header. Developpers wanting to extend the library can add their own writing procedures.
+ * Sets up the procedures to do on writing header. Developers wanting to extend the library can add their own writing procedures.
  */
 static OPJ_BOOL opj_jp2_setup_header_writing(opj_jp2_t *jp2,
         opj_event_mgr_t * p_manager);
@@ -457,14 +457,14 @@
 
 /**
  * Sets up the validation ,i.e. adds the procedures to launch to make sure the codec parameters
- * are valid. Developpers wanting to extend the library can add their own validation procedures.
+ * are valid. Developers wanting to extend the library can add their own validation procedures.
  */
 static OPJ_BOOL opj_jp2_setup_decoding_validation(opj_jp2_t *jp2,
         opj_event_mgr_t * p_manager);
 
 /**
  * Sets up the procedures to do on reading header.
- * Developpers wanting to extend the library can add their own writing procedures.
+ * Developers wanting to extend the library can add their own reading procedures.
  */
 static OPJ_BOOL opj_jp2_setup_header_reading(opj_jp2_t *jp2,
         opj_event_mgr_t * p_manager);
@@ -1151,9 +1151,9 @@
     }
 
     max = image->numcomps;
-    for (i = 0; i < max; ++i) {
-        if (old_comps[i].data) {
-            opj_image_data_free(old_comps[i].data);
+    for (j = 0; j < max; ++j) {
+        if (old_comps[j].data) {
+            opj_image_data_free(old_comps[j].data);
         }
     }
 
@@ -1917,6 +1917,11 @@
                                  OPJ_DPARAMETERS_IGNORE_PCLR_CMAP_CDEF_FLAG;
 }
 
+void opj_jp2_decoder_set_strict_mode(opj_jp2_t *jp2, OPJ_BOOL strict)
+{
+    opj_j2k_decoder_set_strict_mode(jp2->j2k, strict); /* forwards to the embedded J2K codec; jp2 is dereferenced unchecked */
+}
+
 OPJ_BOOL opj_jp2_set_threads(opj_jp2_t *jp2, OPJ_UINT32 num_threads)
 {
     return opj_j2k_set_threads(jp2->j2k, num_threads);
diff --git a/third_party/libopenjpeg20/jp2.h b/third_party/libopenjpeg20/jp2.h
index 9e7fa56..173f251 100644
--- a/third_party/libopenjpeg20/jp2.h
+++ b/third_party/libopenjpeg20/jp2.h
@@ -235,6 +235,15 @@
 */
 void opj_jp2_setup_decoder(opj_jp2_t *jp2, opj_dparameters_t *parameters);
 
+/**
+Set the strict mode parameter.  When strict mode is enabled, the entire
+bitstream must be decoded or an error is returned.  When it is disabled,
+the decoder will decode partial bitstreams.
+@param jp2 JP2 decompressor handle
+@param strict OPJ_TRUE for strict mode
+*/
+void opj_jp2_decoder_set_strict_mode(opj_jp2_t *jp2, OPJ_BOOL strict);
+
 /** Allocates worker threads for the compressor/decompressor.
  *
  * @param jp2 JP2 decompressor handle
diff --git a/third_party/libopenjpeg20/mct.c b/third_party/libopenjpeg20/mct.c
index c4c2e73..2446efb 100644
--- a/third_party/libopenjpeg20/mct.c
+++ b/third_party/libopenjpeg20/mct.c
@@ -459,7 +459,7 @@
         for (j = 0; j < pNbComps; ++j) {
             lCurrentValue = lMatrix[lIndex];
             lIndex += pNbComps;
-            lNorms[i] += lCurrentValue * lCurrentValue;
+            lNorms[i] += (OPJ_FLOAT64) lCurrentValue * lCurrentValue;
         }
         lNorms[i] = sqrt(lNorms[i]);
     }
diff --git a/third_party/libopenjpeg20/openjpeg.c b/third_party/libopenjpeg20/openjpeg.c
index 9c9b6eb..29d3ee5 100644
--- a/third_party/libopenjpeg20/openjpeg.c
+++ b/third_party/libopenjpeg20/openjpeg.c
@@ -89,14 +89,16 @@
 /* ---------------------------------------------------------------------- */
 
 static OPJ_SIZE_T opj_read_from_file(void * p_buffer, OPJ_SIZE_T p_nb_bytes,
-                                     FILE * p_file)
+                                     void * p_user_data)
 {
-    OPJ_SIZE_T l_nb_read = fread(p_buffer, 1, p_nb_bytes, p_file);
+    FILE* p_file = (FILE*)p_user_data; /* the stream user data is the FILE* */
+    OPJ_SIZE_T l_nb_read = fread(p_buffer, 1, p_nb_bytes, p_file);
     return l_nb_read ? l_nb_read : (OPJ_SIZE_T) - 1;
 }
 
-static OPJ_UINT64 opj_get_data_length_from_file(FILE * p_file)
+static OPJ_UINT64 opj_get_data_length_from_file(void * p_user_data)
 {
+    FILE* p_file = (FILE*)p_user_data;
     OPJ_OFF_T file_length = 0;
 
     OPJ_FSEEK(p_file, 0, SEEK_END);
@@ -107,29 +109,38 @@
 }
 
 static OPJ_SIZE_T opj_write_from_file(void * p_buffer, OPJ_SIZE_T p_nb_bytes,
-                                      FILE * p_file)
+                                      void * p_user_data)
 {
+    FILE* p_file = (FILE*)p_user_data;
     return fwrite(p_buffer, 1, p_nb_bytes, p_file);
 }
 
-static OPJ_OFF_T opj_skip_from_file(OPJ_OFF_T p_nb_bytes, FILE * p_user_data)
+static OPJ_OFF_T opj_skip_from_file(OPJ_OFF_T p_nb_bytes, void * p_user_data)
 {
-    if (OPJ_FSEEK(p_user_data, p_nb_bytes, SEEK_CUR)) {
+    FILE* p_file = (FILE*)p_user_data;
+    if (OPJ_FSEEK(p_file, p_nb_bytes, SEEK_CUR)) {
         return -1;
     }
 
     return p_nb_bytes;
 }
 
-static OPJ_BOOL opj_seek_from_file(OPJ_OFF_T p_nb_bytes, FILE * p_user_data)
+static OPJ_BOOL opj_seek_from_file(OPJ_OFF_T p_nb_bytes, void * p_user_data)
 {
-    if (OPJ_FSEEK(p_user_data, p_nb_bytes, SEEK_SET)) {
+    FILE* p_file = (FILE*)p_user_data;
+    if (OPJ_FSEEK(p_file, p_nb_bytes, SEEK_SET)) {
         return OPJ_FALSE;
     }
 
     return OPJ_TRUE;
 }
 
+static void opj_close_from_file(void* p_user_data)
+{
+    FILE* p_file = (FILE*)p_user_data; /* the stream user data is the FILE* */
+    fclose(p_file); /* result is discarded: this callback returns void */
+}
+
 /* ---------------------------------------------------------------------- */
 #ifdef _WIN32
 #ifndef OPJ_STATIC
@@ -208,6 +219,10 @@
         l_codec->m_codec_data.m_decompression.opj_setup_decoder =
             (void (*)(void *, opj_dparameters_t *)) opj_j2k_setup_decoder;
 
+        l_codec->m_codec_data.m_decompression.opj_decoder_set_strict_mode =
+            (void (*)(void *, OPJ_BOOL)) opj_j2k_decoder_set_strict_mode;
+
+
         l_codec->m_codec_data.m_decompression.opj_read_tile_header =
             (OPJ_BOOL(*)(void *,
                          OPJ_UINT32*,
@@ -315,6 +330,9 @@
         l_codec->m_codec_data.m_decompression.opj_setup_decoder =
             (void (*)(void *, opj_dparameters_t *)) opj_jp2_setup_decoder;
 
+        l_codec->m_codec_data.m_decompression.opj_decoder_set_strict_mode =
+            (void (*)(void *, OPJ_BOOL)) opj_jp2_decoder_set_strict_mode;
+
         l_codec->m_codec_data.m_decompression.opj_set_decode_area =
             (OPJ_BOOL(*)(void *,
                          opj_image_t*,
@@ -415,6 +433,26 @@
     return OPJ_FALSE;
 }
 
+OPJ_API OPJ_BOOL OPJ_CALLCONV opj_decoder_set_strict_mode(opj_codec_t *p_codec,
+        OPJ_BOOL strict)
+{
+    opj_codec_private_t * l_private = (opj_codec_private_t *) p_codec;
+    /* Without a codec there is nothing to configure and nowhere to report. */
+    if (! l_private) {
+        return OPJ_FALSE;
+    }
+    /* The strict flag is only meaningful for a decompressor. */
+    if (! l_private->is_decompressor) {
+        opj_event_msg(&(l_private->m_event_mgr), EVT_ERROR,
+                      "Codec provided to the opj_decoder_set_strict_mode function is not a decompressor handler.\n");
+        return OPJ_FALSE;
+    }
+    /* Dispatch to the format-specific (J2K or JP2) setter. */
+    l_private->m_codec_data.m_decompression.opj_decoder_set_strict_mode(
+        l_private->m_codec, strict);
+    return OPJ_TRUE;
+}
+
 OPJ_BOOL OPJ_CALLCONV opj_read_header(opj_stream_t *p_stream,
                                       opj_codec_t *p_codec,
                                       opj_image_t **p_image)
@@ -1074,15 +1112,14 @@
         return NULL;
     }
 
-    opj_stream_set_user_data(l_stream, p_file,
-                             (opj_stream_free_user_data_fn) fclose);
+    opj_stream_set_user_data(l_stream, p_file, opj_close_from_file);
     opj_stream_set_user_data_length(l_stream,
                                     opj_get_data_length_from_file(p_file));
-    opj_stream_set_read_function(l_stream, (opj_stream_read_fn) opj_read_from_file);
+    opj_stream_set_read_function(l_stream, opj_read_from_file);
     opj_stream_set_write_function(l_stream,
                                   (opj_stream_write_fn) opj_write_from_file);
-    opj_stream_set_skip_function(l_stream, (opj_stream_skip_fn) opj_skip_from_file);
-    opj_stream_set_seek_function(l_stream, (opj_stream_seek_fn) opj_seek_from_file);
+    opj_stream_set_skip_function(l_stream, opj_skip_from_file);
+    opj_stream_set_seek_function(l_stream, opj_seek_from_file);
 
     return l_stream;
 }
diff --git a/third_party/libopenjpeg20/openjpeg.h b/third_party/libopenjpeg20/openjpeg.h
index 269ac32..ebce53d 100644
--- a/third_party/libopenjpeg20/openjpeg.h
+++ b/third_party/libopenjpeg20/openjpeg.h
@@ -76,6 +76,12 @@
 #define OPJ_DEPRECATED(func) func
 #endif
 
+#if defined(__GNUC__) && __GNUC__ >= 6 /* compilers reporting GCC version 6 or newer */
+#define OPJ_DEPRECATED_STRUCT_MEMBER(memb, msg) __attribute__ ((deprecated(msg))) memb
+#else /* otherwise: declare the member with no deprecation attribute */
+#define OPJ_DEPRECATED_STRUCT_MEMBER(memb, msg) memb
+#endif /* __GNUC__ >= 6 */
+
 #if defined(OPJ_STATIC) || !defined(_WIN32)
 /* http://gcc.gnu.org/wiki/Visibility */
 #   if !defined(_WIN32) && __GNUC__ >= 4
@@ -449,9 +455,9 @@
     char infile[OPJ_PATH_LEN];
     /** output file name */
     char outfile[OPJ_PATH_LEN];
-    /** DEPRECATED. Index generation is now handeld with the opj_encode_with_info() function. Set to NULL */
+    /** DEPRECATED. Index generation is now handled with the opj_encode_with_info() function. Set to NULL */
     int index_on;
-    /** DEPRECATED. Index generation is now handeld with the opj_encode_with_info() function. Set to NULL */
+    /** DEPRECATED. Index generation is now handled with the opj_encode_with_info() function. Set to NULL */
     char index[OPJ_PATH_LEN];
     /** subimage encoding: origin image offset in x direction */
     int image_offset_x0;
@@ -681,10 +687,10 @@
     OPJ_UINT32 x0;
     /** y component offset compared to the whole image */
     OPJ_UINT32 y0;
-    /** precision */
+    /** precision: number of bits per component per pixel */
     OPJ_UINT32 prec;
-    /** image depth in bits */
-    OPJ_UINT32 bpp;
+    /** obsolete: use prec instead */
+    OPJ_DEPRECATED_STRUCT_MEMBER(OPJ_UINT32 bpp, "Use prec instead");
     /** signed (1) / unsigned (0) */
     OPJ_UINT32 sgnd;
     /** number of decoded resolution */
@@ -738,10 +744,10 @@
     OPJ_UINT32 x0;
     /** y component offset compared to the whole image */
     OPJ_UINT32 y0;
-    /** precision */
+    /** precision: number of bits per component per pixel */
     OPJ_UINT32 prec;
-    /** image depth in bits */
-    OPJ_UINT32 bpp;
+    /** obsolete: use prec instead */
+    OPJ_DEPRECATED_STRUCT_MEMBER(OPJ_UINT32 bpp, "Use prec instead");
     /** signed (1) / unsigned (0) */
     OPJ_UINT32 sgnd;
 } opj_image_cmptparm_t;
@@ -1340,6 +1346,20 @@
         opj_dparameters_t *parameters);
 
 /**
+ * Set the strict decoding parameter for this decoder.  If strict decoding is enabled, partial
+ * bitstreams will fail to decode.  If strict decoding is disabled, the decoder will decode
+ * partial bitstreams as far as possible without reporting an error.
+ *
+ * @param p_codec       decompressor handler
+ * @param strict        OPJ_TRUE to enable strict decoding, OPJ_FALSE to disable
+ *
+ * @return OPJ_TRUE if the mode was applied; OPJ_FALSE for a NULL or non-decompressor codec
+ */
+
+OPJ_API OPJ_BOOL OPJ_CALLCONV opj_decoder_set_strict_mode(opj_codec_t *p_codec,
+        OPJ_BOOL strict);
+
+/**
  * Allocates worker threads for the compressor/decompressor.
  *
  * By default, only the main thread is used. If this function is not used,
@@ -1447,7 +1467,7 @@
  * Get the decoded tile from the codec
  *
  * @param   p_codec         the jpeg2000 codec.
- * @param   p_stream        input streamm
+ * @param   p_stream        input stream
  * @param   p_image         output image
  * @param   tile_index      index of the tile which will be decode
  *
@@ -1592,7 +1612,13 @@
  * <ul>
  * <li>PLT=YES/NO. Defaults to NO. If set to YES, PLT marker segments,
  *     indicating the length of each packet in the tile-part header, will be
- *     written. Since 2.3.2</li>
+ *     written. Since 2.4.0</li>
+ * <li>TLM=YES/NO. Defaults to NO (except for Cinema and IMF profiles).
+ *     If set to YES, TLM marker segments, indicating the length of each
+ *     tile-part, will be written. Since 2.4.0</li>
+ * <li>GUARD_BITS=value. Number of guard bits in [0,7] range. Default value is 2.
+ *     1 may be used sometimes (like in SMPTE DCP Bv2.1 Application Profile for 2K images).
+ *     Since 2.5.0</li>
  * </ul>
  *
  * @param p_codec       Compressor handle
@@ -1600,7 +1626,7 @@
  *                      array of strings. Each string is of the form KEY=VALUE.
  *
  * @return OPJ_TRUE in case of success.
- * @since 2.3.2
+ * @since 2.4.0
  */
 OPJ_API OPJ_BOOL OPJ_CALLCONV opj_encoder_set_extra_options(
     opj_codec_t *p_codec,
diff --git a/third_party/libopenjpeg20/opj_codec.h b/third_party/libopenjpeg20/opj_codec.h
index 8a8af91..7cff670 100644
--- a/third_party/libopenjpeg20/opj_codec.h
+++ b/third_party/libopenjpeg20/opj_codec.h
@@ -90,6 +90,9 @@
             /** Setup decoder function handler */
             void (*opj_setup_decoder)(void * p_codec, opj_dparameters_t * p_param);
 
+            /** Strict mode function handler */
+            void (*opj_decoder_set_strict_mode)(void * p_codec, OPJ_BOOL strict);
+
             /** Set decode area function handler */
             OPJ_BOOL(*opj_set_decode_area)(void * p_codec,
                                            opj_image_t * p_image,
diff --git a/third_party/libopenjpeg20/opj_intmath.h b/third_party/libopenjpeg20/opj_intmath.h
index f8cc613..1b0c9d0 100644
--- a/third_party/libopenjpeg20/opj_intmath.h
+++ b/third_party/libopenjpeg20/opj_intmath.h
@@ -277,7 +277,7 @@
 }
 
 /**
-Addtion two signed integers with a wrap-around behaviour.
+Addition of two signed integers with a wrap-around behaviour.
 Assumes complement-to-two signed integers.
 @param a
 @param b
diff --git a/third_party/libopenjpeg20/pi.c b/third_party/libopenjpeg20/pi.c
index 1430d12..ce86dcc 100644
--- a/third_party/libopenjpeg20/pi.c
+++ b/third_party/libopenjpeg20/pi.c
@@ -952,7 +952,7 @@
     *p_dy_min = 0x7fffffff;
 
     for (compno = 0; compno < p_image->numcomps; ++compno) {
-        /* aritmetic variables to calculate*/
+        /* arithmetic variables to calculate*/
         OPJ_UINT32 l_level_no;
         OPJ_UINT32 l_rx0, l_ry0, l_rx1, l_ry1;
         OPJ_UINT32 l_px0, l_py0, l_px1, py1;
@@ -1234,7 +1234,7 @@
     /* loop*/
     OPJ_UINT32 pino;
 
-    /* encoding prameters to set*/
+    /* encoding parameters to set*/
     OPJ_UINT32 l_bound;
 
     opj_pi_iterator_t * l_current_pi = 00;
@@ -1281,7 +1281,7 @@
     /* loop*/
     OPJ_UINT32 pino;
 
-    /* encoding prameters to set*/
+    /* encoding parameters to set*/
     OPJ_UINT32 l_bound;
 
     opj_pi_iterator_t * l_current_pi = 00;
@@ -1321,7 +1321,7 @@
     opj_poc_t *tcp = &tcps->pocs[pino];
 
     if (pos >= 0) {
-        for (i = pos; pos >= 0; i--) {
+        for (i = pos; i >= 0; i--) {
             switch (prog[i]) {
             case 'R':
                 if (tcp->res_t == tcp->resE) {
@@ -1410,11 +1410,11 @@
     OPJ_UINT32 pino;
     OPJ_UINT32 compno, resno;
 
-    /* to store w, h, dx and dy fro all components and resolutions */
+    /* to store w, h, dx and dy for all components and resolutions */
     OPJ_UINT32 * l_tmp_data;
     OPJ_UINT32 ** l_tmp_ptr;
 
-    /* encoding prameters to set */
+    /* encoding parameters to set */
     OPJ_UINT32 l_max_res;
     OPJ_UINT32 l_max_prec;
     OPJ_UINT32 l_tx0, l_tx1, l_ty0, l_ty1;
@@ -1633,11 +1633,11 @@
     OPJ_UINT32 pino;
     OPJ_UINT32 compno, resno;
 
-    /* to store w, h, dx and dy fro all components and resolutions*/
+    /* to store w, h, dx and dy for all components and resolutions*/
     OPJ_UINT32 * l_tmp_data;
     OPJ_UINT32 ** l_tmp_ptr;
 
-    /* encoding prameters to set*/
+    /* encoding parameters to set*/
     OPJ_UINT32 l_max_res;
     OPJ_UINT32 l_max_prec;
     OPJ_UINT32 l_tx0, l_tx1, l_ty0, l_ty1;
diff --git a/third_party/libopenjpeg20/sparse_array.c b/third_party/libopenjpeg20/sparse_array.c
index 7319292..50d1a90 100644
--- a/third_party/libopenjpeg20/sparse_array.c
+++ b/third_party/libopenjpeg20/sparse_array.c
@@ -69,7 +69,7 @@
         return NULL;
     }
     sa->data_blocks = (OPJ_INT32**) opj_calloc(sizeof(OPJ_INT32*),
-                      sa->block_count_hor * sa->block_count_ver);
+                      (size_t) sa->block_count_hor * sa->block_count_ver);
     if (sa->data_blocks == NULL) {
         opj_free(sa);
         return NULL;
@@ -235,7 +235,7 @@
             } else {
                 if (src_block == NULL) {
                     src_block = (OPJ_INT32*) opj_calloc(1,
-                                                        sa->block_width * sa->block_height * sizeof(OPJ_INT32));
+                                                        (size_t) sa->block_width * sa->block_height * sizeof(OPJ_INT32));
                     if (src_block == NULL) {
                         return OPJ_FALSE;
                     }
diff --git a/third_party/libopenjpeg20/t1.c b/third_party/libopenjpeg20/t1.c
index 1bea54b..f5fd233 100644
--- a/third_party/libopenjpeg20/t1.c
+++ b/third_party/libopenjpeg20/t1.c
@@ -217,6 +217,27 @@
                                    opj_mutex_t* p_manager_mutex,
                                    OPJ_BOOL check_pterm);
 
+/**
+Decode 1 HT code-block
+@param t1 T1 handle
+@param cblk Code-block coding parameters
+@param orient Sub-band identifier (the caller in this file passes band->bandno)
+@param roishift Region of interest shifting value
+@param cblksty Code-block style
+@param p_manager the event manager
+@param p_manager_mutex mutex for the event manager
+@param check_pterm whether PTERM correct termination should be checked
+*/
+OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
+                               opj_tcd_cblk_dec_t* cblk,
+                               OPJ_UINT32 orient,
+                               OPJ_UINT32 roishift,
+                               OPJ_UINT32 cblksty,
+                               opj_event_mgr_t *p_manager,
+                               opj_mutex_t* p_manager_mutex,
+                               OPJ_BOOL check_pterm);
+
+
 static OPJ_BOOL opj_t1_allocate_buffers(opj_t1_t *t1,
                                         OPJ_UINT32 w,
                                         OPJ_UINT32 h);
@@ -1665,18 +1686,34 @@
     }
     t1->mustuse_cblkdatabuffer = job->mustuse_cblkdatabuffer;
 
-    if (OPJ_FALSE == opj_t1_decode_cblk(
-                t1,
-                cblk,
-                band->bandno,
-                (OPJ_UINT32)tccp->roishift,
-                tccp->cblksty,
-                job->p_manager,
-                job->p_manager_mutex,
-                job->check_pterm)) {
-        *(job->pret) = OPJ_FALSE;
-        opj_free(job);
-        return;
+    if ((tccp->cblksty & J2K_CCP_CBLKSTY_HT) != 0) {
+        if (OPJ_FALSE == opj_t1_ht_decode_cblk(
+                    t1,
+                    cblk,
+                    band->bandno,
+                    (OPJ_UINT32)tccp->roishift,
+                    tccp->cblksty,
+                    job->p_manager,
+                    job->p_manager_mutex,
+                    job->check_pterm)) {
+            *(job->pret) = OPJ_FALSE;
+            opj_free(job);
+            return;
+        }
+    } else {
+        if (OPJ_FALSE == opj_t1_decode_cblk(
+                    t1,
+                    cblk,
+                    band->bandno,
+                    (OPJ_UINT32)tccp->roishift,
+                    tccp->cblksty,
+                    job->p_manager,
+                    job->p_manager_mutex,
+                    job->check_pterm)) {
+            *(job->pret) = OPJ_FALSE;
+            opj_free(job);
+            return;
+        }
     }
 
     x = cblk->x0 - band->x0;
diff --git a/third_party/libopenjpeg20/t1.h b/third_party/libopenjpeg20/t1.h
index 81ad0d0..ce43658 100644
--- a/third_party/libopenjpeg20/t1.h
+++ b/third_party/libopenjpeg20/t1.h
@@ -200,7 +200,7 @@
     OPJ_UINT32 flagssize;
     OPJ_BOOL   encoder;
 
-    /* Thre 3 variables below are only used by the decoder */
+    /* The 3 variables below are only used by the decoder */
     /* set to TRUE in multithreaded context */
     OPJ_BOOL     mustuse_cblkdatabuffer;
     /* Temporary buffer to concatenate all chunks of a codebock */
diff --git a/third_party/libopenjpeg20/t1_generate_luts.c b/third_party/libopenjpeg20/t1_generate_luts.c
index 9ad6f20..99c8c12 100644
--- a/third_party/libopenjpeg20/t1_generate_luts.c
+++ b/third_party/libopenjpeg20/t1_generate_luts.c
@@ -39,6 +39,12 @@
 
 #include "opj_includes.h"
 
+/* Defined elsewhere; declared here so main() can build and dump the tables. */
+extern OPJ_BOOL vlc_init_tables(void);
+extern OPJ_BOOL vlc_tables_initialized;
+extern int vlc_tbl0[1024];
+extern int vlc_tbl1[1024];
+
 static int t1_init_ctxno_zc(OPJ_UINT32 f, OPJ_UINT32 orient)
 {
     int h, v, d, n, t, hv;
@@ -307,5 +313,11 @@
     printf("static const OPJ_INT16 lut_nmsedec_ref0[1U << T1_NMSEDEC_BITS] = {\n    ");
     dump_array16(lut_nmsedec_ref0, 1U << T1_NMSEDEC_BITS);
 
+    vlc_tables_initialized = vlc_init_tables();
+    printf("static const OPJ_UINT16 vlc_tbl0[1024] = {\n    ");
+    dump_array16(vlc_tbl0, 1024);
+    printf("static const OPJ_UINT16 vlc_tbl1[1024] = {\n    ");
+    dump_array16(vlc_tbl1, 1024);
+
     return 0;
 }
diff --git a/third_party/libopenjpeg20/t1_ht_luts.h b/third_party/libopenjpeg20/t1_ht_luts.h
new file mode 100644
index 0000000..f39d5d0
--- /dev/null
+++ b/third_party/libopenjpeg20/t1_ht_luts.h
@@ -0,0 +1,261 @@
+static const OPJ_UINT16 vlc_tbl0[1024] = {
+    0x0023, 0x00a5, 0x0043, 0x0066, 0x0083, 0xa8ee, 0x0014, 0xd8df,
+    0x0023, 0x10be, 0x0043, 0xf5ff, 0x0083, 0x207e, 0x0055, 0x515f,
+    0x0023, 0x0035, 0x0043, 0x444e, 0x0083, 0xc4ce, 0x0014, 0xcccf,
+    0x0023, 0xe2fe, 0x0043, 0x99ff, 0x0083, 0x0096, 0x00c5, 0x313f,
+    0x0023, 0x00a5, 0x0043, 0x445e, 0x0083, 0xc8ce, 0x0014, 0x11df,
+    0x0023, 0xf4fe, 0x0043, 0xfcff, 0x0083, 0x009e, 0x0055, 0x0077,
+    0x0023, 0x0035, 0x0043, 0xf1ff, 0x0083, 0x88ae, 0x0014, 0x00b7,
+    0x0023, 0xf8fe, 0x0043, 0xe4ef, 0x0083, 0x888e, 0x00c5, 0x111f,
+    0x0023, 0x00a5, 0x0043, 0x0066, 0x0083, 0xa8ee, 0x0014, 0x54df,
+    0x0023, 0x10be, 0x0043, 0x22ef, 0x0083, 0x207e, 0x0055, 0x227f,
+    0x0023, 0x0035, 0x0043, 0x444e, 0x0083, 0xc4ce, 0x0014, 0x11bf,
+    0x0023, 0xe2fe, 0x0043, 0x00f7, 0x0083, 0x0096, 0x00c5, 0x223f,
+    0x0023, 0x00a5, 0x0043, 0x445e, 0x0083, 0xc8ce, 0x0014, 0x00d7,
+    0x0023, 0xf4fe, 0x0043, 0xbaff, 0x0083, 0x009e, 0x0055, 0x006f,
+    0x0023, 0x0035, 0x0043, 0xe6ff, 0x0083, 0x88ae, 0x0014, 0xa2af,
+    0x0023, 0xf8fe, 0x0043, 0x00e7, 0x0083, 0x888e, 0x00c5, 0x222f,
+    0x0002, 0x00c5, 0x0084, 0x207e, 0x0002, 0xc4ce, 0x0024, 0x00f7,
+    0x0002, 0xa2fe, 0x0044, 0x0056, 0x0002, 0x009e, 0x0014, 0x00d7,
+    0x0002, 0x10be, 0x0084, 0x0066, 0x0002, 0x88ae, 0x0024, 0x11df,
+    0x0002, 0xa8ee, 0x0044, 0x0036, 0x0002, 0x888e, 0x0014, 0x111f,
+    0x0002, 0x00c5, 0x0084, 0x006e, 0x0002, 0x88ce, 0x0024, 0x88ff,
+    0x0002, 0xb8fe, 0x0044, 0x444e, 0x0002, 0x0096, 0x0014, 0x00b7,
+    0x0002, 0xe4fe, 0x0084, 0x445e, 0x0002, 0x00a6, 0x0024, 0x00e7,
+    0x0002, 0x54de, 0x0044, 0x222e, 0x0002, 0x003e, 0x0014, 0x0077,
+    0x0002, 0x00c5, 0x0084, 0x207e, 0x0002, 0xc4ce, 0x0024, 0xf1ff,
+    0x0002, 0xa2fe, 0x0044, 0x0056, 0x0002, 0x009e, 0x0014, 0x11bf,
+    0x0002, 0x10be, 0x0084, 0x0066, 0x0002, 0x88ae, 0x0024, 0x22ef,
+    0x0002, 0xa8ee, 0x0044, 0x0036, 0x0002, 0x888e, 0x0014, 0x227f,
+    0x0002, 0x00c5, 0x0084, 0x006e, 0x0002, 0x88ce, 0x0024, 0xe4ef,
+    0x0002, 0xb8fe, 0x0044, 0x444e, 0x0002, 0x0096, 0x0014, 0xa2af,
+    0x0002, 0xe4fe, 0x0084, 0x445e, 0x0002, 0x00a6, 0x0024, 0xd8df,
+    0x0002, 0x54de, 0x0044, 0x222e, 0x0002, 0x003e, 0x0014, 0x515f,
+    0x0002, 0x0055, 0x0084, 0x0066, 0x0002, 0x88de, 0x0024, 0x32ff,
+    0x0002, 0x11fe, 0x0044, 0x444e, 0x0002, 0x00ae, 0x0014, 0x00b7,
+    0x0002, 0x317e, 0x0084, 0x515e, 0x0002, 0x00c6, 0x0024, 0x00d7,
+    0x0002, 0x20ee, 0x0044, 0x111e, 0x0002, 0x009e, 0x0014, 0x0077,
+    0x0002, 0x0055, 0x0084, 0x545e, 0x0002, 0x44ce, 0x0024, 0x00e7,
+    0x0002, 0xf1fe, 0x0044, 0x0036, 0x0002, 0x00a6, 0x0014, 0x555f,
+    0x0002, 0x74fe, 0x0084, 0x113e, 0x0002, 0x20be, 0x0024, 0x747f,
+    0x0002, 0xc4de, 0x0044, 0xf8ff, 0x0002, 0x0096, 0x0014, 0x222f,
+    0x0002, 0x0055, 0x0084, 0x0066, 0x0002, 0x88de, 0x0024, 0x00f7,
+    0x0002, 0x11fe, 0x0044, 0x444e, 0x0002, 0x00ae, 0x0014, 0x888f,
+    0x0002, 0x317e, 0x0084, 0x515e, 0x0002, 0x00c6, 0x0024, 0xc8cf,
+    0x0002, 0x20ee, 0x0044, 0x111e, 0x0002, 0x009e, 0x0014, 0x006f,
+    0x0002, 0x0055, 0x0084, 0x545e, 0x0002, 0x44ce, 0x0024, 0xd1df,
+    0x0002, 0xf1fe, 0x0044, 0x0036, 0x0002, 0x00a6, 0x0014, 0x227f,
+    0x0002, 0x74fe, 0x0084, 0x113e, 0x0002, 0x20be, 0x0024, 0x22bf,
+    0x0002, 0xc4de, 0x0044, 0x22ef, 0x0002, 0x0096, 0x0014, 0x323f,
+    0x0003, 0xd4de, 0xf4fd, 0xfcff, 0x0014, 0x113e, 0x0055, 0x888f,
+    0x0003, 0x32be, 0x0085, 0x00e7, 0x0025, 0x515e, 0xaafe, 0x727f,
+    0x0003, 0x44ce, 0xf8fd, 0x44ef, 0x0014, 0x647e, 0x0045, 0xa2af,
+    0x0003, 0x00a6, 0x555d, 0x99df, 0xf1fd, 0x0036, 0xf5fe, 0x626f,
+    0x0003, 0xd1de, 0xf4fd, 0xe6ff, 0x0014, 0x717e, 0x0055, 0xb1bf,
+    0x0003, 0x88ae, 0x0085, 0xd5df, 0x0025, 0x444e, 0xf2fe, 0x667f,
+    0x0003, 0x00c6, 0xf8fd, 0xe2ef, 0x0014, 0x545e, 0x0045, 0x119f,
+    0x0003, 0x0096, 0x555d, 0xc8cf, 0xf1fd, 0x111e, 0xc8ee, 0x0067,
+    0x0003, 0xd4de, 0xf4fd, 0xf3ff, 0x0014, 0x113e, 0x0055, 0x11bf,
+    0x0003, 0x32be, 0x0085, 0xd8df, 0x0025, 0x515e, 0xaafe, 0x222f,
+    0x0003, 0x44ce, 0xf8fd, 0x00f7, 0x0014, 0x647e, 0x0045, 0x989f,
+    0x0003, 0x00a6, 0x555d, 0x00d7, 0xf1fd, 0x0036, 0xf5fe, 0x446f,
+    0x0003, 0xd1de, 0xf4fd, 0xb9ff, 0x0014, 0x717e, 0x0055, 0x00b7,
+    0x0003, 0x88ae, 0x0085, 0xdcdf, 0x0025, 0x444e, 0xf2fe, 0x0077,
+    0x0003, 0x00c6, 0xf8fd, 0xe4ef, 0x0014, 0x545e, 0x0045, 0x737f,
+    0x0003, 0x0096, 0x555d, 0xb8bf, 0xf1fd, 0x111e, 0xc8ee, 0x323f,
+    0x0002, 0x00a5, 0x0084, 0x407e, 0x0002, 0x10de, 0x0024, 0x11df,
+    0x0002, 0x72fe, 0x0044, 0x0056, 0x0002, 0xa8ae, 0x0014, 0xb2bf,
+    0x0002, 0x0096, 0x0084, 0x0066, 0x0002, 0x00c6, 0x0024, 0x00e7,
+    0x0002, 0xc8ee, 0x0044, 0x222e, 0x0002, 0x888e, 0x0014, 0x0077,
+    0x0002, 0x00a5, 0x0084, 0x006e, 0x0002, 0x88ce, 0x0024, 0x00f7,
+    0x0002, 0x91fe, 0x0044, 0x0036, 0x0002, 0xa2ae, 0x0014, 0xaaaf,
+    0x0002, 0xb8fe, 0x0084, 0x005e, 0x0002, 0x00be, 0x0024, 0xc4cf,
+    0x0002, 0x44ee, 0x0044, 0xf4ff, 0x0002, 0x223e, 0x0014, 0x111f,
+    0x0002, 0x00a5, 0x0084, 0x407e, 0x0002, 0x10de, 0x0024, 0x99ff,
+    0x0002, 0x72fe, 0x0044, 0x0056, 0x0002, 0xa8ae, 0x0014, 0x00b7,
+    0x0002, 0x0096, 0x0084, 0x0066, 0x0002, 0x00c6, 0x0024, 0x00d7,
+    0x0002, 0xc8ee, 0x0044, 0x222e, 0x0002, 0x888e, 0x0014, 0x444f,
+    0x0002, 0x00a5, 0x0084, 0x006e, 0x0002, 0x88ce, 0x0024, 0xe2ef,
+    0x0002, 0x91fe, 0x0044, 0x0036, 0x0002, 0xa2ae, 0x0014, 0x447f,
+    0x0002, 0xb8fe, 0x0084, 0x005e, 0x0002, 0x00be, 0x0024, 0x009f,
+    0x0002, 0x44ee, 0x0044, 0x76ff, 0x0002, 0x223e, 0x0014, 0x313f,
+    0x0003, 0x00c6, 0x0085, 0xd9ff, 0xf2fd, 0x647e, 0xf1fe, 0x99bf,
+    0x0003, 0xa2ae, 0x0025, 0x66ef, 0xf4fd, 0x0056, 0xe2ee, 0x737f,
+    0x0003, 0x98be, 0x0045, 0x00f7, 0xf8fd, 0x0066, 0x76fe, 0x889f,
+    0x0003, 0x888e, 0x0015, 0xd5df, 0x00a5, 0x222e, 0x98de, 0x444f,
+    0x0003, 0xb2be, 0x0085, 0xfcff, 0xf2fd, 0x226e, 0x0096, 0x00b7,
+    0x0003, 0xaaae, 0x0025, 0xd1df, 0xf4fd, 0x0036, 0xd4de, 0x646f,
+    0x0003, 0xa8ae, 0x0045, 0xeaef, 0xf8fd, 0x445e, 0xe8ee, 0x717f,
+    0x0003, 0x323e, 0x0015, 0xc4cf, 0x00a5, 0xfaff, 0x88ce, 0x313f,
+    0x0003, 0x00c6, 0x0085, 0x77ff, 0xf2fd, 0x647e, 0xf1fe, 0xb3bf,
+    0x0003, 0xa2ae, 0x0025, 0x00e7, 0xf4fd, 0x0056, 0xe2ee, 0x0077,
+    0x0003, 0x98be, 0x0045, 0xe4ef, 0xf8fd, 0x0066, 0x76fe, 0x667f,
+    0x0003, 0x888e, 0x0015, 0x00d7, 0x00a5, 0x222e, 0x98de, 0x333f,
+    0x0003, 0xb2be, 0x0085, 0x75ff, 0xf2fd, 0x226e, 0x0096, 0x919f,
+    0x0003, 0xaaae, 0x0025, 0x99df, 0xf4fd, 0x0036, 0xd4de, 0x515f,
+    0x0003, 0xa8ae, 0x0045, 0xecef, 0xf8fd, 0x445e, 0xe8ee, 0x727f,
+    0x0003, 0x323e, 0x0015, 0xb1bf, 0x00a5, 0xf3ff, 0x88ce, 0x111f,
+    0x0003, 0x54de, 0xf2fd, 0x111e, 0x0014, 0x647e, 0xf8fe, 0xcccf,
+    0x0003, 0x91be, 0x0045, 0x22ef, 0x0025, 0x222e, 0xf3fe, 0x888f,
+    0x0003, 0x00c6, 0x0085, 0x00f7, 0x0014, 0x115e, 0xfcfe, 0xa8af,
+    0x0003, 0x00a6, 0x0035, 0xc8df, 0xf1fd, 0x313e, 0x66fe, 0x646f,
+    0x0003, 0xc8ce, 0xf2fd, 0xf5ff, 0x0014, 0x0066, 0xf4fe, 0xbabf,
+    0x0003, 0x22ae, 0x0045, 0x00e7, 0x0025, 0x323e, 0xeafe, 0x737f,
+    0x0003, 0xb2be, 0x0085, 0x55df, 0x0014, 0x0056, 0x717e, 0x119f,
+    0x0003, 0x0096, 0x0035, 0xc4cf, 0xf1fd, 0x333e, 0xe8ee, 0x444f,
+    0x0003, 0x54de, 0xf2fd, 0x111e, 0x0014, 0x647e, 0xf8fe, 0x99bf,
+    0x0003, 0x91be, 0x0045, 0xe2ef, 0x0025, 0x222e, 0xf3fe, 0x667f,
+    0x0003, 0x00c6, 0x0085, 0xe4ef, 0x0014, 0x115e, 0xfcfe, 0x989f,
+    0x0003, 0x00a6, 0x0035, 0x00d7, 0xf1fd, 0x313e, 0x66fe, 0x226f,
+    0x0003, 0xc8ce, 0xf2fd, 0xb9ff, 0x0014, 0x0066, 0xf4fe, 0x00b7,
+    0x0003, 0x22ae, 0x0045, 0xd1df, 0x0025, 0x323e, 0xeafe, 0x0077,
+    0x0003, 0xb2be, 0x0085, 0xecef, 0x0014, 0x0056, 0x717e, 0x727f,
+    0x0003, 0x0096, 0x0035, 0xb8bf, 0xf1fd, 0x333e, 0xe8ee, 0x545f,
+    0xf1fc, 0xd1de, 0xfafd, 0x00d7, 0xf8fc, 0x0016, 0xfffd, 0x747f,
+    0xf4fc, 0x717e, 0xf3fd, 0xb3bf, 0xf2fc, 0xeaef, 0xe8ee, 0x444f,
+    0xf1fc, 0x22ae, 0x0005, 0xb8bf, 0xf8fc, 0x00f7, 0xfcfe, 0x0077,
+    0xf4fc, 0x115e, 0xf5fd, 0x757f, 0xf2fc, 0xd8df, 0xe2ee, 0x333f,
+    0xf1fc, 0xb2be, 0xfafd, 0x88cf, 0xf8fc, 0xfbff, 0xfffd, 0x737f,
+    0xf4fc, 0x006e, 0xf3fd, 0x00b7, 0xf2fc, 0x66ef, 0xf9fe, 0x313f,
+    0xf1fc, 0x009e, 0x0005, 0xbabf, 0xf8fc, 0xfdff, 0xf6fe, 0x0067,
+    0xf4fc, 0x0026, 0xf5fd, 0x888f, 0xf2fc, 0xdcdf, 0xd4de, 0x222f,
+    0xf1fc, 0xd1de, 0xfafd, 0xc4cf, 0xf8fc, 0x0016, 0xfffd, 0x727f,
+    0xf4fc, 0x717e, 0xf3fd, 0x99bf, 0xf2fc, 0xecef, 0xe8ee, 0x0047,
+    0xf1fc, 0x22ae, 0x0005, 0x00a7, 0xf8fc, 0xf7ff, 0xfcfe, 0x0057,
+    0xf4fc, 0x115e, 0xf5fd, 0x0097, 0xf2fc, 0xd5df, 0xe2ee, 0x0037,
+    0xf1fc, 0xb2be, 0xfafd, 0x00c7, 0xf8fc, 0xfeff, 0xfffd, 0x667f,
+    0xf4fc, 0x006e, 0xf3fd, 0xa8af, 0xf2fc, 0x00e7, 0xf9fe, 0x323f,
+    0xf1fc, 0x009e, 0x0005, 0xb1bf, 0xf8fc, 0xe4ef, 0xf6fe, 0x545f,
+    0xf4fc, 0x0026, 0xf5fd, 0x0087, 0xf2fc, 0x99df, 0xd4de, 0x111f
+};
+
+static const OPJ_UINT16 vlc_tbl1[1024] = {
+    0x0013, 0x0065, 0x0043, 0x00de, 0x0083, 0x888d, 0x0023, 0x444e,
+    0x0013, 0x00a5, 0x0043, 0x88ae, 0x0083, 0x0035, 0x0023, 0x00d7,
+    0x0013, 0x00c5, 0x0043, 0x009e, 0x0083, 0x0055, 0x0023, 0x222e,
+    0x0013, 0x0095, 0x0043, 0x007e, 0x0083, 0x10fe, 0x0023, 0x0077,
+    0x0013, 0x0065, 0x0043, 0x88ce, 0x0083, 0x888d, 0x0023, 0x111e,
+    0x0013, 0x00a5, 0x0043, 0x005e, 0x0083, 0x0035, 0x0023, 0x00e7,
+    0x0013, 0x00c5, 0x0043, 0x00be, 0x0083, 0x0055, 0x0023, 0x11ff,
+    0x0013, 0x0095, 0x0043, 0x003e, 0x0083, 0x40ee, 0x0023, 0xa2af,
+    0x0013, 0x0065, 0x0043, 0x00de, 0x0083, 0x888d, 0x0023, 0x444e,
+    0x0013, 0x00a5, 0x0043, 0x88ae, 0x0083, 0x0035, 0x0023, 0x44ef,
+    0x0013, 0x00c5, 0x0043, 0x009e, 0x0083, 0x0055, 0x0023, 0x222e,
+    0x0013, 0x0095, 0x0043, 0x007e, 0x0083, 0x10fe, 0x0023, 0x00b7,
+    0x0013, 0x0065, 0x0043, 0x88ce, 0x0083, 0x888d, 0x0023, 0x111e,
+    0x0013, 0x00a5, 0x0043, 0x005e, 0x0083, 0x0035, 0x0023, 0xc4cf,
+    0x0013, 0x00c5, 0x0043, 0x00be, 0x0083, 0x0055, 0x0023, 0x00f7,
+    0x0013, 0x0095, 0x0043, 0x003e, 0x0083, 0x40ee, 0x0023, 0x006f,
+    0x0001, 0x0084, 0x0001, 0x0056, 0x0001, 0x0014, 0x0001, 0x00d7,
+    0x0001, 0x0024, 0x0001, 0x0096, 0x0001, 0x0045, 0x0001, 0x0077,
+    0x0001, 0x0084, 0x0001, 0x00c6, 0x0001, 0x0014, 0x0001, 0x888f,
+    0x0001, 0x0024, 0x0001, 0x00f7, 0x0001, 0x0035, 0x0001, 0x222f,
+    0x0001, 0x0084, 0x0001, 0x40fe, 0x0001, 0x0014, 0x0001, 0x00b7,
+    0x0001, 0x0024, 0x0001, 0x00bf, 0x0001, 0x0045, 0x0001, 0x0067,
+    0x0001, 0x0084, 0x0001, 0x00a6, 0x0001, 0x0014, 0x0001, 0x444f,
+    0x0001, 0x0024, 0x0001, 0x00e7, 0x0001, 0x0035, 0x0001, 0x113f,
+    0x0001, 0x0084, 0x0001, 0x0056, 0x0001, 0x0014, 0x0001, 0x00cf,
+    0x0001, 0x0024, 0x0001, 0x0096, 0x0001, 0x0045, 0x0001, 0x006f,
+    0x0001, 0x0084, 0x0001, 0x00c6, 0x0001, 0x0014, 0x0001, 0x009f,
+    0x0001, 0x0024, 0x0001, 0x00ef, 0x0001, 0x0035, 0x0001, 0x323f,
+    0x0001, 0x0084, 0x0001, 0x40fe, 0x0001, 0x0014, 0x0001, 0x00af,
+    0x0001, 0x0024, 0x0001, 0x44ff, 0x0001, 0x0045, 0x0001, 0x005f,
+    0x0001, 0x0084, 0x0001, 0x00a6, 0x0001, 0x0014, 0x0001, 0x007f,
+    0x0001, 0x0024, 0x0001, 0x00df, 0x0001, 0x0035, 0x0001, 0x111f,
+    0x0001, 0x0024, 0x0001, 0x0056, 0x0001, 0x0085, 0x0001, 0x00bf,
+    0x0001, 0x0014, 0x0001, 0x00f7, 0x0001, 0x00c6, 0x0001, 0x0077,
+    0x0001, 0x0024, 0x0001, 0xf8ff, 0x0001, 0x0045, 0x0001, 0x007f,
+    0x0001, 0x0014, 0x0001, 0x00df, 0x0001, 0x00a6, 0x0001, 0x313f,
+    0x0001, 0x0024, 0x0001, 0x222e, 0x0001, 0x0085, 0x0001, 0x00b7,
+    0x0001, 0x0014, 0x0001, 0x44ef, 0x0001, 0xa2ae, 0x0001, 0x0067,
+    0x0001, 0x0024, 0x0001, 0x51ff, 0x0001, 0x0045, 0x0001, 0x0097,
+    0x0001, 0x0014, 0x0001, 0x00cf, 0x0001, 0x0036, 0x0001, 0x223f,
+    0x0001, 0x0024, 0x0001, 0x0056, 0x0001, 0x0085, 0x0001, 0xb2bf,
+    0x0001, 0x0014, 0x0001, 0x40ef, 0x0001, 0x00c6, 0x0001, 0x006f,
+    0x0001, 0x0024, 0x0001, 0x72ff, 0x0001, 0x0045, 0x0001, 0x009f,
+    0x0001, 0x0014, 0x0001, 0x00d7, 0x0001, 0x00a6, 0x0001, 0x444f,
+    0x0001, 0x0024, 0x0001, 0x222e, 0x0001, 0x0085, 0x0001, 0xa8af,
+    0x0001, 0x0014, 0x0001, 0x00e7, 0x0001, 0xa2ae, 0x0001, 0x005f,
+    0x0001, 0x0024, 0x0001, 0x44ff, 0x0001, 0x0045, 0x0001, 0x888f,
+    0x0001, 0x0014, 0x0001, 0xaaaf, 0x0001, 0x0036, 0x0001, 0x111f,
+    0x0002, 0xf8fe, 0x0024, 0x0056, 0x0002, 0x00b6, 0x0085, 0x66ff,
+    0x0002, 0x00ce, 0x0014, 0x111e, 0x0002, 0x0096, 0x0035, 0xa8af,
+    0x0002, 0x00f6, 0x0024, 0x313e, 0x0002, 0x00a6, 0x0045, 0xb3bf,
+    0x0002, 0xb2be, 0x0014, 0xf5ff, 0x0002, 0x0066, 0x517e, 0x545f,
+    0x0002, 0xf2fe, 0x0024, 0x222e, 0x0002, 0x22ae, 0x0085, 0x44ef,
+    0x0002, 0x00c6, 0x0014, 0xf4ff, 0x0002, 0x0076, 0x0035, 0x447f,
+    0x0002, 0x40de, 0x0024, 0x323e, 0x0002, 0x009e, 0x0045, 0x00d7,
+    0x0002, 0x88be, 0x0014, 0xfaff, 0x0002, 0x115e, 0xf1fe, 0x444f,
+    0x0002, 0xf8fe, 0x0024, 0x0056, 0x0002, 0x00b6, 0x0085, 0xc8ef,
+    0x0002, 0x00ce, 0x0014, 0x111e, 0x0002, 0x0096, 0x0035, 0x888f,
+    0x0002, 0x00f6, 0x0024, 0x313e, 0x0002, 0x00a6, 0x0045, 0x44df,
+    0x0002, 0xb2be, 0x0014, 0xa8ff, 0x0002, 0x0066, 0x517e, 0x006f,
+    0x0002, 0xf2fe, 0x0024, 0x222e, 0x0002, 0x22ae, 0x0085, 0x00e7,
+    0x0002, 0x00c6, 0x0014, 0xe2ef, 0x0002, 0x0076, 0x0035, 0x727f,
+    0x0002, 0x40de, 0x0024, 0x323e, 0x0002, 0x009e, 0x0045, 0xb1bf,
+    0x0002, 0x88be, 0x0014, 0x73ff, 0x0002, 0x115e, 0xf1fe, 0x333f,
+    0x0001, 0x0084, 0x0001, 0x20ee, 0x0001, 0x00c5, 0x0001, 0xc4cf,
+    0x0001, 0x0044, 0x0001, 0x32ff, 0x0001, 0x0015, 0x0001, 0x888f,
+    0x0001, 0x0084, 0x0001, 0x0066, 0x0001, 0x0025, 0x0001, 0x00af,
+    0x0001, 0x0044, 0x0001, 0x22ef, 0x0001, 0x00a6, 0x0001, 0x005f,
+    0x0001, 0x0084, 0x0001, 0x444e, 0x0001, 0x00c5, 0x0001, 0xcccf,
+    0x0001, 0x0044, 0x0001, 0x00f7, 0x0001, 0x0015, 0x0001, 0x006f,
+    0x0001, 0x0084, 0x0001, 0x0056, 0x0001, 0x0025, 0x0001, 0x009f,
+    0x0001, 0x0044, 0x0001, 0x00df, 0x0001, 0x30fe, 0x0001, 0x222f,
+    0x0001, 0x0084, 0x0001, 0x20ee, 0x0001, 0x00c5, 0x0001, 0xc8cf,
+    0x0001, 0x0044, 0x0001, 0x11ff, 0x0001, 0x0015, 0x0001, 0x0077,
+    0x0001, 0x0084, 0x0001, 0x0066, 0x0001, 0x0025, 0x0001, 0x007f,
+    0x0001, 0x0044, 0x0001, 0x00e7, 0x0001, 0x00a6, 0x0001, 0x0037,
+    0x0001, 0x0084, 0x0001, 0x444e, 0x0001, 0x00c5, 0x0001, 0x00b7,
+    0x0001, 0x0044, 0x0001, 0x00bf, 0x0001, 0x0015, 0x0001, 0x003f,
+    0x0001, 0x0084, 0x0001, 0x0056, 0x0001, 0x0025, 0x0001, 0x0097,
+    0x0001, 0x0044, 0x0001, 0x00d7, 0x0001, 0x30fe, 0x0001, 0x111f,
+    0x0002, 0xa8ee, 0x0044, 0x888e, 0x0002, 0x00d6, 0x00c5, 0xf3ff,
+    0x0002, 0xfcfe, 0x0025, 0x003e, 0x0002, 0x00b6, 0x0055, 0xd8df,
+    0x0002, 0xf8fe, 0x0044, 0x0066, 0x0002, 0x207e, 0x0085, 0x99ff,
+    0x0002, 0x00e6, 0x00f5, 0x0036, 0x0002, 0x00a6, 0x0015, 0x009f,
+    0x0002, 0xf2fe, 0x0044, 0x0076, 0x0002, 0x44ce, 0x00c5, 0x76ff,
+    0x0002, 0xf1fe, 0x0025, 0x444e, 0x0002, 0x00ae, 0x0055, 0xc8cf,
+    0x0002, 0xf4fe, 0x0044, 0x445e, 0x0002, 0x10be, 0x0085, 0xe4ef,
+    0x0002, 0x54de, 0x00f5, 0x111e, 0x0002, 0x0096, 0x0015, 0x222f,
+    0x0002, 0xa8ee, 0x0044, 0x888e, 0x0002, 0x00d6, 0x00c5, 0xfaff,
+    0x0002, 0xfcfe, 0x0025, 0x003e, 0x0002, 0x00b6, 0x0055, 0x11bf,
+    0x0002, 0xf8fe, 0x0044, 0x0066, 0x0002, 0x207e, 0x0085, 0x22ef,
+    0x0002, 0x00e6, 0x00f5, 0x0036, 0x0002, 0x00a6, 0x0015, 0x227f,
+    0x0002, 0xf2fe, 0x0044, 0x0076, 0x0002, 0x44ce, 0x00c5, 0xd5ff,
+    0x0002, 0xf1fe, 0x0025, 0x444e, 0x0002, 0x00ae, 0x0055, 0x006f,
+    0x0002, 0xf4fe, 0x0044, 0x445e, 0x0002, 0x10be, 0x0085, 0x11df,
+    0x0002, 0x54de, 0x00f5, 0x111e, 0x0002, 0x0096, 0x0015, 0x515f,
+    0x0003, 0x00f6, 0x0014, 0x111e, 0x0044, 0x888e, 0x00a5, 0xd4df,
+    0x0003, 0xa2ae, 0x0055, 0x76ff, 0x0024, 0x223e, 0x00b6, 0xaaaf,
+    0x0003, 0x00e6, 0x0014, 0xf5ff, 0x0044, 0x0066, 0x0085, 0xcccf,
+    0x0003, 0x009e, 0x00c5, 0x44ef, 0x0024, 0x0036, 0xf8fe, 0x317f,
+    0x0003, 0xe8ee, 0x0014, 0xf1ff, 0x0044, 0x0076, 0x00a5, 0xc4cf,
+    0x0003, 0x227e, 0x0055, 0xd1df, 0x0024, 0x444e, 0xf4fe, 0x515f,
+    0x0003, 0x00d6, 0x0014, 0xe2ef, 0x0044, 0x445e, 0x0085, 0x22bf,
+    0x0003, 0x0096, 0x00c5, 0xc8df, 0x0024, 0x222e, 0xf2fe, 0x226f,
+    0x0003, 0x00f6, 0x0014, 0x111e, 0x0044, 0x888e, 0x00a5, 0xb1bf,
+    0x0003, 0xa2ae, 0x0055, 0x33ff, 0x0024, 0x223e, 0x00b6, 0xa8af,
+    0x0003, 0x00e6, 0x0014, 0xb9ff, 0x0044, 0x0066, 0x0085, 0xa8bf,
+    0x0003, 0x009e, 0x00c5, 0xe4ef, 0x0024, 0x0036, 0xf8fe, 0x646f,
+    0x0003, 0xe8ee, 0x0014, 0xfcff, 0x0044, 0x0076, 0x00a5, 0xc8cf,
+    0x0003, 0x227e, 0x0055, 0xeaef, 0x0024, 0x444e, 0xf4fe, 0x747f,
+    0x0003, 0x00d6, 0x0014, 0xfaff, 0x0044, 0x445e, 0x0085, 0xb2bf,
+    0x0003, 0x0096, 0x00c5, 0x44df, 0x0024, 0x222e, 0xf2fe, 0x313f,
+    0x00f3, 0xfafe, 0xf1fd, 0x0036, 0x0004, 0x32be, 0x0075, 0x11df,
+    0x00f3, 0x54de, 0xf2fd, 0xe4ef, 0x00d5, 0x717e, 0xfcfe, 0x737f,
+    0x00f3, 0xf3fe, 0xf8fd, 0x111e, 0x0004, 0x0096, 0x0055, 0xb1bf,
+    0x00f3, 0x00ce, 0x00b5, 0xd8df, 0xf4fd, 0x0066, 0xb9fe, 0x545f,
+    0x00f3, 0x76fe, 0xf1fd, 0x0026, 0x0004, 0x00a6, 0x0075, 0x009f,
+    0x00f3, 0x00ae, 0xf2fd, 0xf7ff, 0x00d5, 0x0046, 0xf5fe, 0x747f,
+    0x00f3, 0x00e6, 0xf8fd, 0x0016, 0x0004, 0x0086, 0x0055, 0x888f,
+    0x00f3, 0x00c6, 0x00b5, 0xe2ef, 0xf4fd, 0x115e, 0xa8ee, 0x113f,
+    0x00f3, 0xfafe, 0xf1fd, 0x0036, 0x0004, 0x32be, 0x0075, 0xd1df,
+    0x00f3, 0x54de, 0xf2fd, 0xfbff, 0x00d5, 0x717e, 0xfcfe, 0x447f,
+    0x00f3, 0xf3fe, 0xf8fd, 0x111e, 0x0004, 0x0096, 0x0055, 0x727f,
+    0x00f3, 0x00ce, 0x00b5, 0x22ef, 0xf4fd, 0x0066, 0xb9fe, 0x444f,
+    0x00f3, 0x76fe, 0xf1fd, 0x0026, 0x0004, 0x00a6, 0x0075, 0x11bf,
+    0x00f3, 0x00ae, 0xf2fd, 0xffff, 0x00d5, 0x0046, 0xf5fe, 0x323f,
+    0x00f3, 0x00e6, 0xf8fd, 0x0016, 0x0004, 0x0086, 0x0055, 0x006f,
+    0x00f3, 0x00c6, 0x00b5, 0xb8bf, 0xf4fd, 0x115e, 0xa8ee, 0x222f
+};
\ No newline at end of file
diff --git a/third_party/libopenjpeg20/t2.c b/third_party/libopenjpeg20/t2.c
index 1481e16..ebda005 100644
--- a/third_party/libopenjpeg20/t2.c
+++ b/third_party/libopenjpeg20/t2.c
@@ -502,7 +502,6 @@
                                     l_current_pi->precno, l_current_pi->layno, skip_packet ? "skipped" : "kept");
                 */
             }
-
             if (!skip_packet) {
                 l_nb_bytes_read = 0;
 
@@ -1124,7 +1123,7 @@
 
     /*
     When the marker PPT/PPM is used the packet header are store in PPT/PPM marker
-    This part deal with this caracteristic
+    This part deals with this characteristic
     step 1: Read packet header in the saved structure
     step 2: Return to codestream for decoding
     */
@@ -1229,6 +1228,7 @@
                     ++i;
                 }
 
+                l_cblk->Mb = (OPJ_UINT32)l_band->numbps;
                 l_cblk->numbps = (OPJ_UINT32)l_band->numbps + 1 - i;
                 l_cblk->numlenbits = 3;
             }
@@ -1258,34 +1258,63 @@
             }
             n = (OPJ_INT32)l_cblk->numnewpasses;
 
-            do {
-                OPJ_UINT32 bit_number;
-                l_cblk->segs[l_segno].numnewpasses = (OPJ_UINT32)opj_int_min((OPJ_INT32)(
-                        l_cblk->segs[l_segno].maxpasses - l_cblk->segs[l_segno].numpasses), n);
-                bit_number = l_cblk->numlenbits + opj_uint_floorlog2(
-                                 l_cblk->segs[l_segno].numnewpasses);
-                if (bit_number > 32) {
-                    opj_event_msg(p_manager, EVT_ERROR,
-                                  "Invalid bit number %d in opj_t2_read_packet_header()\n",
-                                  bit_number);
-                    opj_bio_destroy(l_bio);
-                    return OPJ_FALSE;
-                }
-                l_cblk->segs[l_segno].newlen = opj_bio_read(l_bio, bit_number);
-                JAS_FPRINTF(stderr, "included=%d numnewpasses=%d increment=%d len=%d \n",
-                            l_included, l_cblk->segs[l_segno].numnewpasses, l_increment,
-                            l_cblk->segs[l_segno].newlen);
-
-                n -= (OPJ_INT32)l_cblk->segs[l_segno].numnewpasses;
-                if (n > 0) {
-                    ++l_segno;
-
-                    if (! opj_t2_init_seg(l_cblk, l_segno, p_tcp->tccps[p_pi->compno].cblksty, 0)) {
+            if ((p_tcp->tccps[p_pi->compno].cblksty & J2K_CCP_CBLKSTY_HT) != 0)
+                do {
+                    OPJ_UINT32 bit_number;
+                    l_cblk->segs[l_segno].numnewpasses = l_segno == 0 ? 1 : (OPJ_UINT32)n;
+                    bit_number = l_cblk->numlenbits + opj_uint_floorlog2(
+                                     l_cblk->segs[l_segno].numnewpasses);
+                    if (bit_number > 32) {
+                        opj_event_msg(p_manager, EVT_ERROR,
+                                      "Invalid bit number %d in opj_t2_read_packet_header()\n",
+                                      bit_number);
                         opj_bio_destroy(l_bio);
                         return OPJ_FALSE;
                     }
-                }
-            } while (n > 0);
+                    l_cblk->segs[l_segno].newlen = opj_bio_read(l_bio, bit_number);
+                    JAS_FPRINTF(stderr, "included=%d numnewpasses=%d increment=%d len=%d \n",
+                                l_included, l_cblk->segs[l_segno].numnewpasses, l_increment,
+                                l_cblk->segs[l_segno].newlen);
+
+                    n -= (OPJ_INT32)l_cblk->segs[l_segno].numnewpasses;
+                    if (n > 0) {
+                        ++l_segno;
+
+                        if (! opj_t2_init_seg(l_cblk, l_segno, p_tcp->tccps[p_pi->compno].cblksty, 0)) {
+                            opj_bio_destroy(l_bio);
+                            return OPJ_FALSE;
+                        }
+                    }
+                } while (n > 0);
+            else
+                do {
+                    OPJ_UINT32 bit_number;
+                    l_cblk->segs[l_segno].numnewpasses = (OPJ_UINT32)opj_int_min((OPJ_INT32)(
+                            l_cblk->segs[l_segno].maxpasses - l_cblk->segs[l_segno].numpasses), n);
+                    bit_number = l_cblk->numlenbits + opj_uint_floorlog2(
+                                     l_cblk->segs[l_segno].numnewpasses);
+                    if (bit_number > 32) {
+                        opj_event_msg(p_manager, EVT_ERROR,
+                                      "Invalid bit number %d in opj_t2_read_packet_header()\n",
+                                      bit_number);
+                        opj_bio_destroy(l_bio);
+                        return OPJ_FALSE;
+                    }
+                    l_cblk->segs[l_segno].newlen = opj_bio_read(l_bio, bit_number);
+                    JAS_FPRINTF(stderr, "included=%d numnewpasses=%d increment=%d len=%d \n",
+                                l_included, l_cblk->segs[l_segno].numnewpasses, l_increment,
+                                l_cblk->segs[l_segno].newlen);
+
+                    n -= (OPJ_INT32)l_cblk->segs[l_segno].numnewpasses;
+                    if (n > 0) {
+                        ++l_segno;
+
+                        if (! opj_t2_init_seg(l_cblk, l_segno, p_tcp->tccps[p_pi->compno].cblksty, 0)) {
+                            opj_bio_destroy(l_bio);
+                            return OPJ_FALSE;
+                        }
+                    }
+                } while (n > 0);
 
             ++l_cblk;
         }
@@ -1348,6 +1377,7 @@
     opj_tcd_cblk_dec_t* l_cblk = 00;
     opj_tcd_resolution_t* l_res =
         &p_tile->comps[p_pi->compno].resolutions[p_pi->resno];
+    OPJ_BOOL partial_buffer = OPJ_FALSE;
 
     OPJ_ARG_NOT_USED(p_t2);
     OPJ_ARG_NOT_USED(pack_info);
@@ -1367,6 +1397,12 @@
         for (cblkno = 0; cblkno < l_nb_code_blocks; ++cblkno) {
             opj_tcd_seg_t *l_seg = 00;
 
+            // if we have a partial data stream, set numchunks to zero
+            // since we have no data to actually decode.
+            if (partial_buffer) {
+                l_cblk->numchunks = 0;
+            }
+
             if (!l_cblk->numnewpasses) {
                 /* nothing to do */
                 ++l_cblk;
@@ -1389,12 +1425,32 @@
                 /* Check possible overflow (on l_current_data only, assumes input args already checked) then size */
                 if ((((OPJ_SIZE_T)l_current_data + (OPJ_SIZE_T)l_seg->newlen) <
                         (OPJ_SIZE_T)l_current_data) ||
-                        (l_current_data + l_seg->newlen > p_src_data + p_max_length)) {
-                    opj_event_msg(p_manager, EVT_ERROR,
-                                  "read: segment too long (%d) with max (%d) for codeblock %d (p=%d, b=%d, r=%d, c=%d)\n",
-                                  l_seg->newlen, p_max_length, cblkno, p_pi->precno, bandno, p_pi->resno,
-                                  p_pi->compno);
-                    return OPJ_FALSE;
+                        (l_current_data + l_seg->newlen > p_src_data + p_max_length) ||
+                        (partial_buffer)) {
+                    if (p_t2->cp->strict) {
+                        opj_event_msg(p_manager, EVT_ERROR,
+                                      "read: segment too long (%d) with max (%d) for codeblock %d (p=%d, b=%d, r=%d, c=%d)\n",
+                                      l_seg->newlen, p_max_length, cblkno, p_pi->precno, bandno, p_pi->resno,
+                                      p_pi->compno);
+                        return OPJ_FALSE;
+                    } else {
+                        opj_event_msg(p_manager, EVT_WARNING,
+                                      "read: segment too long (%d) with max (%d) for codeblock %d (p=%d, b=%d, r=%d, c=%d)\n",
+                                      l_seg->newlen, p_max_length, cblkno, p_pi->precno, bandno, p_pi->resno,
+                                      p_pi->compno);
+                        // skip this codeblock since it is a partial read
+                        partial_buffer = OPJ_TRUE;
+                        l_cblk->numchunks = 0;
+
+                        l_seg->numpasses += l_seg->numnewpasses;
+                        l_cblk->numnewpasses -= l_seg->numnewpasses;
+                        if (l_cblk->numnewpasses > 0) {
+                            ++l_seg;
+                            ++l_cblk->numsegs;
+                            break;
+                        }
+                        continue;
+                    }
                 }
 
 #ifdef USE_JPWL
@@ -1456,8 +1512,12 @@
         ++l_band;
     }
 
-    *(p_data_read) = (OPJ_UINT32)(l_current_data - p_src_data);
-
+    // return the number of bytes read
+    if (partial_buffer) {
+        *(p_data_read) = p_max_length;
+    } else {
+        *(p_data_read) = (OPJ_UINT32)(l_current_data - p_src_data);
+    }
 
     return OPJ_TRUE;
 }
@@ -1519,11 +1579,18 @@
                 /* Check possible overflow then size */
                 if (((*p_data_read + l_seg->newlen) < (*p_data_read)) ||
                         ((*p_data_read + l_seg->newlen) > p_max_length)) {
-                    opj_event_msg(p_manager, EVT_ERROR,
-                                  "skip: segment too long (%d) with max (%d) for codeblock %d (p=%d, b=%d, r=%d, c=%d)\n",
-                                  l_seg->newlen, p_max_length, cblkno, p_pi->precno, bandno, p_pi->resno,
-                                  p_pi->compno);
-                    return OPJ_FALSE;
+                    if (p_t2->cp->strict) {
+                        opj_event_msg(p_manager, EVT_ERROR,
+                                      "skip: segment too long (%d) with max (%d) for codeblock %d (p=%d, b=%d, r=%d, c=%d)\n",
+                                      l_seg->newlen, p_max_length, cblkno, p_pi->precno, bandno, p_pi->resno,
+                                      p_pi->compno);
+                        return OPJ_FALSE;
+                    } else {
+                        opj_event_msg(p_manager, EVT_WARNING,
+                                      "skip: segment too long (%d) with max (%d) for codeblock %d (p=%d, b=%d, r=%d, c=%d)\n",
+                                      l_seg->newlen, p_max_length, cblkno, p_pi->precno, bandno, p_pi->resno,
+                                      p_pi->compno);
+                    }
                 }
 
 #ifdef USE_JPWL
diff --git a/third_party/libopenjpeg20/tcd.h b/third_party/libopenjpeg20/tcd.h
index f1b52b8..340c2bf 100644
--- a/third_party/libopenjpeg20/tcd.h
+++ b/third_party/libopenjpeg20/tcd.h
@@ -122,6 +122,11 @@
     opj_tcd_seg_data_chunk_t* chunks; /* Array of chunks */
     /* position of the code-blocks : left upper corner (x0, y0) right low corner (x1,y1) */
     OPJ_INT32 x0, y0, x1, y1;
+    /* Mb is the maximum number of bit-planes available for the representation of
+       coefficients in any sub-band, b, as defined in Equation (E-2). See
+       Section B.10.5 of the standard */
+    OPJ_UINT32 Mb;  /* currently used only to check if HT decoding is correct */
+    /* numbps is Mb - P as defined in Section B.10.5 of the standard */
     OPJ_UINT32 numbps;
     /* number of bits for len, for the current packet. Transitory value */
     OPJ_UINT32 numlenbits;