OpenJPH
Open-source implementation of JPEG2000 Part-15
Loading...
Searching...
No Matches
ojph_block_decoder64.cpp
Go to the documentation of this file.
1//***************************************************************************/
2// This software is released under the 2-Clause BSD license, included
3// below.
4//
5// Copyright (c) 2019, Aous Naman
6// Copyright (c) 2019, Kakadu Software Pty Ltd, Australia
7// Copyright (c) 2019, The University of New South Wales, Australia
8//
9// Redistribution and use in source and binary forms, with or without
10// modification, are permitted provided that the following conditions are
11// met:
12//
13// 1. Redistributions of source code must retain the above copyright
14// notice, this list of conditions and the following disclaimer.
15//
16// 2. Redistributions in binary form must reproduce the above copyright
17// notice, this list of conditions and the following disclaimer in the
18// documentation and/or other materials provided with the distribution.
19//
20// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
21// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
23// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
26// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
27// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
28// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
29// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31//***************************************************************************/
32// This file is part of the OpenJPH software implementation.
33// File: ojph_block_decoder64.cpp
34// Author: Aous Naman
35// Date: 13 May 2022
36//***************************************************************************/
37
38//***************************************************************************/
43#include <string>
44#include <iostream>
45
46#include <cassert>
47#include <cstring>
48#include "ojph_block_common.h"
49#include "ojph_block_decoder.h"
50#include "ojph_arch.h"
51#include "ojph_message.h"
52
53namespace ojph {
54 namespace local {
55
56 //************************************************************************/
63 struct dec_mel_st {
64 dec_mel_st() : data(NULL), tmp(0), bits(0), size(0), unstuff(false),
65 k(0), num_runs(0), runs(0)
66 {}
67 // data decoding machinery
68 ui8* data;
69 ui64 tmp;
70 int bits;
71 int size;
72 bool unstuff;
73 int k;
74
75 // queue of decoded runs
76 int num_runs;
77 ui64 runs;
78 };
79
80 //************************************************************************/
92 static inline
93 void mel_read(dec_mel_st *melp)
94 {
95 if (melp->bits > 32) //there are enough bits in the tmp variable
96 return; // return without reading new data
97
98 ui32 val = 0xFFFFFFFF; // feed in 0xFF if buffer is exhausted
99 if (melp->size > 4) { // if there is data in the MEL segment
100 val = *(ui32*)melp->data; // read 32 bits from MEL data
101 melp->data += 4; // advance pointer
102 melp->size -= 4; // reduce counter
103 }
104 else if (melp->size > 0)
105 { // 4 or less
106 int i = 0;
107 while (melp->size > 1) {
108 ui32 v = *melp->data++; // read one byte at a time
109 ui32 m = ~(0xFFu << i); // mask of location
110 val = (val & m) | (v << i);// put one byte in its correct location
111 --melp->size;
112 i += 8;
113 }
114 // size equal to 1
115 ui32 v = *melp->data++; // the one before the last is different
116 v |= 0xF; // MEL and VLC segments can overlap
117 ui32 m = ~(0xFFu << i);
118 val = (val & m) | (v << i);
119 --melp->size;
120 }
121
122 // next we unstuff them before adding them to the buffer
123 int bits = 32 - melp->unstuff; // number of bits in val, subtract 1 if
124 // the previously read byte requires
125 // unstuffing
126
127 // data is unstuffed and accumulated in t
128 // bits has the number of bits in t
129 ui32 t = val & 0xFF;
130 bool unstuff = ((val & 0xFF) == 0xFF); // true if we need unstuffing
131 bits -= unstuff; // there is one less bit in t if unstuffing is needed
132 t = t << (8 - unstuff); // move up to make room for the next byte
133
134 //this is a repeat of the above
135 t |= (val>>8) & 0xFF;
136 unstuff = (((val >> 8) & 0xFF) == 0xFF);
137 bits -= unstuff;
138 t = t << (8 - unstuff);
139
140 t |= (val>>16) & 0xFF;
141 unstuff = (((val >> 16) & 0xFF) == 0xFF);
142 bits -= unstuff;
143 t = t << (8 - unstuff);
144
145 t |= (val>>24) & 0xFF;
146 melp->unstuff = (((val >> 24) & 0xFF) == 0xFF);
147
148 // move t to tmp, and push the result all the way up, so we read from
149 // the MSB
150 melp->tmp |= ((ui64)t) << (64 - bits - melp->bits);
151 melp->bits += bits; //increment the number of bits in tmp
152 }
153
154 //************************************************************************/
169 static inline
171 {
172 static const int mel_exp[13] = { //MEL exponents
173 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 5
174 };
175
176 if (melp->bits < 6) // if there are less than 6 bits in tmp
177 mel_read(melp); // then read from the MEL bitstream
178 // 6 bits is the largest decodable MEL cwd
179
180 //repeat so long that there is enough decodable bits in tmp,
181 // and the runs store is not full (num_runs < 8)
182 while (melp->bits >= 6 && melp->num_runs < 8)
183 {
184 int eval = mel_exp[melp->k]; // number of bits associated with state
185 int run = 0;
186 if (melp->tmp & (1ull<<63)) //The next bit to decode (stored in MSB)
187 { //one is found
188 run = 1 << eval;
189 run--; // consecutive runs of 0 events - 1
190 melp->k = melp->k + 1 < 12 ? melp->k + 1 : 12;//increment, max is 12
191 melp->tmp <<= 1; // consume one bit from tmp
192 melp->bits -= 1;
193 run = run << 1; // a stretch of zeros not terminating in one
194 }
195 else
196 { //0 is found
197 run = (int)(melp->tmp >> (63 - eval)) & ((1 << eval) - 1);
198 melp->k = melp->k - 1 > 0 ? melp->k - 1 : 0; //decrement, min is 0
199 melp->tmp <<= eval + 1; //consume eval + 1 bits (max is 6)
200 melp->bits -= eval + 1;
201 run = (run << 1) + 1; // a stretch of zeros terminating with one
202 }
203 eval = melp->num_runs * 7; // 7 bits per run
204 melp->runs &= ~((ui64)0x3F << eval); // 6 bits are sufficient
205 melp->runs |= ((ui64)run) << eval; // store the value in runs
206 melp->num_runs++; // increment count
207 }
208 }
209
210 //************************************************************************/
220 static inline
221 void mel_init(dec_mel_st *melp, ui8* bbuf, int lcup, int scup)
222 {
223 melp->data = bbuf + lcup - scup; // move the pointer to the start of MEL
224 melp->bits = 0; // 0 bits in tmp
225 melp->tmp = 0; //
226 melp->unstuff = false; // no unstuffing
227 melp->size = scup - 1; // size is the length of MEL+VLC-1
228 melp->k = 0; // 0 for state
229 melp->num_runs = 0; // num_runs is 0
230 melp->runs = 0; //
231
232 //This code is borrowed; original is for a different architecture
233 //These few lines take care of the case where data is not at a multiple
234 // of 4 boundary. It reads 1,2,3 up to 4 bytes from the MEL segment
235 int num = 4 - (int)(intptr_t(melp->data) & 0x3);
236 for (int i = 0; i < num; ++i) { // this code is similar to mel_read
237 assert(melp->unstuff == false || melp->data[0] <= 0x8F);
238 ui64 d = (melp->size > 0) ? *melp->data : 0xFF;//if buffer is consumed
239 //set data to 0xFF
240 if (melp->size == 1) d |= 0xF; //if this is MEL+VLC-1, set LSBs to 0xF
241 // see the standard
242 melp->data += melp->size-- > 0; //increment if the end is not reached
243 int d_bits = 8 - melp->unstuff; //if unstuffing is needed, reduce by 1
244 melp->tmp = (melp->tmp << d_bits) | d; //store bits in tmp
245 melp->bits += d_bits; //increment tmp by number of bits
246 melp->unstuff = ((d & 0xFF) == 0xFF); //true of next byte needs
247 //unstuffing
248 }
249 melp->tmp <<= (64 - melp->bits); //push all the way up so the first bit
250 // is the MSB
251 }
252
253 //************************************************************************/
259 static inline
261 {
262 if (melp->num_runs == 0) //if no runs, decode more bit from MEL segment
263 mel_decode(melp);
264
265 int t = melp->runs & 0x7F; //retrieve one run
266 melp->runs >>= 7; // remove the retrieved run
267 melp->num_runs--;
268 return t; // return run
269 }
270
271 //************************************************************************/
275 struct rev_struct {
276 rev_struct() : data(NULL), tmp(0), bits(0), size(0), unstuff(false)
277 {}
278 //storage
279 ui8* data;
280 ui64 tmp;
281 ui32 bits;
282 int size;
283 bool unstuff;
285 };
286
287 //************************************************************************/
304 static inline
306 {
307 // process 1 bytes
308 ui8 val = 0; // insert 0s at the end -- the standard says that the
309 // bitstream must contain all needed bits. Therefore
310 // if the whole bitstream is consumed and bits are still
311 // needed, then this is an error condition, but we are
312 // lenient -- it is also possible that we are decoding
313 // more bits than what we are actually need.
314 if (vlcp->size > 0) // if there are more than 3 bytes left in VLC
315 {
316 val = *vlcp->data; // then read 8 bits
317 --vlcp->data; // increment data pointer
318 --vlcp->size; // decrement number of bytes in the buffer
319 }
320
321 // accumulate in tmp, and increment bits, check if unstuffing is needed
322 ui8 t = (vlcp->unstuff && ((val & 0x7F) == 0x7F)) ? 1 : 0;
323 val = (ui8)(val & (0xFFU >> t)); // protect against erroneous 1 in MSB
324 vlcp->tmp |= (ui64)val << vlcp->bits;
325 vlcp->bits += 8 - t;
326 vlcp->unstuff = val > 0x8F;
327 }
328
329 //************************************************************************/
342 static inline
343 void rev_init8(rev_struct *vlcp, ui8* data, int lcup, int scup)
344 {
345 //first byte has only the upper 4 bits
346 vlcp->data = data + lcup - 2;
347
348 //size can not be larger than this, in fact it should be smaller
349 vlcp->size = scup - 2;
350
351 ui8 val = *vlcp->data--; // read one byte (this is a half byte)
352
353 // the first byte is treated different to other bytes, because only
354 // the MSB nibble is part of the VLC code.
355 val = (ui8)(val >> 4);
356 ui8 t = ((val & 0x7) == 0x7) ? 1 : 0; // unstuffing is needed
357 val = (ui8)(val & (0xFU >> t)); // protect against erroneous 1 in MSB
358 vlcp->tmp = val;
359 vlcp->bits = 4 - t;
360 vlcp->unstuff = val > 0x8; //this is useful for the next byte
361 }
362
363 //************************************************************************/
370 static inline
372 {
373 while (vlcp->bits <= 56)
374 rev_read8(vlcp); // read 8 bits, but unstuffing might reduce this
375 return vlcp->tmp; // return unstuff decoded bits
376 }
377
378 //************************************************************************/
384 static inline
386 {
387 assert(num_bits <= vlcp->bits); // vlcp->tmp must have more than num_bits
388 vlcp->tmp >>= num_bits; // remove bits
389 vlcp->bits -= num_bits; // decrement the number of bits
390 return vlcp->tmp;
391 }
392
393 //************************************************************************/
404 static inline
406 {
407 //process 4 bytes at a time
408 if (mrp->bits > 32)
409 return;
410 ui32 val = 0;
411 if (mrp->size > 3) // If there are 3 byte or more
412 { // (mrp->data - 3) move pointer back to read 32 bits at once
413 val = *(ui32*)(mrp->data - 3); // read 32 bits
414 mrp->data -= 4; // move back pointer
415 mrp->size -= 4; // reduce count
416 }
417 else if (mrp->size > 0)
418 {
419 int i = 24;
420 while (mrp->size > 0) {
421 ui32 v = *mrp->data--; // read one byte at a time
422 val |= (v << i); // put byte in its correct location
423 --mrp->size;
424 i -= 8;
425 }
426 }
427
428 //accumulate in tmp, and keep count in bits
429 ui32 bits, tmp = val >> 24;
430
431 //test if the last byte > 0x8F (unstuff must be true) and this is 0x7F
432 bits = 8 - ((mrp->unstuff && (((val >> 24) & 0x7F) == 0x7F)) ? 1 : 0);
433 bool unstuff = (val >> 24) > 0x8F;
434
435 //process the next byte
436 tmp |= ((val >> 16) & 0xFF) << bits;
437 bits += 8 - ((unstuff && (((val >> 16) & 0x7F) == 0x7F)) ? 1 : 0);
438 unstuff = ((val >> 16) & 0xFF) > 0x8F;
439
440 tmp |= ((val >> 8) & 0xFF) << bits;
441 bits += 8 - ((unstuff && (((val >> 8) & 0x7F) == 0x7F)) ? 1 : 0);
442 unstuff = ((val >> 8) & 0xFF) > 0x8F;
443
444 tmp |= (val & 0xFF) << bits;
445 bits += 8 - ((unstuff && ((val & 0x7F) == 0x7F)) ? 1 : 0);
446 unstuff = (val & 0xFF) > 0x8F;
447
448 mrp->tmp |= (ui64)tmp << mrp->bits; // move data to mrp pointer
449 mrp->bits += bits;
450 mrp->unstuff = unstuff; // next byte
451 }
452
453 //************************************************************************/
468 static inline
469 void rev_init_mrp(rev_struct *mrp, ui8* data, int lcup, int len2)
470 {
471 mrp->data = data + lcup + len2 - 1;
472 mrp->size = len2;
473 mrp->unstuff = true;
474 mrp->bits = 0;
475 mrp->tmp = 0;
476
477 //This code is designed for an architecture that read address should
478 // align to the read size (address multiple of 4 if read size is 4)
479 //These few lines take care of the case where data is not at a multiple
480 // of 4 boundary. It reads 1,2,3 up to 4 bytes from the MRP stream
481 int num = 1 + (int)(intptr_t(mrp->data) & 0x3);
482 for (int i = 0; i < num; ++i) {
483 ui64 d;
484 //read a byte, 0 if no more data
485 d = (mrp->size-- > 0) ? *mrp->data-- : 0;
486 //check if unstuffing is needed
487 ui32 d_bits = 8 - ((mrp->unstuff && ((d & 0x7F) == 0x7F)) ? 1 : 0);
488 mrp->tmp |= d << mrp->bits; // move data to vlcp->tmp
489 mrp->bits += d_bits;
490 mrp->unstuff = d > 0x8F; // for next byte
491 }
492 rev_read_mrp(mrp);
493 }
494
495 //************************************************************************/
502 static inline
504 {
505 if (mrp->bits < 32) // if there are less than 32 bits in mrp->tmp
506 {
507 rev_read_mrp(mrp); // read 30-32 bits from mrp
508 if (mrp->bits < 32) // if there is a space of 32 bits
509 rev_read_mrp(mrp); // read more
510 }
511 return (ui32)mrp->tmp; // return the head of mrp->tmp
512 }
513
514 //************************************************************************/
520 static inline
522 {
523 assert(num_bits <= mrp->bits); // we must not consume more than mrp->bits
524 mrp->tmp >>= num_bits; // discard the lowest num_bits bits
525 mrp->bits -= num_bits;
526 return (ui32)mrp->tmp; // return data after consumption
527 }
528
529 //************************************************************************/
540
541 //************************************************************************/
559 template<int X>
560 static inline
562 {
563 assert(msp->bits <= 32); // assert that there is a space for 32 bits
564
565 ui32 val = 0;
566 if (msp->size > 3) {
567 val = *(ui32*)msp->data; // read 32 bits
568 msp->data += 4; // increment pointer
569 msp->size -= 4; // reduce size
570 }
571 else if (msp->size > 0)
572 {
573 int i = 0;
574 val = X != 0 ? 0xFFFFFFFFu : 0;
575 while (msp->size > 0) {
576 ui32 v = *msp->data++; // read one byte at a time
577 ui32 m = ~(0xFFu << i); // mask of location
578 val = (val & m) | (v << i);// put one byte in its correct location
579 --msp->size;
580 i += 8;
581 }
582 }
583 else
584 val = X != 0 ? 0xFFFFFFFFu : 0;
585
586 // we accumulate in t and keep a count of the number of bits in bits
587 ui32 bits = 8 - msp->unstuff;
588 ui32 t = val & 0xFF;
589 bool unstuff = ((val & 0xFF) == 0xFF); // Do we need unstuffing next?
590
591 t |= ((val >> 8) & 0xFF) << bits;
592 bits += 8 - unstuff;
593 unstuff = (((val >> 8) & 0xFF) == 0xFF);
594
595 t |= ((val >> 16) & 0xFF) << bits;
596 bits += 8 - unstuff;
597 unstuff = (((val >> 16) & 0xFF) == 0xFF);
598
599 t |= ((val >> 24) & 0xFF) << bits;
600 bits += 8 - unstuff;
601 msp->unstuff = (((val >> 24) & 0xFF) == 0xFF); // for next byte
602
603 msp->tmp |= ((ui64)t) << msp->bits; // move data to msp->tmp
604 msp->bits += bits;
605 }
606
607 //************************************************************************/
623 template<ui8 X>
624 static inline
626 {
627 ui8 val = X;
628 if (msp->size > 0) {
629 val = *msp->data; // read 8 bits
630 ++msp->data; // increment pointer
631 --msp->size; // reduce size
632 }
633
634 // unstuff and accumulate
635 ui8 t = msp->unstuff ? 1 : 0;
636 val = (ui8)(val & (0xFFU >> t));
637 msp->unstuff = (val == 0xFF);
638 msp->tmp |= ((ui64)val) << msp->bits; // move data to msp->tmp
639 msp->bits += 8 - t;
640 }
641
642 //************************************************************************/
651 template<int X>
652 static inline
653 void frwd_init(frwd_struct64 *msp, const ui8* data, int size)
654 {
655 msp->data = data;
656 msp->tmp = 0;
657 msp->bits = 0;
658 msp->unstuff = 0;
659 msp->size = size;
660
661 //This code is designed for an architecture that read address should
662 // align to the read size (address multiple of 4 if read size is 4)
663 //These few lines take care of the case where data is not at a multiple
664 // of 4 boundary. It reads 1,2,3 up to 4 bytes from the bitstream
665 int num = 4 - (int)(intptr_t(msp->data) & 0x3);
666 for (int i = 0; i < num; ++i)
667 {
668 ui64 d;
669 //read a byte if the buffer is not exhausted, otherwise set it to X
670 d = msp->size-- > 0 ? *msp->data++ : X;
671 msp->tmp |= (d << msp->bits); // store data in msp->tmp
672 msp->bits += 8 - msp->unstuff; // number of bits added to msp->tmp
673 msp->unstuff = ((d & 0xFF) == 0xFF); // unstuffing for next byte
674 }
675 frwd_read<X>(msp); // read 32 bits more
676 }
677
678 //************************************************************************/
687 template<ui8 X>
688 static inline
689 void frwd_init8(frwd_struct64 *msp, const ui8* data, int size)
690 {
691 msp->data = data;
692 msp->tmp = 0;
693 msp->bits = 0;
694 msp->unstuff = 0;
695 msp->size = size;
696 frwd_read8<X>(msp); // read 8 bits
697 }
698
699 //************************************************************************/
705 static inline
706 void frwd_advance(frwd_struct64 *msp, ui32 num_bits)
707 {
708 assert(num_bits <= msp->bits);
709 msp->tmp >>= num_bits; // consume num_bits
710 msp->bits -= num_bits;
711 }
712
713 //************************************************************************/
720 template<int X>
721 static inline
723 {
724 if (msp->bits < 32)
725 {
726 frwd_read<X>(msp);
727 if (msp->bits < 32) //need to test
728 frwd_read<X>(msp);
729 }
730 return (ui32)msp->tmp;
731 }
732
733 //************************************************************************/
740 template<ui8 X>
741 static inline
743 {
744 while (msp->bits <= 56)
745 frwd_read8<X>(msp);
746 return msp->tmp;
747 }
748
749 //************************************************************************/
766 bool ojph_decode_codeblock64(ui8* coded_data, ui64* decoded_data,
767 ui32 missing_msbs, ui32 num_passes,
768 ui32 lengths1, ui32 lengths2,
769 ui32 width, ui32 height, ui32 stride,
770 bool stripe_causal)
771 {
772 // static bool insufficient_precision = false;
773 // static bool modify_code = false;
774 // static bool truncate_spp_mrp = false;
775
776 if (num_passes > 1 && lengths2 == 0)
777 {
778 OJPH_WARN(0x00010001, "A malformed codeblock that has more than "
779 "one coding pass, but zero length for "
780 "2nd and potential 3rd pass.");
781 num_passes = 1;
782 }
783
784 if (num_passes > 3)
785 {
786 OJPH_WARN(0x00010002, "We do not support more than 3 coding passes; "
787 "This codeblocks has %d passes.",
788 num_passes);
789 return false;
790 }
791
792 // if (missing_msbs > 30) // p < 0
793 // {
794 // if (insufficient_precision == false)
795 // {
796 // insufficient_precision = true;
797 // OJPH_WARN(0x00010003, "32 bits are not enough to decode this "
798 // "codeblock. This message will not be "
799 // "displayed again.");
800 // }
801 // return false;
802 // }
803 // else if (missing_msbs == 30) // p == 0
804 // { // not enough precision to decode and set the bin center to 1
805 // if (modify_code == false) {
806 // modify_code = true;
807 // OJPH_WARN(0x00010004, "Not enough precision to decode the cleanup "
808 // "pass. The code can be modified to support "
809 // "this case. This message will not be "
810 // "displayed again.");
811 // }
812 // return false; // 32 bits are not enough to decode this
813 // }
814 // else if (missing_msbs == 29) // if p is 1, then num_passes must be 1
815 // {
816 // if (num_passes > 1) {
817 // num_passes = 1;
818 // if (truncate_spp_mrp == false) {
819 // truncate_spp_mrp = true;
820 // OJPH_WARN(0x00010005, "Not enough precision to decode the SgnProp "
821 // "nor MagRef passes; both will be skipped. "
822 // "This message will not be displayed "
823 // "again.");
824 // }
825 // }
826 // }
827 ui32 p = 62 - missing_msbs; // The least significant bitplane for CUP
828 // There is a way to handle the case of p == 0, but a different path
829 // is required
830
831 if (lengths1 < 2)
832 {
833 OJPH_WARN(0x00010006, "Wrong codeblock length.");
834 return false;
835 }
836
837 // read scup and fix the bytes there
838 int lcup, scup;
839 lcup = (int)lengths1; // length of CUP
840 //scup is the length of MEL + VLC
841 scup = (((int)coded_data[lcup-1]) << 4) + (coded_data[lcup-2] & 0xF);
842 if (scup < 2 || scup > lcup || scup > 4079) //something is wrong
843 return false;
844
845 // The temporary storage scratch holds two types of data in an
846 // interleaved fashion. The interleaving allows us to use one
847 // memory pointer.
848 // We have one entry for a decoded VLC code, and one entry for UVLC.
849 // Entries are 16 bits each, corresponding to one quad,
850 // but since we want to use XMM registers of the SSE family
851 // of SIMD; we allocated 16 bytes or more per quad row; that is,
852 // the width is no smaller than 16 bytes (or 8 entries), and the
853 // height is 512 quads
854 // Each VLC entry contains, in the following order, starting
855 // from MSB
856 // e_k (4bits), e_1 (4bits), rho (4bits), useless for step 2 (4bits)
857 // Each entry in UVLC contains u_q
858 // One extra row to handle the case of SPP propagating downwards
859 // when codeblock width is 4
860 ui16 scratch[8 * 513] = {0}; // 8 kB
861
862 // We need an extra two entries (one inf and one u_q) beyond
863 // the last column.
864 // If the block width is 4 (2 quads), then we use sstr of 8
865 // (enough for 4 quads). If width is 8 (4 quads) we use
866 // sstr is 16 (enough for 8 quads). For a width of 16 (8
867 // quads), we use 24 (enough for 12 quads).
868 ui32 sstr = ((width + 2u) + 7u) & ~7u; // multiples of 8
869
870 ui32 mmsbp2 = missing_msbs + 2;
871
872 // The cleanup pass is decoded in two steps; in step one,
873 // the VLC and MEL segments are decoded, generating a record that
874 // has 2 bytes per quad. The 2 bytes contain, u, rho, e^1 & e^k.
875 // This information should be sufficient for the next step.
876 // In step 2, we decode the MagSgn segment.
877
878 // step 1 decoding VLC and MEL segments
879 {
880 // init structures
881 dec_mel_st mel;
882 mel_init(&mel, coded_data, lcup, scup);
883 rev_struct vlc;
884 rev_init8(&vlc, coded_data, lcup, scup);
885
886 int run = mel_get_run(&mel); // decode runs of events from MEL bitstrm
887 // data represented as runs of 0 events
888 // See mel_decode description
889
890 ui64 vlc_val;
891 ui32 c_q = 0;
892 ui16 *sp = scratch;
893 //initial quad row
894 for (ui32 x = 0; x < width; sp += 4)
895 {
896 // decode VLC
898
899 // first quad
900 vlc_val = rev_fetch64(&vlc);
901
902 //decode VLC using the context c_q and the head of VLC bitstream
903 ui16 t0 = vlc_tbl0[ c_q + (vlc_val & 0x7F) ];
904
905 // if context is zero, use one MEL event
906 if (c_q == 0) //zero context
907 {
908 run -= 2; //subtract 2, since events number if multiplied by 2
909
910 // Is the run terminated in 1? if so, use decoded VLC code,
911 // otherwise, discard decoded data, since we will decoded again
912 // using a different context
913 t0 = (run == -1) ? t0 : 0;
914
915 // is run -1 or -2? this means a run has been consumed
916 if (run < 0)
917 run = mel_get_run(&mel); // get another run
918 }
919 //run -= (c_q == 0) ? 2 : 0;
920 //t0 = (c_q != 0 || run == -1) ? t0 : 0;
921 //if (run < 0)
922 // run = mel_get_run(&mel); // get another run
923 sp[0] = t0;
924 x += 2;
925
926 // prepare context for the next quad; eqn. 1 in ITU T.814
927 c_q = ((t0 & 0x10U) << 3) | ((t0 & 0xE0U) << 2);
928
929 //remove data from vlc stream (0 bits are removed if vlc is not used)
930 vlc_val = rev_advance64(&vlc, t0 & 0x7);
931
932 //second quad
933 ui16 t1 = 0;
934
935 //decode VLC using the context c_q and the head of VLC bitstream
936 t1 = vlc_tbl0[c_q + (vlc_val & 0x7F)];
937
938 // if context is zero, use one MEL event
939 if (c_q == 0 && x < width) //zero context
940 {
941 run -= 2; //subtract 2, since events number if multiplied by 2
942
943 // if event is 0, discard decoded t1
944 t1 = (run == -1) ? t1 : 0;
945
946 if (run < 0) // have we consumed all events in a run
947 run = mel_get_run(&mel); // if yes, then get another run
948 }
949 t1 = x < width ? t1 : 0;
950 //run -= (c_q == 0 && x < width) ? 2 : 0;
951 //t1 = (c_q != 0 || run == -1) ? t1 : 0;
952 //if (run < 0)
953 // run = mel_get_run(&mel); // get another run
954 sp[2] = t1;
955 x += 2;
956
957 //prepare context for the next quad, eqn. 1 in ITU T.814
958 c_q = ((t1 & 0x10U) << 3) | ((t1 & 0xE0U) << 2);
959
960 //remove data from vlc stream, if qinf is not used, cwdlen is 0
961 vlc_val = rev_advance64(&vlc, t1 & 0x7);
962
963 // decode u
965 // uvlc_mode is made up of u_offset bits from the quad pair
966 ui32 uvlc_mode = ((t0 & 0x8U) << 3) | ((t1 & 0x8U) << 4);
967 if (uvlc_mode == 0xc0)// if both u_offset are set, get an event from
968 { // the MEL run of events
969 run -= 2; //subtract 2, since events number if multiplied by 2
970
971 uvlc_mode += (run == -1) ? 0x40 : 0; // increment uvlc_mode by
972 // is 0x40
973
974 if (run < 0)//if run is consumed (run is -1 or -2), get another run
975 run = mel_get_run(&mel);
976 }
977 //run -= (uvlc_mode == 0xc0) ? 2 : 0;
978 //uvlc_mode += (uvlc_mode == 0xc0 && run == -1) ? 0x40 : 0;
979 //if (run < 0)
980 // run = mel_get_run(&mel); // get another run
981
982 //decode uvlc_mode to get u for both quads
983 ui32 idx = uvlc_mode + (ui32)(vlc_val & 0x3F);
984 ui32 uvlc_entry = uvlc_tbl0[idx];
985 ui16 u_bias = uvlc_bias[idx];
986 //remove total prefix length
987 vlc_val = rev_advance64(&vlc, uvlc_entry & 0x7);
988 uvlc_entry >>= 3;
989 //extract suffixes for quad 0 and 1
990 ui32 len = uvlc_entry & 0xF; // suffix length for 2 quads
991 ui32 tmp = (ui32)(vlc_val&((1<<len)-1)); // suffix value for 2 quads
992 vlc_val = rev_advance64(&vlc, len);
993 uvlc_entry >>= 4;
994 // quad 0 length
995 len = uvlc_entry & 0x7; // quad 0 suffix length
996 uvlc_entry >>= 3;
997 ui16 u_q0 = (ui16)((uvlc_entry & 7) + (tmp & ~(0xFFU << len)));
998 ui16 u_q1 = (ui16)((uvlc_entry >> 3) + (tmp >> len));
999
1000 // decode u_q extensions, which is needed only when u_q > 32
1001 ui16 u_ext; bool cond0, cond1;
1002 cond0 = u_q0 - (u_bias & 0x3) > 32;
1003 u_ext = (ui16)(cond0 ? (vlc_val & 0xF) : 0);
1004 vlc_val = rev_advance64(&vlc, cond0 ? 4 : 0);
1005 u_q0 = (ui16)(u_q0 + (u_ext << 2));
1006 sp[1] = (ui16)(u_q0 + 1); // kappa = 1
1007 cond1 = u_q1 - (u_bias >> 2) > 32;
1008 u_ext = (ui16)(cond1 ? (vlc_val & 0xF) : 0);
1009 vlc_val = rev_advance64(&vlc, cond1 ? 4 : 0);
1010 u_q1 = (ui16)(u_q1 + (u_ext << 2));
1011 sp[3] = (ui16)(u_q1 + 1); // kappa = 1
1012 }
1013 sp[0] = sp[1] = 0;
1014
1015 //non initial quad rows
1016 for (ui32 y = 2; y < height; y += 2)
1017 {
1018 c_q = 0; // context
1019 ui16 *sp = scratch + (y >> 1) * sstr; // this row of quads
1020
1021 for (ui32 x = 0; x < width; sp += 4)
1022 {
1023 // decode VLC
1025
1026 // sigma_q (n, ne, nf)
1027 c_q |= ((sp[0 - (si32)sstr] & 0xA0U) << 2);
1028 c_q |= ((sp[2 - (si32)sstr] & 0x20U) << 4);
1029
1030 // first quad
1031 vlc_val = rev_fetch64(&vlc);
1032
1033 //decode VLC using the context c_q and the head of VLC bitstream
1034 ui16 t0 = vlc_tbl1[ c_q + (vlc_val & 0x7F) ];
1035
1036 // if context is zero, use one MEL event
1037 if (c_q == 0) //zero context
1038 {
1039 run -= 2; //subtract 2, since events number is multiplied by 2
1040
1041 // Is the run terminated in 1? if so, use decoded VLC code,
1042 // otherwise, discard decoded data, since we will decoded again
1043 // using a different context
1044 t0 = (run == -1) ? t0 : 0;
1045
1046 // is run -1 or -2? this means a run has been consumed
1047 if (run < 0)
1048 run = mel_get_run(&mel); // get another run
1049 }
1050 //run -= (c_q == 0) ? 2 : 0;
1051 //t0 = (c_q != 0 || run == -1) ? t0 : 0;
1052 //if (run < 0)
1053 // run = mel_get_run(&mel); // get another run
1054 sp[0] = t0;
1055 x += 2;
1056
1057 // prepare context for the next quad; eqn. 2 in ITU T.814
1058 // sigma_q (w, sw)
1059 c_q = ((t0 & 0x40U) << 2) | ((t0 & 0x80U) << 1);
1060 // sigma_q (nw)
1061 c_q |= sp[0 - (si32)sstr] & 0x80;
1062 // sigma_q (n, ne, nf)
1063 c_q |= ((sp[2 - (si32)sstr] & 0xA0U) << 2);
1064 c_q |= ((sp[4 - (si32)sstr] & 0x20U) << 4);
1065
1066 //remove data from vlc stream (0 bits are removed if vlc is unused)
1067 vlc_val = rev_advance64(&vlc, t0 & 0x7);
1068
1069 //second quad
1070 ui16 t1 = 0;
1071
1072 //decode VLC using the context c_q and the head of VLC bitstream
1073 t1 = vlc_tbl1[ c_q + (vlc_val & 0x7F)];
1074
1075 // if context is zero, use one MEL event
1076 if (c_q == 0 && x < width) //zero context
1077 {
1078 run -= 2; //subtract 2, since events number if multiplied by 2
1079
1080 // if event is 0, discard decoded t1
1081 t1 = (run == -1) ? t1 : 0;
1082
1083 if (run < 0) // have we consumed all events in a run
1084 run = mel_get_run(&mel); // if yes, then get another run
1085 }
1086 t1 = x < width ? t1 : 0;
1087 //run -= (c_q == 0 && x < width) ? 2 : 0;
1088 //t1 = (c_q != 0 || run == -1) ? t1 : 0;
1089 //if (run < 0)
1090 // run = mel_get_run(&mel); // get another run
1091 sp[2] = t1;
1092 x += 2;
1093
1094 // partial c_q, will be completed when we process the next quad
1095 // sigma_q (w, sw)
1096 c_q = ((t1 & 0x40U) << 2) | ((t1 & 0x80U) << 1);
1097 // sigma_q (nw)
1098 c_q |= sp[2 - (si32)sstr] & 0x80;
1099
1100 //remove data from vlc stream, if qinf is not used, cwdlen is 0
1101 vlc_val = rev_advance64(&vlc, t1 & 0x7);
1102
1103 // decode u
1105 // uvlc_mode is made up of u_offset bits from the quad pair
1106 ui32 uvlc_mode = ((t0 & 0x8U) << 3) | ((t1 & 0x8U) << 4);
1107 ui32 uvlc_entry = uvlc_tbl1[uvlc_mode + (vlc_val & 0x3F)];
1108 //remove total prefix length
1109 vlc_val = rev_advance64(&vlc, uvlc_entry & 0x7);
1110 uvlc_entry >>= 3;
1111 //extract suffixes for quad 0 and 1
1112 ui32 len = uvlc_entry & 0xF; //suffix length for 2 quads
1113 ui32 tmp = (ui32)(vlc_val&((1<<len)-1)); //suffix value for 2 quads
1114 vlc_val = rev_advance64(&vlc, len);
1115 uvlc_entry >>= 4;
1116 // quad 0 length
1117 len = uvlc_entry & 0x7; // quad 0 suffix length
1118 uvlc_entry >>= 3;
1119 ui16 u_q0 = (ui16)((uvlc_entry & 7) + (tmp & ~(0xFFU << len)));
1120 ui16 u_q1 = (ui16)((uvlc_entry >> 3) + (tmp >> len)); // u_q
1121
1122 // decode u_q extensions, which is needed only when u_q > 32
1123 ui16 u_ext; bool cond0, cond1;
1124 cond0 = u_q0 > 32;
1125 u_ext = (ui16)(cond0 ? (vlc_val & 0xF) : 0);
1126 vlc_val = rev_advance64(&vlc, cond0 ? 4 : 0);
1127 u_q0 = (ui16)(u_q0 + (u_ext << 2));
1128 sp[1] = u_q0;
1129 cond1 = u_q1 > 32;
1130 u_ext = (ui16)(cond1 ? (vlc_val & 0xF) : 0);
1131 vlc_val = rev_advance64(&vlc, cond1 ? 4 : 0);
1132 u_q1 = (ui16)(u_q1 + (u_ext << 2));
1133 sp[3] = u_q1;
1134 }
1135 sp[0] = sp[1] = 0;
1136 }
1137 }
1138
1139 // step2 we decode magsgn
1140 {
1141 // We allocate a scratch row for storing v_n values.
1142 // We have 512 quads horizontally.
1143 // We need an extra entry to handle the case of vp[1]
1144 // when vp is at the last column.
1145 // Here, we allocate 4 instead of 1 to make the buffer size
1146 // a multiple of 16 bytes.
1147 const int v_n_size = 512 + 4;
1148 ui64 v_n_scratch[v_n_size] = {0}; // 4+ kB
1149
1150 frwd_struct64 magsgn;
1151 frwd_init8<0xFF>(&magsgn, coded_data, lcup - scup);
1152
1153 const ui16 *sp = scratch;
1154 ui64 *vp = v_n_scratch;
1155 ui64 *dp = decoded_data;
1156
1157 ui64 prev_v_n = 0;
1158 for (ui32 x = 0; x < width; sp += 2, ++vp)
1159 {
1160 ui32 inf = sp[0];
1161 ui32 U_q = sp[1];
1162 if (U_q > mmsbp2)
1163 return false;
1164
1165 ui64 v_n;
1166 ui64 val = 0;
1167 ui32 bit = 0;
1168 if (inf & (1 << (4 + bit)))
1169 {
1170 //get 32 bits of magsgn data
1171 ui64 ms_val = frwd_fetch64<0xFF>(&magsgn);
1172 ui32 m_n = U_q - ((inf >> (12 + bit)) & 1); // remove e_k
1173 frwd_advance(&magsgn, m_n); //consume m_n
1174
1175 val = ms_val << 63; // get sign bit
1176 v_n = ms_val & ((1ULL << m_n) - 1); // keep only m_n bits
1177 v_n |= (ui64)((inf >> (8 + bit)) & 1) << m_n; // add EMB e_1 as MSB
1178 v_n |= 1; // add center of bin
1179 //v_n now has 2 * (\mu - 1) + 0.5 with correct sign bit
1180 //add 2 to make it 2*\mu+0.5, shift it up to missing MSBs
1181 val |= (v_n + 2) << (p - 1);
1182 }
1183 dp[0] = val;
1184
1185 v_n = 0;
1186 val = 0;
1187 bit = 1;
1188 if (inf & (1 << (4 + bit)))
1189 {
1190 //get 32 bits of magsgn data
1191 ui64 ms_val = frwd_fetch64<0xFF>(&magsgn);
1192 ui32 m_n = U_q - ((inf >> (12 + bit)) & 1); // remove e_k
1193 frwd_advance(&magsgn, m_n); //consume m_n
1194
1195 val = ms_val << 63; // get sign bit
1196 v_n = ms_val & ((1ULL << m_n) - 1); // keep only m_n bits
1197 v_n |= (ui64)((inf >> (8 + bit)) & 1) << m_n; // add EMB e_1 as MSB
1198 v_n |= 1; // add center of bin
1199 //v_n now has 2 * (\mu - 1) + 0.5 with correct sign bit
1200 //add 2 to make it 2*\mu+0.5, shift it up to missing MSBs
1201 val |= (v_n + 2) << (p - 1);
1202 }
1203 dp[stride] = val;
1204 vp[0] = prev_v_n | v_n;
1205 prev_v_n = 0;
1206 ++dp;
1207 if (++x >= width)
1208 { ++vp; break; }
1209
1210 val = 0;
1211 bit = 2;
1212 if (inf & (1 << (4 + bit)))
1213 {
1214 //get 32 bits of magsgn data
1215 ui64 ms_val = frwd_fetch64<0xFF>(&magsgn);
1216 ui32 m_n = U_q - ((inf >> (12 + bit)) & 1); // remove e_k
1217 frwd_advance(&magsgn, m_n); //consume m_n
1218
1219 val = ms_val << 63; // get sign bit
1220 v_n = ms_val & ((1ULL << m_n) - 1); // keep only m_n bits
1221 v_n |= (ui64)((inf >> (8 + bit)) & 1) << m_n; // add EMB e_1 as MSB
1222 v_n |= 1; // add center of bin
1223 //v_n now has 2 * (\mu - 1) + 0.5 with correct sign bit
1224 //add 2 to make it 2*\mu+0.5, shift it up to missing MSBs
1225 val |= (v_n + 2) << (p - 1);
1226 }
1227 dp[0] = val;
1228
1229 v_n = 0;
1230 val = 0;
1231 bit = 3;
1232 if (inf & (1 << (4 + bit)))
1233 {
1234 //get 32 bits of magsgn data
1235 ui64 ms_val = frwd_fetch64<0xFF>(&magsgn);
1236 ui32 m_n = U_q - ((inf >> (12 + bit)) & 1); // remove e_k
1237 frwd_advance(&magsgn, m_n); //consume m_n
1238
1239 val = ms_val << 63; // get sign bit
1240 v_n = ms_val & ((1ULL << m_n) - 1); // keep only m_n bits
1241 v_n |= (ui64)((inf >> (8 + bit)) & 1) << m_n; // add EMB e_1 as MSB
1242 v_n |= 1; // add center of bin
1243 //v_n now has 2 * (\mu - 1) + 0.5 with correct sign bit
1244 //add 2 to make it 2*\mu+0.5, shift it up to missing MSBs
1245 val |= (v_n + 2) << (p - 1);
1246 }
1247 dp[stride] = val;
1248 prev_v_n = v_n;
1249 ++dp;
1250 ++x;
1251 }
1252 vp[0] = prev_v_n;
1253
1254 for (ui32 y = 2; y < height; y += 2)
1255 {
1256 const ui16 *sp = scratch + (y >> 1) * sstr;
1257 ui64 *vp = v_n_scratch;
1258 ui64 *dp = decoded_data + y * stride;
1259
1260 prev_v_n = 0;
1261 for (ui32 x = 0; x < width; sp += 2, ++vp)
1262 {
1263 ui32 inf = sp[0];
1264 ui32 u_q = sp[1];
1265
1266 ui32 gamma = inf & 0xF0; gamma &= gamma - 0x10; //is gamma_q 1?
1267 ui32 emax = 63 - count_leading_zeros(2 | vp[0] | vp[1]); // emax-1
1268 ui32 kappa = gamma ? emax : 1;
1269
1270 ui32 U_q = u_q + kappa;
1271 if (U_q > mmsbp2)
1272 return false;
1273
1274 ui64 v_n;
1275 ui64 val = 0;
1276 ui32 bit = 0;
1277 if (inf & (1 << (4 + bit)))
1278 {
1279 //get 32 bits of magsgn data
1280 ui64 ms_val = frwd_fetch64<0xFF>(&magsgn);
1281 ui32 m_n = U_q - ((inf >> (12 + bit)) & 1); // remove e_k
1282 frwd_advance(&magsgn, m_n); //consume m_n
1283
1284 val = ms_val << 63; // get sign bit
1285 v_n = ms_val & ((1ULL << m_n) - 1); // keep only m_n bits
1286 v_n |= (ui64)((inf >> (8+bit)) & 1) << m_n; // add EMB e_1 as MSB
1287 v_n |= 1; // add center of bin
1288 //v_n now has 2 * (\mu - 1) + 0.5 with correct sign bit
1289 //add 2 to make it 2*\mu+0.5, shift it up to missing MSBs
1290 val |= (v_n + 2) << (p - 1);
1291 }
1292 dp[0] = val;
1293
1294 v_n = 0;
1295 val = 0;
1296 bit = 1;
1297 if (inf & (1 << (4 + bit)))
1298 {
1299 //get 32 bits of magsgn data
1300 ui64 ms_val = frwd_fetch64<0xFF>(&magsgn);
1301 ui32 m_n = U_q - ((inf >> (12 + bit)) & 1); // remove e_k
1302 frwd_advance(&magsgn, m_n); //consume m_n
1303
1304 val = ms_val << 63; // get sign bit
1305 v_n = ms_val & ((1ULL << m_n) - 1); // keep only m_n bits
1306 v_n |= (ui64)((inf >> (8+bit)) & 1) << m_n; // add EMB e_1 as MSB
1307 v_n |= 1; // add center of bin
1308 //v_n now has 2 * (\mu - 1) + 0.5 with correct sign bit
1309 //add 2 to make it 2*\mu+0.5, shift it up to missing MSBs
1310 val |= (v_n + 2) << (p - 1);
1311 }
1312 dp[stride] = val;
1313 vp[0] = prev_v_n | v_n;
1314 prev_v_n = 0;
1315 ++dp;
1316 if (++x >= width)
1317 { ++vp; break; }
1318
1319 val = 0;
1320 bit = 2;
1321 if (inf & (1 << (4 + bit)))
1322 {
1323 //get 32 bits of magsgn data
1324 ui64 ms_val = frwd_fetch64<0xFF>(&magsgn);
1325 ui32 m_n = U_q - ((inf >> (12 + bit)) & 1); // remove e_k
1326 frwd_advance(&magsgn, m_n); //consume m_n
1327
1328 val = ms_val << 63; // get sign bit
1329 v_n = ms_val & ((1ULL << m_n) - 1); // keep only m_n bits
1330 v_n |= (ui64)((inf >> (8+bit)) & 1) << m_n; // add EMB e_1 as MSB
1331 v_n |= 1; // add center of bin
1332 //v_n now has 2 * (\mu - 1) + 0.5 with correct sign bit
1333 //add 2 to make it 2*\mu+0.5, shift it up to missing MSBs
1334 val |= (v_n + 2) << (p - 1);
1335 }
1336 dp[0] = val;
1337
1338 v_n = 0;
1339 val = 0;
1340 bit = 3;
1341 if (inf & (1 << (4 + bit)))
1342 {
1343 //get 32 bits of magsgn data
1344 ui64 ms_val = frwd_fetch64<0xFF>(&magsgn);
1345 ui32 m_n = U_q - ((inf >> (12 + bit)) & 1); // remove e_k
1346 frwd_advance(&magsgn, m_n); //consume m_n
1347
1348 val = ms_val << 63; // get sign bit
1349 v_n = ms_val & ((1ULL << m_n) - 1); // keep only m_n bits
1350 v_n |= (ui64)((inf >> (8+bit)) & 1) << m_n; // add EMB e_1 as MSB
1351 v_n |= 1; // add center of bin
1352 //v_n now has 2 * (\mu - 1) + 0.5 with correct sign bit
1353 //add 2 to make it 2*\mu+0.5, shift it up to missing MSBs
1354 val |= (v_n + 2) << (p - 1);
1355 }
1356 dp[stride] = val;
1357 prev_v_n = v_n;
1358 ++dp;
1359 ++x;
1360 }
1361 vp[0] = prev_v_n;
1362 }
1363 }
1364
1365 if (num_passes > 1)
1366 {
1367 // We use scratch again, we can divide it into multiple regions
1368 // sigma holds all the significant samples, and it cannot
1369 // be modified after it is set. it will be used during the
1370 // Magnitude Refinement Pass
1371 ui16* const sigma = scratch;
1372
1373 ui32 mstr = (width + 3u) >> 2; // divide by 4, since each
1374 // ui16 contains 4 columns
1375 mstr = ((mstr + 2u) + 7u) & ~7u; // multiples of 8
1376
1377 // We re-arrange quad significance, where each 4 consecutive
1378 // bits represent one quad, into column significance, where,
1379 // each 4 consecutive bits represent one column of 4 rows
1380 {
1381 ui32 y;
1382 for (y = 0; y < height; y += 4)
1383 {
1384 ui16* sp = scratch + (y >> 1) * sstr;
1385 ui16* dp = sigma + (y >> 2) * mstr;
1386 for (ui32 x = 0; x < width; x += 4, sp += 4, ++dp) {
1387 ui32 t0 = 0, t1 = 0;
1388 t0 = ((sp[0 ] & 0x30u) >> 4) | ((sp[0 ] & 0xC0u) >> 2);
1389 t0 |= ((sp[2 ] & 0x30u) << 4) | ((sp[2 ] & 0xC0u) << 6);
1390 t1 = ((sp[0+sstr] & 0x30u) >> 2) | ((sp[0+sstr] & 0xC0u) );
1391 t1 |= ((sp[2+sstr] & 0x30u) << 6) | ((sp[2+sstr] & 0xC0u) << 8);
1392 dp[0] = (ui16)(t0 | t1);
1393 }
1394 dp[0] = 0; // set an extra entry on the right with 0
1395 }
1396 {
1397 // reset one row after the codeblock
1398 ui16* dp = sigma + (y >> 2) * mstr;
1399 for (ui32 x = 0; x < width; x += 4, ++dp)
1400 dp[0] = 0;
1401 dp[0] = 0; // set an extra entry on the right with 0
1402 }
1403 }
1404
1405 // We perform Significance Propagation Pass here
1406 {
1407 // This stores significance information of the previous
1408 // 4 rows. Significance information in this array includes
1409 // all significant samples in bitplane p - 1; that is,
1410 // significant samples for bitplane p (discovered during the
1411 // cleanup pass and stored in sigma) and samples that have recently
1412 // become significant (during the SPP) in bitplane p-1.
1413 // We store enough for the widest row, containing 1024 columns,
1414 // which is equivalent to 256 of ui16, since each stores 4 columns.
1415 // We add an extra 8 entries, just in case we need more
1416 ui16 prev_row_sig[256 + 8] = {0}; // 528 Bytes
1417
1418 frwd_struct64 sigprop;
1419 frwd_init<0>(&sigprop, coded_data + lengths1, (int)lengths2);
1420
1421 for (ui32 y = 0; y < height; y += 4)
1422 {
1423 ui32 pattern = 0xFFFFu; // a pattern needed samples
1424 if (height - y < 4) {
1425 pattern = 0x7777u;
1426 if (height - y < 3) {
1427 pattern = 0x3333u;
1428 if (height - y < 2)
1429 pattern = 0x1111u;
1430 }
1431 }
1432
1433 // prev holds sign. info. for the previous quad, together
1434 // with the rows on top of it and below it.
1435 ui32 prev = 0;
1436 ui16 *prev_sig = prev_row_sig;
1437 ui16 *cur_sig = sigma + (y >> 2) * mstr;
1438 ui64 *dpp = decoded_data + y * stride;
1439 for (ui32 x = 0; x < width; x += 4, ++cur_sig, ++prev_sig)
1440 {
1441 // only rows and columns inside the stripe are included
1442 si32 s = (si32)x + 4 - (si32)width;
1443 s = ojph_max(s, 0);
1444 pattern = pattern >> (s * 4);
1445
1446 // We first find locations that need to be tested (potential
1447 // SPP members); these locations will end up in mbr
1448 // In each iteration, we produce 16 bits because cwd can have
1449 // up to 16 bits of significance information, followed by the
1450 // corresponding 16 bits of sign information; therefore, it is
1451 // sufficient to fetch 32 bit data per loop.
1452
1453 // Although we are interested in 16 bits only, we load 32 bits.
1454 // For the 16 bits we are producing, we need the next 4 bits --
1455 // We need data for at least 5 columns out of 8.
1456 // Therefore loading 32 bits is easier than loading 16 bits
1457 // twice.
1458 ui32 ps = *(ui32*)prev_sig;
1459 ui32 ns = *(ui32*)(cur_sig + mstr);
1460 ui32 u = (ps & 0x88888888) >> 3; // the row on top
1461 if (!stripe_causal)
1462 u |= (ns & 0x11111111) << 3; // the row below
1463
1464 ui32 cs = *(ui32*)cur_sig;
1465 // vertical integration
1466 ui32 mbr = cs; // this sig. info.
1467 mbr |= (cs & 0x77777777) << 1; //above neighbors
1468 mbr |= (cs & 0xEEEEEEEE) >> 1; //below neighbors
1469 mbr |= u;
1470 // horizontal integration
1471 ui32 t = mbr;
1472 mbr |= t << 4; // neighbors on the left
1473 mbr |= t >> 4; // neighbors on the right
1474 mbr |= prev >> 12; // significance of previous group
1475
1476 // remove outside samples, and already significant samples
1477 mbr &= pattern;
1478 mbr &= ~cs;
1479
1480 // find samples that become significant during the SPP
1481 ui32 new_sig = mbr;
1482 if (new_sig)
1483 {
1484 ui64 cwd = frwd_fetch<0>(&sigprop);
1485
1486 ui32 cnt = 0;
1487 ui32 col_mask = 0xFu;
1488 ui32 inv_sig = ~cs & pattern;
1489 for (int i = 0; i < 16; i += 4, col_mask <<= 4)
1490 {
1491 if ((col_mask & new_sig) == 0)
1492 continue;
1493
1494 //scan one column
1495 ui32 sample_mask = 0x1111u & col_mask;
1496 if (new_sig & sample_mask)
1497 {
1498 new_sig &= ~sample_mask;
1499 if (cwd & 1)
1500 {
1501 ui32 t = 0x33u << i;
1502 new_sig |= t & inv_sig;
1503 }
1504 cwd >>= 1; ++cnt;
1505 }
1506
1507 sample_mask <<= 1;
1508 if (new_sig & sample_mask)
1509 {
1510 new_sig &= ~sample_mask;
1511 if (cwd & 1)
1512 {
1513 ui32 t = 0x76u << i;
1514 new_sig |= t & inv_sig;
1515 }
1516 cwd >>= 1; ++cnt;
1517 }
1518
1519 sample_mask <<= 1;
1520 if (new_sig & sample_mask)
1521 {
1522 new_sig &= ~sample_mask;
1523 if (cwd & 1)
1524 {
1525 ui32 t = 0xECu << i;
1526 new_sig |= t & inv_sig;
1527 }
1528 cwd >>= 1; ++cnt;
1529 }
1530
1531 sample_mask <<= 1;
1532 if (new_sig & sample_mask)
1533 {
1534 new_sig &= ~sample_mask;
1535 if (cwd & 1)
1536 {
1537 ui32 t = 0xC8u << i;
1538 new_sig |= t & inv_sig;
1539 }
1540 cwd >>= 1; ++cnt;
1541 }
1542 }
1543
1544 if (new_sig)
1545 {
1546 // new_sig has newly-discovered sig. samples during SPP
1547 // find the signs and update decoded_data
1548 ui64 *dp = dpp + x;
1549 ui64 val = 3u << (p - 2);
1550 col_mask = 0xFu;
1551 for (int i = 0; i < 4; ++i, ++dp, col_mask <<= 4)
1552 {
1553 if ((col_mask & new_sig) == 0)
1554 continue;
1555
1556 //scan 4 signs
1557 ui32 sample_mask = 0x1111u & col_mask;
1558 if (new_sig & sample_mask)
1559 {
1560 assert(dp[0] == 0);
1561 dp[0] = (cwd << 63) | val;
1562 cwd >>= 1; ++cnt;
1563 }
1564
1565 sample_mask += sample_mask;
1566 if (new_sig & sample_mask)
1567 {
1568 assert(dp[stride] == 0);
1569 dp[stride] = (cwd << 63) | val;
1570 cwd >>= 1; ++cnt;
1571 }
1572
1573 sample_mask += sample_mask;
1574 if (new_sig & sample_mask)
1575 {
1576 assert(dp[2 * stride] == 0);
1577 dp[2 * stride] = (cwd << 63) | val;
1578 cwd >>= 1; ++cnt;
1579 }
1580
1581 sample_mask += sample_mask;
1582 if (new_sig & sample_mask)
1583 {
1584 assert(dp[3 * stride] == 0);
1585 dp[3 * stride] = (cwd << 63) | val;
1586 cwd >>= 1; ++cnt;
1587 }
1588 }
1589 }
1590 frwd_advance(&sigprop, cnt);
1591 }
1592
1593 new_sig |= cs;
1594 *prev_sig = (ui16)(new_sig);
1595
1596 // vertical integration for the new sig. info.
1597 t = new_sig;
1598 new_sig |= (t & 0x7777) << 1; //above neighbors
1599 new_sig |= (t & 0xEEEE) >> 1; //below neighbors
1600 // add sig. info. from the row on top and below
1601 prev = new_sig | u;
1602 // we need only the bits in 0xF000
1603 prev &= 0xF000;
1604 }
1605 }
1606 }
1607
1608 // We perform Magnitude Refinement Pass here
1609 if (num_passes > 2)
1610 {
1611 rev_struct magref;
1612 rev_init_mrp(&magref, coded_data, (int)lengths1, (int)lengths2);
1613
1614 for (ui32 y = 0; y < height; y += 4)
1615 {
1616 ui32 *cur_sig = (ui32*)(sigma + (y >> 2) * mstr);
1617 ui64 *dpp = decoded_data + y * stride;
1618 ui64 half = 1ULL << (p - 2);
1619 for (ui32 i = 0; i < width; i += 8)
1620 {
1621 //Process one entry from sigma array at a time
1622 // Each nibble (4 bits) in the sigma array represents 4 rows,
1623 // and the 32 bits contain 8 columns
1624 ui32 cwd = rev_fetch_mrp(&magref); // get 32 bit data
1625 ui32 sig = *cur_sig++; // 32 bit that will be processed now
1626 ui32 col_mask = 0xFu; // a mask for a column in sig
1627 if (sig) // if any of the 32 bits are set
1628 {
1629 for (int j = 0; j < 8; ++j) //one column at a time
1630 {
1631 if (sig & col_mask) // lowest nibble
1632 {
1633 ui64 *dp = dpp + i + j; // next column in decoded samples
1634 ui32 sample_mask = 0x11111111u & col_mask; //LSB
1635
1636 for (int k = 0; k < 4; ++k) {
1637 if (sig & sample_mask) //if LSB is set
1638 {
1639 assert(dp[0] != 0); // decoded value cannot be zero
1640 assert((dp[0] & half) == 0); // no half
1641 ui64 sym = cwd & 1; // get it value
1642 sym = (1 - sym) << (p - 1); // previous center of bin
1643 sym |= half; // put half the center of bin
1644 dp[0] ^= sym; // remove old bin center and put new
1645 cwd >>= 1; // consume word
1646 }
1647 sample_mask += sample_mask; //next row
1648 dp += stride; // next samples row
1649 }
1650 }
1651 col_mask <<= 4; //next column
1652 }
1653 }
1654 // consume data according to the number of bits set
1655 rev_advance_mrp(&magref, population_count(sig));
1656 }
1657 }
1658 }
1659 }
1660 return true;
1661 }
1662 }
1663}
ui8 uvlc_bias[256+64]
uvlc_bias contains decoding info. for initial row of quads
ui16 uvlc_tbl0[256+64]
uvlc_tbl0 contains decoding information for initial row of quads
ui16 uvlc_tbl1[256]
uvlc_tbl1 contains decoding information for non-initial row of quads
ui16 vlc_tbl0[1024]
vlc_tbl0 contains decoding information for initial row of quads
ui16 vlc_tbl1[1024]
vlc_tbl1 contains decoding information for non-initial row of quads
static void frwd_init8(frwd_struct64 *msp, const ui8 *data, int size)
Initializes the frwd_struct64 struct and reads some bytes.
static void frwd_read8(frwd_struct64 *msp)
Reads and unstuffs 8 bits from a forward-growing bitstream.
static void rev_init_mrp(rev_struct *mrp, ui8 *data, int lcup, int len2)
Initializes the rev_struct structure for the MRP segment, and reads a number of bytes such that the next 32 b...
static void mel_read(dec_mel_st *melp)
Reads and unstuffs the MEL bitstream.
static void rev_init8(rev_struct *vlcp, ui8 *data, int lcup, int scup)
Initiates the rev_struct structure and reads the first byte.
static void frwd_advance(frwd_struct32 *msp, ui32 num_bits)
Consume num_bits bits from the bitstream of frwd_struct32.
static void rev_read_mrp(rev_struct *mrp)
Reads and unstuffs from rev_struct.
static ui32 rev_fetch_mrp(rev_struct *mrp)
Retrieves 32 bits from the head of a rev_struct structure.
static void frwd_read(frwd_struct32 *msp)
Reads and unstuffs 32 bits from a forward-growing bitstream.
static void rev_read8(rev_struct *vlcp)
Reads and unstuffs data from a backward-growing segment.
static int mel_get_run(dec_mel_st *melp)
Retrieves one run from dec_mel_st; if there are no runs stored, the MEL segment is decoded.
static void mel_init(dec_mel_st *melp, ui8 *bbuf, int lcup, int scup)
Initiates a dec_mel_st structure for MEL decoding and reads some bytes in order to get the read addre...
static ui64 rev_advance64(rev_struct *vlcp, ui32 num_bits)
Consumes num_bits from a rev_struct structure.
static ui64 rev_fetch64(rev_struct *vlcp)
Fills the temporary variable (vlcp->tmp) by up to 64 bits.
static ui64 frwd_fetch64(frwd_struct64 *msp)
Fetches up to 64 bits from the frwd_struct64 bitstream.
static ui32 frwd_fetch(frwd_struct32 *msp)
Fetches 32 bits from the frwd_struct32 bitstream.
static ui32 rev_advance_mrp(rev_struct *mrp, ui32 num_bits)
Consumes num_bits from a rev_struct structure.
bool ojph_decode_codeblock64(ui8 *coded_data, ui64 *decoded_data, ui32 missing_msbs, ui32 num_passes, ui32 lengths1, ui32 lengths2, ui32 width, ui32 height, ui32 stride, bool stripe_causal)
Decodes one codeblock, processing the cleanup, significance propagation, and magnitude refinement pa...
static void frwd_init(frwd_struct32 *msp, const ui8 *data, int size)
Initializes the frwd_struct32 struct and reads some bytes.
static void mel_decode(dec_mel_st *melp)
Decodes unstuffed MEL segment bits stored in tmp to runs.
uint64_t ui64
Definition ojph_defs.h:56
uint16_t ui16
Definition ojph_defs.h:52
static ui32 population_count(ui32 val)
Definition ojph_arch.h:152
static ui32 count_leading_zeros(ui32 val)
Definition ojph_arch.h:173
int32_t si32
Definition ojph_defs.h:55
uint32_t ui32
Definition ojph_defs.h:54
uint8_t ui8
Definition ojph_defs.h:50
#define ojph_max(a, b)
Definition ojph_defs.h:73
#define OJPH_WARN(t,...)
MEL state structure for reading and decoding the MEL bitstream.
bool unstuff
true if the next bit needs to be unstuffed
int num_runs
number of decoded runs left in runs (maximum 8)
int size
number of bytes in MEL code
ui8 * data
the address of data (or bitstream)
int k
state of MEL decoder
int bits
number of bits stored in tmp
ui64 tmp
temporary buffer for read data
ui64 runs
runs of decoded MEL codewords (7 bits/run)
State structure for reading and unstuffing of forward-growing bitstreams; these are: MagSgn and SPP b...
ui32 bits
number of bits stored in tmp
ui64 tmp
temporary buffer of read data
ui32 unstuff
1 if a bit needs to be unstuffed from next byte
const ui8 * data
pointer to bitstream
A structure for reading and unstuffing a segment that grows backward, such as VLC and MRP.
ui32 bits
number of bits stored in tmp
int size
number of bytes left
ui8 * data
pointer to where to read data
ui64 tmp
temporary buffer of read data