OpenJPH
Open-source implementation of JPEG2000 Part-15
Loading...
Searching...
No Matches
ojph_transform.cpp
Go to the documentation of this file.
1//***************************************************************************/
2// This software is released under the 2-Clause BSD license, included
3// below.
4//
5// Copyright (c) 2019, Aous Naman
6// Copyright (c) 2019, Kakadu Software Pty Ltd, Australia
7// Copyright (c) 2019, The University of New South Wales, Australia
8//
9// Redistribution and use in source and binary forms, with or without
10// modification, are permitted provided that the following conditions are
11// met:
12//
13// 1. Redistributions of source code must retain the above copyright
14// notice, this list of conditions and the following disclaimer.
15//
16// 2. Redistributions in binary form must reproduce the above copyright
17// notice, this list of conditions and the following disclaimer in the
18// documentation and/or other materials provided with the distribution.
19//
20// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
21// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
23// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
26// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
27// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
28// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
29// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31//***************************************************************************/
32// This file is part of the OpenJPH software implementation.
33// File: ojph_transform.cpp
34// Author: Aous Naman
35// Date: 28 August 2019
36//***************************************************************************/
37
38#include <cstdio>
39
40#include "ojph_arch.h"
41#include "ojph_mem.h"
42#include "ojph_transform.h"
44#include "ojph_params.h"
45#include "../codestream/ojph_params_local.h"
46
47namespace ojph {
48
49 // defined elsewhere
50 class line_buf;
51
52 namespace local {
53
55 // Reversible functions
57
60 (const lifting_step* s, const line_buf* sig, const line_buf* other,
61 const line_buf* aug, ui32 repeat, bool synthesis) = NULL;
62
65 (const param_atk* atk, const line_buf* ldst, const line_buf* hdst,
66 const line_buf* src, ui32 width, bool even) = NULL;
67
70 (const param_atk* atk, const line_buf* dst, const line_buf* lsrc,
71 const line_buf* hsrc, ui32 width, bool even) = NULL;
72
74 // Irreversible functions
76
79 (const lifting_step* s, const line_buf* sig, const line_buf* other,
80 const line_buf* aug, ui32 repeat, bool synthesis) = NULL;
81
84 (float K, const line_buf* aug, ui32 repeat) = NULL;
85
88 (const param_atk* atk, const line_buf* ldst, const line_buf* hdst,
89 const line_buf* src, ui32 width, bool even) = NULL;
90
93 (const param_atk* atk, const line_buf* dst, const line_buf* lsrc,
94 const line_buf* hsrc, ui32 width, bool even) = NULL;
95
98
101 {
103 return;
104
105#if !defined(OJPH_ENABLE_WASM_SIMD) || !defined(OJPH_EMSCRIPTEN)
106
110
115
116 #ifndef OJPH_DISABLE_SIMD
117
118 #if (defined(OJPH_ARCH_X86_64) || defined(OJPH_ARCH_I386))
119
120 #ifndef OJPH_DISABLE_SSE
122 {
127 }
128 #endif // !OJPH_DISABLE_SSE
129
130 #ifndef OJPH_DISABLE_SSE2
132 {
136 }
137 #endif // !OJPH_DISABLE_SSE2
138
139 #ifndef OJPH_DISABLE_AVX
141 {
146 }
147 #endif // !OJPH_DISABLE_AVX
148
149 #ifndef OJPH_DISABLE_AVX2
151 {
155 }
156 #endif // !OJPH_DISABLE_AVX2
157
158 #if (defined(OJPH_ARCH_X86_64) && !defined(OJPH_DISABLE_AVX512))
160 {
161 // rev_vert_step = avx512_rev_vert_step;
162 // rev_horz_ana = avx512_rev_horz_ana;
163 // rev_horz_syn = avx512_rev_horz_syn;
164
169 }
170 #endif // !OJPH_DISABLE_AVX512
171
172 #elif defined(OJPH_ARCH_ARM)
173
174 #endif // !(defined(OJPH_ARCH_X86_64) || defined(OJPH_ARCH_I386))
175
176 #endif // !OJPH_DISABLE_SIMD
177
178#else // OJPH_ENABLE_WASM_SIMD
182
187#endif // !OJPH_ENABLE_WASM_SIMD
188
190 }
191
193
194#if !defined(OJPH_ENABLE_WASM_SIMD) || !defined(OJPH_EMSCRIPTEN)
195
197 static
198 void gen_rev_vert_step32(const lifting_step* s, const line_buf* sig,
199 const line_buf* other, const line_buf* aug,
200 ui32 repeat, bool synthesis)
201 {
202 const si32 a = s->rev.Aatk;
203 const si32 b = s->rev.Batk;
204 const ui8 e = s->rev.Eatk;
205
206 si32* dst = aug->i32;
207 const si32* src1 = sig->i32, * src2 = other->i32;
208 // The general definition of the wavelet in Part 2 is slightly
209 // different to part 2, although they are mathematically equivalent
210 // here, we identify the simpler form from Part 1 and employ them
211 if (a == 1)
212 { // 5/3 update and any case with a == 1
213 if (synthesis)
214 for (ui32 i = repeat; i > 0; --i)
215 *dst++ -= (b + *src1++ + *src2++) >> e;
216 else
217 for (ui32 i = repeat; i > 0; --i)
218 *dst++ += (b + *src1++ + *src2++) >> e;
219 }
220 else if (a == -1 && b == 1 && e == 1)
221 { // 5/3 predict
222 if (synthesis)
223 for (ui32 i = repeat; i > 0; --i)
224 *dst++ += (*src1++ + *src2++) >> e;
225 else
226 for (ui32 i = repeat; i > 0; --i)
227 *dst++ -= (*src1++ + *src2++) >> e;
228 }
229 else if (a == -1)
230 { // any case with a == -1, which is not 5/3 predict
231 if (synthesis)
232 for (ui32 i = repeat; i > 0; --i)
233 *dst++ -= (b - (*src1++ + *src2++)) >> e;
234 else
235 for (ui32 i = repeat; i > 0; --i)
236 *dst++ += (b - (*src1++ + *src2++)) >> e;
237 }
238 else { // general case
239 if (synthesis)
240 for (ui32 i = repeat; i > 0; --i)
241 *dst++ -= (b + a * (*src1++ + *src2++)) >> e;
242 else
243 for (ui32 i = repeat; i > 0; --i)
244 *dst++ += (b + a * (*src1++ + *src2++)) >> e;
245 }
246 }
247
249 static
250 void gen_rev_vert_step64(const lifting_step* s, const line_buf* sig,
251 const line_buf* other, const line_buf* aug,
252 ui32 repeat, bool synthesis)
253 {
254 const si64 a = s->rev.Aatk;
255 const si64 b = s->rev.Batk;
256 const ui8 e = s->rev.Eatk;
257
258 si64* dst = aug->i64;
259 const si64* src1 = sig->i64, * src2 = other->i64;
260 // The general definition of the wavelet in Part 2 is slightly
261 // different to part 2, although they are mathematically equivalent
262 // here, we identify the simpler form from Part 1 and employ them
263 if (a == 1)
264 { // 5/3 update and any case with a == 1
265 if (synthesis)
266 for (ui32 i = repeat; i > 0; --i)
267 *dst++ -= (b + *src1++ + *src2++) >> e;
268 else
269 for (ui32 i = repeat; i > 0; --i)
270 *dst++ += (b + *src1++ + *src2++) >> e;
271 }
272 else if (a == -1 && b == 1 && e == 1)
273 { // 5/3 predict
274 if (synthesis)
275 for (ui32 i = repeat; i > 0; --i)
276 *dst++ += (*src1++ + *src2++) >> e;
277 else
278 for (ui32 i = repeat; i > 0; --i)
279 *dst++ -= (*src1++ + *src2++) >> e;
280 }
281 else if (a == -1)
282 { // any case with a == -1, which is not 5/3 predict
283 if (synthesis)
284 for (ui32 i = repeat; i > 0; --i)
285 *dst++ -= (b - (*src1++ + *src2++)) >> e;
286 else
287 for (ui32 i = repeat; i > 0; --i)
288 *dst++ += (b - (*src1++ + *src2++)) >> e;
289 }
290 else { // general case
291 if (synthesis)
292 for (ui32 i = repeat; i > 0; --i)
293 *dst++ -= (b + a * (*src1++ + *src2++)) >> e;
294 else
295 for (ui32 i = repeat; i > 0; --i)
296 *dst++ += (b + a * (*src1++ + *src2++)) >> e;
297 }
298 }
299
301 void gen_rev_vert_step(const lifting_step* s, const line_buf* sig,
302 const line_buf* other, const line_buf* aug,
303 ui32 repeat, bool synthesis)
304 {
305 if (((sig != NULL) && (sig->flags & line_buf::LFT_32BIT)) ||
306 ((aug != NULL) && (aug->flags & line_buf::LFT_32BIT)) ||
307 ((other != NULL) && (other->flags & line_buf::LFT_32BIT)))
308 {
309 assert((sig == NULL || sig->flags & line_buf::LFT_32BIT) &&
310 (other == NULL || other->flags & line_buf::LFT_32BIT) &&
311 (aug == NULL || aug->flags & line_buf::LFT_32BIT));
312 gen_rev_vert_step32(s, sig, other, aug, repeat, synthesis);
313 }
314 else
315 {
316 assert((sig == NULL || sig->flags & line_buf::LFT_64BIT) &&
317 (other == NULL || other->flags & line_buf::LFT_64BIT) &&
318 (aug == NULL || aug->flags & line_buf::LFT_64BIT));
319 gen_rev_vert_step64(s, sig, other, aug, repeat, synthesis);
320 }
321 }
322
324 static
325 void gen_rev_horz_ana32(const param_atk* atk, const line_buf* ldst,
326 const line_buf* hdst, const line_buf* src,
327 ui32 width, bool even)
328 {
329 if (width > 1)
330 {
331 // combine both lsrc and hsrc into dst
332 si32* dph = hdst->i32;
333 si32* dpl = ldst->i32;
334 si32* sp = src->i32;
335 ui32 w = width;
336 if (!even)
337 {
338 *dph++ = *sp++; --w;
339 }
340 for (; w > 1; w -= 2)
341 {
342 *dpl++ = *sp++; *dph++ = *sp++;
343 }
344 if (w)
345 {
346 *dpl++ = *sp++; --w;
347 }
348
349 si32* hp = hdst->i32, * lp = ldst->i32;
350 ui32 l_width = (width + (even ? 1 : 0)) >> 1; // low pass
351 ui32 h_width = (width + (even ? 0 : 1)) >> 1; // high pass
352 ui32 num_steps = atk->get_num_steps();
353 for (ui32 j = num_steps; j > 0; --j)
354 {
355 // first lifting step
356 const lifting_step* s = atk->get_step(j - 1);
357 const si32 a = s->rev.Aatk;
358 const si32 b = s->rev.Batk;
359 const ui8 e = s->rev.Eatk;
360
361 // extension
362 lp[-1] = lp[0];
363 lp[l_width] = lp[l_width - 1];
364 // lifting step
365 const si32* sp = lp + (even ? 1 : 0);
366 si32* dp = hp;
367 if (a == 1)
368 { // 5/3 update and any case with a == 1
369 for (ui32 i = h_width; i > 0; --i, sp++, dp++)
370 *dp += (b + (sp[-1] + sp[0])) >> e;
371 }
372 else if (a == -1 && b == 1 && e == 1)
373 { // 5/3 predict
374 for (ui32 i = h_width; i > 0; --i, sp++, dp++)
375 *dp -= (sp[-1] + sp[0]) >> e;
376 }
377 else if (a == -1)
378 { // any case with a == -1, which is not 5/3 predict
379 for (ui32 i = h_width; i > 0; --i, sp++, dp++)
380 *dp += (b - (sp[-1] + sp[0])) >> e;
381 }
382 else {
383 // general case
384 for (ui32 i = h_width; i > 0; --i, sp++, dp++)
385 *dp += (b + a * (sp[-1] + sp[0])) >> e;
386 }
387
388 // swap buffers
389 si32* t = lp; lp = hp; hp = t;
390 even = !even;
391 ui32 w = l_width; l_width = h_width; h_width = w;
392 }
393 }
394 else {
395 if (even)
396 ldst->i32[0] = src->i32[0];
397 else
398 hdst->i32[0] = src->i32[0] << 1;
399 }
400 }
401
403 static
404 void gen_rev_horz_ana64(const param_atk* atk, const line_buf* ldst,
405 const line_buf* hdst, const line_buf* src,
406 ui32 width, bool even)
407 {
408 if (width > 1)
409 {
410 // combine both lsrc and hsrc into dst
411 si64* dph = hdst->i64;
412 si64* dpl = ldst->i64;
413 si64* sp = src->i64;
414 ui32 w = width;
415 if (!even)
416 {
417 *dph++ = *sp++; --w;
418 }
419 for (; w > 1; w -= 2)
420 {
421 *dpl++ = *sp++; *dph++ = *sp++;
422 }
423 if (w)
424 {
425 *dpl++ = *sp++; --w;
426 }
427
428 si64* hp = hdst->i64, * lp = ldst->i64;
429 ui32 l_width = (width + (even ? 1 : 0)) >> 1; // low pass
430 ui32 h_width = (width + (even ? 0 : 1)) >> 1; // high pass
431 ui32 num_steps = atk->get_num_steps();
432 for (ui32 j = num_steps; j > 0; --j)
433 {
434 // first lifting step
435 const lifting_step* s = atk->get_step(j - 1);
436 const si64 a = s->rev.Aatk;
437 const si64 b = s->rev.Batk;
438 const ui8 e = s->rev.Eatk;
439
440 // extension
441 lp[-1] = lp[0];
442 lp[l_width] = lp[l_width - 1];
443 // lifting step
444 const si64* sp = lp + (even ? 1 : 0);
445 si64* dp = hp;
446 if (a == 1)
447 { // 5/3 update and any case with a == 1
448 for (ui32 i = h_width; i > 0; --i, sp++, dp++)
449 *dp += (b + (sp[-1] + sp[0])) >> e;
450 }
451 else if (a == -1 && b == 1 && e == 1)
452 { // 5/3 predict
453 for (ui32 i = h_width; i > 0; --i, sp++, dp++)
454 *dp -= (sp[-1] + sp[0]) >> e;
455 }
456 else if (a == -1)
457 { // any case with a == -1, which is not 5/3 predict
458 for (ui32 i = h_width; i > 0; --i, sp++, dp++)
459 *dp += (b - (sp[-1] + sp[0])) >> e;
460 }
461 else {
462 // general case
463 for (ui32 i = h_width; i > 0; --i, sp++, dp++)
464 *dp += (b + a * (sp[-1] + sp[0])) >> e;
465 }
466
467 // swap buffers
468 si64* t = lp; lp = hp; hp = t;
469 even = !even;
470 ui32 w = l_width; l_width = h_width; h_width = w;
471 }
472 }
473 else {
474 if (even)
475 ldst->i64[0] = src->i64[0];
476 else
477 hdst->i64[0] = src->i64[0] << 1;
478 }
479 }
480
482 void gen_rev_horz_ana(const param_atk* atk, const line_buf* ldst,
483 const line_buf* hdst, const line_buf* src,
484 ui32 width, bool even)
485 {
486 if (src->flags & line_buf::LFT_32BIT)
487 {
488 assert((ldst == NULL || ldst->flags & line_buf::LFT_32BIT) &&
489 (hdst == NULL || hdst->flags & line_buf::LFT_32BIT));
490 gen_rev_horz_ana32(atk, ldst, hdst, src, width, even);
491 }
492 else
493 {
494 assert((ldst == NULL || ldst->flags & line_buf::LFT_64BIT) &&
495 (hdst == NULL || hdst->flags & line_buf::LFT_64BIT) &&
496 (src == NULL || src->flags & line_buf::LFT_64BIT));
497 gen_rev_horz_ana64(atk, ldst, hdst, src, width, even);
498 }
499 }
500
502 static
503 void gen_rev_horz_syn32(const param_atk* atk, const line_buf* dst,
504 const line_buf* lsrc, const line_buf* hsrc,
505 ui32 width, bool even)
506 {
507 if (width > 1)
508 {
509 bool ev = even;
510 si32* oth = hsrc->i32, * aug = lsrc->i32;
511 ui32 aug_width = (width + (even ? 1 : 0)) >> 1; // low pass
512 ui32 oth_width = (width + (even ? 0 : 1)) >> 1; // high pass
513 ui32 num_steps = atk->get_num_steps();
514 for (ui32 j = 0; j < num_steps; ++j)
515 {
516 const lifting_step* s = atk->get_step(j);
517 const si32 a = s->rev.Aatk;
518 const si32 b = s->rev.Batk;
519 const ui8 e = s->rev.Eatk;
520
521 // extension
522 oth[-1] = oth[0];
523 oth[oth_width] = oth[oth_width - 1];
524 // lifting step
525 const si32* sp = oth + (ev ? 0 : 1);
526 si32* dp = aug;
527 if (a == 1)
528 { // 5/3 update and any case with a == 1
529 for (ui32 i = aug_width; i > 0; --i, sp++, dp++)
530 *dp -= (b + (sp[-1] + sp[0])) >> e;
531 }
532 else if (a == -1 && b == 1 && e == 1)
533 { // 5/3 predict
534 for (ui32 i = aug_width; i > 0; --i, sp++, dp++)
535 *dp += (sp[-1] + sp[0]) >> e;
536 }
537 else if (a == -1)
538 { // any case with a == -1, which is not 5/3 predict
539 for (ui32 i = aug_width; i > 0; --i, sp++, dp++)
540 *dp -= (b - (sp[-1] + sp[0])) >> e;
541 }
542 else {
543 // general case
544 for (ui32 i = aug_width; i > 0; --i, sp++, dp++)
545 *dp -= (b + a * (sp[-1] + sp[0])) >> e;
546 }
547
548 // swap buffers
549 si32* t = aug; aug = oth; oth = t;
550 ev = !ev;
551 ui32 w = aug_width; aug_width = oth_width; oth_width = w;
552 }
553
554 // combine both lsrc and hsrc into dst
555 si32* sph = hsrc->i32;
556 si32* spl = lsrc->i32;
557 si32* dp = dst->i32;
558 ui32 w = width;
559 if (!even)
560 {
561 *dp++ = *sph++; --w;
562 }
563 for (; w > 1; w -= 2)
564 {
565 *dp++ = *spl++; *dp++ = *sph++;
566 }
567 if (w)
568 {
569 *dp++ = *spl++; --w;
570 }
571 }
572 else {
573 if (even)
574 dst->i32[0] = lsrc->i32[0];
575 else
576 dst->i32[0] = hsrc->i32[0] >> 1;
577 }
578 }
579
581 static
582 void gen_rev_horz_syn64(const param_atk* atk, const line_buf* dst,
583 const line_buf* lsrc, const line_buf* hsrc,
584 ui32 width, bool even)
585 {
586 if (width > 1)
587 {
588 bool ev = even;
589 si64* oth = hsrc->i64, * aug = lsrc->i64;
590 ui32 aug_width = (width + (even ? 1 : 0)) >> 1; // low pass
591 ui32 oth_width = (width + (even ? 0 : 1)) >> 1; // high pass
592 ui32 num_steps = atk->get_num_steps();
593 for (ui32 j = 0; j < num_steps; ++j)
594 {
595 const lifting_step* s = atk->get_step(j);
596 const si64 a = s->rev.Aatk;
597 const si64 b = s->rev.Batk;
598 const ui8 e = s->rev.Eatk;
599
600 // extension
601 oth[-1] = oth[0];
602 oth[oth_width] = oth[oth_width - 1];
603 // lifting step
604 const si64* sp = oth + (ev ? 0 : 1);
605 si64* dp = aug;
606 if (a == 1)
607 { // 5/3 update and any case with a == 1
608 for (ui32 i = aug_width; i > 0; --i, sp++, dp++)
609 *dp -= (b + (sp[-1] + sp[0])) >> e;
610 }
611 else if (a == -1 && b == 1 && e == 1)
612 { // 5/3 predict
613 for (ui32 i = aug_width; i > 0; --i, sp++, dp++)
614 *dp += (sp[-1] + sp[0]) >> e;
615 }
616 else if (a == -1)
617 { // any case with a == -1, which is not 5/3 predict
618 for (ui32 i = aug_width; i > 0; --i, sp++, dp++)
619 *dp -= (b - (sp[-1] + sp[0])) >> e;
620 }
621 else {
622 // general case
623 for (ui32 i = aug_width; i > 0; --i, sp++, dp++)
624 *dp -= (b + a * (sp[-1] + sp[0])) >> e;
625 }
626
627 // swap buffers
628 si64* t = aug; aug = oth; oth = t;
629 ev = !ev;
630 ui32 w = aug_width; aug_width = oth_width; oth_width = w;
631 }
632
633 // combine both lsrc and hsrc into dst
634 si64* sph = hsrc->i64;
635 si64* spl = lsrc->i64;
636 si64* dp = dst->i64;
637 ui32 w = width;
638 if (!even)
639 {
640 *dp++ = *sph++; --w;
641 }
642 for (; w > 1; w -= 2)
643 {
644 *dp++ = *spl++; *dp++ = *sph++;
645 }
646 if (w)
647 {
648 *dp++ = *spl++; --w;
649 }
650 }
651 else {
652 if (even)
653 dst->i64[0] = lsrc->i64[0];
654 else
655 dst->i64[0] = hsrc->i64[0] >> 1;
656 }
657 }
658
660 void gen_rev_horz_syn(const param_atk* atk, const line_buf* dst,
661 const line_buf* lsrc, const line_buf* hsrc,
662 ui32 width, bool even)
663 {
664 if (dst->flags & line_buf::LFT_32BIT)
665 {
666 assert((lsrc == NULL || lsrc->flags & line_buf::LFT_32BIT) &&
667 (hsrc == NULL || hsrc->flags & line_buf::LFT_32BIT));
668 gen_rev_horz_syn32(atk, dst, lsrc, hsrc, width, even);
669 }
670 else
671 {
672 assert((dst == NULL || dst->flags & line_buf::LFT_64BIT) &&
673 (lsrc == NULL || lsrc->flags & line_buf::LFT_64BIT) &&
674 (hsrc == NULL || hsrc->flags & line_buf::LFT_64BIT));
675 gen_rev_horz_syn64(atk, dst, lsrc, hsrc, width, even);
676 }
677 }
678
680 void gen_irv_vert_step(const lifting_step* s, const line_buf* sig,
681 const line_buf* other, const line_buf* aug,
682 ui32 repeat, bool synthesis)
683 {
684 float a = s->irv.Aatk;
685
686 if (synthesis)
687 a = -a;
688
689 float* dst = aug->f32;
690 const float* src1 = sig->f32, * src2 = other->f32;
691 for (ui32 i = repeat; i > 0; --i)
692 *dst++ += a * (*src1++ + *src2++);
693 }
694
696 void gen_irv_vert_times_K(float K, const line_buf* aug, ui32 repeat)
697 {
698 float* dst = aug->f32;
699 for (ui32 i = repeat; i > 0; --i)
700 *dst++ *= K;
701 }
702
704 void gen_irv_horz_ana(const param_atk* atk, const line_buf* ldst,
705 const line_buf* hdst, const line_buf* src,
706 ui32 width, bool even)
707 {
708 if (width > 1)
709 {
710 // split src into ldst and hdst
711 float* dph = hdst->f32;
712 float* dpl = ldst->f32;
713 float* sp = src->f32;
714 ui32 w = width;
715 if (!even)
716 {
717 *dph++ = *sp++; --w;
718 }
719 for (; w > 1; w -= 2)
720 {
721 *dpl++ = *sp++; *dph++ = *sp++;
722 }
723 if (w)
724 {
725 *dpl++ = *sp++; --w;
726 }
727
728 float* hp = hdst->f32, * lp = ldst->f32;
729 ui32 l_width = (width + (even ? 1 : 0)) >> 1; // low pass
730 ui32 h_width = (width + (even ? 0 : 1)) >> 1; // high pass
731 ui32 num_steps = atk->get_num_steps();
732 for (ui32 j = num_steps; j > 0; --j)
733 {
734 const lifting_step* s = atk->get_step(j - 1);
735 const float a = s->irv.Aatk;
736
737 // extension
738 lp[-1] = lp[0];
739 lp[l_width] = lp[l_width - 1];
740 // lifting step
741 const float* sp = lp + (even ? 1 : 0);
742 float* dp = hp;
743 for (ui32 i = h_width; i > 0; --i, sp++, dp++)
744 *dp += a * (sp[-1] + sp[0]);
745
746 // swap buffers
747 float* t = lp; lp = hp; hp = t;
748 even = !even;
749 ui32 w = l_width; l_width = h_width; h_width = w;
750 }
751
752 {
753 float K = atk->get_K();
754 float K_inv = 1.0f / K;
755 float* dp;
756
757 dp = lp;
758 for (ui32 i = l_width; i > 0; --i)
759 *dp++ *= K_inv;
760
761 dp = hp;
762 for (ui32 i = h_width; i > 0; --i)
763 *dp++ *= K;
764 }
765 }
766 else {
767 if (even)
768 ldst->f32[0] = src->f32[0];
769 else
770 hdst->f32[0] = src->f32[0] * 2.0f;
771 }
772 }
773
775 void gen_irv_horz_syn(const param_atk* atk, const line_buf* dst,
776 const line_buf* lsrc, const line_buf* hsrc,
777 ui32 width, bool even)
778 {
779 if (width > 1)
780 {
781 bool ev = even;
782 float* oth = hsrc->f32, * aug = lsrc->f32;
783 ui32 aug_width = (width + (even ? 1 : 0)) >> 1; // low pass
784 ui32 oth_width = (width + (even ? 0 : 1)) >> 1; // high pass
785
786 {
787 float K = atk->get_K();
788 float K_inv = 1.0f / K;
789 float* dp;
790
791 dp = aug;
792 for (ui32 i = aug_width; i > 0; --i)
793 *dp++ *= K;
794
795 dp = oth;
796 for (ui32 i = oth_width; i > 0; --i)
797 *dp++ *= K_inv;
798 }
799
800 ui32 num_steps = atk->get_num_steps();
801 for (ui32 j = 0; j < num_steps; ++j)
802 {
803 const lifting_step* s = atk->get_step(j);
804 const float a = s->irv.Aatk;
805
806 // extension
807 oth[-1] = oth[0];
808 oth[oth_width] = oth[oth_width - 1];
809 // lifting step
810 const float* sp = oth + (ev ? 0 : 1);
811 float* dp = aug;
812 for (ui32 i = aug_width; i > 0; --i, sp++, dp++)
813 *dp -= a * (sp[-1] + sp[0]);
814
815 // swap buffers
816 float* t = aug; aug = oth; oth = t;
817 ev = !ev;
818 ui32 w = aug_width; aug_width = oth_width; oth_width = w;
819 }
820
821 // combine both lsrc and hsrc into dst
822 float* sph = hsrc->f32;
823 float* spl = lsrc->f32;
824 float* dp = dst->f32;
825 ui32 w = width;
826 if (!even)
827 { *dp++ = *sph++; --w; }
828 for (; w > 1; w -= 2)
829 { *dp++ = *spl++; *dp++ = *sph++; }
830 if (w)
831 { *dp++ = *spl++; --w; }
832 }
833 else {
834 if (even)
835 dst->f32[0] = lsrc->f32[0];
836 else
837 dst->f32[0] = hsrc->f32[0] * 0.5f;
838 }
839 }
840
841#endif // !OJPH_ENABLE_WASM_SIMD
842
843 }
844}
float * f32
Definition ojph_mem.h:162
void(* rev_horz_ana)(const param_atk *atk, const line_buf *ldst, const line_buf *hdst, const line_buf *src, ui32 width, bool even)
void gen_irv_vert_times_K(float K, const line_buf *aug, ui32 repeat)
void gen_rev_vert_step(const lifting_step *s, const line_buf *sig, const line_buf *other, const line_buf *aug, ui32 repeat, bool synthesis)
void gen_rev_horz_syn(const param_atk *atk, const line_buf *dst, const line_buf *lsrc, const line_buf *hsrc, ui32 width, bool even)
static void gen_rev_horz_syn32(const param_atk *atk, const line_buf *dst, const line_buf *lsrc, const line_buf *hsrc, ui32 width, bool even)
void sse2_rev_horz_ana(const param_atk *atk, const line_buf *ldst, const line_buf *hdst, const line_buf *src, ui32 width, bool even)
static void gen_rev_vert_step64(const lifting_step *s, const line_buf *sig, const line_buf *other, const line_buf *aug, ui32 repeat, bool synthesis)
void avx512_irv_vert_step(const lifting_step *s, const line_buf *sig, const line_buf *other, const line_buf *aug, ui32 repeat, bool synthesis)
void wasm_rev_horz_ana(const param_atk *atk, const line_buf *ldst, const line_buf *hdst, const line_buf *src, ui32 width, bool even)
void gen_rev_horz_ana(const param_atk *atk, const line_buf *ldst, const line_buf *hdst, const line_buf *src, ui32 width, bool even)
void gen_irv_horz_syn(const param_atk *atk, const line_buf *dst, const line_buf *lsrc, const line_buf *hsrc, ui32 width, bool even)
void sse2_rev_horz_syn(const param_atk *atk, const line_buf *dst, const line_buf *lsrc, const line_buf *hsrc, ui32 width, bool even)
static void gen_rev_vert_step32(const lifting_step *s, const line_buf *sig, const line_buf *other, const line_buf *aug, ui32 repeat, bool synthesis)
static void gen_rev_horz_ana64(const param_atk *atk, const line_buf *ldst, const line_buf *hdst, const line_buf *src, ui32 width, bool even)
void(* irv_vert_times_K)(float K, const line_buf *aug, ui32 repeat)
static bool wavelet_transform_functions_initialized
void gen_irv_vert_step(const lifting_step *s, const line_buf *sig, const line_buf *other, const line_buf *aug, ui32 repeat, bool synthesis)
void(* irv_vert_step)(const lifting_step *s, const line_buf *sig, const line_buf *other, const line_buf *aug, ui32 repeat, bool synthesis)
void avx_irv_horz_syn(const param_atk *atk, const line_buf *dst, const line_buf *lsrc, const line_buf *hsrc, ui32 width, bool even)
void sse_irv_vert_times_K(float K, const line_buf *aug, ui32 repeat)
void avx2_rev_horz_syn(const param_atk *atk, const line_buf *dst, const line_buf *lsrc, const line_buf *hsrc, ui32 width, bool even)
void init_wavelet_transform_functions()
void wasm_rev_vert_step(const lifting_step *s, const line_buf *sig, const line_buf *other, const line_buf *aug, ui32 repeat, bool synthesis)
void wasm_irv_vert_times_K(float K, const line_buf *aug, ui32 repeat)
static void gen_rev_horz_syn64(const param_atk *atk, const line_buf *dst, const line_buf *lsrc, const line_buf *hsrc, ui32 width, bool even)
void avx512_irv_horz_ana(const param_atk *atk, const line_buf *ldst, const line_buf *hdst, const line_buf *src, ui32 width, bool even)
void avx2_rev_vert_step(const lifting_step *s, const line_buf *sig, const line_buf *other, const line_buf *aug, ui32 repeat, bool synthesis)
void sse_irv_vert_step(const lifting_step *s, const line_buf *sig, const line_buf *other, const line_buf *aug, ui32 repeat, bool synthesis)
void wasm_irv_horz_ana(const param_atk *atk, const line_buf *ldst, const line_buf *hdst, const line_buf *src, ui32 width, bool even)
void sse_irv_horz_ana(const param_atk *atk, const line_buf *ldst, const line_buf *hdst, const line_buf *src, ui32 width, bool even)
void wasm_irv_vert_step(const lifting_step *s, const line_buf *sig, const line_buf *other, const line_buf *aug, ui32 repeat, bool synthesis)
void avx_irv_vert_step(const lifting_step *s, const line_buf *sig, const line_buf *other, const line_buf *aug, ui32 repeat, bool synthesis)
void sse_irv_horz_syn(const param_atk *atk, const line_buf *dst, const line_buf *lsrc, const line_buf *hsrc, ui32 width, bool even)
void avx512_irv_vert_times_K(float K, const line_buf *aug, ui32 repeat)
void avx512_irv_horz_syn(const param_atk *atk, const line_buf *dst, const line_buf *lsrc, const line_buf *hsrc, ui32 width, bool even)
void avx_irv_horz_ana(const param_atk *atk, const line_buf *ldst, const line_buf *hdst, const line_buf *src, ui32 width, bool even)
void avx2_rev_horz_ana(const param_atk *atk, const line_buf *ldst, const line_buf *hdst, const line_buf *src, ui32 width, bool even)
void(* rev_horz_syn)(const param_atk *atk, const line_buf *dst, const line_buf *lsrc, const line_buf *hsrc, ui32 width, bool even)
void sse2_rev_vert_step(const lifting_step *s, const line_buf *sig, const line_buf *other, const line_buf *aug, ui32 repeat, bool synthesis)
void wasm_rev_horz_syn(const param_atk *atk, const line_buf *dst, const line_buf *lsrc, const line_buf *hsrc, ui32 width, bool even)
void(* irv_horz_ana)(const param_atk *atk, const line_buf *ldst, const line_buf *hdst, const line_buf *src, ui32 width, bool even)
void(* rev_vert_step)(const lifting_step *s, const line_buf *sig, const line_buf *other, const line_buf *aug, ui32 repeat, bool synthesis)
void gen_irv_horz_ana(const param_atk *atk, const line_buf *ldst, const line_buf *hdst, const line_buf *src, ui32 width, bool even)
void avx_irv_vert_times_K(float K, const line_buf *aug, ui32 repeat)
void(* irv_horz_syn)(const param_atk *atk, const line_buf *dst, const line_buf *lsrc, const line_buf *hsrc, ui32 width, bool even)
void wasm_irv_horz_syn(const param_atk *atk, const line_buf *dst, const line_buf *lsrc, const line_buf *hsrc, ui32 width, bool even)
static void gen_rev_horz_ana32(const param_atk *atk, const line_buf *ldst, const line_buf *hdst, const line_buf *src, ui32 width, bool even)
@ X86_CPU_EXT_LEVEL_AVX2
Definition ojph_arch.h:138
@ X86_CPU_EXT_LEVEL_AVX
Definition ojph_arch.h:137
@ X86_CPU_EXT_LEVEL_AVX512
Definition ojph_arch.h:140
@ X86_CPU_EXT_LEVEL_SSE2
Definition ojph_arch.h:132
@ X86_CPU_EXT_LEVEL_SSE
Definition ojph_arch.h:131
int64_t si64
Definition ojph_defs.h:57
OJPH_EXPORT int get_cpu_ext_level()
int32_t si32
Definition ojph_defs.h:55
uint32_t ui32
Definition ojph_defs.h:54
uint8_t ui8
Definition ojph_defs.h:50
const lifting_step * get_step(ui32 s) const