OpenJPH
Open-source implementation of JPEG2000 Part-15
Loading...
Searching...
No Matches
ojph_codestream_wasm.cpp
Go to the documentation of this file.
1//***************************************************************************/
2// This software is released under the 2-Clause BSD license, included
3// below.
4//
5// Copyright (c) 2022, Aous Naman
6// Copyright (c) 2022, Kakadu Software Pty Ltd, Australia
7// Copyright (c) 2022, The University of New South Wales, Australia
8//
9// Redistribution and use in source and binary forms, with or without
10// modification, are permitted provided that the following conditions are
11// met:
12//
13// 1. Redistributions of source code must retain the above copyright
14// notice, this list of conditions and the following disclaimer.
15//
16// 2. Redistributions in binary form must reproduce the above copyright
17// notice, this list of conditions and the following disclaimer in the
18// documentation and/or other materials provided with the distribution.
19//
20// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
21// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
23// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
26// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
27// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
28// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
29// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31//***************************************************************************/
32// This file is part of the OpenJPH software implementation.
33// File: ojph_codestream_wasm.cpp
34// Author: Aous Naman
35// Date: 15 May 2022
36//***************************************************************************/
37
38#include <climits>
39#include <cstddef>
40#include <wasm_simd128.h>
41
42#include "ojph_defs.h"
43
44namespace ojph {
45 namespace local {
46
48 void wasm_mem_clear(void* addr, size_t count)
49 {
50 float* p = (float*)addr;
51 v128_t zero = wasm_i32x4_splat(0);
52 for (size_t i = 0; i < count; i += 16, p += 4)
53 wasm_v128_store(p, zero);
54 }
55
58 {
59 v128_t x1, x0 = wasm_v128_load(address);
60 x1 = wasm_i32x4_shuffle(x0, x0, 2, 3, 2, 3); // x1 = x0[2,3,2,3]
61 x0 = wasm_v128_or(x0, x1);
62 x1 = wasm_i32x4_shuffle(x0, x0, 1, 1, 1, 1); // x1 = x0[1,1,1,1]
63 x0 = wasm_v128_or(x0, x1);
64 ui32 t = (ui32)wasm_i32x4_extract_lane(x0, 0);
65 return t;
66 }
67
70 {
71 v128_t x1, x0 = wasm_v128_load(address);
72 x1 = wasm_i64x2_shuffle(x0, x0, 1, 1); // x1 = x0[2,3,2,3]
73 x0 = wasm_v128_or(x0, x1);
74 ui64 t = (ui64)wasm_i64x2_extract_lane(x0, 0);
75 return t;
76 }
77
79 void wasm_rev_tx_to_cb32(const void *sp, ui32 *dp, ui32 K_max,
80 float delta_inv, ui32 count, ui32* max_val)
81 {
82 ojph_unused(delta_inv);
83
84 // convert to sign and magnitude and keep max_val
85 ui32 shift = 31 - K_max;
86 v128_t m0 = wasm_i32x4_splat(INT_MIN);
87 v128_t zero = wasm_i32x4_splat(0);
88 v128_t one = wasm_i32x4_splat(1);
89 v128_t tmax = wasm_v128_load(max_val);
90 si32 *p = (si32*)sp;
91 for ( ; count >= 4; count -= 4, p += 4, dp += 4)
92 {
93 v128_t v = wasm_v128_load(p);
94 v128_t sign = wasm_i32x4_lt(v, zero);
95 v128_t val = wasm_v128_xor(v, sign); // negate 1's complement
96 v128_t ones = wasm_v128_and(sign, one);
97 val = wasm_i32x4_add(val, ones); // 2's complement
98 sign = wasm_v128_and(sign, m0);
99 val = wasm_i32x4_shl(val, shift);
100 tmax = wasm_v128_or(tmax, val);
101 val = wasm_v128_or(val, sign);
102 wasm_v128_store(dp, val);
103 }
104 if (count)
105 {
106 v128_t v = wasm_v128_load(p);
107 v128_t sign = wasm_i32x4_lt(v, zero);
108 v128_t val = wasm_v128_xor(v, sign); // negate 1's complement
109 v128_t ones = wasm_v128_and(sign, one);
110 val = wasm_i32x4_add(val, ones); // 2's complement
111 sign = wasm_v128_and(sign, m0);
112 val = wasm_i32x4_shl(val, shift);
113
114 v128_t c = wasm_i32x4_splat((si32)count);
115 v128_t idx = wasm_i32x4_make(0, 1, 2, 3);
116 v128_t mask = wasm_i32x4_gt(c, idx);
117 c = wasm_v128_and(val, mask);
118 tmax = wasm_v128_or(tmax, c);
119
120 val = wasm_v128_or(val, sign);
121 wasm_v128_store(dp, val);
122 }
123 wasm_v128_store(max_val, tmax);
124 }
125
127 void wasm_irv_tx_to_cb32(const void *sp, ui32 *dp, ui32 K_max,
128 float delta_inv, ui32 count, ui32* max_val)
129 {
130 ojph_unused(K_max);
131
132 //quantize and convert to sign and magnitude and keep max_val
133
134 v128_t d = wasm_f32x4_splat(delta_inv);
135 v128_t zero = wasm_i32x4_splat(0);
136 v128_t one = wasm_i32x4_splat(1);
137 v128_t tmax = wasm_v128_load(max_val);
138 float *p = (float*)sp;
139 for ( ; count >= 4; count -= 4, p += 4, dp += 4)
140 {
141 v128_t vf = wasm_v128_load(p);
142 vf = wasm_f32x4_mul(vf, d); // multiply
143 v128_t val = wasm_i32x4_trunc_sat_f32x4(vf); // convert to signed int
144 v128_t sign = wasm_i32x4_lt(val, zero); // get sign
145 val = wasm_v128_xor(val, sign); // negate 1's complement
146 v128_t ones = wasm_v128_and(sign, one);
147 val = wasm_i32x4_add(val, ones); // 2's complement
148 tmax = wasm_v128_or(tmax, val);
149 sign = wasm_i32x4_shl(sign, 31);
150 val = wasm_v128_or(val, sign);
151 wasm_v128_store(dp, val);
152 }
153 if (count)
154 {
155 v128_t vf = wasm_v128_load(p);
156 vf = wasm_f32x4_mul(vf, d); // multiply
157 v128_t val = wasm_i32x4_trunc_sat_f32x4(vf); // convert to signed int
158 v128_t sign = wasm_i32x4_lt(val, zero); // get sign
159 val = wasm_v128_xor(val, sign); // negate 1's complement
160 v128_t ones = wasm_v128_and(sign, one);
161 val = wasm_i32x4_add(val, ones); // 2's complement
162
163 v128_t c = wasm_i32x4_splat((si32)count);
164 v128_t idx = wasm_i32x4_make(0, 1, 2, 3);
165 v128_t mask = wasm_i32x4_gt(c, idx);
166 c = wasm_v128_and(val, mask);
167 tmax = wasm_v128_or(tmax, c);
168
169 sign = wasm_i32x4_shl(sign, 31);
170 val = wasm_v128_or(val, sign);
171 wasm_v128_store(dp, val);
172 }
173 wasm_v128_store(max_val, tmax);
174 }
175
177 void wasm_rev_tx_from_cb32(const ui32 *sp, void *dp, ui32 K_max,
178 float delta, ui32 count)
179 {
180 ojph_unused(delta);
181 ui32 shift = 31 - K_max;
182 v128_t m1 = wasm_i32x4_splat(INT_MAX);
183 v128_t zero = wasm_i32x4_splat(0);
184 v128_t one = wasm_i32x4_splat(1);
185 si32 *p = (si32*)dp;
186 for (ui32 i = 0; i < count; i += 4, sp += 4, p += 4)
187 {
188 v128_t v = wasm_v128_load((v128_t*)sp);
189 v128_t val = wasm_v128_and(v, m1);
190 val = wasm_i32x4_shr(val, shift);
191 v128_t sign = wasm_i32x4_lt(v, zero);
192 val = wasm_v128_xor(val, sign); // negate 1's complement
193 v128_t ones = wasm_v128_and(sign, one);
194 val = wasm_i32x4_add(val, ones); // 2's complement
195 wasm_v128_store(p, val);
196 }
197 }
198
200 void wasm_irv_tx_from_cb32(const ui32 *sp, void *dp, ui32 K_max,
201 float delta, ui32 count)
202 {
203 ojph_unused(K_max);
204 v128_t m1 = wasm_i32x4_splat(INT_MAX);
205 v128_t d = wasm_f32x4_splat(delta);
206 float *p = (float*)dp;
207 for (ui32 i = 0; i < count; i += 4, sp += 4, p += 4)
208 {
209 v128_t v = wasm_v128_load((v128_t*)sp);
210 v128_t vali = wasm_v128_and(v, m1);
211 v128_t valf = wasm_f32x4_convert_i32x4(vali);
212 valf = wasm_f32x4_mul(valf, d);
213 v128_t sign = wasm_v128_andnot(v, m1);
214 valf = wasm_v128_or(valf, sign);
215 wasm_v128_store(p, valf);
216 }
217 }
218
220 void wasm_rev_tx_to_cb64(const void *sp, ui64 *dp, ui32 K_max,
221 float delta_inv, ui32 count, ui64* max_val)
222 {
223 ojph_unused(delta_inv);
224
225 // convert to sign and magnitude and keep max_val
226 ui32 shift = 63 - K_max;
227 v128_t m0 = wasm_i64x2_splat(LLONG_MIN);
228 v128_t zero = wasm_i64x2_splat(0);
229 v128_t one = wasm_i64x2_splat(1);
230 v128_t tmax = wasm_v128_load(max_val);
231 si64 *p = (si64*)sp;
232 for ( ; count >= 2; count -= 2, p += 2, dp += 2)
233 {
234 v128_t v = wasm_v128_load(p);
235 v128_t sign = wasm_i64x2_lt(v, zero);
236 v128_t val = wasm_v128_xor(v, sign); // negate 1's complement
237 v128_t ones = wasm_v128_and(sign, one);
238 val = wasm_i64x2_add(val, ones); // 2's complement
239 sign = wasm_v128_and(sign, m0);
240 val = wasm_i64x2_shl(val, shift);
241 tmax = wasm_v128_or(tmax, val);
242 val = wasm_v128_or(val, sign);
243 wasm_v128_store(dp, val);
244 }
245 if (count)
246 {
247 v128_t v = wasm_v128_load(p);
248 v128_t sign = wasm_i64x2_lt(v, zero);
249 v128_t val = wasm_v128_xor(v, sign); // negate 1's complement
250 v128_t ones = wasm_v128_and(sign, one);
251 val = wasm_i64x2_add(val, ones); // 2's complement
252 sign = wasm_v128_and(sign, m0);
253 val = wasm_i64x2_shl(val, shift);
254
255 v128_t c = wasm_i32x4_make((si32)0xFFFFFFFF, (si32)0xFFFFFFFF, 0, 0);
256 c = wasm_v128_and(val, c);
257 tmax = wasm_v128_or(tmax, c);
258
259 val = wasm_v128_or(val, sign);
260 wasm_v128_store(dp, val);
261 }
262
263 wasm_v128_store(max_val, tmax);
264 }
265
267 void wasm_rev_tx_from_cb64(const ui64 *sp, void *dp, ui32 K_max,
268 float delta, ui32 count)
269 {
270 ojph_unused(delta);
271 ui32 shift = 63 - K_max;
272 v128_t m1 = wasm_i64x2_splat(LLONG_MAX);
273 v128_t zero = wasm_i64x2_splat(0);
274 v128_t one = wasm_i64x2_splat(1);
275 si64 *p = (si64*)dp;
276 for (ui32 i = 0; i < count; i += 2, sp += 2, p += 2)
277 {
278 v128_t v = wasm_v128_load((v128_t*)sp);
279 v128_t val = wasm_v128_and(v, m1);
280 val = wasm_i64x2_shr(val, shift);
281 v128_t sign = wasm_i64x2_lt(v, zero);
282 val = wasm_v128_xor(val, sign); // negate 1's complement
283 v128_t ones = wasm_v128_and(sign, one);
284 val = wasm_i64x2_add(val, ones); // 2's complement
285 wasm_v128_store(p, val);
286 }
287 }
288 }
289}
void wasm_rev_tx_to_cb64(const void *sp, ui64 *dp, ui32 K_max, float delta_inv, ui32 count, ui64 *max_val)
ui32 wasm_find_max_val32(ui32 *address)
void wasm_rev_tx_from_cb32(const ui32 *sp, void *dp, ui32 K_max, float delta, ui32 count)
void wasm_rev_tx_from_cb64(const ui64 *sp, void *dp, ui32 K_max, float delta, ui32 count)
void wasm_irv_tx_to_cb32(const void *sp, ui32 *dp, ui32 K_max, float delta_inv, ui32 count, ui32 *max_val)
ui64 wasm_find_max_val64(ui64 *address)
void wasm_mem_clear(void *addr, size_t count)
void wasm_rev_tx_to_cb32(const void *sp, ui32 *dp, ui32 K_max, float delta_inv, ui32 count, ui32 *max_val)
void wasm_irv_tx_from_cb32(const ui32 *sp, void *dp, ui32 K_max, float delta, ui32 count)
int64_t si64
Definition ojph_defs.h:57
uint64_t ui64
Definition ojph_defs.h:56
int32_t si32
Definition ojph_defs.h:55
uint32_t ui32
Definition ojph_defs.h:54
#define ojph_unused(x)
Definition ojph_defs.h:78