OpenJPH
Open-source implementation of JPEG2000 Part-15
Loading...
Searching...
No Matches
ojph_arch.h
Go to the documentation of this file.
1//***************************************************************************/
2// This software is released under the 2-Clause BSD license, included
3// below.
4//
5// Copyright (c) 2019, Aous Naman
6// Copyright (c) 2019, Kakadu Software Pty Ltd, Australia
7// Copyright (c) 2019, The University of New South Wales, Australia
8//
9// Redistribution and use in source and binary forms, with or without
10// modification, are permitted provided that the following conditions are
11// met:
12//
13// 1. Redistributions of source code must retain the above copyright
14// notice, this list of conditions and the following disclaimer.
15//
16// 2. Redistributions in binary form must reproduce the above copyright
17// notice, this list of conditions and the following disclaimer in the
18// documentation and/or other materials provided with the distribution.
19//
20// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
21// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
23// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
26// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
27// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
28// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
29// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31//***************************************************************************/
32// This file is part of the OpenJPH software implementation.
33// File: ojph_arch.h
34// Author: Aous Naman
35// Date: 28 August 2019
36//***************************************************************************/
37
38
39#ifndef OJPH_ARCH_H
40#define OJPH_ARCH_H
41
42#include <cstdio>
43#include <cstdint>
44#include <cmath>
45
46#include "ojph_defs.h"
47
48
50// preprocessor directives for compiler
// Defines OJPH_COMPILER_MSVC when _MSC_VER is defined; otherwise defines
// OJPH_COMPILER_GNUC when __GNUC__ is defined. Neither macro is defined
// when both tests fail.
52#ifdef _MSC_VER
53#define OJPH_COMPILER_MSVC
54#elif (defined __GNUC__)
55#define OJPH_COMPILER_GNUC
56#endif
57
// OJPH_EMSCRIPTEN mirrors the compiler-provided __EMSCRIPTEN__ macro.
58#ifdef __EMSCRIPTEN__
59#define OJPH_EMSCRIPTEN
60#endif
61
// <intrin.h> is pulled in only for MSVC; the bit-manipulation helpers
// further down call the __popcnt and _BitScan* intrinsics on that compiler.
62#ifdef OJPH_COMPILER_MSVC
63#include <intrin.h>
64#endif
65
67// preprocessor directives for architecture
// Defines exactly one OJPH_ARCH_* macro from the compiler's predefined
// target macros. The tests run in the order ARM, i386, x86-64, IA-64,
// PowerPC and the first match wins; OJPH_ARCH_UNKNOWN is defined when
// none of them matches.
69#if defined(__arm__) || defined(__TARGET_ARCH_ARM) \
70 || defined(__aarch64__) || defined(_M_ARM64)
71 #define OJPH_ARCH_ARM
72#elif defined(__i386) || defined(__i386__) || defined(_M_IX86)
73 #define OJPH_ARCH_I386
74#elif defined(__x86_64) || defined(__x86_64__) || defined(__amd64) \
75 || defined(_M_X64)
76 #define OJPH_ARCH_X86_64
77#elif defined(__ia64) || defined(__ia64__) || defined(_M_IA64)
78 #define OJPH_ARCH_IA64
79#elif defined(__ppc__) || defined(__ppc) || defined(__powerpc__) \
80 || defined(_ARCH_COM) || defined(_ARCH_PWR) || defined(_ARCH_PPC) \
81 || defined(_M_MPPC) || defined(_M_PPC)
// PowerPC is the only family split into separate 64-bit and 32-bit macros
82 #if defined(__ppc64__) || defined(__powerpc64__) || defined(__64BIT__)
83 #define OJPH_ARCH_PPC64
84 #else
85 #define OJPH_ARCH_PPC
86 #endif
87#else
88 #define OJPH_ARCH_UNKNOWN
89#endif
90
91namespace ojph {
93 // disable SIMD on every architecture other than x86-64, i386 and ARM
// (this covers OJPH_ARCH_UNKNOWN as well as IA-64 and PowerPC); the last
// test only avoids redefining OJPH_DISABLE_SIMD when it is already defined
95#if !defined(OJPH_ARCH_X86_64) && !defined(OJPH_ARCH_I386) && \
96 !defined(OJPH_ARCH_ARM) && !defined(OJPH_DISABLE_SIMD)
97#define OJPH_DISABLE_SIMD
98#endif // not x86-64 / i386 / ARM, and OJPH_DISABLE_SIMD not yet defined
99
101 // OS detection definitions
// Defines at most one OJPH_OS_* macro; the first matching test wins and no
// macro is defined when none matches. __ANDROID__ is tested before __linux,
// so a target that defines both is reported as OJPH_OS_ANDROID.
103#if (defined WIN32) || (defined _WIN32) || (defined _WIN64)
104#define OJPH_OS_WINDOWS
105#elif (defined __APPLE__)
106#define OJPH_OS_APPLE
107#elif (defined __ANDROID__)
108#define OJPH_OS_ANDROID
109#elif (defined __linux)
110#define OJPH_OS_LINUX
111#endif
112
114 // defines for dll
// OJPH_EXPORT expands to __declspec(dllexport) only when targeting Windows
// with OJPH_BUILD_SHARED_LIBRARY defined; in every other configuration it
// expands to nothing.
116#if defined(OJPH_OS_WINDOWS) && defined(OJPH_BUILD_SHARED_LIBRARY)
117#define OJPH_EXPORT __declspec(dllexport)
118#else
119#define OJPH_EXPORT
120#endif
121
123 // cpu features
// Declared here, defined elsewhere.
// NOTE(review): presumably returns one of the *_CPU_EXT_LEVEL_* values
// from the enums below -- confirm against the implementation.
126 int get_cpu_ext_level();
127
// NOTE(review): the enumerators of both enums are not visible in this
// listing (numbered lines 129-140 and 144-148 are missing); by their
// names they are the X86_CPU_EXT_LEVEL_* and ARM_CPU_EXT_LEVEL_* constants
// -- confirm against the original header before relying on their values.
128 enum : int {
141 };
142
143 enum : int {
149 };
150
152 static inline ui32 population_count(ui32 val)
153 {
154 #if defined(OJPH_COMPILER_MSVC) \
155 && (defined(OJPH_ARCH_X86_64) || defined(OJPH_ARCH_I386))
156 return (ui32)__popcnt(val);
157 #elif (defined OJPH_COMPILER_GNUC)
158 return (ui32)__builtin_popcount(val);
159 #else
160 val -= ((val >> 1) & 0x55555555);
161 val = (((val >> 2) & 0x33333333) + (val & 0x33333333));
162 val = (((val >> 4) + val) & 0x0f0f0f0f);
163 val += (val >> 8);
164 val += (val >> 16);
165 return (int)(val & 0x0000003f);
166 #endif
167 }
168
170#ifdef OJPH_COMPILER_MSVC
171 #pragma intrinsic(_BitScanReverse)
172#endif
173 static inline ui32 count_leading_zeros(ui32 val)
174 {
175 #ifdef OJPH_COMPILER_MSVC
176 unsigned long result = 0;
177 _BitScanReverse(&result, val);
178 return 31 ^ (ui32)result;
179 #elif (defined OJPH_COMPILER_GNUC)
180 return (ui32)__builtin_clz(val);
181 #else
182 val |= (val >> 1);
183 val |= (val >> 2);
184 val |= (val >> 4);
185 val |= (val >> 8);
186 val |= (val >> 16);
187 return 32 - population_count(val);
188 #endif
189 }
190
192#ifdef OJPH_COMPILER_MSVC
193 #if (defined OJPH_ARCH_X86_64)
194 #pragma intrinsic(_BitScanReverse64)
195 #elif (defined OJPH_ARCH_I386)
196 #pragma intrinsic(_BitScanReverse)
197 #else
198 #error Error unsupport MSVC version
199 #endif
200#endif
201 static inline ui32 count_leading_zeros(ui64 val)
202 {
203 #ifdef OJPH_COMPILER_MSVC
204 unsigned long result = 0;
205 #ifdef OJPH_ARCH_X86_64
206 _BitScanReverse64(&result, val);
207 #elif (defined OJPH_ARCH_I386)
208 ui32 msb = (ui32)(val >> 32), lsb = (ui32)val;
209 if (msb == 0)
210 _BitScanReverse(&result, lsb);
211 else {
212 _BitScanReverse(&result, msb);
213 result += 32;
214 }
215 #else
216 #error Error unsupport MSVC version
217 #endif
218 return 63 ^ (ui32)result;
219 #elif (defined OJPH_COMPILER_GNUC)
220 return (ui32)__builtin_clzll(val);
221 #else
222 val |= (val >> 1);
223 val |= (val >> 2);
224 val |= (val >> 4);
225 val |= (val >> 8);
226 val |= (val >> 16);
227 val |= (val >> 32);
228 return 64 - population_count64(val);
229 #endif
230 }
231
233#ifdef OJPH_COMPILER_MSVC
234 #pragma intrinsic(_BitScanForward)
235#endif
236 static inline ui32 count_trailing_zeros(ui32 val)
237 {
238 #ifdef OJPH_COMPILER_MSVC
239 unsigned long result = 0;
240 _BitScanForward(&result, val);
241 return (ui32)result;
242 #elif (defined OJPH_COMPILER_GNUC)
243 return (ui32)__builtin_ctz(val);
244 #else
245 val |= (val << 1);
246 val |= (val << 2);
247 val |= (val << 4);
248 val |= (val << 8);
249 val |= (val << 16);
250 return 32 - population_count(val);
251 #endif
252 }
253
255 static inline si32 ojph_round(float val)
256 {
257 #ifdef OJPH_COMPILER_MSVC
258 return (si32)(val + (val >= 0.0f ? 0.5f : -0.5f));
259 #elif (defined OJPH_COMPILER_GNUC)
260 return (si32)(val + (val >= 0.0f ? 0.5f : -0.5f));
261 #else
262 return (si32)round(val);
263 #endif
264 }
265
267 static inline si32 ojph_trunc(float val)
268 {
269 #ifdef OJPH_COMPILER_MSVC
270 return (si32)(val);
271 #elif (defined OJPH_COMPILER_GNUC)
272 return (si32)(val);
273 #else
274 return (si32)trunc(val);
275 #endif
276 }
277
279 // constants
// byte_alignment is 64 unless OJPH_EMSCRIPTEN is defined, in which case it
// is 16.
// NOTE(review): numbered lines 283, 284 and 287 are missing from this
// listing; they presumably hold log_byte_alignment and, for the first
// branch, object_alignment -- confirm against the original header.
281 #ifndef OJPH_EMSCRIPTEN
282 const ui32 byte_alignment = 64; // 64 bytes == 512 bits
285 #else
286 const ui32 byte_alignment = 16; // 16 bytes == 128 bits
288 const ui32 object_alignment = 8;
289 #endif
290
292 // templates for alignment
294
296 // finds the size such that it is a multiple of byte_alignment
297 template <typename T, ui32 N>
298 size_t calc_aligned_size(size_t size) {
299 size = size * sizeof(T) + N - 1;
300 size &= ~((1ULL << (31 - count_leading_zeros(N))) - 1);
301 size >>= (63 - count_leading_zeros((ui64)sizeof(T)));
302 return size;
303 }
304
306 // moves the pointer to first address that is a multiple of byte_alignment
307 template <typename T, ui32 N>
308 inline T *align_ptr(T *ptr) {
309 intptr_t p = reinterpret_cast<intptr_t>(ptr);
310 p += N - 1;
311 p &= ~((1ULL << (31 - count_leading_zeros(N))) - 1);
312 return reinterpret_cast<T *>(p);
313 }
314
315}
316
317#endif // !OJPH_ARCH_H
@ ARM_CPU_EXT_LEVEL_SVE
Definition ojph_arch.h:147
@ ARM_CPU_EXT_LEVEL_SVE2
Definition ojph_arch.h:148
@ ARM_CPU_EXT_LEVEL_NEON
Definition ojph_arch.h:145
@ ARM_CPU_EXT_LEVEL_GENERIC
Definition ojph_arch.h:144
@ ARM_CPU_EXT_LEVEL_ASIMD
Definition ojph_arch.h:146
const ui32 object_alignment
Definition ojph_arch.h:284
@ X86_CPU_EXT_LEVEL_AVX2
Definition ojph_arch.h:138
@ X86_CPU_EXT_LEVEL_AVX
Definition ojph_arch.h:137
@ X86_CPU_EXT_LEVEL_AVX512
Definition ojph_arch.h:140
@ X86_CPU_EXT_LEVEL_GENERIC
Definition ojph_arch.h:129
@ X86_CPU_EXT_LEVEL_SSE2
Definition ojph_arch.h:132
@ X86_CPU_EXT_LEVEL_SSE41
Definition ojph_arch.h:135
@ X86_CPU_EXT_LEVEL_SSE
Definition ojph_arch.h:131
@ X86_CPU_EXT_LEVEL_MMX
Definition ojph_arch.h:130
@ X86_CPU_EXT_LEVEL_SSE42
Definition ojph_arch.h:136
@ X86_CPU_EXT_LEVEL_SSSE3
Definition ojph_arch.h:134
@ X86_CPU_EXT_LEVEL_SSE3
Definition ojph_arch.h:133
@ X86_CPU_EXT_LEVEL_AVX2FMA
Definition ojph_arch.h:139
const ui32 byte_alignment
Definition ojph_arch.h:282
uint64_t ui64
Definition ojph_defs.h:56
static si32 ojph_round(float val)
Definition ojph_arch.h:255
size_t calc_aligned_size(size_t size)
Definition ojph_arch.h:298
T * align_ptr(T *ptr)
Definition ojph_arch.h:308
static ui32 population_count(ui32 val)
Definition ojph_arch.h:152
OJPH_EXPORT int get_cpu_ext_level()
static si32 ojph_trunc(float val)
Definition ojph_arch.h:267
static ui32 count_trailing_zeros(ui32 val)
Definition ojph_arch.h:236
static ui32 count_leading_zeros(ui32 val)
Definition ojph_arch.h:173
int32_t si32
Definition ojph_defs.h:55
const ui32 log_byte_alignment
Definition ojph_arch.h:283
uint32_t ui32
Definition ojph_defs.h:54
#define OJPH_EXPORT
Definition ojph_arch.h:119