#if JUCE_GCC && (__GNUC__ >= 6)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wignored-attributes"
#endif

// NOTE(review): the MSVC and GCC/Clang variants of these macros were emitted
// back-to-back with the surrounding conditionals lost — restored here so only
// one pair of definitions is active per compiler.
#ifdef _MSC_VER
 // MSVC spells 32-byte alignment with __declspec(align(32)).
 #define DECLARE_AVX_SIMD_CONST(type, name) \
    static __declspec(align(32)) const type name[32 / sizeof (type)]

 #define DEFINE_AVX_SIMD_CONST(type, class_type, name) \
    __declspec(align(32)) const type SIMDNativeOps<class_type>:: name[32 / sizeof (type)]
#else
 // GCC/Clang use the aligned attribute after the declarator.
 #define DECLARE_AVX_SIMD_CONST(type, name) \
    static const type name[32 / sizeof (type)] __attribute__((aligned(32)))

 #define DEFINE_AVX_SIMD_CONST(type, class_type, name) \
    const type SIMDNativeOps<class_type>:: name[32 / sizeof (type)] __attribute__((aligned(32)))
#endif
55template <
typename type>
64struct SIMDNativeOps<float>
66 using vSIMDType = __m256;
69 DECLARE_AVX_SIMD_CONST (int32_t, kAllBitsSet);
70 DECLARE_AVX_SIMD_CONST (int32_t, kEvenHighBit);
71 DECLARE_AVX_SIMD_CONST (
float, kOne);
74 static forcedinline __m256 JUCE_VECTOR_CALLTYPE vconst (
const float* a)
noexcept {
return load (a); }
75 static forcedinline __m256 JUCE_VECTOR_CALLTYPE vconst (
const int32_t* a)
noexcept {
return _mm256_castsi256_ps (_mm256_load_si256 (
reinterpret_cast <const __m256i*
> (a))); }
76 static forcedinline __m256 JUCE_VECTOR_CALLTYPE expand (
float s)
noexcept {
return _mm256_broadcast_ss (&s); }
77 static forcedinline __m256 JUCE_VECTOR_CALLTYPE load (
const float* a)
noexcept {
return _mm256_load_ps (a); }
78 static forcedinline
void JUCE_VECTOR_CALLTYPE store (__m256 value,
float* dest)
noexcept { _mm256_store_ps (dest, value); }
79 static forcedinline __m256 JUCE_VECTOR_CALLTYPE add (__m256 a, __m256 b)
noexcept {
return _mm256_add_ps (a, b); }
80 static forcedinline __m256 JUCE_VECTOR_CALLTYPE sub (__m256 a, __m256 b)
noexcept {
return _mm256_sub_ps (a, b); }
81 static forcedinline __m256 JUCE_VECTOR_CALLTYPE mul (__m256 a, __m256 b)
noexcept {
return _mm256_mul_ps (a, b); }
82 static forcedinline __m256 JUCE_VECTOR_CALLTYPE bit_and (__m256 a, __m256 b)
noexcept {
return _mm256_and_ps (a, b); }
83 static forcedinline __m256 JUCE_VECTOR_CALLTYPE bit_or (__m256 a, __m256 b)
noexcept {
return _mm256_or_ps (a, b); }
84 static forcedinline __m256 JUCE_VECTOR_CALLTYPE bit_xor (__m256 a, __m256 b)
noexcept {
return _mm256_xor_ps (a, b); }
85 static forcedinline __m256 JUCE_VECTOR_CALLTYPE bit_notand (__m256 a, __m256 b)
noexcept {
return _mm256_andnot_ps (a, b); }
86 static forcedinline __m256 JUCE_VECTOR_CALLTYPE bit_not (__m256 a)
noexcept {
return bit_notand (a, vconst (kAllBitsSet)); }
87 static forcedinline __m256 JUCE_VECTOR_CALLTYPE min (__m256 a, __m256 b)
noexcept {
return _mm256_min_ps (a, b); }
88 static forcedinline __m256 JUCE_VECTOR_CALLTYPE max (__m256 a, __m256 b)
noexcept {
return _mm256_max_ps (a, b); }
89 static forcedinline __m256 JUCE_VECTOR_CALLTYPE equal (__m256 a, __m256 b)
noexcept {
return _mm256_cmp_ps (a, b, _CMP_EQ_OQ); }
90 static forcedinline __m256 JUCE_VECTOR_CALLTYPE notEqual (__m256 a, __m256 b)
noexcept {
return _mm256_cmp_ps (a, b, _CMP_NEQ_OQ); }
91 static forcedinline __m256 JUCE_VECTOR_CALLTYPE greaterThan (__m256 a, __m256 b)
noexcept {
return _mm256_cmp_ps (a, b, _CMP_GT_OQ); }
92 static forcedinline __m256 JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m256 a, __m256 b)
noexcept {
return _mm256_cmp_ps (a, b, _CMP_GE_OQ); }
93 static forcedinline
bool JUCE_VECTOR_CALLTYPE allEqual (__m256 a, __m256 b)
noexcept {
return (_mm256_movemask_ps (equal (a, b)) == 0xff); }
94 static forcedinline __m256 JUCE_VECTOR_CALLTYPE dupeven (__m256 a)
noexcept {
return _mm256_shuffle_ps (a, a, _MM_SHUFFLE (2, 2, 0, 0)); }
95 static forcedinline __m256 JUCE_VECTOR_CALLTYPE dupodd (__m256 a)
noexcept {
return _mm256_shuffle_ps (a, a, _MM_SHUFFLE (3, 3, 1, 1)); }
96 static forcedinline __m256 JUCE_VECTOR_CALLTYPE swapevenodd (__m256 a)
noexcept {
return _mm256_shuffle_ps (a, a, _MM_SHUFFLE (2, 3, 0, 1)); }
99 static forcedinline __m256 JUCE_VECTOR_CALLTYPE truncate (__m256 a)
noexcept {
return _mm256_cvtepi32_ps (_mm256_cvttps_epi32 (a)); }
101 static forcedinline __m256 JUCE_VECTOR_CALLTYPE multiplyAdd (__m256 a, __m256 b, __m256 c)
noexcept
104 return _mm256_fmadd_ps (b, c, a);
106 return add (a, mul (b, c));
110 static forcedinline __m256 JUCE_VECTOR_CALLTYPE oddevensum (__m256 a)
noexcept
112 a = _mm256_add_ps (_mm256_shuffle_ps (a, a, _MM_SHUFFLE (1, 0, 3, 2)), a);
113 return add (_mm256_permute2f128_ps (a, a, 1), a);
117 static forcedinline __m256 JUCE_VECTOR_CALLTYPE cmplxmul (__m256 a, __m256 b)
noexcept
119 __m256 rr_ir = mul (a, dupeven (b));
120 __m256 ii_ri = mul (swapevenodd (a), dupodd (b));
121 return add (rr_ir, bit_xor (ii_ri, vconst (kEvenHighBit)));
124 static forcedinline
float JUCE_VECTOR_CALLTYPE sum (__m256 a)
noexcept
126 __m256 retval = _mm256_dp_ps (a, vconst (kOne), 0xff);
127 __m256 tmp = _mm256_permute2f128_ps (retval, retval, 1);
128 retval = _mm256_add_ps (retval, tmp);
133 return _mm256_cvtss_f32 (retval);
144struct SIMDNativeOps<double>
146 using vSIMDType = __m256d;
149 DECLARE_AVX_SIMD_CONST (int64_t, kAllBitsSet);
150 DECLARE_AVX_SIMD_CONST (int64_t, kEvenHighBit);
151 DECLARE_AVX_SIMD_CONST (
double, kOne);
154 static forcedinline __m256d JUCE_VECTOR_CALLTYPE vconst (
const double* a)
noexcept {
return load (a); }
155 static forcedinline __m256d JUCE_VECTOR_CALLTYPE vconst (
const int64_t* a)
noexcept {
return _mm256_castsi256_pd (_mm256_load_si256 (
reinterpret_cast <const __m256i*
> (a))); }
156 static forcedinline __m256d JUCE_VECTOR_CALLTYPE expand (
double s)
noexcept {
return _mm256_broadcast_sd (&s); }
157 static forcedinline __m256d JUCE_VECTOR_CALLTYPE load (
const double* a)
noexcept {
return _mm256_load_pd (a); }
158 static forcedinline
void JUCE_VECTOR_CALLTYPE store (__m256d value,
double* dest)
noexcept { _mm256_store_pd (dest, value); }
159 static forcedinline __m256d JUCE_VECTOR_CALLTYPE add (__m256d a, __m256d b)
noexcept {
return _mm256_add_pd (a, b); }
160 static forcedinline __m256d JUCE_VECTOR_CALLTYPE sub (__m256d a, __m256d b)
noexcept {
return _mm256_sub_pd (a, b); }
161 static forcedinline __m256d JUCE_VECTOR_CALLTYPE mul (__m256d a, __m256d b)
noexcept {
return _mm256_mul_pd (a, b); }
162 static forcedinline __m256d JUCE_VECTOR_CALLTYPE bit_and (__m256d a, __m256d b)
noexcept {
return _mm256_and_pd (a, b); }
163 static forcedinline __m256d JUCE_VECTOR_CALLTYPE bit_or (__m256d a, __m256d b)
noexcept {
return _mm256_or_pd (a, b); }
164 static forcedinline __m256d JUCE_VECTOR_CALLTYPE bit_xor (__m256d a, __m256d b)
noexcept {
return _mm256_xor_pd (a, b); }
165 static forcedinline __m256d JUCE_VECTOR_CALLTYPE bit_notand (__m256d a, __m256d b)
noexcept {
return _mm256_andnot_pd (a, b); }
166 static forcedinline __m256d JUCE_VECTOR_CALLTYPE bit_not (__m256d a)
noexcept {
return bit_notand (a, vconst (kAllBitsSet)); }
167 static forcedinline __m256d JUCE_VECTOR_CALLTYPE min (__m256d a, __m256d b)
noexcept {
return _mm256_min_pd (a, b); }
168 static forcedinline __m256d JUCE_VECTOR_CALLTYPE max (__m256d a, __m256d b)
noexcept {
return _mm256_max_pd (a, b); }
169 static forcedinline __m256d JUCE_VECTOR_CALLTYPE equal (__m256d a, __m256d b)
noexcept {
return _mm256_cmp_pd (a, b, _CMP_EQ_OQ); }
170 static forcedinline __m256d JUCE_VECTOR_CALLTYPE notEqual (__m256d a, __m256d b)
noexcept {
return _mm256_cmp_pd (a, b, _CMP_NEQ_OQ); }
171 static forcedinline __m256d JUCE_VECTOR_CALLTYPE greaterThan (__m256d a, __m256d b)
noexcept {
return _mm256_cmp_pd (a, b, _CMP_GT_OQ); }
172 static forcedinline __m256d JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m256d a, __m256d b)
noexcept {
return _mm256_cmp_pd (a, b, _CMP_GE_OQ); }
173 static forcedinline
bool JUCE_VECTOR_CALLTYPE allEqual (__m256d a, __m256d b)
noexcept {
return (_mm256_movemask_pd (equal (a, b)) == 0xf); }
174 static forcedinline __m256d JUCE_VECTOR_CALLTYPE multiplyAdd (__m256d a, __m256d b, __m256d c)
noexcept {
return _mm256_add_pd (a, _mm256_mul_pd (b, c)); }
175 static forcedinline __m256d JUCE_VECTOR_CALLTYPE dupeven (__m256d a)
noexcept {
return _mm256_shuffle_pd (a, a, 0); }
176 static forcedinline __m256d JUCE_VECTOR_CALLTYPE dupodd (__m256d a)
noexcept {
return _mm256_shuffle_pd (a, a, (1 << 0) | (1 << 1) | (1 << 2) | (1 << 3)); }
177 static forcedinline __m256d JUCE_VECTOR_CALLTYPE swapevenodd (__m256d a)
noexcept {
return _mm256_shuffle_pd (a, a, (1 << 0) | (0 << 1) | (1 << 2) | (0 << 3)); }
178 static forcedinline __m256d JUCE_VECTOR_CALLTYPE oddevensum (__m256d a)
noexcept {
return _mm256_add_pd (_mm256_permute2f128_pd (a, a, 1), a); }
181 static forcedinline __m256d JUCE_VECTOR_CALLTYPE truncate (__m256d a)
noexcept {
return _mm256_cvtepi32_pd (_mm256_cvttpd_epi32 (a)); }
184 static forcedinline __m256d JUCE_VECTOR_CALLTYPE cmplxmul (__m256d a, __m256d b)
noexcept
186 __m256d rr_ir = mul (a, dupeven (b));
187 __m256d ii_ri = mul (swapevenodd (a), dupodd (b));
188 return add (rr_ir, bit_xor (ii_ri, vconst (kEvenHighBit)));
191 static forcedinline
double JUCE_VECTOR_CALLTYPE sum (__m256d a)
noexcept
193 __m256d retval = _mm256_hadd_pd (a, a);
194 __m256d tmp = _mm256_permute2f128_pd (retval, retval, 1);
195 retval = _mm256_add_pd (retval, tmp);
200 return _mm256_cvtsd_f64 (retval);
211struct SIMDNativeOps<int8_t>
213 using vSIMDType = __m256i;
216 DECLARE_AVX_SIMD_CONST (int8_t, kAllBitsSet);
218 static forcedinline __m256i JUCE_VECTOR_CALLTYPE expand (int8_t s)
noexcept {
return _mm256_set1_epi8 (s); }
219 static forcedinline __m256i JUCE_VECTOR_CALLTYPE load (
const int8_t* p)
noexcept {
return _mm256_load_si256 (
reinterpret_cast<const __m256i*
> (p)); }
220 static forcedinline
void JUCE_VECTOR_CALLTYPE store (__m256i value, int8_t* dest)
noexcept { _mm256_store_si256 (
reinterpret_cast<__m256i*
> (dest), value); }
221 static forcedinline __m256i JUCE_VECTOR_CALLTYPE add (__m256i a, __m256i b)
noexcept {
return _mm256_add_epi8 (a, b); }
222 static forcedinline __m256i JUCE_VECTOR_CALLTYPE sub (__m256i a, __m256i b)
noexcept {
return _mm256_sub_epi8 (a, b); }
223 static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_and (__m256i a, __m256i b)
noexcept {
return _mm256_and_si256 (a, b); }
224 static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_or (__m256i a, __m256i b)
noexcept {
return _mm256_or_si256 (a, b); }
225 static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_xor (__m256i a, __m256i b)
noexcept {
return _mm256_xor_si256 (a, b); }
226 static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_andnot (__m256i a, __m256i b)
noexcept {
return _mm256_andnot_si256 (a, b); }
227 static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_not (__m256i a)
noexcept {
return _mm256_andnot_si256 (a, load (kAllBitsSet)); }
228 static forcedinline __m256i JUCE_VECTOR_CALLTYPE min (__m256i a, __m256i b)
noexcept {
return _mm256_min_epi8 (a, b); }
229 static forcedinline __m256i JUCE_VECTOR_CALLTYPE max (__m256i a, __m256i b)
noexcept {
return _mm256_max_epi8 (a, b); }
230 static forcedinline __m256i JUCE_VECTOR_CALLTYPE equal (__m256i a, __m256i b)
noexcept {
return _mm256_cmpeq_epi8 (a, b); }
231 static forcedinline __m256i JUCE_VECTOR_CALLTYPE greaterThan (__m256i a, __m256i b)
noexcept {
return _mm256_cmpgt_epi8 (a, b); }
232 static forcedinline __m256i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m256i a, __m256i b)
noexcept {
return bit_or (greaterThan (a, b), equal (a,b)); }
233 static forcedinline
bool JUCE_VECTOR_CALLTYPE allEqual (__m256i a, __m256i b)
noexcept {
return _mm256_movemask_epi8 (equal (a, b)) == -1; }
234 static forcedinline __m256i JUCE_VECTOR_CALLTYPE multiplyAdd (__m256i a, __m256i b, __m256i c)
noexcept {
return add (a, mul (b, c)); }
235 static forcedinline __m256i JUCE_VECTOR_CALLTYPE notEqual (__m256i a, __m256i b)
noexcept {
return bit_not (equal (a, b)); }
238 static forcedinline __m256i JUCE_VECTOR_CALLTYPE truncate (__m256i a)
noexcept {
return a; }
241 static forcedinline int8_t JUCE_VECTOR_CALLTYPE sum (__m256i a)
noexcept
243 __m256i lo = _mm256_unpacklo_epi8 (a, _mm256_setzero_si256());
244 __m256i hi = _mm256_unpackhi_epi8 (a, _mm256_setzero_si256());
246 for (
int i = 0; i < 3; ++i)
248 lo = _mm256_hadd_epi16 (lo, lo);
249 hi = _mm256_hadd_epi16 (hi, hi);
253 return (int8_t) ((lo[0] & 0xff) +
258 constexpr int mask = (2 << 0) | (3 << 2) | (0 << 4) | (1 << 6);
260 return (int8_t) ((_mm256_cvtsi256_si32 (lo) & 0xff) +
261 (_mm256_cvtsi256_si32 (hi) & 0xff) +
262 (_mm256_cvtsi256_si32 (_mm256_permute4x64_epi64 (lo, mask)) & 0xff) +
263 (_mm256_cvtsi256_si32 (_mm256_permute4x64_epi64 (hi, mask)) & 0xff));
267 static forcedinline __m256i JUCE_VECTOR_CALLTYPE mul (__m256i a, __m256i b)
270 __m256i even = _mm256_mullo_epi16 (a, b);
271 __m256i odd = _mm256_mullo_epi16 (_mm256_srli_epi16 (a, 8), _mm256_srli_epi16 (b, 8));
273 return _mm256_or_si256 (_mm256_slli_epi16 (odd, 8),
274 _mm256_srli_epi16 (_mm256_slli_epi16 (even, 8), 8));
284struct SIMDNativeOps<uint8_t>
287 using vSIMDType = __m256i;
290 DECLARE_AVX_SIMD_CONST (uint8_t, kHighBit);
291 DECLARE_AVX_SIMD_CONST (uint8_t, kAllBitsSet);
293 static forcedinline __m256i JUCE_VECTOR_CALLTYPE ssign (__m256i a)
noexcept {
return _mm256_xor_si256 (a, load (kHighBit)); }
294 static forcedinline __m256i JUCE_VECTOR_CALLTYPE expand (uint8_t s)
noexcept {
return _mm256_set1_epi8 ((int8_t) s); }
295 static forcedinline __m256i JUCE_VECTOR_CALLTYPE load (
const uint8_t* p)
noexcept {
return _mm256_load_si256 (
reinterpret_cast<const __m256i*
> (p)); }
296 static forcedinline
void JUCE_VECTOR_CALLTYPE store (__m256i value, uint8_t* dest)
noexcept { _mm256_store_si256 (
reinterpret_cast<__m256i*
> (dest), value); }
297 static forcedinline __m256i JUCE_VECTOR_CALLTYPE add (__m256i a, __m256i b)
noexcept {
return _mm256_add_epi8 (a, b); }
298 static forcedinline __m256i JUCE_VECTOR_CALLTYPE sub (__m256i a, __m256i b)
noexcept {
return _mm256_sub_epi8 (a, b); }
299 static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_and (__m256i a, __m256i b)
noexcept {
return _mm256_and_si256 (a, b); }
300 static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_or (__m256i a, __m256i b)
noexcept {
return _mm256_or_si256 (a, b); }
301 static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_xor (__m256i a, __m256i b)
noexcept {
return _mm256_xor_si256 (a, b); }
302 static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_andnot (__m256i a, __m256i b)
noexcept {
return _mm256_andnot_si256 (a, b); }
303 static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_not (__m256i a)
noexcept {
return _mm256_andnot_si256 (a, load (kAllBitsSet)); }
304 static forcedinline __m256i JUCE_VECTOR_CALLTYPE min (__m256i a, __m256i b)
noexcept {
return _mm256_min_epu8 (a, b); }
305 static forcedinline __m256i JUCE_VECTOR_CALLTYPE max (__m256i a, __m256i b)
noexcept {
return _mm256_max_epu8 (a, b); }
306 static forcedinline __m256i JUCE_VECTOR_CALLTYPE equal (__m256i a, __m256i b)
noexcept {
return _mm256_cmpeq_epi8 (a, b); }
307 static forcedinline __m256i JUCE_VECTOR_CALLTYPE greaterThan (__m256i a, __m256i b)
noexcept {
return _mm256_cmpgt_epi8 (ssign (a), ssign (b)); }
308 static forcedinline __m256i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m256i a, __m256i b)
noexcept {
return bit_or (greaterThan (a, b), equal (a,b)); }
309 static forcedinline
bool JUCE_VECTOR_CALLTYPE allEqual (__m256i a, __m256i b)
noexcept {
return (_mm256_movemask_epi8 (equal (a, b)) == -1); }
310 static forcedinline __m256i JUCE_VECTOR_CALLTYPE multiplyAdd (__m256i a, __m256i b, __m256i c)
noexcept {
return add (a, mul (b, c)); }
311 static forcedinline __m256i JUCE_VECTOR_CALLTYPE notEqual (__m256i a, __m256i b)
noexcept {
return bit_not (equal (a, b)); }
314 static forcedinline __m256i JUCE_VECTOR_CALLTYPE truncate (__m256i a)
noexcept {
return a; }
317 static forcedinline uint8_t JUCE_VECTOR_CALLTYPE sum (__m256i a)
noexcept
319 __m256i lo = _mm256_unpacklo_epi8 (a, _mm256_setzero_si256());
320 __m256i hi = _mm256_unpackhi_epi8 (a, _mm256_setzero_si256());
322 for (
int i = 0; i < 3; ++i)
324 lo = _mm256_hadd_epi16 (lo, lo);
325 hi = _mm256_hadd_epi16 (hi, hi);
329 return (uint8_t) ((
static_cast<uint32_t
> (lo[0]) & 0xffu) +
330 (
static_cast<uint32_t
> (hi[0]) & 0xffu) +
331 (
static_cast<uint32_t
> (lo[2]) & 0xffu) +
332 (
static_cast<uint32_t
> (hi[2]) & 0xffu));
334 constexpr int mask = (2 << 0) | (3 << 2) | (0 << 4) | (1 << 6);
336 return (uint8_t) ((
static_cast<uint32_t
> (_mm256_cvtsi256_si32 (lo)) & 0xffu) +
337 (
static_cast<uint32_t
> (_mm256_cvtsi256_si32 (hi)) & 0xffu) +
338 (
static_cast<uint32_t
> (_mm256_cvtsi256_si32 (_mm256_permute4x64_epi64 (lo, mask))) & 0xffu) +
339 (
static_cast<uint32_t
> (_mm256_cvtsi256_si32 (_mm256_permute4x64_epi64 (hi, mask))) & 0xffu));
343 static forcedinline __m256i JUCE_VECTOR_CALLTYPE mul (__m256i a, __m256i b)
346 __m256i even = _mm256_mullo_epi16 (a, b);
347 __m256i odd = _mm256_mullo_epi16 (_mm256_srli_epi16 (a, 8), _mm256_srli_epi16 (b, 8));
349 return _mm256_or_si256 (_mm256_slli_epi16 (odd, 8),
350 _mm256_srli_epi16 (_mm256_slli_epi16 (even, 8), 8));
360struct SIMDNativeOps<int16_t>
363 using vSIMDType = __m256i;
366 DECLARE_AVX_SIMD_CONST (int16_t, kAllBitsSet);
369 static forcedinline __m256i JUCE_VECTOR_CALLTYPE expand (int16_t s)
noexcept {
return _mm256_set1_epi16 (s); }
370 static forcedinline __m256i JUCE_VECTOR_CALLTYPE load (
const int16_t* p)
noexcept {
return _mm256_load_si256 (
reinterpret_cast<const __m256i*
> (p)); }
371 static forcedinline
void JUCE_VECTOR_CALLTYPE store (__m256i value, int16_t* dest)
noexcept { _mm256_store_si256 (
reinterpret_cast<__m256i*
> (dest), value); }
372 static forcedinline __m256i JUCE_VECTOR_CALLTYPE add (__m256i a, __m256i b)
noexcept {
return _mm256_add_epi16 (a, b); }
373 static forcedinline __m256i JUCE_VECTOR_CALLTYPE sub (__m256i a, __m256i b)
noexcept {
return _mm256_sub_epi16 (a, b); }
374 static forcedinline __m256i JUCE_VECTOR_CALLTYPE mul (__m256i a, __m256i b)
noexcept {
return _mm256_mullo_epi16 (a, b); }
375 static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_and (__m256i a, __m256i b)
noexcept {
return _mm256_and_si256 (a, b); }
376 static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_or (__m256i a, __m256i b)
noexcept {
return _mm256_or_si256 (a, b); }
377 static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_xor (__m256i a, __m256i b)
noexcept {
return _mm256_xor_si256 (a, b); }
378 static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_andnot (__m256i a, __m256i b)
noexcept {
return _mm256_andnot_si256 (a, b); }
379 static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_not (__m256i a)
noexcept {
return _mm256_andnot_si256 (a, load (kAllBitsSet)); }
380 static forcedinline __m256i JUCE_VECTOR_CALLTYPE min (__m256i a, __m256i b)
noexcept {
return _mm256_min_epi16 (a, b); }
381 static forcedinline __m256i JUCE_VECTOR_CALLTYPE max (__m256i a, __m256i b)
noexcept {
return _mm256_max_epi16 (a, b); }
382 static forcedinline __m256i JUCE_VECTOR_CALLTYPE equal (__m256i a, __m256i b)
noexcept {
return _mm256_cmpeq_epi16 (a, b); }
383 static forcedinline __m256i JUCE_VECTOR_CALLTYPE greaterThan (__m256i a, __m256i b)
noexcept {
return _mm256_cmpgt_epi16 (a, b); }
384 static forcedinline __m256i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m256i a, __m256i b)
noexcept {
return bit_or (greaterThan (a, b), equal (a,b)); }
385 static forcedinline __m256i JUCE_VECTOR_CALLTYPE multiplyAdd (__m256i a, __m256i b, __m256i c)
noexcept {
return add (a, mul (b, c)); }
386 static forcedinline __m256i JUCE_VECTOR_CALLTYPE notEqual (__m256i a, __m256i b)
noexcept {
return bit_not (equal (a, b)); }
387 static forcedinline
bool JUCE_VECTOR_CALLTYPE allEqual (__m256i a, __m256i b)
noexcept {
return (_mm256_movemask_epi8 (equal (a, b)) == -1); }
390 static forcedinline __m256i JUCE_VECTOR_CALLTYPE truncate (__m256i a)
noexcept {
return a; }
393 static forcedinline int16_t JUCE_VECTOR_CALLTYPE sum (__m256i a)
noexcept
395 __m256i tmp = _mm256_hadd_epi16 (a, a);
396 tmp = _mm256_hadd_epi16 (tmp, tmp);
397 tmp = _mm256_hadd_epi16 (tmp, tmp);
400 return (int16_t) ((tmp[0] & 0xffff) + (tmp[2] & 0xffff));
402 constexpr int mask = (2 << 0) | (3 << 2) | (0 << 4) | (1 << 6);
404 return (int16_t) ((_mm256_cvtsi256_si32 (tmp) & 0xffff) +
405 (_mm256_cvtsi256_si32 (_mm256_permute4x64_epi64 (tmp, mask)) & 0xffff));
416struct SIMDNativeOps<uint16_t>
419 using vSIMDType = __m256i;
422 DECLARE_AVX_SIMD_CONST (uint16_t, kHighBit);
423 DECLARE_AVX_SIMD_CONST (uint16_t, kAllBitsSet);
426 static forcedinline __m256i JUCE_VECTOR_CALLTYPE ssign (__m256i a)
noexcept {
return _mm256_xor_si256 (a, load (kHighBit)); }
427 static forcedinline __m256i JUCE_VECTOR_CALLTYPE expand (uint16_t s)
noexcept {
return _mm256_set1_epi16 ((int16_t) s); }
428 static forcedinline __m256i JUCE_VECTOR_CALLTYPE load (
const uint16_t* p)
noexcept {
return _mm256_load_si256 (
reinterpret_cast<const __m256i*
> (p)); }
429 static forcedinline
void JUCE_VECTOR_CALLTYPE store (__m256i value, uint16_t* dest)
noexcept { _mm256_store_si256 (
reinterpret_cast<__m256i*
> (dest), value); }
430 static forcedinline __m256i JUCE_VECTOR_CALLTYPE add (__m256i a, __m256i b)
noexcept {
return _mm256_add_epi16 (a, b); }
431 static forcedinline __m256i JUCE_VECTOR_CALLTYPE sub (__m256i a, __m256i b)
noexcept {
return _mm256_sub_epi16 (a, b); }
432 static forcedinline __m256i JUCE_VECTOR_CALLTYPE mul (__m256i a, __m256i b)
noexcept {
return _mm256_mullo_epi16 (a, b); }
433 static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_and (__m256i a, __m256i b)
noexcept {
return _mm256_and_si256 (a, b); }
434 static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_or (__m256i a, __m256i b)
noexcept {
return _mm256_or_si256 (a, b); }
435 static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_xor (__m256i a, __m256i b)
noexcept {
return _mm256_xor_si256 (a, b); }
436 static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_andnot (__m256i a, __m256i b)
noexcept {
return _mm256_andnot_si256 (a, b); }
437 static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_not (__m256i a)
noexcept {
return _mm256_andnot_si256 (a, load (kAllBitsSet)); }
438 static forcedinline __m256i JUCE_VECTOR_CALLTYPE min (__m256i a, __m256i b)
noexcept {
return _mm256_min_epu16 (a, b); }
439 static forcedinline __m256i JUCE_VECTOR_CALLTYPE max (__m256i a, __m256i b)
noexcept {
return _mm256_max_epu16 (a, b); }
440 static forcedinline __m256i JUCE_VECTOR_CALLTYPE equal (__m256i a, __m256i b)
noexcept {
return _mm256_cmpeq_epi16 (a, b); }
441 static forcedinline __m256i JUCE_VECTOR_CALLTYPE greaterThan (__m256i a, __m256i b)
noexcept {
return _mm256_cmpgt_epi16 (ssign (a), ssign (b)); }
442 static forcedinline __m256i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m256i a, __m256i b)
noexcept {
return bit_or (greaterThan (a, b), equal (a,b)); }
443 static forcedinline __m256i JUCE_VECTOR_CALLTYPE multiplyAdd (__m256i a, __m256i b, __m256i c)
noexcept {
return add (a, mul (b, c)); }
444 static forcedinline __m256i JUCE_VECTOR_CALLTYPE notEqual (__m256i a, __m256i b)
noexcept {
return bit_not (equal (a, b)); }
445 static forcedinline
bool JUCE_VECTOR_CALLTYPE allEqual (__m256i a, __m256i b)
noexcept {
return (_mm256_movemask_epi8 (equal (a, b)) == -1); }
448 static forcedinline __m256i JUCE_VECTOR_CALLTYPE truncate (__m256i a)
noexcept {
return a; }
451 static forcedinline uint16_t JUCE_VECTOR_CALLTYPE sum (__m256i a)
noexcept
453 __m256i tmp = _mm256_hadd_epi16 (a, a);
454 tmp = _mm256_hadd_epi16 (tmp, tmp);
455 tmp = _mm256_hadd_epi16 (tmp, tmp);
458 return (uint16_t) ((
static_cast<uint32_t
> (tmp[0]) & 0xffffu) +
459 (
static_cast<uint32_t
> (tmp[2]) & 0xffffu));
461 constexpr int mask = (2 << 0) | (3 << 2) | (0 << 4) | (1 << 6);
463 return (uint16_t) ((
static_cast<uint32_t
> (_mm256_cvtsi256_si32 (tmp)) & 0xffffu) +
464 (
static_cast<uint32_t
> (_mm256_cvtsi256_si32 (_mm256_permute4x64_epi64 (tmp, mask))) & 0xffffu));
475struct SIMDNativeOps<int32_t>
478 using vSIMDType = __m256i;
481 DECLARE_AVX_SIMD_CONST (int32_t, kAllBitsSet);
484 static forcedinline __m256i JUCE_VECTOR_CALLTYPE expand (int32_t s)
noexcept {
return _mm256_set1_epi32 (s); }
485 static forcedinline __m256i JUCE_VECTOR_CALLTYPE load (
const int32_t* p)
noexcept {
return _mm256_load_si256 (
reinterpret_cast<const __m256i*
> (p)); }
486 static forcedinline
void JUCE_VECTOR_CALLTYPE store (__m256i value, int32_t* dest)
noexcept { _mm256_store_si256 (
reinterpret_cast<__m256i*
> (dest), value); }
487 static forcedinline __m256i JUCE_VECTOR_CALLTYPE add (__m256i a, __m256i b)
noexcept {
return _mm256_add_epi32 (a, b); }
488 static forcedinline __m256i JUCE_VECTOR_CALLTYPE sub (__m256i a, __m256i b)
noexcept {
return _mm256_sub_epi32 (a, b); }
489 static forcedinline __m256i JUCE_VECTOR_CALLTYPE mul (__m256i a, __m256i b)
noexcept {
return _mm256_mullo_epi32 (a, b); }
490 static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_and (__m256i a, __m256i b)
noexcept {
return _mm256_and_si256 (a, b); }
491 static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_or (__m256i a, __m256i b)
noexcept {
return _mm256_or_si256 (a, b); }
492 static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_xor (__m256i a, __m256i b)
noexcept {
return _mm256_xor_si256 (a, b); }
493 static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_andnot (__m256i a, __m256i b)
noexcept {
return _mm256_andnot_si256 (a, b); }
494 static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_not (__m256i a)
noexcept {
return _mm256_andnot_si256 (a, load (kAllBitsSet)); }
495 static forcedinline __m256i JUCE_VECTOR_CALLTYPE min (__m256i a, __m256i b)
noexcept {
return _mm256_min_epi32 (a, b); }
496 static forcedinline __m256i JUCE_VECTOR_CALLTYPE max (__m256i a, __m256i b)
noexcept {
return _mm256_max_epi32 (a, b); }
497 static forcedinline __m256i JUCE_VECTOR_CALLTYPE equal (__m256i a, __m256i b)
noexcept {
return _mm256_cmpeq_epi32 (a, b); }
498 static forcedinline __m256i JUCE_VECTOR_CALLTYPE greaterThan (__m256i a, __m256i b)
noexcept {
return _mm256_cmpgt_epi32 (a, b); }
499 static forcedinline __m256i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m256i a, __m256i b)
noexcept {
return bit_or (greaterThan (a, b), equal (a,b)); }
500 static forcedinline __m256i JUCE_VECTOR_CALLTYPE multiplyAdd (__m256i a, __m256i b, __m256i c)
noexcept {
return add (a, mul (b, c)); }
501 static forcedinline __m256i JUCE_VECTOR_CALLTYPE notEqual (__m256i a, __m256i b)
noexcept {
return bit_not (equal (a, b)); }
502 static forcedinline
bool JUCE_VECTOR_CALLTYPE allEqual (__m256i a, __m256i b)
noexcept {
return (_mm256_movemask_epi8 (equal (a, b)) == -1); }
505 static forcedinline __m256i JUCE_VECTOR_CALLTYPE truncate (__m256i a)
noexcept {
return a; }
508 static forcedinline int32_t JUCE_VECTOR_CALLTYPE sum (__m256i a)
noexcept
510 __m256i tmp = _mm256_hadd_epi32 (a, a);
511 tmp = _mm256_hadd_epi32 (tmp, tmp);
514 return (int32_t) (tmp[0] + tmp[2]);
516 constexpr int mask = (2 << 0) | (3 << 2) | (0 << 4) | (1 << 6);
518 return _mm256_cvtsi256_si32 (tmp) + _mm256_cvtsi256_si32 (_mm256_permute4x64_epi64 (tmp, mask));
529struct SIMDNativeOps<uint32_t>
532 using vSIMDType = __m256i;
535 DECLARE_AVX_SIMD_CONST (uint32_t, kAllBitsSet);
536 DECLARE_AVX_SIMD_CONST (uint32_t, kHighBit);
539 static forcedinline __m256i JUCE_VECTOR_CALLTYPE ssign (__m256i a)
noexcept {
return _mm256_xor_si256 (a, load (kHighBit)); }
540 static forcedinline __m256i JUCE_VECTOR_CALLTYPE expand (uint32_t s)
noexcept {
return _mm256_set1_epi32 ((int32_t) s); }
541 static forcedinline __m256i JUCE_VECTOR_CALLTYPE load (
const uint32_t* p)
noexcept {
return _mm256_load_si256 (
reinterpret_cast<const __m256i*
> (p)); }
542 static forcedinline
void JUCE_VECTOR_CALLTYPE store (__m256i value, uint32_t* dest)
noexcept { _mm256_store_si256 (
reinterpret_cast<__m256i*
> (dest), value); }
543 static forcedinline __m256i JUCE_VECTOR_CALLTYPE add (__m256i a, __m256i b)
noexcept {
return _mm256_add_epi32 (a, b); }
544 static forcedinline __m256i JUCE_VECTOR_CALLTYPE sub (__m256i a, __m256i b)
noexcept {
return _mm256_sub_epi32 (a, b); }
545 static forcedinline __m256i JUCE_VECTOR_CALLTYPE mul (__m256i a, __m256i b)
noexcept {
return _mm256_mullo_epi32 (a, b); }
546 static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_and (__m256i a, __m256i b)
noexcept {
return _mm256_and_si256 (a, b); }
547 static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_or (__m256i a, __m256i b)
noexcept {
return _mm256_or_si256 (a, b); }
548 static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_xor (__m256i a, __m256i b)
noexcept {
return _mm256_xor_si256 (a, b); }
549 static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_andnot (__m256i a, __m256i b)
noexcept {
return _mm256_andnot_si256 (a, b); }
550 static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_not (__m256i a)
noexcept {
return _mm256_andnot_si256 (a, load (kAllBitsSet)); }
551 static forcedinline __m256i JUCE_VECTOR_CALLTYPE min (__m256i a, __m256i b)
noexcept {
return _mm256_min_epu32 (a, b); }
552 static forcedinline __m256i JUCE_VECTOR_CALLTYPE max (__m256i a, __m256i b)
noexcept {
return _mm256_max_epu32 (a, b); }
553 static forcedinline __m256i JUCE_VECTOR_CALLTYPE equal (__m256i a, __m256i b)
noexcept {
return _mm256_cmpeq_epi32 (a, b); }
554 static forcedinline __m256i JUCE_VECTOR_CALLTYPE greaterThan (__m256i a, __m256i b)
noexcept {
return _mm256_cmpgt_epi32 (ssign (a), ssign (b)); }
555 static forcedinline __m256i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m256i a, __m256i b)
noexcept {
return bit_or (greaterThan (a, b), equal (a,b)); }
556 static forcedinline __m256i JUCE_VECTOR_CALLTYPE multiplyAdd (__m256i a, __m256i b, __m256i c)
noexcept {
return add (a, mul (b, c)); }
557 static forcedinline __m256i JUCE_VECTOR_CALLTYPE notEqual (__m256i a, __m256i b)
noexcept {
return bit_not (equal (a, b)); }
558 static forcedinline
bool JUCE_VECTOR_CALLTYPE allEqual (__m256i a, __m256i b)
noexcept {
return (_mm256_movemask_epi8 (equal (a, b)) == -1); }
561 static forcedinline __m256i JUCE_VECTOR_CALLTYPE truncate (__m256i a)
noexcept {
return a; }
564 static forcedinline uint32_t JUCE_VECTOR_CALLTYPE sum (__m256i a)
noexcept
566 __m256i tmp = _mm256_hadd_epi32 (a, a);
567 tmp = _mm256_hadd_epi32 (tmp, tmp);
570 return static_cast<uint32_t
> (tmp[0]) +
static_cast<uint32_t
> (tmp[2]);
572 constexpr int mask = (2 << 0) | (3 << 2) | (0 << 4) | (1 << 6);
574 return static_cast<uint32_t
> (_mm256_cvtsi256_si32 (tmp))
575 +
static_cast<uint32_t
> (_mm256_cvtsi256_si32 (_mm256_permute4x64_epi64 (tmp, mask)));
586struct SIMDNativeOps<int64_t>
589 using vSIMDType = __m256i;
592 DECLARE_AVX_SIMD_CONST (int64_t, kAllBitsSet);
594 static forcedinline __m256i JUCE_VECTOR_CALLTYPE expand (int64_t s)
noexcept {
return _mm256_set1_epi64x ((int64_t) s); }
595 static forcedinline __m256i JUCE_VECTOR_CALLTYPE load (
const int64_t* p)
noexcept {
return _mm256_load_si256 (
reinterpret_cast<const __m256i*
> (p)); }
596 static forcedinline
void JUCE_VECTOR_CALLTYPE store (__m256i value, int64_t* dest)
noexcept { _mm256_store_si256 (
reinterpret_cast<__m256i*
> (dest), value); }
597 static forcedinline __m256i JUCE_VECTOR_CALLTYPE add (__m256i a, __m256i b)
noexcept {
return _mm256_add_epi64 (a, b); }
598 static forcedinline __m256i JUCE_VECTOR_CALLTYPE sub (__m256i a, __m256i b)
noexcept {
return _mm256_sub_epi64 (a, b); }
599 static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_and (__m256i a, __m256i b)
noexcept {
return _mm256_and_si256 (a, b); }
600 static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_or (__m256i a, __m256i b)
noexcept {
return _mm256_or_si256 (a, b); }
601 static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_xor (__m256i a, __m256i b)
noexcept {
return _mm256_xor_si256 (a, b); }
602 static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_andnot (__m256i a, __m256i b)
noexcept {
return _mm256_andnot_si256 (a, b); }
603 static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_not (__m256i a)
noexcept {
return _mm256_andnot_si256 (a, load (kAllBitsSet)); }
604 static forcedinline __m256i JUCE_VECTOR_CALLTYPE min (__m256i a, __m256i b)
noexcept { __m256i lt = greaterThan (b, a);
return bit_or (bit_and (lt, a), bit_andnot (lt, b)); }
605 static forcedinline __m256i JUCE_VECTOR_CALLTYPE max (__m256i a, __m256i b)
noexcept { __m256i gt = greaterThan (a, b);
return bit_or (bit_and (gt, a), bit_andnot (gt, b)); }
606 static forcedinline __m256i JUCE_VECTOR_CALLTYPE equal (__m256i a, __m256i b)
noexcept {
return _mm256_cmpeq_epi64 (a, b); }
607 static forcedinline __m256i JUCE_VECTOR_CALLTYPE greaterThan (__m256i a, __m256i b)
noexcept {
return _mm256_cmpgt_epi64 (a, b); }
608 static forcedinline __m256i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m256i a, __m256i b)
noexcept {
return bit_or (greaterThan (a, b), equal (a,b)); }
609 static forcedinline __m256i JUCE_VECTOR_CALLTYPE multiplyAdd (__m256i a, __m256i b, __m256i c)
noexcept {
return add (a, mul (b, c)); }
610 static forcedinline __m256i JUCE_VECTOR_CALLTYPE notEqual (__m256i a, __m256i b)
noexcept {
return bit_not (equal (a, b)); }
611 static forcedinline
bool JUCE_VECTOR_CALLTYPE allEqual (__m256i a, __m256i b)
noexcept {
return (_mm256_movemask_epi8 (equal (a, b)) == -1); }
616 static forcedinline __m256i JUCE_VECTOR_CALLTYPE truncate (__m256i a)
noexcept {
return a; }
625struct SIMDNativeOps<uint64_t>
628 using vSIMDType = __m256i;
631 DECLARE_AVX_SIMD_CONST (uint64_t, kAllBitsSet);
632 DECLARE_AVX_SIMD_CONST (uint64_t, kHighBit);
634 static forcedinline __m256i JUCE_VECTOR_CALLTYPE expand (uint64_t s)
noexcept {
return _mm256_set1_epi64x ((int64_t) s); }
635 static forcedinline __m256i JUCE_VECTOR_CALLTYPE load (
const uint64_t* p)
noexcept {
return _mm256_load_si256 (
reinterpret_cast<const __m256i*
> (p)); }
636 static forcedinline
void JUCE_VECTOR_CALLTYPE store (__m256i value, uint64_t* dest)
noexcept { _mm256_store_si256 (
reinterpret_cast<__m256i*
> (dest), value); }
637 static forcedinline __m256i JUCE_VECTOR_CALLTYPE ssign (__m256i a)
noexcept {
return _mm256_xor_si256 (a, load (kHighBit)); }
638 static forcedinline __m256i JUCE_VECTOR_CALLTYPE add (__m256i a, __m256i b)
noexcept {
return _mm256_add_epi64 (a, b); }
639 static forcedinline __m256i JUCE_VECTOR_CALLTYPE sub (__m256i a, __m256i b)
noexcept {
return _mm256_sub_epi64 (a, b); }
640 static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_and (__m256i a, __m256i b)
noexcept {
return _mm256_and_si256 (a, b); }
641 static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_or (__m256i a, __m256i b)
noexcept {
return _mm256_or_si256 (a, b); }
642 static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_xor (__m256i a, __m256i b)
noexcept {
return _mm256_xor_si256 (a, b); }
643 static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_andnot (__m256i a, __m256i b)
noexcept {
return _mm256_andnot_si256 (a, b); }
644 static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_not (__m256i a)
noexcept {
return _mm256_andnot_si256 (a, load (kAllBitsSet)); }
645 static forcedinline __m256i JUCE_VECTOR_CALLTYPE min (__m256i a, __m256i b)
noexcept { __m256i lt = greaterThan (b, a);
return bit_or (bit_and (lt, a), bit_andnot (lt, b)); }
646 static forcedinline __m256i JUCE_VECTOR_CALLTYPE max (__m256i a, __m256i b)
noexcept { __m256i gt = greaterThan (a, b);
return bit_or (bit_and (gt, a), bit_andnot (gt, b)); }
647 static forcedinline __m256i JUCE_VECTOR_CALLTYPE equal (__m256i a, __m256i b)
noexcept {
return _mm256_cmpeq_epi64 (a, b); }
648 static forcedinline __m256i JUCE_VECTOR_CALLTYPE greaterThan (__m256i a, __m256i b)
noexcept {
return _mm256_cmpgt_epi64 (ssign (a), ssign (b)); }
649 static forcedinline __m256i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m256i a, __m256i b)
noexcept {
return bit_or (greaterThan (a, b), equal (a,b)); }
650 static forcedinline __m256i JUCE_VECTOR_CALLTYPE multiplyAdd (__m256i a, __m256i b, __m256i c)
noexcept {
return add (a, mul (b, c)); }
651 static forcedinline __m256i JUCE_VECTOR_CALLTYPE notEqual (__m256i a, __m256i b)
noexcept {
return bit_not (equal (a, b)); }
652 static forcedinline
bool JUCE_VECTOR_CALLTYPE allEqual (__m256i a, __m256i b)
noexcept {
return (_mm256_movemask_epi8 (equal (a, b)) == -1); }
657 static forcedinline __m256i JUCE_VECTOR_CALLTYPE truncate (__m256i a)
noexcept {
return a; }
662#if JUCE_GCC && (__GNUC__ >= 6)
663 #pragma GCC diagnostic pop