// GCC >= 6 warns that alignment attributes on template arguments are ignored;
// the attributes here are applied to the variables themselves, so silence it.
// (The matching "pop" is expected further down the file.)
#if JUCE_GCC && (__GNUC__ >= 6)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wignored-attributes"
#endif

// MSVC and GCC/Clang spell 16-byte alignment differently, so the two variants
// of these macros must be mutually exclusive: defining both unconditionally
// would be a macro redefinition.
#ifdef _MSC_VER
 #define DECLARE_SSE_SIMD_CONST(type, name) \
    static __declspec(align(16)) const type name [16 / sizeof (type)]

 #define DEFINE_SSE_SIMD_CONST(type, class_type, name) \
    __declspec(align(16)) const type SIMDNativeOps<class_type>:: name [16 / sizeof (type)]
#else
 #define DECLARE_SSE_SIMD_CONST(type, name) \
    static const type name [16 / sizeof (type)] __attribute__((aligned(16)))

 #define DEFINE_SSE_SIMD_CONST(type, class_type, name) \
    const type SIMDNativeOps<class_type>:: name [16 / sizeof (type)] __attribute__((aligned(16)))
#endif
55template <
typename type>
64struct SIMDNativeOps<float>
67 using vSIMDType = __m128;
70 DECLARE_SSE_SIMD_CONST (int32_t, kAllBitsSet);
71 DECLARE_SSE_SIMD_CONST (int32_t, kEvenHighBit);
72 DECLARE_SSE_SIMD_CONST (
float, kOne);
75 static forcedinline __m128 JUCE_VECTOR_CALLTYPE expand (
float s)
noexcept {
return _mm_load1_ps (&s); }
76 static forcedinline __m128 JUCE_VECTOR_CALLTYPE load (
const float* a)
noexcept {
return _mm_load_ps (a); }
77 static forcedinline
void JUCE_VECTOR_CALLTYPE store (__m128 value,
float* dest)
noexcept { _mm_store_ps (dest, value); }
78 static forcedinline __m128 JUCE_VECTOR_CALLTYPE add (__m128 a, __m128 b)
noexcept {
return _mm_add_ps (a, b); }
79 static forcedinline __m128 JUCE_VECTOR_CALLTYPE sub (__m128 a, __m128 b)
noexcept {
return _mm_sub_ps (a, b); }
80 static forcedinline __m128 JUCE_VECTOR_CALLTYPE mul (__m128 a, __m128 b)
noexcept {
return _mm_mul_ps (a, b); }
81 static forcedinline __m128 JUCE_VECTOR_CALLTYPE bit_and (__m128 a, __m128 b)
noexcept {
return _mm_and_ps (a, b); }
82 static forcedinline __m128 JUCE_VECTOR_CALLTYPE bit_or (__m128 a, __m128 b)
noexcept {
return _mm_or_ps (a, b); }
83 static forcedinline __m128 JUCE_VECTOR_CALLTYPE bit_xor (__m128 a, __m128 b)
noexcept {
return _mm_xor_ps (a, b); }
84 static forcedinline __m128 JUCE_VECTOR_CALLTYPE bit_notand (__m128 a, __m128 b)
noexcept {
return _mm_andnot_ps (a, b); }
85 static forcedinline __m128 JUCE_VECTOR_CALLTYPE bit_not (__m128 a)
noexcept {
return bit_notand (a, _mm_loadu_ps ((
float*) kAllBitsSet)); }
86 static forcedinline __m128 JUCE_VECTOR_CALLTYPE min (__m128 a, __m128 b)
noexcept {
return _mm_min_ps (a, b); }
87 static forcedinline __m128 JUCE_VECTOR_CALLTYPE max (__m128 a, __m128 b)
noexcept {
return _mm_max_ps (a, b); }
88 static forcedinline __m128 JUCE_VECTOR_CALLTYPE equal (__m128 a, __m128 b)
noexcept {
return _mm_cmpeq_ps (a, b); }
89 static forcedinline __m128 JUCE_VECTOR_CALLTYPE notEqual (__m128 a, __m128 b)
noexcept {
return _mm_cmpneq_ps (a, b); }
90 static forcedinline __m128 JUCE_VECTOR_CALLTYPE greaterThan (__m128 a, __m128 b)
noexcept {
return _mm_cmpgt_ps (a, b); }
91 static forcedinline __m128 JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m128 a, __m128 b)
noexcept {
return _mm_cmpge_ps (a, b); }
92 static forcedinline
bool JUCE_VECTOR_CALLTYPE allEqual (__m128 a, __m128 b )
noexcept {
return (_mm_movemask_ps (equal (a, b)) == 0xf); }
93 static forcedinline __m128 JUCE_VECTOR_CALLTYPE multiplyAdd (__m128 a, __m128 b, __m128 c)
noexcept {
return _mm_add_ps (a, _mm_mul_ps (b, c)); }
94 static forcedinline __m128 JUCE_VECTOR_CALLTYPE dupeven (__m128 a)
noexcept {
return _mm_shuffle_ps (a, a, _MM_SHUFFLE (2, 2, 0, 0)); }
95 static forcedinline __m128 JUCE_VECTOR_CALLTYPE dupodd (__m128 a)
noexcept {
return _mm_shuffle_ps (a, a, _MM_SHUFFLE (3, 3, 1, 1)); }
96 static forcedinline __m128 JUCE_VECTOR_CALLTYPE swapevenodd (__m128 a)
noexcept {
return _mm_shuffle_ps (a, a, _MM_SHUFFLE (2, 3, 0, 1)); }
97 static forcedinline __m128 JUCE_VECTOR_CALLTYPE oddevensum (__m128 a)
noexcept {
return _mm_add_ps (_mm_shuffle_ps (a, a, _MM_SHUFFLE (1, 0, 3, 2)), a); }
98 static forcedinline
float JUCE_VECTOR_CALLTYPE get (__m128 v,
size_t i)
noexcept {
return SIMDFallbackOps<float, __m128>::get (v, i); }
99 static forcedinline __m128 JUCE_VECTOR_CALLTYPE set (__m128 v,
size_t i,
float s)
noexcept {
return SIMDFallbackOps<float, __m128>::set (v, i, s); }
100 static forcedinline __m128 JUCE_VECTOR_CALLTYPE truncate (__m128 a)
noexcept {
return _mm_cvtepi32_ps (_mm_cvttps_epi32 (a)); }
103 static forcedinline __m128 JUCE_VECTOR_CALLTYPE cmplxmul (__m128 a, __m128 b)
noexcept
105 __m128 rr_ir = mul (a, dupeven (b));
106 __m128 ii_ri = mul (swapevenodd (a), dupodd (b));
107 return add (rr_ir, bit_xor (ii_ri, _mm_loadu_ps ((
float*) kEvenHighBit)));
110 static forcedinline
float JUCE_VECTOR_CALLTYPE sum (__m128 a)
noexcept
112 #if defined(__SSE4__)
113 __m128 retval = _mm_dp_ps (a, _mm_loadu_ps (kOne), 0xff);
114 #elif defined(__SSE3__)
115 __m128 retval = _mm_hadd_ps (_mm_hadd_ps (a, a), a);
117 __m128 retval = _mm_add_ps (_mm_shuffle_ps (a, a, 0x4e), a);
118 retval = _mm_add_ps (retval, _mm_shuffle_ps (retval, retval, 0xb1));
120 return _mm_cvtss_f32 (retval);
130struct SIMDNativeOps<double>
133 using vSIMDType = __m128d;
136 DECLARE_SSE_SIMD_CONST (int64_t, kAllBitsSet);
137 DECLARE_SSE_SIMD_CONST (int64_t, kEvenHighBit);
138 DECLARE_SSE_SIMD_CONST (
double, kOne);
141 static forcedinline __m128d JUCE_VECTOR_CALLTYPE vconst (
const double* a)
noexcept {
return load (a); }
142 static forcedinline __m128d JUCE_VECTOR_CALLTYPE vconst (
const int64_t* a)
noexcept {
return _mm_castsi128_pd (_mm_load_si128 (
reinterpret_cast<const __m128i*
> (a))); }
143 static forcedinline __m128d JUCE_VECTOR_CALLTYPE expand (
double s)
noexcept {
return _mm_load1_pd (&s); }
144 static forcedinline __m128d JUCE_VECTOR_CALLTYPE load (
const double* a)
noexcept {
return _mm_load_pd (a); }
145 static forcedinline
void JUCE_VECTOR_CALLTYPE store (__m128d value,
double* dest)
noexcept { _mm_store_pd (dest, value); }
146 static forcedinline __m128d JUCE_VECTOR_CALLTYPE add (__m128d a, __m128d b)
noexcept {
return _mm_add_pd (a, b); }
147 static forcedinline __m128d JUCE_VECTOR_CALLTYPE sub (__m128d a, __m128d b)
noexcept {
return _mm_sub_pd (a, b); }
148 static forcedinline __m128d JUCE_VECTOR_CALLTYPE mul (__m128d a, __m128d b)
noexcept {
return _mm_mul_pd (a, b); }
149 static forcedinline __m128d JUCE_VECTOR_CALLTYPE bit_and (__m128d a, __m128d b)
noexcept {
return _mm_and_pd (a, b); }
150 static forcedinline __m128d JUCE_VECTOR_CALLTYPE bit_or (__m128d a, __m128d b)
noexcept {
return _mm_or_pd (a, b); }
151 static forcedinline __m128d JUCE_VECTOR_CALLTYPE bit_xor (__m128d a, __m128d b)
noexcept {
return _mm_xor_pd (a, b); }
152 static forcedinline __m128d JUCE_VECTOR_CALLTYPE bit_notand (__m128d a, __m128d b)
noexcept {
return _mm_andnot_pd (a, b); }
153 static forcedinline __m128d JUCE_VECTOR_CALLTYPE bit_not (__m128d a)
noexcept {
return bit_notand (a, vconst (kAllBitsSet)); }
154 static forcedinline __m128d JUCE_VECTOR_CALLTYPE min (__m128d a, __m128d b)
noexcept {
return _mm_min_pd (a, b); }
155 static forcedinline __m128d JUCE_VECTOR_CALLTYPE max (__m128d a, __m128d b)
noexcept {
return _mm_max_pd (a, b); }
156 static forcedinline __m128d JUCE_VECTOR_CALLTYPE equal (__m128d a, __m128d b)
noexcept {
return _mm_cmpeq_pd (a, b); }
157 static forcedinline __m128d JUCE_VECTOR_CALLTYPE notEqual (__m128d a, __m128d b)
noexcept {
return _mm_cmpneq_pd (a, b); }
158 static forcedinline __m128d JUCE_VECTOR_CALLTYPE greaterThan (__m128d a, __m128d b)
noexcept {
return _mm_cmpgt_pd (a, b); }
159 static forcedinline __m128d JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m128d a, __m128d b)
noexcept {
return _mm_cmpge_pd (a, b); }
160 static forcedinline
bool JUCE_VECTOR_CALLTYPE allEqual (__m128d a, __m128d b )
noexcept {
return (_mm_movemask_pd (equal (a, b)) == 0x3); }
161 static forcedinline __m128d JUCE_VECTOR_CALLTYPE multiplyAdd (__m128d a, __m128d b, __m128d c)
noexcept {
return _mm_add_pd (a, _mm_mul_pd (b, c)); }
162 static forcedinline __m128d JUCE_VECTOR_CALLTYPE dupeven (__m128d a)
noexcept {
return _mm_shuffle_pd (a, a, _MM_SHUFFLE2 (0, 0)); }
163 static forcedinline __m128d JUCE_VECTOR_CALLTYPE dupodd (__m128d a)
noexcept {
return _mm_shuffle_pd (a, a, _MM_SHUFFLE2 (1, 1)); }
164 static forcedinline __m128d JUCE_VECTOR_CALLTYPE swapevenodd (__m128d a)
noexcept {
return _mm_shuffle_pd (a, a, _MM_SHUFFLE2 (0, 1)); }
165 static forcedinline __m128d JUCE_VECTOR_CALLTYPE oddevensum (__m128d a)
noexcept {
return a; }
166 static forcedinline
double JUCE_VECTOR_CALLTYPE get (__m128d v,
size_t i)
noexcept {
return SIMDFallbackOps<double, __m128d>::get (v, i); }
167 static forcedinline __m128d JUCE_VECTOR_CALLTYPE set (__m128d v,
size_t i,
double s)
noexcept {
return SIMDFallbackOps<double, __m128d>::set (v, i, s); }
168 static forcedinline __m128d JUCE_VECTOR_CALLTYPE truncate (__m128d a)
noexcept {
return _mm_cvtepi32_pd (_mm_cvttpd_epi32 (a)); }
171 static forcedinline __m128d JUCE_VECTOR_CALLTYPE cmplxmul (__m128d a, __m128d b)
noexcept
173 __m128d rr_ir = mul (a, dupeven (b));
174 __m128d ii_ri = mul (swapevenodd (a), dupodd (b));
175 return add (rr_ir, bit_xor (ii_ri, vconst (kEvenHighBit)));
178 static forcedinline
double JUCE_VECTOR_CALLTYPE sum (__m128d a)
noexcept
180 #if defined(__SSE4__)
181 __m128d retval = _mm_dp_pd (a, vconst (kOne), 0xff);
182 #elif defined(__SSE3__)
183 __m128d retval = _mm_hadd_pd (a, a);
185 __m128d retval = _mm_add_pd (_mm_shuffle_pd (a, a, 0x01), a);
187 return _mm_cvtsd_f64 (retval);
197struct SIMDNativeOps<int8_t>
200 using vSIMDType = __m128i;
203 DECLARE_SSE_SIMD_CONST (int8_t, kAllBitsSet);
205 static forcedinline __m128i JUCE_VECTOR_CALLTYPE vconst (
const int8_t* a)
noexcept {
return load (a); }
206 static forcedinline __m128i JUCE_VECTOR_CALLTYPE load (
const int8_t* a)
noexcept {
return _mm_load_si128 (
reinterpret_cast<const __m128i*
> (a)); }
207 static forcedinline
void JUCE_VECTOR_CALLTYPE store (__m128i v, int8_t* p)
noexcept { _mm_store_si128 (
reinterpret_cast<__m128i*
> (p), v); }
208 static forcedinline __m128i JUCE_VECTOR_CALLTYPE expand (int8_t s)
noexcept {
return _mm_set1_epi8 (s); }
209 static forcedinline __m128i JUCE_VECTOR_CALLTYPE add (__m128i a, __m128i b)
noexcept {
return _mm_add_epi8 (a, b); }
210 static forcedinline __m128i JUCE_VECTOR_CALLTYPE sub (__m128i a, __m128i b)
noexcept {
return _mm_sub_epi8 (a, b); }
211 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_and (__m128i a, __m128i b)
noexcept {
return _mm_and_si128 (a, b); }
212 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_or (__m128i a, __m128i b)
noexcept {
return _mm_or_si128 (a, b); }
213 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_xor (__m128i a, __m128i b)
noexcept {
return _mm_xor_si128 (a, b); }
214 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_andnot (__m128i a, __m128i b)
noexcept {
return _mm_andnot_si128 (a, b); }
215 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_not (__m128i a)
noexcept {
return _mm_andnot_si128 (a, vconst (kAllBitsSet)); }
216 #if defined(__SSE4__)
217 static forcedinline __m128i JUCE_VECTOR_CALLTYPE min (__m128i a, __m128i b)
noexcept {
return _mm_min_epi8 (a, b); }
218 static forcedinline __m128i JUCE_VECTOR_CALLTYPE max (__m128i a, __m128i b)
noexcept {
return _mm_max_epi8 (a, b); }
220 static forcedinline __m128i JUCE_VECTOR_CALLTYPE min (__m128i a, __m128i b)
noexcept { __m128i lt = greaterThan (b, a);
return bit_or (bit_and (lt, a), bit_andnot (lt, b)); }
221 static forcedinline __m128i JUCE_VECTOR_CALLTYPE max (__m128i a, __m128i b)
noexcept { __m128i gt = greaterThan (a, b);
return bit_or (bit_and (gt, a), bit_andnot (gt, b)); }
223 static forcedinline __m128i JUCE_VECTOR_CALLTYPE equal (__m128i a, __m128i b)
noexcept {
return _mm_cmpeq_epi8 (a, b); }
224 static forcedinline __m128i JUCE_VECTOR_CALLTYPE greaterThan (__m128i a, __m128i b)
noexcept {
return _mm_cmpgt_epi8 (a, b); }
225 static forcedinline __m128i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m128i a, __m128i b)
noexcept {
return bit_or (greaterThan (a, b), equal (a,b)); }
226 static forcedinline __m128i JUCE_VECTOR_CALLTYPE multiplyAdd (__m128i a, __m128i b, __m128i c)
noexcept {
return add (a, mul (b, c)); }
227 static forcedinline __m128i JUCE_VECTOR_CALLTYPE notEqual (__m128i a, __m128i b)
noexcept {
return bit_not (equal (a, b)); }
228 static forcedinline
bool JUCE_VECTOR_CALLTYPE allEqual (__m128i a, __m128i b)
noexcept {
return (_mm_movemask_epi8 (equal (a, b)) == 0xffff); }
229 static forcedinline int8_t JUCE_VECTOR_CALLTYPE get (__m128i v,
size_t i)
noexcept {
return SIMDFallbackOps<int8_t, __m128i>::get (v, i); }
230 static forcedinline __m128i JUCE_VECTOR_CALLTYPE set (__m128i v,
size_t i, int8_t s)
noexcept {
return SIMDFallbackOps<int8_t, __m128i>::set (v, i, s); }
231 static forcedinline __m128i JUCE_VECTOR_CALLTYPE truncate (__m128i a)
noexcept {
return a; }
234 static forcedinline int8_t JUCE_VECTOR_CALLTYPE sum (__m128i a)
noexcept
237 __m128i lo = _mm_unpacklo_epi8 (a, _mm_setzero_si128());
238 __m128i hi = _mm_unpackhi_epi8 (a, _mm_setzero_si128());
240 for (
int i = 0; i < 3; ++i)
242 lo = _mm_hadd_epi16 (lo, lo);
243 hi = _mm_hadd_epi16 (hi, hi);
246 return static_cast<int8_t
> ((_mm_cvtsi128_si32 (lo) & 0xff) + (_mm_cvtsi128_si32 (hi) & 0xff));
248 return SIMDFallbackOps<int8_t, __m128i>::sum (a);
252 static forcedinline __m128i JUCE_VECTOR_CALLTYPE mul (__m128i a, __m128i b)
255 __m128i even = _mm_mullo_epi16 (a, b);
256 __m128i odd = _mm_mullo_epi16 (_mm_srli_epi16 (a, 8), _mm_srli_epi16 (b, 8));
258 return _mm_or_si128 (_mm_slli_epi16 (odd, 8),
259 _mm_srli_epi16 (_mm_slli_epi16 (even, 8), 8));
269struct SIMDNativeOps<uint8_t>
272 using vSIMDType = __m128i;
275 DECLARE_SSE_SIMD_CONST (uint8_t, kHighBit);
276 DECLARE_SSE_SIMD_CONST (uint8_t, kAllBitsSet);
278 static forcedinline __m128i JUCE_VECTOR_CALLTYPE vconst (
const uint8_t* a)
noexcept {
return load (a); }
279 static forcedinline __m128i JUCE_VECTOR_CALLTYPE ssign (__m128i a)
noexcept {
return _mm_xor_si128 (a, vconst (kHighBit)); }
280 static forcedinline __m128i JUCE_VECTOR_CALLTYPE load (
const uint8_t* a)
noexcept {
return _mm_load_si128 (
reinterpret_cast<const __m128i*
> (a)); }
281 static forcedinline
void JUCE_VECTOR_CALLTYPE store (__m128i v, uint8_t* p)
noexcept { _mm_store_si128 (
reinterpret_cast<__m128i*
> (p), v); }
282 static forcedinline __m128i JUCE_VECTOR_CALLTYPE expand (uint8_t s)
noexcept {
return _mm_set1_epi8 ((int8_t) s); }
283 static forcedinline __m128i JUCE_VECTOR_CALLTYPE add (__m128i a, __m128i b)
noexcept {
return _mm_add_epi8 (a, b); }
284 static forcedinline __m128i JUCE_VECTOR_CALLTYPE sub (__m128i a, __m128i b)
noexcept {
return _mm_sub_epi8 (a, b); }
285 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_and (__m128i a, __m128i b)
noexcept {
return _mm_and_si128 (a, b); }
286 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_or (__m128i a, __m128i b)
noexcept {
return _mm_or_si128 (a, b); }
287 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_xor (__m128i a, __m128i b)
noexcept {
return _mm_xor_si128 (a, b); }
288 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_andnot (__m128i a, __m128i b)
noexcept {
return _mm_andnot_si128 (a, b); }
289 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_not (__m128i a)
noexcept {
return _mm_andnot_si128 (a, vconst (kAllBitsSet)); }
290 static forcedinline __m128i JUCE_VECTOR_CALLTYPE min (__m128i a, __m128i b)
noexcept {
return _mm_min_epu8 (a, b); }
291 static forcedinline __m128i JUCE_VECTOR_CALLTYPE max (__m128i a, __m128i b)
noexcept {
return _mm_max_epu8 (a, b); }
292 static forcedinline __m128i JUCE_VECTOR_CALLTYPE equal (__m128i a, __m128i b)
noexcept {
return _mm_cmpeq_epi8 (a, b); }
293 static forcedinline __m128i JUCE_VECTOR_CALLTYPE greaterThan (__m128i a, __m128i b)
noexcept {
return _mm_cmpgt_epi8 (ssign (a), ssign (b)); }
294 static forcedinline __m128i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m128i a, __m128i b)
noexcept {
return bit_or (greaterThan (a, b), equal (a,b)); }
295 static forcedinline __m128i JUCE_VECTOR_CALLTYPE multiplyAdd (__m128i a, __m128i b, __m128i c)
noexcept {
return add (a, mul (b, c)); }
296 static forcedinline __m128i JUCE_VECTOR_CALLTYPE notEqual (__m128i a, __m128i b)
noexcept {
return bit_not (equal (a, b)); }
297 static forcedinline
bool JUCE_VECTOR_CALLTYPE allEqual (__m128i a, __m128i b)
noexcept {
return (_mm_movemask_epi8 (equal (a, b)) == 0xffff); }
298 static forcedinline uint8_t JUCE_VECTOR_CALLTYPE get (__m128i v,
size_t i)
noexcept {
return SIMDFallbackOps<uint8_t, __m128i>::get (v, i); }
299 static forcedinline __m128i JUCE_VECTOR_CALLTYPE set (__m128i v,
size_t i, uint8_t s)
noexcept {
return SIMDFallbackOps<uint8_t, __m128i>::set (v, i, s); }
300 static forcedinline __m128i JUCE_VECTOR_CALLTYPE truncate (__m128i a)
noexcept {
return a; }
303 static forcedinline uint8_t JUCE_VECTOR_CALLTYPE sum (__m128i a)
noexcept
306 __m128i lo = _mm_unpacklo_epi8 (a, _mm_setzero_si128());
307 __m128i hi = _mm_unpackhi_epi8 (a, _mm_setzero_si128());
309 for (
int i = 0; i < 3; ++i)
311 lo = _mm_hadd_epi16 (lo, lo);
312 hi = _mm_hadd_epi16 (hi, hi);
315 return static_cast<uint8_t
> ((
static_cast<uint32_t
> (_mm_cvtsi128_si32 (lo)) & 0xffu)
316 + (
static_cast<uint32_t
> (_mm_cvtsi128_si32 (hi)) & 0xffu));
318 return SIMDFallbackOps<uint8_t, __m128i>::sum (a);
322 static forcedinline __m128i JUCE_VECTOR_CALLTYPE mul (__m128i a, __m128i b)
325 __m128i even = _mm_mullo_epi16 (a, b);
326 __m128i odd = _mm_mullo_epi16 (_mm_srli_epi16 (a, 8), _mm_srli_epi16 (b, 8));
328 return _mm_or_si128 (_mm_slli_epi16 (odd, 8),
329 _mm_srli_epi16 (_mm_slli_epi16 (even, 8), 8));
339struct SIMDNativeOps<int16_t>
342 using vSIMDType = __m128i;
345 DECLARE_SSE_SIMD_CONST (int16_t, kAllBitsSet);
348 static forcedinline __m128i JUCE_VECTOR_CALLTYPE vconst (
const int16_t* a)
noexcept {
return load (a); }
349 static forcedinline __m128i JUCE_VECTOR_CALLTYPE load (
const int16_t* a)
noexcept {
return _mm_load_si128 (
reinterpret_cast<const __m128i*
> (a)); }
350 static forcedinline
void JUCE_VECTOR_CALLTYPE store (__m128i v, int16_t* p)
noexcept { _mm_store_si128 (
reinterpret_cast<__m128i*
> (p), v); }
351 static forcedinline __m128i JUCE_VECTOR_CALLTYPE expand (int16_t s)
noexcept {
return _mm_set1_epi16 (s); }
352 static forcedinline __m128i JUCE_VECTOR_CALLTYPE add (__m128i a, __m128i b)
noexcept {
return _mm_add_epi16 (a, b); }
353 static forcedinline __m128i JUCE_VECTOR_CALLTYPE sub (__m128i a, __m128i b)
noexcept {
return _mm_sub_epi16 (a, b); }
354 static forcedinline __m128i JUCE_VECTOR_CALLTYPE mul (__m128i a, __m128i b)
noexcept {
return _mm_mullo_epi16 (a, b); }
355 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_and (__m128i a, __m128i b)
noexcept {
return _mm_and_si128 (a, b); }
356 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_or (__m128i a, __m128i b)
noexcept {
return _mm_or_si128 (a, b); }
357 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_xor (__m128i a, __m128i b)
noexcept {
return _mm_xor_si128 (a, b); }
358 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_andnot (__m128i a, __m128i b)
noexcept {
return _mm_andnot_si128 (a, b); }
359 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_not (__m128i a)
noexcept {
return _mm_andnot_si128 (a, vconst (kAllBitsSet)); }
360 static forcedinline __m128i JUCE_VECTOR_CALLTYPE min (__m128i a, __m128i b)
noexcept {
return _mm_min_epi16 (a, b); }
361 static forcedinline __m128i JUCE_VECTOR_CALLTYPE max (__m128i a, __m128i b)
noexcept {
return _mm_max_epi16 (a, b); }
362 static forcedinline __m128i JUCE_VECTOR_CALLTYPE equal (__m128i a, __m128i b)
noexcept {
return _mm_cmpeq_epi16 (a, b); }
363 static forcedinline __m128i JUCE_VECTOR_CALLTYPE greaterThan (__m128i a, __m128i b)
noexcept {
return _mm_cmpgt_epi16 (a, b); }
364 static forcedinline __m128i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m128i a, __m128i b)
noexcept {
return bit_or (greaterThan (a, b), equal (a,b)); }
365 static forcedinline __m128i JUCE_VECTOR_CALLTYPE multiplyAdd (__m128i a, __m128i b, __m128i c)
noexcept {
return add (a, mul (b, c)); }
366 static forcedinline __m128i JUCE_VECTOR_CALLTYPE notEqual (__m128i a, __m128i b)
noexcept {
return bit_not (equal (a, b)); }
367 static forcedinline
bool JUCE_VECTOR_CALLTYPE allEqual (__m128i a, __m128i b)
noexcept {
return (_mm_movemask_epi8 (equal (a, b)) == 0xffff); }
368 static forcedinline int16_t JUCE_VECTOR_CALLTYPE get (__m128i v,
size_t i)
noexcept {
return SIMDFallbackOps<int16_t, __m128i>::get (v, i); }
369 static forcedinline __m128i JUCE_VECTOR_CALLTYPE set (__m128i v,
size_t i, int16_t s)
noexcept {
return SIMDFallbackOps<int16_t, __m128i>::set (v, i, s); }
370 static forcedinline __m128i JUCE_VECTOR_CALLTYPE truncate (__m128i a)
noexcept {
return a; }
373 static forcedinline int16_t JUCE_VECTOR_CALLTYPE sum (__m128i a)
noexcept
376 __m128i tmp = _mm_hadd_epi16 (a, a);
377 tmp = _mm_hadd_epi16 (tmp, tmp);
378 tmp = _mm_hadd_epi16 (tmp, tmp);
380 return static_cast<int16_t
> (_mm_cvtsi128_si32 (tmp) & 0xffff);
382 return SIMDFallbackOps<int16_t, __m128i>::sum (a);
393struct SIMDNativeOps<uint16_t>
396 using vSIMDType = __m128i;
399 DECLARE_SSE_SIMD_CONST (uint16_t, kHighBit);
400 DECLARE_SSE_SIMD_CONST (uint16_t, kAllBitsSet);
403 static forcedinline __m128i JUCE_VECTOR_CALLTYPE vconst (
const uint16_t* a)
noexcept {
return load (a); }
404 static forcedinline __m128i JUCE_VECTOR_CALLTYPE ssign (__m128i a)
noexcept {
return _mm_xor_si128 (a, vconst (kHighBit)); }
405 static forcedinline __m128i JUCE_VECTOR_CALLTYPE load (
const uint16_t* a)
noexcept {
return _mm_load_si128 (
reinterpret_cast<const __m128i*
> (a)); }
406 static forcedinline
void JUCE_VECTOR_CALLTYPE store (__m128i v, uint16_t* p)
noexcept { _mm_store_si128 (
reinterpret_cast<__m128i*
> (p), v); }
407 static forcedinline __m128i JUCE_VECTOR_CALLTYPE expand (uint16_t s)
noexcept {
return _mm_set1_epi16 ((int16_t) s); }
408 static forcedinline __m128i JUCE_VECTOR_CALLTYPE add (__m128i a, __m128i b)
noexcept {
return _mm_add_epi16 (a, b); }
409 static forcedinline __m128i JUCE_VECTOR_CALLTYPE sub (__m128i a, __m128i b)
noexcept {
return _mm_sub_epi16 (a, b); }
410 static forcedinline __m128i JUCE_VECTOR_CALLTYPE mul (__m128i a, __m128i b)
noexcept {
return _mm_mullo_epi16 (a, b); }
411 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_and (__m128i a, __m128i b)
noexcept {
return _mm_and_si128 (a, b); }
412 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_or (__m128i a, __m128i b)
noexcept {
return _mm_or_si128 (a, b); }
413 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_xor (__m128i a, __m128i b)
noexcept {
return _mm_xor_si128 (a, b); }
414 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_andnot (__m128i a, __m128i b)
noexcept {
return _mm_andnot_si128 (a, b); }
415 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_not (__m128i a)
noexcept {
return _mm_andnot_si128 (a, vconst (kAllBitsSet)); }
416 #if defined(__SSE4__)
417 static forcedinline __m128i JUCE_VECTOR_CALLTYPE min (__m128i a, __m128i b)
noexcept {
return _mm_min_epu16 (a, b); }
418 static forcedinline __m128i JUCE_VECTOR_CALLTYPE max (__m128i a, __m128i b)
noexcept {
return _mm_max_epu16 (a, b); }
420 static forcedinline __m128i JUCE_VECTOR_CALLTYPE min (__m128i a, __m128i b)
noexcept { __m128i lt = greaterThan (b, a);
return bit_or (bit_and (lt, a), bit_andnot (lt, b)); }
421 static forcedinline __m128i JUCE_VECTOR_CALLTYPE max (__m128i a, __m128i b)
noexcept { __m128i gt = greaterThan (a, b);
return bit_or (bit_and (gt, a), bit_andnot (gt, b)); }
423 static forcedinline __m128i JUCE_VECTOR_CALLTYPE equal (__m128i a, __m128i b)
noexcept {
return _mm_cmpeq_epi16 (a, b); }
424 static forcedinline __m128i JUCE_VECTOR_CALLTYPE greaterThan (__m128i a, __m128i b)
noexcept {
return _mm_cmpgt_epi16 (ssign (a), ssign (b)); }
425 static forcedinline __m128i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m128i a, __m128i b)
noexcept {
return bit_or (greaterThan (a, b), equal (a,b)); }
426 static forcedinline __m128i JUCE_VECTOR_CALLTYPE multiplyAdd (__m128i a, __m128i b, __m128i c)
noexcept {
return add (a, mul (b, c)); }
427 static forcedinline __m128i JUCE_VECTOR_CALLTYPE notEqual (__m128i a, __m128i b)
noexcept {
return bit_not (equal (a, b)); }
428 static forcedinline
bool JUCE_VECTOR_CALLTYPE allEqual (__m128i a, __m128i b)
noexcept {
return (_mm_movemask_epi8 (equal (a, b)) == 0xffff); }
429 static forcedinline uint16_t JUCE_VECTOR_CALLTYPE get (__m128i v,
size_t i)
noexcept {
return SIMDFallbackOps<uint16_t, __m128i>::get (v, i); }
430 static forcedinline __m128i JUCE_VECTOR_CALLTYPE set (__m128i v,
size_t i, uint16_t s)
noexcept {
return SIMDFallbackOps<uint16_t, __m128i>::set (v, i, s); }
431 static forcedinline __m128i JUCE_VECTOR_CALLTYPE truncate (__m128i a)
noexcept {
return a; }
434 static forcedinline uint16_t JUCE_VECTOR_CALLTYPE sum (__m128i a)
noexcept
437 __m128i tmp = _mm_hadd_epi16 (a, a);
438 tmp = _mm_hadd_epi16 (tmp, tmp);
439 tmp = _mm_hadd_epi16 (tmp, tmp);
441 return static_cast<uint16_t
> (
static_cast<uint32_t
> (_mm_cvtsi128_si32 (tmp)) & 0xffffu);
443 return SIMDFallbackOps<uint16_t, __m128i>::sum (a);
454struct SIMDNativeOps<int32_t>
457 using vSIMDType = __m128i;
460 DECLARE_SSE_SIMD_CONST (int32_t, kAllBitsSet);
463 static forcedinline __m128i JUCE_VECTOR_CALLTYPE vconst (
const int32_t* a)
noexcept {
return load (a); }
464 static forcedinline __m128i JUCE_VECTOR_CALLTYPE load (
const int32_t* a)
noexcept {
return _mm_load_si128 (
reinterpret_cast<const __m128i*
> (a)); }
465 static forcedinline
void JUCE_VECTOR_CALLTYPE store (__m128i v, int32_t* p)
noexcept { _mm_store_si128 (
reinterpret_cast<__m128i*
> (p), v); }
466 static forcedinline __m128i JUCE_VECTOR_CALLTYPE expand (int32_t s)
noexcept {
return _mm_set1_epi32 (s); }
467 static forcedinline __m128i JUCE_VECTOR_CALLTYPE add (__m128i a, __m128i b)
noexcept {
return _mm_add_epi32 (a, b); }
468 static forcedinline __m128i JUCE_VECTOR_CALLTYPE sub (__m128i a, __m128i b)
noexcept {
return _mm_sub_epi32 (a, b); }
469 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_and (__m128i a, __m128i b)
noexcept {
return _mm_and_si128 (a, b); }
470 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_or (__m128i a, __m128i b)
noexcept {
return _mm_or_si128 (a, b); }
471 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_xor (__m128i a, __m128i b)
noexcept {
return _mm_xor_si128 (a, b); }
472 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_andnot (__m128i a, __m128i b)
noexcept {
return _mm_andnot_si128 (a, b); }
473 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_not (__m128i a)
noexcept {
return _mm_andnot_si128 (a, vconst (kAllBitsSet)); }
474 static forcedinline __m128i JUCE_VECTOR_CALLTYPE equal (__m128i a, __m128i b)
noexcept {
return _mm_cmpeq_epi32 (a, b); }
475 static forcedinline __m128i JUCE_VECTOR_CALLTYPE greaterThan (__m128i a, __m128i b)
noexcept {
return _mm_cmpgt_epi32 (a, b); }
476 static forcedinline __m128i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m128i a, __m128i b)
noexcept {
return bit_or (greaterThan (a, b), equal (a,b)); }
477 static forcedinline __m128i JUCE_VECTOR_CALLTYPE multiplyAdd (__m128i a, __m128i b, __m128i c)
noexcept {
return add (a, mul (b, c)); }
478 static forcedinline __m128i JUCE_VECTOR_CALLTYPE notEqual (__m128i a, __m128i b)
noexcept {
return bit_not (equal (a, b)); }
479 static forcedinline
bool JUCE_VECTOR_CALLTYPE allEqual (__m128i a, __m128i b)
noexcept {
return (_mm_movemask_epi8 (equal (a, b)) == 0xffff); }
480 static forcedinline int32_t JUCE_VECTOR_CALLTYPE get (__m128i v,
size_t i)
noexcept {
return SIMDFallbackOps<int32_t, __m128i>::get (v, i); }
481 static forcedinline __m128i JUCE_VECTOR_CALLTYPE set (__m128i v,
size_t i, int32_t s)
noexcept {
return SIMDFallbackOps<int32_t, __m128i>::set (v, i, s); }
482 static forcedinline __m128i JUCE_VECTOR_CALLTYPE truncate (__m128i a)
noexcept {
return a; }
485 static forcedinline int32_t JUCE_VECTOR_CALLTYPE sum (__m128i a)
noexcept
488 __m128i tmp = _mm_hadd_epi32 (a, a);
489 return _mm_cvtsi128_si32 (_mm_hadd_epi32 (tmp, tmp));
491 return SIMDFallbackOps<int32_t, __m128i>::sum (a);
495 static forcedinline __m128i JUCE_VECTOR_CALLTYPE mul (__m128i a, __m128i b)
noexcept
497 #if defined(__SSE4_1__)
498 return _mm_mullo_epi32 (a, b);
500 __m128i even = _mm_mul_epu32 (a,b);
501 __m128i odd = _mm_mul_epu32 (_mm_srli_si128 (a,4), _mm_srli_si128 (b,4));
502 return _mm_unpacklo_epi32 (_mm_shuffle_epi32(even, _MM_SHUFFLE (0,0,2,0)),
503 _mm_shuffle_epi32(odd, _MM_SHUFFLE (0,0,2,0)));
507 static forcedinline __m128i JUCE_VECTOR_CALLTYPE min (__m128i a, __m128i b)
noexcept
509 #if defined(__SSE4_1__)
510 return _mm_min_epi32 (a, b);
512 __m128i lt = greaterThan (b, a);
513 return bit_or (bit_and (lt, a), bit_andnot (lt, b));
517 static forcedinline __m128i JUCE_VECTOR_CALLTYPE max (__m128i a, __m128i b)
noexcept
519 #if defined(__SSE4_1__)
520 return _mm_max_epi32 (a, b);
522 __m128i gt = greaterThan (a, b);
523 return bit_or (bit_and (gt, a), bit_andnot (gt, b));
534struct SIMDNativeOps<uint32_t>
537 using vSIMDType = __m128i;
540 DECLARE_SSE_SIMD_CONST (uint32_t, kAllBitsSet);
541 DECLARE_SSE_SIMD_CONST (uint32_t, kHighBit);
544 static forcedinline __m128i JUCE_VECTOR_CALLTYPE vconst (
const uint32_t* a)
noexcept {
return load (a); }
545 static forcedinline __m128i JUCE_VECTOR_CALLTYPE ssign (__m128i a)
noexcept {
return _mm_xor_si128 (a, vconst (kHighBit)); }
546 static forcedinline __m128i JUCE_VECTOR_CALLTYPE load (
const uint32_t* a)
noexcept {
return _mm_load_si128 (
reinterpret_cast<const __m128i*
> (a)); }
547 static forcedinline
void JUCE_VECTOR_CALLTYPE store (__m128i v, uint32_t* p)
noexcept { _mm_store_si128 (
reinterpret_cast<__m128i*
> (p), v); }
548 static forcedinline __m128i JUCE_VECTOR_CALLTYPE expand (uint32_t s)
noexcept {
return _mm_set1_epi32 ((int32_t) s); }
549 static forcedinline __m128i JUCE_VECTOR_CALLTYPE add (__m128i a, __m128i b)
noexcept {
return _mm_add_epi32 (a, b); }
550 static forcedinline __m128i JUCE_VECTOR_CALLTYPE sub (__m128i a, __m128i b)
noexcept {
return _mm_sub_epi32 (a, b); }
551 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_and (__m128i a, __m128i b)
noexcept {
return _mm_and_si128 (a, b); }
552 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_or (__m128i a, __m128i b)
noexcept {
return _mm_or_si128 (a, b); }
553 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_xor (__m128i a, __m128i b)
noexcept {
return _mm_xor_si128 (a, b); }
554 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_andnot (__m128i a, __m128i b)
noexcept {
return _mm_andnot_si128 (a, b); }
555 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_not (__m128i a)
noexcept {
return _mm_andnot_si128 (a, vconst (kAllBitsSet)); }
556 static forcedinline __m128i JUCE_VECTOR_CALLTYPE equal (__m128i a, __m128i b)
noexcept {
return _mm_cmpeq_epi32 (a, b); }
557 static forcedinline __m128i JUCE_VECTOR_CALLTYPE greaterThan (__m128i a, __m128i b)
noexcept {
return _mm_cmpgt_epi32 (ssign (a), ssign (b)); }
558 static forcedinline __m128i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m128i a, __m128i b)
noexcept {
return bit_or (greaterThan (a, b), equal (a,b)); }
559 static forcedinline __m128i JUCE_VECTOR_CALLTYPE multiplyAdd (__m128i a, __m128i b, __m128i c)
noexcept {
return add (a, mul (b, c)); }
560 static forcedinline __m128i JUCE_VECTOR_CALLTYPE notEqual (__m128i a, __m128i b)
noexcept {
return bit_not (equal (a, b)); }
561 static forcedinline
bool JUCE_VECTOR_CALLTYPE allEqual (__m128i a, __m128i b)
noexcept {
return (_mm_movemask_epi8 (equal (a, b)) == 0xffff); }
562 static forcedinline uint32_t JUCE_VECTOR_CALLTYPE get (__m128i v,
size_t i)
noexcept {
return SIMDFallbackOps<uint32_t, __m128i>::get (v, i); }
563 static forcedinline __m128i JUCE_VECTOR_CALLTYPE set (__m128i v,
size_t i, uint32_t s)
noexcept {
return SIMDFallbackOps<uint32_t, __m128i>::set (v, i, s); }
564 static forcedinline __m128i JUCE_VECTOR_CALLTYPE truncate (__m128i a)
noexcept {
return a; }
567 static forcedinline uint32_t JUCE_VECTOR_CALLTYPE sum (__m128i a)
noexcept
570 __m128i tmp = _mm_hadd_epi32 (a, a);
571 return static_cast<uint32_t
> (_mm_cvtsi128_si32 (_mm_hadd_epi32 (tmp, tmp)));
573 return SIMDFallbackOps<uint32_t, __m128i>::sum (a);
577 static forcedinline __m128i JUCE_VECTOR_CALLTYPE mul (__m128i a, __m128i b)
noexcept
579 #if defined(__SSE4_1__)
580 return _mm_mullo_epi32 (a, b);
582 __m128i even = _mm_mul_epu32 (a,b);
583 __m128i odd = _mm_mul_epu32 (_mm_srli_si128 (a,4), _mm_srli_si128 (b,4));
584 return _mm_unpacklo_epi32 (_mm_shuffle_epi32(even, _MM_SHUFFLE (0,0,2,0)),
585 _mm_shuffle_epi32(odd, _MM_SHUFFLE (0,0,2,0)));
589 static forcedinline __m128i JUCE_VECTOR_CALLTYPE min (__m128i a, __m128i b)
noexcept
591 #if defined(__SSE4_1__)
592 return _mm_min_epi32 (a, b);
594 __m128i lt = greaterThan (b, a);
595 return bit_or (bit_and (lt, a), bit_andnot (lt, b));
599 static forcedinline __m128i JUCE_VECTOR_CALLTYPE max (__m128i a, __m128i b)
noexcept
601 #if defined(__SSE4_1__)
602 return _mm_max_epi32 (a, b);
604 __m128i gt = greaterThan (a, b);
605 return bit_or (bit_and (gt, a), bit_andnot (gt, b));
616struct SIMDNativeOps<int64_t>
619 using vSIMDType = __m128i;
622 DECLARE_SSE_SIMD_CONST (int64_t, kAllBitsSet);
624 static forcedinline __m128i JUCE_VECTOR_CALLTYPE vconst (
const int64_t* a)
noexcept {
return load (a); }
625 static forcedinline __m128i JUCE_VECTOR_CALLTYPE expand (int64_t s)
noexcept {
return _mm_set1_epi64x (s); }
626 static forcedinline __m128i JUCE_VECTOR_CALLTYPE load (
const int64_t* a)
noexcept {
return _mm_load_si128 (
reinterpret_cast<const __m128i*
> (a)); }
627 static forcedinline
void JUCE_VECTOR_CALLTYPE store (__m128i v, int64_t* p)
noexcept { _mm_store_si128 (
reinterpret_cast<__m128i*
> (p), v); }
628 static forcedinline __m128i JUCE_VECTOR_CALLTYPE add (__m128i a, __m128i b)
noexcept {
return _mm_add_epi64 (a, b); }
629 static forcedinline __m128i JUCE_VECTOR_CALLTYPE sub (__m128i a, __m128i b)
noexcept {
return _mm_sub_epi64 (a, b); }
630 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_and (__m128i a, __m128i b)
noexcept {
return _mm_and_si128 (a, b); }
631 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_or (__m128i a, __m128i b)
noexcept {
return _mm_or_si128 (a, b); }
632 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_xor (__m128i a, __m128i b)
noexcept {
return _mm_xor_si128 (a, b); }
633 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_andnot (__m128i a, __m128i b)
noexcept {
return _mm_andnot_si128 (a, b); }
634 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_not (__m128i a)
noexcept {
return _mm_andnot_si128 (a, vconst (kAllBitsSet)); }
635 static forcedinline __m128i JUCE_VECTOR_CALLTYPE min (__m128i a, __m128i b)
noexcept { __m128i lt = greaterThan (b, a);
return bit_or (bit_and (lt, a), bit_andnot (lt, b)); }
636 static forcedinline __m128i JUCE_VECTOR_CALLTYPE max (__m128i a, __m128i b)
noexcept { __m128i gt = greaterThan (a, b);
return bit_or (bit_and (gt, a), bit_andnot (gt, b)); }
637 static forcedinline __m128i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m128i a, __m128i b)
noexcept {
return bit_or (greaterThan (a, b), equal (a,b)); }
638 static forcedinline __m128i JUCE_VECTOR_CALLTYPE multiplyAdd (__m128i a, __m128i b, __m128i c)
noexcept {
return add (a, mul (b, c)); }
639 static forcedinline __m128i JUCE_VECTOR_CALLTYPE notEqual (__m128i a, __m128i b)
noexcept {
return bit_not (equal (a, b)); }
640 static forcedinline
bool JUCE_VECTOR_CALLTYPE allEqual (__m128i a, __m128i b)
noexcept {
return (_mm_movemask_epi8 (equal (a, b)) == 0xffff); }
641 static forcedinline int64_t JUCE_VECTOR_CALLTYPE get (__m128i v,
size_t i)
noexcept {
return SIMDFallbackOps<int64_t, __m128i>::get (v, i); }
642 static forcedinline __m128i JUCE_VECTOR_CALLTYPE set (__m128i v,
size_t i, int64_t s)
noexcept {
return SIMDFallbackOps<int64_t, __m128i>::set (v, i, s); }
643 static forcedinline int64_t JUCE_VECTOR_CALLTYPE sum (__m128i a)
noexcept {
return SIMDFallbackOps<int64_t, __m128i>::sum (a); }
644 static forcedinline __m128i JUCE_VECTOR_CALLTYPE mul (__m128i a, __m128i b)
noexcept {
return SIMDFallbackOps<int64_t, __m128i>::mul (a, b); }
645 static forcedinline __m128i JUCE_VECTOR_CALLTYPE truncate (__m128i a)
noexcept {
return a; }
647 static forcedinline __m128i JUCE_VECTOR_CALLTYPE equal (__m128i a, __m128i b)
noexcept
649 #if defined(__SSE4_1__)
650 return _mm_cmpeq_epi64 (a, b);
652 __m128i bitmask = _mm_cmpeq_epi32 (a, b);
653 bitmask = _mm_and_si128 (bitmask, _mm_shuffle_epi32 (bitmask, _MM_SHUFFLE (2, 3, 0, 1)));
654 return _mm_shuffle_epi32 (bitmask, _MM_SHUFFLE (2, 2, 0, 0));
658 static forcedinline __m128i JUCE_VECTOR_CALLTYPE greaterThan (__m128i a, __m128i b)
noexcept
660 #if defined(__SSE4_2__)
661 return _mm_cmpgt_epi64 (a, b);
663 return SIMDFallbackOps<int64_t, __m128i>::greaterThan (a, b);
674struct SIMDNativeOps<uint64_t>
677 using vSIMDType = __m128i;
680 DECLARE_SSE_SIMD_CONST (uint64_t, kAllBitsSet);
681 DECLARE_SSE_SIMD_CONST (uint64_t, kHighBit);
683 static forcedinline __m128i JUCE_VECTOR_CALLTYPE vconst (
const uint64_t* a)
noexcept {
return load (a); }
684 static forcedinline __m128i JUCE_VECTOR_CALLTYPE expand (uint64_t s)
noexcept {
return _mm_set1_epi64x ((int64_t) s); }
685 static forcedinline __m128i JUCE_VECTOR_CALLTYPE ssign (__m128i a)
noexcept {
return _mm_xor_si128 (a, vconst (kHighBit)); }
686 static forcedinline __m128i JUCE_VECTOR_CALLTYPE load (
const uint64_t* a)
noexcept {
return _mm_load_si128 (
reinterpret_cast<const __m128i*
> (a)); }
687 static forcedinline
void JUCE_VECTOR_CALLTYPE store (__m128i v, uint64_t* p)
noexcept { _mm_store_si128 (
reinterpret_cast<__m128i*
> (p), v); }
688 static forcedinline __m128i JUCE_VECTOR_CALLTYPE add (__m128i a, __m128i b)
noexcept {
return _mm_add_epi64 (a, b); }
689 static forcedinline __m128i JUCE_VECTOR_CALLTYPE sub (__m128i a, __m128i b)
noexcept {
return _mm_sub_epi64 (a, b); }
690 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_and (__m128i a, __m128i b)
noexcept {
return _mm_and_si128 (a, b); }
691 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_or (__m128i a, __m128i b)
noexcept {
return _mm_or_si128 (a, b); }
692 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_xor (__m128i a, __m128i b)
noexcept {
return _mm_xor_si128 (a, b); }
693 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_andnot (__m128i a, __m128i b)
noexcept {
return _mm_andnot_si128 (a, b); }
694 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_not (__m128i a)
noexcept {
return _mm_andnot_si128 (a, vconst (kAllBitsSet)); }
695 static forcedinline __m128i JUCE_VECTOR_CALLTYPE min (__m128i a, __m128i b)
noexcept { __m128i lt = greaterThan (b, a);
return bit_or (bit_and (lt, a), bit_andnot (lt, b)); }
696 static forcedinline __m128i JUCE_VECTOR_CALLTYPE max (__m128i a, __m128i b)
noexcept { __m128i gt = greaterThan (a, b);
return bit_or (bit_and (gt, a), bit_andnot (gt, b)); }
697 static forcedinline __m128i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m128i a, __m128i b)
noexcept {
return bit_or (greaterThan (a, b), equal (a,b)); }
698 static forcedinline __m128i JUCE_VECTOR_CALLTYPE multiplyAdd (__m128i a, __m128i b, __m128i c)
noexcept {
return add (a, mul (b, c)); }
699 static forcedinline __m128i JUCE_VECTOR_CALLTYPE notEqual (__m128i a, __m128i b)
noexcept {
return bit_not (equal (a, b)); }
700 static forcedinline
bool JUCE_VECTOR_CALLTYPE allEqual (__m128i a, __m128i b)
noexcept {
return (_mm_movemask_epi8 (equal (a, b)) == 0xffff); }
701 static forcedinline uint64_t JUCE_VECTOR_CALLTYPE get (__m128i v,
size_t i)
noexcept {
return SIMDFallbackOps<uint64_t, __m128i>::get (v, i); }
702 static forcedinline __m128i JUCE_VECTOR_CALLTYPE set (__m128i v,
size_t i, uint64_t s)
noexcept {
return SIMDFallbackOps<uint64_t, __m128i>::set (v, i, s); }
703 static forcedinline uint64_t JUCE_VECTOR_CALLTYPE sum (__m128i a)
noexcept {
return SIMDFallbackOps<uint64_t, __m128i>::sum (a); }
704 static forcedinline __m128i JUCE_VECTOR_CALLTYPE mul (__m128i a, __m128i b)
noexcept {
return SIMDFallbackOps<uint64_t, __m128i>::mul (a, b); }
705 static forcedinline __m128i JUCE_VECTOR_CALLTYPE truncate (__m128i a)
noexcept {
return a; }
707 static forcedinline __m128i JUCE_VECTOR_CALLTYPE equal (__m128i a, __m128i b)
noexcept
709 #if defined(__SSE4_1__)
710 return _mm_cmpeq_epi64 (a, b);
712 __m128i bitmask = _mm_cmpeq_epi32 (a, b);
713 bitmask = _mm_and_si128 (bitmask, _mm_shuffle_epi32 (bitmask, _MM_SHUFFLE (2, 3, 0, 1)));
714 return _mm_shuffle_epi32 (bitmask, _MM_SHUFFLE (2, 2, 0, 0));
718 static forcedinline __m128i JUCE_VECTOR_CALLTYPE greaterThan (__m128i a, __m128i b)
noexcept
720 #if defined(__SSE4_2__)
721 return _mm_cmpgt_epi64 (ssign (a), ssign (b));
723 return SIMDFallbackOps<uint64_t, __m128i>::greaterThan (a, b);
730#if JUCE_GCC && (__GNUC__ >= 6)
731 #pragma GCC diagnostic pop