OpenShot Audio Library | OpenShotAudio
0.3.2
Loading...
Searching...
No Matches
juce_avx_SIMDNativeOps.h
1
/*
  ==============================================================================

   This file is part of the JUCE library.
   Copyright (c) 2017 - ROLI Ltd.

   JUCE is an open source library subject to commercial or open-source
   licensing.

   By using JUCE, you agree to the terms of both the JUCE 5 End-User License
   Agreement and JUCE 5 Privacy Policy (both updated and effective as of the
   27th April 2017).

   End User License Agreement: www.juce.com/juce-5-licence
   Privacy Policy: www.juce.com/juce-5-privacy-policy

   Or: You may also use this code under the terms of the GPL v3 (see
   www.gnu.org/licenses).

   JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
   DISCLAIMED.

  ==============================================================================
*/
26
27
namespace
juce
28
{
29
namespace
dsp
30
{
31
32
#ifndef DOXYGEN
33
34
#if JUCE_GCC && (__GNUC__ >= 6)
35
#pragma GCC diagnostic push
36
#pragma GCC diagnostic ignored "-Wignored-attributes"
37
#endif
38
39
#ifdef _MSC_VER
40
#define DECLARE_AVX_SIMD_CONST(type, name) \
41
static __declspec(align(32)) const type name[32 / sizeof (type)]
42
43
#define DEFINE_AVX_SIMD_CONST(type, class_type, name) \
44
__declspec(align(32)) const type SIMDNativeOps<class_type>:: name[32 / sizeof (type)]
45
46
#else
47
#define DECLARE_AVX_SIMD_CONST(type, name) \
48
static const type name[32 / sizeof (type)] __attribute__((aligned(32)))
49
50
#define DEFINE_AVX_SIMD_CONST(type, class_type, name) \
51
const type SIMDNativeOps<class_type>:: name[32 / sizeof (type)] __attribute__((aligned(32)))
52
53
#endif
54
55
template
<
typename
type>
56
struct
SIMDNativeOps;
57
58
//==============================================================================
63
template
<>
64
struct
SIMDNativeOps<
float
>
65
{
66
using
vSIMDType
=
__m256
;
67
68
//==============================================================================
69
DECLARE_AVX_SIMD_CONST (
int32_t
,
kAllBitsSet
);
70
DECLARE_AVX_SIMD_CONST (
int32_t
,
kEvenHighBit
);
71
DECLARE_AVX_SIMD_CONST (
float
,
kOne
);
72
73
//==============================================================================
74
static
forcedinline
__m256
JUCE_VECTOR_CALLTYPE
vconst (
const
float
*
a
)
noexcept
{
return
load (
a
); }
75
static
forcedinline
__m256
JUCE_VECTOR_CALLTYPE
vconst (
const
int32_t
*
a
)
noexcept
{
return
_mm256_castsi256_ps
(
_mm256_load_si256
(
reinterpret_cast <
const
__m256i
*
>
(
a
))); }
76
static
forcedinline
__m256
JUCE_VECTOR_CALLTYPE
expand (
float
s)
noexcept
{
return
_mm256_broadcast_ss
(&s); }
77
static
forcedinline
__m256
JUCE_VECTOR_CALLTYPE
load (
const
float
*
a
)
noexcept
{
return
_mm256_load_ps
(
a
); }
78
static
forcedinline
void
JUCE_VECTOR_CALLTYPE
store (
__m256
value,
float
* dest)
noexcept
{
_mm256_store_ps
(dest, value); }
79
static
forcedinline
__m256
JUCE_VECTOR_CALLTYPE
add (
__m256
a
,
__m256
b
)
noexcept
{
return
_mm256_add_ps
(
a
,
b
); }
80
static
forcedinline
__m256
JUCE_VECTOR_CALLTYPE
sub (
__m256
a
,
__m256
b
)
noexcept
{
return
_mm256_sub_ps
(
a
,
b
); }
81
static
forcedinline
__m256
JUCE_VECTOR_CALLTYPE
mul (
__m256
a
,
__m256
b
)
noexcept
{
return
_mm256_mul_ps
(
a
,
b
); }
82
static
forcedinline
__m256
JUCE_VECTOR_CALLTYPE
bit_and (
__m256
a
,
__m256
b
)
noexcept
{
return
_mm256_and_ps
(
a
,
b
); }
83
static
forcedinline
__m256
JUCE_VECTOR_CALLTYPE
bit_or (
__m256
a
,
__m256
b
)
noexcept
{
return
_mm256_or_ps
(
a
,
b
); }
84
static
forcedinline
__m256
JUCE_VECTOR_CALLTYPE
bit_xor (
__m256
a
,
__m256
b
)
noexcept
{
return
_mm256_xor_ps
(
a
,
b
); }
85
static
forcedinline
__m256
JUCE_VECTOR_CALLTYPE
bit_notand (
__m256
a
,
__m256
b
)
noexcept
{
return
_mm256_andnot_ps
(
a
,
b
); }
86
static
forcedinline
__m256
JUCE_VECTOR_CALLTYPE
bit_not (
__m256
a
)
noexcept
{
return
bit_notand (
a
, vconst (
kAllBitsSet
)); }
87
static
forcedinline
__m256
JUCE_VECTOR_CALLTYPE
min (
__m256
a
,
__m256
b
)
noexcept
{
return
_mm256_min_ps
(
a
,
b
); }
88
static
forcedinline
__m256
JUCE_VECTOR_CALLTYPE
max (
__m256
a
,
__m256
b
)
noexcept
{
return
_mm256_max_ps
(
a
,
b
); }
89
static
forcedinline
__m256
JUCE_VECTOR_CALLTYPE
equal (
__m256
a
,
__m256
b
)
noexcept
{
return
_mm256_cmp_ps
(
a
,
b
,
_CMP_EQ_OQ
); }
90
static
forcedinline
__m256
JUCE_VECTOR_CALLTYPE
notEqual (
__m256
a
,
__m256
b
)
noexcept
{
return
_mm256_cmp_ps
(
a
,
b
,
_CMP_NEQ_OQ
); }
91
static
forcedinline
__m256
JUCE_VECTOR_CALLTYPE
greaterThan (
__m256
a
,
__m256
b
)
noexcept
{
return
_mm256_cmp_ps
(
a
,
b
,
_CMP_GT_OQ
); }
92
static
forcedinline
__m256
JUCE_VECTOR_CALLTYPE
greaterThanOrEqual (
__m256
a
,
__m256
b
)
noexcept
{
return
_mm256_cmp_ps
(
a
,
b
,
_CMP_GE_OQ
); }
93
static
forcedinline
bool
JUCE_VECTOR_CALLTYPE
allEqual (
__m256
a
,
__m256
b
)
noexcept
{
return
(
_mm256_movemask_ps
(equal (
a
,
b
)) == 0xff); }
94
static
forcedinline
__m256
JUCE_VECTOR_CALLTYPE
dupeven (
__m256
a
)
noexcept
{
return
_mm256_shuffle_ps
(
a
,
a
,
_MM_SHUFFLE
(2, 2, 0, 0)); }
95
static
forcedinline
__m256
JUCE_VECTOR_CALLTYPE
dupodd (
__m256
a
)
noexcept
{
return
_mm256_shuffle_ps
(
a
,
a
,
_MM_SHUFFLE
(3, 3, 1, 1)); }
96
static
forcedinline
__m256
JUCE_VECTOR_CALLTYPE
swapevenodd (
__m256
a
)
noexcept
{
return
_mm256_shuffle_ps
(
a
,
a
,
_MM_SHUFFLE
(2, 3, 0, 1)); }
97
static
forcedinline
float
JUCE_VECTOR_CALLTYPE
get (
__m256
v,
size_t
i)
noexcept
{
return
SIMDFallbackOps<float, __m256>::get
(v, i); }
98
static
forcedinline
__m256
JUCE_VECTOR_CALLTYPE
set (
__m256
v,
size_t
i,
float
s)
noexcept
{
return
SIMDFallbackOps<float, __m256>::set
(v, i, s); }
99
static
forcedinline
__m256
JUCE_VECTOR_CALLTYPE
truncate (
__m256
a
)
noexcept
{
return
_mm256_cvtepi32_ps
(
_mm256_cvttps_epi32
(
a
)); }
100
101
static
forcedinline
__m256
JUCE_VECTOR_CALLTYPE
multiplyAdd (
__m256
a
,
__m256
b
,
__m256
c
)
noexcept
102
{
103
#if __FMA__
104
return
_mm256_fmadd_ps
(
b
,
c
,
a
);
105
#else
106
return
add (
a
, mul (
b
,
c
));
107
#endif
108
}
109
110
static
forcedinline
__m256
JUCE_VECTOR_CALLTYPE
oddevensum (
__m256
a
)
noexcept
111
{
112
a
=
_mm256_add_ps
(
_mm256_shuffle_ps
(
a
,
a
,
_MM_SHUFFLE
(1, 0, 3, 2)),
a
);
113
return
add (
_mm256_permute2f128_ps
(
a
,
a
, 1),
a
);
114
}
115
116
//==============================================================================
117
static
forcedinline
__m256
JUCE_VECTOR_CALLTYPE
cmplxmul (
__m256
a
,
__m256
b
)
noexcept
118
{
119
__m256
rr_ir
= mul (
a
, dupeven (
b
));
120
__m256
ii_ri
= mul (swapevenodd (
a
), dupodd (
b
));
121
return
add (
rr_ir
, bit_xor (
ii_ri
, vconst (
kEvenHighBit
)));
122
}
123
124
static
forcedinline
float
JUCE_VECTOR_CALLTYPE
sum (
__m256
a
)
noexcept
125
{
126
__m256
retval
=
_mm256_dp_ps
(
a
, vconst (
kOne
), 0xff);
127
__m256
tmp
=
_mm256_permute2f128_ps
(
retval
,
retval
, 1);
128
retval
=
_mm256_add_ps
(
retval
,
tmp
);
129
130
#if JUCE_GCC
131
return
retval
[0];
132
#else
133
return
_mm256_cvtss_f32
(
retval
);
134
#endif
135
}
136
};
137
138
//==============================================================================
143
template
<>
144
struct
SIMDNativeOps<
double
>
145
{
146
using
vSIMDType
=
__m256d
;
147
148
//==============================================================================
149
DECLARE_AVX_SIMD_CONST (
int64_t
,
kAllBitsSet
);
150
DECLARE_AVX_SIMD_CONST (
int64_t
,
kEvenHighBit
);
151
DECLARE_AVX_SIMD_CONST (
double
,
kOne
);
152
153
//==============================================================================
154
static
forcedinline
__m256d
JUCE_VECTOR_CALLTYPE
vconst (
const
double
*
a
)
noexcept
{
return
load (
a
); }
155
static
forcedinline
__m256d
JUCE_VECTOR_CALLTYPE
vconst (
const
int64_t
*
a
)
noexcept
{
return
_mm256_castsi256_pd
(
_mm256_load_si256
(
reinterpret_cast <
const
__m256i
*
>
(
a
))); }
156
static
forcedinline
__m256d
JUCE_VECTOR_CALLTYPE
expand (
double
s)
noexcept
{
return
_mm256_broadcast_sd
(&s); }
157
static
forcedinline
__m256d
JUCE_VECTOR_CALLTYPE
load (
const
double
*
a
)
noexcept
{
return
_mm256_load_pd
(
a
); }
158
static
forcedinline
void
JUCE_VECTOR_CALLTYPE
store (
__m256d
value,
double
* dest)
noexcept
{
_mm256_store_pd
(dest, value); }
159
static
forcedinline
__m256d
JUCE_VECTOR_CALLTYPE
add (
__m256d
a
,
__m256d
b
)
noexcept
{
return
_mm256_add_pd
(
a
,
b
); }
160
static
forcedinline
__m256d
JUCE_VECTOR_CALLTYPE
sub (
__m256d
a
,
__m256d
b
)
noexcept
{
return
_mm256_sub_pd
(
a
,
b
); }
161
static
forcedinline
__m256d
JUCE_VECTOR_CALLTYPE
mul (
__m256d
a
,
__m256d
b
)
noexcept
{
return
_mm256_mul_pd
(
a
,
b
); }
162
static
forcedinline
__m256d
JUCE_VECTOR_CALLTYPE
bit_and (
__m256d
a
,
__m256d
b
)
noexcept
{
return
_mm256_and_pd
(
a
,
b
); }
163
static
forcedinline
__m256d
JUCE_VECTOR_CALLTYPE
bit_or (
__m256d
a
,
__m256d
b
)
noexcept
{
return
_mm256_or_pd
(
a
,
b
); }
164
static
forcedinline
__m256d
JUCE_VECTOR_CALLTYPE
bit_xor (
__m256d
a
,
__m256d
b
)
noexcept
{
return
_mm256_xor_pd
(
a
,
b
); }
165
static
forcedinline
__m256d
JUCE_VECTOR_CALLTYPE
bit_notand (
__m256d
a
,
__m256d
b
)
noexcept
{
return
_mm256_andnot_pd
(
a
,
b
); }
166
static
forcedinline
__m256d
JUCE_VECTOR_CALLTYPE
bit_not (
__m256d
a
)
noexcept
{
return
bit_notand (
a
, vconst (
kAllBitsSet
)); }
167
static
forcedinline
__m256d
JUCE_VECTOR_CALLTYPE
min (
__m256d
a
,
__m256d
b
)
noexcept
{
return
_mm256_min_pd
(
a
,
b
); }
168
static
forcedinline
__m256d
JUCE_VECTOR_CALLTYPE
max (
__m256d
a
,
__m256d
b
)
noexcept
{
return
_mm256_max_pd
(
a
,
b
); }
169
static
forcedinline
__m256d
JUCE_VECTOR_CALLTYPE
equal (
__m256d
a
,
__m256d
b
)
noexcept
{
return
_mm256_cmp_pd
(
a
,
b
,
_CMP_EQ_OQ
); }
170
static
forcedinline
__m256d
JUCE_VECTOR_CALLTYPE
notEqual (
__m256d
a
,
__m256d
b
)
noexcept
{
return
_mm256_cmp_pd
(
a
,
b
,
_CMP_NEQ_OQ
); }
171
static
forcedinline
__m256d
JUCE_VECTOR_CALLTYPE
greaterThan (
__m256d
a
,
__m256d
b
)
noexcept
{
return
_mm256_cmp_pd
(
a
,
b
,
_CMP_GT_OQ
); }
172
static
forcedinline
__m256d
JUCE_VECTOR_CALLTYPE
greaterThanOrEqual (
__m256d
a
,
__m256d
b
)
noexcept
{
return
_mm256_cmp_pd
(
a
,
b
,
_CMP_GE_OQ
); }
173
static
forcedinline
bool
JUCE_VECTOR_CALLTYPE
allEqual (
__m256d
a
,
__m256d
b
)
noexcept
{
return
(
_mm256_movemask_pd
(equal (
a
,
b
)) == 0xf); }
174
static
forcedinline
__m256d
JUCE_VECTOR_CALLTYPE
multiplyAdd (
__m256d
a
,
__m256d
b
,
__m256d
c
)
noexcept
{
return
_mm256_add_pd
(
a
,
_mm256_mul_pd
(
b
,
c
)); }
175
static
forcedinline
__m256d
JUCE_VECTOR_CALLTYPE
dupeven (
__m256d
a
)
noexcept
{
return
_mm256_shuffle_pd
(
a
,
a
, 0); }
176
static
forcedinline
__m256d
JUCE_VECTOR_CALLTYPE
dupodd (
__m256d
a
)
noexcept
{
return
_mm256_shuffle_pd
(
a
,
a
, (1 << 0) | (1 << 1) | (1 << 2) | (1 << 3)); }
177
static
forcedinline
__m256d
JUCE_VECTOR_CALLTYPE
swapevenodd (
__m256d
a
)
noexcept
{
return
_mm256_shuffle_pd
(
a
,
a
, (1 << 0) | (0 << 1) | (1 << 2) | (0 << 3)); }
178
static
forcedinline
__m256d
JUCE_VECTOR_CALLTYPE
oddevensum (
__m256d
a
)
noexcept
{
return
_mm256_add_pd
(
_mm256_permute2f128_pd
(
a
,
a
, 1),
a
); }
179
static
forcedinline
double
JUCE_VECTOR_CALLTYPE
get (
__m256d
v,
size_t
i)
noexcept
{
return
SIMDFallbackOps<double, __m256d>::get
(v, i); }
180
static
forcedinline
__m256d
JUCE_VECTOR_CALLTYPE
set (
__m256d
v,
size_t
i,
double
s)
noexcept
{
return
SIMDFallbackOps<double, __m256d>::set
(v, i, s); }
181
static
forcedinline
__m256d
JUCE_VECTOR_CALLTYPE
truncate (
__m256d
a
)
noexcept
{
return
_mm256_cvtepi32_pd
(
_mm256_cvttpd_epi32
(
a
)); }
182
183
//==============================================================================
184
static
forcedinline
__m256d
JUCE_VECTOR_CALLTYPE
cmplxmul (
__m256d
a
,
__m256d
b
)
noexcept
185
{
186
__m256d
rr_ir
= mul (
a
, dupeven (
b
));
187
__m256d
ii_ri
= mul (swapevenodd (
a
), dupodd (
b
));
188
return
add (
rr_ir
, bit_xor (
ii_ri
, vconst (
kEvenHighBit
)));
189
}
190
191
static
forcedinline
double
JUCE_VECTOR_CALLTYPE
sum (
__m256d
a
)
noexcept
192
{
193
__m256d
retval
=
_mm256_hadd_pd
(
a
,
a
);
194
__m256d
tmp
=
_mm256_permute2f128_pd
(
retval
,
retval
, 1);
195
retval
=
_mm256_add_pd
(
retval
,
tmp
);
196
197
#if JUCE_GCC
198
return
retval
[0];
199
#else
200
return
_mm256_cvtsd_f64
(
retval
);
201
#endif
202
}
203
};
204
205
//==============================================================================
210
template
<>
211
struct
SIMDNativeOps<
int8_t
>
212
{
213
using
vSIMDType
=
__m256i
;
214
215
//==============================================================================
216
DECLARE_AVX_SIMD_CONST (
int8_t
,
kAllBitsSet
);
217
218
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
expand (
int8_t
s)
noexcept
{
return
_mm256_set1_epi8
(s); }
219
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
load (
const
int8_t
* p)
noexcept
{
return
_mm256_load_si256
(
reinterpret_cast<
const
__m256i
*
>
(p)); }
220
static
forcedinline
void
JUCE_VECTOR_CALLTYPE
store (
__m256i
value,
int8_t
* dest)
noexcept
{
_mm256_store_si256
(
reinterpret_cast<
__m256i
*
>
(dest), value); }
221
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
add (
__m256i
a
,
__m256i
b
)
noexcept
{
return
_mm256_add_epi8
(
a
,
b
); }
222
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
sub (
__m256i
a
,
__m256i
b
)
noexcept
{
return
_mm256_sub_epi8
(
a
,
b
); }
223
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
bit_and (
__m256i
a
,
__m256i
b
)
noexcept
{
return
_mm256_and_si256
(
a
,
b
); }
224
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
bit_or (
__m256i
a
,
__m256i
b
)
noexcept
{
return
_mm256_or_si256
(
a
,
b
); }
225
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
bit_xor (
__m256i
a
,
__m256i
b
)
noexcept
{
return
_mm256_xor_si256
(
a
,
b
); }
226
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
bit_andnot (
__m256i
a
,
__m256i
b
)
noexcept
{
return
_mm256_andnot_si256
(
a
,
b
); }
227
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
bit_not (
__m256i
a
)
noexcept
{
return
_mm256_andnot_si256
(
a
, load (
kAllBitsSet
)); }
228
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
min (
__m256i
a
,
__m256i
b
)
noexcept
{
return
_mm256_min_epi8
(
a
,
b
); }
229
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
max (
__m256i
a
,
__m256i
b
)
noexcept
{
return
_mm256_max_epi8
(
a
,
b
); }
230
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
equal (
__m256i
a
,
__m256i
b
)
noexcept
{
return
_mm256_cmpeq_epi8
(
a
,
b
); }
231
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
greaterThan (
__m256i
a
,
__m256i
b
)
noexcept
{
return
_mm256_cmpgt_epi8
(
a
,
b
); }
232
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
greaterThanOrEqual (
__m256i
a
,
__m256i
b
)
noexcept
{
return
bit_or (greaterThan (
a
,
b
), equal (
a
,
b
)); }
233
static
forcedinline
bool
JUCE_VECTOR_CALLTYPE
allEqual (
__m256i
a
,
__m256i
b
)
noexcept
{
return
_mm256_movemask_epi8
(equal (
a
,
b
)) == -1; }
234
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
multiplyAdd (
__m256i
a
,
__m256i
b
,
__m256i
c
)
noexcept
{
return
add (
a
, mul (
b
,
c
)); }
235
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
notEqual (
__m256i
a
,
__m256i
b
)
noexcept
{
return
bit_not (equal (
a
,
b
)); }
236
static
forcedinline
int8_t
JUCE_VECTOR_CALLTYPE
get (
__m256i
v,
size_t
i)
noexcept
{
return
SIMDFallbackOps<int8_t, __m256i>::get
(v, i); }
237
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
set (
__m256i
v,
size_t
i,
int8_t
s)
noexcept
{
return
SIMDFallbackOps<int8_t, __m256i>::set
(v, i, s); }
238
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
truncate (
__m256i
a
)
noexcept
{
return
a
; }
239
240
//==============================================================================
241
static
forcedinline
int8_t
JUCE_VECTOR_CALLTYPE
sum (
__m256i
a
)
noexcept
242
{
243
__m256i
lo
=
_mm256_unpacklo_epi8
(
a
,
_mm256_setzero_si256
());
244
__m256i
hi
=
_mm256_unpackhi_epi8
(
a
,
_mm256_setzero_si256
());
245
246
for
(
int
i = 0; i < 3; ++i)
247
{
248
lo
=
_mm256_hadd_epi16
(
lo
,
lo
);
249
hi
=
_mm256_hadd_epi16
(
hi
,
hi
);
250
}
251
252
#if JUCE_GCC
253
return
(
int8_t
) ((
lo
[0] & 0xff) +
254
(
hi
[0] & 0xff) +
255
(
lo
[2] & 0xff) +
256
(
hi
[2] & 0xff));
257
#else
258
constexpr
int
mask = (2 << 0) | (3 << 2) | (0 << 4) | (1 << 6);
259
260
return
(
int8_t
) ((
_mm256_cvtsi256_si32
(
lo
) & 0xff) +
261
(
_mm256_cvtsi256_si32
(
hi
) & 0xff) +
262
(
_mm256_cvtsi256_si32
(
_mm256_permute4x64_epi64
(
lo
, mask)) & 0xff) +
263
(
_mm256_cvtsi256_si32
(
_mm256_permute4x64_epi64
(
hi
, mask)) & 0xff));
264
#endif
265
}
266
267
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
mul (
__m256i
a
,
__m256i
b
)
268
{
269
// unpack and multiply
270
__m256i
even
=
_mm256_mullo_epi16
(
a
,
b
);
271
__m256i
odd
=
_mm256_mullo_epi16
(
_mm256_srli_epi16
(
a
, 8),
_mm256_srli_epi16
(
b
, 8));
272
273
return
_mm256_or_si256
(
_mm256_slli_epi16
(
odd
, 8),
274
_mm256_srli_epi16
(
_mm256_slli_epi16
(
even
, 8), 8));
275
}
276
};
277
278
//==============================================================================
283
template
<>
284
struct
SIMDNativeOps<
uint8_t
>
285
{
286
//==============================================================================
287
using
vSIMDType
=
__m256i
;
288
289
//==============================================================================
290
DECLARE_AVX_SIMD_CONST (
uint8_t
,
kHighBit
);
291
DECLARE_AVX_SIMD_CONST (
uint8_t
,
kAllBitsSet
);
292
293
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
ssign (
__m256i
a
)
noexcept
{
return
_mm256_xor_si256
(
a
, load (
kHighBit
)); }
294
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
expand (
uint8_t
s)
noexcept
{
return
_mm256_set1_epi8
((
int8_t
) s); }
295
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
load (
const
uint8_t
* p)
noexcept
{
return
_mm256_load_si256
(
reinterpret_cast<
const
__m256i
*
>
(p)); }
296
static
forcedinline
void
JUCE_VECTOR_CALLTYPE
store (
__m256i
value,
uint8_t
* dest)
noexcept
{
_mm256_store_si256
(
reinterpret_cast<
__m256i
*
>
(dest), value); }
297
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
add (
__m256i
a
,
__m256i
b
)
noexcept
{
return
_mm256_add_epi8
(
a
,
b
); }
298
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
sub (
__m256i
a
,
__m256i
b
)
noexcept
{
return
_mm256_sub_epi8
(
a
,
b
); }
299
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
bit_and (
__m256i
a
,
__m256i
b
)
noexcept
{
return
_mm256_and_si256
(
a
,
b
); }
300
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
bit_or (
__m256i
a
,
__m256i
b
)
noexcept
{
return
_mm256_or_si256
(
a
,
b
); }
301
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
bit_xor (
__m256i
a
,
__m256i
b
)
noexcept
{
return
_mm256_xor_si256
(
a
,
b
); }
302
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
bit_andnot (
__m256i
a
,
__m256i
b
)
noexcept
{
return
_mm256_andnot_si256
(
a
,
b
); }
303
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
bit_not (
__m256i
a
)
noexcept
{
return
_mm256_andnot_si256
(
a
, load (
kAllBitsSet
)); }
304
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
min (
__m256i
a
,
__m256i
b
)
noexcept
{
return
_mm256_min_epu8
(
a
,
b
); }
305
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
max (
__m256i
a
,
__m256i
b
)
noexcept
{
return
_mm256_max_epu8
(
a
,
b
); }
306
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
equal (
__m256i
a
,
__m256i
b
)
noexcept
{
return
_mm256_cmpeq_epi8
(
a
,
b
); }
307
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
greaterThan (
__m256i
a
,
__m256i
b
)
noexcept
{
return
_mm256_cmpgt_epi8
(ssign (
a
), ssign (
b
)); }
308
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
greaterThanOrEqual (
__m256i
a
,
__m256i
b
)
noexcept
{
return
bit_or (greaterThan (
a
,
b
), equal (
a
,
b
)); }
309
static
forcedinline
bool
JUCE_VECTOR_CALLTYPE
allEqual (
__m256i
a
,
__m256i
b
)
noexcept
{
return
(
_mm256_movemask_epi8
(equal (
a
,
b
)) == -1); }
310
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
multiplyAdd (
__m256i
a
,
__m256i
b
,
__m256i
c
)
noexcept
{
return
add (
a
, mul (
b
,
c
)); }
311
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
notEqual (
__m256i
a
,
__m256i
b
)
noexcept
{
return
bit_not (equal (
a
,
b
)); }
312
static
forcedinline
uint8_t
JUCE_VECTOR_CALLTYPE
get (
__m256i
v,
size_t
i)
noexcept
{
return
SIMDFallbackOps<uint8_t, __m256i>::get
(v, i); }
313
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
set (
__m256i
v,
size_t
i,
uint8_t
s)
noexcept
{
return
SIMDFallbackOps<uint8_t, __m256i>::set
(v, i, s); }
314
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
truncate (
__m256i
a
)
noexcept
{
return
a
; }
315
316
//==============================================================================
317
static
forcedinline
uint8_t
JUCE_VECTOR_CALLTYPE
sum (
__m256i
a
)
noexcept
318
{
319
__m256i
lo
=
_mm256_unpacklo_epi8
(
a
,
_mm256_setzero_si256
());
320
__m256i
hi
=
_mm256_unpackhi_epi8
(
a
,
_mm256_setzero_si256
());
321
322
for
(
int
i = 0; i < 3; ++i)
323
{
324
lo
=
_mm256_hadd_epi16
(
lo
,
lo
);
325
hi
=
_mm256_hadd_epi16
(
hi
,
hi
);
326
}
327
328
#if JUCE_GCC
329
return
(
uint8_t
) ((
static_cast<
uint32_t
>
(
lo
[0]) & 0xffu) +
330
(
static_cast<
uint32_t
>
(
hi
[0]) & 0xffu) +
331
(
static_cast<
uint32_t
>
(
lo
[2]) & 0xffu) +
332
(
static_cast<
uint32_t
>
(
hi
[2]) & 0xffu));
333
#else
334
constexpr
int
mask = (2 << 0) | (3 << 2) | (0 << 4) | (1 << 6);
335
336
return
(
uint8_t
) ((
static_cast<
uint32_t
>
(
_mm256_cvtsi256_si32
(
lo
)) & 0xffu) +
337
(
static_cast<
uint32_t
>
(
_mm256_cvtsi256_si32
(
hi
)) & 0xffu) +
338
(
static_cast<
uint32_t
>
(
_mm256_cvtsi256_si32
(
_mm256_permute4x64_epi64
(
lo
, mask))) & 0xffu) +
339
(
static_cast<
uint32_t
>
(
_mm256_cvtsi256_si32
(
_mm256_permute4x64_epi64
(
hi
, mask))) & 0xffu));
340
#endif
341
}
342
343
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
mul (
__m256i
a
,
__m256i
b
)
344
{
345
// unpack and multiply
346
__m256i
even
=
_mm256_mullo_epi16
(
a
,
b
);
347
__m256i
odd
=
_mm256_mullo_epi16
(
_mm256_srli_epi16
(
a
, 8),
_mm256_srli_epi16
(
b
, 8));
348
349
return
_mm256_or_si256
(
_mm256_slli_epi16
(
odd
, 8),
350
_mm256_srli_epi16
(
_mm256_slli_epi16
(
even
, 8), 8));
351
}
352
};
353
354
//==============================================================================
359
template
<>
360
struct
SIMDNativeOps<
int16_t
>
361
{
362
//==============================================================================
363
using
vSIMDType
=
__m256i
;
364
365
//==============================================================================
366
DECLARE_AVX_SIMD_CONST (
int16_t
,
kAllBitsSet
);
367
368
//==============================================================================
369
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
expand (
int16_t
s)
noexcept
{
return
_mm256_set1_epi16
(s); }
370
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
load (
const
int16_t
* p)
noexcept
{
return
_mm256_load_si256
(
reinterpret_cast<
const
__m256i
*
>
(p)); }
371
static
forcedinline
void
JUCE_VECTOR_CALLTYPE
store (
__m256i
value,
int16_t
* dest)
noexcept
{
_mm256_store_si256
(
reinterpret_cast<
__m256i
*
>
(dest), value); }
372
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
add (
__m256i
a
,
__m256i
b
)
noexcept
{
return
_mm256_add_epi16
(
a
,
b
); }
373
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
sub (
__m256i
a
,
__m256i
b
)
noexcept
{
return
_mm256_sub_epi16
(
a
,
b
); }
374
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
mul (
__m256i
a
,
__m256i
b
)
noexcept
{
return
_mm256_mullo_epi16
(
a
,
b
); }
375
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
bit_and (
__m256i
a
,
__m256i
b
)
noexcept
{
return
_mm256_and_si256
(
a
,
b
); }
376
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
bit_or (
__m256i
a
,
__m256i
b
)
noexcept
{
return
_mm256_or_si256
(
a
,
b
); }
377
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
bit_xor (
__m256i
a
,
__m256i
b
)
noexcept
{
return
_mm256_xor_si256
(
a
,
b
); }
378
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
bit_andnot (
__m256i
a
,
__m256i
b
)
noexcept
{
return
_mm256_andnot_si256
(
a
,
b
); }
379
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
bit_not (
__m256i
a
)
noexcept
{
return
_mm256_andnot_si256
(
a
, load (
kAllBitsSet
)); }
380
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
min (
__m256i
a
,
__m256i
b
)
noexcept
{
return
_mm256_min_epi16
(
a
,
b
); }
381
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
max (
__m256i
a
,
__m256i
b
)
noexcept
{
return
_mm256_max_epi16
(
a
,
b
); }
382
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
equal (
__m256i
a
,
__m256i
b
)
noexcept
{
return
_mm256_cmpeq_epi16
(
a
,
b
); }
383
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
greaterThan (
__m256i
a
,
__m256i
b
)
noexcept
{
return
_mm256_cmpgt_epi16
(
a
,
b
); }
384
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
greaterThanOrEqual (
__m256i
a
,
__m256i
b
)
noexcept
{
return
bit_or (greaterThan (
a
,
b
), equal (
a
,
b
)); }
385
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
multiplyAdd (
__m256i
a
,
__m256i
b
,
__m256i
c
)
noexcept
{
return
add (
a
, mul (
b
,
c
)); }
386
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
notEqual (
__m256i
a
,
__m256i
b
)
noexcept
{
return
bit_not (equal (
a
,
b
)); }
387
static
forcedinline
bool
JUCE_VECTOR_CALLTYPE
allEqual (
__m256i
a
,
__m256i
b
)
noexcept
{
return
(
_mm256_movemask_epi8
(equal (
a
,
b
)) == -1); }
388
static
forcedinline
int16_t
JUCE_VECTOR_CALLTYPE
get (
__m256i
v,
size_t
i)
noexcept
{
return
SIMDFallbackOps<int16_t, __m256i>::get
(v, i); }
389
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
set (
__m256i
v,
size_t
i,
int16_t
s)
noexcept
{
return
SIMDFallbackOps<int16_t, __m256i>::set
(v, i, s); }
390
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
truncate (
__m256i
a
)
noexcept
{
return
a
; }
391
392
//==============================================================================
393
static
forcedinline
int16_t
JUCE_VECTOR_CALLTYPE
sum (
__m256i
a
)
noexcept
394
{
395
__m256i
tmp
=
_mm256_hadd_epi16
(
a
,
a
);
396
tmp
=
_mm256_hadd_epi16
(
tmp
,
tmp
);
397
tmp
=
_mm256_hadd_epi16
(
tmp
,
tmp
);
398
399
#if JUCE_GCC
400
return
(
int16_t
) ((
tmp
[0] & 0xffff) + (
tmp
[2] & 0xffff));
401
#else
402
constexpr
int
mask = (2 << 0) | (3 << 2) | (0 << 4) | (1 << 6);
403
404
return
(
int16_t
) ((
_mm256_cvtsi256_si32
(
tmp
) & 0xffff) +
405
(
_mm256_cvtsi256_si32
(
_mm256_permute4x64_epi64
(
tmp
, mask)) & 0xffff));
406
#endif
407
}
408
};
409
410
//==============================================================================
415
template
<>
416
struct
SIMDNativeOps<
uint16_t
>
417
{
418
//==============================================================================
419
using
vSIMDType
=
__m256i
;
420
421
//==============================================================================
422
DECLARE_AVX_SIMD_CONST (
uint16_t
,
kHighBit
);
423
DECLARE_AVX_SIMD_CONST (
uint16_t
,
kAllBitsSet
);
424
425
//==============================================================================
426
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
ssign (
__m256i
a
)
noexcept
{
return
_mm256_xor_si256
(
a
, load (
kHighBit
)); }
427
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
expand (
uint16_t
s)
noexcept
{
return
_mm256_set1_epi16
((
int16_t
) s); }
428
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
load (
const
uint16_t
* p)
noexcept
{
return
_mm256_load_si256
(
reinterpret_cast<
const
__m256i
*
>
(p)); }
429
static
forcedinline
void
JUCE_VECTOR_CALLTYPE
store (
__m256i
value,
uint16_t
* dest)
noexcept
{
_mm256_store_si256
(
reinterpret_cast<
__m256i
*
>
(dest), value); }
430
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
add (
__m256i
a
,
__m256i
b
)
noexcept
{
return
_mm256_add_epi16
(
a
,
b
); }
431
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
sub (
__m256i
a
,
__m256i
b
)
noexcept
{
return
_mm256_sub_epi16
(
a
,
b
); }
432
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
mul (
__m256i
a
,
__m256i
b
)
noexcept
{
return
_mm256_mullo_epi16
(
a
,
b
); }
433
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
bit_and (
__m256i
a
,
__m256i
b
)
noexcept
{
return
_mm256_and_si256
(
a
,
b
); }
434
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
bit_or (
__m256i
a
,
__m256i
b
)
noexcept
{
return
_mm256_or_si256
(
a
,
b
); }
435
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
bit_xor (
__m256i
a
,
__m256i
b
)
noexcept
{
return
_mm256_xor_si256
(
a
,
b
); }
436
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
bit_andnot (
__m256i
a
,
__m256i
b
)
noexcept
{
return
_mm256_andnot_si256
(
a
,
b
); }
437
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
bit_not (
__m256i
a
)
noexcept
{
return
_mm256_andnot_si256
(
a
, load (
kAllBitsSet
)); }
438
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
min (
__m256i
a
,
__m256i
b
)
noexcept
{
return
_mm256_min_epu16
(
a
,
b
); }
439
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
max (
__m256i
a
,
__m256i
b
)
noexcept
{
return
_mm256_max_epu16
(
a
,
b
); }
440
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
equal (
__m256i
a
,
__m256i
b
)
noexcept
{
return
_mm256_cmpeq_epi16
(
a
,
b
); }
441
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
greaterThan (
__m256i
a
,
__m256i
b
)
noexcept
{
return
_mm256_cmpgt_epi16
(ssign (
a
), ssign (
b
)); }
442
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
greaterThanOrEqual (
__m256i
a
,
__m256i
b
)
noexcept
{
return
bit_or (greaterThan (
a
,
b
), equal (
a
,
b
)); }
443
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
multiplyAdd (
__m256i
a
,
__m256i
b
,
__m256i
c
)
noexcept
{
return
add (
a
, mul (
b
,
c
)); }
444
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
notEqual (
__m256i
a
,
__m256i
b
)
noexcept
{
return
bit_not (equal (
a
,
b
)); }
445
static
forcedinline
bool
JUCE_VECTOR_CALLTYPE
allEqual (
__m256i
a
,
__m256i
b
)
noexcept
{
return
(
_mm256_movemask_epi8
(equal (
a
,
b
)) == -1); }
446
static
forcedinline
uint16_t
JUCE_VECTOR_CALLTYPE
get (
__m256i
v,
size_t
i)
noexcept
{
return
SIMDFallbackOps<uint16_t, __m256i>::get
(v, i); }
447
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
set (
__m256i
v,
size_t
i,
uint16_t
s)
noexcept
{
return
SIMDFallbackOps<uint16_t, __m256i>::set
(v, i, s); }
448
static
forcedinline
__m256i
JUCE_VECTOR_CALLTYPE
truncate (
__m256i
a
)
noexcept
{
return
a
; }
449
450
//==============================================================================
451
static
forcedinline
uint16_t
JUCE_VECTOR_CALLTYPE
sum (
__m256i
a
)
noexcept
452
{
453
__m256i
tmp
=
_mm256_hadd_epi16
(
a
,
a
);
454
tmp
=
_mm256_hadd_epi16
(
tmp
,
tmp
);
455
tmp
=
_mm256_hadd_epi16
(
tmp
,
tmp
);
456
457
#if JUCE_GCC
458
return
(
uint16_t
) ((
static_cast<
uint32_t
>
(
tmp
[0]) & 0xffffu) +
459
(
static_cast<
uint32_t
>
(
tmp
[2]) & 0xffffu));
460
#else
461
constexpr
int
mask = (2 << 0) | (3 << 2) | (0 << 4) | (1 << 6);
462
463
return
(
uint16_t
) ((
static_cast<
uint32_t
>
(
_mm256_cvtsi256_si32
(
tmp
)) & 0xffffu) +
464
(
static_cast<
uint32_t
>
(
_mm256_cvtsi256_si32
(
_mm256_permute4x64_epi64
(
tmp
, mask))) & 0xffffu));
465
#endif
466
}
467
};
468
469
//==============================================================================
474
/** Signed 32-bit integer AVX intrinsics.

    AVX2 implementation of the JUCE SIMD operations for a vector of eight
    int32_t values held in a single __m256i register.

    @tags{DSP}
*/
template <>
struct SIMDNativeOps<int32_t>
{
    //==============================================================================
    using vSIMDType = __m256i;

    //==============================================================================
    // 32-byte-aligned constant: every bit set, used to synthesise bitwise NOT.
    DECLARE_AVX_SIMD_CONST (int32_t, kAllBitsSet);

    //==============================================================================
    /** Broadcasts the scalar s to all eight elements. */
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE expand (int32_t s) noexcept { return _mm256_set1_epi32 (s); }
    /** Loads eight values from 32-byte-aligned memory. */
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE load (const int32_t* p) noexcept { return _mm256_load_si256 (reinterpret_cast<const __m256i*> (p)); }
    /** Stores eight values to 32-byte-aligned memory. */
    static forcedinline void JUCE_VECTOR_CALLTYPE store (__m256i value, int32_t* dest) noexcept { _mm256_store_si256 (reinterpret_cast<__m256i*> (dest), value); }
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE add (__m256i a, __m256i b) noexcept { return _mm256_add_epi32 (a, b); }
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE sub (__m256i a, __m256i b) noexcept { return _mm256_sub_epi32 (a, b); }
    /** Element-wise multiply, keeping the low 32 bits of each product. */
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE mul (__m256i a, __m256i b) noexcept { return _mm256_mullo_epi32 (a, b); }
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_and (__m256i a, __m256i b) noexcept { return _mm256_and_si256 (a, b); }
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_or  (__m256i a, __m256i b) noexcept { return _mm256_or_si256  (a, b); }
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_xor (__m256i a, __m256i b) noexcept { return _mm256_xor_si256 (a, b); }
    /** (~a) & b — note the andnot intrinsic complements its FIRST operand. */
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_andnot (__m256i a, __m256i b) noexcept { return _mm256_andnot_si256 (a, b); }
    /** Bitwise NOT, built as (~a) & all-ones since AVX has no direct NOT. */
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_not (__m256i a) noexcept { return _mm256_andnot_si256 (a, load (kAllBitsSet)); }
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE min (__m256i a, __m256i b) noexcept { return _mm256_min_epi32 (a, b); }
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE max (__m256i a, __m256i b) noexcept { return _mm256_max_epi32 (a, b); }
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE equal (__m256i a, __m256i b) noexcept { return _mm256_cmpeq_epi32 (a, b); }
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE greaterThan (__m256i a, __m256i b) noexcept { return _mm256_cmpgt_epi32 (a, b); }
    /** a >= b, composed as (a > b) | (a == b) — AVX2 has no direct cmpge. */
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m256i a, __m256i b) noexcept { return bit_or (greaterThan (a, b), equal (a, b)); }
    /** Returns a + (b * c). */
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE multiplyAdd (__m256i a, __m256i b, __m256i c) noexcept { return add (a, mul (b, c)); }
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE notEqual (__m256i a, __m256i b) noexcept { return bit_not (equal (a, b)); }
    /** True when all eight elements compare equal (full 32-bit movemask == -1). */
    static forcedinline bool JUCE_VECTOR_CALLTYPE allEqual (__m256i a, __m256i b) noexcept { return (_mm256_movemask_epi8 (equal (a, b)) == -1); }
    /** Scalar element access via the fallback implementation. */
    static forcedinline int32_t JUCE_VECTOR_CALLTYPE get (__m256i v, size_t i) noexcept { return SIMDFallbackOps<int32_t, __m256i>::get (v, i); }
    /** Returns v with element i replaced by s (scalar fallback). */
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE set (__m256i v, size_t i, int32_t s) noexcept { return SIMDFallbackOps<int32_t, __m256i>::set (v, i, s); }
    /** Truncation is a no-op for an integer vector. */
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE truncate (__m256i a) noexcept { return a; }

    //==============================================================================
    /** Horizontal sum of all eight int32_t elements. */
    static forcedinline int32_t JUCE_VECTOR_CALLTYPE sum (__m256i a) noexcept
    {
        // Two horizontal adds reduce each 128-bit lane to its own total.
        __m256i tmp = _mm256_hadd_epi32 (a, a);
        tmp = _mm256_hadd_epi32 (tmp, tmp);

       #if JUCE_GCC
        // GCC vector indexing on __m256i selects 64-bit chunks: [0] holds the
        // low lane's total, [2] the high lane's.
        return (int32_t) (tmp[0] + tmp[2]);
       #else
        // Swap the 128-bit halves so the upper lane's total is readable via
        // _mm256_cvtsi256_si32, then add the two lane totals.
        constexpr int mask = (2 << 0) | (3 << 2) | (0 << 4) | (1 << 6);

        return _mm256_cvtsi256_si32 (tmp) + _mm256_cvtsi256_si32 (_mm256_permute4x64_epi64 (tmp, mask));
       #endif
    }
};
522
523
//==============================================================================
528
/** Unsigned 32-bit integer AVX intrinsics.

    AVX2 implementation of the JUCE SIMD operations for a vector of eight
    uint32_t values held in a single __m256i register. AVX2 only provides
    signed comparisons, so ordering operations first flip the sign bit of
    both operands (see ssign) to map unsigned order onto signed order.

    @tags{DSP}
*/
template <>
struct SIMDNativeOps<uint32_t>
{
    //==============================================================================
    using vSIMDType = __m256i;

    //==============================================================================
    // Aligned constants: all-ones (for bitwise NOT) and the per-element
    // sign bit (for the unsigned-compare trick).
    DECLARE_AVX_SIMD_CONST (uint32_t, kAllBitsSet);
    DECLARE_AVX_SIMD_CONST (uint32_t, kHighBit);

    //==============================================================================
    /** Flips each element's sign bit, biasing unsigned values into signed range. */
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE ssign (__m256i a) noexcept { return _mm256_xor_si256 (a, load (kHighBit)); }
    /** Broadcasts the scalar s to all eight elements. */
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE expand (uint32_t s) noexcept { return _mm256_set1_epi32 ((int32_t) s); }
    /** Loads eight values from 32-byte-aligned memory. */
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE load (const uint32_t* p) noexcept { return _mm256_load_si256 (reinterpret_cast<const __m256i*> (p)); }
    /** Stores eight values to 32-byte-aligned memory. */
    static forcedinline void JUCE_VECTOR_CALLTYPE store (__m256i value, uint32_t* dest) noexcept { _mm256_store_si256 (reinterpret_cast<__m256i*> (dest), value); }
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE add (__m256i a, __m256i b) noexcept { return _mm256_add_epi32 (a, b); }
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE sub (__m256i a, __m256i b) noexcept { return _mm256_sub_epi32 (a, b); }
    /** Element-wise multiply, keeping the low 32 bits of each product. */
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE mul (__m256i a, __m256i b) noexcept { return _mm256_mullo_epi32 (a, b); }
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_and (__m256i a, __m256i b) noexcept { return _mm256_and_si256 (a, b); }
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_or  (__m256i a, __m256i b) noexcept { return _mm256_or_si256  (a, b); }
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_xor (__m256i a, __m256i b) noexcept { return _mm256_xor_si256 (a, b); }
    /** (~a) & b — the andnot intrinsic complements its FIRST operand. */
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_andnot (__m256i a, __m256i b) noexcept { return _mm256_andnot_si256 (a, b); }
    /** Bitwise NOT, built as (~a) & all-ones since AVX has no direct NOT. */
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_not (__m256i a) noexcept { return _mm256_andnot_si256 (a, load (kAllBitsSet)); }
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE min (__m256i a, __m256i b) noexcept { return _mm256_min_epu32 (a, b); }
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE max (__m256i a, __m256i b) noexcept { return _mm256_max_epu32 (a, b); }
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE equal (__m256i a, __m256i b) noexcept { return _mm256_cmpeq_epi32 (a, b); }
    /** Unsigned greater-than via signed compare on sign-bit-flipped operands. */
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE greaterThan (__m256i a, __m256i b) noexcept { return _mm256_cmpgt_epi32 (ssign (a), ssign (b)); }
    /** a >= b, composed as (a > b) | (a == b). */
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m256i a, __m256i b) noexcept { return bit_or (greaterThan (a, b), equal (a, b)); }
    /** Returns a + (b * c). */
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE multiplyAdd (__m256i a, __m256i b, __m256i c) noexcept { return add (a, mul (b, c)); }
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE notEqual (__m256i a, __m256i b) noexcept { return bit_not (equal (a, b)); }
    /** True when all eight elements compare equal (full 32-bit movemask == -1). */
    static forcedinline bool JUCE_VECTOR_CALLTYPE allEqual (__m256i a, __m256i b) noexcept { return (_mm256_movemask_epi8 (equal (a, b)) == -1); }
    /** Scalar element access via the fallback implementation. */
    static forcedinline uint32_t JUCE_VECTOR_CALLTYPE get (__m256i v, size_t i) noexcept { return SIMDFallbackOps<uint32_t, __m256i>::get (v, i); }
    /** Returns v with element i replaced by s (scalar fallback). */
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE set (__m256i v, size_t i, uint32_t s) noexcept { return SIMDFallbackOps<uint32_t, __m256i>::set (v, i, s); }
    /** Truncation is a no-op for an integer vector. */
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE truncate (__m256i a) noexcept { return a; }

    //==============================================================================
    /** Horizontal sum of all eight uint32_t elements (modulo 2^32). */
    static forcedinline uint32_t JUCE_VECTOR_CALLTYPE sum (__m256i a) noexcept
    {
        // Two horizontal adds reduce each 128-bit lane to its own total.
        __m256i tmp = _mm256_hadd_epi32 (a, a);
        tmp = _mm256_hadd_epi32 (tmp, tmp);

       #if JUCE_GCC
        // GCC vector indexing on __m256i selects 64-bit chunks: [0] holds the
        // low lane's total, [2] the high lane's.
        return static_cast<uint32_t> (tmp[0]) + static_cast<uint32_t> (tmp[2]);
       #else
        // Swap the 128-bit halves so the upper lane's total is readable via
        // _mm256_cvtsi256_si32, then add the two lane totals.
        constexpr int mask = (2 << 0) | (3 << 2) | (0 << 4) | (1 << 6);

        return static_cast<uint32_t> (_mm256_cvtsi256_si32 (tmp))
             + static_cast<uint32_t> (_mm256_cvtsi256_si32 (_mm256_permute4x64_epi64 (tmp, mask)));
       #endif
    }
};
579
580
//==============================================================================
585
/** Signed 64-bit integer AVX intrinsics.

    AVX2 implementation of the JUCE SIMD operations for a vector of four
    int64_t values held in a single __m256i register. AVX2 lacks 64-bit
    min/max and multiply instructions, so those are synthesised from
    compare-and-blend or delegated to the scalar fallback.

    @tags{DSP}
*/
template <>
struct SIMDNativeOps<int64_t>
{
    //==============================================================================
    using vSIMDType = __m256i;

    //==============================================================================
    // 32-byte-aligned all-ones constant, used to synthesise bitwise NOT.
    DECLARE_AVX_SIMD_CONST (int64_t, kAllBitsSet);

    /** Broadcasts the scalar s to all four elements. */
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE expand (int64_t s) noexcept { return _mm256_set1_epi64x ((int64_t) s); }
    /** Loads four values from 32-byte-aligned memory. */
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE load (const int64_t* p) noexcept { return _mm256_load_si256 (reinterpret_cast<const __m256i*> (p)); }
    /** Stores four values to 32-byte-aligned memory. */
    static forcedinline void JUCE_VECTOR_CALLTYPE store (__m256i value, int64_t* dest) noexcept { _mm256_store_si256 (reinterpret_cast<__m256i*> (dest), value); }
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE add (__m256i a, __m256i b) noexcept { return _mm256_add_epi64 (a, b); }
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE sub (__m256i a, __m256i b) noexcept { return _mm256_sub_epi64 (a, b); }
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_and (__m256i a, __m256i b) noexcept { return _mm256_and_si256 (a, b); }
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_or  (__m256i a, __m256i b) noexcept { return _mm256_or_si256  (a, b); }
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_xor (__m256i a, __m256i b) noexcept { return _mm256_xor_si256 (a, b); }
    /** (~a) & b — the andnot intrinsic complements its FIRST operand. */
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_andnot (__m256i a, __m256i b) noexcept { return _mm256_andnot_si256 (a, b); }
    /** Bitwise NOT, built as (~a) & all-ones since AVX has no direct NOT. */
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_not (__m256i a) noexcept { return _mm256_andnot_si256 (a, load (kAllBitsSet)); }
    /** Element-wise min, blended from a compare mask — no 64-bit min intrinsic in AVX2. */
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE min (__m256i a, __m256i b) noexcept { __m256i lt = greaterThan (b, a); return bit_or (bit_and (lt, a), bit_andnot (lt, b)); }
    /** Element-wise max, blended from a compare mask — no 64-bit max intrinsic in AVX2. */
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE max (__m256i a, __m256i b) noexcept { __m256i gt = greaterThan (a, b); return bit_or (bit_and (gt, a), bit_andnot (gt, b)); }
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE equal (__m256i a, __m256i b) noexcept { return _mm256_cmpeq_epi64 (a, b); }
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE greaterThan (__m256i a, __m256i b) noexcept { return _mm256_cmpgt_epi64 (a, b); }
    /** a >= b, composed as (a > b) | (a == b). */
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m256i a, __m256i b) noexcept { return bit_or (greaterThan (a, b), equal (a, b)); }
    /** Returns a + (b * c). */
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE multiplyAdd (__m256i a, __m256i b, __m256i c) noexcept { return add (a, mul (b, c)); }
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE notEqual (__m256i a, __m256i b) noexcept { return bit_not (equal (a, b)); }
    /** True when all four elements compare equal (full 32-bit movemask == -1). */
    static forcedinline bool JUCE_VECTOR_CALLTYPE allEqual (__m256i a, __m256i b) noexcept { return (_mm256_movemask_epi8 (equal (a, b)) == -1); }
    /** Scalar element access via the fallback implementation. */
    static forcedinline int64_t JUCE_VECTOR_CALLTYPE get (__m256i v, size_t i) noexcept { return SIMDFallbackOps<int64_t, __m256i>::get (v, i); }
    /** Returns v with element i replaced by s (scalar fallback). */
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE set (__m256i v, size_t i, int64_t s) noexcept { return SIMDFallbackOps<int64_t, __m256i>::set (v, i, s); }
    /** Horizontal sum — delegated to the scalar fallback. */
    static forcedinline int64_t JUCE_VECTOR_CALLTYPE sum (__m256i a) noexcept { return SIMDFallbackOps<int64_t, __m256i>::sum (a); }
    /** Element-wise multiply — no 64-bit multiply intrinsic in AVX2, so use the scalar fallback. */
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE mul (__m256i a, __m256i b) noexcept { return SIMDFallbackOps<int64_t, __m256i>::mul (a, b); }
    /** Truncation is a no-op for an integer vector. */
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE truncate (__m256i a) noexcept { return a; }
};
618
619
//==============================================================================
624
/** Unsigned 64-bit integer AVX intrinsics.

    AVX2 implementation of the JUCE SIMD operations for a vector of four
    uint64_t values held in a single __m256i register. AVX2 only provides a
    signed 64-bit compare, so ordering operations first flip the sign bit of
    both operands (see ssign); 64-bit min/max/multiply are synthesised or
    delegated to the scalar fallback.

    @tags{DSP}
*/
template <>
struct SIMDNativeOps<uint64_t>
{
    //==============================================================================
    using vSIMDType = __m256i;

    //==============================================================================
    // Aligned constants: all-ones (for bitwise NOT) and the per-element
    // sign bit (for the unsigned-compare trick).
    DECLARE_AVX_SIMD_CONST (uint64_t, kAllBitsSet);
    DECLARE_AVX_SIMD_CONST (uint64_t, kHighBit);

    /** Broadcasts the scalar s to all four elements. */
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE expand (uint64_t s) noexcept { return _mm256_set1_epi64x ((int64_t) s); }
    /** Loads four values from 32-byte-aligned memory. */
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE load (const uint64_t* p) noexcept { return _mm256_load_si256 (reinterpret_cast<const __m256i*> (p)); }
    /** Stores four values to 32-byte-aligned memory. */
    static forcedinline void JUCE_VECTOR_CALLTYPE store (__m256i value, uint64_t* dest) noexcept { _mm256_store_si256 (reinterpret_cast<__m256i*> (dest), value); }
    /** Flips each element's sign bit, biasing unsigned values into signed range. */
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE ssign (__m256i a) noexcept { return _mm256_xor_si256 (a, load (kHighBit)); }
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE add (__m256i a, __m256i b) noexcept { return _mm256_add_epi64 (a, b); }
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE sub (__m256i a, __m256i b) noexcept { return _mm256_sub_epi64 (a, b); }
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_and (__m256i a, __m256i b) noexcept { return _mm256_and_si256 (a, b); }
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_or  (__m256i a, __m256i b) noexcept { return _mm256_or_si256  (a, b); }
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_xor (__m256i a, __m256i b) noexcept { return _mm256_xor_si256 (a, b); }
    /** (~a) & b — the andnot intrinsic complements its FIRST operand. */
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_andnot (__m256i a, __m256i b) noexcept { return _mm256_andnot_si256 (a, b); }
    /** Bitwise NOT, built as (~a) & all-ones since AVX has no direct NOT. */
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_not (__m256i a) noexcept { return _mm256_andnot_si256 (a, load (kAllBitsSet)); }
    /** Element-wise min, blended from a compare mask — no 64-bit min intrinsic in AVX2. */
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE min (__m256i a, __m256i b) noexcept { __m256i lt = greaterThan (b, a); return bit_or (bit_and (lt, a), bit_andnot (lt, b)); }
    /** Element-wise max, blended from a compare mask — no 64-bit max intrinsic in AVX2. */
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE max (__m256i a, __m256i b) noexcept { __m256i gt = greaterThan (a, b); return bit_or (bit_and (gt, a), bit_andnot (gt, b)); }
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE equal (__m256i a, __m256i b) noexcept { return _mm256_cmpeq_epi64 (a, b); }
    /** Unsigned greater-than via signed compare on sign-bit-flipped operands. */
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE greaterThan (__m256i a, __m256i b) noexcept { return _mm256_cmpgt_epi64 (ssign (a), ssign (b)); }
    /** a >= b, composed as (a > b) | (a == b). */
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m256i a, __m256i b) noexcept { return bit_or (greaterThan (a, b), equal (a, b)); }
    /** Returns a + (b * c). */
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE multiplyAdd (__m256i a, __m256i b, __m256i c) noexcept { return add (a, mul (b, c)); }
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE notEqual (__m256i a, __m256i b) noexcept { return bit_not (equal (a, b)); }
    /** True when all four elements compare equal (full 32-bit movemask == -1). */
    static forcedinline bool JUCE_VECTOR_CALLTYPE allEqual (__m256i a, __m256i b) noexcept { return (_mm256_movemask_epi8 (equal (a, b)) == -1); }
    /** Scalar element access via the fallback implementation. */
    static forcedinline uint64_t JUCE_VECTOR_CALLTYPE get (__m256i v, size_t i) noexcept { return SIMDFallbackOps<uint64_t, __m256i>::get (v, i); }
    /** Returns v with element i replaced by s (scalar fallback). */
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE set (__m256i v, size_t i, uint64_t s) noexcept { return SIMDFallbackOps<uint64_t, __m256i>::set (v, i, s); }
    /** Horizontal sum — delegated to the scalar fallback. */
    static forcedinline uint64_t JUCE_VECTOR_CALLTYPE sum (__m256i a) noexcept { return SIMDFallbackOps<uint64_t, __m256i>::sum (a); }
    /** Element-wise multiply — no 64-bit multiply intrinsic in AVX2, so use the scalar fallback. */
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE mul (__m256i a, __m256i b) noexcept { return SIMDFallbackOps<uint64_t, __m256i>::mul (a, b); }
    /** Truncation is a no-op for an integer vector. */
    static forcedinline __m256i JUCE_VECTOR_CALLTYPE truncate (__m256i a) noexcept { return a; }
};
659
660
#endif
661
662
#if JUCE_GCC && (__GNUC__ >= 6)
663
#pragma GCC diagnostic pop
664
#endif
665
666
}
// namespace dsp
667
}
// namespace juce
juce::Array
Definition
juce_Array.h:56
juce::dsp::SIMDFallbackOps
Definition
juce_fallback_SIMDNativeOps.h:61
JuceLibraryCode
modules
juce_dsp
native
juce_avx_SIMDNativeOps.h
Generated by
1.9.8