OpenShot Audio Library | OpenShotAudio 0.3.2
Loading...
Searching...
No Matches
juce_FloatVectorOperations.cpp
1/*
2 ==============================================================================
3
4 This file is part of the JUCE library.
5 Copyright (c) 2017 - ROLI Ltd.
6
7 JUCE is an open source library subject to commercial or open-source
8 licensing.
9
10 The code included in this file is provided under the terms of the ISC license
11 http://www.isc.org/downloads/software-support-policy/isc-license. Permission
12 To use, copy, modify, and/or distribute this software for any purpose with or
13 without fee is hereby granted provided that the above copyright notice and
14 this permission notice appear in all copies.
15
16 JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
17 EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
18 DISCLAIMED.
19
20 ==============================================================================
21*/
22
23namespace juce
24{
25
26namespace FloatVectorHelpers
27{
// Pointer-advance snippets used as the "increment" step of the vector loops.
// Each pointer is moved on by 16 bytes worth of elements (16 / sizeof (*dest)).
// The two multi-pointer variants reuse JUCE_INCREMENT_DEST, so dest is always advanced first.
#define JUCE_INCREMENT_DEST            dest += (16 / sizeof (*dest));
#define JUCE_INCREMENT_SRC_DEST        JUCE_INCREMENT_DEST src += (16 / sizeof (*dest));
#define JUCE_INCREMENT_SRC1_SRC2_DEST  JUCE_INCREMENT_DEST src1 += (16 / sizeof (*dest)); src2 += (16 / sizeof (*dest));
31
32 #if JUCE_USE_SSE_INTRINSICS
33 inline static bool isAligned (const void* p) noexcept
34 {
35 return (((pointer_sized_int) p) & 15) == 0;
36 }
37
38 struct BasicOps32
39 {
40 using Type = float;
41 using ParallelType = __m128;
42 using IntegerType = __m128;
43 enum { numParallel = 4 };
44
45 // Integer and parallel types are the same for SSE. On neon they have different types
46 static forcedinline IntegerType toint (ParallelType v) noexcept { return v; }
47 static forcedinline ParallelType toflt (IntegerType v) noexcept { return v; }
48
49 static forcedinline ParallelType load1 (Type v) noexcept { return _mm_load1_ps (&v); }
50 static forcedinline ParallelType loadA (const Type* v) noexcept { return _mm_load_ps (v); }
51 static forcedinline ParallelType loadU (const Type* v) noexcept { return _mm_loadu_ps (v); }
52 static forcedinline void storeA (Type* dest, ParallelType a) noexcept { _mm_store_ps (dest, a); }
53 static forcedinline void storeU (Type* dest, ParallelType a) noexcept { _mm_storeu_ps (dest, a); }
54
55 static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return _mm_add_ps (a, b); }
56 static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return _mm_sub_ps (a, b); }
57 static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return _mm_mul_ps (a, b); }
58 static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return _mm_max_ps (a, b); }
59 static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return _mm_min_ps (a, b); }
60
61 static forcedinline ParallelType bit_and (ParallelType a, ParallelType b) noexcept { return _mm_and_ps (a, b); }
62 static forcedinline ParallelType bit_not (ParallelType a, ParallelType b) noexcept { return _mm_andnot_ps (a, b); }
63 static forcedinline ParallelType bit_or (ParallelType a, ParallelType b) noexcept { return _mm_or_ps (a, b); }
64 static forcedinline ParallelType bit_xor (ParallelType a, ParallelType b) noexcept { return _mm_xor_ps (a, b); }
65
66 static forcedinline Type max (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmax (v[0], v[1], v[2], v[3]); }
67 static forcedinline Type min (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmin (v[0], v[1], v[2], v[3]); }
68 };
69
70 struct BasicOps64
71 {
72 using Type = double;
73 using ParallelType = __m128d;
74 using IntegerType = __m128d;
75 enum { numParallel = 2 };
76
77 // Integer and parallel types are the same for SSE. On neon they have different types
78 static forcedinline IntegerType toint (ParallelType v) noexcept { return v; }
79 static forcedinline ParallelType toflt (IntegerType v) noexcept { return v; }
80
81 static forcedinline ParallelType load1 (Type v) noexcept { return _mm_load1_pd (&v); }
82 static forcedinline ParallelType loadA (const Type* v) noexcept { return _mm_load_pd (v); }
83 static forcedinline ParallelType loadU (const Type* v) noexcept { return _mm_loadu_pd (v); }
84 static forcedinline void storeA (Type* dest, ParallelType a) noexcept { _mm_store_pd (dest, a); }
85 static forcedinline void storeU (Type* dest, ParallelType a) noexcept { _mm_storeu_pd (dest, a); }
86
87 static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return _mm_add_pd (a, b); }
88 static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return _mm_sub_pd (a, b); }
89 static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return _mm_mul_pd (a, b); }
90 static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return _mm_max_pd (a, b); }
91 static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return _mm_min_pd (a, b); }
92
93 static forcedinline ParallelType bit_and (ParallelType a, ParallelType b) noexcept { return _mm_and_pd (a, b); }
94 static forcedinline ParallelType bit_not (ParallelType a, ParallelType b) noexcept { return _mm_andnot_pd (a, b); }
95 static forcedinline ParallelType bit_or (ParallelType a, ParallelType b) noexcept { return _mm_or_pd (a, b); }
96 static forcedinline ParallelType bit_xor (ParallelType a, ParallelType b) noexcept { return _mm_xor_pd (a, b); }
97
98 static forcedinline Type max (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmax (v[0], v[1]); }
99 static forcedinline Type min (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmin (v[0], v[1]); }
100 };
101
102
103
// JUCE_BEGIN_VEC_OP and JUCE_FINISH_VEC_OP bracket a vectorised operation (SSE build).
// BEGIN selects the operation set from the element size of dest, opens a scope and
// computes how many whole vectors fit into num. FINISH reduces num to the leftover
// element count, returns early when nothing is left, closes the scope and then runs
// the scalar statement once per leftover element. The vector loops in between are
// expected to have advanced the data pointers already.
#define JUCE_BEGIN_VEC_OP \
    using Mode = FloatVectorHelpers::ModeType<sizeof(*dest)>::Mode; \
    { \
        const int numLongOps = num / Mode::numParallel;

#define JUCE_FINISH_VEC_OP(scalarOp) \
        num = num & (Mode::numParallel - 1); \
        if (num == 0) { return; } \
    } \
    for (int i = 0; i < num; ++i) scalarOp;
114
// In-place operation on dest only (SSE build). Picks the aligned or unaligned load/store
// pair from the alignment of dest. The source-load slot is filled with a placeholder
// token because the "locals" macros used with this form never expand it.
#define JUCE_PERFORM_VEC_OP_DEST(scalarOp, simdExpr, loadLocals, setup) \
    JUCE_BEGIN_VEC_OP \
    setup \
    if (! FloatVectorHelpers::isAligned (dest)) JUCE_VEC_LOOP (simdExpr, dummy, Mode::loadU, Mode::storeU, loadLocals, JUCE_INCREMENT_DEST) \
    else                                        JUCE_VEC_LOOP (simdExpr, dummy, Mode::loadA, Mode::storeA, loadLocals, JUCE_INCREMENT_DEST) \
    JUCE_FINISH_VEC_OP (scalarOp)
121
// One-source operation writing to dest (SSE build). The alignment of dest chooses the
// dest load/store flavour and the alignment of src chooses the src load flavour.
#define JUCE_PERFORM_VEC_OP_SRC_DEST(scalarOp, simdExpr, loadLocals, advance, setup) \
    JUCE_BEGIN_VEC_OP \
    setup \
    if (! FloatVectorHelpers::isAligned (dest)) \
    { \
        if (! FloatVectorHelpers::isAligned (src)) JUCE_VEC_LOOP (simdExpr, Mode::loadU, Mode::loadU, Mode::storeU, loadLocals, advance) \
        else                                       JUCE_VEC_LOOP (simdExpr, Mode::loadA, Mode::loadU, Mode::storeU, loadLocals, advance) \
    } \
    else \
    { \
        if (! FloatVectorHelpers::isAligned (src)) JUCE_VEC_LOOP (simdExpr, Mode::loadU, Mode::loadA, Mode::storeA, loadLocals, advance) \
        else                                       JUCE_VEC_LOOP (simdExpr, Mode::loadA, Mode::loadA, Mode::storeA, loadLocals, advance) \
    } \
    JUCE_FINISH_VEC_OP (scalarOp)
136
// Two-source operation writing to dest without reading it first (SSE build).
// dest alignment selects storeA/storeU; src1 and src2 alignment each select loadA/loadU.
#define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST(scalarOp, simdExpr, loadLocals, advance, setup) \
    JUCE_BEGIN_VEC_OP \
    setup \
    if (FloatVectorHelpers::isAligned (dest)) \
    { \
        if (FloatVectorHelpers::isAligned (src1) && FloatVectorHelpers::isAligned (src2)) \
            JUCE_VEC_LOOP_TWO_SOURCES (simdExpr, Mode::loadA, Mode::loadA, Mode::storeA, loadLocals, advance) \
        else if (FloatVectorHelpers::isAligned (src1)) \
            JUCE_VEC_LOOP_TWO_SOURCES (simdExpr, Mode::loadA, Mode::loadU, Mode::storeA, loadLocals, advance) \
        else if (FloatVectorHelpers::isAligned (src2)) \
            JUCE_VEC_LOOP_TWO_SOURCES (simdExpr, Mode::loadU, Mode::loadA, Mode::storeA, loadLocals, advance) \
        else \
            JUCE_VEC_LOOP_TWO_SOURCES (simdExpr, Mode::loadU, Mode::loadU, Mode::storeA, loadLocals, advance) \
    } \
    else \
    { \
        if (FloatVectorHelpers::isAligned (src1) && FloatVectorHelpers::isAligned (src2)) \
            JUCE_VEC_LOOP_TWO_SOURCES (simdExpr, Mode::loadA, Mode::loadA, Mode::storeU, loadLocals, advance) \
        else if (FloatVectorHelpers::isAligned (src1)) \
            JUCE_VEC_LOOP_TWO_SOURCES (simdExpr, Mode::loadA, Mode::loadU, Mode::storeU, loadLocals, advance) \
        else if (FloatVectorHelpers::isAligned (src2)) \
            JUCE_VEC_LOOP_TWO_SOURCES (simdExpr, Mode::loadU, Mode::loadA, Mode::storeU, loadLocals, advance) \
        else \
            JUCE_VEC_LOOP_TWO_SOURCES (simdExpr, Mode::loadU, Mode::loadU, Mode::storeU, loadLocals, advance) \
    } \
    JUCE_FINISH_VEC_OP (scalarOp)
167
// Two-source operation that also reads the current contents of dest (SSE build).
// dest alignment selects both its load and its store flavour; src1 and src2 alignment
// each select loadA/loadU independently.
#define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST(scalarOp, simdExpr, loadLocals, advance, setup) \
    JUCE_BEGIN_VEC_OP \
    setup \
    if (FloatVectorHelpers::isAligned (dest)) \
    { \
        if (FloatVectorHelpers::isAligned (src1) && FloatVectorHelpers::isAligned (src2)) \
            JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (simdExpr, Mode::loadA, Mode::loadA, Mode::loadA, Mode::storeA, loadLocals, advance) \
        else if (FloatVectorHelpers::isAligned (src1)) \
            JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (simdExpr, Mode::loadA, Mode::loadU, Mode::loadA, Mode::storeA, loadLocals, advance) \
        else if (FloatVectorHelpers::isAligned (src2)) \
            JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (simdExpr, Mode::loadU, Mode::loadA, Mode::loadA, Mode::storeA, loadLocals, advance) \
        else \
            JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (simdExpr, Mode::loadU, Mode::loadU, Mode::loadA, Mode::storeA, loadLocals, advance) \
    } \
    else \
    { \
        if (FloatVectorHelpers::isAligned (src1) && FloatVectorHelpers::isAligned (src2)) \
            JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (simdExpr, Mode::loadA, Mode::loadA, Mode::loadU, Mode::storeU, loadLocals, advance) \
        else if (FloatVectorHelpers::isAligned (src1)) \
            JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (simdExpr, Mode::loadA, Mode::loadU, Mode::loadU, Mode::storeU, loadLocals, advance) \
        else if (FloatVectorHelpers::isAligned (src2)) \
            JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (simdExpr, Mode::loadU, Mode::loadA, Mode::loadU, Mode::storeU, loadLocals, advance) \
        else \
            JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (simdExpr, Mode::loadU, Mode::loadU, Mode::loadU, Mode::storeU, loadLocals, advance) \
    } \
    JUCE_FINISH_VEC_OP (scalarOp)
198
199
200 //==============================================================================
201 #elif JUCE_USE_ARM_NEON
202
203 struct BasicOps32
204 {
205 using Type = float;
206 using ParallelType = float32x4_t;
207 using IntegerType = uint32x4_t;
208 union signMaskUnion { ParallelType f; IntegerType i; };
209 enum { numParallel = 4 };
210
211 static forcedinline IntegerType toint (ParallelType v) noexcept { signMaskUnion u; u.f = v; return u.i; }
212 static forcedinline ParallelType toflt (IntegerType v) noexcept { signMaskUnion u; u.i = v; return u.f; }
213
214 static forcedinline ParallelType load1 (Type v) noexcept { return vld1q_dup_f32 (&v); }
215 static forcedinline ParallelType loadA (const Type* v) noexcept { return vld1q_f32 (v); }
216 static forcedinline ParallelType loadU (const Type* v) noexcept { return vld1q_f32 (v); }
217 static forcedinline void storeA (Type* dest, ParallelType a) noexcept { vst1q_f32 (dest, a); }
218 static forcedinline void storeU (Type* dest, ParallelType a) noexcept { vst1q_f32 (dest, a); }
219
220 static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return vaddq_f32 (a, b); }
221 static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return vsubq_f32 (a, b); }
222 static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return vmulq_f32 (a, b); }
223 static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return vmaxq_f32 (a, b); }
224 static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return vminq_f32 (a, b); }
225
226 static forcedinline ParallelType bit_and (ParallelType a, ParallelType b) noexcept { return toflt (vandq_u32 (toint (a), toint (b))); }
227 static forcedinline ParallelType bit_not (ParallelType a, ParallelType b) noexcept { return toflt (vbicq_u32 (toint (a), toint (b))); }
228 static forcedinline ParallelType bit_or (ParallelType a, ParallelType b) noexcept { return toflt (vorrq_u32 (toint (a), toint (b))); }
229 static forcedinline ParallelType bit_xor (ParallelType a, ParallelType b) noexcept { return toflt (veorq_u32 (toint (a), toint (b))); }
230
231 static forcedinline Type max (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmax (v[0], v[1], v[2], v[3]); }
232 static forcedinline Type min (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmin (v[0], v[1], v[2], v[3]); }
233 };
234
235 struct BasicOps64
236 {
237 using Type = double;
238 using ParallelType = double;
239 using IntegerType = uint64;
240 union signMaskUnion { ParallelType f; IntegerType i; };
241 enum { numParallel = 1 };
242
243 static forcedinline IntegerType toint (ParallelType v) noexcept { signMaskUnion u; u.f = v; return u.i; }
244 static forcedinline ParallelType toflt (IntegerType v) noexcept { signMaskUnion u; u.i = v; return u.f; }
245
246 static forcedinline ParallelType load1 (Type v) noexcept { return v; }
247 static forcedinline ParallelType loadA (const Type* v) noexcept { return *v; }
248 static forcedinline ParallelType loadU (const Type* v) noexcept { return *v; }
249 static forcedinline void storeA (Type* dest, ParallelType a) noexcept { *dest = a; }
250 static forcedinline void storeU (Type* dest, ParallelType a) noexcept { *dest = a; }
251
252 static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return a + b; }
253 static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return a - b; }
254 static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return a * b; }
255 static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return jmax (a, b); }
256 static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return jmin (a, b); }
257
258 static forcedinline ParallelType bit_and (ParallelType a, ParallelType b) noexcept { return toflt (toint (a) & toint (b)); }
259 static forcedinline ParallelType bit_not (ParallelType a, ParallelType b) noexcept { return toflt ((~toint (a)) & toint (b)); }
260 static forcedinline ParallelType bit_or (ParallelType a, ParallelType b) noexcept { return toflt (toint (a) | toint (b)); }
261 static forcedinline ParallelType bit_xor (ParallelType a, ParallelType b) noexcept { return toflt (toint (a) ^ toint (b)); }
262
263 static forcedinline Type max (ParallelType a) noexcept { return a; }
264 static forcedinline Type min (ParallelType a) noexcept { return a; }
265 };
266
// JUCE_BEGIN_VEC_OP and JUCE_FINISH_VEC_OP bracket a vectorised operation (NEON build).
// The vector section is entered only when the selected operation set really holds more
// than one element per register; otherwise control falls straight through to the scalar
// loop emitted by FINISH, which then handles every element. Inside the vector section,
// FINISH reduces num to the leftover count and returns early when nothing is left.
#define JUCE_BEGIN_VEC_OP \
    using Mode = FloatVectorHelpers::ModeType<sizeof(*dest)>::Mode; \
    if (Mode::numParallel > 1) \
    { \
        const int numLongOps = num / Mode::numParallel;

#define JUCE_FINISH_VEC_OP(scalarOp) \
        num = num & (Mode::numParallel - 1); \
        if (num == 0) { return; } \
    } \
    for (int i = 0; i < num; ++i) scalarOp;
278
// NEON versions of the four "perform" macros. No alignment dispatch is done here:
// every variant passes the loadU/storeU members to the vector loop.

// In-place operation on dest only. The source-load slot receives a placeholder token.
#define JUCE_PERFORM_VEC_OP_DEST(scalarOp, simdExpr, loadLocals, setup) \
    JUCE_BEGIN_VEC_OP \
    setup \
    JUCE_VEC_LOOP (simdExpr, dummy, Mode::loadU, Mode::storeU, loadLocals, JUCE_INCREMENT_DEST) \
    JUCE_FINISH_VEC_OP (scalarOp)

// One source plus dest.
#define JUCE_PERFORM_VEC_OP_SRC_DEST(scalarOp, simdExpr, loadLocals, advance, setup) \
    JUCE_BEGIN_VEC_OP \
    setup \
    JUCE_VEC_LOOP (simdExpr, Mode::loadU, Mode::loadU, Mode::storeU, loadLocals, advance) \
    JUCE_FINISH_VEC_OP (scalarOp)

// Two sources, dest written without being read.
#define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST(scalarOp, simdExpr, loadLocals, advance, setup) \
    JUCE_BEGIN_VEC_OP \
    setup \
    JUCE_VEC_LOOP_TWO_SOURCES (simdExpr, Mode::loadU, Mode::loadU, Mode::storeU, loadLocals, advance) \
    JUCE_FINISH_VEC_OP (scalarOp)

// Two sources, dest both read and written.
#define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST(scalarOp, simdExpr, loadLocals, advance, setup) \
    JUCE_BEGIN_VEC_OP \
    setup \
    JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (simdExpr, Mode::loadU, Mode::loadU, Mode::loadU, Mode::storeU, loadLocals, advance) \
    JUCE_FINISH_VEC_OP (scalarOp)
302
303
304 //==============================================================================
305 #else
// Fallback versions used when neither SSE nor NEON is enabled: only the scalar
// statement is used, executed once per element; the vector-related arguments are ignored.
#define JUCE_PERFORM_VEC_OP_DEST(scalarOp, simdExpr, loadLocals, setup) \
    for (int i = 0; i < num; ++i) scalarOp;

#define JUCE_PERFORM_VEC_OP_SRC_DEST(scalarOp, simdExpr, loadLocals, advance, setup) \
    for (int i = 0; i < num; ++i) scalarOp;

#define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST(scalarOp, simdExpr, loadLocals, advance, setup) \
    for (int i = 0; i < num; ++i) scalarOp;

#define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST(scalarOp, simdExpr, loadLocals, advance, setup) \
    for (int i = 0; i < num; ++i) scalarOp;
317
318 #endif
319
320 //==============================================================================
// Core vector loops. Each runs numLongOps iterations; an iteration expands the
// "loadLocals" macro to declare the input registers, stores the value of simdExpr to
// dest through storeFn, then runs the pointer-advance snippet.

// One source (or none) plus dest.
#define JUCE_VEC_LOOP(simdExpr, srcLoadFn, dstLoadFn, storeFn, loadLocals, advance) \
    for (int i = 0; i < numLongOps; ++i) \
    { \
        loadLocals (srcLoadFn, dstLoadFn); \
        storeFn (dest, simdExpr); \
        advance; \
    }

// Two sources. The loop body is the same as JUCE_VEC_LOOP with the two load functions
// forwarded in order, so this simply delegates.
#define JUCE_VEC_LOOP_TWO_SOURCES(simdExpr, src1LoadFn, src2LoadFn, storeFn, loadLocals, advance) \
    JUCE_VEC_LOOP (simdExpr, src1LoadFn, src2LoadFn, storeFn, loadLocals, advance)

// Two sources plus a load of the existing dest contents.
#define JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD(simdExpr, src1LoadFn, src2LoadFn, dstLoadFn, storeFn, loadLocals, advance) \
    for (int i = 0; i < numLongOps; ++i) \
    { \
        loadLocals (src1LoadFn, src2LoadFn, dstLoadFn); \
        storeFn (dest, simdExpr); \
        advance; \
    }
344
// "Locals" macros: expanded at the top of each vector-loop iteration to declare the
// registers the vector expression refers to. d is loaded from dest, s from src, and
// s1/s2 from src1/src2, each through the load function passed in.
#define JUCE_LOAD_NONE(srcLoadFn, dstLoadFn)
#define JUCE_LOAD_DEST(srcLoadFn, dstLoadFn)                        const Mode::ParallelType d = dstLoadFn (dest);
#define JUCE_LOAD_SRC(srcLoadFn, dstLoadFn)                         const Mode::ParallelType s = srcLoadFn (src);
#define JUCE_LOAD_SRC1_SRC2(src1LoadFn, src2LoadFn)                 const Mode::ParallelType s1 = src1LoadFn (src1); const Mode::ParallelType s2 = src2LoadFn (src2);
#define JUCE_LOAD_SRC1_SRC2_DEST(src1LoadFn, src2LoadFn, dstLoadFn) const Mode::ParallelType d = dstLoadFn (dest); const Mode::ParallelType s1 = src1LoadFn (src1); const Mode::ParallelType s2 = src2LoadFn (src2);
#define JUCE_LOAD_SRC_DEST(srcLoadFn, dstLoadFn)                    const Mode::ParallelType d = dstLoadFn (dest); const Mode::ParallelType s = srcLoadFn (src);
351
352 union signMask32 { float f; uint32 i; };
353 union signMask64 { double d; uint64 i; };
354
355 #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
356 template<int typeSize> struct ModeType { using Mode = BasicOps32; };
357 template<> struct ModeType<8> { using Mode = BasicOps64; };
358
359 template <typename Mode>
360 struct MinMax
361 {
362 using Type = typename Mode::Type;
363 using ParallelType = typename Mode::ParallelType;
364
365 static Type findMinOrMax (const Type* src, int num, const bool isMinimum) noexcept
366 {
367 int numLongOps = num / Mode::numParallel;
368
369 if (numLongOps > 1)
370 {
371 ParallelType val;
372
373 #if ! JUCE_USE_ARM_NEON
374 if (isAligned (src))
375 {
376 val = Mode::loadA (src);
377
378 if (isMinimum)
379 {
380 while (--numLongOps > 0)
381 {
382 src += Mode::numParallel;
383 val = Mode::min (val, Mode::loadA (src));
384 }
385 }
386 else
387 {
388 while (--numLongOps > 0)
389 {
390 src += Mode::numParallel;
391 val = Mode::max (val, Mode::loadA (src));
392 }
393 }
394 }
395 else
396 #endif
397 {
398 val = Mode::loadU (src);
399
400 if (isMinimum)
401 {
402 while (--numLongOps > 0)
403 {
404 src += Mode::numParallel;
405 val = Mode::min (val, Mode::loadU (src));
406 }
407 }
408 else
409 {
410 while (--numLongOps > 0)
411 {
412 src += Mode::numParallel;
413 val = Mode::max (val, Mode::loadU (src));
414 }
415 }
416 }
417
418 Type result = isMinimum ? Mode::min (val)
419 : Mode::max (val);
420
421 num &= (Mode::numParallel - 1);
422 src += Mode::numParallel;
423
424 for (int i = 0; i < num; ++i)
425 result = isMinimum ? jmin (result, src[i])
426 : jmax (result, src[i]);
427
428 return result;
429 }
430
431 return isMinimum ? juce::findMinimum (src, num)
432 : juce::findMaximum (src, num);
433 }
434
435 static Range<Type> findMinAndMax (const Type* src, int num) noexcept
436 {
437 int numLongOps = num / Mode::numParallel;
438
439 if (numLongOps > 1)
440 {
441 ParallelType mn, mx;
442
443 #if ! JUCE_USE_ARM_NEON
444 if (isAligned (src))
445 {
446 mn = Mode::loadA (src);
447 mx = mn;
448
449 while (--numLongOps > 0)
450 {
451 src += Mode::numParallel;
452 const ParallelType v = Mode::loadA (src);
453 mn = Mode::min (mn, v);
454 mx = Mode::max (mx, v);
455 }
456 }
457 else
458 #endif
459 {
460 mn = Mode::loadU (src);
461 mx = mn;
462
463 while (--numLongOps > 0)
464 {
465 src += Mode::numParallel;
466 const ParallelType v = Mode::loadU (src);
467 mn = Mode::min (mn, v);
468 mx = Mode::max (mx, v);
469 }
470 }
471
472 Range<Type> result (Mode::min (mn),
473 Mode::max (mx));
474
475 num &= (Mode::numParallel - 1);
476 src += Mode::numParallel;
477
478 for (int i = 0; i < num; ++i)
479 result = result.getUnionWith (src[i]);
480
481 return result;
482 }
483
484 return Range<Type>::findMinAndMax (src, num);
485 }
486 };
487 #endif
488}
489
490//==============================================================================
namespace
{
   #if JUCE_USE_VDSP_FRAMEWORK
    /** Removes constness from a pointer so it can be passed to vDSP functions whose
        signatures differ slightly in the OSX 10.8 SDK and below. Can be safely removed
        once those SDKs are obsolete.
    */
    template <typename ValueType>
    ValueType* osx108sdkCompatibilityCast (const ValueType* arg) noexcept
    {
        return const_cast<ValueType*> (arg);
    }
   #endif
}
500
501//==============================================================================
502void JUCE_CALLTYPE FloatVectorOperations::clear (float* dest, int num) noexcept
503{
504 #if JUCE_USE_VDSP_FRAMEWORK
505 vDSP_vclr (dest, 1, (size_t) num);
506 #else
507 zeromem (dest, (size_t) num * sizeof (float));
508 #endif
509}
510
511void JUCE_CALLTYPE FloatVectorOperations::clear (double* dest, int num) noexcept
512{
513 #if JUCE_USE_VDSP_FRAMEWORK
514 vDSP_vclrD (dest, 1, (size_t) num);
515 #else
516 zeromem (dest, (size_t) num * sizeof (double));
517 #endif
518}
519
520void JUCE_CALLTYPE FloatVectorOperations::fill (float* dest, float valueToFill, int num) noexcept
521{
522 #if JUCE_USE_VDSP_FRAMEWORK
523 vDSP_vfill (&valueToFill, dest, 1, (size_t) num);
524 #else
525 JUCE_PERFORM_VEC_OP_DEST (dest[i] = valueToFill, val, JUCE_LOAD_NONE,
526 const Mode::ParallelType val = Mode::load1 (valueToFill);)
527 #endif
528}
529
530void JUCE_CALLTYPE FloatVectorOperations::fill (double* dest, double valueToFill, int num) noexcept
531{
532 #if JUCE_USE_VDSP_FRAMEWORK
533 vDSP_vfillD (&valueToFill, dest, 1, (size_t) num);
534 #else
535 JUCE_PERFORM_VEC_OP_DEST (dest[i] = valueToFill, val, JUCE_LOAD_NONE,
536 const Mode::ParallelType val = Mode::load1 (valueToFill);)
537 #endif
538}
539
540void JUCE_CALLTYPE FloatVectorOperations::copy (float* dest, const float* src, int num) noexcept
541{
542 memcpy (dest, src, (size_t) num * sizeof (float));
543}
544
545void JUCE_CALLTYPE FloatVectorOperations::copy (double* dest, const double* src, int num) noexcept
546{
547 memcpy (dest, src, (size_t) num * sizeof (double));
548}
549
550void JUCE_CALLTYPE FloatVectorOperations::copyWithMultiply (float* dest, const float* src, float multiplier, int num) noexcept
551{
552 #if JUCE_USE_VDSP_FRAMEWORK
553 vDSP_vsmul (src, 1, &multiplier, dest, 1, (vDSP_Length) num);
554 #else
555 JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier, Mode::mul (mult, s),
556 JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
557 const Mode::ParallelType mult = Mode::load1 (multiplier);)
558 #endif
559}
560
561void JUCE_CALLTYPE FloatVectorOperations::copyWithMultiply (double* dest, const double* src, double multiplier, int num) noexcept
562{
563 #if JUCE_USE_VDSP_FRAMEWORK
564 vDSP_vsmulD (src, 1, &multiplier, dest, 1, (vDSP_Length) num);
565 #else
566 JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier, Mode::mul (mult, s),
567 JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
568 const Mode::ParallelType mult = Mode::load1 (multiplier);)
569 #endif
570}
571
572void JUCE_CALLTYPE FloatVectorOperations::add (float* dest, float amount, int num) noexcept
573{
574 #if JUCE_USE_VDSP_FRAMEWORK
575 vDSP_vsadd (dest, 1, &amount, dest, 1, (vDSP_Length) num);
576 #else
577 JUCE_PERFORM_VEC_OP_DEST (dest[i] += amount, Mode::add (d, amountToAdd), JUCE_LOAD_DEST,
578 const Mode::ParallelType amountToAdd = Mode::load1 (amount);)
579 #endif
580}
581
582void JUCE_CALLTYPE FloatVectorOperations::add (double* dest, double amount, int num) noexcept
583{
584 JUCE_PERFORM_VEC_OP_DEST (dest[i] += amount, Mode::add (d, amountToAdd), JUCE_LOAD_DEST,
585 const Mode::ParallelType amountToAdd = Mode::load1 (amount);)
586}
587
588void JUCE_CALLTYPE FloatVectorOperations::add (float* dest, const float* src, float amount, int num) noexcept
589{
590 #if JUCE_USE_VDSP_FRAMEWORK
592 #else
593 JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] + amount, Mode::add (am, s),
594 JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
595 const Mode::ParallelType am = Mode::load1 (amount);)
596 #endif
597}
598
599void JUCE_CALLTYPE FloatVectorOperations::add (double* dest, const double* src, double amount, int num) noexcept
600{
601 #if JUCE_USE_VDSP_FRAMEWORK
603 #else
604 JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] + amount, Mode::add (am, s),
605 JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
606 const Mode::ParallelType am = Mode::load1 (amount);)
607 #endif
608}
609
610void JUCE_CALLTYPE FloatVectorOperations::add (float* dest, const float* src, int num) noexcept
611{
612 #if JUCE_USE_VDSP_FRAMEWORK
613 vDSP_vadd (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
614 #else
615 JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i], Mode::add (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
616 #endif
617}
618
619void JUCE_CALLTYPE FloatVectorOperations::add (double* dest, const double* src, int num) noexcept
620{
621 #if JUCE_USE_VDSP_FRAMEWORK
622 vDSP_vaddD (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
623 #else
624 JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i], Mode::add (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
625 #endif
626}
627
628void JUCE_CALLTYPE FloatVectorOperations::add (float* dest, const float* src1, const float* src2, int num) noexcept
629{
630 #if JUCE_USE_VDSP_FRAMEWORK
631 vDSP_vadd (src1, 1, src2, 1, dest, 1, (vDSP_Length) num);
632 #else
633 JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] + src2[i], Mode::add (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
634 #endif
635}
636
637void JUCE_CALLTYPE FloatVectorOperations::add (double* dest, const double* src1, const double* src2, int num) noexcept
638{
639 #if JUCE_USE_VDSP_FRAMEWORK
640 vDSP_vaddD (src1, 1, src2, 1, dest, 1, (vDSP_Length) num);
641 #else
642 JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] + src2[i], Mode::add (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
643 #endif
644}
645
646void JUCE_CALLTYPE FloatVectorOperations::subtract (float* dest, const float* src, int num) noexcept
647{
648 #if JUCE_USE_VDSP_FRAMEWORK
649 vDSP_vsub (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
650 #else
651 JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] -= src[i], Mode::sub (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
652 #endif
653}
654
655void JUCE_CALLTYPE FloatVectorOperations::subtract (double* dest, const double* src, int num) noexcept
656{
657 #if JUCE_USE_VDSP_FRAMEWORK
658 vDSP_vsubD (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
659 #else
660 JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] -= src[i], Mode::sub (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
661 #endif
662}
663
664void JUCE_CALLTYPE FloatVectorOperations::subtract (float* dest, const float* src1, const float* src2, int num) noexcept
665{
666 #if JUCE_USE_VDSP_FRAMEWORK
667 vDSP_vsub (src2, 1, src1, 1, dest, 1, (vDSP_Length) num);
668 #else
669 JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] - src2[i], Mode::sub (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
670 #endif
671}
672
673void JUCE_CALLTYPE FloatVectorOperations::subtract (double* dest, const double* src1, const double* src2, int num) noexcept
674{
675 #if JUCE_USE_VDSP_FRAMEWORK
676 vDSP_vsubD (src2, 1, src1, 1, dest, 1, (vDSP_Length) num);
677 #else
678 JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] - src2[i], Mode::sub (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
679 #endif
680}
681
682void JUCE_CALLTYPE FloatVectorOperations::addWithMultiply (float* dest, const float* src, float multiplier, int num) noexcept
683{
684 #if JUCE_USE_VDSP_FRAMEWORK
685 vDSP_vsma (src, 1, &multiplier, dest, 1, dest, 1, (vDSP_Length) num);
686 #else
687 JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i] * multiplier, Mode::add (d, Mode::mul (mult, s)),
688 JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST,
689 const Mode::ParallelType mult = Mode::load1 (multiplier);)
690 #endif
691}
692
693void JUCE_CALLTYPE FloatVectorOperations::addWithMultiply (double* dest, const double* src, double multiplier, int num) noexcept
694{
695 #if JUCE_USE_VDSP_FRAMEWORK
696 vDSP_vsmaD (src, 1, &multiplier, dest, 1, dest, 1, (vDSP_Length) num);
697 #else
698 JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i] * multiplier, Mode::add (d, Mode::mul (mult, s)),
699 JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST,
700 const Mode::ParallelType mult = Mode::load1 (multiplier);)
701 #endif
702}
703
704void JUCE_CALLTYPE FloatVectorOperations::addWithMultiply (float* dest, const float* src1, const float* src2, int num) noexcept
705{
706 #if JUCE_USE_VDSP_FRAMEWORK
707 vDSP_vma ((float*) src1, 1, (float*) src2, 1, dest, 1, dest, 1, (vDSP_Length) num);
708 #else
709 JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST (dest[i] += src1[i] * src2[i], Mode::add (d, Mode::mul (s1, s2)),
710 JUCE_LOAD_SRC1_SRC2_DEST,
711 JUCE_INCREMENT_SRC1_SRC2_DEST, )
712 #endif
713}
714
715void JUCE_CALLTYPE FloatVectorOperations::addWithMultiply (double* dest, const double* src1, const double* src2, int num) noexcept
716{
717 #if JUCE_USE_VDSP_FRAMEWORK
718 vDSP_vmaD ((double*) src1, 1, (double*) src2, 1, dest, 1, dest, 1, (vDSP_Length) num);
719 #else
720 JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST (dest[i] += src1[i] * src2[i], Mode::add (d, Mode::mul (s1, s2)),
721 JUCE_LOAD_SRC1_SRC2_DEST,
722 JUCE_INCREMENT_SRC1_SRC2_DEST, )
723 #endif
724}
725
726void JUCE_CALLTYPE FloatVectorOperations::subtractWithMultiply (float* dest, const float* src, float multiplier, int num) noexcept
727{
728 JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] -= src[i] * multiplier, Mode::sub (d, Mode::mul (mult, s)),
729 JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST,
730 const Mode::ParallelType mult = Mode::load1 (multiplier);)
731}
732
733void JUCE_CALLTYPE FloatVectorOperations::subtractWithMultiply (double* dest, const double* src, double multiplier, int num) noexcept
734{
735 JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] -= src[i] * multiplier, Mode::sub (d, Mode::mul (mult, s)),
736 JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST,
737 const Mode::ParallelType mult = Mode::load1 (multiplier);)
738}
739
740void JUCE_CALLTYPE FloatVectorOperations::subtractWithMultiply (float* dest, const float* src1, const float* src2, int num) noexcept
741{
742 JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST (dest[i] -= src1[i] * src2[i], Mode::sub (d, Mode::mul (s1, s2)),
743 JUCE_LOAD_SRC1_SRC2_DEST,
744 JUCE_INCREMENT_SRC1_SRC2_DEST, )
745}
746
747void JUCE_CALLTYPE FloatVectorOperations::subtractWithMultiply (double* dest, const double* src1, const double* src2, int num) noexcept
748{
749 JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST (dest[i] -= src1[i] * src2[i], Mode::sub (d, Mode::mul (s1, s2)),
750 JUCE_LOAD_SRC1_SRC2_DEST,
751 JUCE_INCREMENT_SRC1_SRC2_DEST, )
752}
753
// In-place element-wise multiply: dest[i] *= src[i] for num floats.
// With JUCE_USE_VDSP_FRAMEWORK set the call is forwarded to vDSP_vmul, with dest used as both an
// input and the output; otherwise the vector-op macro is used (scalar form first, Mode:: form second).
// NOTE(review): JUCE_PERFORM_VEC_OP_SRC_DEST is defined outside this excerpt - confirm its path selection.
754void JUCE_CALLTYPE FloatVectorOperations::multiply (float* dest, const float* src, int num) noexcept
755{
756 #if JUCE_USE_VDSP_FRAMEWORK
757 vDSP_vmul (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
758 #else
759 JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] *= src[i], Mode::mul (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
760 #endif
761}
762
// Double-precision in-place element-wise multiply: dest[i] *= src[i] for num values.
// With JUCE_USE_VDSP_FRAMEWORK set the call is forwarded to vDSP_vmulD, with dest used as both an
// input and the output; otherwise the vector-op macro is used (scalar form first, Mode:: form second).
// NOTE(review): JUCE_PERFORM_VEC_OP_SRC_DEST is defined outside this excerpt - confirm its path selection.
763void JUCE_CALLTYPE FloatVectorOperations::multiply (double* dest, const double* src, int num) noexcept
764{
765 #if JUCE_USE_VDSP_FRAMEWORK
766 vDSP_vmulD (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
767 #else
768 JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] *= src[i], Mode::mul (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
769 #endif
770}
771
// Element-wise product of two arrays written to a third: dest[i] = src1[i] * src2[i] for num floats.
// With JUCE_USE_VDSP_FRAMEWORK set the call is forwarded to vDSP_vmul; otherwise the vector-op macro
// is used (scalar form first, Mode:: form second).
// NOTE(review): JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST is defined outside this excerpt - confirm its path selection.
772void JUCE_CALLTYPE FloatVectorOperations::multiply (float* dest, const float* src1, const float* src2, int num) noexcept
773{
774 #if JUCE_USE_VDSP_FRAMEWORK
775 vDSP_vmul (src1, 1, src2, 1, dest, 1, (vDSP_Length) num);
776 #else
777 JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] * src2[i], Mode::mul (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
778 #endif
779}
780
// Double-precision element-wise product: dest[i] = src1[i] * src2[i] for num values.
// With JUCE_USE_VDSP_FRAMEWORK set the call is forwarded to vDSP_vmulD; otherwise the vector-op macro
// is used (scalar form first, Mode:: form second).
// NOTE(review): JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST is defined outside this excerpt - confirm its path selection.
781void JUCE_CALLTYPE FloatVectorOperations::multiply (double* dest, const double* src1, const double* src2, int num) noexcept
782{
783 #if JUCE_USE_VDSP_FRAMEWORK
784 vDSP_vmulD (src1, 1, src2, 1, dest, 1, (vDSP_Length) num);
785 #else
786 JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] * src2[i], Mode::mul (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
787 #endif
788}
789
// Scales an array in place: dest[i] *= multiplier for num floats.
// With JUCE_USE_VDSP_FRAMEWORK set the call is forwarded to vDSP_vsmul (the scalar is passed by
// address); otherwise the vector-op macro is used, with `mult` holding the multiplier loaded
// through Mode::load1.
// NOTE(review): JUCE_PERFORM_VEC_OP_DEST is defined outside this excerpt - confirm its path selection.
790void JUCE_CALLTYPE FloatVectorOperations::multiply (float* dest, float multiplier, int num) noexcept
791{
792 #if JUCE_USE_VDSP_FRAMEWORK
793 vDSP_vsmul (dest, 1, &multiplier, dest, 1, (vDSP_Length) num);
794 #else
795 JUCE_PERFORM_VEC_OP_DEST (dest[i] *= multiplier, Mode::mul (d, mult), JUCE_LOAD_DEST,
796 const Mode::ParallelType mult = Mode::load1 (multiplier);)
797 #endif
798}
799
// Double-precision in-place scale: dest[i] *= multiplier for num values.
// With JUCE_USE_VDSP_FRAMEWORK set the call is forwarded to vDSP_vsmulD (the scalar is passed by
// address); otherwise the vector-op macro is used, with `mult` holding the multiplier loaded
// through Mode::load1.
// NOTE(review): JUCE_PERFORM_VEC_OP_DEST is defined outside this excerpt - confirm its path selection.
800void JUCE_CALLTYPE FloatVectorOperations::multiply (double* dest, double multiplier, int num) noexcept
801{
802 #if JUCE_USE_VDSP_FRAMEWORK
803 vDSP_vsmulD (dest, 1, &multiplier, dest, 1, (vDSP_Length) num);
804 #else
805 JUCE_PERFORM_VEC_OP_DEST (dest[i] *= multiplier, Mode::mul (d, mult), JUCE_LOAD_DEST,
806 const Mode::ParallelType mult = Mode::load1 (multiplier);)
807 #endif
808}
809
// Writes a scaled copy of src into dest: dest[i] = src[i] * multiplier for num floats.
// The macro's first argument is the scalar form, the second the matching Mode:: expression, and the
// last argument declares `mult`, the multiplier loaded through Mode::load1.
// NOTE(review): JUCE_PERFORM_VEC_OP_SRC_DEST is defined outside this excerpt - confirm its path selection.
810void JUCE_CALLTYPE FloatVectorOperations::multiply (float* dest, const float* src, float multiplier, int num) noexcept
811{
812 JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier, Mode::mul (mult, s),
813 JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
814 const Mode::ParallelType mult = Mode::load1 (multiplier);)
815}
816
// Double-precision scaled copy: dest[i] = src[i] * multiplier for num values.
// The macro's first argument is the scalar form, the second the matching Mode:: expression, and the
// last argument declares `mult`, the multiplier loaded through Mode::load1.
// NOTE(review): JUCE_PERFORM_VEC_OP_SRC_DEST is defined outside this excerpt - confirm its path selection.
817void JUCE_CALLTYPE FloatVectorOperations::multiply (double* dest, const double* src, double multiplier, int num) noexcept
818{
819 JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier, Mode::mul (mult, s),
820 JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
821 const Mode::ParallelType mult = Mode::load1 (multiplier);)
822}
823
824void FloatVectorOperations::negate (float* dest, const float* src, int num) noexcept
825{
826 #if JUCE_USE_VDSP_FRAMEWORK
827 vDSP_vneg ((float*) src, 1, dest, 1, (vDSP_Length) num);
828 #else
829 copyWithMultiply (dest, src, -1.0f, num);
830 #endif
831}
832
833void FloatVectorOperations::negate (double* dest, const double* src, int num) noexcept
834{
835 #if JUCE_USE_VDSP_FRAMEWORK
836 vDSP_vnegD ((double*) src, 1, dest, 1, (vDSP_Length) num);
837 #else
838 copyWithMultiply (dest, src, -1.0f, num);
839 #endif
840}
841
// Writes the absolute value of num floats from src into dest: dest[i] = std::abs (src[i]).
// With JUCE_USE_VDSP_FRAMEWORK set the call is forwarded to vDSP_vabs. Otherwise the vector
// expression ANDs each value with a mask that has every bit set except the top one (0x7fffffff),
// which is stored through the integer member of signMask32 and read back through its float member.
// NOTE(review): JUCE_PERFORM_VEC_OP_SRC_DEST is defined outside this excerpt - confirm its path selection.
842void FloatVectorOperations::abs (float* dest, const float* src, int num) noexcept
843{
844 #if JUCE_USE_VDSP_FRAMEWORK
845 vDSP_vabs ((float*) src, 1, dest, 1, (vDSP_Length) num);
846 #else
847 FloatVectorHelpers::signMask32 signMask;
848 signMask.i = 0x7fffffffUL;
849 JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = std::abs (src[i]), Mode::bit_and (s, mask),
850 JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
851 const Mode::ParallelType mask = Mode::load1 (signMask.f);)
852
// presumably silences an unused-variable warning when the macro expands without using the mask - confirm
853 ignoreUnused (signMask);
854 #endif
855}
856
// Writes the absolute value of num doubles from src into dest: dest[i] = std::abs (src[i]).
// With JUCE_USE_VDSP_FRAMEWORK set the call is forwarded to vDSP_vabsD. Otherwise the vector
// expression ANDs each value with a 64-bit mask that has every bit set except the top one,
// which is stored through the integer member of signMask64 and read back through its double member.
// NOTE(review): JUCE_PERFORM_VEC_OP_SRC_DEST is defined outside this excerpt - confirm its path selection.
857void FloatVectorOperations::abs (double* dest, const double* src, int num) noexcept
858{
859 #if JUCE_USE_VDSP_FRAMEWORK
860 vDSP_vabsD ((double*) src, 1, dest, 1, (vDSP_Length) num);
861 #else
862 FloatVectorHelpers::signMask64 signMask;
863 signMask.i = 0x7fffffffffffffffULL;
864
865 JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = std::abs (src[i]), Mode::bit_and (s, mask),
866 JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
867 const Mode::ParallelType mask = Mode::load1 (signMask.d);)
868
// presumably silences an unused-variable warning when the macro expands without using the mask - confirm
869 ignoreUnused (signMask);
870 #endif
871}
872
873void JUCE_CALLTYPE FloatVectorOperations::convertFixedToFloat (float* dest, const int* src, float multiplier, int num) noexcept
874{
875 #if JUCE_USE_ARM_NEON
876 JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier,
878 JUCE_LOAD_NONE, JUCE_INCREMENT_SRC_DEST, )
879 #else
880 JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = (float) src[i] * multiplier,
881 Mode::mul (mult, _mm_cvtepi32_ps (_mm_loadu_si128 (reinterpret_cast<const __m128i*> (src)))),
882 JUCE_LOAD_NONE, JUCE_INCREMENT_SRC_DEST,
883 const Mode::ParallelType mult = Mode::load1 (multiplier);)
884 #endif
885}
886
// Clamps from above against a constant: dest[i] = jmin (src[i], comp) for num floats.
// The macro's first argument is the scalar form, the second the matching Mode:: expression, and the
// last argument declares `cmp`, the comparison value loaded through Mode::load1.
// NOTE(review): JUCE_PERFORM_VEC_OP_SRC_DEST is defined outside this excerpt - confirm its path selection.
887void JUCE_CALLTYPE FloatVectorOperations::min (float* dest, const float* src, float comp, int num) noexcept
888{
889 JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmin (src[i], comp), Mode::min (s, cmp),
890 JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
891 const Mode::ParallelType cmp = Mode::load1 (comp);)
892}
893
// Double-precision overload: dest[i] = jmin (src[i], comp) for num values.
// The macro's first argument is the scalar form, the second the matching Mode:: expression, and the
// last argument declares `cmp`, the comparison value loaded through Mode::load1.
// NOTE(review): JUCE_PERFORM_VEC_OP_SRC_DEST is defined outside this excerpt - confirm its path selection.
894void JUCE_CALLTYPE FloatVectorOperations::min (double* dest, const double* src, double comp, int num) noexcept
895{
896 JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmin (src[i], comp), Mode::min (s, cmp),
897 JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
898 const Mode::ParallelType cmp = Mode::load1 (comp);)
899}
900
// Element-wise minimum of two arrays: dest[i] = jmin (src1[i], src2[i]) for num floats.
// With JUCE_USE_VDSP_FRAMEWORK set the call is forwarded to vDSP_vmin (the casts drop const from
// the sources); otherwise the vector-op macro is used (scalar form first, Mode:: form second).
// NOTE(review): JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST is defined outside this excerpt - confirm its path selection.
901void JUCE_CALLTYPE FloatVectorOperations::min (float* dest, const float* src1, const float* src2, int num) noexcept
902{
903 #if JUCE_USE_VDSP_FRAMEWORK
904 vDSP_vmin ((float*) src1, 1, (float*) src2, 1, dest, 1, (vDSP_Length) num);
905 #else
906 JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = jmin (src1[i], src2[i]), Mode::min (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
907 #endif
908}
909
// Double-precision element-wise minimum: dest[i] = jmin (src1[i], src2[i]) for num values.
// With JUCE_USE_VDSP_FRAMEWORK set the call is forwarded to vDSP_vminD (the casts drop const from
// the sources); otherwise the vector-op macro is used (scalar form first, Mode:: form second).
// NOTE(review): JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST is defined outside this excerpt - confirm its path selection.
910void JUCE_CALLTYPE FloatVectorOperations::min (double* dest, const double* src1, const double* src2, int num) noexcept
911{
912 #if JUCE_USE_VDSP_FRAMEWORK
913 vDSP_vminD ((double*) src1, 1, (double*) src2, 1, dest, 1, (vDSP_Length) num);
914 #else
915 JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = jmin (src1[i], src2[i]), Mode::min (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
916 #endif
917}
918
// Clamps from below against a constant: dest[i] = jmax (src[i], comp) for num floats.
// The macro's first argument is the scalar form, the second the matching Mode:: expression, and the
// last argument declares `cmp`, the comparison value loaded through Mode::load1.
// NOTE(review): JUCE_PERFORM_VEC_OP_SRC_DEST is defined outside this excerpt - confirm its path selection.
919void JUCE_CALLTYPE FloatVectorOperations::max (float* dest, const float* src, float comp, int num) noexcept
920{
921 JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmax (src[i], comp), Mode::max (s, cmp),
922 JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
923 const Mode::ParallelType cmp = Mode::load1 (comp);)
924}
925
// Double-precision overload: dest[i] = jmax (src[i], comp) for num values.
// The macro's first argument is the scalar form, the second the matching Mode:: expression, and the
// last argument declares `cmp`, the comparison value loaded through Mode::load1.
// NOTE(review): JUCE_PERFORM_VEC_OP_SRC_DEST is defined outside this excerpt - confirm its path selection.
926void JUCE_CALLTYPE FloatVectorOperations::max (double* dest, const double* src, double comp, int num) noexcept
927{
928 JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmax (src[i], comp), Mode::max (s, cmp),
929 JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
930 const Mode::ParallelType cmp = Mode::load1 (comp);)
931}
932
// Element-wise maximum of two arrays: dest[i] = jmax (src1[i], src2[i]) for num floats.
// With JUCE_USE_VDSP_FRAMEWORK set the call is forwarded to vDSP_vmax (the casts drop const from
// the sources); otherwise the vector-op macro is used (scalar form first, Mode:: form second).
// NOTE(review): JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST is defined outside this excerpt - confirm its path selection.
933void JUCE_CALLTYPE FloatVectorOperations::max (float* dest, const float* src1, const float* src2, int num) noexcept
934{
935 #if JUCE_USE_VDSP_FRAMEWORK
936 vDSP_vmax ((float*) src1, 1, (float*) src2, 1, dest, 1, (vDSP_Length) num);
937 #else
938 JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = jmax (src1[i], src2[i]), Mode::max (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
939 #endif
940}
941
// Double-precision element-wise maximum: dest[i] = jmax (src1[i], src2[i]) for num values.
// With JUCE_USE_VDSP_FRAMEWORK set the call is forwarded to vDSP_vmaxD (the casts drop const from
// the sources); otherwise the vector-op macro is used (scalar form first, Mode:: form second).
// NOTE(review): JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST is defined outside this excerpt - confirm its path selection.
942void JUCE_CALLTYPE FloatVectorOperations::max (double* dest, const double* src1, const double* src2, int num) noexcept
943{
944 #if JUCE_USE_VDSP_FRAMEWORK
945 vDSP_vmaxD ((double*) src1, 1, (double*) src2, 1, dest, 1, (vDSP_Length) num);
946 #else
947 JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = jmax (src1[i], src2[i]), Mode::max (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
948 #endif
949}
950
// Clamps num floats from src into [low, high]: dest[i] = jmax (jmin (src[i], high), low).
// Asserts that high is not below low. With JUCE_USE_VDSP_FRAMEWORK set the call is forwarded to
// vDSP_vclip (bounds passed by address); otherwise the vector-op macro is used, with `lo` and `hi`
// holding the bounds loaded through Mode::load1.
// NOTE(review): JUCE_PERFORM_VEC_OP_SRC_DEST is defined outside this excerpt - confirm its path selection.
951void JUCE_CALLTYPE FloatVectorOperations::clip (float* dest, const float* src, float low, float high, int num) noexcept
952{
953 jassert(high >= low);
954
955 #if JUCE_USE_VDSP_FRAMEWORK
956 vDSP_vclip ((float*) src, 1, &low, &high, dest, 1, (vDSP_Length) num);
957 #else
958 JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmax (jmin (src[i], high), low), Mode::max (Mode::min (s, hi), lo),
959 JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
960 const Mode::ParallelType lo = Mode::load1 (low); const Mode::ParallelType hi = Mode::load1 (high);)
961 #endif
962}
963
// Clamps num doubles from src into [low, high]: dest[i] = jmax (jmin (src[i], high), low).
// Asserts that high is not below low. With JUCE_USE_VDSP_FRAMEWORK set the call is forwarded to
// vDSP_vclipD (bounds passed by address); otherwise the vector-op macro is used, with `lo` and `hi`
// holding the bounds loaded through Mode::load1.
// NOTE(review): JUCE_PERFORM_VEC_OP_SRC_DEST is defined outside this excerpt - confirm its path selection.
964void JUCE_CALLTYPE FloatVectorOperations::clip (double* dest, const double* src, double low, double high, int num) noexcept
965{
966 jassert(high >= low);
967
968 #if JUCE_USE_VDSP_FRAMEWORK
969 vDSP_vclipD ((double*) src, 1, &low, &high, dest, 1, (vDSP_Length) num);
970 #else
971 JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmax (jmin (src[i], high), low), Mode::max (Mode::min (s, hi), lo),
972 JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
973 const Mode::ParallelType lo = Mode::load1 (low); const Mode::ParallelType hi = Mode::load1 (high);)
974 #endif
975}
976
977Range<float> JUCE_CALLTYPE FloatVectorOperations::findMinAndMax (const float* src, int num) noexcept
978{
979 #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
980 return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps32>::findMinAndMax (src, num);
981 #else
983 #endif
984}
985
986Range<double> JUCE_CALLTYPE FloatVectorOperations::findMinAndMax (const double* src, int num) noexcept
987{
988 #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
989 return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps64>::findMinAndMax (src, num);
990 #else
992 #endif
993}
994
995float JUCE_CALLTYPE FloatVectorOperations::findMinimum (const float* src, int num) noexcept
996{
997 #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
998 return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps32>::findMinOrMax (src, num, true);
999 #else
1000 return juce::findMinimum (src, num);
1001 #endif
1002}
1003
1004double JUCE_CALLTYPE FloatVectorOperations::findMinimum (const double* src, int num) noexcept
1005{
1006 #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
1007 return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps64>::findMinOrMax (src, num, true);
1008 #else
1009 return juce::findMinimum (src, num);
1010 #endif
1011}
1012
1013float JUCE_CALLTYPE FloatVectorOperations::findMaximum (const float* src, int num) noexcept
1014{
1015 #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
1016 return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps32>::findMinOrMax (src, num, false);
1017 #else
1018 return juce::findMaximum (src, num);
1019 #endif
1020}
1021
1022double JUCE_CALLTYPE FloatVectorOperations::findMaximum (const double* src, int num) noexcept
1023{
1024 #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
1025 return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps64>::findMinOrMax (src, num, false);
1026 #else
1027 return juce::findMaximum (src, num);
1028 #endif
1029}
1030
1031intptr_t JUCE_CALLTYPE FloatVectorOperations::getFpStatusRegister() noexcept
1032{
1033 intptr_t fpsr = 0;
1034 #if JUCE_INTEL && JUCE_USE_SSE_INTRINSICS
1035 fpsr = static_cast<intptr_t> (_mm_getcsr());
1036 #elif defined (__arm64__) || defined (__aarch64__) || JUCE_USE_ARM_NEON
1037 #if defined (__arm64__) || defined (__aarch64__)
1038 asm volatile("mrs %0, fpcr" : "=r" (fpsr));
1039 #elif JUCE_USE_ARM_NEON
1040 asm volatile("vmrs %0, fpscr" : "=r" (fpsr));
1041 #endif
1042 #else
1043 #if ! (defined (JUCE_INTEL) || defined (JUCE_ARM))
1044 jassertfalse; // No support for getting the floating point status register for your platform
1045 #endif
1046 #endif
1047
1048 return fpsr;
1049}
1050
1051void JUCE_CALLTYPE FloatVectorOperations::setFpStatusRegister (intptr_t fpsr) noexcept
1052{
1053 #if JUCE_INTEL && JUCE_USE_SSE_INTRINSICS
1054 auto fpsr_w = static_cast<uint32_t> (fpsr);
1055 _mm_setcsr (fpsr_w);
1056 #elif defined (__arm64__) || defined (__aarch64__) || JUCE_USE_ARM_NEON
1057 #if defined (__arm64__) || defined (__aarch64__)
1058 asm volatile("msr fpcr, %0" : : "ri" (fpsr));
1059 #elif JUCE_USE_ARM_NEON
1060 asm volatile("vmsr fpscr, %0" : : "ri" (fpsr));
1061 #endif
1062 #else
1063 #if ! (defined (JUCE_INTEL) || defined (JUCE_ARM))
1064 jassertfalse; // No support for getting the floating point status register for your platform
1065 #endif
1066 ignoreUnused (fpsr);
1067 #endif
1068}
1069
1071{
1072 #if JUCE_USE_SSE_INTRINSICS || (JUCE_USE_ARM_NEON || defined (__arm64__) || defined (__aarch64__))
1073 #if JUCE_USE_SSE_INTRINSICS
1075 #else /*JUCE_USE_ARM_NEON*/
1076 intptr_t mask = (1 << 24 /* FZ */);
1077 #endif
1078 setFpStatusRegister ((getFpStatusRegister() & (~mask)) | (shouldEnable ? mask : 0));
1079 #else
1080 #if ! (defined (JUCE_INTEL) || defined (JUCE_ARM))
1081 jassertfalse; // No support for flush to zero mode on your platform
1082 #endif
1083 ignoreUnused (shouldEnable);
1084 #endif
1085}
1086
1088{
1089 #if JUCE_USE_SSE_INTRINSICS || (JUCE_USE_ARM_NEON || defined (__arm64__) || defined (__aarch64__))
1090 #if JUCE_USE_SSE_INTRINSICS
1091 intptr_t mask = 0x8040;
1092 #else /*JUCE_USE_ARM_NEON*/
1093 intptr_t mask = (1 << 24 /* FZ */);
1094 #endif
1095
1096 setFpStatusRegister ((getFpStatusRegister() & (~mask)) | (shouldDisable ? mask : 0));
1097 #else
1098 ignoreUnused (shouldDisable);
1099
1100 #if ! (defined (JUCE_INTEL) || defined (JUCE_ARM))
1101 jassertfalse; // No support for disable denormals mode on your platform
1102 #endif
1103 #endif
1104}
1105
1107{
1108 #if JUCE_USE_SSE_INTRINSICS || (JUCE_USE_ARM_NEON || defined (__arm64__) || defined (__aarch64__))
1109 #if JUCE_USE_SSE_INTRINSICS
1110 intptr_t mask = 0x8040;
1111 #else /*JUCE_USE_ARM_NEON*/
1112 intptr_t mask = (1 << 24 /* FZ */);
1113 #endif
1114
1115 return ((getFpStatusRegister() & mask) == mask);
1116 #else
1117 return false;
1118 #endif
1119}
1120
1121ScopedNoDenormals::ScopedNoDenormals() noexcept
1122{
1123 #if JUCE_USE_SSE_INTRINSICS || (JUCE_USE_ARM_NEON || defined (__arm64__) || defined (__aarch64__))
1124 #if JUCE_USE_SSE_INTRINSICS
1125 intptr_t mask = 0x8040;
1126 #else /*JUCE_USE_ARM_NEON*/
1127 intptr_t mask = (1 << 24 /* FZ */);
1128 #endif
1129
1130 fpsr = FloatVectorOperations::getFpStatusRegister();
1131 FloatVectorOperations::setFpStatusRegister (fpsr | mask);
1132 #endif
1133}
1134
1135ScopedNoDenormals::~ScopedNoDenormals() noexcept
1136{
1137 #if JUCE_USE_SSE_INTRINSICS || (JUCE_USE_ARM_NEON || defined (__arm64__) || defined (__aarch64__))
1138 FloatVectorOperations::setFpStatusRegister (fpsr);
1139 #endif
1140}
1141
1142
1143//==============================================================================
1144//==============================================================================
1145#if JUCE_UNIT_TESTS
1146
1147class FloatVectorOperationsTests : public UnitTest
1148{
1149public:
1151 : UnitTest ("FloatVectorOperations", UnitTestCategories::audio)
1152 {}
1153
1154 template <typename ValueType>
1155 struct TestRunner
1156 {
1157 static void runTest (UnitTest& u, Random random)
1158 {
1159 const int range = random.nextBool() ? 500 : 10;
1160 const int num = random.nextInt (range) + 1;
1161
1162 HeapBlock<ValueType> buffer1 (num + 16), buffer2 (num + 16);
1163 HeapBlock<int> buffer3 (num + 16);
1164
1165 #if JUCE_ARM
1166 ValueType* const data1 = buffer1;
1167 ValueType* const data2 = buffer2;
1168 int* const int1 = buffer3;
1169 #else
1170 // These tests deliberately operate on misaligned memory and will be flagged up by
1171 // checks for undefined behavior!
1172 ValueType* const data1 = addBytesToPointer (buffer1.get(), random.nextInt (16));
1173 ValueType* const data2 = addBytesToPointer (buffer2.get(), random.nextInt (16));
1174 int* const int1 = addBytesToPointer (buffer3.get(), random.nextInt (16));
1175 #endif
1176
1177 fillRandomly (random, data1, num);
1178 fillRandomly (random, data2, num);
1179
1180 Range<ValueType> minMax1 (FloatVectorOperations::findMinAndMax (data1, num));
1181 Range<ValueType> minMax2 (Range<ValueType>::findMinAndMax (data1, num));
1182 u.expect (minMax1 == minMax2);
1183
1184 u.expect (valuesMatch (FloatVectorOperations::findMinimum (data1, num), juce::findMinimum (data1, num)));
1185 u.expect (valuesMatch (FloatVectorOperations::findMaximum (data1, num), juce::findMaximum (data1, num)));
1186
1187 u.expect (valuesMatch (FloatVectorOperations::findMinimum (data2, num), juce::findMinimum (data2, num)));
1188 u.expect (valuesMatch (FloatVectorOperations::findMaximum (data2, num), juce::findMaximum (data2, num)));
1189
1190 FloatVectorOperations::clear (data1, num);
1191 u.expect (areAllValuesEqual (data1, num, 0));
1192
1193 FloatVectorOperations::fill (data1, (ValueType) 2, num);
1194 u.expect (areAllValuesEqual (data1, num, (ValueType) 2));
1195
1196 FloatVectorOperations::add (data1, (ValueType) 2, num);
1197 u.expect (areAllValuesEqual (data1, num, (ValueType) 4));
1198
1199 FloatVectorOperations::copy (data2, data1, num);
1200 u.expect (areAllValuesEqual (data2, num, (ValueType) 4));
1201
1202 FloatVectorOperations::add (data2, data1, num);
1203 u.expect (areAllValuesEqual (data2, num, (ValueType) 8));
1204
1205 FloatVectorOperations::copyWithMultiply (data2, data1, (ValueType) 4, num);
1206 u.expect (areAllValuesEqual (data2, num, (ValueType) 16));
1207
1208 FloatVectorOperations::addWithMultiply (data2, data1, (ValueType) 4, num);
1209 u.expect (areAllValuesEqual (data2, num, (ValueType) 32));
1210
1211 FloatVectorOperations::multiply (data1, (ValueType) 2, num);
1212 u.expect (areAllValuesEqual (data1, num, (ValueType) 8));
1213
1214 FloatVectorOperations::multiply (data1, data2, num);
1215 u.expect (areAllValuesEqual (data1, num, (ValueType) 256));
1216
1217 FloatVectorOperations::negate (data2, data1, num);
1218 u.expect (areAllValuesEqual (data2, num, (ValueType) -256));
1219
1220 FloatVectorOperations::subtract (data1, data2, num);
1221 u.expect (areAllValuesEqual (data1, num, (ValueType) 512));
1222
1223 FloatVectorOperations::abs (data1, data2, num);
1224 u.expect (areAllValuesEqual (data1, num, (ValueType) 256));
1225
1226 FloatVectorOperations::abs (data2, data1, num);
1227 u.expect (areAllValuesEqual (data2, num, (ValueType) 256));
1228
1229 fillRandomly (random, int1, num);
1230 doConversionTest (u, data1, data2, int1, num);
1231
1232 FloatVectorOperations::fill (data1, (ValueType) 2, num);
1233 FloatVectorOperations::fill (data2, (ValueType) 3, num);
1234 FloatVectorOperations::addWithMultiply (data1, data1, data2, num);
1235 u.expect (areAllValuesEqual (data1, num, (ValueType) 8));
1236 }
1237
1238 static void doConversionTest (UnitTest& u, float* data1, float* data2, int* const int1, int num)
1239 {
1240 FloatVectorOperations::convertFixedToFloat (data1, int1, 2.0f, num);
1241 convertFixed (data2, int1, 2.0f, num);
1242 u.expect (buffersMatch (data1, data2, num));
1243 }
1244
1245 static void doConversionTest (UnitTest&, double*, double*, int*, int) {}
1246
1247 static void fillRandomly (Random& random, ValueType* d, int num)
1248 {
1249 while (--num >= 0)
1250 *d++ = (ValueType) (random.nextDouble() * 1000.0);
1251 }
1252
1253 static void fillRandomly (Random& random, int* d, int num)
1254 {
1255 while (--num >= 0)
1256 *d++ = random.nextInt();
1257 }
1258
1259 static void convertFixed (float* d, const int* s, ValueType multiplier, int num)
1260 {
1261 while (--num >= 0)
1262 *d++ = (float) *s++ * multiplier;
1263 }
1264
1265 static bool areAllValuesEqual (const ValueType* d, int num, ValueType target)
1266 {
1267 while (--num >= 0)
1268 if (*d++ != target)
1269 return false;
1270
1271 return true;
1272 }
1273
1274 static bool buffersMatch (const ValueType* d1, const ValueType* d2, int num)
1275 {
1276 while (--num >= 0)
1277 if (! valuesMatch (*d1++, *d2++))
1278 return false;
1279
1280 return true;
1281 }
1282
1283 static bool valuesMatch (ValueType v1, ValueType v2)
1284 {
1285 return std::abs (v1 - v2) < std::numeric_limits<ValueType>::epsilon();
1286 }
1287 };
1288
1289 void runTest() override
1290 {
1291 beginTest ("FloatVectorOperations");
1292
1293 for (int i = 1000; --i >= 0;)
1294 {
1295 TestRunner<float>::runTest (*this, getRandom());
1296 TestRunner<double>::runTest (*this, getRandom());
1297 }
1298 }
1299};
1300
1301static FloatVectorOperationsTests vectorOpTests;
1302
1303#endif
1304
1305} // namespace juce
Array()=default
static void JUCE_CALLTYPE convertFixedToFloat(float *dest, const int *src, float multiplier, int numValues) noexcept
static void JUCE_CALLTYPE disableDenormalisedNumberSupport(bool shouldDisable=true) noexcept
static void JUCE_CALLTYPE max(float *dest, const float *src, float comp, int num) noexcept
static void JUCE_CALLTYPE negate(float *dest, const float *src, int numValues) noexcept
static float JUCE_CALLTYPE findMaximum(const float *src, int numValues) noexcept
static void JUCE_CALLTYPE fill(float *dest, float valueToFill, int numValues) noexcept
static void JUCE_CALLTYPE multiply(float *dest, const float *src, int numValues) noexcept
static void JUCE_CALLTYPE enableFlushToZeroMode(bool shouldEnable) noexcept
static void JUCE_CALLTYPE clear(float *dest, int numValues) noexcept
static void JUCE_CALLTYPE subtract(float *dest, const float *src, int numValues) noexcept
static void JUCE_CALLTYPE clip(float *dest, const float *src, float low, float high, int num) noexcept
static void JUCE_CALLTYPE copyWithMultiply(float *dest, const float *src, float multiplier, int numValues) noexcept
static void JUCE_CALLTYPE copy(float *dest, const float *src, int numValues) noexcept
static void JUCE_CALLTYPE subtractWithMultiply(float *dest, const float *src, float multiplier, int numValues) noexcept
static void JUCE_CALLTYPE addWithMultiply(float *dest, const float *src, float multiplier, int numValues) noexcept
static void JUCE_CALLTYPE min(float *dest, const float *src, float comp, int num) noexcept
static float JUCE_CALLTYPE findMinimum(const float *src, int numValues) noexcept
static Range< float > JUCE_CALLTYPE findMinAndMax(const float *src, int numValues) noexcept
static void JUCE_CALLTYPE abs(float *dest, const float *src, int numValues) noexcept
static void JUCE_CALLTYPE add(float *dest, float amountToAdd, int numValues) noexcept
static bool JUCE_CALLTYPE areDenormalsDisabled() noexcept
static Range findMinAndMax(const ValueType *values, int numValues) noexcept
Definition juce_Range.h:273