58 #ifndef INCLUDED_volk_32f_x2_min_32f_a_H
59 #define INCLUDED_volk_32f_x2_min_32f_a_H
65 #include <xmmintrin.h>
/*!
 * \brief Element-wise minimum of two float vectors (SSE, aligned buffers).
 *
 * Writes min(aVector[i], bVector[i]) to cVector[i] for every i in
 * [0, num_points). Four floats are handled per vector iteration; the
 * remaining num_points % 4 elements are handled by a scalar tail loop.
 *
 * \param cVector    Output buffer, at least num_points floats.
 * \param aVector    First input buffer, at least num_points floats.
 * \param bVector    Second input buffer, at least num_points floats.
 * \param num_points Number of elements to process; 0 is a no-op.
 *
 * The aligned load/store intrinsics are used, so all three buffers must be
 * 16-byte aligned.
 *
 * NOTE(review): the vector-loop body was restored from the intrinsics this
 * kernel is cross-referenced against (_mm_load_ps, _mm_min_ps, _mm_store_ps);
 * confirm against the upstream implementation.
 */
static inline void volk_32f_x2_min_32f_a_sse(float* cVector,
                                             const float* aVector,
                                             const float* bVector,
                                             unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int quarterPoints = num_points / 4;

    float* cPtr = cVector;
    const float* aPtr = aVector;
    const float* bPtr = bVector;

    __m128 aVal, bVal, cVal;
    for (; number < quarterPoints; number++) {
        aVal = _mm_load_ps(aPtr);
        bVal = _mm_load_ps(bPtr);

        cVal = _mm_min_ps(aVal, bVal);

        _mm_store_ps(cPtr, cVal);

        /* Advance by one vector so the tail loop resumes at the first
         * unprocessed element. */
        aPtr += 4;
        bPtr += 4;
        cPtr += 4;
    }

    number = quarterPoints * 4;
    for (; number < num_points; number++) {
        const float a = *aPtr++;
        const float b = *bPtr++;
        *cPtr++ = (a < b ? a : b);
    }
}
104 #include <arm_neon.h>
/*!
 * \brief Element-wise minimum of two float vectors (ARM NEON).
 *
 * Writes the minimum of aVector[i] and bVector[i] to cVector[i] for every i
 * in [0, num_points). Four floats are handled per vector iteration with
 * vminq_f32; the remaining num_points % 4 elements are handled by a scalar
 * tail loop.
 *
 * \param cVector    Output buffer, at least num_points floats.
 * \param aVector    First input buffer, at least num_points floats.
 * \param bVector    Second input buffer, at least num_points floats.
 * \param num_points Number of elements to process; 0 is a no-op.
 */
static inline void volk_32f_x2_min_32f_neon(float* cVector,
                                            const float* aVector,
                                            const float* bVector,
                                            unsigned int num_points)
{
    float* cPtr = cVector;
    const float* aPtr = aVector;
    const float* bPtr = bVector;
    unsigned int number = 0;
    unsigned int quarter_points = num_points / 4;

    float32x4_t a_vec, b_vec, c_vec;
    for (number = 0; number < quarter_points; number++) {
        a_vec = vld1q_f32(aPtr);
        b_vec = vld1q_f32(bPtr);

        c_vec = vminq_f32(a_vec, b_vec);

        vst1q_f32(cPtr, c_vec);

        /* Advance by one vector so the tail loop resumes at the first
         * unprocessed element. */
        aPtr += 4;
        bPtr += 4;
        cPtr += 4;
    }

    for (number = quarter_points * 4; number < num_points; number++) {
        const float a = *aPtr++;
        const float b = *bPtr++;
        *cPtr++ = (a < b ? a : b);
    }
}
139 #ifdef LV_HAVE_GENERIC
/*!
 * \brief Element-wise minimum of two float vectors (portable scalar version).
 *
 * For every i in [0, num_points) stores aVector[i] to cVector[i] when
 * aVector[i] < bVector[i], and bVector[i] otherwise.
 *
 * \param cVector    Output buffer, at least num_points floats.
 * \param aVector    First input buffer, at least num_points floats.
 * \param bVector    Second input buffer, at least num_points floats.
 * \param num_points Number of elements to process; 0 is a no-op.
 */
static inline void volk_32f_x2_min_32f_generic(float* cVector,
                                               const float* aVector,
                                               const float* bVector,
                                               unsigned int num_points)
{
    float* cPtr = cVector;
    const float* aPtr = aVector;
    const float* bPtr = bVector;
    unsigned int number = 0;

    for (number = 0; number < num_points; number++) {
        const float a = *aPtr++;
        const float b = *bPtr++;
        *cPtr++ = (a < b ? a : b);
    }
}
/*!
 * \brief ORC implementation of the element-wise minimum; defined outside this
 *        header.
 */
extern void volk_32f_x2_min_32f_a_orc_impl(float* cVector,
                                           const float* aVector,
                                           const float* bVector,
                                           unsigned int num_points);

/*!
 * \brief Element-wise minimum of two float vectors via the ORC implementation.
 *
 * Thin wrapper that forwards all arguments unchanged to
 * volk_32f_x2_min_32f_a_orc_impl().
 *
 * \param cVector    Output buffer.
 * \param aVector    First input buffer.
 * \param bVector    Second input buffer.
 * \param num_points Number of elements to process.
 */
static inline void volk_32f_x2_min_32f_u_orc(float* cVector,
                                             const float* aVector,
                                             const float* bVector,
                                             unsigned int num_points)
{
    volk_32f_x2_min_32f_a_orc_impl(cVector, aVector, bVector, num_points);
}
177 #include <immintrin.h>
/*!
 * \brief Element-wise minimum of two float vectors (AVX, aligned buffers).
 *
 * Writes min(aVector[i], bVector[i]) to cVector[i] for every i in
 * [0, num_points). Eight floats are handled per vector iteration; the
 * remaining num_points % 8 elements are handled by a scalar tail loop.
 *
 * \param cVector    Output buffer, at least num_points floats.
 * \param aVector    First input buffer, at least num_points floats.
 * \param bVector    Second input buffer, at least num_points floats.
 * \param num_points Number of elements to process; 0 is a no-op.
 *
 * The aligned load/store intrinsics are used, so all three buffers must be
 * 32-byte aligned.
 */
static inline void volk_32f_x2_min_32f_a_avx(float* cVector,
                                             const float* aVector,
                                             const float* bVector,
                                             unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int eighthPoints = num_points / 8;

    float* cPtr = cVector;
    const float* aPtr = aVector;
    const float* bPtr = bVector;

    __m256 aVal, bVal, cVal;
    for (; number < eighthPoints; number++) {
        aVal = _mm256_load_ps(aPtr);
        bVal = _mm256_load_ps(bPtr);

        cVal = _mm256_min_ps(aVal, bVal);

        _mm256_store_ps(cPtr, cVal);

        /* Advance by one vector so the tail loop resumes at the first
         * unprocessed element. */
        aPtr += 8;
        bPtr += 8;
        cPtr += 8;
    }

    number = eighthPoints * 8;
    for (; number < num_points; number++) {
        const float a = *aPtr++;
        const float b = *bPtr++;
        *cPtr++ = (a < b ? a : b);
    }
}
214 #ifdef LV_HAVE_AVX512F
215 #include <immintrin.h>
/*!
 * \brief Element-wise minimum of two float vectors (AVX-512F, aligned buffers).
 *
 * Writes min(aVector[i], bVector[i]) to cVector[i] for every i in
 * [0, num_points). Sixteen floats are handled per vector iteration; the
 * remaining num_points % 16 elements are handled by a scalar tail loop.
 *
 * \param cVector    Output buffer, at least num_points floats.
 * \param aVector    First input buffer, at least num_points floats.
 * \param bVector    Second input buffer, at least num_points floats.
 * \param num_points Number of elements to process; 0 is a no-op.
 *
 * The aligned load/store intrinsics are used, so all three buffers must be
 * 64-byte aligned.
 */
static inline void volk_32f_x2_min_32f_a_avx512f(float* cVector,
                                                 const float* aVector,
                                                 const float* bVector,
                                                 unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int sixteenthPoints = num_points / 16;

    float* cPtr = cVector;
    const float* aPtr = aVector;
    const float* bPtr = bVector;

    __m512 aVal, bVal, cVal;
    for (; number < sixteenthPoints; number++) {
        aVal = _mm512_load_ps(aPtr);
        bVal = _mm512_load_ps(bPtr);

        cVal = _mm512_min_ps(aVal, bVal);

        _mm512_store_ps(cPtr, cVal);

        /* Advance by one vector so the tail loop resumes at the first
         * unprocessed element. */
        aPtr += 16;
        bPtr += 16;
        cPtr += 16;
    }

    number = sixteenthPoints * 16;
    for (; number < num_points; number++) {
        const float a = *aPtr++;
        const float b = *bPtr++;
        *cPtr++ = (a < b ? a : b);
    }
}
255 #ifndef INCLUDED_volk_32f_x2_min_32f_u_H
256 #define INCLUDED_volk_32f_x2_min_32f_u_H
258 #include <inttypes.h>
261 #ifdef LV_HAVE_AVX512F
262 #include <immintrin.h>
/*!
 * \brief Element-wise minimum of two float vectors (AVX-512F, unaligned
 *        buffers).
 *
 * Writes min(aVector[i], bVector[i]) to cVector[i] for every i in
 * [0, num_points). Sixteen floats are handled per vector iteration using
 * unaligned loads and stores; the remaining num_points % 16 elements are
 * handled by a scalar tail loop.
 *
 * \param cVector    Output buffer, at least num_points floats.
 * \param aVector    First input buffer, at least num_points floats.
 * \param bVector    Second input buffer, at least num_points floats.
 * \param num_points Number of elements to process; 0 is a no-op.
 */
static inline void volk_32f_x2_min_32f_u_avx512f(float* cVector,
                                                 const float* aVector,
                                                 const float* bVector,
                                                 unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int sixteenthPoints = num_points / 16;

    float* cPtr = cVector;
    const float* aPtr = aVector;
    const float* bPtr = bVector;

    __m512 aVal, bVal, cVal;
    for (; number < sixteenthPoints; number++) {
        aVal = _mm512_loadu_ps(aPtr);
        bVal = _mm512_loadu_ps(bPtr);

        cVal = _mm512_min_ps(aVal, bVal);

        _mm512_storeu_ps(cPtr, cVal);

        /* Advance by one vector so the tail loop resumes at the first
         * unprocessed element. */
        aPtr += 16;
        bPtr += 16;
        cPtr += 16;
    }

    number = sixteenthPoints * 16;
    for (; number < num_points; number++) {
        const float a = *aPtr++;
        const float b = *bPtr++;
        *cPtr++ = (a < b ? a : b);
    }
}
300 #include <immintrin.h>
/*!
 * \brief Element-wise minimum of two float vectors (AVX, unaligned buffers).
 *
 * Writes min(aVector[i], bVector[i]) to cVector[i] for every i in
 * [0, num_points). Eight floats are handled per vector iteration using
 * unaligned loads and stores; the remaining num_points % 8 elements are
 * handled by a scalar tail loop.
 *
 * \param cVector    Output buffer, at least num_points floats.
 * \param aVector    First input buffer, at least num_points floats.
 * \param bVector    Second input buffer, at least num_points floats.
 * \param num_points Number of elements to process; 0 is a no-op.
 */
static inline void volk_32f_x2_min_32f_u_avx(float* cVector,
                                             const float* aVector,
                                             const float* bVector,
                                             unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int eighthPoints = num_points / 8;

    float* cPtr = cVector;
    const float* aPtr = aVector;
    const float* bPtr = bVector;

    __m256 aVal, bVal, cVal;
    for (; number < eighthPoints; number++) {
        aVal = _mm256_loadu_ps(aPtr);
        bVal = _mm256_loadu_ps(bPtr);

        cVal = _mm256_min_ps(aVal, bVal);

        _mm256_storeu_ps(cPtr, cVal);

        /* Advance by one vector so the tail loop resumes at the first
         * unprocessed element. */
        aPtr += 8;
        bPtr += 8;
        cPtr += 8;
    }

    number = eighthPoints * 8;
    for (; number < num_points; number++) {
        const float a = *aPtr++;
        const float b = *bPtr++;
        *cPtr++ = (a < b ? a : b);
    }
}
/*
 * Cross-reference index carried over from the generated documentation page:
 *
 * float32x4_t __m128
 *     Definition: sse2neon.h:235
 * FORCE_INLINE __m128 _mm_load_ps(const float *p)
 *     Definition: sse2neon.h:1858
 * FORCE_INLINE void _mm_store_ps(float *p, __m128 a)
 *     Definition: sse2neon.h:2704
 * FORCE_INLINE __m128 _mm_min_ps(__m128 a, __m128 b)
 *     Definition: sse2neon.h:2080
 * static void volk_32f_x2_min_32f_a_sse(float *cVector, const float *aVector, const float *bVector, unsigned int num_points)
 *     Definition: volk_32f_x2_min_32f.h:67
 * static void volk_32f_x2_min_32f_neon(float *cVector, const float *aVector, const float *bVector, unsigned int num_points)
 *     Definition: volk_32f_x2_min_32f.h:106
 * static void volk_32f_x2_min_32f_generic(float *cVector, const float *aVector, const float *bVector, unsigned int num_points)
 *     Definition: volk_32f_x2_min_32f.h:141
 * static void volk_32f_x2_min_32f_u_avx(float *cVector, const float *aVector, const float *bVector, unsigned int num_points)
 *     Definition: volk_32f_x2_min_32f.h:302
 * static void volk_32f_x2_min_32f_a_avx(float *cVector, const float *aVector, const float *bVector, unsigned int num_points)
 *     Definition: volk_32f_x2_min_32f.h:179
 */