Vector Optimized Library of Kernels  3.0.0
Architecture-tuned implementations of math kernels
volk_32fc_magnitude_32f.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of VOLK
6  *
7  * SPDX-License-Identifier: LGPL-3.0-or-later
8  */
9 
58 #ifndef INCLUDED_volk_32fc_magnitude_32f_u_H
59 #define INCLUDED_volk_32fc_magnitude_32f_u_H
60 
61 #include <inttypes.h>
62 #include <math.h>
63 #include <stdio.h>
64 
65 #ifdef LV_HAVE_AVX
66 #include <immintrin.h>
68 
69 static inline void volk_32fc_magnitude_32f_u_avx(float* magnitudeVector,
70  const lv_32fc_t* complexVector,
71  unsigned int num_points)
72 {
73  unsigned int number = 0;
74  const unsigned int eighthPoints = num_points / 8;
75 
76  const float* complexVectorPtr = (float*)complexVector;
77  float* magnitudeVectorPtr = magnitudeVector;
78 
79  __m256 cplxValue1, cplxValue2, result;
80 
81  for (; number < eighthPoints; number++) {
82  cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
83  cplxValue2 = _mm256_loadu_ps(complexVectorPtr + 8);
84  result = _mm256_magnitude_ps(cplxValue1, cplxValue2);
85  _mm256_storeu_ps(magnitudeVectorPtr, result);
86 
87  complexVectorPtr += 16;
88  magnitudeVectorPtr += 8;
89  }
90 
91  number = eighthPoints * 8;
92  for (; number < num_points; number++) {
93  float val1Real = *complexVectorPtr++;
94  float val1Imag = *complexVectorPtr++;
95  *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
96  }
97 }
98 #endif /* LV_HAVE_AVX */
99 
100 #ifdef LV_HAVE_SSE3
101 #include <pmmintrin.h>
103 
104 static inline void volk_32fc_magnitude_32f_u_sse3(float* magnitudeVector,
105  const lv_32fc_t* complexVector,
106  unsigned int num_points)
107 {
108  unsigned int number = 0;
109  const unsigned int quarterPoints = num_points / 4;
110 
111  const float* complexVectorPtr = (float*)complexVector;
112  float* magnitudeVectorPtr = magnitudeVector;
113 
114  __m128 cplxValue1, cplxValue2, result;
115  for (; number < quarterPoints; number++) {
116  cplxValue1 = _mm_loadu_ps(complexVectorPtr);
117  complexVectorPtr += 4;
118 
119  cplxValue2 = _mm_loadu_ps(complexVectorPtr);
120  complexVectorPtr += 4;
121 
122  result = _mm_magnitude_ps_sse3(cplxValue1, cplxValue2);
123 
124  _mm_storeu_ps(magnitudeVectorPtr, result);
125  magnitudeVectorPtr += 4;
126  }
127 
128  number = quarterPoints * 4;
129  for (; number < num_points; number++) {
130  float val1Real = *complexVectorPtr++;
131  float val1Imag = *complexVectorPtr++;
132  *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
133  }
134 }
135 #endif /* LV_HAVE_SSE3 */
136 
137 
138 #ifdef LV_HAVE_SSE
140 #include <xmmintrin.h>
141 
142 static inline void volk_32fc_magnitude_32f_u_sse(float* magnitudeVector,
143  const lv_32fc_t* complexVector,
144  unsigned int num_points)
145 {
146  unsigned int number = 0;
147  const unsigned int quarterPoints = num_points / 4;
148 
149  const float* complexVectorPtr = (float*)complexVector;
150  float* magnitudeVectorPtr = magnitudeVector;
151 
152  __m128 cplxValue1, cplxValue2, result;
153 
154  for (; number < quarterPoints; number++) {
155  cplxValue1 = _mm_loadu_ps(complexVectorPtr);
156  complexVectorPtr += 4;
157 
158  cplxValue2 = _mm_loadu_ps(complexVectorPtr);
159  complexVectorPtr += 4;
160 
161  result = _mm_magnitude_ps(cplxValue1, cplxValue2);
162  _mm_storeu_ps(magnitudeVectorPtr, result);
163  magnitudeVectorPtr += 4;
164  }
165 
166  number = quarterPoints * 4;
167  for (; number < num_points; number++) {
168  float val1Real = *complexVectorPtr++;
169  float val1Imag = *complexVectorPtr++;
170  *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
171  }
172 }
173 #endif /* LV_HAVE_SSE */
174 
175 
176 #ifdef LV_HAVE_GENERIC
177 
178 static inline void volk_32fc_magnitude_32f_generic(float* magnitudeVector,
179  const lv_32fc_t* complexVector,
180  unsigned int num_points)
181 {
182  const float* complexVectorPtr = (float*)complexVector;
183  float* magnitudeVectorPtr = magnitudeVector;
184  unsigned int number = 0;
185  for (number = 0; number < num_points; number++) {
186  const float real = *complexVectorPtr++;
187  const float imag = *complexVectorPtr++;
188  *magnitudeVectorPtr++ = sqrtf((real * real) + (imag * imag));
189  }
190 }
191 #endif /* LV_HAVE_GENERIC */
192 
193 
194 #endif /* INCLUDED_volk_32fc_magnitude_32f_u_H */
195 #ifndef INCLUDED_volk_32fc_magnitude_32f_a_H
196 #define INCLUDED_volk_32fc_magnitude_32f_a_H
197 
198 #include <inttypes.h>
199 #include <math.h>
200 #include <stdio.h>
201 
202 #ifdef LV_HAVE_AVX
203 #include <immintrin.h>
205 
206 static inline void volk_32fc_magnitude_32f_a_avx(float* magnitudeVector,
207  const lv_32fc_t* complexVector,
208  unsigned int num_points)
209 {
210  unsigned int number = 0;
211  const unsigned int eighthPoints = num_points / 8;
212 
213  const float* complexVectorPtr = (float*)complexVector;
214  float* magnitudeVectorPtr = magnitudeVector;
215 
216  __m256 cplxValue1, cplxValue2, result;
217  for (; number < eighthPoints; number++) {
218  cplxValue1 = _mm256_load_ps(complexVectorPtr);
219  complexVectorPtr += 8;
220 
221  cplxValue2 = _mm256_load_ps(complexVectorPtr);
222  complexVectorPtr += 8;
223 
224  result = _mm256_magnitude_ps(cplxValue1, cplxValue2);
225  _mm256_store_ps(magnitudeVectorPtr, result);
226  magnitudeVectorPtr += 8;
227  }
228 
229  number = eighthPoints * 8;
230  for (; number < num_points; number++) {
231  float val1Real = *complexVectorPtr++;
232  float val1Imag = *complexVectorPtr++;
233  *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
234  }
235 }
236 #endif /* LV_HAVE_AVX */
237 
238 #ifdef LV_HAVE_SSE3
239 #include <pmmintrin.h>
241 
242 static inline void volk_32fc_magnitude_32f_a_sse3(float* magnitudeVector,
243  const lv_32fc_t* complexVector,
244  unsigned int num_points)
245 {
246  unsigned int number = 0;
247  const unsigned int quarterPoints = num_points / 4;
248 
249  const float* complexVectorPtr = (float*)complexVector;
250  float* magnitudeVectorPtr = magnitudeVector;
251 
252  __m128 cplxValue1, cplxValue2, result;
253  for (; number < quarterPoints; number++) {
254  cplxValue1 = _mm_load_ps(complexVectorPtr);
255  complexVectorPtr += 4;
256 
257  cplxValue2 = _mm_load_ps(complexVectorPtr);
258  complexVectorPtr += 4;
259 
260  result = _mm_magnitude_ps_sse3(cplxValue1, cplxValue2);
261  _mm_store_ps(magnitudeVectorPtr, result);
262  magnitudeVectorPtr += 4;
263  }
264 
265  number = quarterPoints * 4;
266  for (; number < num_points; number++) {
267  float val1Real = *complexVectorPtr++;
268  float val1Imag = *complexVectorPtr++;
269  *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
270  }
271 }
272 #endif /* LV_HAVE_SSE3 */
273 
274 #ifdef LV_HAVE_SSE
276 #include <xmmintrin.h>
277 
278 static inline void volk_32fc_magnitude_32f_a_sse(float* magnitudeVector,
279  const lv_32fc_t* complexVector,
280  unsigned int num_points)
281 {
282  unsigned int number = 0;
283  const unsigned int quarterPoints = num_points / 4;
284 
285  const float* complexVectorPtr = (float*)complexVector;
286  float* magnitudeVectorPtr = magnitudeVector;
287 
288  __m128 cplxValue1, cplxValue2, result;
289  for (; number < quarterPoints; number++) {
290  cplxValue1 = _mm_load_ps(complexVectorPtr);
291  complexVectorPtr += 4;
292 
293  cplxValue2 = _mm_load_ps(complexVectorPtr);
294  complexVectorPtr += 4;
295 
296  result = _mm_magnitude_ps(cplxValue1, cplxValue2);
297  _mm_store_ps(magnitudeVectorPtr, result);
298  magnitudeVectorPtr += 4;
299  }
300 
301  number = quarterPoints * 4;
302  for (; number < num_points; number++) {
303  float val1Real = *complexVectorPtr++;
304  float val1Imag = *complexVectorPtr++;
305  *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
306  }
307 }
308 #endif /* LV_HAVE_SSE */
309 
310 
311 #ifdef LV_HAVE_GENERIC
312 
313 static inline void volk_32fc_magnitude_32f_a_generic(float* magnitudeVector,
314  const lv_32fc_t* complexVector,
315  unsigned int num_points)
316 {
317  const float* complexVectorPtr = (float*)complexVector;
318  float* magnitudeVectorPtr = magnitudeVector;
319  unsigned int number = 0;
320  for (number = 0; number < num_points; number++) {
321  const float real = *complexVectorPtr++;
322  const float imag = *complexVectorPtr++;
323  *magnitudeVectorPtr++ = sqrtf((real * real) + (imag * imag));
324  }
325 }
326 #endif /* LV_HAVE_GENERIC */
327 
328 
329 #ifdef LV_HAVE_NEON
330 #include <arm_neon.h>
331 
332 static inline void volk_32fc_magnitude_32f_neon(float* magnitudeVector,
333  const lv_32fc_t* complexVector,
334  unsigned int num_points)
335 {
336  unsigned int number;
337  unsigned int quarter_points = num_points / 4;
338  const float* complexVectorPtr = (float*)complexVector;
339  float* magnitudeVectorPtr = magnitudeVector;
340 
341  float32x4x2_t complex_vec;
342  float32x4_t magnitude_vec;
343  for (number = 0; number < quarter_points; number++) {
344  complex_vec = vld2q_f32(complexVectorPtr);
345  complex_vec.val[0] = vmulq_f32(complex_vec.val[0], complex_vec.val[0]);
346  magnitude_vec =
347  vmlaq_f32(complex_vec.val[0], complex_vec.val[1], complex_vec.val[1]);
348  magnitude_vec = vrsqrteq_f32(magnitude_vec);
349  magnitude_vec = vrecpeq_f32(magnitude_vec); // no plain ol' sqrt
350  vst1q_f32(magnitudeVectorPtr, magnitude_vec);
351 
352  complexVectorPtr += 8;
353  magnitudeVectorPtr += 4;
354  }
355 
356  for (number = quarter_points * 4; number < num_points; number++) {
357  const float real = *complexVectorPtr++;
358  const float imag = *complexVectorPtr++;
359  *magnitudeVectorPtr++ = sqrtf((real * real) + (imag * imag));
360  }
361 }
362 #endif /* LV_HAVE_NEON */
363 
364 
365 #ifdef LV_HAVE_NEON
383  float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points)
384 {
385  unsigned int number;
386  unsigned int quarter_points = num_points / 4;
387  const float* complexVectorPtr = (float*)complexVector;
388  float* magnitudeVectorPtr = magnitudeVector;
389 
390  const float threshold = 0.4142135;
391 
392  float32x4_t a_vec, b_vec, a_high, a_low, b_high, b_low;
393  a_high = vdupq_n_f32(0.84);
394  b_high = vdupq_n_f32(0.561);
395  a_low = vdupq_n_f32(0.99);
396  b_low = vdupq_n_f32(0.197);
397 
398  uint32x4_t comp0, comp1;
399 
400  float32x4x2_t complex_vec;
401  float32x4_t min_vec, max_vec, magnitude_vec;
402  float32x4_t real_abs, imag_abs;
403  for (number = 0; number < quarter_points; number++) {
404  complex_vec = vld2q_f32(complexVectorPtr);
405 
406  real_abs = vabsq_f32(complex_vec.val[0]);
407  imag_abs = vabsq_f32(complex_vec.val[1]);
408 
409  min_vec = vminq_f32(real_abs, imag_abs);
410  max_vec = vmaxq_f32(real_abs, imag_abs);
411 
412  // effective branch to choose coefficient pair.
413  comp0 = vcgtq_f32(min_vec, vmulq_n_f32(max_vec, threshold));
414  comp1 = vcleq_f32(min_vec, vmulq_n_f32(max_vec, threshold));
415 
416  // and 0s or 1s with coefficients from previous effective branch
417  a_vec = (float32x4_t)vaddq_s32(vandq_s32((int32x4_t)comp0, (int32x4_t)a_high),
418  vandq_s32((int32x4_t)comp1, (int32x4_t)a_low));
419  b_vec = (float32x4_t)vaddq_s32(vandq_s32((int32x4_t)comp0, (int32x4_t)b_high),
420  vandq_s32((int32x4_t)comp1, (int32x4_t)b_low));
421 
422  // coefficients chosen, do the weighted sum
423  min_vec = vmulq_f32(min_vec, b_vec);
424  max_vec = vmulq_f32(max_vec, a_vec);
425 
426  magnitude_vec = vaddq_f32(min_vec, max_vec);
427  vst1q_f32(magnitudeVectorPtr, magnitude_vec);
428 
429  complexVectorPtr += 8;
430  magnitudeVectorPtr += 4;
431  }
432 
433  for (number = quarter_points * 4; number < num_points; number++) {
434  const float real = *complexVectorPtr++;
435  const float imag = *complexVectorPtr++;
436  *magnitudeVectorPtr++ = sqrtf((real * real) + (imag * imag));
437  }
438 }
439 #endif /* LV_HAVE_NEON */
440 
441 
442 #ifdef LV_HAVE_ORC
443 
444 extern void volk_32fc_magnitude_32f_a_orc_impl(float* magnitudeVector,
445  const lv_32fc_t* complexVector,
446  unsigned int num_points);
447 
448 static inline void volk_32fc_magnitude_32f_u_orc(float* magnitudeVector,
449  const lv_32fc_t* complexVector,
450  unsigned int num_points)
451 {
452  volk_32fc_magnitude_32f_a_orc_impl(magnitudeVector, complexVector, num_points);
453 }
454 #endif /* LV_HAVE_ORC */
455 
456 
457 #endif /* INCLUDED_volk_32fc_magnitude_32f_a_H */
float32x4_t __m128
Definition: sse2neon.h:235
FORCE_INLINE void _mm_storeu_ps(float *p, __m128 a)
Definition: sse2neon.h:2787
FORCE_INLINE __m128 _mm_loadu_ps(const float *p)
Definition: sse2neon.h:1941
FORCE_INLINE __m128 _mm_load_ps(const float *p)
Definition: sse2neon.h:1858
FORCE_INLINE void _mm_store_ps(float *p, __m128 a)
Definition: sse2neon.h:2704
static void volk_32fc_magnitude_32f_a_generic(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_32f.h:313
static void volk_32fc_magnitude_32f_u_sse(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_32f.h:142
static void volk_32fc_magnitude_32f_u_sse3(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_32f.h:104
static void volk_32fc_magnitude_32f_u_avx(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_32f.h:69
static void volk_32fc_magnitude_32f_neon_fancy_sweet(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Calculates the magnitude of the complexVector and stores the results in the magnitudeVector.
Definition: volk_32fc_magnitude_32f.h:382
static void volk_32fc_magnitude_32f_generic(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_32f.h:178
static void volk_32fc_magnitude_32f_a_avx(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_32f.h:206
static void volk_32fc_magnitude_32f_neon(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_32f.h:332
static void volk_32fc_magnitude_32f_a_sse3(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_32f.h:242
static void volk_32fc_magnitude_32f_a_sse(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_32f.h:278
static __m256 _mm256_magnitude_ps(__m256 cplxValue1, __m256 cplxValue2)
Definition: volk_avx_intrinsics.h:70
float complex lv_32fc_t
Definition: volk_complex.h:74
static __m128 _mm_magnitude_ps_sse3(__m128 cplxValue1, __m128 cplxValue2)
Definition: volk_sse3_intrinsics.h:45
static __m128 _mm_magnitude_ps(__m128 cplxValue1, __m128 cplxValue2)
Definition: volk_sse_intrinsics.h:31