58 #ifndef INCLUDED_volk_32fc_magnitude_32f_u_H
59 #define INCLUDED_volk_32fc_magnitude_32f_u_H
66 #include <immintrin.h>
/*
 * Body of the unaligned AVX kernel (volk_32fc_magnitude_32f_u_avx according to
 * the cross-reference index of this listing). Writes one float magnitude to
 * magnitudeVector for every complex sample read from complexVector.
 *
 * NOTE(review): this listing omits several original lines (the embedded
 * numbering skips e.g. 69-70, 72 and 84), including the function header, the
 * braces, and the statement that assigns `result`.
 */
71 unsigned int num_points)
73 unsigned int number = 0;
/* Number of complete 8-sample groups processed by the vector loop. */
74 const unsigned int eighthPoints = num_points / 8;
/* View the complex input as a flat float array; the tail loop below reads it
 * as consecutive (real, imag) pairs. */
76 const float* complexVectorPtr = (
float*)complexVector;
77 float* magnitudeVectorPtr = magnitudeVector;
79 __m256 cplxValue1, cplxValue2, result;
81 for (; number < eighthPoints; number++) {
/* Two unaligned 8-float loads = 16 floats = 8 complex samples per iteration. */
82 cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
83 cplxValue2 = _mm256_loadu_ps(complexVectorPtr + 8);
/* NOTE(review): the assignment to `result` (embedded line 84) is not visible
 * here; presumably result = _mm256_magnitude_ps(cplxValue1, cplxValue2) —
 * confirm against the real header. */
85 _mm256_storeu_ps(magnitudeVectorPtr, result);
/* Advance past the 16 input floats consumed and the 8 outputs written. */
87 complexVectorPtr += 16;
88 magnitudeVectorPtr += 8;
/* Scalar tail: handles the remaining num_points % 8 samples with sqrtf. */
91 number = eighthPoints * 8;
92 for (; number < num_points; number++) {
93 float val1Real = *complexVectorPtr++;
94 float val1Imag = *complexVectorPtr++;
95 *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
101 #include <pmmintrin.h>
/*
 * Body of the unaligned SSE3 kernel (volk_32fc_magnitude_32f_u_sse3 according
 * to the cross-reference index of this listing). The visible scalar tail
 * writes sqrt(real^2 + imag^2) for each complex sample.
 *
 * NOTE(review): inside the vector loop only the pointer increments are
 * visible; the loads, the magnitude computation and the store (embedded lines
 * 116, 118-119, 121-124) are missing from this listing. Presumably they use
 * _mm_loadu_ps, _mm_magnitude_ps_sse3 and _mm_storeu_ps — confirm against the
 * real header.
 */
106 unsigned int num_points)
108 unsigned int number = 0;
/* Number of complete 4-sample groups processed by the vector loop. */
109 const unsigned int quarterPoints = num_points / 4;
/* View the complex input as a flat float array of (real, imag) pairs. */
111 const float* complexVectorPtr = (
float*)complexVector;
112 float* magnitudeVectorPtr = magnitudeVector;
114 __m128 cplxValue1, cplxValue2, result;
115 for (; number < quarterPoints; number++) {
/* Each iteration advances the input by 4 + 4 floats (4 complex samples) and
 * the output by 4 floats. */
117 complexVectorPtr += 4;
120 complexVectorPtr += 4;
125 magnitudeVectorPtr += 4;
/* Scalar tail: handles the remaining num_points % 4 samples with sqrtf. */
128 number = quarterPoints * 4;
129 for (; number < num_points; number++) {
130 float val1Real = *complexVectorPtr++;
131 float val1Imag = *complexVectorPtr++;
132 *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
140 #include <xmmintrin.h>
/*
 * Body of the unaligned SSE kernel (volk_32fc_magnitude_32f_u_sse according to
 * the cross-reference index of this listing). The visible scalar tail writes
 * sqrt(real^2 + imag^2) for each complex sample.
 *
 * NOTE(review): inside the vector loop only the pointer increments are
 * visible; the loads, the magnitude computation and the store (embedded lines
 * 155, 157-158, 160-162) are missing from this listing. Presumably they use
 * _mm_loadu_ps, _mm_magnitude_ps and _mm_storeu_ps — confirm against the real
 * header.
 */
144 unsigned int num_points)
146 unsigned int number = 0;
/* Number of complete 4-sample groups processed by the vector loop. */
147 const unsigned int quarterPoints = num_points / 4;
/* View the complex input as a flat float array of (real, imag) pairs. */
149 const float* complexVectorPtr = (
float*)complexVector;
150 float* magnitudeVectorPtr = magnitudeVector;
152 __m128 cplxValue1, cplxValue2, result;
154 for (; number < quarterPoints; number++) {
/* Each iteration advances the input by 4 + 4 floats (4 complex samples) and
 * the output by 4 floats. */
156 complexVectorPtr += 4;
159 complexVectorPtr += 4;
163 magnitudeVectorPtr += 4;
/* Scalar tail: handles the remaining num_points % 4 samples with sqrtf. */
166 number = quarterPoints * 4;
167 for (; number < num_points; number++) {
168 float val1Real = *complexVectorPtr++;
169 float val1Imag = *complexVectorPtr++;
170 *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
176 #ifdef LV_HAVE_GENERIC
/*
 * Body of the portable scalar kernel (volk_32fc_magnitude_32f_generic
 * according to the cross-reference index of this listing). For each of the
 * num_points complex samples it writes sqrt(real^2 + imag^2) to
 * magnitudeVector.
 *
 * NOTE(review): the function header and braces (embedded lines 178-179, 181,
 * 189+) are missing from this listing.
 */
180 unsigned int num_points)
/* View the complex input as a flat float array of (real, imag) pairs. */
182 const float* complexVectorPtr = (
float*)complexVector;
183 float* magnitudeVectorPtr = magnitudeVector;
184 unsigned int number = 0;
185 for (number = 0; number < num_points; number++) {
/* Consume two input floats and produce one output float per sample. */
186 const float real = *complexVectorPtr++;
187 const float imag = *complexVectorPtr++;
188 *magnitudeVectorPtr++ = sqrtf((real * real) + (imag * imag));
195 #ifndef INCLUDED_volk_32fc_magnitude_32f_a_H
196 #define INCLUDED_volk_32fc_magnitude_32f_a_H
198 #include <inttypes.h>
203 #include <immintrin.h>
/*
 * Body of the aligned AVX kernel (volk_32fc_magnitude_32f_a_avx according to
 * the cross-reference index of this listing). Writes one float magnitude to
 * magnitudeVector for every complex sample read from complexVector.
 *
 * The loop uses _mm256_load_ps / _mm256_store_ps, the aligned variants of the
 * AVX load/store intrinsics, so both buffers are expected to be 32-byte
 * aligned.
 *
 * NOTE(review): this listing omits several original lines (e.g. embedded
 * lines 206-207, 209 and 224), including the function header, the braces, and
 * the statement that assigns `result`.
 */
208 unsigned int num_points)
210 unsigned int number = 0;
/* Number of complete 8-sample groups processed by the vector loop. */
211 const unsigned int eighthPoints = num_points / 8;
/* View the complex input as a flat float array of (real, imag) pairs. */
213 const float* complexVectorPtr = (
float*)complexVector;
214 float* magnitudeVectorPtr = magnitudeVector;
216 __m256 cplxValue1, cplxValue2, result;
217 for (; number < eighthPoints; number++) {
/* Two aligned 8-float loads = 8 complex samples per iteration. */
218 cplxValue1 = _mm256_load_ps(complexVectorPtr);
219 complexVectorPtr += 8;
221 cplxValue2 = _mm256_load_ps(complexVectorPtr);
222 complexVectorPtr += 8;
/* NOTE(review): the assignment to `result` (embedded line 224) is not visible
 * here; presumably result = _mm256_magnitude_ps(cplxValue1, cplxValue2) —
 * confirm against the real header. */
225 _mm256_store_ps(magnitudeVectorPtr, result);
226 magnitudeVectorPtr += 8;
/* Scalar tail: handles the remaining num_points % 8 samples with sqrtf. */
229 number = eighthPoints * 8;
230 for (; number < num_points; number++) {
231 float val1Real = *complexVectorPtr++;
232 float val1Imag = *complexVectorPtr++;
233 *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
239 #include <pmmintrin.h>
/*
 * Body of the aligned SSE3 kernel (volk_32fc_magnitude_32f_a_sse3 according to
 * the cross-reference index of this listing). The visible scalar tail writes
 * sqrt(real^2 + imag^2) for each complex sample.
 *
 * NOTE(review): inside the vector loop only the pointer increments are
 * visible; the loads, the magnitude computation and the store (embedded lines
 * 254, 256-257, 259-261) are missing from this listing. Presumably they use
 * _mm_load_ps, _mm_magnitude_ps_sse3 and _mm_store_ps — confirm against the
 * real header.
 */
244 unsigned int num_points)
246 unsigned int number = 0;
/* Number of complete 4-sample groups processed by the vector loop. */
247 const unsigned int quarterPoints = num_points / 4;
/* View the complex input as a flat float array of (real, imag) pairs. */
249 const float* complexVectorPtr = (
float*)complexVector;
250 float* magnitudeVectorPtr = magnitudeVector;
252 __m128 cplxValue1, cplxValue2, result;
253 for (; number < quarterPoints; number++) {
/* Each iteration advances the input by 4 + 4 floats (4 complex samples) and
 * the output by 4 floats. */
255 complexVectorPtr += 4;
258 complexVectorPtr += 4;
262 magnitudeVectorPtr += 4;
/* Scalar tail: handles the remaining num_points % 4 samples with sqrtf. */
265 number = quarterPoints * 4;
266 for (; number < num_points; number++) {
267 float val1Real = *complexVectorPtr++;
268 float val1Imag = *complexVectorPtr++;
269 *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
276 #include <xmmintrin.h>
/*
 * Body of the aligned SSE kernel (volk_32fc_magnitude_32f_a_sse according to
 * the cross-reference index of this listing). The visible scalar tail writes
 * sqrt(real^2 + imag^2) for each complex sample.
 *
 * NOTE(review): inside the vector loop only the pointer increments are
 * visible; the loads, the magnitude computation and the store (embedded lines
 * 290, 292-293, 295-297) are missing from this listing. Presumably they use
 * _mm_load_ps, _mm_magnitude_ps and _mm_store_ps — confirm against the real
 * header.
 */
280 unsigned int num_points)
282 unsigned int number = 0;
/* Number of complete 4-sample groups processed by the vector loop. */
283 const unsigned int quarterPoints = num_points / 4;
/* View the complex input as a flat float array of (real, imag) pairs. */
285 const float* complexVectorPtr = (
float*)complexVector;
286 float* magnitudeVectorPtr = magnitudeVector;
288 __m128 cplxValue1, cplxValue2, result;
289 for (; number < quarterPoints; number++) {
/* Each iteration advances the input by 4 + 4 floats (4 complex samples) and
 * the output by 4 floats. */
291 complexVectorPtr += 4;
294 complexVectorPtr += 4;
298 magnitudeVectorPtr += 4;
/* Scalar tail: handles the remaining num_points % 4 samples with sqrtf. */
301 number = quarterPoints * 4;
302 for (; number < num_points; number++) {
303 float val1Real = *complexVectorPtr++;
304 float val1Imag = *complexVectorPtr++;
305 *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
311 #ifdef LV_HAVE_GENERIC
/*
 * Body of the scalar kernel in the aligned section
 * (volk_32fc_magnitude_32f_a_generic according to the cross-reference index of
 * this listing). The visible statements are the same as in the generic kernel
 * earlier in the file: for each of the num_points complex samples it writes
 * sqrt(real^2 + imag^2) to magnitudeVector.
 *
 * NOTE(review): the function header and braces (embedded lines 313-314, 316,
 * 324+) are missing from this listing.
 */
315 unsigned int num_points)
/* View the complex input as a flat float array of (real, imag) pairs. */
317 const float* complexVectorPtr = (
float*)complexVector;
318 float* magnitudeVectorPtr = magnitudeVector;
319 unsigned int number = 0;
320 for (number = 0; number < num_points; number++) {
/* Consume two input floats and produce one output float per sample. */
321 const float real = *complexVectorPtr++;
322 const float imag = *complexVectorPtr++;
323 *magnitudeVectorPtr++ = sqrtf((real * real) + (imag * imag));
330 #include <arm_neon.h>
/*
 * Body of the NEON kernel (volk_32fc_magnitude_32f_neon according to the
 * cross-reference index of this listing). The vector loop handles 4 complex
 * samples per iteration using NEON estimate instructions; the scalar tail
 * uses sqrtf, so vector and tail results can differ in accuracy.
 *
 * NOTE(review): this listing omits several original lines (e.g. embedded
 * lines 332-333, 335-336 and 346), including the function header, the
 * declaration of `number`, and the left-hand side of the vmlaq_f32 statement.
 */
334 unsigned int num_points)
/* Number of complete 4-sample groups processed by the vector loop. */
337 unsigned int quarter_points = num_points / 4;
/* View the complex input as a flat float array of (real, imag) pairs. */
338 const float* complexVectorPtr = (
float*)complexVector;
339 float* magnitudeVectorPtr = magnitudeVector;
341 float32x4x2_t complex_vec;
342 float32x4_t magnitude_vec;
343 for (number = 0; number < quarter_points; number++) {
/* De-interleaving load of 8 floats: val[0] receives the even-indexed (real)
 * elements, val[1] the odd-indexed (imaginary) elements. */
344 complex_vec = vld2q_f32(complexVectorPtr);
/* val[0] = real^2 */
345 complex_vec.val[0] = vmulq_f32(complex_vec.val[0], complex_vec.val[0]);
/* Multiply-accumulate: real^2 + imag * imag.
 * NOTE(review): as listed, the result is discarded; embedded line 346 is
 * missing and presumably reads `magnitude_vec =` — confirm against the real
 * header. */
347 vmlaq_f32(complex_vec.val[0], complex_vec.val[1], complex_vec.val[1]);
/* Reciprocal-square-root estimate followed by a reciprocal estimate:
 * 1 / (1 / sqrt(x)), i.e. an approximate sqrt(x) with no refinement step. */
348 magnitude_vec = vrsqrteq_f32(magnitude_vec);
349 magnitude_vec = vrecpeq_f32(magnitude_vec);
350 vst1q_f32(magnitudeVectorPtr, magnitude_vec);
/* Advance past the 8 input floats consumed and the 4 outputs written. */
352 complexVectorPtr += 8;
353 magnitudeVectorPtr += 4;
/* Scalar tail: handles the remaining num_points % 4 samples with sqrtf. */
356 for (number = quarter_points * 4; number < num_points; number++) {
357 const float real = *complexVectorPtr++;
358 const float imag = *complexVectorPtr++;
359 *magnitudeVectorPtr++ = sqrtf((real * real) + (imag * imag));
/*
 * Parameters and body of volk_32fc_magnitude_32f_neon_fancy_sweet (name taken
 * from the cross-reference index of this listing). The vector loop
 * approximates each magnitude as a * max(|re|, |im|) + b * min(|re|, |im|),
 * choosing (a, b) per lane from two coefficient pairs; no square root is
 * evaluated in the vector path. The scalar tail uses an exact sqrtf, so tail
 * results can differ in accuracy from the vectorised ones.
 *
 * NOTE(review): the line carrying the function name (embedded line 382), the
 * opening brace and the declaration of `number` (embedded lines 384-385) are
 * missing from this listing.
 */
383 float* magnitudeVector,
const lv_32fc_t* complexVector,
unsigned int num_points)
/* Number of complete 4-sample groups processed by the vector loop. */
386 unsigned int quarter_points = num_points / 4;
/* View the complex input as a flat float array of (real, imag) pairs. */
387 const float* complexVectorPtr = (
float*)complexVector;
388 float* magnitudeVectorPtr = magnitudeVector;
/* Ratio min/max at which the coefficient pair switches.
 * NOTE(review): 0.4142135 ~= sqrt(2) - 1 = tan(22.5 deg); presumably an angle
 * boundary for the approximation — confirm. */
390 const float threshold = 0.4142135;
/* Coefficients: (a_high, b_high) apply when min > max * threshold,
 * (a_low, b_low) otherwise. */
392 float32x4_t a_vec, b_vec, a_high, a_low, b_high, b_low;
393 a_high = vdupq_n_f32(0.84);
394 b_high = vdupq_n_f32(0.561);
395 a_low = vdupq_n_f32(0.99);
396 b_low = vdupq_n_f32(0.197);
398 uint32x4_t comp0, comp1;
400 float32x4x2_t complex_vec;
401 float32x4_t min_vec, max_vec, magnitude_vec;
402 float32x4_t real_abs, imag_abs;
403 for (number = 0; number < quarter_points; number++) {
/* De-interleaving load: val[0] = reals, val[1] = imaginaries of 4 samples. */
404 complex_vec = vld2q_f32(complexVectorPtr);
406 real_abs = vabsq_f32(complex_vec.val[0]);
407 imag_abs = vabsq_f32(complex_vec.val[1]);
409 min_vec = vminq_f32(real_abs, imag_abs);
410 max_vec = vmaxq_f32(real_abs, imag_abs);
/* Per-lane masks: comp0 is set where min > max * threshold, comp1 where
 * min <= max * threshold. */
413 comp0 = vcgtq_f32(min_vec, vmulq_n_f32(max_vec, threshold));
414 comp1 = vcleq_f32(min_vec, vmulq_n_f32(max_vec, threshold));
/* Branch-free select: AND each coefficient's bit pattern with its mask, then
 * integer-add the two results. For ordered inputs exactly one mask is set per
 * lane, so the sum is the selected coefficient's bit pattern. This relies on
 * the compiler's vector casts reinterpreting bits rather than converting
 * values. */
417 a_vec = (float32x4_t)vaddq_s32(vandq_s32((int32x4_t)comp0, (int32x4_t)a_high),
418 vandq_s32((int32x4_t)comp1, (int32x4_t)a_low));
419 b_vec = (float32x4_t)vaddq_s32(vandq_s32((int32x4_t)comp0, (int32x4_t)b_high),
420 vandq_s32((int32x4_t)comp1, (int32x4_t)b_low));
/* magnitude ~= b * min + a * max */
423 min_vec = vmulq_f32(min_vec, b_vec);
424 max_vec = vmulq_f32(max_vec, a_vec);
426 magnitude_vec = vaddq_f32(min_vec, max_vec);
427 vst1q_f32(magnitudeVectorPtr, magnitude_vec);
/* Advance past the 8 input floats consumed and the 4 outputs written. */
429 complexVectorPtr += 8;
430 magnitudeVectorPtr += 4;
/* Scalar tail: remaining num_points % 4 samples, computed exactly with sqrtf. */
433 for (number = quarter_points * 4; number < num_points; number++) {
434 const float real = *complexVectorPtr++;
435 const float imag = *complexVectorPtr++;
436 *magnitudeVectorPtr++ = sqrtf((real * real) + (imag * imag));
/*
 * External implementation that the wrapper below forwards to; it is declared
 * here and defined elsewhere (presumably in ORC-generated code — confirm).
 *
 * NOTE(review): the `complexVector` parameter lines (embedded lines 445 and
 * 449) and the braces are missing from this listing; the call below shows the
 * parameter exists.
 */
444 extern void volk_32fc_magnitude_32f_a_orc_impl(
float* magnitudeVector,
446 unsigned int num_points);
/*
 * Thin wrapper: passes magnitudeVector, complexVector and num_points
 * unchanged to volk_32fc_magnitude_32f_a_orc_impl. Note that the wrapper is
 * named `_u_` while the implementation it calls is named `_a_`.
 */
448 static inline void volk_32fc_magnitude_32f_u_orc(
float* magnitudeVector,
450 unsigned int num_points)
452 volk_32fc_magnitude_32f_a_orc_impl(magnitudeVector, complexVector, num_points);
float32x4_t __m128
Definition: sse2neon.h:235
FORCE_INLINE void _mm_storeu_ps(float *p, __m128 a)
Definition: sse2neon.h:2787
FORCE_INLINE __m128 _mm_loadu_ps(const float *p)
Definition: sse2neon.h:1941
FORCE_INLINE __m128 _mm_load_ps(const float *p)
Definition: sse2neon.h:1858
FORCE_INLINE void _mm_store_ps(float *p, __m128 a)
Definition: sse2neon.h:2704
static void volk_32fc_magnitude_32f_a_generic(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_32f.h:313
static void volk_32fc_magnitude_32f_u_sse(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_32f.h:142
static void volk_32fc_magnitude_32f_u_sse3(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_32f.h:104
static void volk_32fc_magnitude_32f_u_avx(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_32f.h:69
static void volk_32fc_magnitude_32f_neon_fancy_sweet(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Calculates the magnitude of the complexVector and stores the results in the magnitudeVector.
Definition: volk_32fc_magnitude_32f.h:382
static void volk_32fc_magnitude_32f_generic(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_32f.h:178
static void volk_32fc_magnitude_32f_a_avx(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_32f.h:206
static void volk_32fc_magnitude_32f_neon(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_32f.h:332
static void volk_32fc_magnitude_32f_a_sse3(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_32f.h:242
static void volk_32fc_magnitude_32f_a_sse(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_32f.h:278
static __m256 _mm256_magnitude_ps(__m256 cplxValue1, __m256 cplxValue2)
Definition: volk_avx_intrinsics.h:70
float complex lv_32fc_t
Definition: volk_complex.h:74
static __m128 _mm_magnitude_ps_sse3(__m128 cplxValue1, __m128 cplxValue2)
Definition: volk_sse3_intrinsics.h:45
static __m128 _mm_magnitude_ps(__m128 cplxValue1, __m128 cplxValue2)
Definition: volk_sse_intrinsics.h:31