42 #ifndef INCLUDED_volk_16ic_s32f_magnitude_32f_a_H
43 #define INCLUDED_volk_16ic_s32f_magnitude_32f_a_H
51 #include <immintrin.h>
/*
 * AVX2 kernel (aligned variant, per the _a_ in its name).
 *
 * For every complex sample of interleaved int16 (real, imag) input, writes
 * sqrt((real / scalar)^2 + (imag / scalar)^2) as a float to magnitudeVector.
 * Eight samples are handled per vector iteration; leftover samples go
 * through the scalar loop at the end.
 *
 * NOTE(review): this listing is incomplete. The embedded numbering jumps from
 * 53 to 56 and from 66 to 69, so the remaining parameters (complexVector and
 * scalar are used below but never declared in the visible text), the braces,
 * and the declarations of int1, int2, short1 and short2 are not shown here.
 */
53 static inline void volk_16ic_s32f_magnitude_32f_a_avx2(
float* magnitudeVector,
56 unsigned int num_points)
58 unsigned int number = 0;
/* Count of full vector iterations; each one consumes 8 complex samples. */
59 const unsigned int eighthPoints = num_points / 8;
/* Walk the complex input as a flat run of int16 values (real, imag, ...). */
61 const int16_t* complexVectorPtr = (
const int16_t*)complexVector;
62 float* magnitudeVectorPtr = magnitudeVector;
/* Broadcast the reciprocal once so the loop scales with a multiply. */
64 __m256 invScalar = _mm256_set1_ps(1.0 / scalar);
66 __m256 cplxValue1, cplxValue2, result;
/*
 * _mm256_hadd_ps adds pairs within each 128-bit lane, so its output order is
 * samples 0,1,4,5 | 2,3,6,7. This index vector (element 0 is the last
 * argument) moves them back to 0..7.
 */
69 __m256i idx = _mm256_set_epi32(7, 6, 3, 2, 5, 4, 1, 0);
71 for (; number < eighthPoints; number++) {
/*
 * Load 16 int16 values = 8 complex samples.
 * NOTE(review): this is the unaligned load even though the kernel is the
 * aligned variant and the store below is the aligned _mm256_store_ps.
 * Confirm whether _mm256_load_si256 was intended here.
 */
73 int1 = _mm256_loadu_si256((__m256i*)complexVectorPtr);
74 complexVectorPtr += 16;
/* Split into the low and high 128-bit halves (4 complex samples each). */
75 short1 = _mm256_extracti128_si256(int1, 0);
76 short2 = _mm256_extracti128_si256(int1, 1);
/* Sign-extend int16 to int32, then convert to float. */
78 int1 = _mm256_cvtepi16_epi32(short1);
79 int2 = _mm256_cvtepi16_epi32(short2);
80 cplxValue1 = _mm256_cvtepi32_ps(int1);
81 cplxValue2 = _mm256_cvtepi32_ps(int2);
/* Scale every component by 1 / scalar. */
83 cplxValue1 = _mm256_mul_ps(cplxValue1, invScalar);
84 cplxValue2 = _mm256_mul_ps(cplxValue2, invScalar);
/* Square every component. */
86 cplxValue1 = _mm256_mul_ps(cplxValue1, cplxValue1);
87 cplxValue2 = _mm256_mul_ps(cplxValue2, cplxValue2);
/* Sum real^2 + imag^2 per sample, then restore sample order. */
89 result = _mm256_hadd_ps(cplxValue1, cplxValue2);
90 result = _mm256_permutevar8x32_ps(result, idx);
92 result = _mm256_sqrt_ps(result);
/* Aligned store: magnitudeVectorPtr must be 32-byte aligned here. */
94 _mm256_store_ps(magnitudeVectorPtr, result);
96 magnitudeVectorPtr += 8;
/* Scalar tail: handle the num_points % 8 samples the vector loop left over. */
99 number = eighthPoints * 8;
100 magnitudeVectorPtr = &magnitudeVector[number];
101 complexVectorPtr = (
const int16_t*)&complexVector[number];
102 for (; number < num_points; number++) {
/*
 * NOTE(review): the tail divides by scalar while the vector loop multiplies
 * by the reciprocal; the two paths may differ in the last bit.
 */
103 float val1Real = (float)(*complexVectorPtr++) / scalar;
104 float val1Imag = (float)(*complexVectorPtr++) / scalar;
105 *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
112 #include <pmmintrin.h>
/*
 * Tail of the parameter list and body of the SSE3 kernel (the reference list
 * at the end of this listing names it volk_16ic_s32f_magnitude_32f_a_sse3,
 * defined at line 114).
 *
 * Visible behaviour: four complex int16 samples per iteration are converted
 * to float through inputFloatBuffer, scaled by invScalar and squared; leftover
 * samples are handled by the scalar loop, which writes
 * sqrt((real / scalar)^2 + (imag / scalar)^2).
 *
 * NOTE(review): this listing is incomplete. The function header, the
 * declarations of invScalar and inputFloatBuffer, the loads into cplxValue1
 * and cplxValue2, and the statements that combine, square-root and store the
 * result are not shown (the embedded numbering skips 125, 129, 143-144 and
 * 154-158). Presumably those use _mm_load_ps, _mm_hadd_ps, _mm_sqrt_ps and
 * _mm_store_ps - verify against the full file.
 */
117 unsigned int num_points)
119 unsigned int number = 0;
/* Count of full vector iterations; each one consumes 4 complex samples. */
120 const unsigned int quarterPoints = num_points / 4;
/* Walk the complex input as a flat run of int16 values (real, imag, ...). */
122 const int16_t* complexVectorPtr = (
const int16_t*)complexVector;
123 float* magnitudeVectorPtr = magnitudeVector;
127 __m128 cplxValue1, cplxValue2, result;
131 for (; number < quarterPoints; number++) {
/* Convert 8 int16 values (4 complex samples) to float one element at a time. */
133 inputFloatBuffer[0] = (float)(complexVectorPtr[0]);
134 inputFloatBuffer[1] = (float)(complexVectorPtr[1]);
135 inputFloatBuffer[2] = (float)(complexVectorPtr[2]);
136 inputFloatBuffer[3] = (float)(complexVectorPtr[3]);
138 inputFloatBuffer[4] = (float)(complexVectorPtr[4]);
139 inputFloatBuffer[5] = (float)(complexVectorPtr[5]);
140 inputFloatBuffer[6] = (float)(complexVectorPtr[6]);
141 inputFloatBuffer[7] = (float)(complexVectorPtr[7]);
146 complexVectorPtr += 8;
/* Scale every component by invScalar. */
148 cplxValue1 =
_mm_mul_ps(cplxValue1, invScalar);
149 cplxValue2 =
_mm_mul_ps(cplxValue2, invScalar);
/* Square every component. */
151 cplxValue1 =
_mm_mul_ps(cplxValue1, cplxValue1);
152 cplxValue2 =
_mm_mul_ps(cplxValue2, cplxValue2);
160 magnitudeVectorPtr += 4;
/* Scalar tail: handle the num_points % 4 samples the vector loop left over. */
163 number = quarterPoints * 4;
164 magnitudeVectorPtr = &magnitudeVector[number];
165 complexVectorPtr = (
const int16_t*)&complexVector[number];
166 for (; number < num_points; number++) {
/* The tail divides by scalar directly instead of using invScalar. */
167 float val1Real = (float)(*complexVectorPtr++) / scalar;
168 float val1Imag = (float)(*complexVectorPtr++) / scalar;
169 *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
175 #include <xmmintrin.h>
/*
 * Tail of the parameter list and body of the SSE kernel (the reference list
 * at the end of this listing names it volk_16ic_s32f_magnitude_32f_a_sse,
 * defined at line 177).
 *
 * Visible behaviour: four complex int16 samples per iteration are converted
 * to float through inputFloatBuffer and squared; leftover samples are handled
 * by the scalar loop, which writes
 * sqrt((real * iScalar)^2 + (imag * iScalar)^2).
 *
 * NOTE(review): this listing is incomplete. The function header, the
 * declaration of inputFloatBuffer, the loads, the scaling step, the use of
 * re and im, and the statements that sum, square-root and store the result
 * are not shown (the embedded numbering skips 189-190, 193, 206-210, 214-215
 * and 220-224). Verify against the full file before relying on this summary.
 */
180 unsigned int num_points)
182 unsigned int number = 0;
/* Count of full vector iterations; each one consumes 4 complex samples. */
183 const unsigned int quarterPoints = num_points / 4;
/* Walk the complex input as a flat run of int16 values (real, imag, ...). */
185 const int16_t* complexVectorPtr = (
const int16_t*)complexVector;
186 float* magnitudeVectorPtr = magnitudeVector;
/* Reciprocal computed once; the tail loop below multiplies by it. */
188 const float iScalar = 1.0 / scalar;
191 __m128 cplxValue1, cplxValue2, result, re, im;
195 for (; number < quarterPoints; number++) {
/* Convert 8 int16 values (4 complex samples) to float one element at a time. */
196 inputFloatBuffer[0] = (float)(complexVectorPtr[0]);
197 inputFloatBuffer[1] = (float)(complexVectorPtr[1]);
198 inputFloatBuffer[2] = (float)(complexVectorPtr[2]);
199 inputFloatBuffer[3] = (float)(complexVectorPtr[3]);
201 inputFloatBuffer[4] = (float)(complexVectorPtr[4]);
202 inputFloatBuffer[5] = (float)(complexVectorPtr[5]);
203 inputFloatBuffer[6] = (float)(complexVectorPtr[6]);
204 inputFloatBuffer[7] = (float)(complexVectorPtr[7]);
212 complexVectorPtr += 8;
/* Square every component. */
217 cplxValue1 =
_mm_mul_ps(cplxValue1, cplxValue1);
218 cplxValue2 =
_mm_mul_ps(cplxValue2, cplxValue2);
226 magnitudeVectorPtr += 4;
/* Scalar tail: handle the num_points % 4 samples the vector loop left over. */
229 number = quarterPoints * 4;
230 magnitudeVectorPtr = &magnitudeVector[number];
231 complexVectorPtr = (
const int16_t*)&complexVector[number];
232 for (; number < num_points; number++) {
233 float val1Real = (float)(*complexVectorPtr++) * iScalar;
234 float val1Imag = (float)(*complexVectorPtr++) * iScalar;
235 *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
242 #ifdef LV_HAVE_GENERIC
/*
 * Tail of the parameter list and body of the portable implementation (the
 * reference list at the end of this listing names it
 * volk_16ic_s32f_magnitude_32f_generic, defined at line 244).
 *
 * For each of num_points complex samples, reads an int16 real and imaginary
 * part in sequence, scales both by 1 / scalar and writes
 * sqrt(real^2 + imag^2) as a float to magnitudeVector.
 *
 * NOTE(review): the function header, the other parameter lines and the braces
 * are missing from this listing.
 */
247 unsigned int num_points)
/* Walk the complex input as a flat run of int16 values (real, imag, ...). */
249 const int16_t* complexVectorPtr = (
const int16_t*)complexVector;
250 float* magnitudeVectorPtr = magnitudeVector;
251 unsigned int number = 0;
/* Reciprocal computed once so the loop multiplies instead of dividing. */
252 const float invScalar = 1.0 / scalar;
253 for (number = 0; number < num_points; number++) {
254 float real = ((float)(*complexVectorPtr++)) * invScalar;
255 float imag = ((float)(*complexVectorPtr++)) * invScalar;
256 *magnitudeVectorPtr++ = sqrtf((real * real) + (imag * imag));
261 #ifdef LV_HAVE_ORC_DISABLED
/*
 * Declaration of the ORC implementation, which is defined outside this file.
 * NOTE(review): the middle parameter lines are missing from this listing; the
 * call below passes magnitudeVector, complexVector, scalar and num_points.
 */
263 extern void volk_16ic_s32f_magnitude_32f_a_orc_impl(
float* magnitudeVector,
266 unsigned int num_points);
/*
 * Thin wrapper that forwards its arguments unchanged to the ORC
 * implementation.
 * NOTE(review): the wrapper is named _u_orc but calls the _a_orc_impl symbol;
 * confirm that this naming mismatch is intentional.
 */
268 static inline void volk_16ic_s32f_magnitude_32f_u_orc(
float* magnitudeVector,
271 unsigned int num_points)
273 volk_16ic_s32f_magnitude_32f_a_orc_impl(
274 magnitudeVector, complexVector, scalar, num_points);
281 #ifndef INCLUDED_volk_16ic_s32f_magnitude_32f_u_H
282 #define INCLUDED_volk_16ic_s32f_magnitude_32f_u_H
284 #include <inttypes.h>
290 #include <immintrin.h>
/*
 * AVX2 kernel (unaligned variant): both the load and the store use the
 * unaligned intrinsics.
 *
 * For every complex sample of interleaved int16 (real, imag) input, writes
 * sqrt((real / scalar)^2 + (imag / scalar)^2) as a float to magnitudeVector.
 * Eight samples are handled per vector iteration; leftover samples go
 * through the scalar loop at the end.
 *
 * NOTE(review): this listing is incomplete. The embedded numbering jumps from
 * 292 to 295 and from 305 to 308, so the remaining parameters (complexVector
 * and scalar are used below but never declared in the visible text), the
 * braces, and the declarations of int1, int2, short1 and short2 are not shown
 * here.
 */
292 static inline void volk_16ic_s32f_magnitude_32f_u_avx2(
float* magnitudeVector,
295 unsigned int num_points)
297 unsigned int number = 0;
/* Count of full vector iterations; each one consumes 8 complex samples. */
298 const unsigned int eighthPoints = num_points / 8;
/* Walk the complex input as a flat run of int16 values (real, imag, ...). */
300 const int16_t* complexVectorPtr = (
const int16_t*)complexVector;
301 float* magnitudeVectorPtr = magnitudeVector;
/* Broadcast the reciprocal once so the loop scales with a multiply. */
303 __m256 invScalar = _mm256_set1_ps(1.0 / scalar);
305 __m256 cplxValue1, cplxValue2, result;
/*
 * _mm256_hadd_ps adds pairs within each 128-bit lane, so its output order is
 * samples 0,1,4,5 | 2,3,6,7. This index vector (element 0 is the last
 * argument) moves them back to 0..7.
 */
308 __m256i idx = _mm256_set_epi32(7, 6, 3, 2, 5, 4, 1, 0);
310 for (; number < eighthPoints; number++) {
/* Unaligned load of 16 int16 values = 8 complex samples. */
312 int1 = _mm256_loadu_si256((__m256i*)complexVectorPtr);
313 complexVectorPtr += 16;
/* Split into the low and high 128-bit halves (4 complex samples each). */
314 short1 = _mm256_extracti128_si256(int1, 0);
315 short2 = _mm256_extracti128_si256(int1, 1);
/* Sign-extend int16 to int32, then convert to float. */
317 int1 = _mm256_cvtepi16_epi32(short1);
318 int2 = _mm256_cvtepi16_epi32(short2);
319 cplxValue1 = _mm256_cvtepi32_ps(int1);
320 cplxValue2 = _mm256_cvtepi32_ps(int2);
/* Scale every component by 1 / scalar. */
322 cplxValue1 = _mm256_mul_ps(cplxValue1, invScalar);
323 cplxValue2 = _mm256_mul_ps(cplxValue2, invScalar);
/* Square every component. */
325 cplxValue1 = _mm256_mul_ps(cplxValue1, cplxValue1);
326 cplxValue2 = _mm256_mul_ps(cplxValue2, cplxValue2);
/* Sum real^2 + imag^2 per sample, then restore sample order. */
328 result = _mm256_hadd_ps(cplxValue1, cplxValue2);
329 result = _mm256_permutevar8x32_ps(result, idx);
331 result = _mm256_sqrt_ps(result);
/* Unaligned store: no alignment requirement on magnitudeVectorPtr. */
333 _mm256_storeu_ps(magnitudeVectorPtr, result);
335 magnitudeVectorPtr += 8;
/* Scalar tail: handle the num_points % 8 samples the vector loop left over. */
338 number = eighthPoints * 8;
339 magnitudeVectorPtr = &magnitudeVector[number];
340 complexVectorPtr = (
const int16_t*)&complexVector[number];
341 for (; number < num_points; number++) {
/*
 * NOTE(review): the tail divides by scalar while the vector loop multiplies
 * by the reciprocal; the two paths may differ in the last bit.
 */
342 float val1Real = (float)(*complexVectorPtr++) / scalar;
343 float val1Imag = (float)(*complexVectorPtr++) / scalar;
344 *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
float32x4_t __m128
Definition: sse2neon.h:235
#define _mm_shuffle_ps(a, b, imm)
Definition: sse2neon.h:2586
FORCE_INLINE __m128 _mm_hadd_ps(__m128 a, __m128 b)
Definition: sse2neon.h:6527
FORCE_INLINE __m128 _mm_mul_ps(__m128 a, __m128 b)
Definition: sse2neon.h:2205
FORCE_INLINE __m128 _mm_set_ps1(float)
Definition: sse2neon.h:2437
FORCE_INLINE __m128 _mm_add_ps(__m128 a, __m128 b)
Definition: sse2neon.h:1039
FORCE_INLINE __m128 _mm_load_ps(const float *p)
Definition: sse2neon.h:1858
int64x2_t __m128i
Definition: sse2neon.h:244
FORCE_INLINE void _mm_store_ps(float *p, __m128 a)
Definition: sse2neon.h:2704
FORCE_INLINE __m128 _mm_sqrt_ps(__m128 in)
Definition: sse2neon.h:2659
static void volk_16ic_s32f_magnitude_32f_generic(float *magnitudeVector, const lv_16sc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_16ic_s32f_magnitude_32f.h:244
static void volk_16ic_s32f_magnitude_32f_a_sse(float *magnitudeVector, const lv_16sc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_16ic_s32f_magnitude_32f.h:177
static void volk_16ic_s32f_magnitude_32f_a_sse3(float *magnitudeVector, const lv_16sc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_16ic_s32f_magnitude_32f.h:114
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:65
short complex lv_16sc_t
Definition: volk_complex.h:71