60 #ifdef LV_HAVE_GENERIC
66 unsigned int num_points)
68 const float* complexVectorPtr = (
float*)complexVector;
69 int16_t* magnitudeVectorPtr = magnitudeVector;
70 unsigned int number = 0;
71 for (number = 0; number < num_points; number++) {
76 *magnitudeVectorPtr++ = (int16_t)
rintf(scalar * sqrtf(real + imag));
81 #ifndef INCLUDED_volk_32fc_s32f_magnitude_16i_a_H
82 #define INCLUDED_volk_32fc_s32f_magnitude_16i_a_H
90 #include <immintrin.h>
92 static inline void volk_32fc_s32f_magnitude_16i_a_avx2(int16_t* magnitudeVector,
95 unsigned int num_points)
97 unsigned int number = 0;
98 const unsigned int eighthPoints = num_points / 8;
100 const float* complexVectorPtr = (
const float*)complexVector;
101 int16_t* magnitudeVectorPtr = magnitudeVector;
103 __m256 vScalar = _mm256_set1_ps(scalar);
104 __m256i idx = _mm256_set_epi32(0, 0, 0, 0, 5, 1, 4, 0);
105 __m256 cplxValue1, cplxValue2, result;
109 for (; number < eighthPoints; number++) {
110 cplxValue1 = _mm256_load_ps(complexVectorPtr);
111 complexVectorPtr += 8;
113 cplxValue2 = _mm256_load_ps(complexVectorPtr);
114 complexVectorPtr += 8;
116 cplxValue1 = _mm256_mul_ps(cplxValue1, cplxValue1);
117 cplxValue2 = _mm256_mul_ps(cplxValue2, cplxValue2);
119 result = _mm256_hadd_ps(cplxValue1, cplxValue2);
121 result = _mm256_sqrt_ps(result);
123 result = _mm256_mul_ps(result, vScalar);
125 resultInt = _mm256_cvtps_epi32(result);
126 resultInt = _mm256_packs_epi32(resultInt, resultInt);
127 resultInt = _mm256_permutevar8x32_epi32(
129 resultShort = _mm256_extracti128_si256(resultInt, 0);
131 magnitudeVectorPtr += 8;
134 number = eighthPoints * 8;
136 magnitudeVector + number, complexVector + number, scalar, num_points - number);
141 #include <pmmintrin.h>
146 unsigned int num_points)
148 unsigned int number = 0;
149 const unsigned int quarterPoints = num_points / 4;
151 const float* complexVectorPtr = (
const float*)complexVector;
152 int16_t* magnitudeVectorPtr = magnitudeVector;
156 __m128 cplxValue1, cplxValue2, result;
160 for (; number < quarterPoints; number++) {
162 complexVectorPtr += 4;
165 complexVectorPtr += 4;
167 cplxValue1 =
_mm_mul_ps(cplxValue1, cplxValue1);
168 cplxValue2 =
_mm_mul_ps(cplxValue2, cplxValue2);
177 *magnitudeVectorPtr++ = (int16_t)
rintf(floatBuffer[0]);
178 *magnitudeVectorPtr++ = (int16_t)
rintf(floatBuffer[1]);
179 *magnitudeVectorPtr++ = (int16_t)
rintf(floatBuffer[2]);
180 *magnitudeVectorPtr++ = (int16_t)
rintf(floatBuffer[3]);
183 number = quarterPoints * 4;
185 magnitudeVector + number, complexVector + number, scalar, num_points - number);
191 #include <xmmintrin.h>
196 unsigned int num_points)
198 unsigned int number = 0;
199 const unsigned int quarterPoints = num_points / 4;
201 const float* complexVectorPtr = (
const float*)complexVector;
202 int16_t* magnitudeVectorPtr = magnitudeVector;
206 __m128 cplxValue1, cplxValue2, result;
211 for (; number < quarterPoints; number++) {
213 complexVectorPtr += 4;
216 complexVectorPtr += 4;
235 *magnitudeVectorPtr++ = (int16_t)
rintf(floatBuffer[0]);
236 *magnitudeVectorPtr++ = (int16_t)
rintf(floatBuffer[1]);
237 *magnitudeVectorPtr++ = (int16_t)
rintf(floatBuffer[2]);
238 *magnitudeVectorPtr++ = (int16_t)
rintf(floatBuffer[3]);
241 number = quarterPoints * 4;
243 magnitudeVector + number, complexVector + number, scalar, num_points - number);
250 #ifndef INCLUDED_volk_32fc_s32f_magnitude_16i_u_H
251 #define INCLUDED_volk_32fc_s32f_magnitude_16i_u_H
253 #include <inttypes.h>
259 #include <immintrin.h>
261 static inline void volk_32fc_s32f_magnitude_16i_u_avx2(int16_t* magnitudeVector,
264 unsigned int num_points)
266 unsigned int number = 0;
267 const unsigned int eighthPoints = num_points / 8;
269 const float* complexVectorPtr = (
const float*)complexVector;
270 int16_t* magnitudeVectorPtr = magnitudeVector;
272 __m256 vScalar = _mm256_set1_ps(scalar);
273 __m256i idx = _mm256_set_epi32(0, 0, 0, 0, 5, 1, 4, 0);
274 __m256 cplxValue1, cplxValue2, result;
278 for (; number < eighthPoints; number++) {
279 cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
280 complexVectorPtr += 8;
282 cplxValue2 = _mm256_loadu_ps(complexVectorPtr);
283 complexVectorPtr += 8;
285 cplxValue1 = _mm256_mul_ps(cplxValue1, cplxValue1);
286 cplxValue2 = _mm256_mul_ps(cplxValue2, cplxValue2);
288 result = _mm256_hadd_ps(cplxValue1, cplxValue2);
290 result = _mm256_sqrt_ps(result);
292 result = _mm256_mul_ps(result, vScalar);
294 resultInt = _mm256_cvtps_epi32(result);
295 resultInt = _mm256_packs_epi32(resultInt, resultInt);
296 resultInt = _mm256_permutevar8x32_epi32(
298 resultShort = _mm256_extracti128_si256(resultInt, 0);
300 magnitudeVectorPtr += 8;
303 number = eighthPoints * 8;
305 magnitudeVector + number, complexVector + number, scalar, num_points - number);
static float rintf(float x)
Definition: config.h:45
FORCE_INLINE void _mm_store_si128(__m128i *p, __m128i a)
Definition: sse2neon.h:5937
float32x4_t __m128
Definition: sse2neon.h:235
#define _mm_shuffle_ps(a, b, imm)
Definition: sse2neon.h:2586
FORCE_INLINE __m128 _mm_hadd_ps(__m128 a, __m128 b)
Definition: sse2neon.h:6527
FORCE_INLINE __m128 _mm_mul_ps(__m128 a, __m128 b)
Definition: sse2neon.h:2205
FORCE_INLINE __m128 _mm_set_ps1(float)
Definition: sse2neon.h:2437
FORCE_INLINE void _mm_storeu_si128(__m128i *p, __m128i a)
Definition: sse2neon.h:6010
FORCE_INLINE __m128 _mm_add_ps(__m128 a, __m128 b)
Definition: sse2neon.h:1039
#define _MM_SHUFFLE(fp3, fp2, fp1, fp0)
Definition: sse2neon.h:195
FORCE_INLINE __m128 _mm_load_ps(const float *p)
Definition: sse2neon.h:1858
int64x2_t __m128i
Definition: sse2neon.h:244
FORCE_INLINE void _mm_store_ps(float *p, __m128 a)
Definition: sse2neon.h:2704
FORCE_INLINE __m128 _mm_sqrt_ps(__m128 in)
Definition: sse2neon.h:2659
static void volk_32fc_s32f_magnitude_16i_generic(int16_t *magnitudeVector, const lv_32fc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_32fc_s32f_magnitude_16i.h:63
static void volk_32fc_s32f_magnitude_16i_a_sse(int16_t *magnitudeVector, const lv_32fc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_32fc_s32f_magnitude_16i.h:193
static void volk_32fc_s32f_magnitude_16i_a_sse3(int16_t *magnitudeVector, const lv_32fc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_32fc_s32f_magnitude_16i.h:143
#define __VOLK_VOLATILE
Definition: volk_common.h:73
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:65
float complex lv_32fc_t
Definition: volk_complex.h:74