61 #ifndef INCLUDED_volk_32fc_s32f_atan2_32f_a_H
62 #define INCLUDED_volk_32fc_s32f_atan2_32f_a_H
69 #include <smmintrin.h>
71 #ifdef LV_HAVE_LIB_SIMDMATH
75 static inline void volk_32fc_s32f_atan2_32f_a_sse4_1(
float* outputVector,
77 const float normalizeFactor,
78 unsigned int num_points)
80 const float* complexVectorPtr = (
float*)complexVector;
81 float* outPtr = outputVector;
83 unsigned int number = 0;
84 const float invNormalizeFactor = 1.0 / normalizeFactor;
86 #ifdef LV_HAVE_LIB_SIMDMATH
87 const unsigned int quarterPoints = num_points / 4;
92 __m128 complex1, complex2, iValue, qValue;
95 for (; number < quarterPoints; number++) {
98 complexVectorPtr += 4;
100 complexVectorPtr += 4;
105 phase = atan2f4(qValue, iValue);
115 number = quarterPoints * 4;
118 for (; number < num_points; number++) {
119 const float real = *complexVectorPtr++;
120 const float imag = *complexVectorPtr++;
121 *outPtr++ = atan2f(imag, real) * invNormalizeFactor;
128 #include <xmmintrin.h>
130 #ifdef LV_HAVE_LIB_SIMDMATH
131 #include <simdmath.h>
136 const float normalizeFactor,
137 unsigned int num_points)
139 const float* complexVectorPtr = (
float*)complexVector;
140 float* outPtr = outputVector;
142 unsigned int number = 0;
143 const float invNormalizeFactor = 1.0 / normalizeFactor;
145 #ifdef LV_HAVE_LIB_SIMDMATH
146 const unsigned int quarterPoints = num_points / 4;
151 __m128 complex1, complex2, iValue, qValue;
155 for (; number < quarterPoints; number++) {
158 complexVectorPtr += 4;
160 complexVectorPtr += 4;
165 phase = atan2f4(qValue, iValue);
177 number = quarterPoints * 4;
180 for (; number < num_points; number++) {
181 const float real = *complexVectorPtr++;
182 const float imag = *complexVectorPtr++;
183 *outPtr++ = atan2f(imag, real) * invNormalizeFactor;
188 #ifdef LV_HAVE_GENERIC
192 const float normalizeFactor,
193 unsigned int num_points)
195 float* outPtr = outputVector;
196 const float* inPtr = (
float*)inputVector;
197 const float invNormalizeFactor = 1.0 / normalizeFactor;
199 for (number = 0; number < num_points; number++) {
200 const float real = *inPtr++;
201 const float imag = *inPtr++;
202 *outPtr++ = atan2f(imag, real) * invNormalizeFactor;
float32x4_t __m128
Definition: sse2neon.h:235
#define _mm_shuffle_ps(a, b, imm)
Definition: sse2neon.h:2586
FORCE_INLINE __m128 _mm_mul_ps(__m128 a, __m128 b)
Definition: sse2neon.h:2205
FORCE_INLINE __m128 _mm_set_ps1(float)
Definition: sse2neon.h:2437
FORCE_INLINE __m128 _mm_andnot_ps(__m128 a, __m128 b)
Definition: sse2neon.h:1079
FORCE_INLINE __m128 _mm_and_ps(__m128 a, __m128 b)
Definition: sse2neon.h:1064
FORCE_INLINE __m128 _mm_blendv_ps(__m128 _a, __m128 _b, __m128 _mask)
Definition: sse2neon.h:7458
FORCE_INLINE __m128 _mm_cmpneq_ps(__m128 a, __m128 b)
Definition: sse2neon.h:1205
#define _MM_SHUFFLE(fp3, fp2, fp1, fp0)
Definition: sse2neon.h:195
FORCE_INLINE __m128 _mm_load_ps(const float *p)
Definition: sse2neon.h:1858
FORCE_INLINE void _mm_store_ps(float *p, __m128 a)
Definition: sse2neon.h:2704
FORCE_INLINE __m128 _mm_or_ps(__m128, __m128)
Definition: sse2neon.h:2237
static void volk_32fc_s32f_atan2_32f_generic(float *outputVector, const lv_32fc_t *inputVector, const float normalizeFactor, unsigned int num_points)
Definition: volk_32fc_s32f_atan2_32f.h:190
static void volk_32fc_s32f_atan2_32f_a_sse(float *outputVector, const lv_32fc_t *complexVector, const float normalizeFactor, unsigned int num_points)
Definition: volk_32fc_s32f_atan2_32f.h:134
float complex lv_32fc_t
Definition: volk_complex.h:74