62 #ifndef INCLUDED_volk_32f_x2_s32f_interleave_16ic_a_H
63 #define INCLUDED_volk_32f_x2_s32f_interleave_16ic_a_H
70 #include <immintrin.h>
/*
 * AVX2 kernel, aligned variant: multiplies the float streams iBuffer and
 * qBuffer by `scalar`, rounds to integers, and writes them interleaved
 * (I, Q, I, Q, ...) as int16_t pairs into complexVector.
 *
 * complexVector  output; viewed below as a flat int16_t array, two per point
 * num_points     number of complex points to produce
 *
 * Uses _mm256_load_ps / _mm256_store_si256, which require 32-byte aligned
 * addresses.
 *
 * NOTE(review): this listing is missing some original lines (the iBuffer,
 * qBuffer and scalar parameters, the braces, and any statements between the
 * numbered lines shown). Those names are used below, so they are presumably
 * declared on the missing lines - confirm against the real header.
 */
72 static inline void volk_32f_x2_s32f_interleave_16ic_a_avx2(
lv_16sc_t* complexVector,
76 unsigned int num_points)
78 unsigned int number = 0;
79 const float* iBufferPtr = iBuffer;
80 const float* qBufferPtr = qBuffer;
// Broadcast the scale factor to all eight float lanes.
82 __m256 vScalar = _mm256_set1_ps(scalar);
// Number of full 8-point groups handled by the vector loop.
84 const unsigned int eighthPoints = num_points / 8;
86 __m256 iValue, qValue, cplxValue1, cplxValue2;
87 __m256i intValue1, intValue2;
89 int16_t* complexVectorPtr = (int16_t*)complexVector;
91 for (; number < eighthPoints; number++) {
// Aligned loads of eight I and eight Q samples.
92 iValue = _mm256_load_ps(iBufferPtr);
93 qValue = _mm256_load_ps(qBufferPtr);
// unpacklo interleaves the low two I/Q pairs of each 128-bit lane:
// i0 q0 i1 q1 | i4 q4 i5 q5, then scale.
96 cplxValue1 = _mm256_unpacklo_ps(iValue, qValue);
97 cplxValue1 = _mm256_mul_ps(cplxValue1, vScalar);
// unpackhi interleaves the high two I/Q pairs of each 128-bit lane:
// i2 q2 i3 q3 | i6 q6 i7 q7, then scale.
100 cplxValue2 = _mm256_unpackhi_ps(iValue, qValue);
101 cplxValue2 = _mm256_mul_ps(cplxValue2, vScalar);
// Float -> int32 conversion using the current SIMD rounding mode.
103 intValue1 = _mm256_cvtps_epi32(cplxValue1);
104 intValue2 = _mm256_cvtps_epi32(cplxValue2);
// Saturating int32 -> int16 pack. It also works per 128-bit lane, so the
// lane-local unpack order above comes out as i0 q0 ... i7 q7 in memory
// order without an extra cross-lane permute.
106 intValue1 = _mm256_packs_epi32(intValue1, intValue2);
// Aligned 32-byte store of 16 int16 values (8 complex points).
108 _mm256_store_si256((__m256i*)complexVectorPtr, intValue1);
109 complexVectorPtr += 16;
// NOTE(review): the advance of iBufferPtr/qBufferPtr by 8 is not visible in
// this listing; the tail loop below relies on it - confirm it is present.
// Scalar tail: handle the remaining num_points % 8 points.
115 number = eighthPoints * 8;
116 complexVectorPtr = (int16_t*)(&complexVector[number]);
117 for (; number < num_points; number++) {
// NOTE(review): unlike the saturating pack above, this cast does not clamp;
// a scaled value outside the int16_t range is undefined behavior in C.
118 *complexVectorPtr++ = (int16_t)
rintf(*iBufferPtr++ * scalar);
119 *complexVectorPtr++ = (int16_t)
rintf(*qBufferPtr++ * scalar);
126 #include <emmintrin.h>
/*
 * Parameter list and body fragments of the aligned SSE2 kernel (per the
 * cross-reference, volk_32f_x2_s32f_interleave_16ic_a_sse2 is defined on the
 * line just above this one in the original header).
 *
 * iBuffer     in-phase float input
 * qBuffer     quadrature float input
 * num_points  number of complex points to produce
 *
 * NOTE(review): the listing omits the function name line, the complexVector
 * and scalar parameters, and the whole body of the vector loop; only the
 * bookkeeping around it is visible. Comments below describe the visible
 * lines only.
 */
129 const float* iBuffer,
130 const float* qBuffer,
132 unsigned int num_points)
134 unsigned int number = 0;
135 const float* iBufferPtr = iBuffer;
136 const float* qBufferPtr = qBuffer;
// Number of full 4-point groups handled by the vector loop.
140 const unsigned int quarterPoints = num_points / 4;
142 __m128 iValue, qValue, cplxValue1, cplxValue2;
145 int16_t* complexVectorPtr = (int16_t*)complexVector;
147 for (; number < quarterPoints; number++) {
// (vector loop body not visible here)
// Each iteration advances the output by 8 int16 values = 4 complex points.
165 complexVectorPtr += 8;
// Scalar tail: handle the remaining num_points % 4 points.
171 number = quarterPoints * 4;
172 complexVectorPtr = (int16_t*)(&complexVector[number]);
173 for (; number < num_points; number++) {
// NOTE(review): the cast does not clamp; a scaled value outside the int16_t
// range is undefined behavior in C.
174 *complexVectorPtr++ = (int16_t)
rintf(*iBufferPtr++ * scalar);
175 *complexVectorPtr++ = (int16_t)
rintf(*qBufferPtr++ * scalar);
182 #include <xmmintrin.h>
/*
 * Parameter list and body fragments of the aligned SSE kernel (per the
 * cross-reference, volk_32f_x2_s32f_interleave_16ic_a_sse is defined on the
 * line just above this one in the original header).
 *
 * iBuffer     in-phase float input
 * qBuffer     quadrature float input
 * num_points  number of complex points to produce
 *
 * NOTE(review): the listing omits the function name line, the complexVector
 * and scalar parameters, the declaration of floatBuffer, and the SIMD
 * statements of the vector loop. Comments below describe the visible lines
 * only.
 */
185 const float* iBuffer,
186 const float* qBuffer,
188 unsigned int num_points)
190 unsigned int number = 0;
191 const float* iBufferPtr = iBuffer;
192 const float* qBufferPtr = qBuffer;
// Number of full 4-point groups handled by the vector loop.
196 const unsigned int quarterPoints = num_points / 4;
198 __m128 iValue, qValue, cplxValue;
200 int16_t* complexVectorPtr = (int16_t*)complexVector;
204 for (; number < quarterPoints; number++) {
// First group of four outputs, rounded one at a time from floatBuffer.
// presumably floatBuffer holds the scaled, interleaved low half here - TODO
// confirm against the missing lines.
214 *complexVectorPtr++ = (int16_t)
rintf(floatBuffer[0]);
215 *complexVectorPtr++ = (int16_t)
rintf(floatBuffer[1]);
216 *complexVectorPtr++ = (int16_t)
rintf(floatBuffer[2]);
217 *complexVectorPtr++ = (int16_t)
rintf(floatBuffer[3]);
// Second group of four outputs from floatBuffer (presumably refilled with
// the high half on the lines not shown - TODO confirm).
225 *complexVectorPtr++ = (int16_t)
rintf(floatBuffer[0]);
226 *complexVectorPtr++ = (int16_t)
rintf(floatBuffer[1]);
227 *complexVectorPtr++ = (int16_t)
rintf(floatBuffer[2]);
228 *complexVectorPtr++ = (int16_t)
rintf(floatBuffer[3]);
// Scalar tail: handle the remaining num_points % 4 points.
234 number = quarterPoints * 4;
235 complexVectorPtr = (int16_t*)(&complexVector[number]);
236 for (; number < num_points; number++) {
// NOTE(review): none of the casts in this kernel clamp; a value outside the
// int16_t range is undefined behavior in C.
237 *complexVectorPtr++ = (int16_t)
rintf(*iBufferPtr++ * scalar);
238 *complexVectorPtr++ = (int16_t)
rintf(*qBufferPtr++ * scalar);
244 #ifdef LV_HAVE_GENERIC
/*
 * Parameter list and body of the portable kernel (per the cross-reference,
 * volk_32f_x2_s32f_interleave_16ic_generic is defined on the line just above
 * this one in the original header).
 *
 * For every point it writes round(I * scalar) then round(Q * scalar) as two
 * consecutive int16_t values into complexVector.
 *
 * iBuffer     in-phase float input
 * qBuffer     quadrature float input
 * num_points  number of complex points to produce
 *
 * NOTE(review): the function name line, the complexVector and scalar
 * parameters, and the braces are not visible in this listing.
 */
247 const float* iBuffer,
248 const float* qBuffer,
250 unsigned int num_points)
// Treat the complex output as a flat int16_t array (two entries per point).
252 int16_t* complexVectorPtr = (int16_t*)complexVector;
253 const float* iBufferPtr = iBuffer;
254 const float* qBufferPtr = qBuffer;
255 unsigned int number = 0;
257 for (number = 0; number < num_points; number++) {
// rintf rounds according to the current rounding mode.
// NOTE(review): the cast does not clamp; a scaled value outside the int16_t
// range is undefined behavior in C.
258 *complexVectorPtr++ = (int16_t)
rintf(*iBufferPtr++ * scalar);
259 *complexVectorPtr++ = (int16_t)
rintf(*qBufferPtr++ * scalar);
267 #ifndef INCLUDED_volk_32f_x2_s32f_interleave_16ic_u_H
268 #define INCLUDED_volk_32f_x2_s32f_interleave_16ic_u_H
270 #include <inttypes.h>
275 #include <immintrin.h>
/*
 * AVX2 kernel, unaligned variant: multiplies the float streams iBuffer and
 * qBuffer by `scalar`, rounds to integers, and writes them interleaved
 * (I, Q, I, Q, ...) as int16_t pairs into complexVector.
 *
 * complexVector  output; viewed below as a flat int16_t array, two per point
 * iBuffer        in-phase float input
 * qBuffer        quadrature float input
 * num_points     number of complex points to produce
 *
 * Uses _mm256_loadu_ps / _mm256_storeu_si256, so no buffer alignment is
 * required.
 *
 * NOTE(review): this listing is missing some original lines (the scalar
 * parameter, the braces, and any statements between the numbered lines
 * shown). `scalar` is used below, so it is presumably declared on a missing
 * line - confirm against the real header.
 */
277 static inline void volk_32f_x2_s32f_interleave_16ic_u_avx2(
lv_16sc_t* complexVector,
278 const float* iBuffer,
279 const float* qBuffer,
281 unsigned int num_points)
283 unsigned int number = 0;
284 const float* iBufferPtr = iBuffer;
285 const float* qBufferPtr = qBuffer;
// Broadcast the scale factor to all eight float lanes.
287 __m256 vScalar = _mm256_set1_ps(scalar);
// Number of full 8-point groups handled by the vector loop.
289 const unsigned int eighthPoints = num_points / 8;
291 __m256 iValue, qValue, cplxValue1, cplxValue2;
292 __m256i intValue1, intValue2;
294 int16_t* complexVectorPtr = (int16_t*)complexVector;
296 for (; number < eighthPoints; number++) {
// Unaligned loads of eight I and eight Q samples.
297 iValue = _mm256_loadu_ps(iBufferPtr);
298 qValue = _mm256_loadu_ps(qBufferPtr);
// unpacklo interleaves the low two I/Q pairs of each 128-bit lane:
// i0 q0 i1 q1 | i4 q4 i5 q5, then scale.
301 cplxValue1 = _mm256_unpacklo_ps(iValue, qValue);
302 cplxValue1 = _mm256_mul_ps(cplxValue1, vScalar);
// unpackhi interleaves the high two I/Q pairs of each 128-bit lane:
// i2 q2 i3 q3 | i6 q6 i7 q7, then scale.
305 cplxValue2 = _mm256_unpackhi_ps(iValue, qValue);
306 cplxValue2 = _mm256_mul_ps(cplxValue2, vScalar);
// Float -> int32 conversion using the current SIMD rounding mode.
308 intValue1 = _mm256_cvtps_epi32(cplxValue1);
309 intValue2 = _mm256_cvtps_epi32(cplxValue2);
// Saturating int32 -> int16 pack. It also works per 128-bit lane, so the
// lane-local unpack order above comes out as i0 q0 ... i7 q7 in memory
// order without an extra cross-lane permute.
311 intValue1 = _mm256_packs_epi32(intValue1, intValue2);
// Unaligned 32-byte store of 16 int16 values (8 complex points).
313 _mm256_storeu_si256((__m256i*)complexVectorPtr, intValue1);
314 complexVectorPtr += 16;
// NOTE(review): the advance of iBufferPtr/qBufferPtr by 8 is not visible in
// this listing; the tail loop below relies on it - confirm it is present.
// Scalar tail: handle the remaining num_points % 8 points.
320 number = eighthPoints * 8;
321 complexVectorPtr = (int16_t*)(&complexVector[number]);
322 for (; number < num_points; number++) {
// NOTE(review): unlike the saturating pack above, this cast does not clamp;
// a scaled value outside the int16_t range is undefined behavior in C.
323 *complexVectorPtr++ = (int16_t)
rintf(*iBufferPtr++ * scalar);
324 *complexVectorPtr++ = (int16_t)
rintf(*qBufferPtr++ * scalar);
static float rintf(float x)
Definition: config.h:45
FORCE_INLINE __m128i _mm_packs_epi32(__m128i a, __m128i b)
Definition: sse2neon.h:5050
FORCE_INLINE void _mm_store_si128(__m128i *p, __m128i a)
Definition: sse2neon.h:5937
float32x4_t __m128
Definition: sse2neon.h:235
FORCE_INLINE __m128i _mm_cvtps_epi32(__m128)
Definition: sse2neon.h:4036
FORCE_INLINE __m128 _mm_unpackhi_ps(__m128 a, __m128 b)
Definition: sse2neon.h:2920
FORCE_INLINE __m128 _mm_mul_ps(__m128 a, __m128 b)
Definition: sse2neon.h:2205
FORCE_INLINE __m128 _mm_set_ps1(float)
Definition: sse2neon.h:2437
FORCE_INLINE __m128 _mm_unpacklo_ps(__m128 a, __m128 b)
Definition: sse2neon.h:2942
FORCE_INLINE __m128 _mm_load_ps(const float *p)
Definition: sse2neon.h:1858
int64x2_t __m128i
Definition: sse2neon.h:244
FORCE_INLINE void _mm_store_ps(float *p, __m128 a)
Definition: sse2neon.h:2704
static void volk_32f_x2_s32f_interleave_16ic_a_sse2(lv_16sc_t *complexVector, const float *iBuffer, const float *qBuffer, const float scalar, unsigned int num_points)
Definition: volk_32f_x2_s32f_interleave_16ic.h:128
static void volk_32f_x2_s32f_interleave_16ic_a_sse(lv_16sc_t *complexVector, const float *iBuffer, const float *qBuffer, const float scalar, unsigned int num_points)
Definition: volk_32f_x2_s32f_interleave_16ic.h:184
static void volk_32f_x2_s32f_interleave_16ic_generic(lv_16sc_t *complexVector, const float *iBuffer, const float *qBuffer, const float scalar, unsigned int num_points)
Definition: volk_32f_x2_s32f_interleave_16ic.h:246
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:65
short complex lv_16sc_t
Definition: volk_complex.h:71