60 #ifndef INCLUDED_volk_32fc_s32f_deinterleave_real_16i_a_H
61 #define INCLUDED_volk_32fc_s32f_deinterleave_real_16i_a_H
69 #include <immintrin.h>
/*
 * AVX2 kernel (aligned-load variant): scales selected floats of complexVector
 * by `scalar` and converts them to 16-bit integers, eight outputs per
 * vector iteration, with a scalar loop for the remainder.
 *
 * NOTE(review): this listing is an excerpt; the return type, the
 * complexVector/scalar parameters, the declarations of `a` and `b`, the store
 * of `b` to the output and the closing braces are not visible here.
 */
72 volk_32fc_s32f_deinterleave_real_16i_a_avx2(int16_t* iBuffer,
75 unsigned int num_points)
77 unsigned int number = 0;
/* Number of full 8-sample groups handled by the vector loop. */
78 const unsigned int eighthPoints = num_points / 8;
/* View the complex input as a flat float array. */
80 const float* complexVectorPtr = (
float*)complexVector;
81 int16_t* iBufferPtr = iBuffer;
/* Broadcast the scale factor to all eight float lanes. */
83 __m256 vScalar = _mm256_set1_ps(scalar);
85 __m256 cplxValue1, cplxValue2, iValue;
/* Dword permutation applied after the pack below; only the low four dwords
 * (indices 0, 4, 1, 5) feed the 128-bit half that is extracted. */
89 __m256i idx = _mm256_set_epi32(3, 3, 3, 3, 5, 1, 4, 0);
91 for (; number < eighthPoints; number++) {
/* _mm256_load_ps is the aligned load: the address must be 32-byte aligned. */
92 cplxValue1 = _mm256_load_ps(complexVectorPtr);
93 complexVectorPtr += 8;
95 cplxValue2 = _mm256_load_ps(complexVectorPtr);
96 complexVectorPtr += 8;
/* Within each 128-bit lane keep floats 0 and 2 of each source register, i.e.
 * the even-indexed floats (presumably the real parts of interleaved re/im
 * pairs -- confirm against lv_32fc_t layout). */
99 iValue = _mm256_shuffle_ps(cplxValue1, cplxValue2,
_MM_SHUFFLE(2, 0, 2, 0));
101 iValue = _mm256_mul_ps(iValue, vScalar);
/* float -> int32 using the current SIMD rounding mode, then a saturating pack
 * to int16. The pack and the preceding shuffle both work per 128-bit lane, so
 * the permute regroups the dwords before the low 128 bits are extracted. */
104 a = _mm256_cvtps_epi32(iValue);
105 a = _mm256_packs_epi32(a, a);
106 a = _mm256_permutevar8x32_epi32(a, idx);
107 b = _mm256_extracti128_si256(a, 0);
/* Scalar tail for the remaining num_points % 8 samples. */
113 number = eighthPoints * 8;
114 iBufferPtr = &iBuffer[number];
115 for (; number < num_points; number++) {
/* The (int16_t) cast truncates toward zero and does not saturate, unlike the
 * vector path above.
 * NOTE(review): only one pointer increment per output is visible here; if the
 * omitted lines do not advance complexVectorPtr again, every other iteration
 * would read the adjacent float of the pair -- confirm against full source. */
116 *iBufferPtr++ = (int16_t)(*complexVectorPtr++ * scalar);
125 #include <xmmintrin.h>
131 unsigned int num_points)
/*
 * SSE kernel body: processes four output samples per vector iteration and
 * finishes with a scalar loop.
 *
 * NOTE(review): this listing is an excerpt; the start of the signature, the
 * vector loads/shuffle/multiply, the declaration and filling of floatBuffer,
 * and the closing braces are not visible here.
 */
133 unsigned int number = 0;
/* Number of full 4-sample groups handled by the vector loop. */
134 const unsigned int quarterPoints = num_points / 4;
/* View the complex input as a flat float array. */
136 const float* complexVectorPtr = (
float*)complexVector;
137 int16_t* iBufferPtr = iBuffer;
141 __m128 cplxValue1, cplxValue2, iValue;
145 for (; number < quarterPoints; number++) {
/* The pointer advances by 4 floats twice per iteration (8 floats total);
 * presumably one advance per vector load -- the loads are not shown. */
147 complexVectorPtr += 4;
150 complexVectorPtr += 4;
/* Convert four floats from floatBuffer to int16 one at a time. The C cast
 * truncates toward zero and does not saturate. */
158 *iBufferPtr++ = (int16_t)(floatBuffer[0]);
159 *iBufferPtr++ = (int16_t)(floatBuffer[1]);
160 *iBufferPtr++ = (int16_t)(floatBuffer[2]);
161 *iBufferPtr++ = (int16_t)(floatBuffer[3]);
/* Scalar tail for the remaining num_points % 4 samples. */
164 number = quarterPoints * 4;
165 iBufferPtr = &iBuffer[number];
166 for (; number < num_points; number++) {
/* NOTE(review): only one pointer increment per output is visible here; confirm
 * in the full source that the second float of each pair is also skipped. */
167 *iBufferPtr++ = (int16_t)(*complexVectorPtr++ * scalar);
175 #ifdef LV_HAVE_GENERIC
181 unsigned int num_points)
/*
 * Portable scalar body: for each of num_points outputs, multiplies a float
 * read from the input by `scalar` and stores it as int16_t.
 *
 * NOTE(review): this listing is an excerpt; the start of the signature and the
 * closing braces are not visible here.
 */
/* View the complex input as a flat float array. */
183 const float* complexVectorPtr = (
float*)complexVector;
184 int16_t* iBufferPtr = iBuffer;
185 unsigned int number = 0;
186 for (number = 0; number < num_points; number++) {
/* The (int16_t) cast truncates toward zero and does not saturate.
 * NOTE(review): only one pointer increment per output is visible here; confirm
 * in the full source that the second float of each pair is also skipped. */
187 *iBufferPtr++ = (int16_t)(*complexVectorPtr++ * scalar);
196 #ifndef INCLUDED_volk_32fc_s32f_deinterleave_real_16i_u_H
197 #define INCLUDED_volk_32fc_s32f_deinterleave_real_16i_u_H
199 #include <inttypes.h>
204 #include <immintrin.h>
/*
 * AVX2 kernel (unaligned-load variant): scales selected floats of
 * complexVector by `scalar` and converts them to 16-bit integers, eight
 * outputs per vector iteration, with a scalar loop for the remainder.
 *
 * NOTE(review): this listing is an excerpt; the return type, the
 * complexVector/scalar parameters, the declarations of `a` and `b`, the store
 * of `b` to the output and the closing braces are not visible here.
 */
207 volk_32fc_s32f_deinterleave_real_16i_u_avx2(int16_t* iBuffer,
210 unsigned int num_points)
212 unsigned int number = 0;
/* Number of full 8-sample groups handled by the vector loop. */
213 const unsigned int eighthPoints = num_points / 8;
/* View the complex input as a flat float array. */
215 const float* complexVectorPtr = (
float*)complexVector;
216 int16_t* iBufferPtr = iBuffer;
/* Broadcast the scale factor to all eight float lanes. */
218 __m256 vScalar = _mm256_set1_ps(scalar);
220 __m256 cplxValue1, cplxValue2, iValue;
/* Dword permutation applied after the pack below; only the low four dwords
 * (indices 0, 4, 1, 5) feed the 128-bit half that is extracted. */
224 __m256i idx = _mm256_set_epi32(3, 3, 3, 3, 5, 1, 4, 0);
226 for (; number < eighthPoints; number++) {
/* _mm256_loadu_ps places no alignment requirement on the source address. */
227 cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
228 complexVectorPtr += 8;
230 cplxValue2 = _mm256_loadu_ps(complexVectorPtr);
231 complexVectorPtr += 8;
/* Within each 128-bit lane keep floats 0 and 2 of each source register, i.e.
 * the even-indexed floats (presumably the real parts of interleaved re/im
 * pairs -- confirm against lv_32fc_t layout). */
234 iValue = _mm256_shuffle_ps(cplxValue1, cplxValue2,
_MM_SHUFFLE(2, 0, 2, 0));
236 iValue = _mm256_mul_ps(iValue, vScalar);
/* float -> int32 using the current SIMD rounding mode, then a saturating pack
 * to int16. The pack and the preceding shuffle both work per 128-bit lane, so
 * the permute regroups the dwords before the low 128 bits are extracted. */
239 a = _mm256_cvtps_epi32(iValue);
240 a = _mm256_packs_epi32(a, a);
241 a = _mm256_permutevar8x32_epi32(a, idx);
242 b = _mm256_extracti128_si256(a, 0);
/* Scalar tail for the remaining num_points % 8 samples. */
248 number = eighthPoints * 8;
249 iBufferPtr = &iBuffer[number];
250 for (; number < num_points; number++) {
/* The (int16_t) cast truncates toward zero and does not saturate, unlike the
 * vector path above.
 * NOTE(review): only one pointer increment per output is visible here; if the
 * omitted lines do not advance complexVectorPtr again, every other iteration
 * would read the adjacent float of the pair -- confirm against full source. */
251 *iBufferPtr++ = (int16_t)(*complexVectorPtr++ * scalar);
FORCE_INLINE void _mm_store_si128(__m128i *p, __m128i a)
Definition: sse2neon.h:5937
float32x4_t __m128
Definition: sse2neon.h:235
#define _mm_shuffle_ps(a, b, imm)
Definition: sse2neon.h:2586
FORCE_INLINE __m128 _mm_mul_ps(__m128 a, __m128 b)
Definition: sse2neon.h:2205
FORCE_INLINE __m128 _mm_set_ps1(float)
Definition: sse2neon.h:2437
FORCE_INLINE void _mm_storeu_si128(__m128i *p, __m128i a)
Definition: sse2neon.h:6010
#define _MM_FROUND_TO_ZERO
Definition: sse2neon.h:202
#define _MM_SHUFFLE(fp3, fp2, fp1, fp0)
Definition: sse2neon.h:195
FORCE_INLINE __m128 _mm_load_ps(const float *p)
Definition: sse2neon.h:1858
int64x2_t __m128i
Definition: sse2neon.h:244
FORCE_INLINE void _mm_store_ps(float *p, __m128 a)
Definition: sse2neon.h:2704
static void volk_32fc_s32f_deinterleave_real_16i_generic(int16_t *iBuffer, const lv_32fc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_32fc_s32f_deinterleave_real_16i.h:178
static void volk_32fc_s32f_deinterleave_real_16i_a_sse(int16_t *iBuffer, const lv_32fc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_32fc_s32f_deinterleave_real_16i.h:128
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:65
float complex lv_32fc_t
Definition: volk_complex.h:74