60 #ifndef INCLUDED_volk_32fc_deinterleave_32f_x2_a_H
61 #define INCLUDED_volk_32fc_deinterleave_32f_x2_a_H
67 #include <immintrin.h>
71 unsigned int num_points)
/*
 * Aligned AVX kernel body: splits the interleaved complex input into two
 * float streams. The first float of every pair goes to iBuffer and the
 * second to qBuffer. Eight complex samples are handled per vector
 * iteration; any remainder is copied one sample at a time.
 *
 * The aligned load/store intrinsics used here (_mm256_load_ps and
 * _mm256_store_ps) require 32-byte aligned addresses.
 */
/* View the complex input as a flat float array: re, im, re, im, ... */
73 const float* complexVectorPtr = (
float*)complexVector;
74 float* iBufferPtr = iBuffer;
75 float* qBufferPtr = qBuffer;
77 unsigned int number = 0;
/* Number of full 8-sample vector iterations. */
79 const unsigned int eighthPoints = num_points / 8;
80 __m256 cplxValue1, cplxValue2, complex1, complex2, iValue, qValue;
81 for (; number < eighthPoints; number++) {
/* Two loads of 8 floats each = 8 complex samples (c0..c3, then c4..c7). */
82 cplxValue1 = _mm256_load_ps(complexVectorPtr);
83 complexVectorPtr += 8;
85 cplxValue2 = _mm256_load_ps(complexVectorPtr);
86 complexVectorPtr += 8;
/*
 * Regroup the 128-bit halves so the in-lane shuffles below emit samples in
 * order: 0x20 -> [low half of cplxValue1 | low half of cplxValue2]
 *               = c0 c1 | c4 c5
 *        0x31 -> [high half of cplxValue1 | high half of cplxValue2]
 *               = c2 c3 | c6 c7
 */
88 complex1 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
89 complex2 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
/* 0x88 picks elements 0 and 2 of each source per lane: the first floats. */
92 iValue = _mm256_shuffle_ps(complex1, complex2, 0x88);
/* 0xdd picks elements 1 and 3 of each source per lane: the second floats. */
94 qValue = _mm256_shuffle_ps(complex1, complex2, 0xdd);
96 _mm256_store_ps(iBufferPtr, iValue);
97 _mm256_store_ps(qBufferPtr, qValue);
/*
 * NOTE(review): original lines 98-102 are missing from this listing. No
 * visible line advances iBufferPtr/qBufferPtr or closes the loop; confirm
 * against the full source.
 */
/* Scalar tail: handle the num_points % 8 samples left after the vector loop. */
103 number = eighthPoints * 8;
104 for (; number < num_points; number++) {
105 *iBufferPtr++ = *complexVectorPtr++;
106 *qBufferPtr++ = *complexVectorPtr++;
112 #include <xmmintrin.h>
117 unsigned int num_points)
/*
 * Aligned SSE kernel body: walks the interleaved complex input four samples
 * per vector iteration, then copies any remainder one sample at a time with
 * the first float of each pair going to iBuffer and the second to qBuffer.
 *
 * NOTE(review): the loads, shuffles and stores of the vector loop (original
 * lines 127, 130 and 132-144) are missing from this listing; only the input
 * pointer advances are visible. Confirm the details against the full source.
 */
/* View the complex input as a flat float array: re, im, re, im, ... */
119 const float* complexVectorPtr = (
float*)complexVector;
120 float* iBufferPtr = iBuffer;
121 float* qBufferPtr = qBuffer;
123 unsigned int number = 0;
/* Number of full 4-sample vector iterations. */
124 const unsigned int quarterPoints = num_points / 4;
125 __m128 cplxValue1, cplxValue2, iValue, qValue;
126 for (; number < quarterPoints; number++) {
/* The input pointer moves by 4 floats twice: 8 floats = 4 complex samples. */
128 complexVectorPtr += 4;
131 complexVectorPtr += 4;
/* Scalar tail: handle the num_points % 4 samples left after the vector loop. */
145 number = quarterPoints * 4;
146 for (; number < num_points; number++) {
147 *iBufferPtr++ = *complexVectorPtr++;
148 *qBufferPtr++ = *complexVectorPtr++;
155 #include <arm_neon.h>
160 unsigned int num_points)
/*
 * NEON kernel body: splits the interleaved complex input into two float
 * streams, four complex samples per vector iteration, then copies any
 * remainder one sample at a time.
 */
162 unsigned int number = 0;
/* Number of full 4-sample vector iterations. */
163 unsigned int quarter_points = num_points / 4;
/* View the complex input as a flat float array: re, im, re, im, ... */
164 const float* complexVectorPtr = (
float*)complexVector;
165 float* iBufferPtr = iBuffer;
166 float* qBufferPtr = qBuffer;
167 float32x4x2_t complexInput;
169 for (number = 0; number < quarter_points; number++) {
/*
 * vld2q_f32 reads 8 floats and de-interleaves them while loading:
 * val[0] receives the even-indexed floats, val[1] the odd-indexed ones.
 */
170 complexInput = vld2q_f32(complexVectorPtr);
171 vst1q_f32(iBufferPtr, complexInput.val[0]);
172 vst1q_f32(qBufferPtr, complexInput.val[1]);
173 complexVectorPtr += 8;
/*
 * NOTE(review): original lines 174-177 are missing from this listing. No
 * visible line advances iBufferPtr/qBufferPtr or closes the loop; confirm
 * against the full source.
 */
/* Scalar tail: handle the num_points % 4 samples left after the vector loop. */
178 for (number = quarter_points * 4; number < num_points; number++) {
179 *iBufferPtr++ = *complexVectorPtr++;
180 *qBufferPtr++ = *complexVectorPtr++;
186 #ifdef LV_HAVE_GENERIC
191 unsigned int num_points)
/*
 * Portable scalar kernel body: for each of the num_points complex samples,
 * copies the first float of the pair to iBuffer and the second to qBuffer.
 */
/* View the complex input as a flat float array: re, im, re, im, ... */
193 const float* complexVectorPtr = (
float*)complexVector;
194 float* iBufferPtr = iBuffer;
195 float* qBufferPtr = qBuffer;
/*
 * NOTE(review): the declaration of `number` (original line 196) is missing
 * from this listing; confirm against the full source.
 */
197 for (number = 0; number < num_points; number++) {
198 *iBufferPtr++ = *complexVectorPtr++;
199 *qBufferPtr++ = *complexVectorPtr++;
207 #ifndef INCLUDED_volk_32fc_deinterleave_32f_x2_u_H
208 #define INCLUDED_volk_32fc_deinterleave_32f_x2_u_H
210 #include <inttypes.h>
214 #include <immintrin.h>
218 unsigned int num_points)
/*
 * Unaligned AVX kernel body: splits the interleaved complex input into two
 * float streams. The first float of every pair goes to iBuffer and the
 * second to qBuffer. Eight complex samples are handled per vector
 * iteration; any remainder is copied one sample at a time.
 *
 * Uses _mm256_loadu_ps and _mm256_storeu_ps, which place no alignment
 * requirement on the addresses they access.
 */
/* View the complex input as a flat float array: re, im, re, im, ... */
220 const float* complexVectorPtr = (
float*)complexVector;
221 float* iBufferPtr = iBuffer;
222 float* qBufferPtr = qBuffer;
224 unsigned int number = 0;
/* Number of full 8-sample vector iterations. */
226 const unsigned int eighthPoints = num_points / 8;
227 __m256 cplxValue1, cplxValue2, complex1, complex2, iValue, qValue;
228 for (; number < eighthPoints; number++) {
/* Two loads of 8 floats each = 8 complex samples (c0..c3, then c4..c7). */
229 cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
230 complexVectorPtr += 8;
232 cplxValue2 = _mm256_loadu_ps(complexVectorPtr);
233 complexVectorPtr += 8;
/*
 * Regroup the 128-bit halves so the in-lane shuffles below emit samples in
 * order: 0x20 -> [low half of cplxValue1 | low half of cplxValue2]
 *               = c0 c1 | c4 c5
 *        0x31 -> [high half of cplxValue1 | high half of cplxValue2]
 *               = c2 c3 | c6 c7
 */
235 complex1 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
236 complex2 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
/* 0x88 picks elements 0 and 2 of each source per lane: the first floats. */
239 iValue = _mm256_shuffle_ps(complex1, complex2, 0x88);
/* 0xdd picks elements 1 and 3 of each source per lane: the second floats. */
241 qValue = _mm256_shuffle_ps(complex1, complex2, 0xdd);
243 _mm256_storeu_ps(iBufferPtr, iValue);
244 _mm256_storeu_ps(qBufferPtr, qValue);
/*
 * NOTE(review): original lines 245-249 are missing from this listing. No
 * visible line advances iBufferPtr/qBufferPtr or closes the loop; confirm
 * against the full source.
 */
/* Scalar tail: handle the num_points % 8 samples left after the vector loop. */
250 number = eighthPoints * 8;
251 for (; number < num_points; number++) {
252 *iBufferPtr++ = *complexVectorPtr++;
253 *qBufferPtr++ = *complexVectorPtr++;
float32x4_t __m128
Definition: sse2neon.h:235
#define _mm_shuffle_ps(a, b, imm)
Definition: sse2neon.h:2586
#define _MM_SHUFFLE(fp3, fp2, fp1, fp0)
Definition: sse2neon.h:195
FORCE_INLINE __m128 _mm_load_ps(const float *p)
Definition: sse2neon.h:1858
FORCE_INLINE void _mm_store_ps(float *p, __m128 a)
Definition: sse2neon.h:2704
static void volk_32fc_deinterleave_32f_x2_generic(float *iBuffer, float *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_32f_x2.h:188
static void volk_32fc_deinterleave_32f_x2_a_avx(float *iBuffer, float *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_32f_x2.h:68
static void volk_32fc_deinterleave_32f_x2_a_sse(float *iBuffer, float *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_32f_x2.h:114
static void volk_32fc_deinterleave_32f_x2_neon(float *iBuffer, float *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_32f_x2.h:157
static void volk_32fc_deinterleave_32f_x2_u_avx(float *iBuffer, float *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_32f_x2.h:215
float complex lv_32fc_t
Definition: volk_complex.h:74