60 #ifndef INCLUDED_volk_32f_x2_interleave_32fc_a_H
61 #define INCLUDED_volk_32f_x2_interleave_32fc_a_H
67 #include <immintrin.h>
72 unsigned int num_points)
74 unsigned int number = 0;
75 float* complexVectorPtr = (
float*)complexVector;
76 const float* iBufferPtr = iBuffer;
77 const float* qBufferPtr = qBuffer;
79 const uint64_t eighthPoints = num_points / 8;
81 __m256 iValue, qValue, cplxValue1, cplxValue2, cplxValue;
82 for (; number < eighthPoints; number++) {
83 iValue = _mm256_load_ps(iBufferPtr);
84 qValue = _mm256_load_ps(qBufferPtr);
87 cplxValue1 = _mm256_unpacklo_ps(iValue, qValue);
89 cplxValue2 = _mm256_unpackhi_ps(iValue, qValue);
91 cplxValue = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
92 _mm256_store_ps(complexVectorPtr, cplxValue);
93 complexVectorPtr += 8;
95 cplxValue = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
96 _mm256_store_ps(complexVectorPtr, cplxValue);
97 complexVectorPtr += 8;
103 number = eighthPoints * 8;
104 for (; number < num_points; number++) {
105 *complexVectorPtr++ = *iBufferPtr++;
106 *complexVectorPtr++ = *qBufferPtr++;
113 #include <xmmintrin.h>
116 const float* iBuffer,
117 const float* qBuffer,
118 unsigned int num_points)
120 unsigned int number = 0;
121 float* complexVectorPtr = (
float*)complexVector;
122 const float* iBufferPtr = iBuffer;
123 const float* qBufferPtr = qBuffer;
125 const uint64_t quarterPoints = num_points / 4;
127 __m128 iValue, qValue, cplxValue;
128 for (; number < quarterPoints; number++) {
135 complexVectorPtr += 4;
140 complexVectorPtr += 4;
146 number = quarterPoints * 4;
147 for (; number < num_points; number++) {
148 *complexVectorPtr++ = *iBufferPtr++;
149 *complexVectorPtr++ = *qBufferPtr++;
156 #include <arm_neon.h>
/*
 * Interleave two float streams into one complex stream (ARM NEON).
 *
 * complexVector  Output; receives num_points (I, Q) pairs, written as
 *                consecutive floats: I first, then Q.
 * iBuffer        Input; num_points in-phase samples.
 * qBuffer        Input; num_points quadrature samples.
 * num_points     Number of complex samples to produce.
 */
static inline void volk_32f_x2_interleave_32fc_neon(lv_32fc_t* complexVector,
                                                    const float* iBuffer,
                                                    const float* qBuffer,
                                                    unsigned int num_points)
{
    const unsigned int quarter_points = num_points / 4;
    unsigned int number;
    float* complexVectorPtr = (float*)complexVector;

    float32x4x2_t complex_vec;
    for (number = 0; number < quarter_points; ++number) {
        complex_vec.val[0] = vld1q_f32(iBuffer);
        complex_vec.val[1] = vld1q_f32(qBuffer);
        // vst2q_f32 stores the two registers element-interleaved:
        // val[0][0], val[1][0], val[0][1], val[1][1], ...
        vst2q_f32(complexVectorPtr, complex_vec);

        // Four samples consumed from each input, eight floats produced.
        iBuffer += 4;
        qBuffer += 4;
        complexVectorPtr += 8;
    }

    // Scalar tail for the remaining (num_points % 4) samples.
    for (number = quarter_points * 4; number < num_points; ++number) {
        *complexVectorPtr++ = *iBuffer++;
        *complexVectorPtr++ = *qBuffer++;
    }
}
185 #ifdef LV_HAVE_GENERIC
188 const float* iBuffer,
189 const float* qBuffer,
190 unsigned int num_points)
192 float* complexVectorPtr = (
float*)complexVector;
193 const float* iBufferPtr = iBuffer;
194 const float* qBufferPtr = qBuffer;
197 for (number = 0; number < num_points; number++) {
198 *complexVectorPtr++ = *iBufferPtr++;
199 *complexVectorPtr++ = *qBufferPtr++;
207 #ifndef INCLUDED_volk_32f_x2_interleave_32fc_u_H
208 #define INCLUDED_volk_32f_x2_interleave_32fc_u_H
210 #include <inttypes.h>
214 #include <immintrin.h>
217 const float* iBuffer,
218 const float* qBuffer,
219 unsigned int num_points)
221 unsigned int number = 0;
222 float* complexVectorPtr = (
float*)complexVector;
223 const float* iBufferPtr = iBuffer;
224 const float* qBufferPtr = qBuffer;
226 const uint64_t eighthPoints = num_points / 8;
228 __m256 iValue, qValue, cplxValue1, cplxValue2, cplxValue;
229 for (; number < eighthPoints; number++) {
230 iValue = _mm256_loadu_ps(iBufferPtr);
231 qValue = _mm256_loadu_ps(qBufferPtr);
234 cplxValue1 = _mm256_unpacklo_ps(iValue, qValue);
236 cplxValue2 = _mm256_unpackhi_ps(iValue, qValue);
238 cplxValue = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
239 _mm256_storeu_ps(complexVectorPtr, cplxValue);
240 complexVectorPtr += 8;
242 cplxValue = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
243 _mm256_storeu_ps(complexVectorPtr, cplxValue);
244 complexVectorPtr += 8;
250 number = eighthPoints * 8;
251 for (; number < num_points; number++) {
252 *complexVectorPtr++ = *iBufferPtr++;
253 *complexVectorPtr++ = *qBufferPtr++;
float32x4_t __m128
Definition: sse2neon.h:235
FORCE_INLINE __m128 _mm_unpackhi_ps(__m128 a, __m128 b)
Definition: sse2neon.h:2920
FORCE_INLINE __m128 _mm_unpacklo_ps(__m128 a, __m128 b)
Definition: sse2neon.h:2942
FORCE_INLINE __m128 _mm_load_ps(const float *p)
Definition: sse2neon.h:1858
FORCE_INLINE void _mm_store_ps(float *p, __m128 a)
Definition: sse2neon.h:2704
static void volk_32f_x2_interleave_32fc_a_avx(lv_32fc_t *complexVector, const float *iBuffer, const float *qBuffer, unsigned int num_points)
Definition: volk_32f_x2_interleave_32fc.h:69
static void volk_32f_x2_interleave_32fc_generic(lv_32fc_t *complexVector, const float *iBuffer, const float *qBuffer, unsigned int num_points)
Definition: volk_32f_x2_interleave_32fc.h:187
static void volk_32f_x2_interleave_32fc_neon(lv_32fc_t *complexVector, const float *iBuffer, const float *qBuffer, unsigned int num_points)
Definition: volk_32f_x2_interleave_32fc.h:158
static void volk_32f_x2_interleave_32fc_u_avx(lv_32fc_t *complexVector, const float *iBuffer, const float *qBuffer, unsigned int num_points)
Definition: volk_32f_x2_interleave_32fc.h:216
static void volk_32f_x2_interleave_32fc_a_sse(lv_32fc_t *complexVector, const float *iBuffer, const float *qBuffer, unsigned int num_points)
Definition: volk_32f_x2_interleave_32fc.h:115
float complex lv_32fc_t
Definition: volk_complex.h:74