Vector Optimized Library of Kernels  3.0.0
Architecture-tuned implementations of math kernels
volk_32fc_deinterleave_64f_x2.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of VOLK
6  *
7  * SPDX-License-Identifier: LGPL-3.0-or-later
8  */
9 
60 #ifndef INCLUDED_volk_32fc_deinterleave_64f_x2_u_H
61 #define INCLUDED_volk_32fc_deinterleave_64f_x2_u_H
62 
63 #include <inttypes.h>
64 #include <stdio.h>
65 
66 #ifdef LV_HAVE_AVX
67 #include <immintrin.h>
68 
69 static inline void volk_32fc_deinterleave_64f_x2_u_avx(double* iBuffer,
70  double* qBuffer,
71  const lv_32fc_t* complexVector,
72  unsigned int num_points)
73 {
74  unsigned int number = 0;
75 
76  const float* complexVectorPtr = (float*)complexVector;
77  double* iBufferPtr = iBuffer;
78  double* qBufferPtr = qBuffer;
79 
80  const unsigned int quarterPoints = num_points / 4;
81  __m256 cplxValue;
82  __m128 complexH, complexL, fVal;
83  __m256d dVal;
84 
85  for (; number < quarterPoints; number++) {
86 
87  cplxValue = _mm256_loadu_ps(complexVectorPtr);
88  complexVectorPtr += 8;
89 
90  complexH = _mm256_extractf128_ps(cplxValue, 1);
91  complexL = _mm256_extractf128_ps(cplxValue, 0);
92 
93  // Arrange in i1i2i1i2 format
94  fVal = _mm_shuffle_ps(complexL, complexH, _MM_SHUFFLE(2, 0, 2, 0));
95  dVal = _mm256_cvtps_pd(fVal);
96  _mm256_storeu_pd(iBufferPtr, dVal);
97 
98  // Arrange in q1q2q1q2 format
99  fVal = _mm_shuffle_ps(complexL, complexH, _MM_SHUFFLE(3, 1, 3, 1));
100  dVal = _mm256_cvtps_pd(fVal);
101  _mm256_storeu_pd(qBufferPtr, dVal);
102 
103  iBufferPtr += 4;
104  qBufferPtr += 4;
105  }
106 
107  number = quarterPoints * 4;
108  for (; number < num_points; number++) {
109  *iBufferPtr++ = *complexVectorPtr++;
110  *qBufferPtr++ = *complexVectorPtr++;
111  }
112 }
113 #endif /* LV_HAVE_AVX */
114 
115 #ifdef LV_HAVE_SSE2
116 #include <emmintrin.h>
117 
118 static inline void volk_32fc_deinterleave_64f_x2_u_sse2(double* iBuffer,
119  double* qBuffer,
120  const lv_32fc_t* complexVector,
121  unsigned int num_points)
122 {
123  unsigned int number = 0;
124 
125  const float* complexVectorPtr = (float*)complexVector;
126  double* iBufferPtr = iBuffer;
127  double* qBufferPtr = qBuffer;
128 
129  const unsigned int halfPoints = num_points / 2;
130  __m128 cplxValue, fVal;
131  __m128d dVal;
132 
133  for (; number < halfPoints; number++) {
134 
135  cplxValue = _mm_loadu_ps(complexVectorPtr);
136  complexVectorPtr += 4;
137 
138  // Arrange in i1i2i1i2 format
139  fVal = _mm_shuffle_ps(cplxValue, cplxValue, _MM_SHUFFLE(2, 0, 2, 0));
140  dVal = _mm_cvtps_pd(fVal);
141  _mm_storeu_pd(iBufferPtr, dVal);
142 
143  // Arrange in q1q2q1q2 format
144  fVal = _mm_shuffle_ps(cplxValue, cplxValue, _MM_SHUFFLE(3, 1, 3, 1));
145  dVal = _mm_cvtps_pd(fVal);
146  _mm_storeu_pd(qBufferPtr, dVal);
147 
148  iBufferPtr += 2;
149  qBufferPtr += 2;
150  }
151 
152  number = halfPoints * 2;
153  for (; number < num_points; number++) {
154  *iBufferPtr++ = *complexVectorPtr++;
155  *qBufferPtr++ = *complexVectorPtr++;
156  }
157 }
158 #endif /* LV_HAVE_SSE2 */
159 
160 #ifdef LV_HAVE_GENERIC
161 
162 static inline void volk_32fc_deinterleave_64f_x2_generic(double* iBuffer,
163  double* qBuffer,
164  const lv_32fc_t* complexVector,
165  unsigned int num_points)
166 {
167  unsigned int number = 0;
168  const float* complexVectorPtr = (float*)complexVector;
169  double* iBufferPtr = iBuffer;
170  double* qBufferPtr = qBuffer;
171 
172  for (number = 0; number < num_points; number++) {
173  *iBufferPtr++ = (double)*complexVectorPtr++;
174  *qBufferPtr++ = (double)*complexVectorPtr++;
175  }
176 }
177 #endif /* LV_HAVE_GENERIC */
178 
179 #endif /* INCLUDED_volk_32fc_deinterleave_64f_x2_u_H */
180 #ifndef INCLUDED_volk_32fc_deinterleave_64f_x2_a_H
181 #define INCLUDED_volk_32fc_deinterleave_64f_x2_a_H
182 
183 #include <inttypes.h>
184 #include <stdio.h>
185 
186 #ifdef LV_HAVE_AVX
187 #include <immintrin.h>
188 
189 static inline void volk_32fc_deinterleave_64f_x2_a_avx(double* iBuffer,
190  double* qBuffer,
191  const lv_32fc_t* complexVector,
192  unsigned int num_points)
193 {
194  unsigned int number = 0;
195 
196  const float* complexVectorPtr = (float*)complexVector;
197  double* iBufferPtr = iBuffer;
198  double* qBufferPtr = qBuffer;
199 
200  const unsigned int quarterPoints = num_points / 4;
201  __m256 cplxValue;
202  __m128 complexH, complexL, fVal;
203  __m256d dVal;
204 
205  for (; number < quarterPoints; number++) {
206 
207  cplxValue = _mm256_load_ps(complexVectorPtr);
208  complexVectorPtr += 8;
209 
210  complexH = _mm256_extractf128_ps(cplxValue, 1);
211  complexL = _mm256_extractf128_ps(cplxValue, 0);
212 
213  // Arrange in i1i2i1i2 format
214  fVal = _mm_shuffle_ps(complexL, complexH, _MM_SHUFFLE(2, 0, 2, 0));
215  dVal = _mm256_cvtps_pd(fVal);
216  _mm256_store_pd(iBufferPtr, dVal);
217 
218  // Arrange in q1q2q1q2 format
219  fVal = _mm_shuffle_ps(complexL, complexH, _MM_SHUFFLE(3, 1, 3, 1));
220  dVal = _mm256_cvtps_pd(fVal);
221  _mm256_store_pd(qBufferPtr, dVal);
222 
223  iBufferPtr += 4;
224  qBufferPtr += 4;
225  }
226 
227  number = quarterPoints * 4;
228  for (; number < num_points; number++) {
229  *iBufferPtr++ = *complexVectorPtr++;
230  *qBufferPtr++ = *complexVectorPtr++;
231  }
232 }
233 #endif /* LV_HAVE_AVX */
234 
235 #ifdef LV_HAVE_SSE2
236 #include <emmintrin.h>
237 
238 static inline void volk_32fc_deinterleave_64f_x2_a_sse2(double* iBuffer,
239  double* qBuffer,
240  const lv_32fc_t* complexVector,
241  unsigned int num_points)
242 {
243  unsigned int number = 0;
244 
245  const float* complexVectorPtr = (float*)complexVector;
246  double* iBufferPtr = iBuffer;
247  double* qBufferPtr = qBuffer;
248 
249  const unsigned int halfPoints = num_points / 2;
250  __m128 cplxValue, fVal;
251  __m128d dVal;
252 
253  for (; number < halfPoints; number++) {
254 
255  cplxValue = _mm_load_ps(complexVectorPtr);
256  complexVectorPtr += 4;
257 
258  // Arrange in i1i2i1i2 format
259  fVal = _mm_shuffle_ps(cplxValue, cplxValue, _MM_SHUFFLE(2, 0, 2, 0));
260  dVal = _mm_cvtps_pd(fVal);
261  _mm_store_pd(iBufferPtr, dVal);
262 
263  // Arrange in q1q2q1q2 format
264  fVal = _mm_shuffle_ps(cplxValue, cplxValue, _MM_SHUFFLE(3, 1, 3, 1));
265  dVal = _mm_cvtps_pd(fVal);
266  _mm_store_pd(qBufferPtr, dVal);
267 
268  iBufferPtr += 2;
269  qBufferPtr += 2;
270  }
271 
272  number = halfPoints * 2;
273  for (; number < num_points; number++) {
274  *iBufferPtr++ = *complexVectorPtr++;
275  *qBufferPtr++ = *complexVectorPtr++;
276  }
277 }
278 #endif /* LV_HAVE_SSE2 */
279 
280 #ifdef LV_HAVE_GENERIC
281 
282 static inline void volk_32fc_deinterleave_64f_x2_a_generic(double* iBuffer,
283  double* qBuffer,
284  const lv_32fc_t* complexVector,
285  unsigned int num_points)
286 {
287  unsigned int number = 0;
288  const float* complexVectorPtr = (float*)complexVector;
289  double* iBufferPtr = iBuffer;
290  double* qBufferPtr = qBuffer;
291 
292  for (number = 0; number < num_points; number++) {
293  *iBufferPtr++ = (double)*complexVectorPtr++;
294  *qBufferPtr++ = (double)*complexVectorPtr++;
295  }
296 }
297 #endif /* LV_HAVE_GENERIC */
298 
299 #ifdef LV_HAVE_NEONV8
300 #include <arm_neon.h>
301 
/*
 * Deinterleaves a complex-float vector into two double-precision arrays
 * (ARMv8 NEON implementation).
 *
 * iBuffer       - output: real (in-phase) parts, widened to double
 * qBuffer       - output: imaginary (quadrature) parts, widened to double
 * complexVector - input: num_points interleaved (re, im) float pairs
 * num_points    - number of complex samples to process
 *
 * Two complex samples are handled per vector iteration; an odd trailing
 * sample is handled by the scalar loop at the end.
 */
static inline void volk_32fc_deinterleave_64f_x2_neon(double* iBuffer,
                                                      double* qBuffer,
                                                      const lv_32fc_t* complexVector,
                                                      unsigned int num_points)
{
    const unsigned int half_points = num_points / 2;
    /* Read the complex input as a flat float stream: re0, im0, re1, im1, ...
     * Keep the const qualifier instead of casting it away. */
    const float* complexVectorPtr = (const float*)complexVector;
    double* iBufferPtr = iBuffer;
    double* qBufferPtr = qBuffer;
    float32x2x2_t complexInput;
    float64x2_t iVal, qVal;
    unsigned int number;

    for (number = 0; number < half_points; number++) {
        /* Structured load: val[0] receives the two real parts,
         * val[1] the two imaginary parts */
        complexInput = vld2_f32(complexVectorPtr);

        /* Widen each pair of floats to a pair of doubles */
        iVal = vcvt_f64_f32(complexInput.val[0]);
        qVal = vcvt_f64_f32(complexInput.val[1]);

        vst1q_f64(iBufferPtr, iVal);
        vst1q_f64(qBufferPtr, qVal);

        complexVectorPtr += 4;
        iBufferPtr += 2;
        qBufferPtr += 2;
    }

    /* Scalar tail for a possible odd trailing sample */
    for (number = half_points * 2; number < num_points; number++) {
        *iBufferPtr++ = (double)*complexVectorPtr++;
        *qBufferPtr++ = (double)*complexVectorPtr++;
    }
}
334 #endif /* LV_HAVE_NEONV8 */
335 
336 #endif /* INCLUDED_volk_32fc_deinterleave_64f_x2_a_H */
float32x4_t __m128
Definition: sse2neon.h:235
#define _mm_shuffle_ps(a, b, imm)
Definition: sse2neon.h:2586
FORCE_INLINE __m128d _mm_cvtps_pd(__m128 a)
Definition: sse2neon.h:4096
float32x4_t __m128d
Definition: sse2neon.h:242
FORCE_INLINE void _mm_storeu_pd(double *mem_addr, __m128d a)
Definition: sse2neon.h:6003
FORCE_INLINE __m128 _mm_loadu_ps(const float *p)
Definition: sse2neon.h:1941
FORCE_INLINE void _mm_store_pd(double *mem_addr, __m128d a)
Definition: sse2neon.h:5897
#define _MM_SHUFFLE(fp3, fp2, fp1, fp0)
Definition: sse2neon.h:195
FORCE_INLINE __m128 _mm_load_ps(const float *p)
Definition: sse2neon.h:1858
static void volk_32fc_deinterleave_64f_x2_a_avx(double *iBuffer, double *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_64f_x2.h:189
static void volk_32fc_deinterleave_64f_x2_u_sse2(double *iBuffer, double *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_64f_x2.h:118
static void volk_32fc_deinterleave_64f_x2_generic(double *iBuffer, double *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_64f_x2.h:162
static void volk_32fc_deinterleave_64f_x2_a_sse2(double *iBuffer, double *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_64f_x2.h:238
static void volk_32fc_deinterleave_64f_x2_a_generic(double *iBuffer, double *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_64f_x2.h:282
static void volk_32fc_deinterleave_64f_x2_u_avx(double *iBuffer, double *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_64f_x2.h:69
float complex lv_32fc_t
Definition: volk_complex.h:74