Vector Optimized Library of Kernels  3.0.0
Architecture-tuned implementations of math kernels
volk_32fc_deinterleave_real_32f.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of VOLK
6  *
7  * SPDX-License-Identifier: LGPL-3.0-or-later
8  */
9 
57 #ifndef INCLUDED_volk_32fc_deinterleave_real_32f_a_H
58 #define INCLUDED_volk_32fc_deinterleave_real_32f_a_H
59 
60 #include <inttypes.h>
61 #include <stdio.h>
62 
63 #ifdef LV_HAVE_AVX2
64 #include <immintrin.h>
65 
66 static inline void volk_32fc_deinterleave_real_32f_a_avx2(float* iBuffer,
67  const lv_32fc_t* complexVector,
68  unsigned int num_points)
69 {
70  unsigned int number = 0;
71  const unsigned int eighthPoints = num_points / 8;
72 
73  const float* complexVectorPtr = (const float*)complexVector;
74  float* iBufferPtr = iBuffer;
75 
76  __m256 cplxValue1, cplxValue2;
77  __m256 iValue;
78  __m256i idx = _mm256_set_epi32(7, 6, 3, 2, 5, 4, 1, 0);
79  for (; number < eighthPoints; number++) {
80 
81  cplxValue1 = _mm256_load_ps(complexVectorPtr);
82  complexVectorPtr += 8;
83 
84  cplxValue2 = _mm256_load_ps(complexVectorPtr);
85  complexVectorPtr += 8;
86 
87  // Arrange in i1i2i3i4 format
88  iValue = _mm256_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
89  iValue = _mm256_permutevar8x32_ps(iValue, idx);
90 
91  _mm256_store_ps(iBufferPtr, iValue);
92 
93  iBufferPtr += 8;
94  }
95 
96  number = eighthPoints * 8;
97  for (; number < num_points; number++) {
98  *iBufferPtr++ = *complexVectorPtr++;
99  complexVectorPtr++;
100  }
101 }
102 #endif /* LV_HAVE_AVX2 */
103 
104 #ifdef LV_HAVE_SSE
105 #include <xmmintrin.h>
106 
107 static inline void volk_32fc_deinterleave_real_32f_a_sse(float* iBuffer,
108  const lv_32fc_t* complexVector,
109  unsigned int num_points)
110 {
111  unsigned int number = 0;
112  const unsigned int quarterPoints = num_points / 4;
113 
114  const float* complexVectorPtr = (const float*)complexVector;
115  float* iBufferPtr = iBuffer;
116 
117  __m128 cplxValue1, cplxValue2, iValue;
118  for (; number < quarterPoints; number++) {
119 
120  cplxValue1 = _mm_load_ps(complexVectorPtr);
121  complexVectorPtr += 4;
122 
123  cplxValue2 = _mm_load_ps(complexVectorPtr);
124  complexVectorPtr += 4;
125 
126  // Arrange in i1i2i3i4 format
127  iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
128 
129  _mm_store_ps(iBufferPtr, iValue);
130 
131  iBufferPtr += 4;
132  }
133 
134  number = quarterPoints * 4;
135  for (; number < num_points; number++) {
136  *iBufferPtr++ = *complexVectorPtr++;
137  complexVectorPtr++;
138  }
139 }
140 #endif /* LV_HAVE_SSE */
141 
142 
143 #ifdef LV_HAVE_GENERIC
144 
145 static inline void volk_32fc_deinterleave_real_32f_generic(float* iBuffer,
146  const lv_32fc_t* complexVector,
147  unsigned int num_points)
148 {
149  unsigned int number = 0;
150  const float* complexVectorPtr = (float*)complexVector;
151  float* iBufferPtr = iBuffer;
152  for (number = 0; number < num_points; number++) {
153  *iBufferPtr++ = *complexVectorPtr++;
154  complexVectorPtr++;
155  }
156 }
157 #endif /* LV_HAVE_GENERIC */
158 
159 
160 #ifdef LV_HAVE_NEON
161 #include <arm_neon.h>
162 
/*
 * NEON implementation: copies the real (first) float of every complex sample
 * in complexVector into iBuffer, four samples per vector iteration.
 *
 * iBuffer       - output, receives num_points floats
 * complexVector - input, read as num_points interleaved (real, imag) pairs
 * num_points    - number of complex samples to process; 0 is a no-op
 */
static inline void volk_32fc_deinterleave_real_32f_neon(float* iBuffer,
                                                        const lv_32fc_t* complexVector,
                                                        unsigned int num_points)
{
    unsigned int number;
    const unsigned int quarter_points = num_points / 4;
    // Keep the const qualifier when viewing the complex input as floats
    // (the SSE/AVX2 kernels in this file already cast to const float*).
    const float* complexVectorPtr = (const float*)complexVector;
    float* iBufferPtr = iBuffer;
    float32x4x2_t complexInput;

    for (number = 0; number < quarter_points; number++) {
        // vld2q_f32 de-interleaves on load: val[0] holds the four even-indexed
        // floats (the reals), val[1] the four odd-indexed ones (unused here).
        complexInput = vld2q_f32(complexVectorPtr);
        vst1q_f32(iBufferPtr, complexInput.val[0]);
        complexVectorPtr += 8;
        iBufferPtr += 4;
    }

    // Scalar tail: the num_points % 4 samples that did not fill a vector.
    for (number = quarter_points * 4; number < num_points; number++) {
        *iBufferPtr++ = *complexVectorPtr;
        complexVectorPtr += 2; // step over the imaginary part
    }
}
185 #endif /* LV_HAVE_NEON */
186 
187 #endif /* INCLUDED_volk_32fc_deinterleave_real_32f_a_H */
188 
189 
190 #ifndef INCLUDED_volk_32fc_deinterleave_real_32f_u_H
191 #define INCLUDED_volk_32fc_deinterleave_real_32f_u_H
192 
193 #include <inttypes.h>
194 #include <stdio.h>
195 
196 #ifdef LV_HAVE_AVX2
197 #include <immintrin.h>
198 
199 static inline void volk_32fc_deinterleave_real_32f_u_avx2(float* iBuffer,
200  const lv_32fc_t* complexVector,
201  unsigned int num_points)
202 {
203  unsigned int number = 0;
204  const unsigned int eighthPoints = num_points / 8;
205 
206  const float* complexVectorPtr = (const float*)complexVector;
207  float* iBufferPtr = iBuffer;
208 
209  __m256 cplxValue1, cplxValue2;
210  __m256 iValue;
211  __m256i idx = _mm256_set_epi32(7, 6, 3, 2, 5, 4, 1, 0);
212  for (; number < eighthPoints; number++) {
213 
214  cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
215  complexVectorPtr += 8;
216 
217  cplxValue2 = _mm256_loadu_ps(complexVectorPtr);
218  complexVectorPtr += 8;
219 
220  // Arrange in i1i2i3i4 format
221  iValue = _mm256_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
222  iValue = _mm256_permutevar8x32_ps(iValue, idx);
223 
224  _mm256_storeu_ps(iBufferPtr, iValue);
225 
226  iBufferPtr += 8;
227  }
228 
229  number = eighthPoints * 8;
230  for (; number < num_points; number++) {
231  *iBufferPtr++ = *complexVectorPtr++;
232  complexVectorPtr++;
233  }
234 }
235 #endif /* LV_HAVE_AVX2 */
236 
237 #endif /* INCLUDED_volk_32fc_deinterleave_real_32f_u_H */
float32x4_t __m128
Definition: sse2neon.h:235
#define _mm_shuffle_ps(a, b, imm)
Definition: sse2neon.h:2586
#define _MM_SHUFFLE(fp3, fp2, fp1, fp0)
Definition: sse2neon.h:195
FORCE_INLINE __m128 _mm_load_ps(const float *p)
Definition: sse2neon.h:1858
FORCE_INLINE void _mm_store_ps(float *p, __m128 a)
Definition: sse2neon.h:2704
static void volk_32fc_deinterleave_real_32f_generic(float *iBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_real_32f.h:145
static void volk_32fc_deinterleave_real_32f_a_sse(float *iBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_real_32f.h:107
static void volk_32fc_deinterleave_real_32f_neon(float *iBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_real_32f.h:163
float complex lv_32fc_t
Definition: volk_complex.h:74