Vector Optimized Library of Kernels  3.0.0
Architecture-tuned implementations of math kernels
volk_32fc_deinterleave_imag_32f.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of VOLK
6  *
7  * SPDX-License-Identifier: LGPL-3.0-or-later
8  */
9 
57 #ifndef INCLUDED_volk_32fc_deinterleave_imag_32f_a_H
58 #define INCLUDED_volk_32fc_deinterleave_imag_32f_a_H
59 
60 #include <inttypes.h>
61 #include <stdio.h>
62 
63 #ifdef LV_HAVE_AVX
64 #include <immintrin.h>
65 
66 static inline void volk_32fc_deinterleave_imag_32f_a_avx(float* qBuffer,
67  const lv_32fc_t* complexVector,
68  unsigned int num_points)
69 {
70  unsigned int number = 0;
71  const unsigned int eighthPoints = num_points / 8;
72  const float* complexVectorPtr = (const float*)complexVector;
73  float* qBufferPtr = qBuffer;
74 
75  __m256 cplxValue1, cplxValue2, complex1, complex2, qValue;
76  for (; number < eighthPoints; number++) {
77 
78  cplxValue1 = _mm256_load_ps(complexVectorPtr);
79  complexVectorPtr += 8;
80 
81  cplxValue2 = _mm256_load_ps(complexVectorPtr);
82  complexVectorPtr += 8;
83 
84  complex1 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
85  complex2 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
86 
87  // Arrange in q1q2q3q4 format
88  qValue = _mm256_shuffle_ps(complex1, complex2, 0xdd);
89 
90  _mm256_store_ps(qBufferPtr, qValue);
91 
92  qBufferPtr += 8;
93  }
94 
95  number = eighthPoints * 8;
96  for (; number < num_points; number++) {
97  complexVectorPtr++;
98  *qBufferPtr++ = *complexVectorPtr++;
99  }
100 }
101 #endif /* LV_HAVE_AVX */
102 
103 #ifdef LV_HAVE_SSE
104 #include <xmmintrin.h>
105 
106 static inline void volk_32fc_deinterleave_imag_32f_a_sse(float* qBuffer,
107  const lv_32fc_t* complexVector,
108  unsigned int num_points)
109 {
110  unsigned int number = 0;
111  const unsigned int quarterPoints = num_points / 4;
112 
113  const float* complexVectorPtr = (const float*)complexVector;
114  float* qBufferPtr = qBuffer;
115 
116  __m128 cplxValue1, cplxValue2, iValue;
117  for (; number < quarterPoints; number++) {
118 
119  cplxValue1 = _mm_load_ps(complexVectorPtr);
120  complexVectorPtr += 4;
121 
122  cplxValue2 = _mm_load_ps(complexVectorPtr);
123  complexVectorPtr += 4;
124 
125  // Arrange in q1q2q3q4 format
126  iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3, 1, 3, 1));
127 
128  _mm_store_ps(qBufferPtr, iValue);
129 
130  qBufferPtr += 4;
131  }
132 
133  number = quarterPoints * 4;
134  for (; number < num_points; number++) {
135  complexVectorPtr++;
136  *qBufferPtr++ = *complexVectorPtr++;
137  }
138 }
139 #endif /* LV_HAVE_SSE */
140 
141 #ifdef LV_HAVE_NEON
142 #include <arm_neon.h>
143 
/*
 * Copy the imaginary (Q) part of every complex sample in complexVector into
 * qBuffer, using NEON.
 *
 *   qBuffer       - destination; receives num_points floats.
 *   complexVector - source; read as num_points interleaved (real, imag)
 *                   float pairs.
 *   num_points    - number of complex samples to process.
 */
static inline void volk_32fc_deinterleave_imag_32f_neon(float* qBuffer,
                                                        const lv_32fc_t* complexVector,
                                                        unsigned int num_points)
{
    unsigned int number;
    const unsigned int quarter_points = num_points / 4;
    /* Keep the const qualifier of the input when viewing it as raw floats. */
    const float* complexVectorPtr = (const float*)complexVector;
    float* qBufferPtr = qBuffer;
    float32x4x2_t complexInput;

    for (number = 0; number < quarter_points; number++) {
        /* vld2q_f32 de-interleaves eight floats: val[0] receives the even
         * elements (real parts), val[1] the odd elements (imaginary parts). */
        complexInput = vld2q_f32(complexVectorPtr);
        vst1q_f32(qBufferPtr, complexInput.val[1]);
        complexVectorPtr += 8;
        qBufferPtr += 4;
    }

    /* Scalar tail for the remaining num_points % 4 samples. */
    for (number = quarter_points * 4; number < num_points; number++) {
        complexVectorPtr++; /* skip the real part */
        *qBufferPtr++ = *complexVectorPtr++;
    }
}
166 #endif /* LV_HAVE_NEON */
167 
168 #ifdef LV_HAVE_GENERIC
169 
170 static inline void volk_32fc_deinterleave_imag_32f_generic(float* qBuffer,
171  const lv_32fc_t* complexVector,
172  unsigned int num_points)
173 {
174  unsigned int number = 0;
175  const float* complexVectorPtr = (float*)complexVector;
176  float* qBufferPtr = qBuffer;
177  for (number = 0; number < num_points; number++) {
178  complexVectorPtr++;
179  *qBufferPtr++ = *complexVectorPtr++;
180  }
181 }
182 #endif /* LV_HAVE_GENERIC */
183 
184 
185 #endif /* INCLUDED_volk_32fc_deinterleave_imag_32f_a_H */
186 
187 #ifndef INCLUDED_volk_32fc_deinterleave_imag_32f_u_H
188 #define INCLUDED_volk_32fc_deinterleave_imag_32f_u_H
189 
190 #include <inttypes.h>
191 #include <stdio.h>
192 
193 #ifdef LV_HAVE_AVX
194 #include <immintrin.h>
195 
196 static inline void volk_32fc_deinterleave_imag_32f_u_avx(float* qBuffer,
197  const lv_32fc_t* complexVector,
198  unsigned int num_points)
199 {
200  unsigned int number = 0;
201  const unsigned int eighthPoints = num_points / 8;
202  const float* complexVectorPtr = (const float*)complexVector;
203  float* qBufferPtr = qBuffer;
204 
205  __m256 cplxValue1, cplxValue2, complex1, complex2, qValue;
206  for (; number < eighthPoints; number++) {
207 
208  cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
209  complexVectorPtr += 8;
210 
211  cplxValue2 = _mm256_loadu_ps(complexVectorPtr);
212  complexVectorPtr += 8;
213 
214  complex1 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
215  complex2 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
216 
217  // Arrange in q1q2q3q4 format
218  qValue = _mm256_shuffle_ps(complex1, complex2, 0xdd);
219 
220  _mm256_storeu_ps(qBufferPtr, qValue);
221 
222  qBufferPtr += 8;
223  }
224 
225  number = eighthPoints * 8;
226  for (; number < num_points; number++) {
227  complexVectorPtr++;
228  *qBufferPtr++ = *complexVectorPtr++;
229  }
230 }
231 #endif /* LV_HAVE_AVX */
232 #endif /* INCLUDED_volk_32fc_deinterleave_imag_32f_u_H */
float32x4_t __m128
Definition: sse2neon.h:235
#define _mm_shuffle_ps(a, b, imm)
Definition: sse2neon.h:2586
#define _MM_SHUFFLE(fp3, fp2, fp1, fp0)
Definition: sse2neon.h:195
FORCE_INLINE __m128 _mm_load_ps(const float *p)
Definition: sse2neon.h:1858
FORCE_INLINE void _mm_store_ps(float *p, __m128 a)
Definition: sse2neon.h:2704
static void volk_32fc_deinterleave_imag_32f_a_sse(float *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_imag_32f.h:106
static void volk_32fc_deinterleave_imag_32f_neon(float *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_imag_32f.h:144
static void volk_32fc_deinterleave_imag_32f_u_avx(float *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_imag_32f.h:196
static void volk_32fc_deinterleave_imag_32f_a_avx(float *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_imag_32f.h:66
static void volk_32fc_deinterleave_imag_32f_generic(float *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_imag_32f.h:170
float complex lv_32fc_t
Definition: volk_complex.h:74