51 #ifndef INCLUDED_volk_64f_convert_32f_u_H
52 #define INCLUDED_volk_64f_convert_32f_u_H
#ifdef LV_HAVE_AVX512F
#include <immintrin.h>

/*!
 * \brief Narrows an array of doubles to floats using AVX-512F unaligned
 *        loads and stores.
 *
 * \param outputVector destination buffer; receives num_points floats
 * \param inputVector  source buffer holding num_points doubles
 * \param num_points   number of elements to convert
 */
static inline void volk_64f_convert_32f_u_avx512f(float* outputVector,
                                                  const double* inputVector,
                                                  unsigned int num_points)
{
    unsigned int number = 0;

    const unsigned int oneSixteenthPoints = num_points / 16;

    const double* inputVectorPtr = inputVector;
    float* outputVectorPtr = outputVector;
    __m256 ret1, ret2;
    __m512d inputVal1, inputVal2;

    /* 16 points per iteration: two 8-double loads, each narrowed to 8 floats. */
    for (; number < oneSixteenthPoints; number++) {
        inputVal1 = _mm512_loadu_pd(inputVectorPtr);
        inputVectorPtr += 8;
        inputVal2 = _mm512_loadu_pd(inputVectorPtr);
        inputVectorPtr += 8;

        ret1 = _mm512_cvtpd_ps(inputVal1);
        ret2 = _mm512_cvtpd_ps(inputVal2);

        _mm256_storeu_ps(outputVectorPtr, ret1);
        outputVectorPtr += 8;

        _mm256_storeu_ps(outputVectorPtr, ret2);
        outputVectorPtr += 8;
    }

    /* Scalar tail: the num_points % 16 elements the vector loop did not reach. */
    number = oneSixteenthPoints * 16;
    for (; number < num_points; number++) {
        outputVector[number] = (float)(inputVector[number]);
    }
}
#endif /* LV_HAVE_AVX512F */
#ifdef LV_HAVE_AVX
#include <immintrin.h>

/*!
 * \brief Narrows an array of doubles to floats using AVX unaligned loads
 *        and SSE unaligned stores.
 *
 * \param outputVector destination buffer; receives num_points floats
 * \param inputVector  source buffer holding num_points doubles
 * \param num_points   number of elements to convert
 */
static inline void volk_64f_convert_32f_u_avx(float* outputVector,
                                              const double* inputVector,
                                              unsigned int num_points)
{
    unsigned int number = 0;

    const unsigned int oneEightPoints = num_points / 8;

    const double* inputVectorPtr = inputVector;
    float* outputVectorPtr = outputVector;
    __m128 ret1, ret2;
    __m256d inputVal1, inputVal2;

    /* 8 points per iteration: two 4-double loads, each narrowed to 4 floats. */
    for (; number < oneEightPoints; number++) {
        inputVal1 = _mm256_loadu_pd(inputVectorPtr);
        inputVectorPtr += 4;
        inputVal2 = _mm256_loadu_pd(inputVectorPtr);
        inputVectorPtr += 4;

        ret1 = _mm256_cvtpd_ps(inputVal1);
        ret2 = _mm256_cvtpd_ps(inputVal2);

        _mm_storeu_ps(outputVectorPtr, ret1);
        outputVectorPtr += 4;

        _mm_storeu_ps(outputVectorPtr, ret2);
        outputVectorPtr += 4;
    }

    /* Scalar tail: the num_points % 8 elements the vector loop did not reach. */
    number = oneEightPoints * 8;
    for (; number < num_points; number++) {
        outputVector[number] = (float)(inputVector[number]);
    }
}
#endif /* LV_HAVE_AVX */
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>

/*!
 * \brief Narrows an array of doubles to floats using SSE2 unaligned loads
 *        and stores.
 *
 * \param outputVector destination buffer; receives num_points floats
 * \param inputVector  source buffer holding num_points doubles
 * \param num_points   number of elements to convert
 */
static inline void volk_64f_convert_32f_u_sse2(float* outputVector,
                                               const double* inputVector,
                                               unsigned int num_points)
{
    unsigned int number = 0;

    const unsigned int quarterPoints = num_points / 4;

    const double* inputVectorPtr = inputVector;
    float* outputVectorPtr = outputVector;
    __m128 ret, ret2;
    __m128d inputVal1, inputVal2;

    /* 4 points per iteration: two 2-double loads merged into one 4-float store. */
    for (; number < quarterPoints; number++) {
        inputVal1 = _mm_loadu_pd(inputVectorPtr);
        inputVectorPtr += 2;
        inputVal2 = _mm_loadu_pd(inputVectorPtr);
        inputVectorPtr += 2;

        /* Each conversion leaves its two floats in the low half of the result. */
        ret = _mm_cvtpd_ps(inputVal1);
        ret2 = _mm_cvtpd_ps(inputVal2);

        /* Pack: low half of ret stays, low half of ret2 becomes the high half. */
        ret = _mm_movelh_ps(ret, ret2);

        _mm_storeu_ps(outputVectorPtr, ret);
        outputVectorPtr += 4;
    }

    /* Scalar tail: the num_points % 4 elements the vector loop did not reach. */
    number = quarterPoints * 4;
    for (; number < num_points; number++) {
        outputVector[number] = (float)(inputVector[number]);
    }
}
#endif /* LV_HAVE_SSE2 */
#ifdef LV_HAVE_GENERIC

/*!
 * \brief Portable scalar kernel: narrows each double to float, one element
 *        at a time.
 *
 * \param outputVector destination buffer; receives num_points floats
 * \param inputVector  source buffer holding num_points doubles
 * \param num_points   number of elements to convert
 */
static inline void volk_64f_convert_32f_generic(float* outputVector,
                                                const double* inputVector,
                                                unsigned int num_points)
{
    float* outputVectorPtr = outputVector;
    const double* inputVectorPtr = inputVector;
    unsigned int number = 0;

    for (number = 0; number < num_points; number++) {
        *outputVectorPtr++ = ((float)(*inputVectorPtr++));
    }
}
#endif /* LV_HAVE_GENERIC */
194 #ifndef INCLUDED_volk_64f_convert_32f_a_H
195 #define INCLUDED_volk_64f_convert_32f_a_H
197 #include <inttypes.h>
#ifdef LV_HAVE_AVX512F
#include <immintrin.h>

/*!
 * \brief Narrows an array of doubles to floats using AVX-512F aligned loads
 *        and AVX aligned stores.
 *
 * Uses _mm512_load_pd and _mm256_store_ps, so inputVector must be 64-byte
 * aligned and outputVector 32-byte aligned.
 *
 * \param outputVector destination buffer; receives num_points floats
 * \param inputVector  source buffer holding num_points doubles
 * \param num_points   number of elements to convert
 */
static inline void volk_64f_convert_32f_a_avx512f(float* outputVector,
                                                  const double* inputVector,
                                                  unsigned int num_points)
{
    unsigned int number = 0;

    const unsigned int oneSixteenthPoints = num_points / 16;

    const double* inputVectorPtr = inputVector;
    float* outputVectorPtr = outputVector;
    __m256 ret1, ret2;
    __m512d inputVal1, inputVal2;

    /* 16 points per iteration: two 8-double loads, each narrowed to 8 floats. */
    for (; number < oneSixteenthPoints; number++) {
        inputVal1 = _mm512_load_pd(inputVectorPtr);
        inputVectorPtr += 8;
        inputVal2 = _mm512_load_pd(inputVectorPtr);
        inputVectorPtr += 8;

        ret1 = _mm512_cvtpd_ps(inputVal1);
        ret2 = _mm512_cvtpd_ps(inputVal2);

        _mm256_store_ps(outputVectorPtr, ret1);
        outputVectorPtr += 8;

        _mm256_store_ps(outputVectorPtr, ret2);
        outputVectorPtr += 8;
    }

    /* Scalar tail: the num_points % 16 elements the vector loop did not reach. */
    number = oneSixteenthPoints * 16;
    for (; number < num_points; number++) {
        outputVector[number] = (float)(inputVector[number]);
    }
}
#endif /* LV_HAVE_AVX512F */
#ifdef LV_HAVE_AVX
#include <immintrin.h>

/*!
 * \brief Narrows an array of doubles to floats using AVX aligned loads and
 *        SSE aligned stores.
 *
 * Uses _mm256_load_pd and _mm_store_ps, so inputVector must be 32-byte
 * aligned and outputVector 16-byte aligned.
 *
 * \param outputVector destination buffer; receives num_points floats
 * \param inputVector  source buffer holding num_points doubles
 * \param num_points   number of elements to convert
 */
static inline void volk_64f_convert_32f_a_avx(float* outputVector,
                                              const double* inputVector,
                                              unsigned int num_points)
{
    unsigned int number = 0;

    const unsigned int oneEightPoints = num_points / 8;

    const double* inputVectorPtr = inputVector;
    float* outputVectorPtr = outputVector;
    __m128 ret1, ret2;
    __m256d inputVal1, inputVal2;

    /* 8 points per iteration: two 4-double loads, each narrowed to 4 floats. */
    for (; number < oneEightPoints; number++) {
        inputVal1 = _mm256_load_pd(inputVectorPtr);
        inputVectorPtr += 4;
        inputVal2 = _mm256_load_pd(inputVectorPtr);
        inputVectorPtr += 4;

        ret1 = _mm256_cvtpd_ps(inputVal1);
        ret2 = _mm256_cvtpd_ps(inputVal2);

        _mm_store_ps(outputVectorPtr, ret1);
        outputVectorPtr += 4;

        _mm_store_ps(outputVectorPtr, ret2);
        outputVectorPtr += 4;
    }

    /* Scalar tail: the num_points % 8 elements the vector loop did not reach. */
    number = oneEightPoints * 8;
    for (; number < num_points; number++) {
        outputVector[number] = (float)(inputVector[number]);
    }
}
#endif /* LV_HAVE_AVX */
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>

/*!
 * \brief Narrows an array of doubles to floats using SSE2 aligned loads and
 *        stores.
 *
 * Uses _mm_load_pd and _mm_store_ps, so both inputVector and outputVector
 * must be 16-byte aligned.
 *
 * \param outputVector destination buffer; receives num_points floats
 * \param inputVector  source buffer holding num_points doubles
 * \param num_points   number of elements to convert
 */
static inline void volk_64f_convert_32f_a_sse2(float* outputVector,
                                               const double* inputVector,
                                               unsigned int num_points)
{
    unsigned int number = 0;

    const unsigned int quarterPoints = num_points / 4;

    const double* inputVectorPtr = inputVector;
    float* outputVectorPtr = outputVector;
    __m128 ret, ret2;
    __m128d inputVal1, inputVal2;

    /* 4 points per iteration: two 2-double loads merged into one 4-float store. */
    for (; number < quarterPoints; number++) {
        inputVal1 = _mm_load_pd(inputVectorPtr);
        inputVectorPtr += 2;
        inputVal2 = _mm_load_pd(inputVectorPtr);
        inputVectorPtr += 2;

        /* Each conversion leaves its two floats in the low half of the result. */
        ret = _mm_cvtpd_ps(inputVal1);
        ret2 = _mm_cvtpd_ps(inputVal2);

        /* Pack: low half of ret stays, low half of ret2 becomes the high half. */
        ret = _mm_movelh_ps(ret, ret2);

        _mm_store_ps(outputVectorPtr, ret);
        outputVectorPtr += 4;
    }

    /* Scalar tail: the num_points % 4 elements the vector loop did not reach. */
    number = quarterPoints * 4;
    for (; number < num_points; number++) {
        outputVector[number] = (float)(inputVector[number]);
    }
}
#endif /* LV_HAVE_SSE2 */
#ifdef LV_HAVE_GENERIC

/*!
 * \brief Portable scalar kernel registered under the aligned ("_a") name:
 *        narrows each double to float, one element at a time.
 *
 * \param outputVector destination buffer; receives num_points floats
 * \param inputVector  source buffer holding num_points doubles
 * \param num_points   number of elements to convert
 */
static inline void volk_64f_convert_32f_a_generic(float* outputVector,
                                                  const double* inputVector,
                                                  unsigned int num_points)
{
    float* outputVectorPtr = outputVector;
    const double* inputVectorPtr = inputVector;
    unsigned int number = 0;

    for (number = 0; number < num_points; number++) {
        *outputVectorPtr++ = ((float)(*inputVectorPtr++));
    }
}
#endif /* LV_HAVE_GENERIC */
/*
 * Cross-reference index (Doxygen tooltip residue from the source listing;
 * kept for reference only, not part of the header's code).
 *
 * float32x4_t __m128
 *     Definition: sse2neon.h:235
 * FORCE_INLINE __m128d _mm_load_pd(const double *p)
 *     Definition: sse2neon.h:4430
 * FORCE_INLINE __m128d _mm_loadu_pd(const double *p)
 *     Definition: sse2neon.h:4563
 * float32x4_t __m128d
 *     Definition: sse2neon.h:242
 * FORCE_INLINE void _mm_storeu_ps(float *p, __m128 a)
 *     Definition: sse2neon.h:2787
 * FORCE_INLINE __m128 _mm_movelh_ps(__m128 __A, __m128 __B)
 *     Definition: sse2neon.h:2145
 * FORCE_INLINE __m128 _mm_cvtpd_ps(__m128d a)
 *     Definition: sse2neon.h:3991
 * FORCE_INLINE void _mm_store_ps(float *p, __m128 a)
 *     Definition: sse2neon.h:2704
 * static void volk_64f_convert_32f_u_avx(float *outputVector, const double *inputVector, unsigned int num_points)
 *     Definition: volk_64f_convert_32f.h:100
 * static void volk_64f_convert_32f_generic(float *outputVector, const double *inputVector, unsigned int num_points)
 *     Definition: volk_64f_convert_32f.h:178
 * static void volk_64f_convert_32f_a_avx(float *outputVector, const double *inputVector, unsigned int num_points)
 *     Definition: volk_64f_convert_32f.h:243
 * static void volk_64f_convert_32f_a_generic(float *outputVector, const double *inputVector, unsigned int num_points)
 *     Definition: volk_64f_convert_32f.h:321
 * static void volk_64f_convert_32f_u_sse2(float *outputVector, const double *inputVector, unsigned int num_points)
 *     Definition: volk_64f_convert_32f.h:140
 * static void volk_64f_convert_32f_a_sse2(float *outputVector, const double *inputVector, unsigned int num_points)
 *     Definition: volk_64f_convert_32f.h:283
 */