Vector Optimized Library of Kernels  3.0.0
Architecture-tuned implementations of math kernels
volk_32f_convert_64f.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of VOLK
6  *
7  * SPDX-License-Identifier: LGPL-3.0-or-later
8  */
9 
53 #ifndef INCLUDED_volk_32f_convert_64f_u_H
54 #define INCLUDED_volk_32f_convert_64f_u_H
55 
56 #include <inttypes.h>
57 #include <stdio.h>
58 
59 #ifdef LV_HAVE_AVX
60 #include <immintrin.h>
61 
62 static inline void volk_32f_convert_64f_u_avx(double* outputVector,
63  const float* inputVector,
64  unsigned int num_points)
65 {
66  unsigned int number = 0;
67 
68  const unsigned int quarterPoints = num_points / 4;
69 
70  const float* inputVectorPtr = (const float*)inputVector;
71  double* outputVectorPtr = outputVector;
72  __m256d ret;
73  __m128 inputVal;
74 
75  for (; number < quarterPoints; number++) {
76  inputVal = _mm_loadu_ps(inputVectorPtr);
77  inputVectorPtr += 4;
78 
79  ret = _mm256_cvtps_pd(inputVal);
80  _mm256_storeu_pd(outputVectorPtr, ret);
81 
82  outputVectorPtr += 4;
83  }
84 
85  number = quarterPoints * 4;
86  for (; number < num_points; number++) {
87  outputVector[number] = (double)(inputVector[number]);
88  }
89 }
90 
91 #endif /* LV_HAVE_AVX */
92 
93 #ifdef LV_HAVE_SSE2
94 #include <emmintrin.h>
95 
/*
 * Converts num_points single-precision values read from inputVector into
 * double-precision values written to outputVector (SSE2, unaligned access).
 *
 * Each iteration loads four floats. _mm_cvtps_pd only widens the two low
 * lanes, so the two high lanes are moved down with _mm_movehl_ps and
 * converted in a second step. The remaining num_points % 4 elements are
 * converted one at a time.
 */
static inline void volk_32f_convert_64f_u_sse2(double* outputVector,
                                               const float* inputVector,
                                               unsigned int num_points)
{
    /* Number of elements covered by complete groups of four. */
    const unsigned int vectorizedCount = (num_points / 4) * 4;
    unsigned int idx;

    for (idx = 0; idx < vectorizedCount; idx += 4) {
        const __m128 singles = _mm_loadu_ps(inputVector + idx);
        /* Bring lanes 2 and 3 down into lanes 0 and 1. */
        const __m128 upperPair = _mm_movehl_ps(singles, singles);

        _mm_storeu_pd(outputVector + idx, _mm_cvtps_pd(singles));
        _mm_storeu_pd(outputVector + idx + 2, _mm_cvtps_pd(upperPair));
    }

    /* Scalar tail: idx already equals vectorizedCount here. */
    for (; idx < num_points; idx++) {
        outputVector[idx] = (double)inputVector[idx];
    }
}
131 #endif /* LV_HAVE_SSE2 */
132 
133 
134 #ifdef LV_HAVE_GENERIC
135 
/*
 * Portable scalar conversion: writes (double)inputVector[i] to
 * outputVector[i] for every i in [0, num_points).
 */
static inline void volk_32f_convert_64f_generic(double* outputVector,
                                                const float* inputVector,
                                                unsigned int num_points)
{
    unsigned int idx;

    for (idx = 0; idx < num_points; idx++) {
        outputVector[idx] = (double)inputVector[idx];
    }
}
148 #endif /* LV_HAVE_GENERIC */
149 
150 
151 #endif /* INCLUDED_volk_32f_convert_64f_u_H */
152 
153 
154 #ifndef INCLUDED_volk_32f_convert_64f_a_H
155 #define INCLUDED_volk_32f_convert_64f_a_H
156 
157 #include <inttypes.h>
158 #include <stdio.h>
159 
160 #ifdef LV_HAVE_AVX
161 #include <immintrin.h>
162 
163 static inline void volk_32f_convert_64f_a_avx(double* outputVector,
164  const float* inputVector,
165  unsigned int num_points)
166 {
167  unsigned int number = 0;
168 
169  const unsigned int quarterPoints = num_points / 4;
170 
171  const float* inputVectorPtr = (const float*)inputVector;
172  double* outputVectorPtr = outputVector;
173  __m256d ret;
174  __m128 inputVal;
175 
176  for (; number < quarterPoints; number++) {
177  inputVal = _mm_load_ps(inputVectorPtr);
178  inputVectorPtr += 4;
179 
180  ret = _mm256_cvtps_pd(inputVal);
181  _mm256_store_pd(outputVectorPtr, ret);
182 
183  outputVectorPtr += 4;
184  }
185 
186  number = quarterPoints * 4;
187  for (; number < num_points; number++) {
188  outputVector[number] = (double)(inputVector[number]);
189  }
190 }
191 #endif /* LV_HAVE_AVX */
192 
193 #ifdef LV_HAVE_SSE2
194 #include <emmintrin.h>
195 
/*
 * Converts num_points single-precision values read from inputVector into
 * double-precision values written to outputVector (SSE2, aligned access).
 *
 * Each iteration loads four floats with _mm_load_ps and writes two pairs of
 * doubles with _mm_store_pd; both are the aligned intrinsics, so the caller
 * must supply 16-byte aligned buffers. _mm_cvtps_pd only widens the two low
 * lanes, so the two high lanes are moved down with _mm_movehl_ps and
 * converted in a second step. The remaining num_points % 4 elements are
 * converted one at a time.
 */
static inline void volk_32f_convert_64f_a_sse2(double* outputVector,
                                               const float* inputVector,
                                               unsigned int num_points)
{
    /* Number of elements covered by complete groups of four. */
    const unsigned int vectorizedCount = (num_points / 4) * 4;
    unsigned int idx;

    for (idx = 0; idx < vectorizedCount; idx += 4) {
        const __m128 singles = _mm_load_ps(inputVector + idx);
        /* Bring lanes 2 and 3 down into lanes 0 and 1. */
        const __m128 upperPair = _mm_movehl_ps(singles, singles);

        _mm_store_pd(outputVector + idx, _mm_cvtps_pd(singles));
        _mm_store_pd(outputVector + idx + 2, _mm_cvtps_pd(upperPair));
    }

    /* Scalar tail: idx already equals vectorizedCount here. */
    for (; idx < num_points; idx++) {
        outputVector[idx] = (double)inputVector[idx];
    }
}
231 #endif /* LV_HAVE_SSE2 */
232 
233 
234 #ifdef LV_HAVE_GENERIC
235 
/*
 * Portable scalar conversion: writes (double)inputVector[i] to
 * outputVector[i] for every i in [0, num_points). The body uses only plain
 * element accesses, with no SIMD load/store intrinsics.
 */
static inline void volk_32f_convert_64f_a_generic(double* outputVector,
                                                  const float* inputVector,
                                                  unsigned int num_points)
{
    unsigned int idx;

    for (idx = 0; idx < num_points; idx++) {
        outputVector[idx] = (double)inputVector[idx];
    }
}
248 #endif /* LV_HAVE_GENERIC */
249 
250 
251 #endif /* INCLUDED_volk_32f_convert_64f_a_H */
float32x4_t __m128
Definition: sse2neon.h:235
FORCE_INLINE __m128d _mm_cvtps_pd(__m128 a)
Definition: sse2neon.h:4096
FORCE_INLINE __m128 _mm_movehl_ps(__m128 __A, __m128 __B)
Definition: sse2neon.h:2132
float32x4_t __m128d
Definition: sse2neon.h:242
FORCE_INLINE void _mm_storeu_pd(double *mem_addr, __m128d a)
Definition: sse2neon.h:6003
FORCE_INLINE __m128 _mm_loadu_ps(const float *p)
Definition: sse2neon.h:1941
FORCE_INLINE void _mm_store_pd(double *mem_addr, __m128d a)
Definition: sse2neon.h:5897
FORCE_INLINE __m128 _mm_load_ps(const float *p)
Definition: sse2neon.h:1858
static void volk_32f_convert_64f_a_generic(double *outputVector, const float *inputVector, unsigned int num_points)
Definition: volk_32f_convert_64f.h:236
static void volk_32f_convert_64f_u_sse2(double *outputVector, const float *inputVector, unsigned int num_points)
Definition: volk_32f_convert_64f.h:96
static void volk_32f_convert_64f_a_avx(double *outputVector, const float *inputVector, unsigned int num_points)
Definition: volk_32f_convert_64f.h:163
static void volk_32f_convert_64f_a_sse2(double *outputVector, const float *inputVector, unsigned int num_points)
Definition: volk_32f_convert_64f.h:196
static void volk_32f_convert_64f_generic(double *outputVector, const float *inputVector, unsigned int num_points)
Definition: volk_32f_convert_64f.h:136
static void volk_32f_convert_64f_u_avx(double *outputVector, const float *inputVector, unsigned int num_points)
Definition: volk_32f_convert_64f.h:62