Vector Optimized Library of Kernels  3.0.0
Architecture-tuned implementations of math kernels
volk_64f_x2_min_64f.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of VOLK
6  *
7  * SPDX-License-Identifier: LGPL-3.0-or-later
8  */
9 
59 #ifndef INCLUDED_volk_64f_x2_min_64f_a_H
60 #define INCLUDED_volk_64f_x2_min_64f_a_H
61 
62 #include <inttypes.h>
63 #include <stdio.h>
64 
65 #ifdef LV_HAVE_AVX512F
66 #include <immintrin.h>
67 
68 static inline void volk_64f_x2_min_64f_a_avx512f(double* cVector,
69  const double* aVector,
70  const double* bVector,
71  unsigned int num_points)
72 {
73  unsigned int number = 0;
74  const unsigned int eigthPoints = num_points / 8;
75 
76  double* cPtr = cVector;
77  const double* aPtr = aVector;
78  const double* bPtr = bVector;
79 
80  __m512d aVal, bVal, cVal;
81  for (; number < eigthPoints; number++) {
82 
83  aVal = _mm512_load_pd(aPtr);
84  bVal = _mm512_load_pd(bPtr);
85 
86  cVal = _mm512_min_pd(aVal, bVal);
87 
88  _mm512_store_pd(cPtr, cVal); // Store the results back into the C container
89 
90  aPtr += 8;
91  bPtr += 8;
92  cPtr += 8;
93  }
94 
95  number = eigthPoints * 8;
96  for (; number < num_points; number++) {
97  const double a = *aPtr++;
98  const double b = *bPtr++;
99  *cPtr++ = (a < b ? a : b);
100  }
101 }
102 #endif /* LV_HAVE_AVX512F */
103 
104 
105 #ifdef LV_HAVE_AVX
106 #include <immintrin.h>
107 
108 static inline void volk_64f_x2_min_64f_a_avx(double* cVector,
109  const double* aVector,
110  const double* bVector,
111  unsigned int num_points)
112 {
113  unsigned int number = 0;
114  const unsigned int quarterPoints = num_points / 4;
115 
116  double* cPtr = cVector;
117  const double* aPtr = aVector;
118  const double* bPtr = bVector;
119 
120  __m256d aVal, bVal, cVal;
121  for (; number < quarterPoints; number++) {
122 
123  aVal = _mm256_load_pd(aPtr);
124  bVal = _mm256_load_pd(bPtr);
125 
126  cVal = _mm256_min_pd(aVal, bVal);
127 
128  _mm256_store_pd(cPtr, cVal); // Store the results back into the C container
129 
130  aPtr += 4;
131  bPtr += 4;
132  cPtr += 4;
133  }
134 
135  number = quarterPoints * 4;
136  for (; number < num_points; number++) {
137  const double a = *aPtr++;
138  const double b = *bPtr++;
139  *cPtr++ = (a < b ? a : b);
140  }
141 }
142 #endif /* LV_HAVE_AVX */
143 
144 
145 #ifdef LV_HAVE_SSE2
146 #include <emmintrin.h>
147 
/*!
 * \brief Element-wise minimum of two double vectors (SSE2, aligned access).
 *
 * Writes the smaller of aVector[i] and bVector[i] to cVector[i] for every
 * i in [0, num_points). Two doubles are processed per iteration with
 * 128-bit loads and stores; an odd final element is handled by a scalar
 * loop.
 *
 * The vector part uses _mm_load_pd / _mm_store_pd, which require
 * 16-byte-aligned addresses, so all three buffers must be 16-byte aligned.
 *
 * \param cVector    Output buffer, at least num_points doubles.
 * \param aVector    First input buffer, at least num_points doubles.
 * \param bVector    Second input buffer, at least num_points doubles.
 * \param num_points Number of elements to process; 0 writes nothing.
 */
static inline void volk_64f_x2_min_64f_a_sse2(double* cVector,
                                              const double* aVector,
                                              const double* bVector,
                                              unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int halfPoints = num_points / 2;

    double* cPtr = cVector;
    const double* aPtr = aVector;
    const double* bPtr = bVector;

    for (; number < halfPoints; number++) {
        const __m128d aVal = _mm_load_pd(aPtr);
        const __m128d bVal = _mm_load_pd(bPtr);
        const __m128d cVal = _mm_min_pd(aVal, bVal);

        _mm_store_pd(cPtr, cVal); // Store the results back into the C container

        aPtr += 2;
        bPtr += 2;
        cPtr += 2;
    }

    // Scalar tail: at most one element when num_points is odd.
    number = halfPoints * 2;
    for (; number < num_points; number++) {
        const double a = *aPtr++;
        const double b = *bPtr++;
        *cPtr++ = (a < b ? a : b);
    }
}
182 #endif /* LV_HAVE_SSE2 */
183 
184 
185 #ifdef LV_HAVE_GENERIC
186 
/*!
 * \brief Element-wise minimum of two double vectors (portable C).
 *
 * Writes (aVector[i] < bVector[i] ? aVector[i] : bVector[i]) to cVector[i]
 * for every i in [0, num_points). Because the result falls through to the
 * b operand whenever the comparison is false, bVector[i] is returned when
 * the two values compare equal or when either of them is NaN.
 *
 * \param cVector    Output buffer, at least num_points doubles.
 * \param aVector    First input buffer, at least num_points doubles.
 * \param bVector    Second input buffer, at least num_points doubles.
 * \param num_points Number of elements to process; 0 writes nothing.
 */
static inline void volk_64f_x2_min_64f_generic(double* cVector,
                                               const double* aVector,
                                               const double* bVector,
                                               unsigned int num_points)
{
    double* cPtr = cVector;
    const double* aPtr = aVector;
    const double* bPtr = bVector;
    unsigned int number;

    for (number = 0; number < num_points; number++) {
        const double a = *aPtr++;
        const double b = *bPtr++;
        *cPtr++ = (a < b ? a : b);
    }
}
203 #endif /* LV_HAVE_GENERIC */
204 
205 
206 #endif /* INCLUDED_volk_64f_x2_min_64f_a_H */
207 
208 #ifndef INCLUDED_volk_64f_x2_min_64f_u_H
209 #define INCLUDED_volk_64f_x2_min_64f_u_H
210 
211 #include <inttypes.h>
212 #include <stdio.h>
213 
214 #ifdef LV_HAVE_AVX512F
215 #include <immintrin.h>
216 
217 static inline void volk_64f_x2_min_64f_u_avx512f(double* cVector,
218  const double* aVector,
219  const double* bVector,
220  unsigned int num_points)
221 {
222  unsigned int number = 0;
223  const unsigned int eigthPoints = num_points / 8;
224 
225  double* cPtr = cVector;
226  const double* aPtr = aVector;
227  const double* bPtr = bVector;
228 
229  __m512d aVal, bVal, cVal;
230  for (; number < eigthPoints; number++) {
231 
232  aVal = _mm512_loadu_pd(aPtr);
233  bVal = _mm512_loadu_pd(bPtr);
234 
235  cVal = _mm512_min_pd(aVal, bVal);
236 
237  _mm512_storeu_pd(cPtr, cVal); // Store the results back into the C container
238 
239  aPtr += 8;
240  bPtr += 8;
241  cPtr += 8;
242  }
243 
244  number = eigthPoints * 8;
245  for (; number < num_points; number++) {
246  const double a = *aPtr++;
247  const double b = *bPtr++;
248  *cPtr++ = (a < b ? a : b);
249  }
250 }
251 #endif /* LV_HAVE_AVX512F */
252 
253 
254 #ifdef LV_HAVE_AVX
255 #include <immintrin.h>
256 
257 static inline void volk_64f_x2_min_64f_u_avx(double* cVector,
258  const double* aVector,
259  const double* bVector,
260  unsigned int num_points)
261 {
262  unsigned int number = 0;
263  const unsigned int quarterPoints = num_points / 4;
264 
265  double* cPtr = cVector;
266  const double* aPtr = aVector;
267  const double* bPtr = bVector;
268 
269  __m256d aVal, bVal, cVal;
270  for (; number < quarterPoints; number++) {
271 
272  aVal = _mm256_loadu_pd(aPtr);
273  bVal = _mm256_loadu_pd(bPtr);
274 
275  cVal = _mm256_min_pd(aVal, bVal);
276 
277  _mm256_storeu_pd(cPtr, cVal); // Store the results back into the C container
278 
279  aPtr += 4;
280  bPtr += 4;
281  cPtr += 4;
282  }
283 
284  number = quarterPoints * 4;
285  for (; number < num_points; number++) {
286  const double a = *aPtr++;
287  const double b = *bPtr++;
288  *cPtr++ = (a < b ? a : b);
289  }
290 }
291 #endif /* LV_HAVE_AVX */
292 
293 
294 #endif /* INCLUDED_volk_64f_x2_min_64f_u_H */
FORCE_INLINE __m128d _mm_min_pd(__m128d a, __m128d b)
Definition: sse2neon.h:4705
FORCE_INLINE __m128d _mm_load_pd(const double *p)
Definition: sse2neon.h:4430
float32x4_t __m128d
Definition: sse2neon.h:242
FORCE_INLINE void _mm_store_pd(double *mem_addr, __m128d a)
Definition: sse2neon.h:5897
static void volk_64f_x2_min_64f_u_avx(double *cVector, const double *aVector, const double *bVector, unsigned int num_points)
Definition: volk_64f_x2_min_64f.h:257
static void volk_64f_x2_min_64f_a_sse2(double *cVector, const double *aVector, const double *bVector, unsigned int num_points)
Definition: volk_64f_x2_min_64f.h:148
static void volk_64f_x2_min_64f_a_avx(double *cVector, const double *aVector, const double *bVector, unsigned int num_points)
Definition: volk_64f_x2_min_64f.h:108
static void volk_64f_x2_min_64f_generic(double *cVector, const double *aVector, const double *bVector, unsigned int num_points)
Definition: volk_64f_x2_min_64f.h:187