58 #ifndef INCLUDED_volk_64f_x2_max_64f_a_H
59 #define INCLUDED_volk_64f_x2_max_64f_a_H
64 #ifdef LV_HAVE_AVX512F
65 #include <immintrin.h>
/*!
 * \brief Element-wise maximum of two double vectors (AVX-512F, aligned access).
 *
 * Writes the larger of aVector[i] and bVector[i] to cVector[i] for every
 * i in [0, num_points). Complete groups of eight elements are processed with
 * _mm512_load_pd / _mm512_max_pd / _mm512_store_pd; the aligned load/store
 * intrinsics require 64-byte-aligned addresses. Elements left over after the
 * last complete group are processed one at a time.
 *
 * \param cVector    Output buffer; num_points doubles are written.
 * \param aVector    First input buffer; num_points doubles are read.
 * \param bVector    Second input buffer; num_points doubles are read.
 * \param num_points Number of elements to process.
 */
static inline void volk_64f_x2_max_64f_a_avx512f(double* cVector,
                                                 const double* aVector,
                                                 const double* bVector,
                                                 unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int eighthPoints = num_points / 8;

    double* cPtr = cVector;
    const double* aPtr = aVector;
    const double* bPtr = bVector;

    __m512d aVal, bVal, cVal;
    for (; number < eighthPoints; number++) {
        aVal = _mm512_load_pd(aPtr);
        bVal = _mm512_load_pd(bPtr);

        cVal = _mm512_max_pd(aVal, bVal);

        _mm512_store_pd(cPtr, cVal);

        /* Step past the eight elements just handled; the scalar tail below
         * continues from wherever these pointers end up. */
        aPtr += 8;
        bPtr += 8;
        cPtr += 8;
    }

    /* Scalar tail: the remaining num_points % 8 elements. */
    number = eighthPoints * 8;
    for (; number < num_points; number++) {
        const double a = *aPtr++;
        const double b = *bPtr++;
        *cPtr++ = (a > b ? a : b);
    }
}
105 #include <immintrin.h>
/*!
 * \brief Element-wise maximum of two double vectors (AVX, aligned access).
 *
 * Writes the larger of aVector[i] and bVector[i] to cVector[i] for every
 * i in [0, num_points). Complete groups of four elements are processed with
 * _mm256_load_pd / _mm256_max_pd / _mm256_store_pd; the aligned load/store
 * intrinsics require 32-byte-aligned addresses. Elements left over after the
 * last complete group are processed one at a time.
 *
 * \param cVector    Output buffer; num_points doubles are written.
 * \param aVector    First input buffer; num_points doubles are read.
 * \param bVector    Second input buffer; num_points doubles are read.
 * \param num_points Number of elements to process.
 */
static inline void volk_64f_x2_max_64f_a_avx(double* cVector,
                                             const double* aVector,
                                             const double* bVector,
                                             unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int quarterPoints = num_points / 4;

    double* cPtr = cVector;
    const double* aPtr = aVector;
    const double* bPtr = bVector;

    __m256d aVal, bVal, cVal;
    for (; number < quarterPoints; number++) {
        aVal = _mm256_load_pd(aPtr);
        bVal = _mm256_load_pd(bPtr);

        cVal = _mm256_max_pd(aVal, bVal);

        _mm256_store_pd(cPtr, cVal);

        /* Step past the four elements just handled; the scalar tail below
         * continues from wherever these pointers end up. */
        aPtr += 4;
        bPtr += 4;
        cPtr += 4;
    }

    /* Scalar tail: the remaining num_points % 4 elements. */
    number = quarterPoints * 4;
    for (; number < num_points; number++) {
        const double a = *aPtr++;
        const double b = *bPtr++;
        *cPtr++ = (a > b ? a : b);
    }
}
145 #include <emmintrin.h>
148 const double* aVector,
149 const double* bVector,
150 unsigned int num_points)
152 unsigned int number = 0;
153 const unsigned int halfPoints = num_points / 2;
155 double* cPtr = cVector;
156 const double* aPtr = aVector;
157 const double* bPtr = bVector;
160 for (; number < halfPoints; number++) {
174 number = halfPoints * 2;
175 for (; number < num_points; number++) {
176 const double a = *aPtr++;
177 const double b = *bPtr++;
178 *cPtr++ = (a > b ? a : b);
184 #ifdef LV_HAVE_GENERIC
/*!
 * \brief Element-wise maximum of two double vectors (portable C).
 *
 * Writes the larger of aVector[i] and bVector[i] to cVector[i] for every
 * i in [0, num_points). The element from bVector is selected whenever
 * `a > b` evaluates to false. Nothing is read or written when num_points
 * is zero.
 *
 * \param cVector    Output buffer; num_points doubles are written.
 * \param aVector    First input buffer; num_points doubles are read.
 * \param bVector    Second input buffer; num_points doubles are read.
 * \param num_points Number of elements to process.
 */
static inline void volk_64f_x2_max_64f_generic(double* cVector,
                                               const double* aVector,
                                               const double* bVector,
                                               unsigned int num_points)
{
    double* cPtr = cVector;
    const double* aPtr = aVector;
    const double* bPtr = bVector;
    unsigned int number = 0;

    for (number = 0; number < num_points; number++) {
        const double a = *aPtr++;
        const double b = *bPtr++;
        *cPtr++ = (a > b ? a : b);
    }
}
208 #ifndef INCLUDED_volk_64f_x2_max_64f_u_H
209 #define INCLUDED_volk_64f_x2_max_64f_u_H
211 #include <inttypes.h>
214 #ifdef LV_HAVE_AVX512F
215 #include <immintrin.h>
/*!
 * \brief Element-wise maximum of two double vectors (AVX-512F, unaligned access).
 *
 * Writes the larger of aVector[i] and bVector[i] to cVector[i] for every
 * i in [0, num_points). Complete groups of eight elements are processed with
 * _mm512_loadu_pd / _mm512_max_pd / _mm512_storeu_pd, which do not require
 * aligned addresses. Elements left over after the last complete group are
 * processed one at a time.
 *
 * \param cVector    Output buffer; num_points doubles are written.
 * \param aVector    First input buffer; num_points doubles are read.
 * \param bVector    Second input buffer; num_points doubles are read.
 * \param num_points Number of elements to process.
 */
static inline void volk_64f_x2_max_64f_u_avx512f(double* cVector,
                                                 const double* aVector,
                                                 const double* bVector,
                                                 unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int eighthPoints = num_points / 8;

    double* cPtr = cVector;
    const double* aPtr = aVector;
    const double* bPtr = bVector;

    __m512d aVal, bVal, cVal;
    for (; number < eighthPoints; number++) {
        aVal = _mm512_loadu_pd(aPtr);
        bVal = _mm512_loadu_pd(bPtr);

        cVal = _mm512_max_pd(aVal, bVal);

        _mm512_storeu_pd(cPtr, cVal);

        /* Step past the eight elements just handled; the scalar tail below
         * continues from wherever these pointers end up. */
        aPtr += 8;
        bPtr += 8;
        cPtr += 8;
    }

    /* Scalar tail: the remaining num_points % 8 elements. */
    number = eighthPoints * 8;
    for (; number < num_points; number++) {
        const double a = *aPtr++;
        const double b = *bPtr++;
        *cPtr++ = (a > b ? a : b);
    }
}
255 #include <immintrin.h>
/*!
 * \brief Element-wise maximum of two double vectors (AVX, unaligned access).
 *
 * Writes the larger of aVector[i] and bVector[i] to cVector[i] for every
 * i in [0, num_points). Complete groups of four elements are processed with
 * _mm256_loadu_pd / _mm256_max_pd / _mm256_storeu_pd, which do not require
 * aligned addresses. Elements left over after the last complete group are
 * processed one at a time.
 *
 * \param cVector    Output buffer; num_points doubles are written.
 * \param aVector    First input buffer; num_points doubles are read.
 * \param bVector    Second input buffer; num_points doubles are read.
 * \param num_points Number of elements to process.
 */
static inline void volk_64f_x2_max_64f_u_avx(double* cVector,
                                             const double* aVector,
                                             const double* bVector,
                                             unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int quarterPoints = num_points / 4;

    double* cPtr = cVector;
    const double* aPtr = aVector;
    const double* bPtr = bVector;

    __m256d aVal, bVal, cVal;
    for (; number < quarterPoints; number++) {
        aVal = _mm256_loadu_pd(aPtr);
        bVal = _mm256_loadu_pd(bPtr);

        cVal = _mm256_max_pd(aVal, bVal);

        _mm256_storeu_pd(cPtr, cVal);

        /* Step past the four elements just handled; the scalar tail below
         * continues from wherever these pointers end up. */
        aPtr += 4;
        bPtr += 4;
        cPtr += 4;
    }

    /* Scalar tail: the remaining num_points % 4 elements. */
    number = quarterPoints * 4;
    for (; number < num_points; number++) {
        const double a = *aPtr++;
        const double b = *bPtr++;
        *cPtr++ = (a > b ? a : b);
    }
}
FORCE_INLINE __m128d _mm_load_pd(const double *p)
Definition: sse2neon.h:4430
float32x4_t __m128d
Definition: sse2neon.h:242
FORCE_INLINE __m128d _mm_max_pd(__m128d a, __m128d b)
Definition: sse2neon.h:4644
FORCE_INLINE void _mm_store_pd(double *mem_addr, __m128d a)
Definition: sse2neon.h:5897
static void volk_64f_x2_max_64f_a_avx(double *cVector, const double *aVector, const double *bVector, unsigned int num_points)
Definition: volk_64f_x2_max_64f.h:107
static void volk_64f_x2_max_64f_u_avx(double *cVector, const double *aVector, const double *bVector, unsigned int num_points)
Definition: volk_64f_x2_max_64f.h:257
static void volk_64f_x2_max_64f_a_sse2(double *cVector, const double *aVector, const double *bVector, unsigned int num_points)
Definition: volk_64f_x2_max_64f.h:147
static void volk_64f_x2_max_64f_generic(double *cVector, const double *aVector, const double *bVector, unsigned int num_points)
Definition: volk_64f_x2_max_64f.h:186