56 #ifndef INCLUDED_volk_32f_s32f_multiply_32f_u_H
57 #define INCLUDED_volk_32f_s32f_multiply_32f_u_H
63 #include <xmmintrin.h>
/* Fragment of the kernel that the symbol index at the end of this listing
 * names volk_32f_s32f_multiply_32f_u_sse (definition starting at line 65).
 * NOTE(review): this listing omits the function-name line, the scalar
 * parameter line, the braces and the body of the vector loop. */
68 unsigned int num_points)
70 unsigned int number = 0;
/* Count of complete groups of four floats in the input. */
71 const unsigned int quarterPoints = num_points / 4;
/* Working cursors over the output and input buffers. */
73 float* cPtr = cVector;
74 const float* aPtr = aVector;
/* Vector loop: one iteration per group of four floats. Its body is not
 * visible in this listing. */
78 for (; number < quarterPoints; number++) {
/* Scalar tail: restart the index at the first element that is not part of
 * a complete group of four. */
89 number = quarterPoints * 4;
90 for (; number < num_points; number++) {
/* Store input * scalar, then advance both cursors by one element. */
91 *cPtr++ = (*aPtr++) * scalar;
97 #include <immintrin.h>
/* Fragment of the kernel that the symbol index at the end of this listing
 * names volk_32f_s32f_multiply_32f_u_avx (definition starting at line 99).
 * NOTE(review): the function-name line, the scalar parameter line, the
 * braces and the pointer advance inside the vector loop are not visible. */
100 const float* aVector,
102 unsigned int num_points)
104 unsigned int number = 0;
/* Count of complete groups of eight floats in the input. */
105 const unsigned int eighthPoints = num_points / 8;
/* Working cursors over the output and input buffers. */
107 float* cPtr = cVector;
108 const float* aPtr = aVector;
110 __m256 aVal, bVal, cVal;
/* Replicate scalar into every lane once, before the loop. */
111 bVal = _mm256_set1_ps(scalar);
112 for (; number < eighthPoints; number++) {
/* Load that does not require an aligned source address. */
114 aVal = _mm256_loadu_ps(aPtr);
/* Lane-wise product of the loaded values and the replicated scalar. */
116 cVal = _mm256_mul_ps(aVal, bVal);
/* Store that does not require an aligned destination address. */
118 _mm256_storeu_ps(cPtr, cVal);
/* Scalar tail: restart the index at the first element that is not part of
 * a complete group of eight. */
124 number = eighthPoints * 8;
125 for (; number < num_points; number++) {
/* Store input * scalar, then advance both cursors by one element. */
126 *cPtr++ = (*aPtr++) * scalar;
131 #ifdef LV_HAVE_GENERIC
/* Fragment of the kernel that the symbol index at the end of this listing
 * names volk_32f_s32f_multiply_32f_generic (definition starting at line 133).
 * NOTE(review): the function-name line, the scalar parameter line, the
 * braces and any pointer advance inside the loop are not visible. */
134 const float* aVector,
136 unsigned int num_points)
138 unsigned int number = 0;
/* Working cursors over the input and output buffers. */
139 const float* inputPtr = aVector;
140 float* outputPtr = cVector;
/* Plain element-by-element loop over all num_points values. */
141 for (number = 0; number < num_points; number++) {
/* Store input * scalar at the current output position. This statement does
 * not move the cursors itself. */
142 *outputPtr = (*inputPtr) * scalar;
152 #ifndef INCLUDED_volk_32f_s32f_multiply_32f_a_H
153 #define INCLUDED_volk_32f_s32f_multiply_32f_a_H
155 #include <inttypes.h>
159 #include <xmmintrin.h>
/* Fragment of the kernel that the symbol index at the end of this listing
 * names volk_32f_s32f_multiply_32f_a_sse (definition starting at line 161).
 * NOTE(review): this listing omits the function-name line, the scalar
 * parameter line, the braces and the body of the vector loop. */
162 const float* aVector,
164 unsigned int num_points)
166 unsigned int number = 0;
/* Count of complete groups of four floats in the input. */
167 const unsigned int quarterPoints = num_points / 4;
/* Working cursors over the output and input buffers. */
169 float* cPtr = cVector;
170 const float* aPtr = aVector;
/* Vector loop: one iteration per group of four floats. Its body is not
 * visible in this listing. */
174 for (; number < quarterPoints; number++) {
/* Scalar tail: restart the index at the first element that is not part of
 * a complete group of four. */
185 number = quarterPoints * 4;
186 for (; number < num_points; number++) {
/* Store input * scalar, then advance both cursors by one element. */
187 *cPtr++ = (*aPtr++) * scalar;
193 #include <immintrin.h>
/* Fragment of the kernel that the symbol index at the end of this listing
 * names volk_32f_s32f_multiply_32f_a_avx (definition starting at line 195).
 * NOTE(review): the function-name line, the scalar parameter line, the
 * braces and the pointer advance inside the vector loop are not visible. */
196 const float* aVector,
198 unsigned int num_points)
200 unsigned int number = 0;
/* Count of complete groups of eight floats in the input. */
201 const unsigned int eighthPoints = num_points / 8;
/* Working cursors over the output and input buffers. */
203 float* cPtr = cVector;
204 const float* aPtr = aVector;
206 __m256 aVal, bVal, cVal;
/* Replicate scalar into every lane once, before the loop. */
207 bVal = _mm256_set1_ps(scalar);
208 for (; number < eighthPoints; number++) {
/* Aligned load: the source address is expected to be 32-byte aligned. */
209 aVal = _mm256_load_ps(aPtr);
/* Lane-wise product of the loaded values and the replicated scalar. */
211 cVal = _mm256_mul_ps(aVal, bVal);
/* Aligned store: the destination address is expected to be 32-byte aligned. */
213 _mm256_store_ps(cPtr, cVal);
/* Scalar tail: restart the index at the first element that is not part of
 * a complete group of eight. */
219 number = eighthPoints * 8;
220 for (; number < num_points; number++) {
/* Store input * scalar, then advance both cursors by one element. */
221 *cPtr++ = (*aPtr++) * scalar;
227 #include <arm_neon.h>
/* Fragment of the kernel that the symbol index at the end of this listing
 * names volk_32f_s32f_multiply_32f_u_neon (definition starting at line 229).
 * NOTE(review): the function-name line, the scalar parameter line, the
 * braces and the pointer advance inside the vector loop are not visible. */
230 const float* aVector,
232 unsigned int num_points)
234 unsigned int number = 0;
/* Working cursors over the input and output buffers. */
235 const float* inputPtr = aVector;
236 float* outputPtr = cVector;
/* Count of complete groups of four floats in the input. */
237 const unsigned int quarterPoints = num_points / 4;
239 float32x4_t aVal, cVal;
241 for (number = 0; number < quarterPoints; number++) {
/* Load four floats from the input cursor. */
242 aVal = vld1q_f32(inputPtr);
/* Multiply every lane by the same scalar value. */
243 cVal = vmulq_n_f32(aVal, scalar);
/* Write the four products at the output cursor. */
244 vst1q_f32(outputPtr, cVal);
/* Scalar tail: handle the elements left after the last complete group of
 * four, one at a time. */
248 for (number = quarterPoints * 4; number < num_points; number++) {
/* Store input * scalar, then advance both cursors by one element. */
249 *outputPtr++ = (*inputPtr++) * scalar;
255 #ifdef LV_HAVE_GENERIC
/* Fragment of the kernel that the symbol index at the end of this listing
 * names volk_32f_s32f_multiply_32f_a_generic (definition starting at
 * line 257).
 * NOTE(review): the function-name line, the scalar parameter line, the
 * braces and any pointer advance inside the loop are not visible. */
258 const float* aVector,
260 unsigned int num_points)
262 unsigned int number = 0;
/* Working cursors over the input and output buffers. */
263 const float* inputPtr = aVector;
264 float* outputPtr = cVector;
/* Plain element-by-element loop over all num_points values. */
265 for (number = 0; number < num_points; number++) {
/* Store input * scalar at the current output position. This statement does
 * not move the cursors itself. */
266 *outputPtr = (*inputPtr) * scalar;
/* Declaration of an implementation that is defined outside this file.
 * NOTE(review): only the first and last parameters are visible in this
 * listing; the parameters declared between them are missing. */
276 extern void volk_32f_s32f_multiply_32f_a_orc_impl(
float* dst,
279 unsigned int num_points);
/* Thin wrapper that forwards its arguments unchanged to
 * volk_32f_s32f_multiply_32f_a_orc_impl. Note that this _u_ entry point
 * calls the _a_ implementation.
 * NOTE(review): the scalar parameter line and the braces are not visible
 * in this listing. */
281 static inline void volk_32f_s32f_multiply_32f_u_orc(
float* cVector,
282 const float* aVector,
284 unsigned int num_points)
/* Pass output, input, scalar and count straight through. */
286 volk_32f_s32f_multiply_32f_a_orc_impl(cVector, aVector, scalar, num_points);
float32x4_t __m128
Definition: sse2neon.h:235
FORCE_INLINE void _mm_storeu_ps(float *p, __m128 a)
Definition: sse2neon.h:2787
FORCE_INLINE __m128 _mm_mul_ps(__m128 a, __m128 b)
Definition: sse2neon.h:2205
FORCE_INLINE __m128 _mm_set_ps1(float)
Definition: sse2neon.h:2437
FORCE_INLINE __m128 _mm_loadu_ps(const float *p)
Definition: sse2neon.h:1941
FORCE_INLINE __m128 _mm_load_ps(const float *p)
Definition: sse2neon.h:1858
FORCE_INLINE void _mm_store_ps(float *p, __m128 a)
Definition: sse2neon.h:2704
static void volk_32f_s32f_multiply_32f_a_avx(float *cVector, const float *aVector, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_multiply_32f.h:195
static void volk_32f_s32f_multiply_32f_a_generic(float *cVector, const float *aVector, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_multiply_32f.h:257
static void volk_32f_s32f_multiply_32f_u_sse(float *cVector, const float *aVector, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_multiply_32f.h:65
static void volk_32f_s32f_multiply_32f_u_avx(float *cVector, const float *aVector, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_multiply_32f.h:99
static void volk_32f_s32f_multiply_32f_a_sse(float *cVector, const float *aVector, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_multiply_32f.h:161
static void volk_32f_s32f_multiply_32f_generic(float *cVector, const float *aVector, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_multiply_32f.h:133
static void volk_32f_s32f_multiply_32f_u_neon(float *cVector, const float *aVector, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_multiply_32f.h:229