44 #ifndef INCLUDED_volk_32f_s32f_32f_fm_detect_32f_a_H
45 #define INCLUDED_volk_32f_s32f_32f_fm_detect_32f_a_H
51 #include <immintrin.h>
// Excerpt of volk_32f_s32f_32f_fm_detect_32f_a_avx. Only some lines of the
// definition appear here (each line's leading number is its line number in the
// original header), so statements that are not shown are not described.
54 const float* inputVector,
57 unsigned int num_points)
// NOTE(review): the `num_points < 1` guard that the other variants show is not
// visible in this excerpt; without it `num_points - 1` below would wrap around
// for num_points == 0 -- confirm against the full file.
// The vector-loop counter starts at 1; presumably group 0 is covered by the
// scalar loop over the first eight samples below.
62 unsigned int number = 1;
// Number of eight-sample groups, computed from num_points - 1.
66 const unsigned int eighthPoints = (num_points - 1) / 8;
68 float* outPtr = outputVector;
69 const float* inPtr = inputVector;
// Broadcast +bound and -bound to all eight lanes for the range comparisons.
70 __m256 upperBound = _mm256_set1_ps(bound);
71 __m256 lowerBound = _mm256_set1_ps(-bound);
// Corrections applied to out-of-range lanes: -2*bound for lanes above
// upperBound, +2*bound for lanes below lowerBound.
75 __m256 posBoundAdjust = _mm256_set1_ps(-2 * bound);
76 __m256 negBoundAdjust = _mm256_set1_ps(2 * bound);
// First output: first input sample minus the value held in *saveValue.
78 *outPtr = *inPtr - *saveValue;
// Scalar pass over indices 1 .. min(8, num_points) - 1: each output is the
// difference between an input sample and the one before it.
85 for (j = 1; j < ((8 < num_points) ? 8 : num_points); j++) {
86 *outPtr = *(inPtr) - *(inPtr - 1);
// Vector pass: runs while number < eighthPoints, eight samples per iteration.
95 for (; number < eighthPoints; number++) {
// Load the eight previous samples (unaligned, starting one float before inPtr)
// and the eight current samples (aligned load, so inPtr must be 32-byte
// aligned at this point).
97 next3old1 = _mm256_loadu_ps((
float*)(inPtr - 1));
98 next4 = _mm256_load_ps(inPtr);
// Per-lane difference: current - previous.
101 next3old1 = _mm256_sub_ps(next4, next3old1);
// Build the per-lane correction: mask the lanes above upperBound and keep
// posBoundAdjust there, mask the lanes below lowerBound and keep negBoundAdjust
// there (next4 is reused as scratch), then merge both into boundAdjust.
103 boundAdjust = _mm256_cmp_ps(next3old1, upperBound, _CMP_GT_OS);
104 boundAdjust = _mm256_and_ps(boundAdjust, posBoundAdjust);
105 next4 = _mm256_cmp_ps(next3old1, lowerBound, _CMP_LT_OS);
106 next4 = _mm256_and_ps(next4, negBoundAdjust);
107 boundAdjust = _mm256_or_ps(next4, boundAdjust);
// Apply the correction and write the eight results with an aligned store.
109 next3old1 = _mm256_add_ps(next3old1, boundAdjust);
110 _mm256_store_ps(outPtr, next3old1);
// Scalar tail: starts at max(8, 8 * eighthPoints) and runs up to num_points.
114 for (number = (8 > (eighthPoints * 8) ? 8 : (8 * eighthPoints)); number < num_points;
116 *outPtr = *(inPtr) - *(inPtr - 1);
// NOTE(review): the test guarding this subtraction (original line 117) is not
// shown in this excerpt; presumably `if (*outPtr > bound)` -- confirm.
118 *outPtr -= 2 * bound;
// Differences below -bound are shifted up by 2*bound.
119 if (*outPtr < -bound)
120 *outPtr += 2 * bound;
// Store the last input sample in *saveValue; the first output above subtracts
// *saveValue, so presumably this carries state into the next call.
125 *saveValue = inputVector[num_points - 1];
131 #include <xmmintrin.h>
// Excerpt of volk_32f_s32f_32f_fm_detect_32f_a_sse. Only some lines of the
// definition appear here (each line's leading number is its line number in the
// original header), so statements that are not shown are not described.
134 const float* inputVector,
137 unsigned int num_points)
// Guard for an empty input; the body of this branch is not shown in this
// excerpt (presumably an early return -- confirm).
139 if (num_points < 1) {
// The vector-loop counter starts at 1; presumably group 0 is covered by the
// scalar loop over the first four samples below.
142 unsigned int number = 1;
// Number of four-sample groups, computed from num_points - 1.
146 const unsigned int quarterPoints = (num_points - 1) / 4;
148 float* outPtr = outputVector;
149 const float* inPtr = inputVector;
// First output: first input sample minus the value held in *saveValue.
158 *outPtr = *inPtr - *saveValue;
// NOTE(review): the test guarding this subtraction (original line 159) is not
// shown in this excerpt; presumably `if (*outPtr > bound)` -- confirm.
160 *outPtr -= 2 * bound;
// Differences below -bound are shifted up by 2*bound.
161 if (*outPtr < -bound)
162 *outPtr += 2 * bound;
// Scalar pass over indices 1 .. min(4, num_points) - 1: each output is the
// difference between an input sample and the one before it, followed by the
// same range correction as above (guard on original line 167 not shown).
165 for (j = 1; j < ((4 < num_points) ? 4 : num_points); j++) {
166 *outPtr = *(inPtr) - *(inPtr - 1);
168 *outPtr -= 2 * bound;
169 if (*outPtr < -bound)
170 *outPtr += 2 * bound;
// Vector pass: runs while number < quarterPoints. The loads, subtraction,
// comparisons and store are not shown in this excerpt; the visible statements
// keep posBoundAdjust in the lanes selected by boundAdjust, merge that with
// next4, and add the merged correction to next3old1.
175 for (; number < quarterPoints; number++) {
184 boundAdjust =
_mm_and_ps(boundAdjust, posBoundAdjust);
187 boundAdjust =
_mm_or_ps(next4, boundAdjust);
189 next3old1 =
_mm_add_ps(next3old1, boundAdjust);
// Scalar tail: starts at max(4, 4 * quarterPoints); the rest of the loop header
// is not shown in this excerpt.
194 for (number = (4 > (quarterPoints * 4) ? 4 : (4 * quarterPoints));
197 *outPtr = *(inPtr) - *(inPtr - 1);
// NOTE(review): guard for the subtraction (original line 198) is not shown.
199 *outPtr -= 2 * bound;
200 if (*outPtr < -bound)
201 *outPtr += 2 * bound;
// Store the last input sample in *saveValue; the first output above subtracts
// *saveValue, so presumably this carries state into the next call.
206 *saveValue = inputVector[num_points - 1];
210 #ifdef LV_HAVE_GENERIC
// Excerpt of volk_32f_s32f_32f_fm_detect_32f_generic, the plain scalar variant.
// Only some lines of the definition appear here (each line's leading number is
// its line number in the original header), so statements that are not shown
// are not described.
213 const float* inputVector,
216 unsigned int num_points)
// Guard for an empty input; the body of this branch is not shown in this
// excerpt (presumably an early return -- confirm).
218 if (num_points < 1) {
221 unsigned int number = 0;
222 float* outPtr = outputVector;
223 const float* inPtr = inputVector;
// First output: first input sample minus the value held in *saveValue.
226 *outPtr = *inPtr - *saveValue;
// NOTE(review): the test guarding this subtraction (original line 227) is not
// shown in this excerpt; presumably `if (*outPtr > bound)` -- confirm.
228 *outPtr -= 2 * bound;
// Differences below -bound are shifted up by 2*bound.
229 if (*outPtr < -bound)
230 *outPtr += 2 * bound;
// Remaining outputs (indices 1 .. num_points - 1): difference between each
// input sample and the one before it, followed by the same range correction
// (guard on original line 236 not shown). The pointer increments are not
// visible in this excerpt.
234 for (number = 1; number < num_points; number++) {
235 *outPtr = *(inPtr) - *(inPtr - 1);
237 *outPtr -= 2 * bound;
238 if (*outPtr < -bound)
239 *outPtr += 2 * bound;
// Store the last input sample in *saveValue; the first output above subtracts
// *saveValue, so presumably this carries state into the next call.
244 *saveValue = inputVector[num_points - 1];
252 #ifndef INCLUDED_volk_32f_s32f_32f_fm_detect_32f_u_H
253 #define INCLUDED_volk_32f_s32f_32f_fm_detect_32f_u_H
255 #include <inttypes.h>
259 #include <immintrin.h>
// Excerpt of volk_32f_s32f_32f_fm_detect_32f_u_avx. Only some lines of the
// definition appear here (each line's leading number is its line number in the
// original header), so statements that are not shown are not described.
262 const float* inputVector,
265 unsigned int num_points)
// Guard for an empty input; the body of this branch is not shown in this
// excerpt (presumably an early return -- confirm).
267 if (num_points < 1) {
// The vector-loop counter starts at 1; presumably group 0 is covered by the
// scalar loop over the first eight samples below.
270 unsigned int number = 1;
// Number of eight-sample groups, computed from num_points - 1.
274 const unsigned int eighthPoints = (num_points - 1) / 8;
276 float* outPtr = outputVector;
277 const float* inPtr = inputVector;
// Broadcast +bound and -bound to all eight lanes for the range comparisons.
278 __m256 upperBound = _mm256_set1_ps(bound);
279 __m256 lowerBound = _mm256_set1_ps(-bound);
// Corrections applied to out-of-range lanes: -2*bound for lanes above
// upperBound, +2*bound for lanes below lowerBound.
283 __m256 posBoundAdjust = _mm256_set1_ps(-2 * bound);
284 __m256 negBoundAdjust = _mm256_set1_ps(2 * bound);
// First output: first input sample minus the value held in *saveValue.
286 *outPtr = *inPtr - *saveValue;
// NOTE(review): the test guarding this subtraction (original line 287) is not
// shown in this excerpt; presumably `if (*outPtr > bound)` -- confirm.
288 *outPtr -= 2 * bound;
// Differences below -bound are shifted up by 2*bound.
289 if (*outPtr < -bound)
290 *outPtr += 2 * bound;
// Scalar pass over indices 1 .. min(8, num_points) - 1: each output is the
// difference between an input sample and the one before it, followed by the
// same range correction as above (guard on original line 295 not shown).
293 for (j = 1; j < ((8 < num_points) ? 8 : num_points); j++) {
294 *outPtr = *(inPtr) - *(inPtr - 1);
296 *outPtr -= 2 * bound;
297 if (*outPtr < -bound)
298 *outPtr += 2 * bound;
// Vector pass: runs while number < eighthPoints, eight samples per iteration.
303 for (; number < eighthPoints; number++) {
// Load the eight previous samples (starting one float before inPtr) and the
// eight current samples; both loads are the unaligned form, so no alignment is
// required of inPtr here.
305 next3old1 = _mm256_loadu_ps((
float*)(inPtr - 1));
306 next4 = _mm256_loadu_ps(inPtr);
// Per-lane difference: current - previous.
309 next3old1 = _mm256_sub_ps(next4, next3old1);
// Build the per-lane correction: mask the lanes above upperBound and keep
// posBoundAdjust there, mask the lanes below lowerBound and keep negBoundAdjust
// there (next4 is reused as scratch), then merge both into boundAdjust.
311 boundAdjust = _mm256_cmp_ps(next3old1, upperBound, _CMP_GT_OS);
312 boundAdjust = _mm256_and_ps(boundAdjust, posBoundAdjust);
313 next4 = _mm256_cmp_ps(next3old1, lowerBound, _CMP_LT_OS);
314 next4 = _mm256_and_ps(next4, negBoundAdjust);
315 boundAdjust = _mm256_or_ps(next4, boundAdjust);
// Apply the correction and write the eight results with an unaligned store.
317 next3old1 = _mm256_add_ps(next3old1, boundAdjust);
318 _mm256_storeu_ps(outPtr, next3old1);
// Scalar tail: starts at max(8, 8 * eighthPoints) and runs up to num_points.
322 for (number = (8 > (eighthPoints * 8) ? 8 : (8 * eighthPoints)); number < num_points;
324 *outPtr = *(inPtr) - *(inPtr - 1);
// NOTE(review): guard for the subtraction (original line 325) is not shown.
326 *outPtr -= 2 * bound;
327 if (*outPtr < -bound)
328 *outPtr += 2 * bound;
// Store the last input sample in *saveValue; the first output above subtracts
// *saveValue, so presumably this carries state into the next call.
333 *saveValue = inputVector[num_points - 1];
FORCE_INLINE __m128 _mm_sub_ps(__m128 a, __m128 b)
Definition: sse2neon.h:2834
float32x4_t __m128
Definition: sse2neon.h:235
FORCE_INLINE __m128 _mm_set_ps1(float)
Definition: sse2neon.h:2437
FORCE_INLINE __m128 _mm_cmpgt_ps(__m128 a, __m128 b)
Definition: sse2neon.h:1154
FORCE_INLINE __m128 _mm_loadu_ps(const float *p)
Definition: sse2neon.h:1941
FORCE_INLINE __m128 _mm_and_ps(__m128 a, __m128 b)
Definition: sse2neon.h:1064
FORCE_INLINE __m128 _mm_add_ps(__m128 a, __m128 b)
Definition: sse2neon.h:1039
FORCE_INLINE __m128 _mm_cmplt_ps(__m128 a, __m128 b)
Definition: sse2neon.h:1190
FORCE_INLINE __m128 _mm_load_ps(const float *p)
Definition: sse2neon.h:1858
FORCE_INLINE void _mm_store_ps(float *p, __m128 a)
Definition: sse2neon.h:2704
FORCE_INLINE __m128 _mm_or_ps(__m128, __m128)
Definition: sse2neon.h:2237
static void volk_32f_s32f_32f_fm_detect_32f_a_avx(float *outputVector, const float *inputVector, const float bound, float *saveValue, unsigned int num_points)
Definition: volk_32f_s32f_32f_fm_detect_32f.h:53
static void volk_32f_s32f_32f_fm_detect_32f_u_avx(float *outputVector, const float *inputVector, const float bound, float *saveValue, unsigned int num_points)
Definition: volk_32f_s32f_32f_fm_detect_32f.h:261
static void volk_32f_s32f_32f_fm_detect_32f_a_sse(float *outputVector, const float *inputVector, const float bound, float *saveValue, unsigned int num_points)
Definition: volk_32f_s32f_32f_fm_detect_32f.h:133
static void volk_32f_s32f_32f_fm_detect_32f_generic(float *outputVector, const float *inputVector, const float bound, float *saveValue, unsigned int num_points)
Definition: volk_32f_s32f_32f_fm_detect_32f.h:212