42 #ifndef INCLUDED_volk_32fc_s32f_x2_power_spectral_density_32f_a_H
43 #define INCLUDED_volk_32fc_s32f_x2_power_spectral_density_32f_a_H
50 #include <immintrin.h>
52 #ifdef LV_HAVE_LIB_SIMDMATH
59 const float normalizationFactor,
61 unsigned int num_points)
// AVX kernel fragment: normalizes each complex input point, forms
// re^2 + im^2, scales by 1/rbw, takes logf4() and multiplies by
// 10 / logf4(10), writing one float per point to logPowerOutput.
// NOTE(review): this listing skips numbered lines (declarations of `number`,
// `inputVal1`, `inputVal2`, `power`, pointer increments, closing braces,
// #endif); confirm details against the complete file.

// View the complex input as a flat float array; the scalar tail below reads
// two consecutive floats per point as real, then imag.
63 const float* inputPtr = (
const float*)complexFFTInput;
64 float* destPtr = logPowerOutput;
// Reciprocals are computed once so the loops multiply instead of divide.
66 const float iRBW = 1.0 / rbw;
67 const float iNormalizationFactor = 1.0 / normalizationFactor;
// The vector path is only compiled when the SIMD math library is available.
69 #ifdef LV_HAVE_LIB_SIMDMATH
// magScalar = 10 / logf4(10) in every lane; presumably the factor that turns
// logf4() output into 10*log10() -- TODO confirm logf4 is a natural log.
// NOTE(review): logf4 is applied to __m256 values here; its declaration is not
// visible in this chunk -- confirm it accepts 256-bit vectors.
70 __m256 magScalar = _mm256_set1_ps(10.0);
71 magScalar = _mm256_div_ps(magScalar, logf4(magScalar));
// Broadcast the scalar reciprocals to all eight lanes.
73 __m256 invRBW = _mm256_set1_ps(iRBW);
75 __m256 invNormalizationFactor = _mm256_set1_ps(iNormalizationFactor);
78 __m256 input1, input2;
// Each iteration handles 8 complex points (two 8-float vectors).
79 const uint64_t eighthPoints = num_points / 8;
80 for (; number < eighthPoints; number++) {
// Aligned loads of the interleaved real/imag floats.
// NOTE(review): both loads read `inputPtr` with no visible advance between
// them; listing line 83 is missing here -- verify the pointer is incremented.
82 input1 = _mm256_load_ps(inputPtr);
84 input2 = _mm256_load_ps(inputPtr);
// Apply the normalization factor to every real and imaginary component.
88 input1 = _mm256_mul_ps(input1, invNormalizationFactor);
89 input2 = _mm256_mul_ps(input2, invNormalizationFactor);
// Square each component (re^2 and im^2 still in separate lanes).
93 input1 = _mm256_mul_ps(input1, input1);
95 input2 = _mm256_mul_ps(input2, input2);
// Regroup 128-bit halves: 0x20 -> {input1.low, input2.low},
// 0x31 -> {input1.high, input2.high}, so the per-lane horizontal add below
// yields the eight re^2 + im^2 sums in input order.
99 inputVal1 = _mm256_permute2f128_ps(input1, input2, 0x20);
100 inputVal2 = _mm256_permute2f128_ps(input1, input2, 0x31);
// Sum adjacent pairs: one magnitude-squared value per complex point.
102 power = _mm256_hadd_ps(inputVal1, inputVal2);
// Divide by the resolution bandwidth (multiply by its reciprocal).
105 power = _mm256_mul_ps(power, invRBW);
// Logarithm of the scaled power, then scale by 10 / logf4(10).
108 power = logf4(power);
111 power = _mm256_mul_ps(power, magScalar);
// Aligned store of eight results.
// NOTE(review): the destPtr advance is not visible in this listing.
114 _mm256_store_ps(destPtr, power);
// Index of the first point not covered by the vector loop.
119 number = eighthPoints * 8;
// Scalar tail for the remaining num_points % 8 points.
122 for (; number < num_points; number++) {
// Normalized real and imaginary parts of the current point; the rest of the
// tail computation is outside the visible listing.
129 const float real = *inputPtr++ * iNormalizationFactor;
130 const float imag = *inputPtr++ * iNormalizationFactor;
140 #include <pmmintrin.h>
143 #ifdef LV_HAVE_LIB_SIMDMATH
144 #include <simdmath.h>
150 const float normalizationFactor,
152 unsigned int num_points)
// SSE3 kernel fragment; visible lines show 4 points per vector iteration
// followed by a scalar tail.
// NOTE(review): most of the vector loop body (loads, squaring, horizontal add,
// rbw scaling, store) and several declarations are missing from this listing;
// the comments below cover only the statements that are visible.

// View the complex input as a flat float array; the scalar tail reads two
// consecutive floats per point as real, then imag.
154 const float* inputPtr = (
const float*)complexFFTInput;
155 float* destPtr = logPowerOutput;
// Reciprocals computed once up front.
// NOTE(review): iRBW is not used in any visible line of this fragment.
157 const float iRBW = 1.0 / rbw;
158 const float iNormalizationFactor = 1.0 / normalizationFactor;
// The vector path is only compiled when the SIMD math library is available.
160 #ifdef LV_HAVE_LIB_SIMDMATH
// magScalar becomes magScalar / logf4(magScalar); its initial value is set on
// a line not shown here (presumably 10.0 -- TODO confirm).
162 magScalar =
_mm_div_ps(magScalar, logf4(magScalar));
// Each iteration handles 4 complex points.
170 const uint64_t quarterPoints = num_points / 4;
171 for (; number < quarterPoints; number++) {
// Apply the normalization factor to both input vectors.
179 input1 =
_mm_mul_ps(input1, invNormalizationFactor);
180 input2 =
_mm_mul_ps(input2, invNormalizationFactor);
// Logarithm of the power vector (how `power` is formed is not visible here).
196 power = logf4(power);
// Index of the first point not covered by the vector loop.
207 number = quarterPoints * 4;
// Scalar tail for the remaining num_points % 4 points.
210 for (; number < num_points; number++) {
// Normalized real and imaginary parts of the current point; the rest of the
// tail computation is outside the visible listing.
217 const float real = *inputPtr++ * iNormalizationFactor;
218 const float imag = *inputPtr++ * iNormalizationFactor;
228 #ifdef LV_HAVE_GENERIC
233 const float normalizationFactor,
235 unsigned int num_points)
// Generic fragment: delegates to volk_32fc_s32f_power_spectrum_32f with one
// of two normalization arguments.
// NOTE(review): the condition choosing between the two calls is on listing
// lines that are not visible (237, 240); presumably a check on rbw -- confirm.

// Variant 1: fold the resolution bandwidth into the normalization factor by
// passing normalizationFactor * sqrt(rbw).
238 volk_32fc_s32f_power_spectrum_32f(
239 logPowerOutput, complexFFTInput, normalizationFactor * sqrt(rbw), num_points);
// Variant 2: pass normalizationFactor unchanged (no rbw scaling).
241 volk_32fc_s32f_power_spectrum_32f(
242 logPowerOutput, complexFFTInput, normalizationFactor, num_points);
float32x4_t __m128
Definition: sse2neon.h:235
FORCE_INLINE __m128 _mm_hadd_ps(__m128 a, __m128 b)
Definition: sse2neon.h:6527
FORCE_INLINE __m128 _mm_div_ps(__m128 a, __m128 b)
Definition: sse2neon.h:1756
FORCE_INLINE __m128 _mm_mul_ps(__m128 a, __m128 b)
Definition: sse2neon.h:2205
FORCE_INLINE __m128 _mm_set_ps1(float)
Definition: sse2neon.h:2437
FORCE_INLINE __m128 _mm_load_ps(const float *p)
Definition: sse2neon.h:1858
FORCE_INLINE void _mm_store_ps(float *p, __m128 a)
Definition: sse2neon.h:2704
static void volk_32fc_s32f_x2_power_spectral_density_32f_generic(float *logPowerOutput, const lv_32fc_t *complexFFTInput, const float normalizationFactor, const float rbw, unsigned int num_points)
Definition: volk_32fc_s32f_x2_power_spectral_density_32f.h:231
static void volk_32fc_s32f_x2_power_spectral_density_32f_a_avx(float *logPowerOutput, const lv_32fc_t *complexFFTInput, const float normalizationFactor, const float rbw, unsigned int num_points)
Definition: volk_32fc_s32f_x2_power_spectral_density_32f.h:57
static void volk_32fc_s32f_x2_power_spectral_density_32f_a_sse3(float *logPowerOutput, const lv_32fc_t *complexFFTInput, const float normalizationFactor, const float rbw, unsigned int num_points)
Definition: volk_32fc_s32f_x2_power_spectral_density_32f.h:148
#define volk_log2to10factor
Definition: volk_common.h:169
static float log2f_non_ieee(float f)
Definition: volk_common.h:159
float complex lv_32fc_t
Definition: volk_complex.h:74