Vector Optimized Library of Kernels  3.0.0
Architecture-tuned implementations of math kernels
volk_32fc_s32f_x2_power_spectral_density_32f.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of VOLK
6  *
7  * SPDX-License-Identifier: LGPL-3.0-or-later
8  */
9 
42 #ifndef INCLUDED_volk_32fc_s32f_x2_power_spectral_density_32f_a_H
43 #define INCLUDED_volk_32fc_s32f_x2_power_spectral_density_32f_a_H
44 
45 #include <inttypes.h>
46 #include <math.h>
47 #include <stdio.h>
48 
49 #ifdef LV_HAVE_AVX
50 #include <immintrin.h>
51 
52 #ifdef LV_HAVE_LIB_SIMDMATH
53 #include <simdmath.h>
54 #endif /* LV_HAVE_LIB_SIMDMATH */
55 
56 static inline void
58  const lv_32fc_t* complexFFTInput,
59  const float normalizationFactor,
60  const float rbw,
61  unsigned int num_points)
62 {
63  const float* inputPtr = (const float*)complexFFTInput;
64  float* destPtr = logPowerOutput;
65  uint64_t number = 0;
66  const float iRBW = 1.0 / rbw;
67  const float iNormalizationFactor = 1.0 / normalizationFactor;
68 
69 #ifdef LV_HAVE_LIB_SIMDMATH
70  __m256 magScalar = _mm256_set1_ps(10.0);
71  magScalar = _mm256_div_ps(magScalar, logf4(magScalar));
72 
73  __m256 invRBW = _mm256_set1_ps(iRBW);
74 
75  __m256 invNormalizationFactor = _mm256_set1_ps(iNormalizationFactor);
76 
77  __m256 power;
78  __m256 input1, input2;
79  const uint64_t eighthPoints = num_points / 8;
80  for (; number < eighthPoints; number++) {
81  // Load the complex values
82  input1 = _mm256_load_ps(inputPtr);
83  inputPtr += 8;
84  input2 = _mm256_load_ps(inputPtr);
85  inputPtr += 8;
86 
87  // Apply the normalization factor
88  input1 = _mm256_mul_ps(input1, invNormalizationFactor);
89  input2 = _mm256_mul_ps(input2, invNormalizationFactor);
90 
91  // Multiply each value by itself
92  // (r1*r1), (i1*i1), (r2*r2), (i2*i2)
93  input1 = _mm256_mul_ps(input1, input1);
94  // (r3*r3), (i3*i3), (r4*r4), (i4*i4)
95  input2 = _mm256_mul_ps(input2, input2);
96 
97  // Horizontal add, to add (r*r) + (i*i) for each complex value
98  // (r1*r1)+(i1*i1), (r2*r2) + (i2*i2), (r3*r3)+(i3*i3), (r4*r4)+(i4*i4)
99  inputVal1 = _mm256_permute2f128_ps(input1, input2, 0x20);
100  inputVal2 = _mm256_permute2f128_ps(input1, input2, 0x31);
101 
102  power = _mm256_hadd_ps(inputVal1, inputVal2);
103 
104  // Divide by the rbw
105  power = _mm256_mul_ps(power, invRBW);
106 
107  // Calculate the natural log power
108  power = logf4(power);
109 
110  // Convert to log10 and multiply by 10.0
111  power = _mm256_mul_ps(power, magScalar);
112 
113  // Store the floating point results
114  _mm256_store_ps(destPtr, power);
115 
116  destPtr += 8;
117  }
118 
119  number = eighthPoints * 8;
120 #endif /* LV_HAVE_LIB_SIMDMATH */
121  // Calculate the FFT for any remaining points
122  for (; number < num_points; number++) {
123  // Calculate dBm
124  // 50 ohm load assumption
125  // 10 * log10 (v^2 / (2 * 50.0 * .001)) = 10 * log10( v^2 * 10)
126  // 75 ohm load assumption
127  // 10 * log10 (v^2 / (2 * 75.0 * .001)) = 10 * log10( v^2 * 15)
128 
129  const float real = *inputPtr++ * iNormalizationFactor;
130  const float imag = *inputPtr++ * iNormalizationFactor;
131 
132  *destPtr = volk_log2to10factor *
133  log2f_non_ieee((((real * real) + (imag * imag))) * iRBW);
134  destPtr++;
135  }
136 }
137 #endif /* LV_HAVE_AVX */
138 
139 #ifdef LV_HAVE_SSE3
140 #include <pmmintrin.h>
141 
142 
143 #ifdef LV_HAVE_LIB_SIMDMATH
144 #include <simdmath.h>
145 #endif /* LV_HAVE_LIB_SIMDMATH */
146 
147 static inline void
149  const lv_32fc_t* complexFFTInput,
150  const float normalizationFactor,
151  const float rbw,
152  unsigned int num_points)
153 {
154  const float* inputPtr = (const float*)complexFFTInput;
155  float* destPtr = logPowerOutput;
156  uint64_t number = 0;
157  const float iRBW = 1.0 / rbw;
158  const float iNormalizationFactor = 1.0 / normalizationFactor;
159 
160 #ifdef LV_HAVE_LIB_SIMDMATH
161  __m128 magScalar = _mm_set_ps1(10.0);
162  magScalar = _mm_div_ps(magScalar, logf4(magScalar));
163 
164  __m128 invRBW = _mm_set_ps1(iRBW);
165 
166  __m128 invNormalizationFactor = _mm_set_ps1(iNormalizationFactor);
167 
168  __m128 power;
169  __m128 input1, input2;
170  const uint64_t quarterPoints = num_points / 4;
171  for (; number < quarterPoints; number++) {
172  // Load the complex values
173  input1 = _mm_load_ps(inputPtr);
174  inputPtr += 4;
175  input2 = _mm_load_ps(inputPtr);
176  inputPtr += 4;
177 
178  // Apply the normalization factor
179  input1 = _mm_mul_ps(input1, invNormalizationFactor);
180  input2 = _mm_mul_ps(input2, invNormalizationFactor);
181 
182  // Multiply each value by itself
183  // (r1*r1), (i1*i1), (r2*r2), (i2*i2)
184  input1 = _mm_mul_ps(input1, input1);
185  // (r3*r3), (i3*i3), (r4*r4), (i4*i4)
186  input2 = _mm_mul_ps(input2, input2);
187 
188  // Horizontal add, to add (r*r) + (i*i) for each complex value
189  // (r1*r1)+(i1*i1), (r2*r2) + (i2*i2), (r3*r3)+(i3*i3), (r4*r4)+(i4*i4)
190  power = _mm_hadd_ps(input1, input2);
191 
192  // Divide by the rbw
193  power = _mm_mul_ps(power, invRBW);
194 
195  // Calculate the natural log power
196  power = logf4(power);
197 
198  // Convert to log10 and multiply by 10.0
199  power = _mm_mul_ps(power, magScalar);
200 
201  // Store the floating point results
202  _mm_store_ps(destPtr, power);
203 
204  destPtr += 4;
205  }
206 
207  number = quarterPoints * 4;
208 #endif /* LV_HAVE_LIB_SIMDMATH */
209  // Calculate the FFT for any remaining points
210  for (; number < num_points; number++) {
211  // Calculate dBm
212  // 50 ohm load assumption
213  // 10 * log10 (v^2 / (2 * 50.0 * .001)) = 10 * log10( v^2 * 10)
214  // 75 ohm load assumption
215  // 10 * log10 (v^2 / (2 * 75.0 * .001)) = 10 * log10( v^2 * 15)
216 
217  const float real = *inputPtr++ * iNormalizationFactor;
218  const float imag = *inputPtr++ * iNormalizationFactor;
219 
220  *destPtr = volk_log2to10factor *
221  log2f_non_ieee((((real * real) + (imag * imag))) * iRBW);
222  destPtr++;
223  }
224 }
225 #endif /* LV_HAVE_SSE3 */
226 
227 
228 #ifdef LV_HAVE_GENERIC
229 
230 static inline void
232  const lv_32fc_t* complexFFTInput,
233  const float normalizationFactor,
234  const float rbw,
235  unsigned int num_points)
236 {
237  if (rbw != 1.0)
238  volk_32fc_s32f_power_spectrum_32f(
239  logPowerOutput, complexFFTInput, normalizationFactor * sqrt(rbw), num_points);
240  else
241  volk_32fc_s32f_power_spectrum_32f(
242  logPowerOutput, complexFFTInput, normalizationFactor, num_points);
243 }
244 
245 #endif /* LV_HAVE_GENERIC */
246 
247 #endif /* INCLUDED_volk_32fc_s32f_x2_power_spectral_density_32f_a_H */
float32x4_t __m128
Definition: sse2neon.h:235
FORCE_INLINE __m128 _mm_hadd_ps(__m128 a, __m128 b)
Definition: sse2neon.h:6527
FORCE_INLINE __m128 _mm_div_ps(__m128 a, __m128 b)
Definition: sse2neon.h:1756
FORCE_INLINE __m128 _mm_mul_ps(__m128 a, __m128 b)
Definition: sse2neon.h:2205
FORCE_INLINE __m128 _mm_set_ps1(float)
Definition: sse2neon.h:2437
FORCE_INLINE __m128 _mm_load_ps(const float *p)
Definition: sse2neon.h:1858
FORCE_INLINE void _mm_store_ps(float *p, __m128 a)
Definition: sse2neon.h:2704
static void volk_32fc_s32f_x2_power_spectral_density_32f_generic(float *logPowerOutput, const lv_32fc_t *complexFFTInput, const float normalizationFactor, const float rbw, unsigned int num_points)
Definition: volk_32fc_s32f_x2_power_spectral_density_32f.h:231
static void volk_32fc_s32f_x2_power_spectral_density_32f_a_avx(float *logPowerOutput, const lv_32fc_t *complexFFTInput, const float normalizationFactor, const float rbw, unsigned int num_points)
Definition: volk_32fc_s32f_x2_power_spectral_density_32f.h:57
static void volk_32fc_s32f_x2_power_spectral_density_32f_a_sse3(float *logPowerOutput, const lv_32fc_t *complexFFTInput, const float normalizationFactor, const float rbw, unsigned int num_points)
Definition: volk_32fc_s32f_x2_power_spectral_density_32f.h:148
#define volk_log2to10factor
Definition: volk_common.h:169
static float log2f_non_ieee(float f)
Definition: volk_common.h:159
float complex lv_32fc_t
Definition: volk_complex.h:74