Vector Optimized Library of Kernels  3.0.0
Architecture-tuned implementations of math kernels
volk_32fc_s32f_power_spectrum_32f.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of VOLK
6  *
7  * SPDX-License-Identifier: LGPL-3.0-or-later
8  */
9 
40 #ifndef INCLUDED_volk_32fc_s32f_power_spectrum_32f_a_H
41 #define INCLUDED_volk_32fc_s32f_power_spectrum_32f_a_H
42 
43 #include <inttypes.h>
44 #include <math.h>
45 #include <stdio.h>
46 
47 #ifdef LV_HAVE_GENERIC
48 
49 static inline void
51  const lv_32fc_t* complexFFTInput,
52  const float normalizationFactor,
53  unsigned int num_points)
54 {
55  // Calculate the Power of the complex point
56  const float normFactSq = 1.0 / (normalizationFactor * normalizationFactor);
57 
58  // Calculate dBm
59  // 50 ohm load assumption
60  // 10 * log10 (v^2 / (2 * 50.0 * .001)) = 10 * log10( v^2 * 10)
61  // 75 ohm load assumption
62  // 10 * log10 (v^2 / (2 * 75.0 * .001)) = 10 * log10( v^2 * 15)
63 
64  /*
65  * For generic reference, the code below is a volk-optimized
66  * approach that also leverages a faster log2 calculation
67  * to calculate the log10:
68  * n*log10(x) = n*log2(x)/log2(10) = (n/log2(10)) * log2(x)
69  *
70  * Generic code:
71  *
72  * const float real = *inputPtr++ * iNormalizationFactor;
73  * const float imag = *inputPtr++ * iNormalizationFactor;
74  * realFFTDataPointsPtr = 10.0*log10f(((real * real) + (imag * imag)) + 1e-20);
75  * realFFTDataPointsPtr++;
76  *
77  */
78 
79  // Calc mag^2
80  volk_32fc_magnitude_squared_32f(logPowerOutput, complexFFTInput, num_points);
81 
82  // Finish ((real * real) + (imag * imag)) calculation:
83  volk_32f_s32f_multiply_32f(logPowerOutput, logPowerOutput, normFactSq, num_points);
84 
85  // The following calculates 10*log10(x) = 10*log2(x)/log2(10) = (10/log2(10))
86  // * log2(x)
87  volk_32f_log2_32f(logPowerOutput, logPowerOutput, num_points);
88  volk_32f_s32f_multiply_32f(
89  logPowerOutput, logPowerOutput, volk_log2to10factor, num_points);
90 }
91 #endif /* LV_HAVE_GENERIC */
92 
93 #ifdef LV_HAVE_SSE3
94 #include <pmmintrin.h>
95 
96 #ifdef LV_HAVE_LIB_SIMDMATH
97 #include <simdmath.h>
98 #endif /* LV_HAVE_LIB_SIMDMATH */
99 
100 static inline void
102  const lv_32fc_t* complexFFTInput,
103  const float normalizationFactor,
104  unsigned int num_points)
105 {
106  const float* inputPtr = (const float*)complexFFTInput;
107  float* destPtr = logPowerOutput;
108  uint64_t number = 0;
109  const float iNormalizationFactor = 1.0 / normalizationFactor;
110 #ifdef LV_HAVE_LIB_SIMDMATH
111  __m128 magScalar = _mm_set_ps1(10.0);
112  magScalar = _mm_div_ps(magScalar, logf4(magScalar));
113 
114  __m128 invNormalizationFactor = _mm_set_ps1(iNormalizationFactor);
115 
116  __m128 power;
117  __m128 input1, input2;
118  const uint64_t quarterPoints = num_points / 4;
119  for (; number < quarterPoints; number++) {
120  // Load the complex values
121  input1 = _mm_load_ps(inputPtr);
122  inputPtr += 4;
123  input2 = _mm_load_ps(inputPtr);
124  inputPtr += 4;
125 
126  // Apply the normalization factor
127  input1 = _mm_mul_ps(input1, invNormalizationFactor);
128  input2 = _mm_mul_ps(input2, invNormalizationFactor);
129 
130  // Multiply each value by itself
131  // (r1*r1), (i1*i1), (r2*r2), (i2*i2)
132  input1 = _mm_mul_ps(input1, input1);
133  // (r3*r3), (i3*i3), (r4*r4), (i4*i4)
134  input2 = _mm_mul_ps(input2, input2);
135 
136  // Horizontal add, to add (r*r) + (i*i) for each complex value
137  // (r1*r1)+(i1*i1), (r2*r2) + (i2*i2), (r3*r3)+(i3*i3), (r4*r4)+(i4*i4)
138  power = _mm_hadd_ps(input1, input2);
139 
140  // Calculate the natural log power
141  power = logf4(power);
142 
143  // Convert to log10 and multiply by 10.0
144  power = _mm_mul_ps(power, magScalar);
145 
146  // Store the floating point results
147  _mm_store_ps(destPtr, power);
148 
149  destPtr += 4;
150  }
151 
152  number = quarterPoints * 4;
153 #endif /* LV_HAVE_LIB_SIMDMATH */
154  // Calculate the FFT for any remaining points
155 
156  for (; number < num_points; number++) {
157  // Calculate dBm
158  // 50 ohm load assumption
159  // 10 * log10 (v^2 / (2 * 50.0 * .001)) = 10 * log10( v^2 * 10)
160  // 75 ohm load assumption
161  // 10 * log10 (v^2 / (2 * 75.0 * .001)) = 10 * log10( v^2 * 15)
162 
163  const float real = *inputPtr++ * iNormalizationFactor;
164  const float imag = *inputPtr++ * iNormalizationFactor;
165 
166  *destPtr = volk_log2to10factor * log2f_non_ieee(((real * real) + (imag * imag)));
167 
168  destPtr++;
169  }
170 }
171 #endif /* LV_HAVE_SSE3 */
172 
173 #ifdef LV_HAVE_NEON
174 #include <arm_neon.h>
176 
177 static inline void
179  const lv_32fc_t* complexFFTInput,
180  const float normalizationFactor,
181  unsigned int num_points)
182 {
183  float* logPowerOutputPtr = logPowerOutput;
184  const lv_32fc_t* complexFFTInputPtr = complexFFTInput;
185  const float iNormalizationFactor = 1.0 / normalizationFactor;
186  unsigned int number;
187  unsigned int quarter_points = num_points / 4;
188  float32x4x2_t fft_vec;
189  float32x4_t log_pwr_vec;
190  float32x4_t mag_squared_vec;
191 
192  const float inv_ln10_10 = 4.34294481903f; // 10.0/ln(10.)
193 
194  for (number = 0; number < quarter_points; number++) {
195  // Load
196  fft_vec = vld2q_f32((float*)complexFFTInputPtr);
197  // Prefetch next 4
198  __VOLK_PREFETCH(complexFFTInputPtr + 4);
199  // Normalize
200  fft_vec.val[0] = vmulq_n_f32(fft_vec.val[0], iNormalizationFactor);
201  fft_vec.val[1] = vmulq_n_f32(fft_vec.val[1], iNormalizationFactor);
202  mag_squared_vec = _vmagnitudesquaredq_f32(fft_vec);
203  log_pwr_vec = vmulq_n_f32(_vlogq_f32(mag_squared_vec), inv_ln10_10);
204  // Store
205  vst1q_f32(logPowerOutputPtr, log_pwr_vec);
206  // Move pointers ahead
207  complexFFTInputPtr += 4;
208  logPowerOutputPtr += 4;
209  }
210 
211  // deal with the rest
212  for (number = quarter_points * 4; number < num_points; number++) {
213  const float real = lv_creal(*complexFFTInputPtr) * iNormalizationFactor;
214  const float imag = lv_cimag(*complexFFTInputPtr) * iNormalizationFactor;
215 
216  *logPowerOutputPtr =
217  volk_log2to10factor * log2f_non_ieee(((real * real) + (imag * imag)));
218  complexFFTInputPtr++;
219  logPowerOutputPtr++;
220  }
221 }
222 
223 #endif /* LV_HAVE_NEON */
224 
225 #endif /* INCLUDED_volk_32fc_s32f_power_spectrum_32f_a_H */
float32x4_t __m128
Definition: sse2neon.h:235
FORCE_INLINE __m128 _mm_hadd_ps(__m128 a, __m128 b)
Definition: sse2neon.h:6527
FORCE_INLINE __m128 _mm_div_ps(__m128 a, __m128 b)
Definition: sse2neon.h:1756
FORCE_INLINE __m128 _mm_mul_ps(__m128 a, __m128 b)
Definition: sse2neon.h:2205
FORCE_INLINE __m128 _mm_set_ps1(float)
Definition: sse2neon.h:2437
FORCE_INLINE __m128 _mm_load_ps(const float *p)
Definition: sse2neon.h:1858
FORCE_INLINE void _mm_store_ps(float *p, __m128 a)
Definition: sse2neon.h:2704
static void volk_32fc_s32f_power_spectrum_32f_a_sse3(float *logPowerOutput, const lv_32fc_t *complexFFTInput, const float normalizationFactor, unsigned int num_points)
Definition: volk_32fc_s32f_power_spectrum_32f.h:101
static void volk_32fc_s32f_power_spectrum_32f_generic(float *logPowerOutput, const lv_32fc_t *complexFFTInput, const float normalizationFactor, unsigned int num_points)
Definition: volk_32fc_s32f_power_spectrum_32f.h:50
static void volk_32fc_s32f_power_spectrum_32f_neon(float *logPowerOutput, const lv_32fc_t *complexFFTInput, const float normalizationFactor, unsigned int num_points)
Definition: volk_32fc_s32f_power_spectrum_32f.h:178
#define volk_log2to10factor
Definition: volk_common.h:169
static float log2f_non_ieee(float f)
Definition: volk_common.h:159
#define __VOLK_PREFETCH(addr)
Definition: volk_common.h:71
#define lv_cimag(x)
Definition: volk_complex.h:98
#define lv_creal(x)
Definition: volk_complex.h:96
float complex lv_32fc_t
Definition: volk_complex.h:74
static float32x4_t _vlogq_f32(float32x4_t x)
Definition: volk_neon_intrinsics.h:143
static float32x4_t _vmagnitudesquaredq_f32(float32x4x2_t cmplxValue)
Definition: volk_neon_intrinsics.h:73