You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
306 lines
11 KiB
306 lines
11 KiB
/* |
|
* This file is free software: you can redistribute it and/or modify it |
|
* under the terms of the GNU General Public License as published by the |
|
* Free Software Foundation, either version 3 of the License, or |
|
* (at your option) any later version. |
|
* |
|
* This file is distributed in the hope that it will be useful, but |
|
* WITHOUT ANY WARRANTY; without even the implied warranty of |
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
|
* See the GNU General Public License for more details. |
|
* |
|
* You should have received a copy of the GNU General Public License along |
|
* with this program. If not, see <http://www.gnu.org/licenses/>. |
|
* |
|
* Code by Andy Piper and the betaflight team |
|
*/ |
|
|
|
#include <hal.h> |
|
#include "AP_HAL_ChibiOS.h" |
|
|
|
#if HAL_WITH_DSP |
|
|
|
#include <AP_HAL/AP_HAL.h> |
|
#include <AP_Math/AP_Math.h> |
|
#include <GCS_MAVLink/GCS.h> |
|
#include "DSP.h" |
|
#include <cmath> |
|
|
|
using namespace ChibiOS; |
|
|
|
#if DEBUG_FFT |
|
#define TIMER_START(timer) \ |
|
void *istate = hal.scheduler->disable_interrupts_save(); \ |
|
uint32_t timer##now = AP_HAL::micros() |
|
#define TIMER_END(timer) timer.time(timer##now); \ |
|
hal.scheduler->restore_interrupts(istate) |
|
#else |
|
#define TIMER_START(timer) |
|
#define TIMER_END(timer) |
|
#endif |
|
|
|
#define TICK_CYCLE 10 |
|
|
|
extern const AP_HAL::HAL& hal; |
|
|
|
// The algorithms originally came from betaflight but are now substantially modified based on theory and experiment. |
|
// https://holometer.fnal.gov/GH_FFT.pdf "Spectrum and spectral density estimation by the Discrete Fourier transform (DFT), |
|
// including a comprehensive list of window functions and some new flat-top windows." - Heinzel et. al is a great reference |
|
// for understanding the underlying theory although we do not use spectral density here since time resolution is equally |
|
// important as frequency resolution. Referred to as [Heinz] throughout the code. |
|
|
|
// initialize the FFT state machine |
|
AP_HAL::DSP::FFTWindowState* DSP::fft_init(uint16_t window_size, uint16_t sample_rate, uint8_t sliding_window_size) |
|
{ |
|
DSP::FFTWindowStateARM* fft = new DSP::FFTWindowStateARM(window_size, sample_rate, sliding_window_size); |
|
if (fft == nullptr || fft->_hanning_window == nullptr || fft->_rfft_data == nullptr || fft->_freq_bins == nullptr || fft->_derivative_freq_bins == nullptr) { |
|
delete fft; |
|
return nullptr; |
|
} |
|
return fft; |
|
} |
|
|
|
// start an FFT analysis |
|
void DSP::fft_start(FFTWindowState* state, FloatBuffer& samples, uint16_t advance) |
|
{ |
|
step_hanning((FFTWindowStateARM*)state, samples, advance); |
|
} |
|
|
|
// perform remaining steps of an FFT analysis |
|
uint16_t DSP::fft_analyse(AP_HAL::DSP::FFTWindowState* state, uint16_t start_bin, uint16_t end_bin, float noise_att_cutoff) |
|
{ |
|
FFTWindowStateARM* fft = (FFTWindowStateARM*)state; |
|
step_arm_cfft_f32(fft); |
|
step_bitreversal(fft); |
|
step_stage_rfft_f32(fft); |
|
step_arm_cmplx_mag_f32(fft, start_bin, end_bin, noise_att_cutoff); |
|
return step_calc_frequencies_f32(fft, start_bin, end_bin); |
|
} |
|
|
|
// create an instance of the FFT state machine |
|
DSP::FFTWindowStateARM::FFTWindowStateARM(uint16_t window_size, uint16_t sample_rate, uint8_t sliding_window_size) |
|
: AP_HAL::DSP::FFTWindowState::FFTWindowState(window_size, sample_rate, sliding_window_size) |
|
{ |
|
if (_freq_bins == nullptr || _hanning_window == nullptr || _rfft_data == nullptr || _derivative_freq_bins == nullptr) { |
|
GCS_SEND_TEXT(MAV_SEVERITY_WARNING, "Failed to allocate %u bytes for window %u for DSP", |
|
unsigned(sizeof(float) * (window_size * 3 + 2)), unsigned(window_size)); |
|
return; |
|
} |
|
|
|
// initialize the ARM data structure. |
|
// it's important not to use arm_rfft_fast_init_f32() as this links all of the twiddle tables |
|
// by being selective we save 70k in text space |
|
|
|
switch (window_size) { |
|
case 32: |
|
arm_rfft_32_fast_init_f32(&_fft_instance); |
|
break; |
|
case 64: |
|
arm_rfft_64_fast_init_f32(&_fft_instance); |
|
break; |
|
case 128: |
|
arm_rfft_128_fast_init_f32(&_fft_instance); |
|
break; |
|
case 256: |
|
arm_rfft_256_fast_init_f32(&_fft_instance); |
|
break; |
|
#if defined(STM32H7) |
|
// Don't pull in the larger FFT tables unless we have to |
|
case 512: |
|
arm_rfft_512_fast_init_f32(&_fft_instance); |
|
break; |
|
case 1024: |
|
arm_rfft_1024_fast_init_f32(&_fft_instance); |
|
break; |
|
#endif |
|
} |
|
} |
|
|
|
DSP::FFTWindowStateARM::~FFTWindowStateARM() {} |
|
|
|
extern "C" { |
|
void stage_rfft_f32(arm_rfft_fast_instance_f32 *S, float32_t *p, float32_t *pOut); |
|
void arm_cfft_radix8by2_f32(arm_cfft_instance_f32 *S, float32_t *p1); |
|
void arm_cfft_radix8by4_f32(arm_cfft_instance_f32 *S, float32_t *p1); |
|
void arm_radix8_butterfly_f32(float32_t *pSrc, uint16_t fftLen, const float32_t *pCoef, uint16_t twidCoefModifier); |
|
void arm_bitreversal_32(uint32_t *pSrc, const uint16_t bitRevLen, const uint16_t *pBitRevTable); |
|
} |
|
|
|
// step 1: filter the incoming samples through a Hanning window |
|
void DSP::step_hanning(FFTWindowStateARM* fft, FloatBuffer& samples, uint16_t advance) |
|
{ |
|
TIMER_START(_hanning_timer); |
|
|
|
// 5us |
|
// apply hanning window to gyro samples and store result in _freq_bins |
|
// hanning starts and ends with 0, could be skipped for minor speed improvement |
|
samples.peek(&fft->_freq_bins[0], fft->_window_size); // the caller ensures we get a full buffer of samples |
|
samples.advance(advance); |
|
arm_mult_f32(&fft->_freq_bins[0], &fft->_hanning_window[0], &fft->_freq_bins[0], fft->_window_size); |
|
|
|
TIMER_END(_hanning_timer); |
|
} |
|
|
|
// step 2: guts of complex fft processing |
|
void DSP::step_arm_cfft_f32(FFTWindowStateARM* fft) |
|
{ |
|
arm_cfft_instance_f32 *Sint = &(fft->_fft_instance.Sint); |
|
Sint->fftLen = fft->_fft_instance.fftLenRFFT / 2; |
|
|
|
TIMER_START(_arm_cfft_f32_timer); |
|
|
|
switch (fft->_bin_count) { |
|
case 16: // window 32 |
|
// 16us (BF) |
|
// 5us F7, 7us F4, 8us H7 |
|
case 128: // window 256 |
|
// 37us F7, 81us F4, 17us H7 |
|
arm_cfft_radix8by2_f32(Sint, fft->_freq_bins); |
|
break; |
|
case 32: // window 64 |
|
// 35us (BF) |
|
// 10us F7, 24us F4 |
|
case 256: // window 512 |
|
// 66us F7, 174us F4, 37us H7 |
|
arm_cfft_radix8by4_f32(Sint, fft->_freq_bins); |
|
break; |
|
case 64: // window 128 |
|
// 70us BF |
|
// 21us F7, 34us F4 |
|
case 512: // window 1024 |
|
// 152us F7, 73us H7 |
|
arm_radix8_butterfly_f32(fft->_freq_bins, fft->_bin_count, Sint->pTwiddle, 1); |
|
break; |
|
} |
|
|
|
TIMER_END(_arm_cfft_f32_timer); |
|
} |
|
|
|
// step 3: reverse the bits of the output |
|
void DSP::step_bitreversal(FFTWindowStateARM* fft) |
|
{ |
|
TIMER_START(_bitreversal_timer); |
|
// 6us (BF) |
|
// 32 - 2us F7, 3us F4, 1us H7 |
|
// 64 - 3us F7, 6us F4 |
|
// 128 - 4us F7, 9us F4 |
|
// 256 - 10us F7, 20us F4, 5us H7 |
|
// 512 - 22us F7, 54us F4, 15us H7 |
|
// 1024 - 42us F7, 15us H7 |
|
arm_bitreversal_32((uint32_t *)fft->_freq_bins, fft->_fft_instance.Sint.bitRevLength, fft->_fft_instance.Sint.pBitRevTable); |
|
|
|
TIMER_END(_bitreversal_timer); |
|
} |
|
|
|
// step 4: convert from complex to real data |
|
void DSP::step_stage_rfft_f32(FFTWindowStateARM* fft) |
|
{ |
|
TIMER_START(_stage_rfft_f32_timer); |
|
// 14us (BF) |
|
// 32 - 2us F7, 5us F4, 2us H7 |
|
// 64 - 5us F7, 16us F4 |
|
// 128 - 17us F7, 26us F4 |
|
// 256 - 21us F7, 70us F4, 9us H7 |
|
// 512 - 35us F7, 71us F4, 17us H7 |
|
// 1024 - 76us F7, 33us H7 |
|
// this does not work in place => _freq_bins AND _rfft_data needed |
|
stage_rfft_f32(&fft->_fft_instance, fft->_freq_bins, fft->_rfft_data); |
|
|
|
TIMER_END(_stage_rfft_f32_timer); |
|
} |
|
|
|
// step 5: find the magnitudes of the complex data |
|
void DSP::step_arm_cmplx_mag_f32(FFTWindowStateARM* fft, uint16_t start_bin, uint16_t end_bin, float noise_att_cutoff) |
|
{ |
|
TIMER_START(_arm_cmplx_mag_f32_timer); |
|
|
|
// 8us (BF) |
|
// 32 - 4us F7, 5us F4, 5us H7 |
|
// 64 - 7us F7, 13us F4 |
|
// 128 - 14us F7, 17us F4 |
|
// 256 - 29us F7, 28us F4, 7us H7 |
|
// 512 - 55us F7, 93us F4, 13us H7 |
|
// 1024 - 131us F7, 25us H7 |
|
// General case for the magnitudes - see https://stackoverflow.com/questions/42299932/dsp-libraries-rfft-strange-results |
|
// The frequency of each of those frequency components are given by k*fs/N |
|
|
|
arm_cmplx_mag_squared_f32(&fft->_rfft_data[2], &fft->_freq_bins[1], fft->_bin_count - 1); |
|
fft->_freq_bins[0] = sq(fft->_rfft_data[0]); // DC |
|
fft->_freq_bins[fft->_bin_count] = sq(fft->_rfft_data[1]); // Nyquist |
|
fft->_rfft_data[fft->_window_size] = fft->_rfft_data[1]; // Nyquist for the interpolator |
|
fft->_rfft_data[fft->_window_size + 1] = 0; |
|
|
|
step_cmplx_mag(fft, start_bin, end_bin, noise_att_cutoff); |
|
|
|
TIMER_END(_arm_cmplx_mag_f32_timer); |
|
} |
|
|
|
// step 6: find the bin with the highest energy and interpolate the required frequency |
|
uint16_t DSP::step_calc_frequencies_f32(FFTWindowStateARM* fft, uint16_t start_bin, uint16_t end_bin) |
|
{ |
|
TIMER_START(_step_calc_frequencies); |
|
// 4us H7 |
|
|
|
step_calc_frequencies(fft, start_bin, end_bin); |
|
|
|
TIMER_END(_step_calc_frequencies); |
|
|
|
#if DEBUG_FFT |
|
_output_count++; |
|
// outputs at approx 1hz |
|
if (_output_count % 400 == 0) { |
|
GCS_SEND_TEXT(MAV_SEVERITY_WARNING, "FFT(us): t1:%lu,t2:%lu,t3:%lu,t4:%lu,t5:%lu,t6:%lu", |
|
_hanning_timer._timer_avg, _arm_cfft_f32_timer._timer_avg, _bitreversal_timer._timer_avg, _stage_rfft_f32_timer._timer_avg, _arm_cmplx_mag_f32_timer._timer_avg, _step_calc_frequencies._timer_avg); |
|
} |
|
#endif |
|
|
|
return fft->_peak_data[CENTER]._bin; |
|
} |
|
|
|
static const float PI_N = M_PI / 32.0f; |
|
static const float CANDAN_FACTOR = tanf(PI_N) / PI_N; |
|
|
|
// Interpolate center frequency using http://users.metu.edu.tr/ccandan//pub_dir/FineDopplerEst_IEEE_SPL_June2011.pdf |
|
// This is slightly less accurate than Quinn, but much cheaper to calculate |
|
float DSP::calculate_candans_estimator(const FFTWindowStateARM* fft, uint16_t k_max) const |
|
{ |
|
if (k_max <= 1 || k_max == fft->_bin_count) { |
|
return 0.0f; |
|
} |
|
|
|
const uint16_t k_m1 = (k_max - 1) * 2; |
|
const uint16_t k_p1 = (k_max + 1) * 2; |
|
const uint16_t k = k_max * 2; |
|
|
|
const float npr = fft->_rfft_data[k_m1] - fft->_rfft_data[k_p1]; |
|
const float npc = fft->_rfft_data[k_m1 + 1] - fft->_rfft_data[k_p1 + 1]; |
|
const float dpr = 2.0f * fft->_rfft_data[k] - fft->_rfft_data[k_m1] - fft->_rfft_data[k_p1]; |
|
const float dpc = 2.0f * fft->_rfft_data[k + 1] - fft->_rfft_data[k_m1 + 1] - fft->_rfft_data[k_p1 + 1]; |
|
|
|
const float realn = npr * dpr + npc * dpc; |
|
const float reald = dpr * dpr + dpc * dpc; |
|
|
|
// sanity check |
|
if (is_zero(reald)) { |
|
return 0.0f; |
|
} |
|
|
|
float d = CANDAN_FACTOR * (realn / reald); |
|
|
|
// -0.5 < d < 0.5 which is the fraction of the sample spacing about the center element |
|
return constrain_float(d, -0.5f, 0.5f); |
|
} |
|
|
|
#if DEBUG_FFT |
|
void DSP::StepTimer::time(uint32_t start) |
|
{ |
|
_timer_total += (AP_HAL::micros() - start); |
|
_time_ticks = (_time_ticks + 1) % TICK_CYCLE; |
|
if (_time_ticks == 0) { |
|
_timer_avg = _timer_total / TICK_CYCLE; |
|
_timer_total = 0; |
|
} |
|
} |
|
#endif |
|
|
|
#endif
|
|
|