From 96c654499796d0cd3fc6d5121f69eff91db8de7b Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Sat, 21 Sep 2019 10:57:40 +1000 Subject: [PATCH] AP_NavEKF3: moved intermediate variables to common memory this moves intermediate variables from being per-core to being common between cores. This saves memory on systems with more than one core by avoiding allocating this memory on every core. This is an alternative to #11717 which moves memory onto the stack. It doesn't save as much memory as #11717, but avoids creating large stack frames --- libraries/AP_NavEKF2/AP_NavEKF2.cpp | 2 +- libraries/AP_NavEKF3/AP_NavEKF3.cpp | 17 +++++++++-- libraries/AP_NavEKF3/AP_NavEKF3.h | 7 ++++- libraries/AP_NavEKF3/AP_NavEKF3_core.cpp | 38 ++++++++++++++++-------- libraries/AP_NavEKF3/AP_NavEKF3_core.h | 32 +++++++++++++++----- 5 files changed, 71 insertions(+), 25 deletions(-) diff --git a/libraries/AP_NavEKF2/AP_NavEKF2.cpp b/libraries/AP_NavEKF2/AP_NavEKF2.cpp index 76e339bade..b754941008 100644 --- a/libraries/AP_NavEKF2/AP_NavEKF2.cpp +++ b/libraries/AP_NavEKF2/AP_NavEKF2.cpp @@ -662,7 +662,7 @@ bool NavEKF2::InitialiseFilter(void) } // allocate common intermediate variable space - core_common = (struct CoreCommon *)hal.util->malloc_type(NavEKF2_core::get_core_common_size(), AP_HAL::Util::MEM_FAST); + core_common = (void *)hal.util->malloc_type(NavEKF2_core::get_core_common_size(), AP_HAL::Util::MEM_FAST); if (core_common == nullptr) { _enable.set(0); hal.util->free_type(core, sizeof(NavEKF2_core)*num_cores, AP_HAL::Util::MEM_FAST); diff --git a/libraries/AP_NavEKF3/AP_NavEKF3.cpp b/libraries/AP_NavEKF3/AP_NavEKF3.cpp index b9c0410deb..0f15a51b55 100644 --- a/libraries/AP_NavEKF3/AP_NavEKF3.cpp +++ b/libraries/AP_NavEKF3/AP_NavEKF3.cpp @@ -682,9 +682,20 @@ bool NavEKF3::InitialiseFilter(void) gcs().send_text(MAV_SEVERITY_CRITICAL, "NavEKF3: allocation failed"); return false; } + + // allocate common intermediate variable space + core_common = (void *)hal.util->malloc_type(NavEKF3_core::get_core_common_size(), AP_HAL::Util::MEM_FAST); + if (core_common == nullptr) { + _enable.set(0); + hal.util->free_type(core, sizeof(NavEKF3_core)*num_cores, AP_HAL::Util::MEM_FAST); + core = nullptr; + gcs().send_text(MAV_SEVERITY_CRITICAL, "NavEKF3: common allocation failed"); + return false; + } + + // Call constructors on all cores for (uint8_t i = 0; i < num_cores; i++) { - //Call Constructors - new (&core[i]) NavEKF3_core(); + new (&core[i]) NavEKF3_core(this); } } @@ -694,7 +705,7 @@ bool NavEKF3::InitialiseFilter(void) bool core_setup_success = true; for (uint8_t core_index=0; core_indexperf_alloc(AP_HAL::Util::PC_ELAPSED, "EK3_UpdateFilter")), _perf_CovariancePrediction(hal.util->perf_alloc(AP_HAL::Util::PC_ELAPSED, "EK3_CovariancePrediction")), _perf_FuseVelPosNED(hal.util->perf_alloc(AP_HAL::Util::PC_ELAPSED, "EK3_FuseVelPosNED")), @@ -19,7 +19,14 @@ NavEKF3_core::NavEKF3_core(void) : _perf_FuseSideslip(hal.util->perf_alloc(AP_HAL::Util::PC_ELAPSED, "EK3_FuseSideslip")), _perf_TerrainOffset(hal.util->perf_alloc(AP_HAL::Util::PC_ELAPSED, "EK3_TerrainOffset")), _perf_FuseOptFlow(hal.util->perf_alloc(AP_HAL::Util::PC_ELAPSED, "EK3_FuseOptFlow")), - _perf_FuseBodyOdom(hal.util->perf_alloc(AP_HAL::Util::PC_ELAPSED, "EK3_FuseBodyOdom")) + _perf_FuseBodyOdom(hal.util->perf_alloc(AP_HAL::Util::PC_ELAPSED, "EK3_FuseBodyOdom")), + frontend(_frontend), + // setup the intermediate variables shared by all cores (to save memory) + common((struct core_common *)_frontend->core_common), + Kfusion(common->Kfusion), + KH(common->KH), + KHP(common->KHP), + nextP(common->nextP) { _perf_test[0] = hal.util->perf_alloc(AP_HAL::Util::PC_ELAPSED, "EK3_Test0"); _perf_test[1] = hal.util->perf_alloc(AP_HAL::Util::PC_ELAPSED, "EK3_Test1"); @@ -36,9 +43,8 @@ NavEKF3_core::NavEKF3_core(void) : } // setup this core backend -bool NavEKF3_core::setup_core(NavEKF3 *_frontend, uint8_t _imu_index, uint8_t _core_index) +bool NavEKF3_core::setup_core(uint8_t _imu_index, uint8_t _core_index) { - frontend = _frontend; imu_index = _imu_index; gyro_index_active = imu_index; accel_index_active = imu_index; @@ -59,15 +65,15 @@ bool NavEKF3_core::setup_core(NavEKF3 *_frontend, uint8_t _imu_index, uint8_t _c } // find the maximum time delay for all potential sensors - uint16_t maxTimeDelay_ms = MAX(_frontend->_hgtDelay_ms , - MAX(_frontend->_flowDelay_ms , - MAX(_frontend->_rngBcnDelay_ms , - MAX(_frontend->magDelay_ms , + uint16_t maxTimeDelay_ms = MAX(frontend->_hgtDelay_ms , + MAX(frontend->_flowDelay_ms , + MAX(frontend->_rngBcnDelay_ms , + MAX(frontend->magDelay_ms , (uint16_t)(EKF_TARGET_DT_MS) )))); // GPS sensing can have large delays and should not be included if disabled - if (_frontend->_fusionModeGPS != 3) { + if (frontend->_fusionModeGPS != 3) { // Wait for the configuration of all GPS units to be confirmed. Until this has occurred the GPS driver cannot provide a correct time delay float gps_delay_sec = 0; if (!AP::gps().get_lag(gps_delay_sec)) { @@ -90,7 +96,7 @@ bool NavEKF3_core::setup_core(NavEKF3 *_frontend, uint8_t _imu_index, uint8_t _c // airspeed sensing can have large delays and should not be included if disabled if (_ahrs->airspeed_sensor_enabled()) { - maxTimeDelay_ms = MAX(maxTimeDelay_ms , _frontend->tasDelay_ms); + maxTimeDelay_ms = MAX(maxTimeDelay_ms , frontend->tasDelay_ms); } // calculate the IMU buffer length required to accommodate the maximum delay with some allowance for jitter @@ -100,13 +106,13 @@ bool NavEKF3_core::setup_core(NavEKF3 *_frontend, uint8_t _imu_index, uint8_t _c // with the worst case delay from current time to ekf fusion time // allow for worst case 50% extension of the ekf fusion time horizon delay due to timing jitter uint16_t ekf_delay_ms = maxTimeDelay_ms + (int)(ceilf((float)maxTimeDelay_ms * 0.5f)); - obs_buffer_length = (ekf_delay_ms / _frontend->sensorIntervalMin_ms) + 1; + obs_buffer_length = (ekf_delay_ms / frontend->sensorIntervalMin_ms) + 1; // limit to be no longer than the IMU buffer (we can't process data faster than the EKF prediction rate) obs_buffer_length = MIN(obs_buffer_length,imu_buffer_length); // calculate buffer size for optical flow data - const uint8_t flow_buffer_length = MIN((ekf_delay_ms / _frontend->flowIntervalMin_ms) + 1, imu_buffer_length); + const uint8_t flow_buffer_length = MIN((ekf_delay_ms / frontend->flowIntervalMin_ms) + 1, imu_buffer_length); if(!storedGPS.init(obs_buffer_length)) { return false; @@ -211,7 +217,7 @@ void NavEKF3_core::InitialiseVariables() lastKnownPositionNE.zero(); prevTnb.zero(); memset(&P[0][0], 0, sizeof(P)); - memset(&nextP[0][0], 0, sizeof(nextP)); + memset(common, 0, sizeof(*common)); flowDataValid = false; rangeDataToFuse = false; Popt = 0.0f; @@ -567,6 +573,12 @@ void NavEKF3_core::CovarianceInit() // Update Filter States - this should be called whenever new IMU data is available void NavEKF3_core::UpdateFilter(bool predict) { +#if CONFIG_HAL_BOARD == HAL_BOARD_SITL + // fill the common variables with NaN, so we catch any cases in + // SITL where they are used without initialisation + fill_nanf((float *)common, sizeof(*common)/sizeof(float)); +#endif + // Set the flag to indicate to the filter that the front-end has given permission for a new state prediction cycle to be started startPredictEnabled = predict; diff --git a/libraries/AP_NavEKF3/AP_NavEKF3_core.h b/libraries/AP_NavEKF3/AP_NavEKF3_core.h index 66f9217e91..5046400994 100644 --- a/libraries/AP_NavEKF3/AP_NavEKF3_core.h +++ b/libraries/AP_NavEKF3/AP_NavEKF3_core.h @@ -67,10 +67,10 @@ class NavEKF3_core { public: // Constructor - NavEKF3_core(void); + NavEKF3_core(NavEKF3 *_frontend); // setup this core backend - bool setup_core(NavEKF3 *_frontend, uint8_t _imu_index, uint8_t _core_index); + bool setup_core(uint8_t _imu_index, uint8_t _core_index); // Initialise the states from accelerometer and magnetometer data (if present) // This method can only be used when the vehicle is static @@ -365,7 +365,10 @@ public: // get timing statistics structure void getTimingStatistics(struct ekf_timing &timing); - + + // return size of core_common for use in frontend allocation + static uint32_t get_core_common_size(void) { return sizeof(core_common); } + private: // Reference to the global EKF frontend for parameters NavEKF3 *frontend; @@ -533,6 +536,21 @@ private: uint8_t type; // type specifiying Euler rotation order used, 1 = 312 (ZXY), 2 = 321 (ZYX) }; + /* + common intermediate variables used by all cores. These save a + lot memory by avoiding allocating these arrays on every core + Having these as stack variables would save even more memory, but + would give us very high stack usage in some functions, which + poses a risk of stack overflow until we have infrastructure in + place to calculate maximum stack usage using static analysis. + */ + struct core_common { + Vector28 Kfusion; + Matrix24 KH; + Matrix24 KHP; + Matrix24 nextP; + } *common; + // bias estimates for the IMUs that are enabled but not being used // by this core. struct { @@ -855,9 +873,9 @@ private: bool badIMUdata; // boolean true if the bad IMU data is detected float gpsNoiseScaler; // Used to scale the GPS measurement noise and consistency gates to compensate for operation with small satellite counts - Vector28 Kfusion; // Kalman gain vector - Matrix24 KH; // intermediate result used for covariance updates - Matrix24 KHP; // intermediate result used for covariance updates + Vector28 &Kfusion; // Kalman gain vector + Matrix24 &KH; // intermediate result used for covariance updates + Matrix24 &KHP; // intermediate result used for covariance updates Matrix24 P; // covariance matrix imu_ring_buffer_t storedIMU; // IMU data buffer obs_ring_buffer_t storedGPS; // GPS data buffer @@ -911,7 +929,7 @@ private: bool allMagSensorsFailed; // true if all magnetometer sensors have timed out on this flight and we are no longer using magnetometer data uint32_t lastSynthYawTime_ms; // time stamp when synthetic yaw measurement was last fused to maintain covariance health (msec) uint32_t ekfStartTime_ms; // time the EKF was started (msec) - Matrix24 nextP; // Predicted covariance matrix before addition of process noise to diagonals + Matrix24 &nextP; // Predicted covariance matrix before addition of process noise to diagonals Vector2f lastKnownPositionNE; // last known position uint32_t lastDecayTime_ms; // time of last decay of GPS position offset float velTestRatio; // sum of squares of GPS velocity innovation divided by fail threshold