Browse Source

logger watchdog: also check main thread

There was a time window where if a task with higher priority than the main
logger thread would busy-loop, it would block both logging threads and the
watchdog would not trigger if the writer was in idle state.
This can happen for fast SD card writes.
main
Beat Küng 3 years ago committed by Daniel Agar
parent
commit
a9129ea003
  1. 17
      src/modules/logger/logger.cpp
  2. 12
      src/modules/logger/watchdog.cpp
  3. 4
      src/modules/logger/watchdog.h

17
src/modules/logger/logger.cpp

@ -97,19 +97,24 @@ static void timer_callback(void *arg) @@ -97,19 +97,24 @@ static void timer_callback(void *arg)
timer_callback_data_s *data = (timer_callback_data_s *)arg;
if (watchdog_update(data->watchdog_data)) {
data->watchdog_triggered = true;
}
int semaphore_value = 0;
px4_sem_getvalue(&data->semaphore, &semaphore_value);
/* check the value of the semaphore: if the logger cannot keep up with running it's main loop as fast
* as the timer_callback here increases the semaphore count, the counter would increase unbounded,
* leading to an overflow at some point. This case we want to avoid here, so we check the current
* value against a (somewhat arbitrary) threshold, and avoid calling sem_post() if it's exceeded.
* (it's not a problem if the threshold is a bit too large, it just means the logger will do
* multiple iterations at once, the next time it's scheduled). */
int semaphore_value;
* multiple iterations at once, the next time it's scheduled).
* As the watchdog also uses the counter we use a conservatively high value */
bool semaphore_value_saturated = semaphore_value > 100;
if (watchdog_update(data->watchdog_data, semaphore_value_saturated)) {
data->watchdog_triggered = true;
}
if (px4_sem_getvalue(&data->semaphore, &semaphore_value) == 0 && semaphore_value > 1) {
if (semaphore_value_saturated) {
return;
}

12
src/modules/logger/watchdog.cpp

@ -46,7 +46,7 @@ namespace px4 @@ -46,7 +46,7 @@ namespace px4
namespace logger
{
bool watchdog_update(watchdog_data_t &watchdog_data)
bool watchdog_update(watchdog_data_t &watchdog_data, bool semaphore_value_saturated)
{
#if defined(__PX4_NUTTX) && defined(CONFIG_BUILD_FLAT)
@ -62,7 +62,9 @@ bool watchdog_update(watchdog_data_t &watchdog_data) @@ -62,7 +62,9 @@ bool watchdog_update(watchdog_data_t &watchdog_data)
// When the writer is waiting for an SD transfer, it is not in ready state, thus a long dropout
// will not trigger it. The longest period in ready state I measured was around 70ms,
// after a param change.
// We only check the log writer because it runs at lower priority than the main thread.
// Additionally we need to check the main thread as well, because if the main thread gets stalled as well
// while the writer is idle (no active write), it would not trigger.
// We do that by checking if the scheduling semaphore counter is saturated for a certain duration.
// No need to lock the tcb access, since we are in IRQ context
// update the timestamp if it has been scheduled recently
@ -100,7 +102,11 @@ bool watchdog_update(watchdog_data_t &watchdog_data) @@ -100,7 +102,11 @@ bool watchdog_update(watchdog_data_t &watchdog_data)
#endif
if (now - watchdog_data.ready_to_run_timestamp > 1_s) {
if (!semaphore_value_saturated) {
watchdog_data.sem_counter_saturated_start = now;
}
if (now - watchdog_data.sem_counter_saturated_start > 3_s || now - watchdog_data.ready_to_run_timestamp > 1_s) {
// boost the priority to make sure the logger continues to write to the log.
// Note that we never restore the priority, to keep the logic simple and because it is
// an event that must not occur under normal circumstances (if it does, there's a bug

4
src/modules/logger/watchdog.h

@ -50,6 +50,7 @@ struct watchdog_data_t { @@ -50,6 +50,7 @@ struct watchdog_data_t {
int logger_main_task_index = -1;
int logger_writer_task_index = -1;
hrt_abstime ready_to_run_timestamp = hrt_absolute_time();
hrt_abstime sem_counter_saturated_start = hrt_absolute_time();
uint8_t last_state = TSTATE_TASK_INVALID;
#endif /* __PX4_NUTTX */
};
@ -70,9 +71,10 @@ void watchdog_initialize(const pid_t pid_logger_main, const pthread_t writer_thr @@ -70,9 +71,10 @@ void watchdog_initialize(const pid_t pid_logger_main, const pthread_t writer_thr
* Expected to be called from IRQ context.
*
* @param watchdog_data
* @param semaphore_value_saturated if the scheduling semaphore counter is saturated
* @return true if watchdog is triggered, false otherwise
*/
bool watchdog_update(watchdog_data_t &watchdog_data);
bool watchdog_update(watchdog_data_t &watchdog_data, bool semaphore_value_saturated);
} //namespace logger
} //namespace px4

Loading…
Cancel
Save