From a9129ea0039ede8f35854c349a1344792f78ef10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beat=20K=C3=BCng?= Date: Wed, 20 Apr 2022 14:58:17 +0200 Subject: [PATCH] logger watchdog: also check main thread There was a time window where if a task with higher priority than the main logger thread would busy-loop, it would block both logging threads and the watchdog would not trigger if the writer was in idle state. This can happen for fast SD card writes. --- src/modules/logger/logger.cpp | 17 +++++++++++------ src/modules/logger/watchdog.cpp | 12 +++++++++--- src/modules/logger/watchdog.h | 4 +++- 3 files changed, 23 insertions(+), 10 deletions(-) diff --git a/src/modules/logger/logger.cpp b/src/modules/logger/logger.cpp index 4e7fa75fe1..054c7e4522 100644 --- a/src/modules/logger/logger.cpp +++ b/src/modules/logger/logger.cpp @@ -97,19 +97,24 @@ static void timer_callback(void *arg) timer_callback_data_s *data = (timer_callback_data_s *)arg; - if (watchdog_update(data->watchdog_data)) { - data->watchdog_triggered = true; - } + int semaphore_value = 0; + + px4_sem_getvalue(&data->semaphore, &semaphore_value); /* check the value of the semaphore: if the logger cannot keep up with running it's main loop as fast * as the timer_callback here increases the semaphore count, the counter would increase unbounded, * leading to an overflow at some point. This case we want to avoid here, so we check the current * value against a (somewhat arbitrary) threshold, and avoid calling sem_post() if it's exceeded. * (it's not a problem if the threshold is a bit too large, it just means the logger will do - * multiple iterations at once, the next time it's scheduled). */ - int semaphore_value; + * multiple iterations at once, the next time it's scheduled). + * As the watchdog also uses the counter we use a conservatively high value */ + bool semaphore_value_saturated = semaphore_value > 100; + + if (watchdog_update(data->watchdog_data, semaphore_value_saturated)) { + data->watchdog_triggered = true; + } - if (px4_sem_getvalue(&data->semaphore, &semaphore_value) == 0 && semaphore_value > 1) { + if (semaphore_value_saturated) { return; } diff --git a/src/modules/logger/watchdog.cpp b/src/modules/logger/watchdog.cpp index 124753809c..7618dd366c 100644 --- a/src/modules/logger/watchdog.cpp +++ b/src/modules/logger/watchdog.cpp @@ -46,7 +46,7 @@ namespace px4 namespace logger { -bool watchdog_update(watchdog_data_t &watchdog_data) +bool watchdog_update(watchdog_data_t &watchdog_data, bool semaphore_value_saturated) { #if defined(__PX4_NUTTX) && defined(CONFIG_BUILD_FLAT) @@ -62,7 +62,9 @@ bool watchdog_update(watchdog_data_t &watchdog_data) // When the writer is waiting for an SD transfer, it is not in ready state, thus a long dropout // will not trigger it. The longest period in ready state I measured was around 70ms, // after a param change. - // We only check the log writer because it runs at lower priority than the main thread. + // Additionally we need to check the main thread as well, because if the main thread gets stalled as well + // while the writer is idle (no active write), it would not trigger. + // We do that by checking if the scheduling semaphore counter is saturated for a certain duration. // No need to lock the tcb access, since we are in IRQ context // update the timestamp if it has been scheduled recently @@ -100,7 +102,11 @@ bool watchdog_update(watchdog_data_t &watchdog_data) #endif - if (now - watchdog_data.ready_to_run_timestamp > 1_s) { + if (!semaphore_value_saturated) { + watchdog_data.sem_counter_saturated_start = now; + } + + if (now - watchdog_data.sem_counter_saturated_start > 3_s || now - watchdog_data.ready_to_run_timestamp > 1_s) { // boost the priority to make sure the logger continues to write to the log. // Note that we never restore the priority, to keep the logic simple and because it is // an event that must not occur under normal circumstances (if it does, there's a bug diff --git a/src/modules/logger/watchdog.h b/src/modules/logger/watchdog.h index 2c920b41cc..1ba1843593 100644 --- a/src/modules/logger/watchdog.h +++ b/src/modules/logger/watchdog.h @@ -50,6 +50,7 @@ struct watchdog_data_t { int logger_main_task_index = -1; int logger_writer_task_index = -1; hrt_abstime ready_to_run_timestamp = hrt_absolute_time(); + hrt_abstime sem_counter_saturated_start = hrt_absolute_time(); uint8_t last_state = TSTATE_TASK_INVALID; #endif /* __PX4_NUTTX */ }; @@ -70,9 +71,10 @@ void watchdog_initialize(const pid_t pid_logger_main, const pthread_t writer_thr * Expected to be called from IRQ context. * * @param watchdog_data + * @param semaphore_value_saturated if the scheduling semaphore counter is saturated * @return true if watchdog is triggered, false otherwise */ -bool watchdog_update(watchdog_data_t &watchdog_data); +bool watchdog_update(watchdog_data_t &watchdog_data, bool semaphore_value_saturated); } //namespace logger } //namespace px4