From c15d3f7cfa83520f4c99a074933dc78855719089 Mon Sep 17 00:00:00 2001
From: Julian Oes
Date: Fri, 26 Nov 2021 16:23:39 +0100
Subject: [PATCH] mavsdk_tests: retry gz model spawn command

This should help when gzserver does not respond yet and we end up
without a model and hence can't connect later and time out.

This change also required a fix to prevent the tester from hanging when
terminating all runners. By using poll instead of only read we can
prevent that and actually properly join the logger thread.
---
Reviewer notes (ignored by git-am, not part of the commit message):
 - The retry loop now really calls runner.stop(); the bare "runner.stop"
   was a no-op, so a failed attempt leaked its process, logger thread
   and log file descriptor before the next runner.start().
 - process_output() polls with a 250 ms timeout instead of 0, so the
   logger thread no longer busy-loops while still noticing stop_thread
   promptly.
 - Line counts are unchanged, so the hunk headers and diffstat still hold;
   the post-image blob hash in the "index" lines is no longer exact.

 test/mavsdk_tests/mavsdk_test_runner.py | 32 ++++++++-----
 test/mavsdk_tests/process_helper.py     | 63 +++++++++++++++++++++----
 2 files changed, 73 insertions(+), 22 deletions(-)

diff --git a/test/mavsdk_tests/mavsdk_test_runner.py b/test/mavsdk_tests/mavsdk_test_runner.py
index d1eeb88f2e..f876ec2af8 100755
--- a/test/mavsdk_tests/mavsdk_test_runner.py
+++ b/test/mavsdk_tests/mavsdk_test_runner.py
@@ -444,20 +444,28 @@ class Tester:
         for runner in self.active_runners:
             runner.set_log_filename(
                 self.determine_logfile_path(log_dir, runner.name))
-            try:
-                runner.start()
-            except TimeoutError:
-                abort = True
-                print("A timeout happened for runner: {}"
-                      .format(runner.name))
-                break
 
-            # Workaround to prevent gz not being able to communicate
-            # with gzserver. In CI it tends to take longer.
-            if os.getenv("GITHUB_WORKFLOW") and runner.name == "gzserver":
-                time.sleep(10)
+            # Some runners need to be started a couple of times
+            # until they succeed.
+            for _ in range(10):
+                try:
+                    runner.start()
+                except TimeoutError:
+                    abort = True
+                    print("A timeout happened for runner: {}"
+                          .format(runner.name))
+                    break
+
+                if runner.has_started_ok():
+                    break
+
+                runner.stop()
+                time.sleep(1)
+
             else:
-                time.sleep(2)
+                abort = True
+                print("Could not start runner: {}".format(runner.name))
+                break
 
         if abort:
             self.stop_runners()
diff --git a/test/mavsdk_tests/process_helper.py b/test/mavsdk_tests/process_helper.py
index e9372d57b4..282ee861f3 100644
--- a/test/mavsdk_tests/process_helper.py
+++ b/test/mavsdk_tests/process_helper.py
@@ -8,6 +8,7 @@ import subprocess
 import shutil
 import threading
 import errno
+import select
 from typing import Any, Dict, List, TextIO, Optional
 
 
@@ -62,18 +63,27 @@ class Runner:
         self.thread = threading.Thread(target=self.process_output)
         self.thread.start()
 
+    def has_started_ok(self) -> bool:
+        return True
+
     def process_output(self) -> None:
         assert self.process.stdout is not None
-        while True:
-            line = self.process.stdout.readline()
-            if not line and \
-                    (self.stop_thread.is_set() or self.poll is not None):
-                break
-            if not line or line == "\n":
-                continue
-            self.output_queue.put(line)
-            self.log_fd.write(line)
-            self.log_fd.flush()
+
+        poll_obj = select.poll()
+        poll_obj.register(self.process.stdout, select.POLLIN)
+
+        while not self.stop_thread.is_set():
+            poll_result = poll_obj.poll(250)
+            if poll_result:
+                line = self.process.stdout.readline()
+                if not line and \
+                        (self.stop_thread.is_set() or self.poll is not None):
+                    break
+                if not line or line == "\n":
+                    continue
+                self.output_queue.put(line)
+                self.log_fd.write(line)
+                self.log_fd.flush()
 
     def poll(self) -> Optional[int]:
         return self.process.poll()
@@ -255,6 +265,39 @@ class GzmodelspawnRunner(Runner):
                      "--model-name", self.model,
                      "-x", "1.01", "-y", "0.98", "-z", "0.83"]
 
+    def has_started_ok(self) -> bool:
+        # The problem is that sometimes gzserver does not seem to start
+        # quickly enough and gz model spawn fails with the error:
+        # "An instance of Gazebo is not running." but still returns 0
+        # as a result.
+        # We work around this by starting the command and then checking,
+        # using has_started_ok(), whether it was successful or not.
+        timeout_s = 3
+        steps = 10
+        for _ in range(steps*timeout_s):
+            if self.verbose:
+                print("Checking if gz model spawn is done...")
+            returncode = self.process.poll()
+            if returncode is None:
+                if self.verbose:
+                    print("not done yet")
+                time.sleep(float(timeout_s)/float(steps))
+                continue
+
+            if self.verbose:
+                print("gz model spawn is done")
+            with open(self.log_filename, 'r') as f:
+                for line in f.readlines():
+                    if 'An instance of Gazebo is not running' in line:
+                        return False
+                else:
+                    return True
+
+        if self.verbose:
+            print("gzmodelspawn did not return within {}s".
+                  format(timeout_s))
+        return False
+
 
 class GzclientRunner(Runner):
     def __init__(self,