diff --git a/test/mavsdk_tests/mavsdk_test_runner.py b/test/mavsdk_tests/mavsdk_test_runner.py index d1eeb88f2e..f876ec2af8 100755 --- a/test/mavsdk_tests/mavsdk_test_runner.py +++ b/test/mavsdk_tests/mavsdk_test_runner.py @@ -444,20 +444,28 @@ class Tester: for runner in self.active_runners: runner.set_log_filename( self.determine_logfile_path(log_dir, runner.name)) - try: - runner.start() - except TimeoutError: - abort = True - print("A timeout happened for runner: {}" - .format(runner.name)) - break - # Workaround to prevent gz not being able to communicate - # with gzserver. In CI it tends to take longer. - if os.getenv("GITHUB_WORKFLOW") and runner.name == "gzserver": - time.sleep(10) + # Some runners need to be started a couple of times + # until they succeed. + for _ in range(10): + try: + runner.start() + except TimeoutError: + abort = True + print("A timeout happened for runner: {}" + .format(runner.name)) + break + + if runner.has_started_ok(): + break + + runner.stop() + time.sleep(1) + else: - time.sleep(2) + abort = True + print("Could not start runner: {}".format(runner.name)) + break if abort: self.stop_runners() diff --git a/test/mavsdk_tests/process_helper.py b/test/mavsdk_tests/process_helper.py index e9372d57b4..282ee861f3 100644 --- a/test/mavsdk_tests/process_helper.py +++ b/test/mavsdk_tests/process_helper.py @@ -8,6 +8,7 @@ import subprocess import shutil import threading import errno +import select from typing import Any, Dict, List, TextIO, Optional @@ -62,18 +63,27 @@ class Runner: self.thread = threading.Thread(target=self.process_output) self.thread.start() + def has_started_ok(self) -> bool: + return True + def process_output(self) -> None: assert self.process.stdout is not None - while True: - line = self.process.stdout.readline() - if not line and \ - (self.stop_thread.is_set() or self.poll is not None): - break - if not line or line == "\n": - continue - self.output_queue.put(line) - self.log_fd.write(line) - self.log_fd.flush() + 
poll_obj = select.poll() + poll_obj.register(self.process.stdout, select.POLLIN) + + while not self.stop_thread.is_set(): + poll_result = poll_obj.poll(0) + if poll_result: + line = self.process.stdout.readline() + if not line and \ + (self.stop_thread.is_set() or self.poll is not None): + break + if not line or line == "\n": + continue + self.output_queue.put(line) + self.log_fd.write(line) + self.log_fd.flush() def poll(self) -> Optional[int]: return self.process.poll() @@ -255,6 +265,39 @@ class GzmodelspawnRunner(Runner): "--model-name", self.model, "-x", "1.01", "-y", "0.98", "-z", "0.83"] + def has_started_ok(self) -> bool: + # The problem is that sometimes gzserver does not seem to start + # quickly enough and gz model spawn fails with the error: + # "An instance of Gazebo is not running." but still returns 0 + # as a result. + # We work around this by trying to start and then checking + # via has_started_ok() whether it was successful or not. + timeout_s = 3 + steps = 10 + for _ in range(steps*timeout_s): + if self.verbose: + print("Checking if gz model spawn is done...") + returncode = self.process.poll() + if returncode is None: + if self.verbose: + print("not done yet") + time.sleep(float(timeout_s)/float(steps)) + continue + + if self.verbose: + print("gz model spawn is done") + with open(self.log_filename, 'r') as f: + for line in f.readlines(): + if 'An instance of Gazebo is not running' in line: + return False + else: + return True + + if self.verbose: + print("gzmodelspawn did not return within {}s". + format(timeout_s)) + return False + class GzclientRunner(Runner): def __init__(self,