From e47b756e1bcb91c0244e1cede58c868d4ade8e6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Ber=C3=A1nek?= <berykubik@gmail.com> Date: Mon, 13 Mar 2023 13:52:38 +0100 Subject: [PATCH] Try killing remote processes both with `shell=False` and `shell=True` --- cluster/cluster.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/cluster/cluster.py b/cluster/cluster.py index 484e259..c517652 100644 --- a/cluster/cluster.py +++ b/cluster/cluster.py @@ -146,6 +146,7 @@ def is_local(hostname: str) -> bool: HOSTNAME) + def start_process( commands: List[str], hostname: Optional[str] = None, @@ -261,11 +262,16 @@ def kill_process(hostname: str, pid: int, signal="TERM"): logging.debug(f"Killing PGID {pid} on {hostname}") if not is_local(hostname): args = ["ssh", hostname, "--", "kill", f"-{signal}", f"-{pid}"] - res = subprocess.run(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) + res = subprocess.run(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE) if res.returncode != 0: logging.error( f"error: {res.returncode} {res.stdout.decode().strip()} {res.stderr.decode().strip()}") - return False + logging.info("Retrying SSH kill with shell=True") + res = subprocess.run(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) + if res.returncode != 0: + logging.error( + f"error: {res.returncode} {res.stdout.decode().strip()} {res.stderr.decode().strip()}") + return False else: if signal == "TERM": signal = pysignal.SIGTERM -- GitLab