From e159cd206d11073e4ca46d34ba15e106357fb044 Mon Sep 17 00:00:00 2001 From: Pau Espin Pedrol Date: Wed, 3 Apr 2019 17:53:54 +0200 Subject: process: Prevent NetNSProcess alive forever after SIGKILL NetNSProcess instances are run in the following process tree: osmo-gsm-tester -> sudo -> bash (osmo-gsm-tester_netns_exec.sh) -> tcpdump. Lots of osmo-gsm-tester_netns_exec.sh scripts with tcpdump child processes were spotted in the prod setup of osmo-gsm-tester. Apparently that happens because sometimes tcpdump doesn't get killed in time with SIGTERM and SIGINT, and as a result SIGKILL is sent by osmo-gsm-tester as part of its usual termination procedure. When SIGKILL is sent, the parent sudo process is instantly killed without any possibility of forwarding the signal to its children, leaving the bash script and tcpdump alive. In order to fix it, intercept SIGKILL in send_signal() for this process class and send SIGUSR1 instead. Then, modify the script under sudo to handle SIGUSR1 as if it were a SIGKILL towards its children, to make sure the child process in the netns terminates. Change-Id: I2bf389c47bbbd75f46af413e7ba897be5be386e1 --- src/osmo_gsm_tester/process.py | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src/osmo_gsm_tester/process.py') diff --git a/src/osmo_gsm_tester/process.py b/src/osmo_gsm_tester/process.py index 7ecb67e..441d4ea 100644 --- a/src/osmo_gsm_tester/process.py +++ b/src/osmo_gsm_tester/process.py @@ -363,6 +363,11 @@ class NetNSProcess(Process): # HACK: Since we run under sudo, only way to kill root-owned process is to kill as root... # This function is overwritten from Process. def send_signal(self, sig): + if sig == signal.SIGKILL: + # if we kill sudo, its children (bash running NETNS_EXEC_BIN + + # tcpdump under it) are kept alive. Let's instead tell the script to + # kill tcpdump: + sig = signal.SIGUSR1 kill_cmd = ('kill', '-%d' % int(sig), str(self.process_obj.pid)) run_local_netns_sync(self.run_dir, self.name()+"-kill"+str(sig), self.netns, kill_cmd) -- cgit v1.2.3