aboutsummaryrefslogtreecommitdiffstats
path: root/utils
diff options
context:
space:
mode:
authorPau Espin Pedrol <pespin@sysmocom.de>2019-04-03 17:53:54 +0200
committerPau Espin Pedrol <pespin@sysmocom.de>2019-04-03 17:53:57 +0200
commite159cd206d11073e4ca46d34ba15e106357fb044 (patch)
treec1c0dadaa65e8e496b06c39089c2b275f2f49358 /utils
parent17a4ed9029421bde84042d8e37d78a9ca043ca26 (diff)
process: Prevent NetNSProcess alive forever after SIGKILL
NetNSProcess instances run in the following process tree: osmo-gsm-tester -> sudo -> bash (osmo-gsm-tester_netns_exec.sh) -> tcpdump. Many osmo-gsm-tester_netns_exec.sh scripts with a tcpdump child process were spotted in the production setup of osmo-gsm-tester. Apparently this happens because tcpdump sometimes does not get killed in time by SIGTERM and SIGINT, and as a result osmo-gsm-tester sends SIGKILL as part of its usual termination procedure. When SIGKILL is sent, the parent sudo process is killed instantly, without any possibility of forwarding the signal to its children, leaving the bash script and tcpdump alive. To fix this, intercept SIGKILL for this process class and send SIGUSR1 instead. Then modify the script run under sudo to handle SIGUSR1 as if it were a SIGKILL towards its children, to make sure the child process in the netns terminates. Change-Id: I2bf389c47bbbd75f46af413e7ba897be5be386e1
Diffstat (limited to 'utils')
-rwxr-xr-xutils/osmo-gsm-tester_netns_exec.sh38
1 files changed, 37 insertions, 1 deletions
diff --git a/utils/osmo-gsm-tester_netns_exec.sh b/utils/osmo-gsm-tester_netns_exec.sh
index 336b746..182ebff 100755
--- a/utils/osmo-gsm-tester_netns_exec.sh
+++ b/utils/osmo-gsm-tester_netns_exec.sh
@@ -1,5 +1,41 @@
#!/bin/bash
# Usage: osmo-gsm-tester_netns_exec.sh NETNS CMD [ARGS...]
# Runs CMD inside the network namespace NETNS by means of 'ip netns exec'.
# First argument: name of the network namespace to run the command in.
netns="$1"
# Drop the namespace name so that "$@" holds only the command and its arguments.
shift
+
+child_ps=0
+# Forward a signal to the child process, or quit if no child exists yet.
+# Globals:   child_ps (read) - PID of the background child; "0" means that
+#            no child has been started so far.
+# Arguments: $1 - signal name without the "SIG" prefix (e.g. INT, TERM, KILL).
+# Outputs:   progress messages on stdout.
+# Exits the script with status 0 when there is no child to signal.
+forward_kill() {
+  # Keep the signal name local so that it does not leak into global scope.
+  local sig="$1"
+  echo "Caught signal SIG$sig!"
+  if [ "$child_ps" != "0" ]; then
+    echo "Killing $child_ps with SIG$sig!"
+    # Quoted to protect against word splitting and globbing.
+    kill "-SIG${sig}" "$child_ps"
+  else
+    exit 0
+  fi
+}
+# Trap handler for SIGINT: relays the signal to the child via forward_kill.
+forward_kill_int() { forward_kill "INT"; }
+# Trap handler for SIGTERM: relays the signal to the child via forward_kill.
+forward_kill_term() { forward_kill "TERM"; }
+# Trap handler for SIGUSR1. osmo-gsm-tester sends this special signal to ask
+# for the child to be SIGKILLed, so it is forwarded to the child as KILL.
+forward_kill_usr1() {
+  printf '%s\n' "Converting SIGUSR1->SIGKILL"
+  forward_kill "KILL"
+}
+# Install the signal handlers. 'set -e' must stay disabled in this script,
+# otherwise the traps are not triggered!
+trap forward_kill_int SIGINT
+trap forward_kill_term SIGTERM
+trap forward_kill_usr1 SIGUSR1
+
#TODO: Later on I may want to call myself with specific ENV and calling sudo in order to run inside the netns but with dropped privileges
-ip netns exec $netns "$@"
+# Start the command inside the netns in the background, so that this shell
+# stays free to run the signal traps while the command is running.
+ip netns exec "$netns" "$@" &
+child_ps=$!
+
+echo "$$: waiting for $child_ps"
+wait "$child_ps"
+child_exit_code="$?"
+# When a trapped signal arrives, 'wait' returns immediately with a status
+# greater than 128, before the child has actually terminated. Keep waiting
+# while the child still exists, so that the script neither exits before the
+# child is gone nor reports the interrupted wait's status instead of the
+# child's real exit status.
+while kill -0 "$child_ps" 2>/dev/null; do
+  wait "$child_ps"
+  child_exit_code="$?"
+done
+echo "child exited with $child_exit_code"
+
+exit "$child_exit_code"