File indexing completed on 2026-04-27 07:41:45
0001
0002
0003
0004
0005
0006
# Strict mode: abort on command failure, on use of an unset variable, and
# when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Locations used by the checks below.
TESTBED_DIR='/data/wenauseic/github/swf-testbed'   # testbed checkout
AGENTS_CONF='agents.supervisord.conf'              # supervisord config, relative to TESTBED_DIR
VENV="${TESTBED_DIR}/.venv/bin"                    # bin directory of the virtualenv
SUPERVISORCTL="${VENV}/supervisorctl"              # supervisorctl from that virtualenv

# Overall result of the script; starts out as success.
STATUS=0

# Banner followed by one blank line.
printf '%s\n' '=== Testbed Infrastructure Check ===' ''
0017
0018
# --- Agent manager check ------------------------------------------------
# Reports whether a "testbed agent-manager" process owned by the current
# user is running. When none is found, one is launched in the background
# from $TESTBED_DIR (with the virtualenv activated) and the process table is
# polled until it shows up or the attempts are exhausted.
#   Reads:  TESTBED_DIR, VENV
#   Writes: AM_PID, STARTED, STATUS (set to 1 when the manager never appears)

# Print the PID(s) of this user's agent manager; prints nothing (and still
# succeeds) when no such process exists or pgrep itself fails.
_agent_manager_pids() {
  pgrep -f "testbed agent-manager" -u "$(whoami)" 2>/dev/null || true
}

echo "--- Agent Manager ---"
AM_PID=$(_agent_manager_pids)

if [ -n "$AM_PID" ]; then
  echo "RUNNING (PID $AM_PID)"
else
  echo "NOT RUNNING - starting..."
  # Fail loudly instead of relying on errexit alone; exit status stays 1.
  cd "$TESTBED_DIR" || { echo "ERROR: cannot cd to $TESTBED_DIR" >&2; exit 1; }
  source "$VENV/activate"
  # ~/.env is optional: a missing or failing file is deliberately ignored.
  source ~/.env 2>/dev/null || true
  nohup testbed agent-manager > /tmp/agent-manager.log 2>&1 &

  # Poll schedule: am_poll_attempts checks, am_poll_interval seconds apart
  # (6 x 5s = 30s in total).
  am_poll_interval=5
  am_poll_attempts=6

  STARTED=false
  for ((i = 1; i <= am_poll_attempts; i++)); do
    sleep "$am_poll_interval"
    # Real elapsed time; the attempt number alone is not a number of seconds.
    am_elapsed=$((i * am_poll_interval))
    AM_PID=$(_agent_manager_pids)
    if [ -n "$AM_PID" ]; then
      echo "STARTED (PID $AM_PID) after ${am_elapsed}s"
      STARTED=true
      break
    fi
    echo " ...waiting (${am_elapsed}s elapsed)"
    # Show the latest log lines while waiting; the log may not exist yet.
    tail -3 /tmp/agent-manager.log 2>/dev/null || true
  done

  if [ "$STARTED" = false ]; then
    echo "ERROR: Agent manager failed to start after $((am_poll_attempts * am_poll_interval))s"
    echo "Log output:"
    tail -20 /tmp/agent-manager.log 2>/dev/null || echo "(no log)"
    STATUS=1
  fi
fi

echo ""
0053
0054
# --- Supervisord check --------------------------------------------------
# Asks supervisorctl for the status of the supervisord described by
# $TESTBED_DIR/$AGENTS_CONF and classifies the answer:
#   SV_EXIT=0        supervisorctl answered; its output is printed as-is.
#   SV_EXIT=4        output contains "no such file" or "refused"
#                    (presumably: supervisord is not reachable - TODO confirm
#                    against the supervisorctl version in use). Any leftover
#                    supervisord process for this config is then terminated.
#   SV_EXIT=126/127  supervisorctl itself could not be executed.
#   Reads:  SUPERVISORCTL, TESTBED_DIR, AGENTS_CONF
#   Writes: SV_OUTPUT, SV_EXIT, STALE_PID, STATUS (set to 1 on failure)
echo "--- Supervisord ---"

# Capture the exit status instead of discarding it, so that "the tool could
# not be run" can be told apart from "the tool ran and reported something".
sv_rc=0
SV_OUTPUT=$("$SUPERVISORCTL" -c "$TESTBED_DIR/$AGENTS_CONF" status 2>&1) || sv_rc=$?

if [ "$sv_rc" -eq 126 ] || [ "$sv_rc" -eq 127 ]; then
  # Bash reports 127 for "command not found" and 126 for "found but not
  # executable"; neither says anything about supervisord being reachable.
  SV_EXIT=$sv_rc
else
  # Pattern-match in the shell rather than piping into grep -q: under
  # pipefail an early grep exit can SIGPIPE the writer and turn a successful
  # match into a failed pipeline.
  case "$SV_OUTPUT" in
    *"no such file"* | *"refused"*) SV_EXIT=4 ;;
    *) SV_EXIT=0 ;;
  esac
fi

if [ "$SV_EXIT" -eq 0 ]; then
  echo "REACHABLE"
  echo "$SV_OUTPUT"
elif [ "$SV_EXIT" -eq 4 ]; then
  STALE_PID=$(pgrep -f "supervisord.*$AGENTS_CONF" -u "$(whoami)" 2>/dev/null || true)
  if [ -n "$STALE_PID" ]; then
    # pgrep prints one PID per line; split them so that every PID becomes its
    # own argument to kill (a newline-joined string is not a valid PID).
    sv_stale_pids=()
    while IFS= read -r sv_pid; do
      if [ -n "$sv_pid" ]; then
        sv_stale_pids+=("$sv_pid")
      fi
    done <<<"$STALE_PID"

    echo "Stale process found (PID ${sv_stale_pids[*]}) - killing..."
    kill "${sv_stale_pids[@]}" 2>/dev/null || true
    sleep 2

    # Escalate to SIGKILL only for the processes that ignored SIGTERM.
    sv_survivors=()
    for sv_pid in "${sv_stale_pids[@]}"; do
      if kill -0 "$sv_pid" 2>/dev/null; then
        sv_survivors+=("$sv_pid")
      fi
    done
    if [ "${#sv_survivors[@]}" -gt 0 ]; then
      echo "SIGTERM didn't work, sending SIGKILL..."
      kill -9 "${sv_survivors[@]}" 2>/dev/null || true
      sleep 1
    fi

    # Final verification over every PID that was found.
    sv_survivors=()
    for sv_pid in "${sv_stale_pids[@]}"; do
      if kill -0 "$sv_pid" 2>/dev/null; then
        sv_survivors+=("$sv_pid")
      fi
    done
    if [ "${#sv_survivors[@]}" -gt 0 ]; then
      echo "ERROR: Failed to kill stale supervisord (PID ${sv_survivors[*]})"
      STATUS=1
    else
      echo "Killed. Supervisord will be started fresh when testbed starts."
    fi
  else
    echo "NOT RUNNING (normal when testbed is stopped)"
  fi
else
  echo "ERROR: cannot run $SUPERVISORCTL (exit $SV_EXIT)"
  echo "$SV_OUTPUT"
  STATUS=1
fi

echo ""
0093
0094
0095
0096
# Look up the agent-manager process of the current user once more and, only
# when no check has failed so far (STATUS is 0), send it SIGUSR1 and pause
# for two seconds. Lookup and signal failures are deliberately ignored.
# NOTE(review): what the agent manager does on SIGUSR1 is not visible in
# this script - confirm against the agent manager's signal handling.
AM_PID="$(pgrep -u "$(whoami)" -f 'testbed agent-manager' 2>/dev/null || true)"
if [ -n "$AM_PID" ]; then
  if [ "$STATUS" -eq 0 ]; then
    kill -USR1 "$AM_PID" 2>/dev/null || true
    sleep 2
  fi
fi
0102
0103
# Final verdict: print a one-line summary chosen by STATUS and use STATUS as
# the exit code of the script (0 = all checks passed).
echo "--- Summary ---"
if ! [ "$STATUS" -eq 0 ]; then
  summary_line="PROBLEMS DETECTED. Fix issues above before proceeding."
else
  summary_line="Infrastructure OK. Ready for MCP operations."
fi
printf '%s\n' "$summary_line"

exit "$STATUS"