Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-06-04 08:56:26

0001 #!/usr/bin/env python3
0002 """Probe swf-monitor MCP and optionally restart the ASGI service on failure."""
0003 
0004 import argparse
0005 import json
0006 import os
0007 import subprocess
0008 import sys
0009 import time
0010 import urllib.error
0011 import urllib.request
0012 
0013 
# Default endpoint for the JSON-RPC "initialize" and "tools/list" probes;
# overridable with --mcp-url.
DEFAULT_MCP_URL = "http://127.0.0.1:8001/swf-monitor/mcp/"
# /health is served by the FastMCP ASGI guard at the bare root (no
# /swf-monitor prefix). The earlier Django-side /swf-monitor/api/mcp-health/
# endpoint went away when the systemd unit was flipped to mcp_asgi.
# Overridable with --health-url.
DEFAULT_HEALTH_URL = "http://127.0.0.1:8001/health"
0019 
0020 
def post_json(url, payload, timeout, token=None):
    """POST *payload* as a JSON document to *url*.

    Args:
        url: Target URL.
        payload: JSON-serialisable object sent as the request body.
        timeout: Timeout in seconds handed to ``urlopen``.
        token: Optional bearer token; when truthy an ``Authorization``
            header is added.

    Returns:
        A ``(status, body)`` tuple: the HTTP status code and the response
        body decoded as UTF-8 and parsed as JSON.

    No exception is handled here; errors from ``urlopen``, the UTF-8
    decode, or ``json.loads`` propagate to the caller.
    """
    request_headers = {
        "Content-Type": "application/json",
        "Accept": "application/json, text/event-stream",
        "User-Agent": "swf-monitor-mcp-watchdog/1.0",
        # Only authenticate when a token was actually supplied.
        **({"Authorization": f"Bearer {token}"} if token else {}),
    }
    encoded_payload = json.dumps(payload).encode("utf-8")
    req = urllib.request.Request(
        url, data=encoded_payload, headers=request_headers, method="POST"
    )
    with urllib.request.urlopen(req, timeout=timeout) as resp:
        status_code = resp.status
        raw_bytes = resp.read()
    return status_code, json.loads(raw_bytes.decode("utf-8"))
0039 
0040 
def get_json(url, timeout):
    """GET *url* and parse the response body as JSON.

    Args:
        url: Target URL.
        timeout: Timeout in seconds handed to ``urlopen``.

    Returns:
        A ``(status, body)`` tuple: the HTTP status code and the response
        body decoded as UTF-8 and parsed as JSON.

    No exception is handled here; errors from ``urlopen``, the UTF-8
    decode, or ``json.loads`` propagate to the caller.
    """
    request_headers = {
        "Accept": "application/json",
        "User-Agent": "swf-monitor-mcp-watchdog/1.0",
    }
    req = urllib.request.Request(url, headers=request_headers, method="GET")
    with urllib.request.urlopen(req, timeout=timeout) as resp:
        status_code = resp.status
        raw_bytes = resp.read()
    return status_code, json.loads(raw_bytes.decode("utf-8"))
0053 
0054 
def probe(health_url, mcp_url, timeout, token=None):
    """Run the three-step liveness probe against the MCP service.

    The steps are, in order: GET the health endpoint, POST a JSON-RPC
    ``initialize`` request, then POST a JSON-RPC ``tools/list`` request.

    Args:
        health_url: URL of the health endpoint.
        mcp_url: URL that receives the JSON-RPC requests.
        timeout: Per-request timeout in seconds.
        token: Optional bearer token forwarded to the two MCP POSTs (the
            health GET is sent without it).

    Returns:
        ``(elapsed, tool_count)``: wall time of the whole probe in seconds
        (monotonic clock) and the number of tools the server listed.

    Raises:
        RuntimeError: if any step returns a non-200 status or a body that
            does not have the expected shape. Transport and decoding
            errors from ``get_json`` / ``post_json`` propagate unchanged.
    """
    started = time.monotonic()
    health_status, health = get_json(health_url, timeout)
    # The FastMCP /health endpoint returns {"status": "ok"}; the older
    # Django /api/mcp-health/ endpoint returned {"ok": true, ...}. Accept
    # both shapes so an accidental rollback doesn't break the watchdog.
    # A body that is valid JSON but not an object (list, null, number) is
    # treated as unhealthy instead of blowing up with AttributeError.
    health_ok = isinstance(health, dict) and (
        health.get("status") == "ok"
        or health.get("ok") is True
    )
    if health_status != 200 or not health_ok:
        raise RuntimeError(f"health failed: status={health_status} body={health}")

    init_status, init = post_json(
        mcp_url,
        {
            "jsonrpc": "2.0",
            "id": 1,
            "method": "initialize",
            "params": {
                "protocolVersion": "2025-03-26",
                "capabilities": {},
                "clientInfo": {
                    "name": "swf-monitor-mcp-watchdog",
                    "version": "1.0",
                },
            },
        },
        timeout,
        token=token,
    )
    # Require a JSON object so the membership test cannot raise TypeError
    # (or silently match an element of a list).
    if init_status != 200 or not isinstance(init, dict) or "result" not in init:
        raise RuntimeError(f"initialize failed: status={init_status} body={init}")

    tools_status, tools = post_json(
        mcp_url,
        {
            "jsonrpc": "2.0",
            "id": 2,
            "method": "tools/list",
        },
        timeout,
        token=token,
    )
    # Walk the body defensively: a non-object body or "result": null must
    # end up as a RuntimeError, which the caller knows how to handle.
    result = tools.get("result") if isinstance(tools, dict) else None
    tool_list = result.get("tools") if isinstance(result, dict) else None
    if tools_status != 200 or not isinstance(tool_list, list):
        raise RuntimeError(f"tools/list failed: status={tools_status} body={tools}")

    elapsed = time.monotonic() - started
    return elapsed, len(tool_list)
0105 
0106 
def restart_service(service):
    """Restart *service* by running ``systemctl restart <service>``.

    Args:
        service: Name of the systemd unit to restart.

    Raises:
        RuntimeError: if ``systemctl`` cannot be executed at all (missing
            binary, permission problem, ...) or exits with a non-zero
            return code. The message includes the return code and the
            captured stdout/stderr when the command did run.
    """
    try:
        result = subprocess.run(
            ["systemctl", "restart", service],
            check=False,
            capture_output=True,
            text=True,
        )
    except OSError as exc:
        # Failing to launch systemctl is still "restart failed"; report it
        # through the same exception type as a non-zero exit so callers
        # need only one handler.
        raise RuntimeError(
            f"restart failed: could not run systemctl: {exc}"
        ) from exc
    if result.returncode != 0:
        raise RuntimeError(
            f"restart failed rc={result.returncode}: "
            f"{result.stdout.strip()} {result.stderr.strip()}"
        )
0119 
0120 
def main():
    """Parse the command line, run the probe, and optionally restart.

    Returns:
        The process exit code: ``0`` when the probe succeeded, ``1`` when
        the probe failed (whether or not a requested restart succeeded),
        ``2`` when the probe failed and the requested restart also failed.
    """
    # Local import keeps this function self-contained; urllib already
    # loads http.client, so this costs nothing at runtime.
    import http.client

    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--mcp-url", default=DEFAULT_MCP_URL)
    parser.add_argument("--health-url", default=DEFAULT_HEALTH_URL)
    parser.add_argument("--timeout", type=float, default=5.0)
    parser.add_argument("--restart", action="store_true")
    parser.add_argument("--service", default="swf-monitor-mcp-asgi.service")
    parser.add_argument(
        "--token",
        default=os.environ.get("MCP_BEARER_TOKEN", ""),
        help="Bearer token for the MCP initialize/tools-list probes "
             "(default: MCP_BEARER_TOKEN env var, empty for none).",
    )
    args = parser.parse_args()

    # Everything that means "the service did not answer properly":
    # - OSError covers urllib.error.URLError, TimeoutError, socket.timeout
    #   and raw connection errors (e.g. ConnectionResetError) that urllib
    #   does not wrap when they occur while waiting for or reading the
    #   response;
    # - http.client.HTTPException covers protocol-level failures such as
    #   IncompleteRead or a malformed status line;
    # - the two decode errors cover a body that is not UTF-8 JSON;
    # - RuntimeError is what probe() raises for bad status or shape.
    probe_failures = (
        OSError,
        http.client.HTTPException,
        RuntimeError,
        json.JSONDecodeError,
        UnicodeDecodeError,
    )

    try:
        elapsed, tool_count = probe(
            args.health_url, args.mcp_url, args.timeout, token=args.token or None,
        )
    except probe_failures as e:
        print(f"MCP watchdog probe failed: {e}", file=sys.stderr)
        if args.restart:
            try:
                restart_service(args.service)
                print(f"Restarted {args.service}", file=sys.stderr)
            except (RuntimeError, OSError) as restart_error:
                # OSError: systemctl itself could not be launched.
                print(str(restart_error), file=sys.stderr)
                return 2
        return 1

    print(f"MCP watchdog OK: {tool_count} tools in {elapsed:.3f}s")
    return 0
0153 
0154 
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())