File indexing completed on 2026-04-27 07:41:45
0001 """Query helpers for the alarm dashboard — ORM over the Entry / EntryVersion
0002 tables. No model logic beyond what the views need.
0003 """
0004 from __future__ import annotations
0005
0006 import json
0007 import time
0008 from datetime import datetime, timezone, timedelta
0009
0010 from django.db.models import Count, Max
0011
0012 from .models import Entry, EntryContext, EntryVersion
0013
0014
# context_id used by every alarm, event and engine_run query in this module.
CONTEXT_NAME = 'swf-alarms'
# context_id used by the team lookups (list_teams / get_team / get_team_by_id).
TEAMS_CONTEXT = 'teams'
0017
0018
def _active_alarm_configs_qs():
    """Return the queryset of alarm-config entries that are in use.

    Rows must be un-archived and have no ``deleted_at`` set; results are
    ordered by creation timestamp, oldest first.
    """
    in_use = Entry.objects.filter(
        context_id=CONTEXT_NAME,
        kind='alarm',
        archived=False,
        deleted_at__isnull=True,
    )
    return in_use.order_by('timestamp_created')
0024
0025
def alarm_configs() -> list[dict]:
    """Return one display dict per active alarm config, oldest first.

    Each dict carries the row id, the config's ``entry_id`` and a short
    ``name`` derived from it, title and content, the ``enabled`` flag
    (True when the key is absent), a display string for the recipients,
    a copy of the ``params`` mapping, the created/modified timestamps and
    the raw ``data`` mapping.
    """
    prefix = 'alarm_'
    out = []
    for e in _active_alarm_configs_qs():
        data = e.data or {}
        # A JSON-null entry_id must not crash the whole listing; treat it
        # the same as a missing key.
        entry_id = data.get('entry_id') or ''
        # Strip only a *leading* 'alarm_' (same rule as _event_entry_id_for);
        # an 'alarm_' in the middle of the id is part of the name.
        if entry_id.startswith(prefix):
            name = entry_id[len(prefix):]
        else:
            name = entry_id
        out.append({
            'id': e.id,
            'entry_id': entry_id,
            # str() keeps the fallback working when the id is not a string
            # (e.g. a UUID object).
            'name': name or str(e.id)[:8],
            'title': e.title,
            'content': e.content,
            'enabled': bool(data.get('enabled', True)),
            'recipients_text': _recipients_display(data.get('recipients')),
            'params': dict(data.get('params') or {}),
            'created': e.timestamp_created,
            'modified': e.timestamp_modified,
            'data': data,
        })
    return out
0048
0049
def _recipients_display(value) -> str:
    """Render a stored recipients value as a single display string.

    None and the empty string give ''; a string is returned unchanged; a
    list or tuple is joined with ', '; anything else is passed to str().
    """
    if value is None or value == '':
        return ''
    if isinstance(value, (list, tuple)):
        return ', '.join(map(str, value))
    return value if isinstance(value, str) else str(value)
0058
0059
def get_alarm_config_by_entry_id(entry_id: str) -> Entry | None:
    """Return the alarm config whose ``data.entry_id`` matches, or None.

    Only rows with ``deleted_at`` set are excluded; the ``archived`` flag is
    not filtered on here.
    """
    # .first() yields None for an empty result rather than raising
    # Entry.DoesNotExist, so no exception handling is required.
    return (Entry.objects
            .filter(context_id=CONTEXT_NAME, kind='alarm',
                    data__entry_id=entry_id,
                    deleted_at__isnull=True)
            .first())
0069
0070
def events_since(alarm_entry_id: str, hours: int, limit: int = 500) -> list[dict]:
    """Newest-first events for an alarm created in the last ``hours`` hours.

    The window is applied to ``timestamp_created``. At most ``limit`` rows
    are returned, each converted with ``_event_to_dict`` so the template
    gets denormalised fields.
    """
    event_entry_id = _event_entry_id_for(alarm_entry_id)
    window_start = time.time() - hours * 3600
    in_window = Entry.objects.filter(
        context_id=CONTEXT_NAME,
        kind='event',
        data__entry_id=event_entry_id,
        archived=False,
        deleted_at__isnull=True,
        timestamp_created__gte=window_start,
    )
    newest_first = in_window.order_by('-timestamp_created')[:limit]
    return list(map(_event_to_dict, newest_first))
0085
0086
def count_events_since(alarm_entry_id: str, hours: int) -> int:
    """Count this alarm's un-archived, non-deleted events created in the
    last ``hours`` hours."""
    event_entry_id = _event_entry_id_for(alarm_entry_id)
    window_start = time.time() - hours * 3600
    in_window = Entry.objects.filter(
        context_id=CONTEXT_NAME,
        kind='event',
        data__entry_id=event_entry_id,
        archived=False,
        deleted_at__isnull=True,
        timestamp_created__gte=window_start,
    )
    return in_window.count()
0096
0097
def _active_events_qs(alarm_entry_id: str):
    """Return a list (not a queryset) of this alarm's still-active events.

    All un-archived, non-deleted event rows are fetched newest first and
    then narrowed in Python to those whose ``clear_time`` is missing or
    null, because Django's __isnull on a JSON path only catches missing
    keys, not ``null``-valued ones.
    """
    event_entry_id = _event_entry_id_for(alarm_entry_id)
    candidates = Entry.objects.filter(
        context_id=CONTEXT_NAME,
        kind='event',
        data__entry_id=event_entry_id,
        archived=False,
        deleted_at__isnull=True,
    ).order_by('-timestamp_created')

    still_active = []
    for row in candidates:
        payload = row.data or {}
        if payload.get('clear_time') is None:
            still_active.append(row)
    return still_active
0109
0110
def active_event_count(alarm_entry_id: str) -> int:
    """Number of currently-active events for this alarm."""
    active = _active_events_qs(alarm_entry_id)
    return len(active)
0113
0114
def active_event_rows(alarm_entry_id: str) -> list:
    """Return the Entry objects of this alarm's currently-active events,
    unconverted."""
    rows = _active_events_qs(alarm_entry_id)
    return rows
0118
0119
def active_events(alarm_entry_id: str) -> list[dict]:
    """Present-state rows: one dict per currently-active event of this alarm.

    ``subject`` falls back to the entry title and then to '?';
    ``fire_time_dt`` is ``fire_time`` converted by ``_ts_to_dt``.
    """
    def _as_row(entry) -> dict:
        payload = entry.data or {}
        fired_at = payload.get('fire_time')
        return {
            'id': entry.id,
            'subject': payload.get('subject') or entry.title or '?',
            'dedupe_key': payload.get('dedupe_key') or '',
            'fire_time': fired_at,
            'fire_time_dt': _ts_to_dt(fired_at),
            'last_seen': payload.get('last_seen'),
            'metric': _event_metric(payload),
        }

    return [_as_row(entry) for entry in _active_events_qs(alarm_entry_id)]
0136
0137
def _event_metric(data: dict) -> str:
    """Trigger metric as a display string.

    Preferred: a non-empty string under the `metric` key is returned as is.
    Fallback: a numeric `computed_failurerate` is rendered as a percentage
    with one decimal (0.256 -> "25.6%"), for old rows that predate the
    metric field. Returns '' when neither is usable.
    """
    m = data.get('metric')
    if isinstance(m, str) and m:
        return m
    cfr = data.get('computed_failurerate')
    # bool is a subclass of int: without the extra check a stray JSON
    # true/false would be shown as "100.0%" / "0.0%".
    if isinstance(cfr, (int, float)) and not isinstance(cfr, bool):
        return f"{cfr*100:.1f}%"
    return ''
0152
0153
def _ts_to_dt(ts):
    """Convert an epoch-seconds value to a UTC-aware datetime.

    Returns None for None, for '', and for anything that cannot be turned
    into a float or is rejected by ``datetime.fromtimestamp``.
    """
    if ts is None or ts == '':
        return None
    try:
        seconds = float(ts)
        converted = datetime.fromtimestamp(seconds, tz=timezone.utc)
    except (TypeError, ValueError, OSError, OverflowError):
        return None
    return converted
0161
0162
def events_for_task(alarm_entry_id: str, dedupe_key: str,
                    hours: int) -> list[dict]:
    """Events of one (alarm, dedupe_key) pair created in the last ``hours``
    hours, newest first, each converted with ``_event_to_dict``."""
    event_entry_id = _event_entry_id_for(alarm_entry_id)
    window_start = time.time() - hours * 3600
    in_window = Entry.objects.filter(
        context_id=CONTEXT_NAME,
        kind='event',
        data__entry_id=event_entry_id,
        data__dedupe_key=dedupe_key,
        archived=False,
        deleted_at__isnull=True,
        timestamp_created__gte=window_start,
    )
    return list(map(_event_to_dict, in_window.order_by('-timestamp_created')))
0176
0177
def task_history_bins(alarm_entry_id: str, dedupe_key: str,
                      hours: int) -> list[dict]:
    """One row per engine run in the last ``hours`` hours, oldest first,
    giving the state of this (alarm, dedupe_key) at that run's tick.

    Each row is ``{'tick', 'state', 'run_id'}`` where state is one of:
      - 'unknown' — the run recorded errors for this alarm or has no
                    ``finished_at`` (checked first, so it wins).
      - 'firing'  — some event of this task satisfies
                    fire_time ≤ tick ≤ (clear_time or now).
      - 'clear'   — neither of the above.
    """
    now = time.time()
    window_start = now - hours * 3600
    event_entry_id = _event_entry_id_for(alarm_entry_id)

    engine_runs = Entry.objects.filter(
        context_id=CONTEXT_NAME,
        kind='engine_run',
        deleted_at__isnull=True,
        timestamp_created__gte=window_start,
    ).order_by('timestamp_created')

    task_events = Entry.objects.filter(
        context_id=CONTEXT_NAME,
        kind='event',
        data__entry_id=event_entry_id,
        data__dedupe_key=dedupe_key,
        archived=False,
        deleted_at__isnull=True,
    )

    # (fire, clear) spans that reach into the window; an event without a
    # clear_time is treated as lasting until now.
    spans: list[tuple[float, float]] = []
    for ev in task_events:
        payload = ev.data or {}
        fired_at = float(payload.get('fire_time') or 0)
        raw_clear = payload.get('clear_time')
        cleared_at = now if raw_clear is None else float(raw_clear)
        if not (cleared_at < window_start):
            spans.append((fired_at, cleared_at))

    def _state_at(run_payload: dict, tick: float) -> str:
        alarm_stats = (run_payload.get('per_alarm') or {}).get(alarm_entry_id) or {}
        if alarm_stats.get('errors') or run_payload.get('finished_at') is None:
            return 'unknown'
        if any(start <= tick <= end for start, end in spans):
            return 'firing'
        return 'clear'

    history: list[dict] = []
    for run in engine_runs:
        run_payload = run.data or {}
        tick = float(run_payload.get('started_at') or run.timestamp_created)
        history.append({
            'tick': tick,
            'state': _state_at(run_payload, tick),
            'run_id': run.id,
        })
    return history
0238
0239
def last_fired(alarm_entry_id: str):
    """Latest timestamp at which this alarm was observed firing, or None.

    Scans every un-archived, non-deleted event of the alarm and takes the
    largest of its ``last_seen``, ``clear_time`` and ``fire_time`` values
    (those that parse as floats) and the row's ``timestamp_created``.
    For an active event ``last_seen`` is bumped every tick, so for a
    currently-firing alarm the result is the most recent tick.
    Returns None when nothing positive was found.
    """
    event_entry_id = _event_entry_id_for(alarm_entry_id)
    rows = Entry.objects.filter(
        context_id=CONTEXT_NAME,
        kind='event',
        data__entry_id=event_entry_id,
        archived=False,
        deleted_at__isnull=True,
    )

    latest = 0.0
    for row in rows:
        payload = row.data or {}
        for field in ('last_seen', 'clear_time', 'fire_time'):
            raw = payload.get(field)
            if raw is None:
                continue
            try:
                stamp = float(raw)
            except (TypeError, ValueError):
                continue
            latest = max(latest, stamp)
        if row.timestamp_created and row.timestamp_created > latest:
            latest = float(row.timestamp_created)

    if latest > 0:
        return latest
    return None
0270
0271
def get_event(event_uuid: str) -> dict | None:
    """Look up a single non-deleted event by id and return it as a dict
    (via ``_event_to_dict``), or None when no such row exists."""
    try:
        row = Entry.objects.get(
            id=event_uuid,
            context_id=CONTEXT_NAME,
            kind='event',
            deleted_at__isnull=True,
        )
    except Entry.DoesNotExist:
        return None
    else:
        return _event_to_dict(row)
0279
0280
def versions_for(entry_uuid: str, limit: int = 50) -> list[dict]:
    """Return up to ``limit`` stored versions of an entry, highest
    ``version_num`` first.

    Besides the stored fields each dict has a ``preview`` (the title, or
    else the first 120 characters of the first content line) and a
    ``line_count`` (0 for empty content, otherwise newlines + 1).
    """
    def _preview(version) -> str:
        if version.title:
            return version.title
        if not version.content:
            return ''
        return version.content.splitlines()[0][:120]

    def _line_count(version) -> int:
        body = version.content or ''
        return (body.count('\n') + 1) if body else 0

    newest_first = (EntryVersion.objects
                    .filter(entry_id=entry_uuid)
                    .order_by('-version_num'))
    versions = []
    for version in newest_first[:limit]:
        versions.append({
            'id': version.id,
            'version_num': version.version_num,
            'title': version.title,
            'content': version.content,
            'data': version.data,
            'changed_by': version.changed_by,
            'timestamp': version.timestamp,
            'preview': _preview(version),
            'line_count': _line_count(version),
        })
    return versions
0296
0297
def recent_runs(limit: int = 20) -> list[dict]:
    """Return the ``limit`` newest engine-run entries as ``{'id', 'data'}``.

    ``data`` is a shallow copy of the stored mapping in which the older
    key names ``per_check`` / ``checks_run`` are mirrored to ``per_alarm``
    / ``alarms_run`` whenever the newer key is absent.
    """
    legacy_aliases = (('per_alarm', 'per_check'), ('alarms_run', 'checks_run'))
    newest = (Entry.objects
              .filter(context_id=CONTEXT_NAME, kind='engine_run',
                      archived=False, deleted_at__isnull=True)
              .order_by('-timestamp_created')[:limit])
    runs = []
    for entry in newest:
        payload = dict(entry.data or {})
        for current, legacy in legacy_aliases:
            if current not in payload and legacy in payload:
                payload[current] = payload[legacy]
        runs.append({'id': entry.id, 'data': payload})
    return runs
0313
0314
def quiet_alarms(quiet_ticks: int = 3, history_ticks: int = 12) -> set[str]:
    """Alarm entry_ids that have recently gone suspiciously silent.

    Looks at the newest ``history_ticks`` engine runs. An alarm is flagged
    when:
      - it has an error-free record in each of the newest ``quiet_ticks``
        runs and every one of them reports zero ``alarms_seen``, AND
      - its error-free records across all fetched runs add up to at least
        one detection.

    Returns an empty set when fewer than ``quiet_ticks`` runs exist.
    Heuristic only: an alarm that never detected anything is
    indistinguishable from a healthy quiet one, so only recently-silent
    alarms are surfaced.
    """
    runs = recent_runs(limit=history_ticks)
    if len(runs) < quiet_ticks:
        return set()

    recent_counts: dict[str, list[int]] = {}
    total_seen: dict[str, int] = {}
    for position, run in enumerate(runs):
        in_recent_window = position < quiet_ticks
        per_alarm = run['data'].get('per_alarm') or {}
        for alarm_id, stats in per_alarm.items():
            stats = stats or {}
            if stats.get('errors'):
                # Errored runs say nothing about whether the alarm is quiet.
                continue
            detections = int(stats.get('alarms_seen') or 0)
            if in_recent_window:
                recent_counts.setdefault(alarm_id, []).append(detections)
            total_seen[alarm_id] = total_seen.get(alarm_id, 0) + detections

    return {
        alarm_id
        for alarm_id, counts in recent_counts.items()
        if len(counts) >= quiet_ticks
        and not any(counts)
        and total_seen.get(alarm_id, 0) > 0
    }
0351
0352
def engine_health() -> dict:
    """Traffic-light summary of the newest engine run for the dashboard
    header.

    Returns ``{'status', 'reasons'}`` with status 'unknown' when the query
    fails or no run exists. Otherwise also includes ``last_run`` (the run's
    data) and ``last_run_id``, with status:
      - 'warn' — the run has no ``finished_at``;
      - 'bad'  — it finished more than 15 minutes ago, or recorded errors;
      - 'ok'   — none of the above.
    """
    try:
        latest = (Entry.objects
                  .filter(context_id=CONTEXT_NAME, kind='engine_run',
                          deleted_at__isnull=True)
                  .order_by('-timestamp_created')
                  .first())
    except Exception:
        # Best effort: the header must still render when the query fails.
        return {'status': 'unknown', 'reasons': ['DB not reachable.']}

    if latest is None:
        return {'status': 'unknown', 'reasons': ['Engine has never run.']}

    payload = latest.data or {}
    finished_at = payload.get('finished_at')
    status = 'ok'
    reasons: list[str] = []

    if finished_at:
        age = time.time() - float(finished_at)
        if age > 15 * 60:
            reasons.append(
                f'Engine stale: last run finished {int(age // 60)} min ago.')
            status = 'bad'
    else:
        reasons.append('Last engine run did not finish.')
        status = 'warn'

    if payload.get('errors'):
        reasons.append(f"Last run had {payload['errors']} error(s).")
        status = 'bad'

    return {
        'status': status,
        'reasons': reasons or ['All checks healthy.'],
        'last_run': payload,
        'last_run_id': latest.id,
    }
0386
0387
0388
0389
0390
0391
def list_teams() -> list[dict]:
    """Return every un-archived, non-deleted team in the 'teams' context,
    ordered by name.

    ``members`` is the team's content parsed into recipient tokens.
    """
    teams = (Entry.objects
             .filter(context_id=TEAMS_CONTEXT, kind='team',
                     archived=False, deleted_at__isnull=True)
             .order_by('name'))
    return [
        {
            'id': team.id,
            'name': team.name,
            'title': team.title,
            'content': team.content,
            'members': _parse_recipient_tokens(team.content),
            'created': team.timestamp_created,
            'modified': team.timestamp_modified,
        }
        for team in teams
    ]
0410
0411
def get_team(at_name: str) -> Entry | None:
    """Fetch an un-archived, non-deleted team by its @name.

    The leading '@' is optional in the argument; it is added before the
    lookup. Returns None for an empty name or when no team matches.
    """
    if not at_name:
        return None
    lookup_name = at_name if at_name.startswith('@') else '@' + at_name
    matches = Entry.objects.filter(
        context_id=TEAMS_CONTEXT,
        kind='team',
        name=lookup_name,
        archived=False,
        deleted_at__isnull=True,
    )
    return matches.first()
0422
0423
def get_team_by_id(entry_id: str) -> Entry | None:
    """Fetch a non-deleted team entry by its row id, or None if absent."""
    try:
        team = Entry.objects.get(
            id=entry_id,
            context_id=TEAMS_CONTEXT,
            kind='team',
            deleted_at__isnull=True,
        )
    except Entry.DoesNotExist:
        return None
    else:
        return team
0430
0431
0432
0433
def _parse_recipient_tokens(raw) -> list[str]:
    """Split a string or list of strings into normalised recipient tokens.

    Tokens may be separated by commas, semicolons, any whitespace, or a
    mix of these. Blank tokens are dropped. Each token is either an email
    address or an @<teamname>. Lists and tuples are flattened recursively;
    any other non-None value is converted with str() first.

    Returns the tokens in the order given, deduped (case-insensitive match
    for emails; @names kept as-is). None gives [].
    """
    if raw is None:
        return []
    if isinstance(raw, (list, tuple)):
        parts: list[str] = []
        for chunk in raw:
            parts.extend(_parse_recipient_tokens(chunk))
        return _dedup_preserve(parts)

    # Turn the punctuation separators into spaces, then let the no-argument
    # split() break on every run of whitespace. That also covers form feed,
    # vertical tab and non-breaking space, which a fixed separator list
    # would leave glued into a token.
    text = str(raw).replace(',', ' ').replace(';', ' ')
    return _dedup_preserve(text.split())
0456
0457
def _dedup_preserve(seq: list[str]) -> list[str]:
    """Drop repeated tokens, keeping the first occurrence and the order.

    Tokens containing '@' anywhere but at the start (email addresses) are
    compared case-insensitively; all other tokens, including @names, are
    compared exactly.
    """
    def _identity(token: str) -> str:
        looks_like_email = '@' in token and not token.startswith('@')
        return token.lower() if looks_like_email else token

    seen_keys: set[str] = set()
    kept: list[str] = []
    for token in seq:
        ident = _identity(token)
        if ident not in seen_keys:
            seen_keys.add(ident)
            kept.append(token)
    return kept
0468
0469
def parse_recipients_input(text) -> list[str]:
    """Normalise user-supplied recipients into a deduped token list.

    Public wrapper (for views/engine) around ``_parse_recipient_tokens``.
    """
    tokens = _parse_recipient_tokens(text)
    return tokens
0473
0474
def expand_recipients(tokens) -> tuple[list[str], list[str]]:
    """Expand @<team> tokens into their member emails.

    Returns (emails, unresolved). `emails` is the final dedup'd list of
    deliverable addresses. `unresolved` is a list of @<team> tokens that
    didn't resolve — the team does not exist, or its content yields no
    recipient tokens (missing, blank, or separators only). Callers should
    log but not fail on those. Team content is expanded one level only.
    """
    final: list[str] = []
    unresolved: list[str] = []
    for tok in _parse_recipient_tokens(tokens):
        if not tok.startswith('@'):
            final.append(tok)
            continue
        team = get_team(tok)
        # Parse before judging: _parse_recipient_tokens accepts None, so a
        # team whose content is NULL is reported as unresolved instead of
        # raising on None.strip().
        members = _parse_recipient_tokens(team.content) if team is not None else []
        if members:
            final.extend(members)
        else:
            unresolved.append(tok)
    return _dedup_preserve(final), unresolved
0494
0495
0496
0497
def _event_entry_id_for(alarm_entry_id: str) -> str:
    """Map an alarm entry_id to the entry_id used by its event rows.

    A leading 'alarm_' is replaced with 'event_'; any other id simply gets
    'event_' put in front of it.
    """
    prefix = 'alarm_'
    stem = alarm_entry_id
    if stem.startswith(prefix):
        stem = stem[len(prefix):]
    return 'event_' + stem
0502
0503
# Keys of an event's data mapping that _event_to_dict leaves out of
# 'context_data'; every other key is passed through there.
_EVENT_INTERNAL_KEYS = {
    'entry_id',
    'fire_time',
    'clear_time',
    'last_seen',
    'last_notified',
    'dedupe_key',
    'subject',
    'recipients',
    'alarm_config_id',
    'severity',
}
0508
0509
def _event_to_dict(e: Entry) -> dict:
    """Flatten an event Entry into the dict shape used by the templates.

    ``state`` is 'active' while ``clear_time`` is missing or null and
    'cleared' otherwise. ``context_data`` holds every data key that is not
    listed in ``_EVENT_INTERNAL_KEYS``; ``data`` is the full mapping.
    """
    payload = e.data or {}
    cleared_at = payload.get('clear_time')

    extras = {}
    for key, value in payload.items():
        if key not in _EVENT_INTERNAL_KEYS:
            extras[key] = value

    if cleared_at is None:
        state = 'active'
    else:
        state = 'cleared'

    return {
        'id': e.id,
        'title': e.title,
        'entry_id': payload.get('entry_id'),
        'subject': payload.get('subject', ''),
        'dedupe_key': payload.get('dedupe_key'),
        'fire_time': payload.get('fire_time'),
        'clear_time': cleared_at,
        'last_seen': payload.get('last_seen'),
        'state': state,
        'recipients': payload.get('recipients') or [],
        'content': e.content,
        'data': payload,
        'context_data': extras,
        'timestamp_created': e.timestamp_created,
        'timestamp_modified': e.timestamp_modified,
    }
0534 }