File indexing completed on 2026-06-26 08:40:22
0001
0002
0003 import django.db.models.deletion
0004 import monitor_app.models
0005 import time
0006 import uuid
0007 from django.db import migrations, models
0008
0009
# Names (primary keys) of the two EntryContext rows this migration seeds.
# They are shared by the seed and unseed steps so both act on the same rows.
ALARMS_CONTEXT = "swf-alarms"
TEAMS_CONTEXT = "teams"
0012
# Seed alarm definitions. Each item supplies the ``title``, ``data`` and
# ``content`` values of one Entry row of kind 'alarm'; ``data['entry_id']`` is
# the key the seed step uses to detect an already-existing row.
ALARM_CONFIGS = [
    # Per-owner alarm, routed to the '@prodops' recipient alias.
    {
        "title": "PanDA task failure rate - Sakib's tasks",
        "data": {
            "entry_id": "alarm_panda_failure_rate_sakib",
            "enabled": True,
            "severity": "warning",
            "recipients": ["@prodops"],
            "renotification_window_hours": 24,
            "params": {
                "threshold": 0.03,
                "since_days": 1,
                "username": "Sakib Rahman",
                "min_terminal_jobs": 5,
            },
        },
        "content": (
            "Alert on PanDA tasks owned by Sakib Rahman whose computed "
            "failure rate exceeds the configured threshold over the "
            "configured window. Threshold, window, and minimum terminal "
            "jobs are in the Check params below.\n\n"
            "Dashboard: https://pandaserver02.sdcc.bnl.gov/swf-monitor/alarms/\n"
        ),
    },
    # Catch-all alarm with no 'username' param, sent to a single address.
    {
        "title": "PanDA task failure rate - catch-all",
        "data": {
            "entry_id": "alarm_panda_failure_rate_eic_all",
            "enabled": True,
            "severity": "info",
            "recipients": ["wenaus@gmail.com"],
            "renotification_window_hours": 48,
            "params": {
                "threshold": 0.05,
                "since_days": 1,
                "min_terminal_jobs": 5,
            },
        },
        "content": (
            "Catch-all alert on any PanDA task whose computed failure rate "
            "exceeds the configured threshold over the configured window. "
            "Torre-only tuning channel for shaping future per-owner "
            "alarms. Threshold and window live in the Check params below.\n"
        ),
    },
]
0060
# Seed recipient aliases. Each item becomes one Entry row of kind 'team':
# ``name`` is the alias and ``content`` holds the space-separated addresses.
TEAMS = [
    {
        "name": "@prodops",
        "title": "Production ops",
        "content": "srahman1@bnl.gov wenaus@gmail.com",
        "data": {"entry_id": "team_prodops"},
    },
]
0069
0070
def seed_alarm_entries(apps, schema_editor):
    """Create the alarm and team contexts and insert their seed entries.

    Each context is fetched or created by name. An alarm config is skipped
    when an ``alarm`` entry with the same ``data.entry_id`` already exists in
    the alarms context, and a team is skipped when a ``team`` entry with the
    same ``name`` already exists in the teams context, so existing rows are
    neither overwritten nor duplicated.

    Args:
        apps: Historical app registry supplied by ``RunPython``; models are
            looked up through it instead of being imported directly.
        schema_editor: Schema editor for the database being migrated; its
            connection alias selects the database every query runs against.
    """
    Entry = apps.get_model('monitor_app', 'Entry')
    EntryContext = apps.get_model('monitor_app', 'EntryContext')

    # RunPython does not route ORM calls to the database being migrated;
    # without an explicit alias they go to 'default', which in a multi-database
    # deployment may not be the database whose tables were just created.
    db_alias = schema_editor.connection.alias
    entries = Entry.objects.using(db_alias)
    contexts = EntryContext.objects.using(db_alias)

    # One timestamp shared by every row created in this run.
    now = time.time()

    alarm_ctx, _ = contexts.get_or_create(
        name=ALARMS_CONTEXT,
        defaults={
            'title': 'swf-alarms',
            'description': 'Alarm configs, firings, and engine-run records.',
            'timestamp_created': now,
            'timestamp_modified': now,
        },
    )
    for cfg in ALARM_CONFIGS:
        entry_id = cfg['data']['entry_id']
        already_seeded = entries.filter(
            context=alarm_ctx,
            kind='alarm',
            data__entry_id=entry_id,
        ).exists()
        if already_seeded:
            continue
        entries.create(
            # The primary key is set explicitly rather than left to the
            # field default.
            id=str(uuid.uuid4()),
            title=cfg['title'],
            content=cfg['content'],
            kind='alarm',
            context=alarm_ctx,
            data=cfg['data'],
            status='active',
            archived=False,
            timestamp_created=now,
            timestamp_modified=now,
        )

    teams_ctx, _ = contexts.get_or_create(
        name=TEAMS_CONTEXT,
        defaults={
            'title': 'Teams',
            'description': (
                'Named recipient aliases. Referenced from alarm configs '
                'and elsewhere as @<teamname>; resolve at send-time to '
                'the whitespace-delimited email list in Entry.content.'
            ),
            'timestamp_created': now,
            'timestamp_modified': now,
        },
    )
    for team in TEAMS:
        already_seeded = entries.filter(
            context=teams_ctx,
            kind='team',
            name=team['name'],
        ).exists()
        if already_seeded:
            continue
        entries.create(
            id=str(uuid.uuid4()),
            title=team['title'],
            content=team['content'],
            kind='team',
            context=teams_ctx,
            name=team['name'],
            data=team['data'],
            status='active',
            archived=False,
            timestamp_created=now,
            timestamp_modified=now,
        )
0133
0134
def unseed_alarm_entries(apps, schema_editor):
    """Reverse the seed step by emptying and removing both seeded contexts.

    Deletes every entry whose context is the alarms or teams context (not
    only the rows the seed step inserted), then deletes the two context rows.

    Args:
        apps: Historical app registry supplied by ``RunPython``.
        schema_editor: Schema editor for the database being migrated; its
            connection alias selects the database the deletes run against.
    """
    Entry = apps.get_model('monitor_app', 'Entry')
    EntryContext = apps.get_model('monitor_app', 'EntryContext')

    # RunPython does not route ORM calls to the database being migrated, so
    # bind both deletes to that connection instead of falling back to 'default'.
    db_alias = schema_editor.connection.alias
    seeded_contexts = [ALARMS_CONTEXT, TEAMS_CONTEXT]

    # Entries go first: Entry.context is declared with on_delete=PROTECT, so a
    # context that still has entries cannot be deleted.
    Entry.objects.using(db_alias).filter(
        context__name__in=seeded_contexts,
    ).delete()
    EntryContext.objects.using(db_alias).filter(
        name__in=seeded_contexts,
    ).delete()
0140
0141
class Migration(migrations.Migration):
    """Create the EntryContext, Entry and EntryVersion tables, then seed them.

    The schema operations run first; the final ``RunPython`` operation inserts
    the alarm and team seed rows and is reversible through
    ``unseed_alarm_entries``.
    """

    dependencies = [
        ('monitor_app', '0037_systemstatus_systemstatushistory'),
    ]

    operations = [
        # --- EntryContext: keyed by its name -------------------------------
        migrations.CreateModel(
            name='EntryContext',
            fields=[
                (
                    'name',
                    models.CharField(
                        max_length=255,
                        primary_key=True,
                        serialize=False,
                    ),
                ),
                (
                    'title',
                    models.CharField(blank=True, default='', max_length=255),
                ),
                (
                    'description',
                    models.TextField(blank=True, default=''),
                ),
                (
                    'timestamp_created',
                    models.FloatField(default=time.time),
                ),
                (
                    'timestamp_modified',
                    models.FloatField(default=time.time),
                ),
                (
                    'data',
                    models.JSONField(blank=True, default=dict),
                ),
            ],
            options={
                'db_table': 'entry_context',
            },
        ),
        # --- Entry: string primary key, optional parent and context ---------
        migrations.CreateModel(
            name='Entry',
            fields=[
                (
                    'id',
                    models.CharField(
                        default=monitor_app.models._new_entry_id,
                        max_length=36,
                        primary_key=True,
                        serialize=False,
                    ),
                ),
                (
                    'title',
                    models.CharField(blank=True, default='', max_length=255),
                ),
                (
                    'content',
                    models.TextField(blank=True, default=''),
                ),
                (
                    'kind',
                    models.CharField(max_length=50),
                ),
                (
                    'name',
                    models.CharField(blank=True, max_length=255, null=True),
                ),
                (
                    'data',
                    models.JSONField(blank=True, null=True),
                ),
                (
                    'priority',
                    models.IntegerField(blank=True, null=True),
                ),
                (
                    'status',
                    models.CharField(blank=True, max_length=50, null=True),
                ),
                (
                    'archived',
                    models.BooleanField(default=False),
                ),
                (
                    'timestamp_created',
                    models.FloatField(default=time.time),
                ),
                (
                    'timestamp_modified',
                    models.FloatField(default=time.time),
                ),
                (
                    'deleted_at',
                    models.FloatField(blank=True, null=True),
                ),
                (
                    # Self-reference; deleting a parent nulls the link on its
                    # children rather than deleting them.
                    'parent',
                    models.ForeignKey(
                        blank=True,
                        null=True,
                        on_delete=django.db.models.deletion.SET_NULL,
                        related_name='children',
                        to='monitor_app.entry',
                    ),
                ),
                (
                    # PROTECT: a context cannot be deleted while entries
                    # still reference it.
                    'context',
                    models.ForeignKey(
                        blank=True,
                        null=True,
                        on_delete=django.db.models.deletion.PROTECT,
                        related_name='entries',
                        to='monitor_app.entrycontext',
                    ),
                ),
            ],
            options={
                'db_table': 'entry',
            },
        ),
        # --- EntryVersion: rows are removed together with their entry -------
        migrations.CreateModel(
            name='EntryVersion',
            fields=[
                (
                    'id',
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name='ID',
                    ),
                ),
                (
                    'version_num',
                    models.IntegerField(),
                ),
                (
                    'title',
                    models.CharField(blank=True, default='', max_length=255),
                ),
                (
                    'content',
                    models.TextField(blank=True, default=''),
                ),
                (
                    'data',
                    models.JSONField(blank=True, null=True),
                ),
                (
                    'changed_by',
                    models.CharField(default='unknown', max_length=100),
                ),
                (
                    'timestamp',
                    models.FloatField(default=time.time),
                ),
                (
                    'entry',
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name='versions',
                        to='monitor_app.entry',
                    ),
                ),
            ],
            options={
                'db_table': 'entry_version',
            },
        ),
        # --- Indexes and constraints on Entry -------------------------------
        migrations.AddIndex(
            model_name='entry',
            index=models.Index(
                fields=['kind', '-timestamp_created'],
                name='entry_kind_b71943_idx',
            ),
        ),
        migrations.AddIndex(
            model_name='entry',
            index=models.Index(
                fields=['context', 'kind', '-timestamp_created'],
                name='entry_context_67ee85_idx',
            ),
        ),
        migrations.AddIndex(
            model_name='entry',
            index=models.Index(
                fields=['archived'],
                name='entry_archive_579b73_idx',
            ),
        ),
        migrations.AddIndex(
            model_name='entry',
            index=models.Index(
                fields=['status'],
                name='entry_status_cc9d43_idx',
            ),
        ),
        migrations.AddConstraint(
            model_name='entry',
            # Conditional uniqueness: only rows with a non-null name take part.
            constraint=models.UniqueConstraint(
                condition=models.Q(('name__isnull', False)),
                fields=('context', 'name'),
                name='uniq_context_name',
            ),
        ),
        # --- Indexes and constraints on EntryVersion ------------------------
        migrations.AddIndex(
            model_name='entryversion',
            index=models.Index(
                fields=['entry', '-timestamp'],
                name='entry_versi_entry_i_4ca17c_idx',
            ),
        ),
        migrations.AddConstraint(
            model_name='entryversion',
            constraint=models.UniqueConstraint(
                fields=('entry', 'version_num'),
                name='uniq_entry_version_num',
            ),
        ),
        # --- Data step: must follow the CreateModel operations above --------
        migrations.RunPython(seed_alarm_entries, unseed_alarm_entries),
    ]