Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-04-25 08:29:11

#!/bin/bash
# Require bash (fail fast if invoked under another shell).
# This guard only matters when some other shell is interpreting the file, so
# it is written with plain POSIX test/echo syntax.
if [ -z "${BASH_VERSION:-}" ]; then
    # $* (not $@) so the arguments are joined into this one message string
    # instead of splitting the message into several words.
    echo "This script must be run with bash. Try: bash $0 \"$*\"" >&2
    exit 1
fi
#
# SWF Monitor Deployment Script
# Usage: deploy-swf-monitor.sh {tag|branch} <reference>
#
# Both arguments are required. The script deploys the specified branch or tag
# to the production apache system service on pandaserver02.sdcc.bnl.gov and
# must be run as root (with sudo).
#
# Examples:
#   deploy-swf-monitor.sh branch infra/baseline-v18
#   deploy-swf-monitor.sh branch main
#   deploy-swf-monitor.sh tag tagName        (tags not in use as of 9/2025)
#
# See docs/PRODUCTION_DEPLOYMENT.md for complete documentation
0020 
# Stop at the first command that fails without being handled.
set -o errexit

# Fixed deployment settings: install root, upstream repository, and the
# account that is given ownership of the deployment tree.
DEPLOY_ROOT='/opt/swf-monitor'
REPO_URL='https://github.com/BNLNPPS/swf-monitor.git'
CURRENT_USER='wenauseic'
0026 
# Write a message to stdout prefixed with a "[YYYY-MM-DD HH:MM:SS]" timestamp
# taken from date(1).
# Arguments: $1 - message text (any further arguments are ignored)
log() {
    printf '[%s] %s\n' "$(date +'%Y-%m-%d %H:%M:%S')" "$1"
}
0030 
# Preflight 1: the script must run as root.
# Diagnostics go to stderr, matching the bash guard at the top of the file.
if [[ $EUID -ne 0 ]]; then
    echo "This script must be run as root (with sudo)" >&2
    exit 1
fi

# Preflight 2: exactly two arguments are required — the reference type and
# the reference itself. Anything else prints the usage text and aborts.
if [ $# -ne 2 ]; then
    {
        echo "Usage: $0 {tag|branch} <reference>"
        echo ""
        echo "Examples:"
        echo "  $0 tag infra/baseline-v17"
        echo "  $0 branch infra/baseline-v18"
    } >&2
    exit 1
fi
0044 
REF_TYPE="$1"
REF_VALUE="$2"

# An empty reference would make the release path collapse onto the releases
# root itself, so reject it up front.
if [ -z "$REF_VALUE" ]; then
    echo "Invalid reference: <reference> must not be empty" >&2
    exit 1
fi

# Derive the git ref and the release directory name from the arguments.
# Slashes in the reference are replaced with dashes for BOTH types so that
# every release is a single directory directly under releases/. The
# previous-release detection and the cleanup step both list that directory
# and would mis-handle nested paths such as releases/infra/baseline-v17.
case "$REF_TYPE" in
    tag)
        GIT_REF="$REF_VALUE"
        DEPLOY_NAME="${REF_VALUE//\//-}"
        ;;
    branch)
        GIT_REF="$REF_VALUE"
        DEPLOY_NAME="branch-${REF_VALUE//\//-}"
        ;;
    *)
        echo "Invalid reference type: $REF_TYPE. Use 'tag' or 'branch'" >&2
        exit 1
        ;;
esac

RELEASE_DIR="$DEPLOY_ROOT/releases/$DEPLOY_NAME"

log "Starting deployment of $REF_TYPE '$REF_VALUE' to '$DEPLOY_NAME'"
0066 
# Verify the branch/tag exists before proceeding.
# The lookup uses the fully-qualified ref for the requested type, so that
# "branch X" is not satisfied by a tag named X (or vice versa) and a short
# name cannot match the tail of a longer ref (e.g. "main" vs "foo/main").
log "Verifying $REF_TYPE '$REF_VALUE' exists in repository..."
if [ "$REF_TYPE" = "tag" ]; then
    REMOTE_REF="refs/tags/$GIT_REF"
else
    REMOTE_REF="refs/heads/$GIT_REF"
fi

# With --exit-code, git ls-remote exits 2 when no ref matched; any other
# non-zero status means the repository could not be queried at all.
LS_REMOTE_RC=0
git ls-remote --exit-code "$REPO_URL" "$REMOTE_REF" >/dev/null 2>&1 || LS_REMOTE_RC=$?
if [ "$LS_REMOTE_RC" -eq 2 ]; then
    echo "ERROR: $REF_TYPE '$REF_VALUE' does not exist in repository $REPO_URL" >&2
    exit 1
elif [ "$LS_REMOTE_RC" -ne 0 ]; then
    echo "ERROR: could not query repository $REPO_URL (git ls-remote exit status $LS_REMOTE_RC)" >&2
    exit 1
fi
0073 
# Create release directory.
# Redeploying a reference reuses the same directory name, so an existing
# release is removed first. If that directory is the one "current" points
# at, the live code is gone until this deployment finishes — say so.
if [ -d "$RELEASE_DIR" ]; then
    if [ "$(readlink -f "$DEPLOY_ROOT/current" 2>/dev/null)" = "$(readlink -f "$RELEASE_DIR")" ]; then
        log "WARNING: replacing the currently active release in place"
    fi
    log "Release directory exists, removing..."
    # ${VAR:?} aborts instead of expanding to an empty path.
    rm -rf -- "${RELEASE_DIR:?}"
fi

log "Creating release directory: $RELEASE_DIR"
mkdir -p "$RELEASE_DIR"

# Clone repository (only the requested branch/tag) into the release directory.
# The script stays inside the release directory afterwards; later steps use
# relative paths such as .venv.
log "Cloning repository..."
cd "$RELEASE_DIR"
git clone --single-branch --branch "$GIT_REF" "$REPO_URL" . || {
    echo "ERROR: Failed to clone $REF_TYPE '$REF_VALUE'" >&2
    # Do not leave a half-populated release behind.
    rm -rf -- "${RELEASE_DIR:?}"
    exit 1
}

log "Checked out: $(git rev-parse --short HEAD) - $(git log -1 --pretty=format:'%s')"
0093 
# Copy the development virtual environment into the release (the current
# directory) and activate it for the python commands that follow.
log "Copying development virtual environment..."
cp -r /eic/u/wenauseic/github/swf-testbed/.venv .venv
. .venv/bin/activate

# The production environment file must already exist; stop here if it is missing.
if ! [ -f "$DEPLOY_ROOT/config/env/production.env" ]; then
    printf '%s\n' \
        "ERROR: Production environment file not found at $DEPLOY_ROOT/config/env/production.env" \
        "Please create this file with appropriate production configuration before deploying." \
        "See docs/PRODUCTION_DEPLOYMENT.md for configuration details."
    exit 1
fi
0106 
# Validate subpath configuration for Apache deployment.
# If the installed Apache conf mounts the app at /swf-monitor, production.env
# must declare the matching subpath; otherwise the deployment is aborted.
log "Validating subpath configuration..."
if ! grep -q "WSGIScriptAlias /swf-monitor" /etc/httpd/conf.d/swf-monitor.conf 2>/dev/null; then
    log "ℹ️ No subpath deployment detected in Apache config"
elif grep -q "SWF_DEPLOYMENT_SUBPATH=/swf-monitor" "$DEPLOY_ROOT/config/env/production.env"; then
    log "✅ Subpath configuration validated"
else
    cat <<'EOF'
ERROR: Apache configured for /swf-monitor subpath but production.env missing subpath configuration
Required variables in production.env:
  SWF_DEPLOYMENT_SUBPATH=/swf-monitor
  SWF_STATIC_URL_BASE=/swf-monitor/static/
  SWF_LOGIN_REDIRECT=/swf-monitor/home/
See docs/PRODUCTION_DEPLOYMENT.md for complete configuration details.
EOF
    exit 1
fi
0123 
# Link shared resources into the release.
# NOTE: .env is NOT deployed from git (it's in .gitignore for security).
# Production uses: $DEPLOY_ROOT/config/env/production.env
# To update production .env settings, edit that file directly.
log "Linking shared resources..."
ln -s -f "$DEPLOY_ROOT/shared/logs" "$RELEASE_DIR/logs"
ln -s -f "$DEPLOY_ROOT/config/env/production.env" "$RELEASE_DIR/.env"
log "  .env source: $DEPLOY_ROOT/config/env/production.env (edit this file for config changes)"

# Shared caches — writable by both httpd (WSGI) and service users.
# HF_HOME is appended to production.env only when no HF_HOME= line exists yet.
mkdir -p "$DEPLOY_ROOT/shared/hf_cache"
chmod 777 "$DEPLOY_ROOT/shared/hf_cache"
if ! grep -q '^HF_HOME=' "$DEPLOY_ROOT/config/env/production.env" 2>/dev/null; then
    echo "HF_HOME=$DEPLOY_ROOT/shared/hf_cache" >> "$DEPLOY_ROOT/config/env/production.env"
fi
0138 
# When the release ships a WSGI module configuration, copy it into Apache's
# conf.modules.d directory.
if [[ -f "$RELEASE_DIR/config/apache/20-swf-monitor-wsgi.conf" ]]; then
    log "Installing WSGI module configuration..."
    cp "$RELEASE_DIR/config/apache/20-swf-monitor-wsgi.conf" \
       /etc/httpd/conf.modules.d/20-swf-monitor-wsgi.conf
fi

# Nothing to install for the SSL certificate: if the repo carries one, the
# clone already placed it in the release. Only report its presence.
if [[ -f "$RELEASE_DIR/full-chain.pem" ]]; then
    log "SSL certificate found in deployment..."
fi
0149 
# Collect static files.
# The working directory moves to the release's src/ directory here and stays
# there for the manage.py invocations that follow.
log "Collecting static files..."
cd "$RELEASE_DIR/src"
DJANGO_SETTINGS_MODULE=swf_monitor_project.settings
export DJANGO_SETTINGS_MODULE
python manage.py collectstatic --noinput --clear \
    --settings=swf_monitor_project.settings

# Mirror the collected files into the shared static directory; --delete
# removes files there that are no longer part of the collected set.
log "Copying static files to shared location..."
rsync -a --delete \
    "$RELEASE_DIR/src/staticfiles/" \
    "$DEPLOY_ROOT/shared/static/"
0159 
# Pre-migration: rename 'emi' app to 'pcs' in migration history and content types
# This is idempotent — safe to run even after the rename is complete: both
# statements are plain UPDATEs, so once nothing is labelled 'emi' they match
# zero rows and still succeed.
# A non-zero exit therefore means the step itself failed (Django setup, the
# database connection, the SQL, ...), not that there was nothing to rename.
# The step remains best-effort, but python's stderr is left visible and the
# log line says what actually happened.
log "Pre-migration: updating app label emi → pcs in migration history..."
python -c "
import django, os
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'swf_monitor_project.settings')
django.setup()
from django.db import connection
with connection.cursor() as c:
    c.execute(\"UPDATE django_migrations SET app = 'pcs' WHERE app = 'emi'\")
    c.execute(\"UPDATE django_content_type SET app_label = 'pcs' WHERE app_label = 'emi'\")
print('  Done (emi → pcs in django_migrations and django_content_type)')
" || log "  WARNING: emi → pcs pre-migration step failed (see error output above) — continuing"

# Run database migrations (a failure here aborts the deployment via set -e)
log "Running database migrations..."
python manage.py migrate --settings=swf_monitor_project.settings
0177 
# Give the whole deployment tree to the deploy user and the "eic" group.
log "Setting ownership..."
chown -R "${CURRENT_USER}:eic" "${DEPLOY_ROOT}"

# Repoint the "current" symlink at the new release. -n makes ln replace the
# existing link itself rather than creating a link inside its target.
log "Updating current symlink..."
ln -s -f -n "${RELEASE_DIR}" "${DEPLOY_ROOT}/current"
0185 
# Apache virtual-host conf sync from repo canonical.
# Repo apache-swf-monitor.conf is the source of truth; live is whatever was
# last installed. This catches conf changes in the same deploy that brings
# the code change they go with — no more 6-week drift like dce7abf.
#
# The new conf is installed, checked with `httpd -t`, and rolled back if the
# check fails. A missing live conf (first install) is handled: there is then
# nothing to back up, and rollback removes the file that was just installed.
APACHE_CONF_SRC="$RELEASE_DIR/apache-swf-monitor.conf"
APACHE_CONF_DST="/etc/httpd/conf.d/swf-monitor.conf"
if [ -f "$APACHE_CONF_SRC" ]; then
    # cmp -s is also non-zero when the destination does not exist.
    if ! cmp -s "$APACHE_CONF_SRC" "$APACHE_CONF_DST"; then
        TS=$(date +%s)
        APACHE_CONF_BAK=""
        if [ -f "$APACHE_CONF_DST" ]; then
            APACHE_CONF_BAK="${APACHE_CONF_DST}.bak.$TS"
            log "Apache conf differs from repo — syncing (backup: $APACHE_CONF_BAK)"
            cp "$APACHE_CONF_DST" "$APACHE_CONF_BAK"
        else
            log "Apache conf not installed yet — installing from repo (no backup needed)"
        fi
        install -o root -g root -m 644 "$APACHE_CONF_SRC" "$APACHE_CONF_DST"
        if ! httpd -t >/dev/null 2>&1; then
            log "ERROR: httpd -t failed after conf sync — rolling back"
            if [ -n "$APACHE_CONF_BAK" ]; then
                cp "$APACHE_CONF_BAK" "$APACHE_CONF_DST"
            else
                rm -f -- "$APACHE_CONF_DST"
            fi
            # Show the syntax-check result for the restored configuration;
            # the deployment is aborted with status 1 either way.
            httpd -t || true
            exit 1
        fi
    else
        log "Apache conf matches repo canonical — no sync needed"
    fi
fi
0208 
# Apache: start it when it is not running, otherwise reload it. The reload is
# needed on every deploy to recycle mod_wsgi daemon processes so they pick up
# new Python code; a conf sync done just before rides along on the same reload.
if ! systemctl is-active httpd &>/dev/null; then
    log "Apache was not running — starting..."
    systemctl start httpd
else
    log "Reloading Apache (graceful) to pick up new code..."
    systemctl reload httpd
fi

# ASGI worker (MCP endpoint) — uvicorn loads code once at startup and does
# not re-read on file change, so new Python code requires a restart. Unlike
# the bots (which are restarted only when their code changed), the worker is
# always restarted when its unit is enabled, since it imports the full
# Django app.
if systemctl is-enabled swf-monitor-mcp-asgi.service &>/dev/null; then
    log "Restarting ASGI worker (swf-monitor-mcp-asgi) to pick up new code..."
    systemctl restart swf-monitor-mcp-asgi.service
fi
0228 
# Detect bot code changes before health check (bots restart after)

# _bot_code_changed PREV_ROOT NEW_ROOT PACKAGE COMMAND_FILE
#   PREV_ROOT     release directory to compare against ("" if there is none)
#   NEW_ROOT      release directory being deployed
#   PACKAGE       bot package directory name under src/monitor_app/
#   COMMAND_FILE  management command file under src/monitor_app/management/commands/
# Returns 0 (changed — restart needed) when there is no previous release,
# when the package trees differ, or when the command files differ. A diff
# error (e.g. a path missing on either side) also counts as changed.
# Returns 1 when both comparisons find no difference.
_bot_code_changed() {
    local prev_root="$1" new_root="$2" package="$3" command_file="$4"

    [ -n "$prev_root" ] || return 0
    diff -rq "$prev_root/src/monitor_app/$package" \
             "$new_root/src/monitor_app/$package" >/dev/null 2>&1 || return 0
    diff -q "$prev_root/src/monitor_app/management/commands/$command_file" \
            "$new_root/src/monitor_app/management/commands/$command_file" >/dev/null 2>&1 || return 0
    return 1
}

# Previous release = most recently modified entry under releases/ other than
# the one being deployed (rather than assuming the new release sorts first).
# NOTE(review): when the same reference is redeployed, its old directory has
# already been replaced, so the comparison runs against a different, older
# release (or none) — confirm that this is acceptable.
PREV_RELEASE=$(ls -1t "$DEPLOY_ROOT/releases" | grep -vxF -- "$DEPLOY_NAME" | head -n 1 || true)
PREV_RELEASE_DIR="${PREV_RELEASE:+$DEPLOY_ROOT/releases/$PREV_RELEASE}"
PANDA_BOT_CHANGED=false
TESTBED_BOT_CHANGED=false

# A bot is only considered when its systemd unit is enabled.
if systemctl is-enabled swf-panda-bot.service >/dev/null 2>&1 &&
   _bot_code_changed "$PREV_RELEASE_DIR" "$RELEASE_DIR" panda panda_bot.py; then
    PANDA_BOT_CHANGED=true
fi

if systemctl is-enabled swf-testbed-bot.service >/dev/null 2>&1 &&
   _bot_code_changed "$PREV_RELEASE_DIR" "$RELEASE_DIR" testbed_bot testbed_bot.py; then
    TESTBED_BOT_CHANGED=true
fi
0257 
# Health check — a single request to the API endpoint before the bots are
# restarted. A failed check only prints a warning; it does not abort.
log "Performing health check..."
HEALTH_URL="https://pandaserver02.sdcc.bnl.gov/swf-monitor/api/"
# When no HTTP response is received, curl's -w "%{http_code}" already prints
# 000 before curl exits non-zero, so the failure branch must not print a
# second "000" (that produced "HTTP 000000"). The default below only covers
# the case where curl produced no output at all.
HTTP_STATUS=$(curl -k -s -o /dev/null -w "%{http_code}" "$HEALTH_URL") || true
HTTP_STATUS="${HTTP_STATUS:-000}"

if [ "$HTTP_STATUS" = "200" ]; then
    log "✅ Health check PASSED - Application responding (HTTP $HTTP_STATUS)"
else
    log "❌ Health check FAILED - Application not responding (HTTP $HTTP_STATUS)"
    echo "WARNING: Deployment completed but application may not be working correctly"
    echo "Check Apache error logs: sudo tail -f /var/log/httpd/error_log"
fi
0270 
# Restart the bots whose code was flagged as changed. This runs after the
# health check; the flags are the literal strings "true" / "false".
case "$PANDA_BOT_CHANGED" in
    true)
        log "Bot code changed — restarting PanDA Mattermost bot..."
        systemctl restart swf-panda-bot.service
        ;;
    *)
        log "Bot code unchanged — skipping PanDA bot restart"
        ;;
esac

case "$TESTBED_BOT_CHANGED" in
    true)
        log "Bot code changed — restarting Testbed Mattermost bot..."
        systemctl restart swf-testbed-bot.service
        ;;
    *)
        log "Bot code unchanged — skipping Testbed bot restart"
        ;;
esac
0285 
# Cleanup old releases (keep last 5)
# Entries are ordered newest-first by modification time; everything from the
# sixth entry on is removed. Names are read one per line (no word splitting)
# and passed after "--" with a "./" prefix so they can never be parsed as
# options. Removal stays best-effort: a failure is logged, not fatal.
log "Cleaning up old releases..."
# ${VAR:?} plus the explicit exit guarantee that nothing is deleted from the
# wrong directory if the releases directory cannot be entered.
cd "${DEPLOY_ROOT:?}/releases" || exit 1
ls -1t | tail -n +6 | while IFS= read -r OLD_RELEASE; do
    [ -n "$OLD_RELEASE" ] || continue
    rm -rf -- "./$OLD_RELEASE" || log "  WARNING: could not remove old release $OLD_RELEASE"
done
0290 
# Final summary. Path expansions inside the command substitutions are quoted
# so they are passed as single words.
log "Deployment completed successfully!"
log "Active release: $DEPLOY_NAME"
log "Git commit: $(cd "$RELEASE_DIR" && git rev-parse --short HEAD)"

# Show status
log "Current deployment status:"
echo "  Release: $DEPLOY_NAME"
echo "  Path: $RELEASE_DIR"
echo "  Current: $(readlink "$DEPLOY_ROOT/current")"
0300 
# If the release ships its own deploy-swf-monitor.sh, compare it with the
# installed copy under $DEPLOY_ROOT/bin and print a reminder when they differ
# (a diff error, e.g. a missing installed copy, also triggers the reminder).
# Nothing is updated automatically.
if [[ -f "$RELEASE_DIR/deploy-swf-monitor.sh" ]] &&
   ! diff -q "$DEPLOY_ROOT/bin/deploy-swf-monitor.sh" \
             "$RELEASE_DIR/deploy-swf-monitor.sh" >/dev/null 2>&1; then
    printf '%s\n' \
        "" \
        "NOTE: deploy-swf-monitor.sh in repo differs from $DEPLOY_ROOT/bin/deploy-swf-monitor.sh" \
        "Review and update the production copy manually."
fi