Health Probes

import time
import asyncio
from enum import Enum
from datetime import datetime

class ProbeStatus(Enum):
    HEALTHY = "healthy"
    DEGRADED = "degraded"
    UNHEALTHY = "unhealthy"

class HealthProbe:
    def __init__(self, timeout_sec=5, check_interval_sec=10):
        self.timeout_sec = timeout_sec
        self.check_interval_sec = check_interval_sec
        self.db_connected = True
        self.cache_connected = True
        self.request_latency_ms = 50
        self.queue_depth = 100
        self.max_queue = 10000

    async def check_database(self):
        """Simulate database connectivity check"""
        try:
            # In real code: execute SELECT 1 or equivalent
            await asyncio.sleep(0.01)
            return self.db_connected
        except Exception:
            return False

    async def check_cache(self):
        """Simulate cache connectivity check"""
        try:
            await asyncio.sleep(0.005)
            return self.cache_connected
        except Exception:
            return False

    async def check_latency(self):
        """Check if request latency is acceptable"""
        # Latency threshold: 500ms
        return self.request_latency_ms < 500

    async def check_resources(self):
        """Check queue depth and resource usage"""
        queue_utilization = self.queue_depth / self.max_queue
        return queue_utilization < 0.9

    async def liveness_check(self):
        """Full process check (restart if fails)"""
        try:
            db = await asyncio.wait_for(
                self.check_database(), timeout=self.timeout_sec
            )
            if not db:
                return ProbeStatus.UNHEALTHY

            # If database is reachable, process is alive
            return ProbeStatus.HEALTHY
        except asyncio.TimeoutError:
            return ProbeStatus.UNHEALTHY

    async def readiness_check(self):
        """Can serve traffic? (remove from LB if not)"""
        try:
            db = await asyncio.wait_for(
                self.check_database(), timeout=2
            )
            cache = await asyncio.wait_for(
                self.check_cache(), timeout=2
            )
            latency_ok = await self.check_latency()
            resources_ok = await self.check_resources()

            if not db or not cache:
                return ProbeStatus.DEGRADED

            if not latency_ok or not resources_ok:
                return ProbeStatus.DEGRADED

            return ProbeStatus.HEALTHY
        except asyncio.TimeoutError:
            return ProbeStatus.DEGRADED

    async def get_health_report(self):
        """Full health report for /health endpoint"""
        liveness = await self.liveness_check()
        readiness = await self.readiness_check()

        return {
            "timestamp": datetime.now().isoformat(),
            "liveness": liveness.value,
            "readiness": readiness.value,
            "uptime_seconds": 3600,
            "dependencies": {
                "database": "connected" if self.db_connected else "disconnected",
                "cache": "connected" if self.cache_connected else "disconnected"
            },
            "metrics": {
                "request_latency_ms": self.request_latency_ms,
                "queue_depth": self.queue_depth,
                "queue_utilization_percent": (self.queue_depth / self.max_queue) * 100
            }
        }

# Example usage
async def main():
    probe = HealthProbe()

    # Simulate normal operation
    report = await probe.get_health_report()
    print("Health Report:", report)

    # Simulate degradation
    probe.db_connected = False
    report = await probe.get_health_report()
    print("Degraded Report:", report)

asyncio.run(main())

package main

import (
    "fmt"
    "net/http"
    "time"
)

type ProbeStatus string

const (
    Healthy   ProbeStatus = "healthy"
    Degraded  ProbeStatus = "degraded"
    Unhealthy ProbeStatus = "unhealthy"
)

type HealthProbe struct {
    dbConnected       bool
    cacheConnected    bool
    requestLatencyMs  int64
    queueDepth        int64
    maxQueue          int64
    lastFailureTime   time.Time
}

func NewHealthProbe() *HealthProbe {
    return &HealthProbe{
        dbConnected:      true,
        cacheConnected:   true,
        requestLatencyMs: 50,
        queueDepth:       100,
        maxQueue:         10000,
    }
}

func (hp *HealthProbe) CheckDatabase() bool {
    // In real code: execute SELECT 1
    return hp.dbConnected
}

func (hp *HealthProbe) CheckCache() bool {
    // In real code: ping Redis/Memcached
    return hp.cacheConnected
}

func (hp *HealthProbe) CheckLatency() bool {
    return hp.requestLatencyMs < 500
}

func (hp *HealthProbe) CheckResources() bool {
    utilization := float64(hp.queueDepth) / float64(hp.maxQueue)
    return utilization < 0.9
}

func (hp *HealthProbe) LivenessCheck() ProbeStatus {
    if !hp.CheckDatabase() {
        hp.lastFailureTime = time.Now()
        return Unhealthy
    }
    return Healthy
}

func (hp *HealthProbe) ReadinessCheck() ProbeStatus {
    if !hp.CheckDatabase() || !hp.CheckCache() {
        return Degraded
    }

    if !hp.CheckLatency() || !hp.CheckResources() {
        return Degraded
    }

    return Healthy
}

func (hp *HealthProbe) ServeHealth(w http.ResponseWriter, r *http.Request) {
    liveness := hp.LivenessCheck()
    readiness := hp.ReadinessCheck()

    health := map[string]interface{}{
        "timestamp": time.Now().Format(time.RFC3339),
        "liveness":  liveness,
        "readiness": readiness,
        "uptime_seconds": 3600,
        "dependencies": map[string]string{
            "database": map[bool]string{true: "connected", false: "disconnected"}[hp.dbConnected],
            "cache":    map[bool]string{true: "connected", false: "disconnected"}[hp.cacheConnected],
        },
        "metrics": map[string]interface{}{
            "request_latency_ms":   hp.requestLatencyMs,
            "queue_depth":          hp.queueDepth,
            "queue_utilization_pct": (hp.queueDepth * 100) / hp.maxQueue,
        },
    }

    w.Header().Set("Content-Type", "application/json")

    // Return 503 if unhealthy
    if liveness == Unhealthy {
        w.WriteHeader(http.StatusServiceUnavailable)
    } else if readiness == Degraded {
        w.WriteHeader(http.StatusServiceUnavailable)
    } else {
        w.WriteHeader(http.StatusOK)
    }

    fmt.Fprintf(w, "%v\n", health)
}

func main() {
    probe := NewHealthProbe()
    http.HandleFunc("/health", probe.ServeHealth)
    http.ListenAndServe(":8080", nil)
}

const http = require('http');

const ProbeStatus = {
    HEALTHY: 'healthy',
    DEGRADED: 'degraded',
    UNHEALTHY: 'unhealthy'
};

class HealthProbe {
    constructor() {
        this.dbConnected = true;
        this.cacheConnected = true;
        this.requestLatencyMs = 50;
        this.queueDepth = 100;
        this.maxQueue = 10000;
    }

    checkDatabase() {
        // In real code: query database
        return this.dbConnected;
    }

    checkCache() {
        // In real code: ping Redis
        return this.cacheConnected;
    }

    checkLatency() {
        return this.requestLatencyMs < 500;
    }

    checkResources() {
        const utilization = this.queueDepth / this.maxQueue;
        return utilization < 0.9;
    }

    livenessCheck() {
        if (!this.checkDatabase()) {
            return ProbeStatus.UNHEALTHY;
        }
        return ProbeStatus.HEALTHY;
    }

    readinessCheck() {
        if (!this.checkDatabase() || !this.checkCache()) {
            return ProbeStatus.DEGRADED;
        }

        if (!this.checkLatency() || !this.checkResources()) {
            return ProbeStatus.DEGRADED;
        }

        return ProbeStatus.HEALTHY;
    }

    getHealthReport() {
        const liveness = this.livenessCheck();
        const readiness = this.readinessCheck();

        return {
            timestamp: new Date().toISOString(),
            liveness,
            readiness,
            uptime_seconds: 3600,
            dependencies: {
                database: this.dbConnected ? 'connected' : 'disconnected',
                cache: this.cacheConnected ? 'connected' : 'disconnected'
            },
            metrics: {
                request_latency_ms: this.requestLatencyMs,
                queue_depth: this.queueDepth,
                queue_utilization_percent: (this.queueDepth / this.maxQueue) * 100
            }
        };
    }
}

const probe = new HealthProbe();

const server = http.createServer((req, res) => {
    if (req.url === '/health') {
        const report = probe.getHealthReport();

        res.setHeader('Content-Type', 'application/json');

        if (report.liveness === ProbeStatus.UNHEALTHY ||
            report.readiness === ProbeStatus.DEGRADED) {
            res.writeHead(503);
        } else {
            res.writeHead(200);
        }

        res.end(JSON.stringify(report, null, 2));
    } else {
        res.writeHead(404);
        res.end('Not found');
    }
});

server.listen(8080, () => {
    console.log('Health probe listening on port 8080');
});

TL;DR

Learning Objectives

Motivating Scenario

Core Concepts

Practical Example

When to Use vs. When NOT to Use

Patterns and Pitfalls

Design Review Checklist

Self-Check

Next Steps

References

Health Probes

TL;DR​

Learning Objectives​

Motivating Scenario​

Core Concepts​

Practical Example​

When to Use vs. When NOT to Use​

Patterns and Pitfalls​

Design Review Checklist​

Self-Check​

Next Steps​

References​

TL;DR

Learning Objectives

Motivating Scenario

Core Concepts

Practical Example

When to Use vs. When NOT to Use

Patterns and Pitfalls

Design Review Checklist

Self-Check

Next Steps

References