Skip to content

Metrics

Collect and visualize application metrics with Prometheus.

Metric Types

Type Use Case Example
Counter Cumulative count Total requests
Gauge Current value Active connections
Histogram Distribution Response times
Summary Distribution with quantiles Request duration

Setting Up Prometheus Metrics

Installation

pip install prometheus-client

Basic Setup

from prometheus_client import Counter, Gauge, Histogram, generate_latest, CONTENT_TYPE_LATEST
from fastapi import FastAPI, Response

app = FastAPI()

# Define metrics
# Counter: monotonically increasing total of HTTP requests, partitioned by
# method, endpoint and response status code.
REQUEST_COUNT = Counter(
    'http_requests_total',
    'Total HTTP requests',
    ['method', 'endpoint', 'status']
)

# Histogram: per-endpoint request latency distribution. Buckets span
# 10ms-10s to cover typical API SLO ranges.
REQUEST_LATENCY = Histogram(
    'http_request_duration_seconds',
    'HTTP request latency',
    ['endpoint'],
    buckets=[0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10]
)

# Gauge: number of requests currently in flight, per endpoint.
ACTIVE_REQUESTS = Gauge(
    'http_requests_in_progress',
    'HTTP requests currently in progress',
    ['endpoint']
)

# Metrics endpoint: Prometheus scrapes this path (see prometheus.yml below).
@app.get("/metrics")
async def metrics():
    """Serve all registered metrics in the Prometheus text exposition format."""
    payload = generate_latest()
    return Response(content=payload, media_type=CONTENT_TYPE_LATEST)

Middleware for Request Metrics

import time
from fastapi import Request

@app.middleware("http")
async def metrics_middleware(request: Request, call_next):
    """Record request count, latency and in-progress gauge for every request.

    Metrics are recorded in ``finally`` so they are updated even when the
    handler raises; the exception still propagates to the caller.
    """
    # NOTE(review): raw URL path as a label means parameterized routes
    # (/users/123) create one series per value — high cardinality. Consider
    # labelling by route template instead; confirm against route design.
    endpoint = request.url.path
    method = request.method

    ACTIVE_REQUESTS.labels(endpoint=endpoint).inc()
    start_time = time.time()
    # Pre-set the status so the finally block never reads an unbound name.
    # The original `except Exception: status = 500; raise` missed
    # BaseException subclasses (e.g. asyncio.CancelledError), which left
    # `status` undefined and turned the metrics update into a NameError.
    status = 500

    try:
        response = await call_next(request)
        status = response.status_code
    finally:
        duration = time.time() - start_time
        ACTIVE_REQUESTS.labels(endpoint=endpoint).dec()
        REQUEST_COUNT.labels(method=method, endpoint=endpoint, status=status).inc()
        REQUEST_LATENCY.labels(endpoint=endpoint).observe(duration)

    return response

Custom Business Metrics

# Business metrics
# Counter of orders created, partitioned by payment method.
ORDERS_CREATED = Counter(
    'orders_created_total',
    'Total orders created',
    ['payment_method']
)

# Distribution of order totals in dollars; buckets sized for $10-$1000 orders.
ORDER_TOTAL = Histogram(
    'order_total_dollars',
    'Order total in dollars',
    buckets=[10, 25, 50, 100, 250, 500, 1000]
)

# Gauge of currently active users. Deliberately unlabelled: a per-user label
# would be unbounded cardinality (see Common Pitfalls below).
ACTIVE_USERS = Gauge(
    'active_users',
    'Currently active users'
)

# Usage in code
@app.post("/orders")
async def create_order(order: OrderCreate):
    """Create an order and record the associated business metrics."""
    # Create order...
    ORDER_TOTAL.observe(order.total)
    ORDERS_CREATED.labels(payment_method=order.payment_method).inc()
    return order

# Track active users
async def user_connected(user_id: int):
    # user_id intentionally unused in the metric: ACTIVE_USERS carries no
    # labels, avoiding a per-user (high-cardinality) series per user.
    ACTIVE_USERS.inc()

async def user_disconnected(user_id: int):
    # Mirror of user_connected: drop the gauge when the user disconnects.
    ACTIVE_USERS.dec()

Database Metrics

from sqlalchemy import event

# Query latency distribution, labelled by SQL verb (SELECT, INSERT, ...).
DB_QUERY_DURATION = Histogram(
    'db_query_duration_seconds',
    'Database query duration',
    ['operation']
)

# Connection-pool occupancy; 'state' is 'active' (checked out) or 'idle'
# (checked in) — see update_pool_metrics below.
DB_CONNECTIONS = Gauge(
    'db_connections',
    'Database connections',
    ['state']
)

# SQLAlchemy event listeners
@event.listens_for(engine.sync_engine, "before_cursor_execute")
def before_cursor_execute(conn, cursor, statement, parameters, context, executemany):
    # Stash the wall-clock start on the connection so the matching
    # "after_cursor_execute" hook can compute this query's duration.
    conn.info['query_start'] = time.time()

@event.listens_for(engine.sync_engine, "after_cursor_execute")
def after_cursor_execute(conn, cursor, statement, parameters, context, executemany):
    """Observe query duration in DB_QUERY_DURATION, labelled by SQL verb."""
    # pop() both removes the stale timestamp from conn.info and guards
    # against the before-hook not having fired (e.g. listener registered
    # while a query was already in flight) — the original raised KeyError.
    start = conn.info.pop('query_start', None)
    if start is None:
        return
    duration = time.time() - start
    # First word of the statement is the SQL verb (SELECT, INSERT, ...).
    # Guard the empty/whitespace-only case that made split()[0] raise.
    words = statement.split()
    operation = words[0].upper() if words else 'UNKNOWN'
    DB_QUERY_DURATION.labels(operation=operation).observe(duration)

# Track connection pool
def update_pool_metrics():
    """Refresh the DB_CONNECTIONS gauges from the engine's pool counters."""
    pool = engine.pool
    checked_out = pool.checkedout()   # connections currently in use
    checked_in = pool.checkedin()     # connections sitting idle in the pool
    DB_CONNECTIONS.labels(state='active').set(checked_out)
    DB_CONNECTIONS.labels(state='idle').set(checked_in)

Cache Metrics

# Cache effectiveness counters; the 'cache' label names the backend in use.
CACHE_HITS = Counter('cache_hits_total', 'Cache hits', ['cache'])
CACHE_MISSES = Counter('cache_misses_total', 'Cache misses', ['cache'])

async def get_cached(key: str, fetch_fn):
    """Return the cached value for *key*, fetching and storing it on a miss."""
    cached = await cache.get(key)
    if cached is None:
        # Miss: compute via fetch_fn and populate the cache for next time.
        CACHE_MISSES.labels(cache='redis').inc()
        fresh = await fetch_fn()
        await cache.set(key, fresh)
        return fresh
    CACHE_HITS.labels(cache='redis').inc()
    return cached

External API Metrics

# Outbound API call counter, labelled by target service, endpoint and
# resulting status (an HTTP code, or 'error' when the request raised).
EXTERNAL_API_REQUESTS = Counter(
    'external_api_requests_total',
    'External API requests',
    ['service', 'endpoint', 'status']
)

# Outbound call latency per service (library default buckets).
EXTERNAL_API_LATENCY = Histogram(
    'external_api_duration_seconds',
    'External API latency',
    ['service']
)

async def call_external_api(service: str, endpoint: str, **kwargs):
    """GET *endpoint*, recording request count and latency for *service*.

    The counter's ``status`` label is the HTTP status code, or ``'error'``
    when the request raised. Metrics are recorded even on failure, and the
    exception propagates to the caller.
    """
    # Default covers the failure path; overwritten once a response arrives.
    status = 'error'
    start = time.time()
    try:
        # Bug fix: the original did `await httpx.get(...)`, but httpx.get is
        # the *synchronous* shortcut — it returns a Response, not an
        # awaitable, so awaiting it raised TypeError. Async requests must go
        # through httpx.AsyncClient.
        async with httpx.AsyncClient() as client:
            response = await client.get(endpoint, **kwargs)
        status = response.status_code
        return response
    finally:
        EXTERNAL_API_REQUESTS.labels(
            service=service,
            endpoint=endpoint,
            status=status
        ).inc()
        EXTERNAL_API_LATENCY.labels(service=service).observe(time.time() - start)

Prometheus Configuration

# prometheus.yml
global:
  scrape_interval: 15s
  evaluation_interval: 15s

scrape_configs:
  - job_name: 'api'
    static_configs:
      - targets: ['api:8000']
    metrics_path: /metrics

  - job_name: 'postgres'
    static_configs:
      - targets: ['postgres-exporter:9187']

Grafana Dashboards

Essential Panels

Request Rate:

rate(http_requests_total[5m])

Error Rate:

sum(rate(http_requests_total{status=~"5.."}[5m]))
/ sum(rate(http_requests_total[5m]))

Latency P95:

histogram_quantile(0.95, rate(http_request_duration_seconds_bucket[5m]))

Requests in Progress:

http_requests_in_progress

Dashboard JSON Example

{
  "title": "API Dashboard",
  "panels": [
    {
      "title": "Request Rate",
      "type": "graph",
      "targets": [
        {
          "expr": "sum(rate(http_requests_total[5m])) by (endpoint)",
          "legendFormat": "{{endpoint}}"
        }
      ]
    },
    {
      "title": "Error Rate",
      "type": "stat",
      "targets": [
        {
          "expr": "sum(rate(http_requests_total{status=~\"5..\"}[5m])) / sum(rate(http_requests_total[5m])) * 100"
        }
      ],
      "thresholds": [
        {"value": 0, "color": "green"},
        {"value": 1, "color": "yellow"},
        {"value": 5, "color": "red"}
      ]
    },
    {
      "title": "Latency",
      "type": "graph",
      "targets": [
        {
          "expr": "histogram_quantile(0.50, rate(http_request_duration_seconds_bucket[5m]))",
          "legendFormat": "p50"
        },
        {
          "expr": "histogram_quantile(0.95, rate(http_request_duration_seconds_bucket[5m]))",
          "legendFormat": "p95"
        },
        {
          "expr": "histogram_quantile(0.99, rate(http_request_duration_seconds_bucket[5m]))",
          "legendFormat": "p99"
        }
      ]
    }
  ]
}

Best Practices

  1. Use labels wisely — High cardinality kills performance
  2. Choose right buckets — Match your SLOs
  3. Name consistently — follow the noun_verb_unit pattern
  4. Include units — _seconds, _bytes, _total
  5. Document metrics — Help text is important
  6. Set up alerts — Metrics are useless without alerting

Common Pitfalls

# Bad: High cardinality label
REQUEST_COUNT.labels(user_id=user.id)  # Don't!

# Good: Use broader categories
REQUEST_COUNT.labels(user_tier=user.tier)

# Bad: Counter for current value
current_users = Counter('current_users', ...)  # Don't!

# Good: Gauge for current value
current_users = Gauge('current_users', ...)

See Also

  • Alerting -- Setting up alert rules based on the metrics collected here
  • Database Monitoring -- PostgreSQL-specific metrics, pg_stat views, and connection pool tracking