Skip to content

Backup & Recovery

Protect your data with reliable backup strategies.

Backup Types

Type Tool Speed Size Point-in-Time Use Case
Logical pg_dump Slow Large No Small DBs, selective restore
Physical pg_basebackup Fast Full Yes (with WAL) Large DBs, disaster recovery
Continuous WAL archiving Real-time Incremental Yes Production systems

Logical Backups (pg_dump)

Full Database Backup

# SQL format (readable, slowest restore)
pg_dump -h localhost -U postgres sartiq > backup.sql

# Custom format (compressed, parallel restore)
pg_dump -h localhost -U postgres -Fc sartiq > backup.dump

# Directory format (parallel dump and restore)
pg_dump -h localhost -U postgres -Fd -j 4 sartiq -f backup_dir/

Backup Options

# Data only (no schema)
pg_dump -h localhost -U postgres --data-only sartiq > data.sql

# Schema only (no data)
pg_dump -h localhost -U postgres --schema-only sartiq > schema.sql

# Specific tables
pg_dump -h localhost -U postgres -t users -t bookings sartiq > tables.sql

# Exclude tables
pg_dump -h localhost -U postgres --exclude-table='logs*' sartiq > backup.sql

# With compression
pg_dump -h localhost -U postgres sartiq | gzip > backup.sql.gz

Restore from pg_dump

# SQL format
psql -h localhost -U postgres sartiq < backup.sql

# Custom format (parallel restore)
pg_restore -h localhost -U postgres -d sartiq -j 4 backup.dump

# Directory format
pg_restore -h localhost -U postgres -d sartiq -j 4 backup_dir/

# Restore to new database
createdb -h localhost -U postgres sartiq_restored
pg_restore -h localhost -U postgres -d sartiq_restored backup.dump

Restore Options

# Clean (drop objects before recreating)
pg_restore -h localhost -U postgres -d sartiq --clean backup.dump

# Data only
pg_restore -h localhost -U postgres -d sartiq --data-only backup.dump

# Specific tables
pg_restore -h localhost -U postgres -d sartiq -t users backup.dump

# List contents without restoring
pg_restore --list backup.dump

Physical Backups (pg_basebackup)

Basic Backup

# Backup to directory
pg_basebackup -h localhost -U postgres -D /backups/base -Fp -Xs -P

# Options:
# -D: Target directory
# -Fp: Plain format (directory)
# -Xs: Stream WAL during backup
# -P: Show progress

Compressed Backup

# Tar with gzip (use -Xf / --wal-method=fetch: the default -Xs streams
# WAL to a separate file, which is impossible when the backup goes to stdout)
pg_basebackup -h localhost -U postgres -D - -Ft -Xf | gzip > backup.tar.gz

# With fast checkpoint (-c fast: start the backup immediately instead of
# waiting for a spread checkpoint)
pg_basebackup -h localhost -U postgres -D /backups/base -Fp -Xs -P -c fast

Restore from Base Backup

# 1. Stop PostgreSQL
sudo systemctl stop postgresql

# 2. Clear data directory
rm -rf /var/lib/postgresql/15/main/*

# 3. Restore backup
cp -r /backups/base/* /var/lib/postgresql/15/main/

# 4. Set permissions
chown -R postgres:postgres /var/lib/postgresql/15/main

# 5. Start PostgreSQL
sudo systemctl start postgresql

Point-in-Time Recovery (PITR)

Enable WAL Archiving

# postgresql.conf
archive_mode = on
archive_command = 'cp %p /archive/%f'
# Or with compression
# archive_command = 'gzip < %p > /archive/%f.gz'

wal_level = replica
max_wal_senders = 3

Backup Procedure

# 1. Take base backup
pg_basebackup -h localhost -U postgres -D /backups/base -Fp -Xs -P

# 2. WAL files are continuously archived by archive_command

Restore to Point in Time

# 1. Stop PostgreSQL
sudo systemctl stop postgresql

# 2. Restore base backup
rm -rf /var/lib/postgresql/15/main/*
cp -r /backups/base/* /var/lib/postgresql/15/main/

# 3. Make archived WAL available — restore_command (step 5) fetches files
#    on demand; copying into pg_wal/ directly only works for uncompressed archives
cp /archive/* /var/lib/postgresql/15/main/pg_wal/

# 4. Create recovery signal file
touch /var/lib/postgresql/15/main/recovery.signal

# 5. Configure recovery target in postgresql.conf
# restore_command = 'cp /archive/%f %p'
# recovery_target_time = '2024-01-15 14:30:00'
# recovery_target_action = 'promote'

# 6. Start PostgreSQL
sudo systemctl start postgresql

Recovery Targets

# postgresql.conf recovery options

# Recover to specific time
recovery_target_time = '2024-01-15 14:30:00 UTC'

# Recover to specific transaction
recovery_target_xid = '12345'

# Recover to named restore point
recovery_target_name = 'before_migration'

# Stop as soon as a consistent state is reached (end of the base backup);
# to recover to the end of available WAL, set no recovery target at all
recovery_target = 'immediate'

Backup Scripts

Daily Backup Script

#!/bin/bash
# /scripts/backup_db.sh
#
# Daily logical backup: pg_dump in custom format, gzipped, with
# retention-based cleanup of old archives. Intended to run from cron
# as a user with passwordless database access (e.g. via ~/.pgpass).

set -euo pipefail

readonly DB_NAME="sartiq"
readonly DB_USER="postgres"
readonly DB_HOST="localhost"
readonly BACKUP_DIR="/backups"
readonly RETENTION_DAYS=7
DATE=$(date +%Y%m%d_%H%M%S)
readonly DATE

# Make sure the target directory exists before writing into it.
mkdir -p -- "$BACKUP_DIR"

# Create backup (custom format allows parallel / selective restore).
BACKUP_FILE="${BACKUP_DIR}/${DB_NAME}_${DATE}.dump"
pg_dump -h "$DB_HOST" -U "$DB_USER" -Fc "$DB_NAME" > "$BACKUP_FILE"

# Compress (NOTE: -Fc output is already compressed; gzip adds only a
# little, kept here so the retention glob below matches *.dump.gz).
gzip "$BACKUP_FILE"

# Remove backups older than the retention window.
find "$BACKUP_DIR" -name "*.dump.gz" -mtime +"$RETENTION_DAYS" -delete

# Verify the compressed backup exists AND is non-empty (-s, not -f:
# a zero-byte file would pass the original check).
if [ -s "${BACKUP_FILE}.gz" ]; then
    echo "Backup created: ${BACKUP_FILE}.gz"
    ls -lh "${BACKUP_FILE}.gz"
else
    echo "Backup failed!" >&2
    exit 1
fi

Cron Schedule

# Daily backup at 3 AM
0 3 * * * /scripts/backup_db.sh >> /var/log/db_backup.log 2>&1

# Hourly WAL archive check
0 * * * * /scripts/check_wal_archive.sh >> /var/log/wal_archive.log 2>&1

Cloud Provider Backups

AWS RDS

# Automated backups are enabled by default
# Manual snapshot
aws rds create-db-snapshot \
    --db-instance-identifier sartiq-prod \
    --db-snapshot-identifier sartiq-manual-20240115

# Restore from snapshot
aws rds restore-db-instance-from-db-snapshot \
    --db-instance-identifier sartiq-restored \
    --db-snapshot-identifier sartiq-manual-20240115

# Point-in-time restore
aws rds restore-db-instance-to-point-in-time \
    --source-db-instance-identifier sartiq-prod \
    --target-db-instance-identifier sartiq-pitr \
    --restore-time 2024-01-15T14:30:00Z

GCP Cloud SQL

# Create backup
gcloud sql backups create \
    --instance=sartiq-prod \
    --description="Manual backup"

# List backups
gcloud sql backups list --instance=sartiq-prod

# Restore from backup
gcloud sql backups restore BACKUP_ID \
    --restore-instance=sartiq-restored \
    --backup-instance=sartiq-prod

# Point-in-time restore
gcloud sql instances clone sartiq-prod sartiq-restored \
    --point-in-time=2024-01-15T14:30:00Z

Verification

Test Backup Integrity

#!/bin/bash
# /scripts/verify_backup.sh
#
# Restore a backup into a throwaway database and run sanity queries
# against it, then drop the database again.
#
# Usage: verify_backup.sh /path/to/backup.dump

set -euo pipefail

if [ $# -ne 1 ] || [ ! -f "$1" ]; then
    printf 'Usage: %s <backup-file>\n' "${0##*/}" >&2
    exit 2
fi

BACKUP_FILE=$1
TEST_DB="backup_test_$(date +%s)"

# Always drop the scratch database, even when a restore/query step
# fails — the original leaked the test DB on any mid-script error.
cleanup() { dropdb --if-exists "$TEST_DB"; }
trap cleanup EXIT

# Create test database
createdb "$TEST_DB"

# Restore backup ('--' protects against a leading '-' in the filename)
pg_restore -d "$TEST_DB" -- "$BACKUP_FILE"

# Verify table counts
psql -d "$TEST_DB" -c "
    SELECT schemaname, relname, n_live_tup
    FROM pg_stat_user_tables
    ORDER BY n_live_tup DESC;
"

# Run integrity checks
psql -d "$TEST_DB" -c "
    SELECT COUNT(*) as users FROM users;
    SELECT COUNT(*) as bookings FROM bookings;
"

Automated Backup Testing

# tests/test_backup_restore.py
import subprocess
import psycopg2

def test_backup_restores_successfully():
    """Restore the latest backup into a scratch database and verify data.

    Requires ``createdb``/``dropdb``/``pg_restore`` on PATH and a reachable
    PostgreSQL server (connection settings come from libpq env vars —
    TODO confirm against the CI environment).
    """
    backup_file = "/backups/latest.dump"
    test_db = "backup_test"

    created = False
    try:
        # Create test database
        subprocess.run(["createdb", test_db], check=True)
        created = True

        # Restore
        subprocess.run([
            "pg_restore", "-d", test_db, backup_file
        ], check=True)

        # Verify data. The connection MUST be closed before dropdb runs:
        # the original leaked it, making DROP DATABASE fail with
        # "database is being accessed by other users".
        conn = psycopg2.connect(dbname=test_db)
        try:
            with conn.cursor() as cur:
                cur.execute("SELECT COUNT(*) FROM users")
                count = cur.fetchone()[0]
            assert count > 0, "No users found in backup"
        finally:
            conn.close()
    finally:
        # Only drop what we actually created; keep the original's
        # best-effort semantics (dropdb failures are ignored).
        if created:
            subprocess.run(["dropdb", test_db])

Disaster Recovery

Recovery Checklist

  1. Identify failure point — Database corruption, deletion, or hardware failure?
  2. Determine target time — What point do we need to restore to?
  3. Locate backups — Latest base backup + WAL archives
  4. Prepare recovery environment — New server or same server?
  5. Execute restore — Follow PITR procedure
  6. Verify data integrity — Run verification queries
  7. Update connection strings — Point applications to recovered database
  8. Document incident — Record what happened and how it was resolved

Recovery Time Objectives (RTO)

Backup Strategy RTO Data Loss
Daily pg_dump Hours Up to 24h
pg_basebackup + WAL Minutes Up to last archive
Streaming replication Seconds Near zero
Synchronous replication Seconds Zero

Best Practices

  1. Test restores regularly — A backup you can't restore is worthless
  2. Store backups off-site — Different region/provider
  3. Encrypt backups — Protect sensitive data
  4. Monitor backup jobs — Alert on failures
  5. Document procedures — Runbooks for recovery
  6. Practice disaster recovery — Regular drills
  7. Keep multiple generations — Daily, weekly, monthly retention
  8. Automate everything — Manual backups get forgotten