Skip to content

Soft Deletes

Keep deleted records in the database, hidden from normal queries.

Basic Implementation

Schema

CREATE TABLE users (
    id SERIAL PRIMARY KEY,
    email VARCHAR(255) NOT NULL,
    name VARCHAR(255),
    created_at TIMESTAMP DEFAULT NOW(),
    deleted_at TIMESTAMP NULL  -- NULL = active, non-NULL = deleted
);

-- Unique constraint only on active records
CREATE UNIQUE INDEX idx_users_email_active
ON users(email)
WHERE deleted_at IS NULL;

-- Index for soft-deleted records (if needed)
CREATE INDEX idx_users_deleted
ON users(deleted_at)
WHERE deleted_at IS NOT NULL;

SQLAlchemy Model

from datetime import datetime
from sqlalchemy import Column, Integer, String, DateTime
from sqlalchemy.ext.declarative import declared_attr

class SoftDeleteMixin:
    """Mixin for soft delete functionality."""

    deleted_at = Column(DateTime, nullable=True, index=True)

    @property
    def is_deleted(self) -> bool:
        return self.deleted_at is not None

    def soft_delete(self):
        """Mark record as deleted."""
        self.deleted_at = datetime.utcnow()

    def restore(self):
        """Restore deleted record."""
        self.deleted_at = None


class User(Base, SoftDeleteMixin):
    __tablename__ = "users"

    id = Column(Integer, primary_key=True)
    email = Column(String(255), nullable=False)
    name = Column(String(255))
    created_at = Column(DateTime, default=datetime.utcnow)

Query Patterns

Default: Exclude Deleted

from sqlalchemy import select

# Active users only
stmt = select(User).where(User.deleted_at.is_(None))
result = await db.execute(stmt)
users = result.scalars().all()

Global Filter

Apply soft delete filter to all queries automatically.

from sqlalchemy.orm import Query

class SoftDeleteQuery(Query):
    """Query class that filters soft-deleted records by default."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def _get_base_query(self):
        # Add soft delete filter
        return super()._get_base_query().filter(
            self._entity_from_pre_ent_zero().class_.deleted_at.is_(None)
        )

    def with_deleted(self):
        """Include soft-deleted records."""
        return super()._get_base_query()

    def only_deleted(self):
        """Only soft-deleted records."""
        return super()._get_base_query().filter(
            self._entity_from_pre_ent_zero().class_.deleted_at.isnot(None)
        )

Repository Pattern

from typing import Generic, TypeVar, Optional
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession

T = TypeVar("T")


class SoftDeleteRepository(Generic[T]):
    """Repository with soft delete support."""

    def __init__(self, db: AsyncSession, model: type[T]):
        self.db = db
        self.model = model

    def _base_query(self, include_deleted: bool = False):
        """Base query with optional soft delete filter."""
        stmt = select(self.model)
        if not include_deleted:
            stmt = stmt.where(self.model.deleted_at.is_(None))
        return stmt

    async def get(self, id: int, include_deleted: bool = False) -> Optional[T]:
        """Get record by ID."""
        stmt = self._base_query(include_deleted).where(self.model.id == id)
        result = await self.db.execute(stmt)
        return result.scalar_one_or_none()

    async def list(self, include_deleted: bool = False) -> list[T]:
        """List all records."""
        stmt = self._base_query(include_deleted)
        result = await self.db.execute(stmt)
        return result.scalars().all()

    async def delete(self, record: T) -> None:
        """Soft delete a record."""
        record.deleted_at = datetime.utcnow()
        await self.db.flush()

    async def restore(self, record: T) -> None:
        """Restore a soft-deleted record."""
        record.deleted_at = None
        await self.db.flush()

    async def hard_delete(self, record: T) -> None:
        """Permanently delete a record."""
        await self.db.delete(record)
        await self.db.flush()

Index Strategies

Partial Index on Active Records

Most queries target active records only.

-- Faster lookups for active users
CREATE INDEX idx_users_email_active
ON users(email)
WHERE deleted_at IS NULL;

-- Query uses this index
SELECT * FROM users WHERE email = 'test@example.com' AND deleted_at IS NULL;

Unique Constraint on Active Records

Allow reusing values after deletion.

-- Email unique among active users only
CREATE UNIQUE INDEX idx_users_email_unique_active
ON users(email)
WHERE deleted_at IS NULL;

-- Now possible:
-- 1. User with email@example.com (deleted_at = NULL)
-- 2. Delete user (deleted_at = '2024-01-01')
-- 3. Create new user with email@example.com (deleted_at = NULL)

Composite Index for Admin Views

-- For "show all deleted users" view
CREATE INDEX idx_users_deleted_at
ON users(deleted_at)
WHERE deleted_at IS NOT NULL;

Cascading Soft Deletes

Option 1: Application-Level Cascade

async def delete_user(db: AsyncSession, user_id: int):
    """Soft delete user and related records."""
    user = await db.get(User, user_id)
    if not user:
        raise NotFoundError()

    now = datetime.utcnow()

    # Soft delete user
    user.deleted_at = now

    # Cascade to bookings
    await db.execute(
        update(Booking)
        .where(Booking.user_id == user_id)
        .where(Booking.deleted_at.is_(None))
        .values(deleted_at=now)
    )

    await db.commit()

Option 2: Database Trigger

CREATE OR REPLACE FUNCTION cascade_soft_delete()
RETURNS TRIGGER AS $$
BEGIN
    IF NEW.deleted_at IS NOT NULL AND OLD.deleted_at IS NULL THEN
        -- User was soft deleted, cascade to bookings
        UPDATE bookings
        SET deleted_at = NEW.deleted_at
        WHERE user_id = NEW.id
          AND deleted_at IS NULL;
    END IF;
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

CREATE TRIGGER user_soft_delete_cascade
AFTER UPDATE OF deleted_at ON users
FOR EACH ROW
EXECUTE FUNCTION cascade_soft_delete();

Querying with JOINs

Problem: JOINs Include Deleted Records

-- This might return deleted bookings!
SELECT u.name, b.date
FROM users u
JOIN bookings b ON b.user_id = u.id
WHERE u.deleted_at IS NULL;

Solution: Filter Both Tables

SELECT u.name, b.date
FROM users u
JOIN bookings b ON b.user_id = u.id
WHERE u.deleted_at IS NULL
  AND b.deleted_at IS NULL;

SQLAlchemy with Relationships

from sqlalchemy.orm import relationship

class User(Base, SoftDeleteMixin):
    __tablename__ = "users"

    # Relationship that filters deleted
    bookings = relationship(
        "Booking",
        primaryjoin="and_(User.id==Booking.user_id, Booking.deleted_at.is_(None))",
        lazy="selectin"
    )

    # All bookings including deleted (for admin)
    all_bookings = relationship(
        "Booking",
        primaryjoin="User.id==Booking.user_id",
        lazy="selectin"
    )

Hard Delete for Compliance

Sometimes you must truly delete data (GDPR, etc.).

async def gdpr_delete_user(db: AsyncSession, user_id: int):
    """Permanently delete user data for GDPR compliance."""
    # Hard delete personal data
    await db.execute(
        delete(User).where(User.id == user_id)
    )

    # Anonymize related records instead of deleting
    await db.execute(
        update(Booking)
        .where(Booking.user_id == user_id)
        .values(
            user_id=None,
            customer_name="[REDACTED]",
            customer_email="[REDACTED]"
        )
    )

    await db.commit()

Migration Pattern

Adding Soft Delete to Existing Table

"""Add soft delete to users table

Revision ID: abc123
"""
from alembic import op
import sqlalchemy as sa

def upgrade():
    # Add column
    op.add_column('users', sa.Column('deleted_at', sa.DateTime(), nullable=True))

    # Add partial index
    op.create_index(
        'idx_users_email_active',
        'users',
        ['email'],
        unique=True,
        postgresql_where=sa.text('deleted_at IS NULL')
    )

    # Drop old unique constraint
    op.drop_constraint('uq_users_email', 'users', type_='unique')


def downgrade():
    # Verify no soft-deleted records
    # (or decide how to handle them)
    op.drop_index('idx_users_email_active')
    op.create_unique_constraint('uq_users_email', 'users', ['email'])
    op.drop_column('users', 'deleted_at')

Best Practices

  1. Use partial indexes — Most queries target active records
  2. Filter in JOINs — Don't forget related tables
  3. Consider compliance — Some data must be truly deleted
  4. Add deleted_by — Track who deleted if auditing needed
  5. Archive old deleted records — Move to separate table periodically
  6. Test restore flows — Make sure restoration actually works
  7. Document behavior — Team should know about soft deletes