Soft Deletes¶
Keep deleted records in the database, hidden from normal queries.
Basic Implementation¶
Schema¶
CREATE TABLE users (
id SERIAL PRIMARY KEY,
email VARCHAR(255) NOT NULL,
name VARCHAR(255),
created_at TIMESTAMP DEFAULT NOW(),
deleted_at TIMESTAMP NULL -- NULL = active, non-NULL = deleted
);
-- Unique constraint only on active records
CREATE UNIQUE INDEX idx_users_email_active
ON users(email)
WHERE deleted_at IS NULL;
-- Index for soft-deleted records (if needed)
CREATE INDEX idx_users_deleted
ON users(deleted_at)
WHERE deleted_at IS NOT NULL;
SQLAlchemy Model¶
from datetime import datetime
from sqlalchemy import Column, Integer, String, DateTime
from sqlalchemy.ext.declarative import declared_attr
class SoftDeleteMixin:
"""Mixin for soft delete functionality."""
deleted_at = Column(DateTime, nullable=True, index=True)
@property
def is_deleted(self) -> bool:
return self.deleted_at is not None
def soft_delete(self):
"""Mark record as deleted."""
self.deleted_at = datetime.utcnow()
def restore(self):
"""Restore deleted record."""
self.deleted_at = None
class User(Base, SoftDeleteMixin):
__tablename__ = "users"
id = Column(Integer, primary_key=True)
email = Column(String(255), nullable=False)
name = Column(String(255))
created_at = Column(DateTime, default=datetime.utcnow)
Query Patterns¶
Default: Exclude Deleted¶
from sqlalchemy import select
# Active users only
stmt = select(User).where(User.deleted_at.is_(None))
result = await db.execute(stmt)
users = result.scalars().all()
Global Filter¶
Apply soft delete filter to all queries automatically.
from sqlalchemy.orm import Query
class SoftDeleteQuery(Query):
"""Query class that filters soft-deleted records by default."""
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
def _get_base_query(self):
# Add soft delete filter
return super()._get_base_query().filter(
self._entity_from_pre_ent_zero().class_.deleted_at.is_(None)
)
def with_deleted(self):
"""Include soft-deleted records."""
return super()._get_base_query()
def only_deleted(self):
"""Only soft-deleted records."""
return super()._get_base_query().filter(
self._entity_from_pre_ent_zero().class_.deleted_at.isnot(None)
)
Repository Pattern¶
from typing import Generic, TypeVar, Optional
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
T = TypeVar("T")
class SoftDeleteRepository(Generic[T]):
"""Repository with soft delete support."""
def __init__(self, db: AsyncSession, model: type[T]):
self.db = db
self.model = model
def _base_query(self, include_deleted: bool = False):
"""Base query with optional soft delete filter."""
stmt = select(self.model)
if not include_deleted:
stmt = stmt.where(self.model.deleted_at.is_(None))
return stmt
async def get(self, id: int, include_deleted: bool = False) -> Optional[T]:
"""Get record by ID."""
stmt = self._base_query(include_deleted).where(self.model.id == id)
result = await self.db.execute(stmt)
return result.scalar_one_or_none()
async def list(self, include_deleted: bool = False) -> list[T]:
"""List all records."""
stmt = self._base_query(include_deleted)
result = await self.db.execute(stmt)
return result.scalars().all()
async def delete(self, record: T) -> None:
"""Soft delete a record."""
record.deleted_at = datetime.utcnow()
await self.db.flush()
async def restore(self, record: T) -> None:
"""Restore a soft-deleted record."""
record.deleted_at = None
await self.db.flush()
async def hard_delete(self, record: T) -> None:
"""Permanently delete a record."""
await self.db.delete(record)
await self.db.flush()
Index Strategies¶
Partial Index on Active Records¶
Most queries target active records only.
-- Faster lookups for active users
CREATE INDEX idx_users_email_active
ON users(email)
WHERE deleted_at IS NULL;
-- Query uses this index
SELECT * FROM users WHERE email = 'test@example.com' AND deleted_at IS NULL;
Unique Constraint on Active Records¶
Allow reusing values after deletion.
-- Email unique among active users only
CREATE UNIQUE INDEX idx_users_email_unique_active
ON users(email)
WHERE deleted_at IS NULL;
-- Now possible:
-- 1. User with email@example.com (deleted_at = NULL)
-- 2. Delete user (deleted_at = '2024-01-01')
-- 3. Create new user with email@example.com (deleted_at = NULL)
Composite Index for Admin Views¶
-- For "show all deleted users" view
CREATE INDEX idx_users_deleted_at
ON users(deleted_at)
WHERE deleted_at IS NOT NULL;
Cascading Soft Deletes¶
Option 1: Application-Level Cascade¶
async def delete_user(db: AsyncSession, user_id: int):
"""Soft delete user and related records."""
user = await db.get(User, user_id)
if not user:
raise NotFoundError()
now = datetime.utcnow()
# Soft delete user
user.deleted_at = now
# Cascade to bookings
await db.execute(
update(Booking)
.where(Booking.user_id == user_id)
.where(Booking.deleted_at.is_(None))
.values(deleted_at=now)
)
await db.commit()
Option 2: Database Trigger¶
CREATE OR REPLACE FUNCTION cascade_soft_delete()
RETURNS TRIGGER AS $$
BEGIN
IF NEW.deleted_at IS NOT NULL AND OLD.deleted_at IS NULL THEN
-- User was soft deleted, cascade to bookings
UPDATE bookings
SET deleted_at = NEW.deleted_at
WHERE user_id = NEW.id
AND deleted_at IS NULL;
END IF;
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
CREATE TRIGGER user_soft_delete_cascade
AFTER UPDATE OF deleted_at ON users
FOR EACH ROW
EXECUTE FUNCTION cascade_soft_delete();
Querying with JOINs¶
Problem: JOINs Include Deleted Records¶
-- This might return deleted bookings!
SELECT u.name, b.date
FROM users u
JOIN bookings b ON b.user_id = u.id
WHERE u.deleted_at IS NULL;
Solution: Filter Both Tables¶
SELECT u.name, b.date
FROM users u
JOIN bookings b ON b.user_id = u.id
WHERE u.deleted_at IS NULL
AND b.deleted_at IS NULL;
SQLAlchemy with Relationships¶
from sqlalchemy.orm import relationship
class User(Base, SoftDeleteMixin):
__tablename__ = "users"
# Relationship that filters deleted
bookings = relationship(
"Booking",
primaryjoin="and_(User.id==Booking.user_id, Booking.deleted_at.is_(None))",
lazy="selectin"
)
# All bookings including deleted (for admin)
all_bookings = relationship(
"Booking",
primaryjoin="User.id==Booking.user_id",
lazy="selectin"
)
Hard Delete for Compliance¶
Sometimes you must truly delete data (GDPR, etc.).
async def gdpr_delete_user(db: AsyncSession, user_id: int):
"""Permanently delete user data for GDPR compliance."""
# Hard delete personal data
await db.execute(
delete(User).where(User.id == user_id)
)
# Anonymize related records instead of deleting
await db.execute(
update(Booking)
.where(Booking.user_id == user_id)
.values(
user_id=None,
customer_name="[REDACTED]",
customer_email="[REDACTED]"
)
)
await db.commit()
Migration Pattern¶
Adding Soft Delete to Existing Table¶
"""Add soft delete to users table
Revision ID: abc123
"""
from alembic import op
import sqlalchemy as sa
def upgrade():
# Add column
op.add_column('users', sa.Column('deleted_at', sa.DateTime(), nullable=True))
# Add partial index
op.create_index(
'idx_users_email_active',
'users',
['email'],
unique=True,
postgresql_where=sa.text('deleted_at IS NULL')
)
# Drop old unique constraint
op.drop_constraint('uq_users_email', 'users', type_='unique')
def downgrade():
# Verify no soft-deleted records
# (or decide how to handle them)
op.drop_index('idx_users_email_active')
op.create_unique_constraint('uq_users_email', 'users', ['email'])
op.drop_column('users', 'deleted_at')
Best Practices¶
- Use partial indexes — Most queries target active records
- Filter in JOINs — Don't forget related tables
- Consider compliance — Some data must be truly deleted
- Add
deleted_by— Track who deleted if auditing needed - Archive old deleted records — Move to separate table periodically
- Test restore flows — Make sure restoration actually works
- Document behavior — Team should know about soft deletes