Skip to content

SQL Fundamentals

Master the SQL operations you'll use daily with PostgreSQL.

JOINs

Visual Reference

Join Type Diagram Returns
INNER JOIN A ∩ B Only matching rows
LEFT JOIN A + (A ∩ B) All of A, matches from B
RIGHT JOIN (A ∩ B) + B All of B, matches from A
FULL OUTER JOIN A ∪ B All rows from both tables

INNER JOIN

Returns rows only when there's a match in both tables.

-- Users with bookings
SELECT u.name, b.date, b.amount
FROM users u
INNER JOIN bookings b ON b.user_id = u.id;

LEFT JOIN

Returns all rows from left table, with matches from right (NULL if no match).

-- All users, with their bookings (if any)
SELECT u.name, b.date, b.amount
FROM users u
LEFT JOIN bookings b ON b.user_id = u.id;

-- Users WITHOUT bookings
SELECT u.name
FROM users u
LEFT JOIN bookings b ON b.user_id = u.id
WHERE b.id IS NULL;

RIGHT JOIN

Returns all rows from right table. Rarely used (rewrite as LEFT JOIN).

-- Same as LEFT JOIN with tables swapped
SELECT u.name, b.date
FROM bookings b
RIGHT JOIN users u ON b.user_id = u.id;

FULL OUTER JOIN

Returns all rows from both tables, with NULLs where no match.

-- All users and all bookings, matched where possible
SELECT u.name, b.date
FROM users u
FULL OUTER JOIN bookings b ON b.user_id = u.id;

CROSS JOIN

Cartesian product — every row paired with every other row.

-- Generate all time slot combinations
SELECT d.date, t.time_slot
FROM dates d
CROSS JOIN time_slots t;

-- Implicit syntax (comma)
SELECT d.date, t.time_slot
FROM dates d, time_slots t;

Self JOIN

Join a table to itself.

-- Find employees and their managers
SELECT e.name AS employee, m.name AS manager
FROM employees e
LEFT JOIN employees m ON e.manager_id = m.id;

Common Table Expressions (CTEs)

CTEs make complex queries readable by breaking them into named steps.

Basic CTE

WITH active_users AS (
    SELECT id, name, email
    FROM users
    WHERE status = 'active'
      AND last_login > NOW() - INTERVAL '30 days'
)
SELECT au.name, COUNT(b.id) AS booking_count
FROM active_users au
LEFT JOIN bookings b ON b.user_id = au.id
GROUP BY au.id, au.name;

Multiple CTEs

WITH
monthly_revenue AS (
    SELECT
        DATE_TRUNC('month', created_at) AS month,
        SUM(amount) AS revenue
    FROM bookings
    WHERE status = 'completed'
    GROUP BY DATE_TRUNC('month', created_at)
),
monthly_costs AS (
    SELECT
        DATE_TRUNC('month', date) AS month,
        SUM(amount) AS costs
    FROM expenses
    GROUP BY DATE_TRUNC('month', date)
)
SELECT
    r.month,
    r.revenue,
    c.costs,
    r.revenue - COALESCE(c.costs, 0) AS profit
FROM monthly_revenue r
LEFT JOIN monthly_costs c ON c.month = r.month
ORDER BY r.month;

Recursive CTEs

For hierarchical data (org charts, categories, threads).

-- Get all subcategories of a category
WITH RECURSIVE category_tree AS (
    -- Base case: start with parent category
    SELECT id, name, parent_id, 1 AS depth
    FROM categories
    WHERE id = 5

    UNION ALL

    -- Recursive case: get children
    SELECT c.id, c.name, c.parent_id, ct.depth + 1
    FROM categories c
    INNER JOIN category_tree ct ON c.parent_id = ct.id
    WHERE ct.depth < 10  -- Prevent infinite loops
)
SELECT * FROM category_tree;

Window Functions

Perform calculations across related rows without grouping.

ROW_NUMBER

Assign sequential numbers to rows.

-- Number bookings per user
SELECT
    user_id,
    created_at,
    ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY created_at) AS booking_num
FROM bookings;

-- Get first booking per user
WITH numbered AS (
    SELECT
        *,
        ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY created_at) AS rn
    FROM bookings
)
SELECT * FROM numbered WHERE rn = 1;

RANK and DENSE_RANK

Handle ties differently than ROW_NUMBER.

-- Rank users by total spending
SELECT
    user_id,
    total_spent,
    RANK() OVER (ORDER BY total_spent DESC) AS rank,
    DENSE_RANK() OVER (ORDER BY total_spent DESC) AS dense_rank
FROM (
    SELECT user_id, SUM(amount) AS total_spent
    FROM bookings
    GROUP BY user_id
) t;

-- RANK: 1, 2, 2, 4 (skips 3)
-- DENSE_RANK: 1, 2, 2, 3 (no gaps)

LAG and LEAD

Access previous/next row values.

-- Compare each booking to previous
SELECT
    user_id,
    created_at,
    amount,
    LAG(amount) OVER (PARTITION BY user_id ORDER BY created_at) AS prev_amount,
    amount - LAG(amount) OVER (PARTITION BY user_id ORDER BY created_at) AS diff
FROM bookings;

-- Get next scheduled appointment
SELECT
    user_id,
    date,
    LEAD(date) OVER (PARTITION BY user_id ORDER BY date) AS next_date
FROM appointments;

Running Totals and Averages

-- Running total of revenue
SELECT
    date,
    amount,
    SUM(amount) OVER (ORDER BY date) AS running_total,
    AVG(amount) OVER (ORDER BY date) AS running_avg
FROM daily_revenue;

-- Running total within each month
SELECT
    date,
    amount,
    SUM(amount) OVER (
        PARTITION BY DATE_TRUNC('month', date)
        ORDER BY date
    ) AS monthly_running_total
FROM daily_revenue;

Window Frame Clauses

Control which rows are included in the window.

-- 7-day moving average
SELECT
    date,
    amount,
    AVG(amount) OVER (
        ORDER BY date
        ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
    ) AS moving_avg_7d
FROM daily_revenue;

-- Include all preceding rows (default for running totals)
SUM(amount) OVER (ORDER BY date ROWS UNBOUNDED PRECEDING)

-- Centered window (3 before, current, 3 after)
AVG(amount) OVER (
    ORDER BY date
    ROWS BETWEEN 3 PRECEDING AND 3 FOLLOWING
)

Subqueries

Scalar Subqueries

Return a single value.

-- Users with above-average bookings
SELECT name
FROM users
WHERE id IN (
    SELECT user_id
    FROM bookings
    GROUP BY user_id
    HAVING COUNT(*) > (SELECT AVG(cnt) FROM (
        SELECT COUNT(*) AS cnt FROM bookings GROUP BY user_id
    ) t)
);

Correlated Subqueries

Reference the outer query.

-- Users with their latest booking date
SELECT
    u.name,
    (SELECT MAX(created_at)
     FROM bookings b
     WHERE b.user_id = u.id) AS last_booking
FROM users u;

EXISTS

Check if rows exist (often faster than IN).

-- Users who have made at least one booking
SELECT u.name
FROM users u
WHERE EXISTS (
    SELECT 1
    FROM bookings b
    WHERE b.user_id = u.id
);

-- Users who have NEVER booked
SELECT u.name
FROM users u
WHERE NOT EXISTS (
    SELECT 1
    FROM bookings b
    WHERE b.user_id = u.id
);

PostgreSQL-Specific Features

DISTINCT ON

Get first row per group (simpler than window functions).

-- Latest booking per user
SELECT DISTINCT ON (user_id) *
FROM bookings
ORDER BY user_id, created_at DESC;

LATERAL Joins

Subquery can reference columns from preceding tables.

-- Top 3 bookings per user
SELECT u.name, b.date, b.amount
FROM users u
CROSS JOIN LATERAL (
    SELECT date, amount
    FROM bookings
    WHERE user_id = u.id
    ORDER BY amount DESC
    LIMIT 3
) b;

Array Operations

-- Users with specific tags
SELECT * FROM users
WHERE tags @> ARRAY['premium', 'verified'];

-- Unnest array to rows
SELECT unnest(tags) AS tag FROM users WHERE id = 1;

-- Aggregate into array
SELECT user_id, array_agg(tag ORDER BY tag) AS all_tags
FROM user_tags
GROUP BY user_id;

JSONB Operations

-- Query JSON field
SELECT * FROM users
WHERE preferences->>'theme' = 'dark';

-- Deep access
SELECT preferences->'notifications'->>'email' AS email_pref
FROM users;

-- Contains operator
SELECT * FROM products
WHERE metadata @> '{"category": "photo"}';

-- Update JSON field
UPDATE users
SET preferences = preferences || '{"theme": "light"}'
WHERE id = 1;

-- Remove JSON key
UPDATE users
SET preferences = preferences - 'deprecated_key'
WHERE id = 1;

UPSERT (INSERT ... ON CONFLICT)

-- Insert or update
INSERT INTO user_preferences (user_id, key, value)
VALUES (1, 'theme', 'dark')
ON CONFLICT (user_id, key)
DO UPDATE SET value = EXCLUDED.value, updated_at = NOW();

-- Insert or ignore
INSERT INTO tags (name)
VALUES ('new-tag')
ON CONFLICT (name) DO NOTHING;

RETURNING Clause

Get affected rows back.

-- Get inserted row
INSERT INTO users (name, email)
VALUES ('John', 'john@example.com')
RETURNING id, created_at;

-- Get deleted rows
DELETE FROM sessions
WHERE expires_at < NOW()
RETURNING user_id, id;

-- Get updated rows
UPDATE bookings
SET status = 'confirmed'
WHERE status = 'pending' AND created_at < NOW() - INTERVAL '1 hour'
RETURNING *;

Generate Series

Create sequences of values.

-- Generate dates
SELECT generate_series(
    '2024-01-01'::date,
    '2024-12-31'::date,
    '1 day'::interval
) AS date;

-- Fill gaps in time series data
SELECT
    d.date,
    COALESCE(r.revenue, 0) AS revenue
FROM generate_series(
    '2024-01-01'::date,
    '2024-01-31'::date,
    '1 day'::interval
) d(date)
LEFT JOIN daily_revenue r ON r.date = d.date;

Aggregations

Basic Aggregates

SELECT
    COUNT(*) AS total_rows,
    COUNT(DISTINCT user_id) AS unique_users,
    SUM(amount) AS total_amount,
    AVG(amount) AS avg_amount,
    MIN(amount) AS min_amount,
    MAX(amount) AS max_amount
FROM bookings;

GROUP BY with Aggregates

-- Revenue by month
SELECT
    DATE_TRUNC('month', created_at) AS month,
    COUNT(*) AS bookings,
    SUM(amount) AS revenue
FROM bookings
GROUP BY DATE_TRUNC('month', created_at)
ORDER BY month;

HAVING

Filter groups (WHERE filters rows before grouping).

-- Users with more than 5 bookings
SELECT user_id, COUNT(*) AS booking_count
FROM bookings
GROUP BY user_id
HAVING COUNT(*) > 5;

FILTER Clause

Conditional aggregation (cleaner than CASE).

SELECT
    COUNT(*) AS total,
    COUNT(*) FILTER (WHERE status = 'completed') AS completed,
    COUNT(*) FILTER (WHERE status = 'pending') AS pending,
    SUM(amount) FILTER (WHERE status = 'completed') AS completed_revenue
FROM bookings;

GROUPING SETS, CUBE, ROLLUP

Multiple grouping levels in one query.

-- Multiple grouping levels
SELECT
    COALESCE(region, 'All Regions') AS region,
    COALESCE(category, 'All Categories') AS category,
    SUM(amount) AS total
FROM sales
GROUP BY GROUPING SETS (
    (region, category),
    (region),
    (category),
    ()
);

-- ROLLUP: hierarchical totals
SELECT region, city, SUM(amount)
FROM sales
GROUP BY ROLLUP (region, city);
-- Produces: (region, city), (region), ()

-- CUBE: all combinations
SELECT region, category, SUM(amount)
FROM sales
GROUP BY CUBE (region, category);
-- Produces: (region, category), (region), (category), ()

Best Practices

  1. Use CTEs for readability — Break complex queries into named steps
  2. Prefer EXISTS over IN — For subqueries checking existence
  3. Use window functions — Instead of self-joins for row comparisons
  4. DISTINCT ON for first-per-group — Simpler than ROW_NUMBER
  5. JSONB for flexible data — But index it properly
  6. RETURNING for efficiency — Get data back without separate SELECT