GDPR Article 17 grants users the “right to erasure” — the ability to request that all their personal data be deleted. Implementing this correctly across a distributed system requires: identifying every location where user data exists, deleting or anonymizing it within 30 days, handling data that must be retained for legal reasons (invoices, fraud records), and providing a verifiable audit trail of the deletion. Getting this wrong risks fines up to €20M or 4% of global annual revenue.
Core Data Model
CREATE TYPE erasure_status AS ENUM (
'requested','in_progress','completed','partial','failed'
);
CREATE TABLE ErasureRequest (
request_id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
user_id UUID NOT NULL UNIQUE, -- one pending request per user
email TEXT NOT NULL, -- capture at request time (PII)
status erasure_status NOT NULL DEFAULT 'requested',
requested_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
deadline TIMESTAMPTZ NOT NULL DEFAULT NOW() + interval '30 days',
completed_at TIMESTAMPTZ,
-- Track progress per data category
categories_done TEXT[] NOT NULL DEFAULT '{}',
categories_failed TEXT[] NOT NULL DEFAULT '{}'
);
-- Audit trail: what was deleted/anonymized and when
CREATE TABLE ErasureAudit (
audit_id BIGSERIAL PRIMARY KEY,
request_id UUID NOT NULL REFERENCES ErasureRequest(request_id),
category TEXT NOT NULL, -- 'profile', 'orders', 'sessions', 'backups'
action TEXT NOT NULL, -- 'deleted', 'anonymized', 'retained_legal'
record_count INT,
details TEXT,
performed_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- Legal hold registry: data that must be retained despite erasure request
CREATE TABLE LegalRetentionHold (
hold_id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
user_id UUID NOT NULL,
category TEXT NOT NULL,
reason TEXT NOT NULL, -- 'tax_record_7yr', 'fraud_investigation', 'court_order'
retain_until TIMESTAMPTZ NOT NULL,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
Erasure Pipeline
from dataclasses import dataclass
from datetime import datetime, timezone
from typing import Callable
import psycopg2
@dataclass
class ErasureCategory:
name: str
handler: Callable
can_anonymize: bool # True if data can be anonymized instead of deleted
def delete_user_profile(conn, user_id: str, request_id: str) -> int:
"""Delete or anonymize core user profile."""
with conn.cursor() as cur:
# Anonymize: replace PII with placeholder, keep user_id for FK integrity
cur.execute("""
UPDATE User SET
email = %s,
name = 'Deleted User',
phone = NULL,
avatar_url = NULL,
date_of_birth = NULL,
address = NULL
WHERE user_id = %s
""", (f"deleted_{user_id}@erasure.invalid", user_id))
count = cur.rowcount
conn.commit()
return count
def delete_user_sessions(conn, user_id: str, request_id: str) -> int:
"""Delete all sessions — revoke all active logins."""
with conn.cursor() as cur:
cur.execute("DELETE FROM UserSession WHERE user_id = %s", (user_id,))
count = cur.rowcount
conn.commit()
return count
def delete_user_content(conn, user_id: str, request_id: str) -> int:
"""Delete user-generated content (posts, comments, reviews)."""
with conn.cursor() as cur:
cur.execute("DELETE FROM Review WHERE user_id = %s", (user_id,))
reviews = cur.rowcount
cur.execute("DELETE FROM Comment WHERE user_id = %s", (user_id,))
comments = cur.rowcount
# Posts: anonymize instead of delete (maintains thread integrity)
cur.execute("""
UPDATE Post SET user_id = NULL, author_name = '[Deleted]'
WHERE user_id = %s
""", (user_id,))
posts = cur.rowcount
conn.commit()
return reviews + comments + posts
def handle_orders_with_legal_hold(conn, user_id: str, request_id: str) -> int:
"""
Orders within the legal retention window (7 years for tax records) must be retained.
Anonymize PII fields but keep financial data.
"""
with conn.cursor() as cur:
# Anonymize customer details, keep amounts and dates for accounting
cur.execute("""
UPDATE Order SET
shipping_name = 'Deleted User',
shipping_address = NULL,
billing_address = NULL,
customer_email = %s
WHERE user_id = %s
AND placed_at > NOW() - interval '7 years'
""", (f"deleted_{user_id}@erasure.invalid", user_id))
retained_count = cur.rowcount
# Orders older than 7 years can be deleted
cur.execute("DELETE FROM Order WHERE user_id = %s AND placed_at <= NOW() - interval '7 years'", (user_id,))
deleted_count = cur.rowcount
conn.commit()
# Record the legal hold
with conn.cursor() as cur:
cur.execute("""
INSERT INTO LegalRetentionHold (user_id, category, reason, retain_until)
VALUES (%s, 'orders', 'tax_record_7yr', NOW() + interval '7 years')
ON CONFLICT DO NOTHING
""", (user_id,))
conn.commit()
return retained_count + deleted_count
ERASURE_CATEGORIES = [
ErasureCategory('sessions', delete_user_sessions, can_anonymize=False),
ErasureCategory('content', delete_user_content, can_anonymize=True),
ErasureCategory('orders', handle_orders_with_legal_hold, can_anonymize=True),
ErasureCategory('profile', delete_user_profile, can_anonymize=True),
]
def process_erasure_request(conn, request_id: str):
"""Run the full erasure pipeline for a request. Checkpoints progress."""
with conn.cursor() as cur:
cur.execute(
"SELECT user_id, categories_done FROM ErasureRequest WHERE request_id=%s FOR UPDATE",
(request_id,)
)
row = cur.fetchone()
if not row:
return
user_id, done = row
with conn.cursor() as cur:
cur.execute("UPDATE ErasureRequest SET status='in_progress' WHERE request_id=%s", (request_id,))
conn.commit()
for category in ERASURE_CATEGORIES:
if category.name in done:
continue # already processed — skip
try:
count = category.handler(conn, user_id, request_id)
with conn.cursor() as cur:
cur.execute("""
UPDATE ErasureRequest
SET categories_done = array_append(categories_done, %s)
WHERE request_id=%s
""", (category.name, request_id))
cur.execute("""
INSERT INTO ErasureAudit (request_id, category, action, record_count)
VALUES (%s,%s,'deleted',%s)
""", (request_id, category.name, count))
conn.commit()
except Exception as e:
with conn.cursor() as cur:
cur.execute("""
UPDATE ErasureRequest
SET categories_failed = array_append(categories_failed, %s)
WHERE request_id=%s
""", (category.name, request_id))
conn.commit()
# Check completion
with conn.cursor() as cur:
cur.execute(
"SELECT categories_done, categories_failed FROM ErasureRequest WHERE request_id=%s",
(request_id,)
)
done, failed = cur.fetchone()
all_cats = {c.name for c in ERASURE_CATEGORIES}
if not failed:
final_status = 'completed'
elif set(done) == all_cats:
final_status = 'completed'
else:
final_status = 'partial'
with conn.cursor() as cur:
cur.execute(
"UPDATE ErasureRequest SET status=%s, completed_at=NOW() WHERE request_id=%s",
(final_status, request_id)
)
conn.commit()
Key Interview Points
- Anonymization vs deletion: Physical deletion is impossible for data that other records reference (a FK violation breaks the DB). Anonymize instead: replace PII (email, name, address) with a non-identifying placeholder, keeping the row for referential integrity. The GDPR considers anonymized data to no longer be personal data — once a user cannot be re-identified, the obligation is fulfilled.
- Legal retention exceptions: GDPR Article 17(3) explicitly allows retaining data for legal obligations. Tax law in most EU countries requires invoice and transaction records for 7 years. Fraud investigation records may need to be retained indefinitely. Document every exception in the LegalRetentionHold table — regulators want proof that retained data has a legitimate basis, not that you simply failed to delete it.
- Backup data: Database backups retain a full snapshot. After erasure, the user’s data persists in backup files for the backup retention period. GDPR guidance: backups are exempt during the retention window, but when backups are restored (e.g., disaster recovery), the erasure must be re-applied before the restored data enters production. Document this policy and test it — it is a common audit finding.
- Cascading deletions across systems: User data exists in: main database, Elasticsearch index, S3 (profile pictures, uploaded files), email marketing lists, analytics platforms (Mixpanel, Amplitude), CDN cache. Each system needs an erasure handler. Use a GDPR inventory document to list all data stores. Automate where possible (APIs to delete from email lists, S3 object deletion, search index updates). Some third-party processors have their own 30-day SLAs — trigger them immediately on request receipt.
- The 30-day deadline: GDPR Article 12 requires responding within 1 month (extendable to 3 months for complex requests). Store deadline = requested_at + 30 days. Monitor: alert when requests approach deadline with >0 uncompleted categories. Prioritize erasure jobs over other background work. Report monthly: “X erasure requests received, Y completed on time, Z completed late.” Late completions must be documented with a reason.
GDPR erasure and data privacy system design is discussed in LinkedIn system design interview questions.
GDPR erasure and user data deletion design is covered in Meta system design interview preparation.
GDPR right to erasure and data anonymization design is discussed in Google system design interview guide.