A follower graph stores directed relationships between users — Alice follows Bob, but Bob may not follow Alice. It powers feed generation, notification targeting, and social recommendations. Core challenges: efficiently querying follower/following counts, generating personalized feeds without O(followers) fanout per write, handling celebrity accounts with millions of followers, and supporting mutual follow (friendship) detection.
Core Data Model
-- Directed follow relationship
CREATE TABLE Follow (
follower_id UUID NOT NULL,
followee_id UUID NOT NULL,
followed_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
PRIMARY KEY (follower_id, followee_id)
);
-- Lookup both directions efficiently
CREATE INDEX idx_follow_followee ON Follow (followee_id, followed_at DESC);
-- Denormalized counts for O(1) reads
CREATE TABLE UserStats (
user_id UUID PRIMARY KEY,
follower_count INT NOT NULL DEFAULT 0,
following_count INT NOT NULL DEFAULT 0,
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- Feed: pre-computed fan-out (for non-celebrity accounts)
-- Key in Redis: feed:{user_id} — sorted set scored by post_timestamp
-- Member: post_id
-- Only maintained for users with < CELEBRITY_THRESHOLD followers
Follow and Unfollow
import redis
from uuid import uuid4
r = redis.Redis(host='redis', decode_responses=True)
CELEBRITY_THRESHOLD = 10_000 # users with > 10K followers use pull-based feed
def follow_user(conn, follower_id: str, followee_id: str) -> bool:
"""
Returns True if newly followed, False if already following (idempotent).
Updates denormalized counts atomically.
"""
if follower_id == followee_id:
raise ValueError("Users cannot follow themselves")
with conn.cursor() as cur:
cur.execute("""
INSERT INTO Follow (follower_id, followee_id)
VALUES (%s, %s)
ON CONFLICT (follower_id, followee_id) DO NOTHING
RETURNING followed_at
""", (follower_id, followee_id))
inserted = cur.fetchone() is not None
if not inserted:
return False # Already following
with conn.cursor() as cur:
cur.execute("""
INSERT INTO UserStats (user_id, following_count) VALUES (%s, 1)
ON CONFLICT (user_id) DO UPDATE SET following_count = UserStats.following_count + 1
""", (follower_id,))
cur.execute("""
INSERT INTO UserStats (user_id, follower_count) VALUES (%s, 1)
ON CONFLICT (user_id) DO UPDATE SET follower_count = UserStats.follower_count + 1
""", (followee_id,))
conn.commit()
return True
def unfollow_user(conn, follower_id: str, followee_id: str) -> bool:
with conn.cursor() as cur:
cur.execute(
"DELETE FROM Follow WHERE follower_id=%s AND followee_id=%s RETURNING followed_at",
(follower_id, followee_id)
)
deleted = cur.fetchone() is not None
if not deleted:
return False
with conn.cursor() as cur:
cur.execute(
"UPDATE UserStats SET following_count = GREATEST(0, following_count - 1) WHERE user_id=%s",
(follower_id,)
)
cur.execute(
"UPDATE UserStats SET follower_count = GREATEST(0, follower_count - 1) WHERE user_id=%s",
(followee_id,)
)
conn.commit()
return True
Feed Fan-out on Post
import time
FEED_MAX_SIZE = 200 # keep only the 200 most recent posts in feed
def fan_out_post(conn, post_id: str, author_id: str, post_timestamp: float):
"""
When a non-celebrity user posts, push the post_id into each follower's feed.
For celebrity users, skip fan-out — followers pull on read instead.
"""
# Check if author is a celebrity
with conn.cursor() as cur:
cur.execute("SELECT follower_count FROM UserStats WHERE user_id = %s", (author_id,))
row = cur.fetchone()
follower_count = row[0] if row else 0
if follower_count >= CELEBRITY_THRESHOLD:
return # Celebrity: pull-based feed handles this
# Get all follower IDs (batch for large follower counts)
with conn.cursor() as cur:
cur.execute("SELECT follower_id FROM Follow WHERE followee_id = %s", (author_id,))
followers = [row[0] for row in cur.fetchall()]
if not followers:
return
# Fan-out using Redis pipeline for efficiency
pipeline = r.pipeline()
for follower_id in followers:
feed_key = f"feed:{follower_id}"
pipeline.zadd(feed_key, {post_id: post_timestamp})
# Trim to most recent FEED_MAX_SIZE posts
pipeline.zremrangebyrank(feed_key, 0, -(FEED_MAX_SIZE + 1))
pipeline.execute()
def get_user_feed(conn, user_id: str, before_ts: float | None = None, limit: int = 20) -> list[str]:
"""
Get post IDs for a user's feed. Merges pre-computed (push) feed with
celebrity posts (pull).
"""
now_ts = before_ts or time.time()
feed_key = f"feed:{user_id}"
# Push-based posts
push_post_ids = r.zrangebyscore(
feed_key, '-inf', f'({now_ts}',
start=0, num=limit, withscores=False
)[::-1] # reverse: newest first
# Pull-based: get celebrity accounts this user follows
celebrity_post_ids = get_celebrity_posts(conn, user_id, before_ts=now_ts, limit=limit)
# Merge and sort by timestamp, return top `limit`
all_ids = merge_by_timestamp(push_post_ids, celebrity_post_ids, limit)
return all_ids
def get_celebrity_posts(conn, user_id: str, before_ts: float, limit: int) -> list[str]:
"""Fetch recent posts from celebrity accounts the user follows."""
with conn.cursor() as cur:
cur.execute("""
SELECT p.post_id FROM Follow f
JOIN UserStats s ON f.followee_id = s.user_id
JOIN Post p ON f.followee_id = p.author_id
WHERE f.follower_id = %s
AND s.follower_count >= %s
AND p.created_at < to_timestamp(%s)
ORDER BY p.created_at DESC
LIMIT %s
""", (user_id, CELEBRITY_THRESHOLD, before_ts, limit))
return [row[0] for row in cur.fetchall()]
Mutual Follow (Friendship) Detection
def are_mutual_followers(conn, user_a: str, user_b: str) -> bool:
"""Check if A follows B AND B follows A — mutual follow = friendship."""
with conn.cursor() as cur:
cur.execute("""
SELECT COUNT(*) FROM Follow
WHERE (follower_id=%s AND followee_id=%s)
OR (follower_id=%s AND followee_id=%s)
""", (user_a, user_b, user_b, user_a))
count = cur.fetchone()[0]
return count == 2
def get_mutual_followers(conn, user_id: str, limit: int = 20) -> list[str]:
"""Get users who follow user_id AND user_id follows back."""
with conn.cursor() as cur:
cur.execute("""
SELECT f1.follower_id
FROM Follow f1
JOIN Follow f2 ON f1.follower_id = f2.followee_id
AND f2.follower_id = %s
AND f1.followee_id = %s
ORDER BY f1.followed_at DESC
LIMIT %s
""", (user_id, user_id, limit))
return [row[0] for row in cur.fetchall()]
Key Interview Points
- The celebrity problem: When Cristiano Ronaldo (500M followers) posts, fan-out to 500M Redis sorted sets takes hours. Solution: hybrid push-pull. Define a celebrity threshold (10K followers). Non-celebrities: push fan-out on post (writes post_id to each follower’s feed sorted set). Celebrities: no fan-out — each follower pulls celebrity posts on feed read by querying the Post table filtered by celebrity followees. Most users follow few celebrities, so the per-user pull query is fast.
- Feed sorted set design: Score by Unix timestamp, member is post_id. ZRANGEBYSCORE feed:{user_id} -inf (before_ts gives cursor-paginated reads (newest first). ZREMRANGEBYRANK trims to MAX_SIZE entries to bound memory: each feed uses ~20 bytes/post × 200 posts = 4KB/user × 10M users = 40GB Redis — manageable with a dedicated Redis instance.
- Denormalized counts: follower_count and following_count are updated on every follow/unfollow. This is a write-time cost that buys O(1) read performance. Alternative: COUNT(*) at read time — accurate but adds a query for every profile page load. At scale (10M profile views/day), pre-computed counts are necessary. Ensure GREATEST(0, …) in decrement to prevent negative counts from race conditions.
- Graph pagination: GET /users/{id}/followers should return cursor-paginated results, not offset-paginated. The followers list changes in real time — offset pagination skips or repeats users as the list shifts. Cursor: ORDER BY followed_at DESC, follower_id — opaque cursor encodes (followed_at, follower_id) for the last seen row.
- “People you may know” recommendations: Find second-degree connections (users followed by people you follow but not by you): SELECT followee_id FROM Follow WHERE follower_id IN (my followees) AND followee_id NOT IN (my followees) GROUP BY followee_id ORDER BY COUNT(*) DESC LIMIT 10. This is expensive — run offline daily and cache in Redis as a recommendation list.
Follower graph and social feed design is discussed in Twitter system design interview questions.
Follower graph and friend network design is covered in Snap system design interview preparation.
Follower graph and professional network design is discussed in LinkedIn system design interview guide.