Designing the Twitter timeline is a classic system design interview question that appears at Twitter/X, Meta, LinkedIn, and many other social platforms. The core tension is between fan-out strategies for generating home timelines.
Requirements Clarification
- Scale: 300M MAU, 500M tweets/day, ~5k writes/sec, ~300k reads/sec (read-heavy 60:1)
- Features: Post tweet, follow users, home timeline, user timeline, search, trending
- Latency: Timeline load <200ms p99; tweet post <500ms
- Consistency: Eventual OK for timeline (followers see tweet within a few seconds)
The Fan-out Problem
"""
When user A (100M followers) posts a tweet, how do followers see it?
Option 1: Fan-out on Write (Push model)
- On post: write tweet to all followers' timelines immediately
- Home timeline read: O(1) — already pre-computed
- Problem: Katy Perry (100M followers) posting = 100M writes per tweet
- Celebrity problem: their writes saturate the timeline write queue
Option 2: Fan-out on Read (Pull model)
- On post: store tweet in author's table only
- Home timeline read: fetch tweets from all followees, merge, sort
- Problem: reading timeline of user following 1000 people = 1000 reads
- Too slow for read path
Option 3: Hybrid (Twitter's actual approach)
- Regular users: fan-out on write (push to followers' timelines)
- Celebrities (> ~1M followers): fan-out on read for their tweets
- Home timeline = pre-computed timeline (followers only) + merge(celebrities followed)
"""
import heapq
from typing import List, Tuple
def merge_celebrity_tweets(
    precomputed_timeline: List[Tuple[int, str]],   # (timestamp, tweet_id), newest first
    celebrity_tweets: List[List[Tuple[int, str]]], # one newest-first list per celebrity
    limit: int = 50
) -> List[str]:
    """
    Merge the pre-computed home timeline with per-celebrity tweet streams.

    Performs a k-way merge across all sources in O(n log k). Sorting key is
    (-timestamp, tweet_id): newest tweets first, ties broken by tweet id,
    remaining ties by source order (timeline first, then celebrities).

    Returns at most `limit` tweet ids, newest first.
    """
    sources = [precomputed_timeline, *celebrity_tweets]
    # heapq.merge keeps one head per source on a heap — same algorithm as a
    # manual heap loop, negated timestamp gives descending time order.
    merged = heapq.merge(*sources, key=lambda entry: (-entry[0], entry[1]))
    result: List[str] = []
    for _, tweet_id in merged:
        if len(result) >= limit:
            break
        result.append(tweet_id)
    return result
Timeline Storage — Redis + Cassandra
import redis
import time
from typing import List
class TimelineService:
    """
    Home-timeline storage with a hybrid fan-out strategy.

    Hot timeline lives in a Redis sorted set (score = timestamp); cold /
    historical timeline lives in Cassandra. Each Redis timeline is capped at
    MAX_TIMELINE_SIZE entries — beyond that, paginate against the tweet store.

    Regular users are fanned out on write; authors above
    HOT_THRESHOLD_FOLLOWERS ("celebrities") are merged in at read time.
    """

    MAX_TIMELINE_SIZE = 800
    HOT_THRESHOLD_FOLLOWERS = 1_000_000  # above this = celebrity, no fan-out

    def __init__(self, redis_client: "redis.Redis", cassandra, user_service):
        # cassandra / user_service are project dependencies; only
        # user_service is exercised in the code visible here.
        self.redis = redis_client
        self.cassandra = cassandra
        self.users = user_service

    def post_tweet(self, author_id: str, tweet_id: str, timestamp: float):
        """Persist the tweet; fan out to followers unless the author is a celebrity.

        Celebrity tweets are only written to the author's own tweet list and
        merged into followers' timelines at read time (see get_home_timeline).
        """
        follower_count = self.users.get_follower_count(author_id)
        # Both paths store the tweet in the author's list; only the fan-out differs.
        self._store_tweet(author_id, tweet_id, timestamp)
        if follower_count <= self.HOT_THRESHOLD_FOLLOWERS:
            self._fanout_to_followers(author_id, tweet_id, timestamp)

    def _fanout_to_followers(self, author_id: str, tweet_id: str, timestamp: float):
        """Push the tweet into every follower's Redis timeline.

        NOTE: in production this runs asynchronously (e.g. Kafka consumers),
        not inline on the post path.
        """
        followers = self.users.get_follower_ids(author_id)
        # transaction=False: independent per-follower writes, no MULTI/EXEC needed.
        pipe = self.redis.pipeline(transaction=False)
        for follower_id in followers:
            tl_key = f"timeline:{follower_id}"
            pipe.zadd(tl_key, {tweet_id: timestamp})
            # Trim oldest entries so the timeline never exceeds MAX_TIMELINE_SIZE.
            pipe.zremrangebyrank(tl_key, 0, -(self.MAX_TIMELINE_SIZE + 1))
            pipe.expire(tl_key, 7 * 24 * 3600)  # TTL: 7 days
        pipe.execute()

    def get_home_timeline(self, user_id: str, count: int = 20, before: float = None) -> List[str]:
        """Return up to `count` tweet ids, newest first, older than `before`.

        Reads the pre-computed timeline, then merges in tweets from any
        celebrities this user follows (who were excluded from fan-out).

        `before` defaults to now, enabling timestamp-based pagination.
        """
        before = before or time.time()
        tl_key = f"timeline:{user_id}"
        # Fetch (tweet_id, score) pairs in ONE round-trip and reuse them for
        # both paths. (Previously the plain fetch was discarded and re-issued
        # with scores whenever the user followed any celebrity.)
        scored = self.redis.zrevrangebyscore(
            tl_key, before, "-inf", start=0, num=count, withscores=True
        )
        celebrities = self.users.get_followed_celebrities(user_id)
        if not celebrities:
            return [tweet_id for tweet_id, _ in scored]
        celeb_tweets = [
            self._get_user_tweets(cid, count=count, before=before)
            for cid in celebrities
        ]
        return merge_celebrity_tweets(
            [(float(ts), tweet_id) for tweet_id, ts in scored],
            celeb_tweets, count
        )

    def _get_user_tweets(self, user_id: str, count: int, before: float):
        """Fetch a user's own recent tweets as (timestamp, tweet_id), newest first."""
        key = f"user_tweets:{user_id}"
        return [(float(ts), tweet_id) for tweet_id, ts in
                self.redis.zrevrangebyscore(key, before, "-inf", start=0, num=count, withscores=True)]

    def _store_tweet(self, author_id: str, tweet_id: str, timestamp: float):
        """Append the tweet to the author's own tweet list, capped like timelines."""
        key = f"user_tweets:{author_id}"
        self.redis.zadd(key, {tweet_id: timestamp})
        self.redis.zremrangebyrank(key, 0, -(self.MAX_TIMELINE_SIZE + 1))
Trending Topics
import redis
from collections import defaultdict
class TrendingService:
    """
    Track hashtag/topic frequency in sliding windows.

    Strategy: one Redis sorted set per fixed-size time bucket
    (member = hashtag, score = count); reads merge recent buckets with an
    hourly time-decay weight.
    """

    BUCKET_SECONDS = 300  # 5-minute buckets

    def __init__(self, redis_client: "redis.Redis"):
        self.redis = redis_client

    def record_tweet(self, tweet_id: str, hashtags: List[str], timestamp: int):
        """Increment counts for each hashtag in the tweet's time bucket.

        `tweet_id` is accepted for interface symmetry but not stored here.
        """
        bucket = timestamp - (timestamp % self.BUCKET_SECONDS)
        key = f"trends:{bucket}"
        pipe = self.redis.pipeline()
        for tag in hashtags:
            pipe.zincrby(key, 1, tag)
        pipe.expire(key, 24 * 3600)  # keep 24h of buckets
        pipe.execute()

    def get_trending(self, lookback_seconds: int = 3600, top_k: int = 10) -> List[tuple]:
        """Return the top_k (hashtag, weighted_count) pairs over the lookback window.

        Recent buckets are weighted more via 1 / (1 + age_hours) decay.
        """
        now = int(time.time())
        # Align the scan to bucket boundaries once, from the bucket containing
        # (now - lookback) through the CURRENT bucket inclusive. The previous
        # unaligned range could step past `now` into a future (empty) bucket,
        # whose negative age would have produced a decay weight > 1.
        first_bucket = (now - lookback_seconds) // self.BUCKET_SECONDS * self.BUCKET_SECONDS
        current_bucket = now // self.BUCKET_SECONDS * self.BUCKET_SECONDS
        counts = defaultdict(float)
        for bucket in range(first_bucket, current_bucket + self.BUCKET_SECONDS, self.BUCKET_SECONDS):
            key = f"trends:{bucket}"
            entries = self.redis.zrangebyscore(key, "-inf", "+inf", withscores=True)
            # Decay weight is per-bucket; hoisted out of the per-tag loop.
            age = now - bucket  # >= 0 because bucket <= current_bucket <= now
            weight = 1.0 / (1 + age / 3600)
            for tag, score in entries:
                # redis returns bytes members by default — decode for the caller.
                counts[tag.decode()] += score * weight
        return sorted(counts.items(), key=lambda x: -x[1])[:top_k]
Key Design Decisions
| Decision | Choice | Rationale |
|---|---|---|
| Fan-out strategy | Hybrid push+pull | Celebrity tweets: read-time merge; regular: write-time fan-out |
| Timeline storage | Redis sorted set + Cassandra | Hot: Redis O(log n); cold: Cassandra for historical |
| Tweet store | Cassandra (wide row per user) | Append-only, time-series, linear scale |
| Search | Elasticsearch | Full-text + recent tweet search; Kafka ingest pipeline |
| Media | S3 + CDN | Images/video stored separately; tweet references URL |
| Counting (likes/RT) | Redis INCR + async batch persist | O(1) increment; batch flush to DB prevents write storms |
Companies That Ask This System Design Question
This problem type commonly appears in interviews at Twitter/X, Meta, LinkedIn, and other large-scale social platforms.
See our company interview guides for the full interview process, compensation data, and preparation tips.