System Design Interview: Twitter/X Timeline and Tweet Feed

Designing the Twitter timeline is a classic system design interview question that appears at Twitter/X, Meta, LinkedIn, and many other social platforms. The core tension is between fan-out strategies for generating home timelines.

Requirements Clarification

  • Scale: 300M MAU, 500M tweets/day, ~5k writes/sec, ~300k reads/sec (read-heavy 60:1)
  • Features: Post tweet, follow users, home timeline, user timeline, search, trending
  • Latency: Timeline load <200ms p99; tweet post <500ms
  • Consistency: Eventual OK for timeline (followers see tweet within a few seconds)

The Fan-out Problem

"""
When user A (100M followers) posts a tweet, how do followers see it?

Option 1: Fan-out on Write (Push model)
  - On post: write tweet to all followers' timelines immediately
  - Home timeline read: O(1) — already pre-computed
  - Problem: Katy Perry (100M followers) posting = 100M writes per tweet
  - Celebrity problem: their writes saturate the timeline write queue

Option 2: Fan-out on Read (Pull model)
  - On post: store tweet in author's table only
  - Home timeline read: fetch tweets from all followees, merge, sort
  - Problem: reading timeline of user following 1000 people = 1000 reads
  - Too slow for read path

Option 3: Hybrid (Twitter's actual approach)
  - Regular users: fan-out on write (push to followers' timelines)
  - Celebrities (> ~1M followers): fan-out on read for their tweets
  - Home timeline = pre-computed timeline (non-celebrity followees only) + merge(celebrities followed)
"""

import heapq
from typing import List, Tuple

def merge_celebrity_tweets(
    precomputed_timeline: List[Tuple[int, str]],  # (timestamp, tweet_id)
    celebrity_tweets: List[List[Tuple[int, str]]], # per celebrity
    limit: int = 50
) -> List[str]:
    """
    K-way merge of the pre-computed timeline with each celebrity's tweet
    stream, newest first, returning at most `limit` tweet ids.

    Every source must already be sorted by descending timestamp. A min-heap
    over negated timestamps surfaces the globally newest tweet in O(log k)
    per pop, for O(n log k) overall.
    """
    # One iterator per source; heap entries carry the source's index so the
    # raw iterators never participate in tuple comparison.
    sources = [iter(precomputed_timeline)]
    sources.extend(iter(stream) for stream in celebrity_tweets)

    # Seed the heap with the head element of every non-empty source.
    heap = []
    for idx, it in enumerate(sources):
        head = next(it, None)
        if head is not None:
            ts, tweet_id = head
            heap.append((-ts, tweet_id, idx))
    heapq.heapify(heap)

    merged = []
    while heap and len(merged) < limit:
        _neg_ts, tweet_id, idx = heapq.heappop(heap)
        merged.append(tweet_id)
        # Refill from whichever source just yielded, if it has more.
        nxt = next(sources[idx], None)
        if nxt is not None:
            ts, next_id = nxt
            heapq.heappush(heap, (-ts, next_id, idx))

    return merged

Timeline Storage — Redis + Cassandra

import redis
import time
from typing import List

class TimelineService:
    """
    Home-timeline service implementing the hybrid fan-out strategy.

    Hot timelines live in a Redis sorted set per user (score = tweet
    timestamp); Cassandra holds cold/historical data (not exercised in this
    excerpt). Each Redis timeline is capped at MAX_TIMELINE_SIZE entries —
    beyond that, callers paginate against the tweet store.
    """

    # Max entries kept per Redis timeline before trimming.
    MAX_TIMELINE_SIZE = 800
    # Authors above this follower count are "celebrities": no write-time
    # fan-out; their tweets are merged into timelines at read time.
    HOT_THRESHOLD_FOLLOWERS = 1_000_000

    def __init__(self, redis_client: "redis.Redis", cassandra, user_service):
        self.redis = redis_client
        self.cassandra = cassandra  # cold store; unused in this excerpt
        self.users = user_service

    def post_tweet(self, author_id: str, tweet_id: str, timestamp: float):
        """Persist a tweet; fan out to followers unless the author is a celebrity."""
        follower_count = self.users.get_follower_count(author_id)

        # Every tweet is recorded on the author's own stream regardless.
        self._store_tweet(author_id, tweet_id, timestamp)
        if follower_count <= self.HOT_THRESHOLD_FOLLOWERS:
            # Regular user: push into each follower's pre-computed timeline.
            # Celebrities skip this and are merged on read (get_home_timeline).
            self._fanout_to_followers(author_id, tweet_id, timestamp)

    def _fanout_to_followers(self, author_id: str, tweet_id: str, timestamp: float):
        """Async fan-out — typically done by Kafka consumers, not synchronously."""
        followers = self.users.get_follower_ids(author_id)
        # transaction=False: independent per-follower writes need no atomicity.
        pipe = self.redis.pipeline(transaction=False)
        for follower_id in followers:
            tl_key = f"timeline:{follower_id}"
            pipe.zadd(tl_key, {tweet_id: timestamp})
            # Trim to the newest MAX_TIMELINE_SIZE entries.
            pipe.zremrangebyrank(tl_key, 0, -(self.MAX_TIMELINE_SIZE + 1))
            pipe.expire(tl_key, 7 * 24 * 3600)  # TTL: 7 days
        pipe.execute()

    def get_home_timeline(self, user_id: str, count: int = 20, before: float = None) -> List[str]:
        """
        Return up to `count` tweet ids, newest first, with timestamps <= `before`.

        Reads the pre-computed timeline ONCE (the previous version re-issued
        the identical zrevrangebyscore query on the celebrity-merge path),
        then k-way-merges tweets from any celebrities this user follows.
        """
        # `is None`, not `or`: `before=0.0` must mean "epoch", not "now".
        if before is None:
            before = time.time()
        tl_key = f"timeline:{user_id}"
        # Single scored fetch; plain ids are derived from it below.
        scored = [
            (float(ts), tid)
            for tid, ts in self.redis.zrevrangebyscore(
                tl_key, before, "-inf", start=0, num=count, withscores=True
            )
        ]

        # Merge with celebrities this user follows (they never fan out).
        celebrities = self.users.get_followed_celebrities(user_id)
        if celebrities:
            celeb_tweets = [
                self._get_user_tweets(cid, count=count, before=before)
                for cid in celebrities
            ]
            return merge_celebrity_tweets(scored, celeb_tweets, count)

        return [tid for _, tid in scored]

    def _get_user_tweets(self, user_id: str, count: int, before: float):
        """Fetch a user's own recent tweets as (timestamp, tweet_id) pairs, newest first."""
        key = f"user_tweets:{user_id}"
        return [(float(ts), tid) for tid, ts in
                self.redis.zrevrangebyscore(key, before, "-inf", start=0, num=count, withscores=True)]

    def _store_tweet(self, author_id: str, tweet_id: str, timestamp: float):
        """Append a tweet to the author's stream and trim it to the size cap."""
        key = f"user_tweets:{author_id}"
        self.redis.zadd(key, {tweet_id: timestamp})
        self.redis.zremrangebyrank(key, 0, -(self.MAX_TIMELINE_SIZE + 1))

Trending Topics

import redis
from collections import defaultdict

class TrendingService:
    """
    Trending-topic counter over sliding time windows.

    Each 5-minute bucket is a Redis sorted set (key `trends:<bucket>`)
    mapping hashtag -> count; get_trending aggregates recent buckets with a
    recency-decay weight.
    """

    BUCKET_SECONDS = 300  # 5-minute buckets

    def __init__(self, redis_client: "redis.Redis"):
        self.redis = redis_client

    def record_tweet(self, tweet_id: str, hashtags: List[str], timestamp: int):
        """Increment each hashtag's counter in the tweet's time bucket."""
        # Align down to the enclosing bucket boundary.
        bucket = timestamp - (timestamp % self.BUCKET_SECONDS)
        key = f"trends:{bucket}"
        pipe = self.redis.pipeline()
        for tag in hashtags:
            pipe.zincrby(key, 1, tag)
        pipe.expire(key, 24 * 3600)  # Keep 24h of buckets
        pipe.execute()

    def get_trending(self, lookback_seconds: int = 3600, top_k: int = 10) -> List[tuple]:
        """
        Return the top_k (tag, weighted_count) pairs over the lookback window.

        Bucket keys are generated pre-aligned (the previous version stepped
        unaligned starts and re-aligned each one, which could even touch a
        not-yet-existing future bucket), and the per-bucket decay weight is
        hoisted out of the per-tag loop since it is loop-invariant.
        """
        now = int(time.time())
        first_bucket = (now - lookback_seconds) // self.BUCKET_SECONDS * self.BUCKET_SECONDS
        counts = defaultdict(float)
        for bucket in range(first_bucket, now + 1, self.BUCKET_SECONDS):
            key = f"trends:{bucket}"
            # Time decay: a bucket one hour old counts half as much as "now".
            weight = 1.0 / (1 + (now - bucket) / 3600)
            entries = self.redis.zrangebyscore(key, "-inf", "+inf", withscores=True)
            for tag, score in entries:
                # assumes a bytes-returning redis client — TODO confirm
                counts[tag.decode()] += score * weight

        return sorted(counts.items(), key=lambda kv: -kv[1])[:top_k]

Key Design Decisions

| Decision | Choice | Rationale |
|---|---|---|
| Fan-out strategy | Hybrid push+pull | Celebrity tweets: read-time merge; regular: write-time fan-out |
| Timeline storage | Redis sorted set + Cassandra | Hot: Redis O(log n); cold: Cassandra for historical |
| Tweet store | Cassandra (wide row per user) | Append-only, time-series, linear scale |
| Search | Elasticsearch | Full-text + recent tweet search; Kafka ingest pipeline |
| Media | S3 + CDN | Images/video stored separately; tweet references URL |
| Counting (likes/RT) | Redis INCR + async batch persist | O(1) increment; batch flush to DB prevents write storms |

Companies That Ask This System Design Question

This problem type commonly appears in interviews at Twitter/X, Meta, LinkedIn, and other social platforms.

See our company interview guides for the full interview process, compensation, and preparation tips.

Scroll to Top