Low-Level Design: Social Media Feed (Follow, Post, Fan-out, Likes)

Low-Level Design: Social Media Feed (Twitter/Instagram)

A social media feed system manages posts, follows, likes, and generates a personalized home timeline. It is a common LLD question at Meta, Twitter, LinkedIn, and Snap.

Core Entities


from dataclasses import dataclass, field
from enum import Enum
from datetime import datetime
from typing import Optional
import uuid

class PostType(Enum):
    TEXT = "text"
    IMAGE = "image"
    VIDEO = "video"
    REPOST = "repost"

@dataclass
class User:
    user_id: str
    username: str
    display_name: str
    bio: str = ""
    follower_count: int = 0
    following_count: int = 0
    is_verified: bool = False

@dataclass
class Post:
    post_id: str
    author_id: str
    content: str
    post_type: PostType = PostType.TEXT
    media_urls: list[str] = field(default_factory=list)
    like_count: int = 0
    repost_count: int = 0
    reply_count: int = 0
    parent_post_id: Optional[str] = None  # for replies
    created_at: datetime = field(default_factory=datetime.utcnow)
    is_deleted: bool = False

@dataclass
class Follow:
    follower_id: str
    followee_id: str
    created_at: datetime = field(default_factory=datetime.utcnow)

Follow/Unfollow Service


class FollowService:
    def __init__(self):
        # followee_id -> set of follower_ids
        self._followers: dict[str, set] = {}
        # follower_id -> set of followee_ids
        self._following: dict[str, set] = {}
        self._users: dict[str, User] = {}

    def follow(self, follower_id: str, followee_id: str) -> None:
        if follower_id == followee_id:
            raise ValueError("Cannot follow yourself")
        if followee_id in self._following.get(follower_id, set()):
            return  # already following
        self._following.setdefault(follower_id, set()).add(followee_id)
        self._followers.setdefault(followee_id, set()).add(follower_id)
        # Update counts
        if follower_id in self._users:
            self._users[follower_id].following_count += 1
        if followee_id in self._users:
            self._users[followee_id].follower_count += 1

    def unfollow(self, follower_id: str, followee_id: str) -> None:
        if followee_id not in self._following.get(follower_id, set()):
            return  # not following
        self._following[follower_id].discard(followee_id)
        self._followers[followee_id].discard(follower_id)
        if follower_id in self._users:
            self._users[follower_id].following_count -= 1
        if followee_id in self._users:
            self._users[followee_id].follower_count -= 1

    def get_followers(self, user_id: str) -> set[str]:
        return self._followers.get(user_id, set())

    def get_following(self, user_id: str) -> set[str]:
        return self._following.get(user_id, set())

    def is_following(self, follower_id: str, followee_id: str) -> bool:
        return followee_id in self._following.get(follower_id, set())

Feed Service — Fan-out on Write (Push Model)


import heapq

class FeedService:
    """
    Push model: on post creation, write to all followers' feeds immediately.
    Pros: fast reads (O(1) feed lookup).
    Cons: expensive writes for high-follower accounts (celebrities with 10M+ followers).
    Solution: hybrid — push for regular users, pull for celebrities.
    """
    CELEBRITY_THRESHOLD = 1_000_000  # followers

    def __init__(self, follow_svc, post_store):
        self.follow_svc = follow_svc
        self.posts = post_store
        # user_id -> list of (timestamp, post_id) for their home feed
        self._feeds: dict[str, list] = {}

    def publish_post(self, post: Post, user: User) -> None:
        """Fan-out post to all followers' feeds (push model)."""
        followers = self.follow_svc.get_followers(post.author_id)
        entry = (-post.created_at.timestamp(), post.post_id)
        # Also add to author's own feed
        heapq.heappush(self._feeds.setdefault(post.author_id, []), entry)
        if user.follower_count >= self.CELEBRITY_THRESHOLD:
            return  # skip fan-out for celebrities; use pull on read
        for follower_id in followers:
            heapq.heappush(self._feeds.setdefault(follower_id, []), entry)

    def get_feed(self, user_id: str, limit: int = 20) -> list[str]:
        """Return top-limit post IDs for user's home feed."""
        heap = self._feeds.get(user_id, [])
        # Pull celebrity posts (not pre-pushed)
        following = self.follow_svc.get_following(user_id)
        celebrity_posts = []
        for followee_id in following:
            followee = self.posts.get_user(followee_id)
            if followee and followee.follower_count >= self.CELEBRITY_THRESHOLD:
                recent = self.posts.get_recent_posts(followee_id, limit=5)
                for p in recent:
                    celebrity_posts.append((-p.created_at.timestamp(), p.post_id))

        # Merge heap + celebrity posts
        all_posts = list(heap) + celebrity_posts
        all_posts.sort()
        seen = set()
        result = []
        for _, post_id in all_posts:
            if post_id not in seen and len(result) < limit:
                seen.add(post_id)
                result.append(post_id)
        return result

Like Service with Idempotency


class LikeService:
    def __init__(self, post_store):
        self.posts = post_store
        self._likes: dict[str, set] = {}  # post_id -> set of user_ids

    def like(self, user_id: str, post_id: str) -> bool:
        liked_by = self._likes.setdefault(post_id, set())
        if user_id in liked_by:
            return False  # already liked, idempotent
        liked_by.add(user_id)
        post = self.posts.get(post_id)
        if post:
            post.like_count += 1
        return True

    def unlike(self, user_id: str, post_id: str) -> bool:
        liked_by = self._likes.get(post_id, set())
        if user_id not in liked_by:
            return False  # not liked
        liked_by.discard(user_id)
        post = self.posts.get(post_id)
        if post:
            post.like_count = max(0, post.like_count - 1)
        return True

    def is_liked(self, user_id: str, post_id: str) -> bool:
        return user_id in self._likes.get(post_id, set())

    def like_count(self, post_id: str) -> int:
        return len(self._likes.get(post_id, set()))

Design Trade-offs

Decision Choice Trade-off
Feed generation Hybrid push/pull Fast reads for regular users; celebrities use pull to avoid 10M fan-out writes
Like storage Set per post O(1) toggle and lookup; in production use Redis SET or DB with unique constraint
Follow graph In-memory sets For LLD; production uses Graph DB (Neo4j) or adjacency list in Cassandra
Feed ordering Reverse chronological (min-heap) Simple; algorithmic ranking adds engagement signals

{
“@context”: “https://schema.org”,
“@type”: “FAQPage”,
“mainEntity”: [
{
“@type”: “Question”,
“name”: “What is the difference between fan-out on write and fan-out on read?”,
“acceptedAnswer”: {
“@type”: “Answer”,
“text”: “Fan-out on write (push): when a user posts, immediately push the post ID to all followers’ feed caches. Feed reads are O(1) u2014 just read the pre-built list. Write is expensive: O(followers) writes per post. Problematic for celebrities (10M followers = 10M writes). Fan-out on read (pull): feed is assembled at read time by fetching posts from all followed users. Read is expensive: O(following) queries merged and sorted. Push is cheap. Hybrid: push for regular users, pull for celebrities (above a follower threshold, e.g., 1M).”
}
},
{
“@type”: “Question”,
“name”: “How do you handle the celebrity problem in a social feed system?”,
“acceptedAnswer”: {
“@type”: “Answer”,
“text”: “Define a “celebrity” threshold (e.g., >1M followers). For celebrity accounts, skip fan-out on post creation. On feed generation, for each celebrity the user follows, query the celebrity’s recent posts directly (they are not in the pre-built feed cache). Merge celebrity posts with the pre-built feed using a min-heap sorted by timestamp, deduplicate, and return top-k. The cost: a small number of additional queries per feed load (users typically follow few celebrities). Celebrities’ posts are cached separately by post ID for fast random access.”
}
},
{
“@type”: “Question”,
“name”: “How do you ensure likes are idempotent?”,
“acceptedAnswer”: {
“@type”: “Answer”,
“text”: “Store a set of user_ids per post_id (in Redis: SADD post:{id}:likes user_id returns 0 if already member, 1 if added). Like is a no-op if the user is already in the set. Unlike removes from the set (SREM). Like count is SCARD of the set. This approach handles concurrent requests safely u2014 two simultaneous likes from the same user result in exactly one like. For the database-backed version: use a unique constraint on (post_id, user_id) in the likes table; duplicate insert raises IntegrityError which the application handles as “already liked”.”
}
},
{
“@type”: “Question”,
“name”: “How would you add an algorithmic ranking layer to the feed?”,
“acceptedAnswer”: {
“@type”: “Answer”,
“text”: “After generating the reverse-chronological candidate set (top-200 posts), score each post with a ranking model: score = recency_weight * time_decay + engagement_weight * (likes + reposts + comments) + relationship_weight * closeness_score + relevance_weight * topic_affinity. Return the top-k by score. Features come from fast stores: post engagement from Redis counters, user relationship score from a graph feature store, topic affinity from the user’s recent interaction history. The ranking model runs in milliseconds (lightweight ML model or heuristic).”
}
},
{
“@type”: “Question”,
“name”: “How would you scale follows and the social graph to billions of users?”,
“acceptedAnswer”: {
“@type”: “Answer”,
“text”: “Store the follow graph in a distributed adjacency list: Cassandra with partition key = user_id, clustering key = followee_id (for following list) and a reverse table with partition key = followee_id (for followers list). Each row is one edge u2014 compact, O(1) follow/unfollow. For fan-out, use a message queue (Kafka): post creation emits an event, a fan-out worker reads followers from Cassandra in batches and writes to Redis feed caches. This decouples post creation latency from fan-out time. Follower counts are cached in Redis counters (INCR/DECR on follow/unfollow).”
}
}
]
}

Asked at: Snap Interview Guide

Asked at: Twitter/X Interview Guide

Asked at: LinkedIn Interview Guide

Asked at: Airbnb Interview Guide

Scroll to Top