Push Notification System Low-Level Design
A push notification system must reliably deliver time-sensitive messages to millions of devices across Android (FCM) and iOS (APNs), respect user quiet hours, fan out to topic subscribers efficiently, and track delivery receipts so product teams can measure engagement. This article covers the full low-level design from device token registration through fan-out workers, retry logic, and analytics.
Device Token Registration
Every device receives a push token from the OS after the user grants permission. The token changes on app reinstall and periodically on iOS. The registration endpoint must handle upserts gracefully:
from dataclasses import dataclass
from enum import Enum
from typing import Optional
import uuid
class Platform(str, Enum):
    """Push provider that issued a device token.

    Members are also ``str`` instances, so each one compares equal to its
    raw value (the same strings the ``platform`` column accepts).
    """

    # Firebase Cloud Messaging.
    FCM = "fcm"
    # Apple Push Notification service.
    APNS = "apns"
@dataclass
class DeviceTokenRegistration:
    """Registration payload handed to ``register_device_token``."""

    user_id: int
    # Stable app-level UUID stored on device.
    device_id: str
    # FCM registration token or APNs device token.
    token: str
    platform: Platform
    app_version: str
    # e.g. "en-US"
    locale: str
    # e.g. "America/New_York"
    timezone: str
def register_device_token(db, reg: DeviceTokenRegistration) -> None:
    """Upsert a device token; handles token rotation transparently.

    Inserts a new ``device_token`` row, or refreshes the existing row for
    ``reg.device_id``. On conflict the owning ``user_id`` and ``platform``
    are refreshed along with the token, so a device that is handed to a
    different account stops receiving the previous user's notifications.
    Any earlier invalidation is cleared because a fresh registration is
    taken as evidence that the token is live again.

    Args:
        db: Database handle exposing ``execute(sql, params)``.
        reg: Registration payload sent by the client.
    """
    db.execute(
        """
        INSERT INTO device_token
            (device_id, user_id, token, platform, app_version, locale, timezone, updated_at)
        VALUES
            (%s, %s, %s, %s, %s, %s, %s, NOW())
        ON CONFLICT (device_id) DO UPDATE SET
            user_id = EXCLUDED.user_id, -- device may have switched accounts
            token = EXCLUDED.token,
            platform = EXCLUDED.platform,
            app_version = EXCLUDED.app_version,
            locale = EXCLUDED.locale,
            timezone = EXCLUDED.timezone,
            updated_at = NOW(),
            invalid_at = NULL -- clear any previous invalidation
        """,
        (
            reg.device_id,
            reg.user_id,
            reg.token,
            reg.platform.value,
            reg.app_version,
            reg.locale,
            reg.timezone,
        ),
    )
SQL Schema
-- Push tokens, one row per device_id (UNIQUE). register_device_token upserts
-- on device_id, and send_push stamps invalid_at when the provider reports the
-- token as invalid.
CREATE TABLE device_token (
    id BIGSERIAL PRIMARY KEY,
    device_id UUID NOT NULL UNIQUE, -- app-assigned stable ID
    user_id BIGINT NOT NULL,
    token TEXT NOT NULL,
    platform TEXT NOT NULL CHECK (platform IN ('fcm','apns')),
    app_version TEXT,
    locale TEXT NOT NULL DEFAULT 'en-US',
    -- Passed to is_quiet_hour() by the topic fan-out query.
    timezone TEXT NOT NULL DEFAULT 'UTC',
    invalid_at TIMESTAMPTZ DEFAULT NULL, -- set when APNs/FCM reports token dead
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- Partial index: only rows whose token has not been invalidated are indexed,
-- matching the "invalid_at IS NULL" filter used by the topic fan-out query.
CREATE INDEX idx_dt_user_valid ON device_token (user_id)
    WHERE invalid_at IS NULL;
-- Named message templates. fan_out_topic looks a template up by name and
-- renders title_tmpl / body_tmpl against the caller-supplied context.
CREATE TABLE notification_template (
    id BIGSERIAL PRIMARY KEY,
    name TEXT NOT NULL UNIQUE,
    title_tmpl TEXT NOT NULL, -- Jinja2 template
    body_tmpl TEXT NOT NULL,
    data_schema JSONB, -- expected payload keys
    -- 86400 s = 24 h. NOTE(review): no code in this article reads this column;
    -- presumably it is meant to feed the provider TTL. Confirm.
    ttl_seconds INT NOT NULL DEFAULT 86400,
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- One row per (notification, device) send. Rows are inserted by fan_out_topic
-- and then updated by send_push / schedule_retry as attempts are made.
CREATE TABLE notification_delivery (
    id BIGSERIAL PRIMARY KEY,
    device_id UUID NOT NULL REFERENCES device_token(device_id),
    template_id BIGINT REFERENCES notification_template(id),
    -- fan_out_topic builds this as "<topic>:<event_id>:<device_id>".
    idempotency_key TEXT NOT NULL UNIQUE, -- prevents double-send
    payload JSONB NOT NULL,
    -- send_push writes 'sent' and 'failed'; schedule_retry writes 'failed'.
    -- NOTE(review): nothing in this article sets 'delivered' or 'expired';
    -- presumably a receipt handler / expiry job does. Confirm.
    status TEXT NOT NULL DEFAULT 'pending'
        CHECK (status IN ('pending','sent','delivered','failed','expired')),
    attempt_count INT NOT NULL DEFAULT 0, -- incremented by schedule_retry
    next_attempt TIMESTAMPTZ, -- earliest time of the next send attempt
    sent_at TIMESTAMPTZ,
    delivered_at TIMESTAMPTZ,
    failed_at TIMESTAMPTZ,
    provider_msg_id TEXT, -- FCM message_id or APNs apns-id
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- Partial index over the pending rows only, ordered by when they become due.
CREATE INDEX idx_nd_pending ON notification_delivery (next_attempt)
    WHERE status = 'pending';
FCM/APNs Abstraction Layer
Abstract both providers behind a single interface so worker code is provider-agnostic:
import httpx
import jwt
import time
from typing import Optional
class PushResult:
    """Provider-agnostic outcome of a single push send attempt."""

    def __init__(self, success: bool, provider_id: Optional[str], error: Optional[str], token_invalid: bool = False):
        # Whether the provider accepted the message, and the id it assigned.
        self.success, self.provider_id = success, provider_id
        # Failure description, plus a flag that signals the token should be
        # marked dead.
        self.error, self.token_invalid = error, token_invalid
def _fcm_token_is_dead(err: dict) -> bool:
    """Return True when an FCM v1 error object says the target token is unusable."""
    details = [d for d in (err.get("details") or []) if isinstance(d, dict)]
    codes = {d.get("errorCode") for d in details}
    codes.add(err.get("status"))

    # HTTP v1 reports an unregistered token as status NOT_FOUND with the
    # FcmError detail errorCode "UNREGISTERED", so the details must be checked.
    if "UNREGISTERED" in codes:
        return True

    # INVALID_ARGUMENT is also returned for bad payloads (reserved data keys,
    # oversized message, bad TTL). Only treat it as a dead token when the
    # field violation points at the token itself; otherwise one malformed
    # notification would invalidate every device it was sent to.
    if "INVALID_ARGUMENT" in codes:
        return any(
            isinstance(violation, dict) and violation.get("field") == "message.token"
            for detail in details
            for violation in (detail.get("fieldViolations") or [])
        )
    return False


async def send_fcm(token: str, title: str, body: str, data: dict, ttl: int) -> PushResult:
    """Send one notification through the FCM HTTP v1 API.

    Args:
        token: FCM registration token of the target device.
        title: Notification title.
        body: Notification body text.
        data: Custom key/value data; values are stringified before sending.
        ttl: Time-to-live in seconds, sent as the Android ``ttl`` field.

    Returns:
        A ``PushResult``. ``token_invalid`` is True only when FCM indicates
        the token itself is dead. Transport failures and unparseable
        responses come back as ordinary failures so the caller can retry
        instead of crashing.
    """
    payload = {
        "message": {
            "token": token,
            "notification": {"title": title, "body": body},
            "data": {k: str(v) for k, v in data.items()},
            "android": {"ttl": f"{ttl}s", "priority": "high"}
        }
    }
    try:
        async with httpx.AsyncClient() as client:
            resp = await client.post(
                "https://fcm.googleapis.com/v1/projects/MY_PROJECT/messages:send",
                json=payload,
                headers={"Authorization": f"Bearer {get_fcm_access_token()}"}
            )
    except httpx.HTTPError as exc:
        # Timeouts / connection errors are transient: report a retryable failure.
        return PushResult(False, None, f"FCM transport error: {exc}")

    # Gateways can answer with non-JSON bodies (e.g. an HTML 502 page).
    try:
        body_json = resp.json()
    except ValueError:
        body_json = None
    if not isinstance(body_json, dict):
        body_json = {}

    if resp.status_code == 200:
        return PushResult(True, body_json.get("name"), None)

    err = body_json.get("error")
    if not isinstance(err, dict):
        err = {}
    message = err.get("message") or f"FCM returned HTTP {resp.status_code}"
    return PushResult(False, None, message, _fcm_token_is_dead(err))
async def send_push(db, delivery_id: int) -> None:
    """Fetch a delivery record and dispatch it to the correct provider.

    The delivery is skipped when the row does not exist or is no longer
    ``pending`` (so a re-delivered job cannot send the same push twice).
    It is marked ``failed`` without contacting the provider when the target
    device row is missing or its token has already been invalidated.

    Outcome handling:
      * success       -> status ``sent`` with the provider message id
      * token invalid -> device token marked dead, delivery ``failed``
      * other failure -> handed to ``schedule_retry``

    Args:
        db: Database handle exposing ``fetchone`` and ``execute``.
        delivery_id: Primary key of the ``notification_delivery`` row.
    """
    row = db.fetchone("SELECT * FROM notification_delivery WHERE id=%s", (delivery_id,))
    if row is None or row['status'] != 'pending':
        # Unknown id, or already sent / failed / expired: nothing to do.
        return

    device = db.fetchone("SELECT * FROM device_token WHERE device_id=%s", (row['device_id'],))
    if device is None or device['invalid_at'] is not None:
        # The token is known to be dead; do not waste a provider call on it.
        db.execute("UPDATE notification_delivery SET status='failed', failed_at=NOW() WHERE id=%s", (delivery_id,))
        return

    payload = row['payload']
    if device['platform'] == 'fcm':
        result = await send_fcm(device['token'], payload['title'], payload['body'],
                                payload.get('data', {}), payload.get('ttl', 86400))
    else:
        result = await send_apns(device['token'], payload)

    if result.success:
        db.execute("UPDATE notification_delivery SET status='sent', sent_at=NOW(), "
                   "provider_msg_id=%s WHERE id=%s", (result.provider_id, delivery_id))
    elif result.token_invalid:
        db.execute("UPDATE device_token SET invalid_at=NOW() WHERE device_id=%s", (device['device_id'],))
        db.execute("UPDATE notification_delivery SET status='failed', failed_at=NOW() WHERE id=%s", (delivery_id,))
    else:
        schedule_retry(db, delivery_id, row['attempt_count'])
Topic Fan-Out Workers
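Sending a notification to a topic (e.g., “breaking_news”) means fanning out to potentially millions of devices. The fan-out step below only enqueues one notification_delivery row per eligible device, paging through subscribers 1,000 at a time; delivery workers then treat that table as a PostgreSQL job queue, claiming pending rows with SELECT … FOR UPDATE SKIP LOCKED so multiple workers share the load without contention: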
async def fan_out_topic(db, topic: str, template_name: str, context: dict) -> int:
    """Enqueue one notification_delivery row per active device subscribed to topic.

    Devices are paged in batches of 1000 using keyset pagination on
    ``device_id``. Devices with an invalidated token are skipped, and so are
    devices currently inside quiet hours when the subscription has quiet
    hours enabled. Those devices are filtered out, not deferred.

    Args:
        db: Database handle exposing ``fetchone``, ``fetchall``, ``executemany``.
        topic: Topic name to fan out to.
        template_name: ``notification_template.name`` used to render the text.
        context: Template variables; also stored as the payload ``data``.
            ``context['event_id']`` makes the fan-out idempotent across
            re-runs. When it is absent a random id is generated for this
            call, so the notification is still delivered but a re-run is
            not deduplicated.

    Returns:
        Number of device rows submitted for insertion. Rows skipped by
        ``ON CONFLICT DO NOTHING`` are included in this count.

    Raises:
        ValueError: If no template named ``template_name`` exists.
    """
    # Imported locally so the function does not depend on file-level imports.
    import json
    import uuid
    from datetime import datetime, timezone

    template = db.fetchone(
        "SELECT * FROM notification_template WHERE name=%s", (template_name,)
    )
    if template is None:
        raise ValueError(f"unknown notification template: {template_name!r}")

    title = render_template(template['title_tmpl'], context)
    body = render_template(template['body_tmpl'], context)
    # The payload is identical for every device, so serialise it once.
    payload_json = json.dumps({"title": title, "body": body, "data": context})

    # A constant placeholder here would make every later fan-out without an
    # event_id collide on the idempotency key and be silently dropped.
    event_id = context.get('event_id')
    if event_id is None:
        event_id = uuid.uuid4().hex

    # Insert in batches of 1000 to avoid lock escalation
    count = 0
    cursor = None
    while True:
        rows = db.fetchall(
            """
            SELECT dt.device_id FROM topic_subscription ts
            JOIN device_token dt ON dt.user_id = ts.user_id
            WHERE ts.topic = %s
            AND dt.invalid_at IS NULL
            AND (NOT ts.quiet_hours_enabled OR NOT is_quiet_hour(dt.timezone))
            AND (%s IS NULL OR dt.device_id > %s)
            ORDER BY dt.device_id
            LIMIT 1000
            """,
            (topic, cursor, cursor)
        )
        if not rows:
            break
        # Timezone-aware, because next_attempt is TIMESTAMPTZ: a naive value
        # would be interpreted in the session time zone.
        enqueue_at = datetime.now(timezone.utc)
        values = [
            (row['device_id'], template['id'],
             f"{topic}:{event_id}:{row['device_id']}",
             payload_json,
             enqueue_at)
            for row in rows
        ]
        db.executemany(
            """INSERT INTO notification_delivery
            (device_id, template_id, idempotency_key, payload, next_attempt)
            VALUES (%s,%s,%s,%s,%s)
            ON CONFLICT (idempotency_key) DO NOTHING""",
            values
        )
        count += len(rows)
        # Keyset cursor: the next page starts after the last device seen.
        cursor = rows[-1]['device_id']
    return count
Quiet Hours Enforcement
Users in timezone “America/Los_Angeles” should not receive marketing pushes at 2 AM. The is_quiet_hour function is a PostgreSQL helper that returns true when the current local time in the given timezone falls between 22:00 and 07:59:
-- Returns TRUE when the current wall-clock hour in time zone tz is 22, 23 or
-- 0 through 7. Hours 8 through 21 are the only non-quiet hours.
-- NOTE(review): AT TIME ZONE raises on an unrecognised zone name, which would
-- abort the calling query. Confirm that stored timezones are validated.
CREATE OR REPLACE FUNCTION is_quiet_hour(tz TEXT)
RETURNS BOOLEAN
LANGUAGE sql
STABLE
AS $$
    SELECT EXTRACT(HOUR FROM NOW() AT TIME ZONE tz) NOT BETWEEN 8 AND 21;
$$;
Retry with Exponential Backoff
import math
MAX_ATTEMPTS = 5  # total send attempts per delivery, including the first


def schedule_retry(db, delivery_id: int, attempt_count: int) -> None:
    """Schedule the next attempt with exponential backoff, or give up.

    ``attempt_count`` is the stored number of retries already scheduled, so
    the attempt that just failed is number ``attempt_count + 1``. Once that
    reaches ``MAX_ATTEMPTS`` the delivery is marked ``failed``. Otherwise the
    row's ``attempt_count`` is incremented and ``next_attempt`` is pushed out
    by 30s, 60s, 120s, ... (capped at one hour).

    Args:
        db: Database handle exposing ``execute(sql, params)``.
        delivery_id: Primary key of the ``notification_delivery`` row.
        attempt_count: The row's ``attempt_count`` before this failure.
    """
    attempts_made = attempt_count + 1
    if attempts_made >= MAX_ATTEMPTS:
        db.execute(
            "UPDATE notification_delivery SET status='failed', failed_at=NOW() WHERE id=%s",
            (delivery_id,)
        )
        return

    delay_seconds = min(30 * (2 ** attempt_count), 3600)  # cap at 1 hour
    # Multiplying an interval avoids concatenating an integer parameter with
    # text, which only works when the driver interpolates values client-side.
    db.execute(
        """UPDATE notification_delivery
        SET attempt_count = attempt_count + 1,
        next_attempt = NOW() + %s * INTERVAL '1 second'
        WHERE id = %s""",
        (delay_seconds, delivery_id)
    )
{
“@context”: “https://schema.org”,
“@type”: “FAQPage”,
“mainEntity”: [
{
“@type”: “Question”,
“name”: “How do you handle stale or invalid device tokens in a push system?”,
“acceptedAnswer”: {
“@type”: “Answer”,
“text”: “Both FCM and APNs return specific error codes when a token is no longer valid (FCM: UNREGISTERED; APNs: 410 Gone). On receiving these errors the worker sets invalid_at on the device_token row so future fan-outs skip it. The invalid_at flag is cleared on the next app launch, when a fresh registration upsert arrives with the current token.”
}
},
{
“@type”: “Question”,
“name”: “What prevents duplicate pushes during a fan-out retry?”,
“acceptedAnswer”: {
“@type”: “Answer”,
“text”: “Each notification_delivery row has an idempotency_key composed of topic + event_id + device_id. The INSERT uses ON CONFLICT (idempotency_key) DO NOTHING, so re-running a fan-out job after a partial failure only inserts rows that were not already created.”
}
},
{
“@type”: “Question”,
“name”: “How should quiet hours be enforced without storing per-user schedules?”,
“acceptedAnswer”: {
“@type”: “Answer”,
“text”: “Store the device timezone during registration. At fan-out time, filter out devices where the current UTC time maps to quiet hours in their timezone using a lightweight SQL function. This avoids per-user schedule tables and keeps the logic in the database close to the data.”
}
},
{
“@type”: “Question”,
“name”: “How do you scale fan-out to millions of devices?”,
“acceptedAnswer”: {
“@type”: “Answer”,
“text”: “Partition fan-out across multiple workers using cursor-based pagination over device_id. Each worker claims a batch using SELECT … SKIP LOCKED and inserts delivery rows independently. For very large topics (100M+ devices), pre-shard the device_token table by user_id mod N and run one fan-out worker per shard.”
}
}
]
}
{
“@context”: “https://schema.org”,
“@type”: “FAQPage”,
“mainEntity”: [
{
“@type”: “Question”,
“name”: “How is FCM/APNs abstraction implemented?”,
“acceptedAnswer”: {
“@type”: “Answer”,
“text”: “Each provider is wrapped in its own send function (send_fcm, send_apns) that returns a common PushResult; the send_push dispatcher selects the function based on the device platform field.”
}
},
{
“@type”: “Question”,
“name”: “How does topic fan-out work at scale?”,
“acceptedAnswer”: {
“@type”: “Answer”,
“text”: “A fan-out worker uses SELECT FOR UPDATE SKIP LOCKED to claim batches of DeviceToken rows for a topic and dispatches to the push provider in parallel threads or async tasks.”
}
},
{
“@type”: “Question”,
“name”: “How are delivery receipts tracked?”,
“acceptedAnswer”: {
“@type”: “Answer”,
“text”: “The push provider's response (success/failure/token_invalid) is written to NotificationDelivery; invalid tokens are deactivated to prevent future sends.”
}
},
{
“@type”: “Question”,
“name”: “What triggers quiet hours enforcement?”,
“acceptedAnswer”: {
“@type”: “Answer”,
“text”: “At fan-out time, for subscriptions with quiet_hours_enabled set, the system converts the current time into the timezone stored on the device and checks it against the quiet window (22:00–07:59 local); devices inside the window are skipped for that fan-out rather than sent to immediately.”
}
}
]
}
See also: Meta Interview Guide 2026: Facebook, Instagram, WhatsApp Engineering
See also: Apple Interview Guide 2026: iOS Systems, Hardware-Software Integration, and iCloud Architecture
See also: Snap Interview Guide