A referral tracking system attributes new user signups to the existing user who referred them, enabling reward programs and measuring growth loop effectiveness. Core challenges: persisting referral attribution across browser sessions and device switches, handling multi-touch attribution (user clicked multiple referral links), preventing self-referral and referral fraud, and issuing rewards reliably exactly once.
Core Data Model
CREATE TABLE ReferralCode (
code TEXT PRIMARY KEY, -- short human-readable code: "ALICE2024"
referrer_id UUID NOT NULL,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
is_active BOOLEAN NOT NULL DEFAULT TRUE,
click_count INT NOT NULL DEFAULT 0, -- denormalized for display
conversion_count INT NOT NULL DEFAULT 0
);
CREATE INDEX idx_referral_referrer ON ReferralCode (referrer_id);
CREATE TABLE ReferralClick (
click_id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
code TEXT NOT NULL REFERENCES ReferralCode(code),
ip_address INET,
user_agent TEXT,
clicked_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
converted BOOLEAN NOT NULL DEFAULT FALSE,
converted_at TIMESTAMPTZ
);
CREATE INDEX idx_click_code ON ReferralClick (code, clicked_at DESC);
CREATE TABLE ReferralConversion (
conversion_id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
click_id UUID REFERENCES ReferralClick(click_id),
referral_code TEXT NOT NULL,
referrer_id UUID NOT NULL,
referee_id UUID NOT NULL UNIQUE, -- one conversion per new user
reward_issued BOOLEAN NOT NULL DEFAULT FALSE,
reward_issued_at TIMESTAMPTZ,
converted_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
CREATE INDEX idx_conversion_referrer ON ReferralConversion (referrer_id);
Link Click and Attribution Cookie
import secrets, hashlib
from datetime import datetime, timezone, timedelta
from uuid import uuid4
COOKIE_TTL_DAYS = 30
def handle_referral_click(conn, code: str, ip_address: str,
user_agent: str) -> dict:
"""
Record a referral link click and return cookie data to set in the browser.
"""
with conn.cursor() as cur:
cur.execute(
"SELECT referrer_id, is_active FROM ReferralCode WHERE code = %s",
(code,)
)
row = cur.fetchone()
if not row or not row[1]:
raise ValueError("Invalid or inactive referral code")
click_id = str(uuid4())
with conn.cursor() as cur:
cur.execute(
"INSERT INTO ReferralClick (click_id, code, ip_address, user_agent) VALUES (%s,%s,%s,%s)",
(click_id, code, ip_address, user_agent)
)
cur.execute(
"UPDATE ReferralCode SET click_count = click_count + 1 WHERE code = %s",
(code,)
)
conn.commit()
# Return cookie payload for the client to set
return {
"click_id": click_id,
"code": code,
"expires": (datetime.now(timezone.utc) + timedelta(days=COOKIE_TTL_DAYS)).isoformat()
}
def attribute_referral_on_signup(conn, new_user_id: str, cookie_data: dict | None) -> dict | None:
"""
Called during user registration. Attributes the signup to the referrer
identified by the referral cookie (set when the user clicked the link).
Returns conversion record if attributed, None if no valid referral.
"""
if not cookie_data:
return None
code = cookie_data.get("code")
click_id = cookie_data.get("click_id")
with conn.cursor() as cur:
cur.execute(
"SELECT referrer_id FROM ReferralCode WHERE code=%s AND is_active=TRUE",
(code,)
)
row = cur.fetchone()
if not row:
return None
referrer_id = row[0]
# Prevent self-referral
if str(referrer_id) == str(new_user_id):
return None
# Insert conversion — UNIQUE(referee_id) prevents double attribution
conversion_id = str(uuid4())
try:
with conn.cursor() as cur:
cur.execute("""
INSERT INTO ReferralConversion
(conversion_id, click_id, referral_code, referrer_id, referee_id)
VALUES (%s,%s,%s,%s,%s)
""", (conversion_id, click_id, code, referrer_id, new_user_id))
# Mark click as converted
if click_id:
cur.execute(
"UPDATE ReferralClick SET converted=TRUE, converted_at=NOW() WHERE click_id=%s",
(click_id,)
)
cur.execute(
"UPDATE ReferralCode SET conversion_count = conversion_count + 1 WHERE code=%s",
(code,)
)
conn.commit()
except Exception:
conn.rollback()
return None # Duplicate conversion attempt — silently ignore
# Enqueue reward job (do not issue synchronously — decouple from registration)
enqueue_reward_job(conversion_id)
return {"conversion_id": conversion_id, "referrer_id": str(referrer_id)}
Idempotent Reward Issuance
def issue_referral_reward(conn, conversion_id: str):
"""
Issue reward to referrer for a verified conversion.
Idempotent: safe to retry; reward issued exactly once.
"""
with conn.cursor() as cur:
cur.execute("""
SELECT referrer_id, referee_id, reward_issued
FROM ReferralConversion
WHERE conversion_id = %s
""", (conversion_id,))
row = cur.fetchone()
if not row:
return # Unknown conversion
referrer_id, referee_id, reward_issued = row
if reward_issued:
return # Already issued — idempotent no-op
# Check qualifying conditions (e.g., referee completed first purchase)
if not referee_has_qualified(conn, referee_id):
# Re-enqueue for later check
enqueue_reward_job(conversion_id, delay_minutes=60)
return
# Atomic reward issuance: update flag + credit in same transaction
with conn.cursor() as cur:
cur.execute("""
UPDATE ReferralConversion
SET reward_issued=TRUE, reward_issued_at=NOW()
WHERE conversion_id=%s AND reward_issued=FALSE
""", (conversion_id,))
if cur.rowcount == 0:
return # Race condition — another process already issued it
credit_account(conn, referrer_id, reward_amount=1000) # $10 credit
conn.commit()
Fraud Detection
def detect_referral_fraud(conn) -> list[dict]:
"""
Identify suspicious referral patterns:
- Same IP generating many conversions for one referrer
- Referee accounts created within seconds of each other (bot signup)
- Referee accounts with no activity after joining
"""
suspicious = []
with conn.cursor() as cur:
# Pattern 1: Multiple conversions from same IP for same referrer
cur.execute("""
SELECT rc.referrer_id, cl.ip_address, COUNT(*) as cnt
FROM ReferralConversion rc
JOIN ReferralClick cl USING (click_id)
WHERE rc.converted_at > NOW() - interval '24 hours'
GROUP BY rc.referrer_id, cl.ip_address
HAVING COUNT(*) > 5
""")
for referrer_id, ip, cnt in cur.fetchall():
suspicious.append({"type": "same_ip", "referrer_id": referrer_id,
"ip": str(ip), "count": cnt})
# Pattern 2: Referee accounts with no activity (zombie accounts)
cur.execute("""
SELECT rc.conversion_id, rc.referee_id
FROM ReferralConversion rc
LEFT JOIN UserActivity ua ON rc.referee_id = ua.user_id
WHERE rc.converted_at < NOW() - interval '7 days'
AND ua.user_id IS NULL
AND rc.reward_issued = TRUE
""")
for conv_id, referee_id in cur.fetchall():
suspicious.append({"type": "zombie_referee", "conversion_id": conv_id,
"referee_id": referee_id})
return suspicious
Key Interview Points
- Last-click attribution: When a user clicks referral link from Alice, then later from Bob, which referrer gets credit? Last-click (most recent cookie wins) is simplest and most common. First-click (Alice keeps credit) rewards the originator. Multi-touch splits credit proportionally. For most referral programs, last-click is correct — the most recent referral is most likely the deciding influence. Implement by overwriting the referral cookie on each click.
- Cookie persistence across devices: A user clicks a referral link on mobile but signs up on desktop — no cookie is present. Mitigation: (1) if the referral link has the code in the URL parameter and the user enters the code manually at signup, attribute it; (2) email-based referral links encode the code in the email itself — the user carries it across devices; (3) some systems use fingerprinting (IP + user agent hash) as a fallback attribution signal. Never use fingerprinting as the primary signal — it generates false positives.
- Reward qualification gating: Issue rewards only after the referee has demonstrated intent: completed a purchase, verified email, or been active for 7 days. This prevents reward farming with throwaway accounts. The enqueue_reward_job polls until qualification — retry up to 30 days, then expire the pending reward. Store the qualification check result to avoid repeated DB queries on every retry.
- Fraud rings: Sophisticated fraud involves real users who create multiple accounts with different emails and IPs. Detection signals: account age (referee accounts less than 1 day old), behavioral patterns (no activity after joining), velocity (referrer accumulated 100 conversions in 24 hours when average is 2/month). Flag for manual review rather than auto-blocking — false positives harm legitimate power users.
- Referral program economics: Track cost per acquisition (CPA) through the referral channel: total rewards issued / conversions. Compare with paid acquisition channels. A well-designed referral program typically achieves CPA 50–70% below paid channels because referrals come pre-qualified (existing users refer people similar to themselves). Track cohort LTV of referred users vs. non-referred — referred users typically have 20–40% higher LTV.
Referral tracking and growth attribution system design is discussed in Airbnb system design interview questions.
Referral tracking and ride credit attribution design is covered in Uber system design interview preparation.
Referral tracking and crypto reward attribution design is discussed in Coinbase system design interview guide.