A referral tracking system attributes new user signups to the existing user who referred them, enabling reward programs and measuring growth loop effectiveness. Core challenges: persisting referral attribution across browser sessions and device switches, handling multi-touch attribution (user clicked multiple referral links), preventing self-referral and referral fraud, and issuing rewards reliably exactly once.
Core Data Model
CREATE TABLE ReferralCode (
code TEXT PRIMARY KEY, -- short human-readable code: "ALICE2024"
referrer_id UUID NOT NULL,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
is_active BOOLEAN NOT NULL DEFAULT TRUE,
click_count INT NOT NULL DEFAULT 0, -- denormalized for display
conversion_count INT NOT NULL DEFAULT 0
);
CREATE INDEX idx_referral_referrer ON ReferralCode (referrer_id);
CREATE TABLE ReferralClick (
click_id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
code TEXT NOT NULL REFERENCES ReferralCode(code),
ip_address INET,
user_agent TEXT,
clicked_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
converted BOOLEAN NOT NULL DEFAULT FALSE,
converted_at TIMESTAMPTZ
);
CREATE INDEX idx_click_code ON ReferralClick (code, clicked_at DESC);
CREATE TABLE ReferralConversion (
conversion_id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
click_id UUID REFERENCES ReferralClick(click_id),
referral_code TEXT NOT NULL,
referrer_id UUID NOT NULL,
referee_id UUID NOT NULL UNIQUE, -- one conversion per new user
reward_issued BOOLEAN NOT NULL DEFAULT FALSE,
reward_issued_at TIMESTAMPTZ,
converted_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
CREATE INDEX idx_conversion_referrer ON ReferralConversion (referrer_id);
Link Click and Attribution Cookie
import secrets, hashlib
from datetime import datetime, timezone, timedelta
from uuid import uuid4
COOKIE_TTL_DAYS = 30
def handle_referral_click(conn, code: str, ip_address: str,
user_agent: str) -> dict:
"""
Record a referral link click and return cookie data to set in the browser.
"""
with conn.cursor() as cur:
cur.execute(
"SELECT referrer_id, is_active FROM ReferralCode WHERE code = %s",
(code,)
)
row = cur.fetchone()
if not row or not row[1]:
raise ValueError("Invalid or inactive referral code")
click_id = str(uuid4())
with conn.cursor() as cur:
cur.execute(
"INSERT INTO ReferralClick (click_id, code, ip_address, user_agent) VALUES (%s,%s,%s,%s)",
(click_id, code, ip_address, user_agent)
)
cur.execute(
"UPDATE ReferralCode SET click_count = click_count + 1 WHERE code = %s",
(code,)
)
conn.commit()
# Return cookie payload for the client to set
return {
"click_id": click_id,
"code": code,
"expires": (datetime.now(timezone.utc) + timedelta(days=COOKIE_TTL_DAYS)).isoformat()
}
def attribute_referral_on_signup(conn, new_user_id: str, cookie_data: dict | None) -> dict | None:
"""
Called during user registration. Attributes the signup to the referrer
identified by the referral cookie (set when the user clicked the link).
Returns conversion record if attributed, None if no valid referral.
"""
if not cookie_data:
return None
code = cookie_data.get("code")
click_id = cookie_data.get("click_id")
with conn.cursor() as cur:
cur.execute(
"SELECT referrer_id FROM ReferralCode WHERE code=%s AND is_active=TRUE",
(code,)
)
row = cur.fetchone()
if not row:
return None
referrer_id = row[0]
# Prevent self-referral
if str(referrer_id) == str(new_user_id):
return None
# Insert conversion — UNIQUE(referee_id) prevents double attribution
conversion_id = str(uuid4())
try:
with conn.cursor() as cur:
cur.execute("""
INSERT INTO ReferralConversion
(conversion_id, click_id, referral_code, referrer_id, referee_id)
VALUES (%s,%s,%s,%s,%s)
""", (conversion_id, click_id, code, referrer_id, new_user_id))
# Mark click as converted
if click_id:
cur.execute(
"UPDATE ReferralClick SET converted=TRUE, converted_at=NOW() WHERE click_id=%s",
(click_id,)
)
cur.execute(
"UPDATE ReferralCode SET conversion_count = conversion_count + 1 WHERE code=%s",
(code,)
)
conn.commit()
except Exception:
conn.rollback()
return None # Duplicate conversion attempt — silently ignore
# Enqueue reward job (do not issue synchronously — decouple from registration)
enqueue_reward_job(conversion_id)
return {"conversion_id": conversion_id, "referrer_id": str(referrer_id)}
Idempotent Reward Issuance
def issue_referral_reward(conn, conversion_id: str):
"""
Issue reward to referrer for a verified conversion.
Idempotent: safe to retry; reward issued exactly once.
"""
with conn.cursor() as cur:
cur.execute("""
SELECT referrer_id, referee_id, reward_issued
FROM ReferralConversion
WHERE conversion_id = %s
""", (conversion_id,))
row = cur.fetchone()
if not row:
return # Unknown conversion
referrer_id, referee_id, reward_issued = row
if reward_issued:
return # Already issued — idempotent no-op
# Check qualifying conditions (e.g., referee completed first purchase)
if not referee_has_qualified(conn, referee_id):
# Re-enqueue for later check
enqueue_reward_job(conversion_id, delay_minutes=60)
return
# Atomic reward issuance: update flag + credit in same transaction
with conn.cursor() as cur:
cur.execute("""
UPDATE ReferralConversion
SET reward_issued=TRUE, reward_issued_at=NOW()
WHERE conversion_id=%s AND reward_issued=FALSE
""", (conversion_id,))
if cur.rowcount == 0:
return # Race condition — another process already issued it
credit_account(conn, referrer_id, reward_amount=1000) # $10 credit
conn.commit()
Fraud Detection
def detect_referral_fraud(conn) -> list[dict]:
"""
Identify suspicious referral patterns:
- Same IP generating many conversions for one referrer
- Referee accounts created within seconds of each other (bot signup)
- Referee accounts with no activity after joining
"""
suspicious = []
with conn.cursor() as cur:
# Pattern 1: Multiple conversions from same IP for same referrer
cur.execute("""
SELECT rc.referrer_id, cl.ip_address, COUNT(*) as cnt
FROM ReferralConversion rc
JOIN ReferralClick cl USING (click_id)
WHERE rc.converted_at > NOW() - interval '24 hours'
GROUP BY rc.referrer_id, cl.ip_address
HAVING COUNT(*) > 5
""")
for referrer_id, ip, cnt in cur.fetchall():
suspicious.append({"type": "same_ip", "referrer_id": referrer_id,
"ip": str(ip), "count": cnt})
# Pattern 2: Referee accounts with no activity (zombie accounts)
cur.execute("""
SELECT rc.conversion_id, rc.referee_id
FROM ReferralConversion rc
LEFT JOIN UserActivity ua ON rc.referee_id = ua.user_id
WHERE rc.converted_at < NOW() - interval '7 days'
AND ua.user_id IS NULL
AND rc.reward_issued = TRUE
""")
for conv_id, referee_id in cur.fetchall():
suspicious.append({"type": "zombie_referee", "conversion_id": conv_id,
"referee_id": referee_id})
return suspicious
Key Interview Points
- Last-click attribution: When a user clicks referral link from Alice, then later from Bob, which referrer gets credit? Last-click (most recent cookie wins) is simplest and most common. First-click (Alice keeps credit) rewards the originator. Multi-touch splits credit proportionally. For most referral programs, last-click is correct — the most recent referral is most likely the deciding influence. Implement by overwriting the referral cookie on each click.
- Cookie persistence across devices: A user clicks a referral link on mobile but signs up on desktop — no cookie is present. Mitigation: (1) if the referral link has the code in the URL parameter and the user enters the code manually at signup, attribute it; (2) email-based referral links encode the code in the email itself — the user carries it across devices; (3) some systems use fingerprinting (IP + user agent hash) as a fallback attribution signal. Never use fingerprinting as the primary signal — it generates false positives.
- Reward qualification gating: Issue rewards only after the referee has demonstrated intent: completed a purchase, verified email, or been active for 7 days. This prevents reward farming with throwaway accounts. The enqueue_reward_job polls until qualification — retry up to 30 days, then expire the pending reward. Store the qualification check result to avoid repeated DB queries on every retry.
- Fraud rings: Sophisticated fraud involves real users who create multiple accounts with different emails and IPs. Detection signals: account age (referee accounts less than 1 day old), behavioral patterns (no activity after joining), velocity (referrer accumulated 100 conversions in 24 hours when average is 2/month). Flag for manual review rather than auto-blocking — false positives harm legitimate power users.
- Referral program economics: Track cost per acquisition (CPA) through the referral channel: total rewards issued / conversions. Compare with paid acquisition channels. A well-designed referral program typically achieves CPA 50–70% below paid channels because referrals come pre-qualified (existing users refer people similar to themselves). Track cohort LTV of referred users vs. non-referred — referred users typically have 20–40% higher LTV.
{“@context”:”https://schema.org”,”@type”:”FAQPage”,”mainEntity”:[{“@type”:”Question”,”name”:”How does last-click attribution work and when should you use first-click instead?”,”acceptedAnswer”:{“@type”:”Answer”,”text”:”Last-click attribution: the most recently clicked referral link gets credit for the conversion. Implementation: each referral link click overwrites the referral cookie in the browser. On signup, the most recent cookie is used for attribution. Last-click rewards the referrer who most immediately influenced the decision — appropriate for most consumer referral programs (the person who reminded you to sign up deserves the credit). First-click attribution: the first referral link ever clicked gets permanent credit. Implementation: only set the cookie if no referral cookie already exists. Use first-click for: (1) programs where awareness is the primary value (the first person who introduced you to the product); (2) B2B sales with long consideration cycles where the initial introduction matters most. Most consumer products use last-click for simplicity.”}},{“@type”:”Question”,”name”:”How do you persist referral attribution across different browsers and devices?”,”acceptedAnswer”:{“@type”:”Answer”,”text”:”A user clicks a referral link on mobile Chrome, then signs up on desktop Firefox — no cookie is present. Attribution fails. Mitigations: (1) URL parameter persistence — if the referral code is in the URL (?ref=ALICE), the user can share or copy the URL to another device. Detect the ref parameter at signup and attribute it even without a cookie; (2) email-based referral — the referral link is embedded in an email, and users often open email on the same device they sign up with; (3) device fingerprinting (IP + User-Agent hash) as a weak signal — cross-device attribution if both devices share an IP (same household); (4) manual code entry — show "Got a referral code? Enter it here" at signup. These layered approaches recover attribution for most cross-device scenarios.”}},{“@type”:”Question”,”name”:”What makes a referral reward safe from the double-payment race condition?”,”acceptedAnswer”:{“@type”:”Answer”,”text”:”Two scenarios cause double payment: (1) the reward job is retried after a partial failure; (2) two worker processes pick up the same conversion simultaneously. Safe pattern: UPDATE ReferralConversion SET reward_issued=TRUE, reward_issued_at=NOW() WHERE conversion_id=%s AND reward_issued=FALSE. If reward_issued is already TRUE (prior run completed), rowcount=0 — no action taken. If two workers run concurrently: PostgreSQL row-level locking ensures only one UPDATE succeeds; the other gets rowcount=0. The credit_account() call must be inside the same transaction as the UPDATE — either both commit or neither does. This transactional reward pattern is the same as the ledger pattern used in loyalty systems: atomically write the event and update the balance.”}},{“@type”:”Question”,”name”:”How do you detect and handle self-referral fraud?”,”acceptedAnswer”:{“@type”:”Answer”,”text”:”Self-referral: a user shares their own referral code, opens a browser tab in incognito, creates a new account with a temporary email, and earns the referral bonus. Prevention: (1) check referrer_id != referee_id at attribution time — the most obvious case where the user somehow signs up with their own code; (2) device fingerprint match: if the referee device fingerprint matches the referrer’s recently seen fingerprint, flag for review; (3) email domain: if both accounts use the same custom domain, flag; (4) payment method: if the referee’s payment card matches the referrer’s, flag (for programs that require a first purchase); (5) IP match within 24 hours: referrer and referee sharing an IP is suspicious. Auto-block obvious cases; send the rest to a manual review queue rather than auto-blocking (to avoid false positives from family members).”}},{“@type”:”Question”,”name”:”How do you measure the ROI of a referral program?”,”acceptedAnswer”:{“@type”:”Answer”,”text”:”Key metrics: (1) Cost per acquisition (CPA): total rewards paid / total conversions. Compare with paid channel CPA (typical: referral CPA is 40–60% lower); (2) Conversion rate: clicks / signups through referral links (typically 15–30% vs 2–5% for paid ads — referred users are pre-qualified); (3) Cohort LTV comparison: compare 12-month LTV of referred users vs organic users (referred users typically 20–40% higher LTV — they come with higher trust); (4) k-factor: average number of referrals per user × conversion rate. k > 1 means viral growth. (5) Payback period: time for referred user LTV to exceed the reward cost. Track these metrics in a dashboard querying ReferralConversion, ReferralCode.conversion_count, and user activity data. A/B test reward amounts to find the optimal incentive.”}}]}
Referral tracking and growth attribution system design is discussed in Airbnb system design interview questions.
Referral tracking and ride credit attribution design is covered in Uber system design interview preparation.
Referral tracking and crypto reward attribution design is discussed in Coinbase system design interview guide.