Location Tracking Low-Level Design: GPS Updates, Geofencing, and Proximity Queries

A real-time location tracking system records device positions, supports geofencing, proximity queries, and stores movement history at scale. Core challenges: high-frequency write throughput (GPS updates every 5 seconds per device), efficient spatial queries without full-table scans, and expiring raw position data while retaining summarized traces.

Core Data Model

-- Raw location updates (high write volume, partition by day)
-- recorded_at is in the PK because Postgres requires the partition key to
-- appear in every unique constraint on a partitioned table.
CREATE TABLE LocationUpdate (
    update_id   BIGSERIAL,
    device_id   UUID        NOT NULL,
    user_id     UUID        NOT NULL,
    lat         NUMERIC(9,6) NOT NULL,  -- 6 decimal places ~= 0.11 m resolution
    lng         NUMERIC(9,6) NOT NULL,
    accuracy_m  SMALLINT,               -- device-reported GPS accuracy in meters
    recorded_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    PRIMARY KEY (update_id, recorded_at)
) PARTITION BY RANGE (recorded_at);

-- Serves the "history for one device over a time range" query pattern below.
CREATE INDEX idx_location_device_time
    ON LocationUpdate (device_id, recorded_at DESC);

-- Geofences: circular regions defined by a center point and a radius in meters.
CREATE TABLE Geofence (
    fence_id    UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    owner_id    UUID NOT NULL,
    name        TEXT NOT NULL,
    center_lat  NUMERIC(9,6) NOT NULL,
    center_lng  NUMERIC(9,6) NOT NULL,
    radius_m    INT NOT NULL,
    is_active   BOOLEAN NOT NULL DEFAULT TRUE  -- soft-disable without losing event history
);

-- Geofence events (entry/exit): append-only transition log; the latest row per
-- (device, fence) pair tells whether the device is currently inside the fence.
CREATE TABLE GeofenceEvent (
    event_id    BIGSERIAL PRIMARY KEY,
    fence_id    UUID NOT NULL REFERENCES Geofence(fence_id),
    device_id   UUID NOT NULL,
    event_type  TEXT NOT NULL CHECK (event_type IN ('enter','exit')),
    occurred_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- Supports "most recent event for a fence" lookups.
CREATE INDEX idx_geofence_event ON GeofenceEvent (fence_id, occurred_at DESC);

Redis GEO Index for Proximity Queries

Store the latest position of every active device in a Redis GEO set. This enables O(log N) radius queries without touching PostgreSQL.

import time
from decimal import Decimal

import redis

# Shared Redis client; decode_responses=True makes replies str instead of bytes.
r = redis.Redis(host='redis', port=6379, decode_responses=True)

GEO_KEY = "devices:live"  # Redis GEO sorted set

def update_device_location(device_id: str, lat: float, lng: float):
    """Write the device's latest position into the Redis GEO live set.

    The durable Postgres write happens separately (async job queue); this
    path only keeps the live proximity index fresh.
    """
    r.geoadd(GEO_KEY, [lng, lat, device_id])
    # Redis GEO members cannot carry individual TTLs, and EXPIRE on the key
    # would drop *every* device's position at once. Instead, record the
    # last-seen time in a companion sorted set; a background reaper removes
    # members older than 5 minutes from both this set and GEO_KEY.
    r.zadd("devices:active", {device_id: time.time()})

def get_nearby_devices(lat: float, lng: float, radius_m: int) -> list[dict]:
    """Return up to 100 devices within radius_m meters of (lat, lng),
    nearest first, each as {device_id, distance_m, lng, lat}."""
    hits = r.geosearch(
        GEO_KEY,
        longitude=lng,
        latitude=lat,
        radius=radius_m,
        unit='m',
        withcoord=True,
        withdist=True,
        sort='ASC',
        count=100,
    )
    # Each hit is (member, distance, (lng, lat)); unpack explicitly.
    nearby = []
    for member, dist, coords in hits:
        nearby.append({
            "device_id": member,
            "distance_m": float(dist),
            "lng": coords[0],
            "lat": coords[1],
        })
    return nearby

Location Ingestion Pipeline

from dataclasses import dataclass
from datetime import datetime, timezone
import psycopg2

@dataclass
class LocationBatch:
    """A batch of raw GPS updates for one device/user, as received by ingestion."""
    device_id: str
    user_id: str
    # Each tuple is (lat, lng, accuracy_m, recorded_at).
    updates: list[tuple[float, float, int, datetime]]  # lat, lng, accuracy, ts

DISTANCE_THRESHOLD_M = 10  # only store updates if device moved > 10m

def haversine_m(lat1, lng1, lat2, lng2) -> float:
    """Approximate great-circle distance in meters between two GPS points."""
    from math import radians, sin, cos, sqrt, atan2
    earth_radius_m = 6_371_000  # mean Earth radius
    lat1_rad = radians(lat1)
    lat2_rad = radians(lat2)
    half_dlat = radians(lat2 - lat1) / 2
    half_dlng = radians(lng2 - lng1) / 2
    h = sin(half_dlat) ** 2 + cos(lat1_rad) * cos(lat2_rad) * sin(half_dlng) ** 2
    return 2 * earth_radius_m * atan2(sqrt(h), sqrt(1 - h))

def ingest_location_batch(conn, batch: LocationBatch):
    """
    Filter out GPS noise (updates that moved < DISTANCE_THRESHOLD_M from the
    last kept point are dropped), batch-insert the rest into Postgres, then
    push the newest kept position into the Redis GEO live set.

    NOTE(review): the filtering loop was truncated in the page extraction
    (the '<' swallowed text); it is reconstructed from the surviving append
    line and the surrounding text — confirm against the original.
    """
    filtered = []
    last_lat = last_lng = None
    for lat, lng, accuracy, ts in batch.updates:
        # Always keep the first update; afterwards keep only points that
        # moved past the jitter threshold.
        if last_lat is None or haversine_m(last_lat, last_lng, lat, lng) >= DISTANCE_THRESHOLD_M:
            filtered.append((batch.device_id, batch.user_id, lat, lng, accuracy, ts))
            last_lat, last_lng = lat, lng

    if not filtered:
        return

    with conn.cursor() as cur:
        cur.executemany(
            """INSERT INTO LocationUpdate
               (device_id, user_id, lat, lng, accuracy_m, recorded_at)
               VALUES (%s, %s, %s, %s, %s, %s)""",
            filtered
        )
    conn.commit()
    # Update Redis GEO with latest position.
    # Tuple layout: (device_id, user_id, lat, lng, accuracy, ts).
    last = filtered[-1]
    update_device_location(last[0], last[2], last[3])

Geofence Evaluation

def check_geofences(conn, device_id: str, lat: float, lng: float):
    """
    On each location update, check whether the device has entered or exited
    any active geofence, and record a GeofenceEvent on each transition.

    Efficient: a coarse bounding-box pre-filter in SQL avoids computing the
    haversine distance against every fence.

    NOTE(review): the transition logic at the end of this function was
    truncated in the page extraction (the '<' swallowed text); it is
    reconstructed from the GeofenceEvent enter/exit schema — confirm
    against the original.
    """
    with conn.cursor() as cur:
        # Bounding box pre-filter (avoids computing haversine for every fence).
        # +/- 0.1 degrees is roughly an 11 km box at the equator, which should
        # comfortably exceed any fence radius — TODO confirm max radius_m.
        cur.execute("""
            SELECT fence_id, center_lat, center_lng, radius_m
            FROM Geofence
            WHERE is_active = TRUE
              AND center_lat BETWEEN %s AND %s
              AND center_lng BETWEEN %s AND %s
        """, (
            lat - 0.1, lat + 0.1,   # ~11km bounding box
            lng - 0.1, lng + 0.1
        ))
        candidates = cur.fetchall()

    for fence_id, clat, clng, radius_m in candidates:
        dist = haversine_m(lat, lng, clat, clng)
        inside_now = dist <= radius_m
        # Current state = most recent recorded transition for this pair.
        last_event = get_last_geofence_event(conn, device_id, fence_id)
        was_inside = last_event is not None and last_event['event_type'] == 'enter'
        if inside_now and not was_inside:
            record_geofence_event(conn, fence_id, device_id, 'enter')
        elif not inside_now and was_inside:
            record_geofence_event(conn, fence_id, device_id, 'exit')

def get_last_geofence_event(conn, device_id: str, fence_id: str) -> dict | None:
    """Return the most recent enter/exit event for (device, fence), or None."""
    with conn.cursor() as cur:
        cur.execute("""
            SELECT event_type FROM GeofenceEvent
            WHERE device_id = %s AND fence_id = %s
            ORDER BY occurred_at DESC LIMIT 1
        """, (device_id, fence_id))
        row = cur.fetchone()
    return {'event_type': row[0]} if row else None

def record_geofence_event(conn, fence_id: str, device_id: str, event_type: str):
    """Append a single enter/exit row to GeofenceEvent and commit."""
    insert_sql = (
        "INSERT INTO GeofenceEvent (fence_id, device_id, event_type) "
        "VALUES (%s,%s,%s)"
    )
    with conn.cursor() as cur:
        cur.execute(insert_sql, (fence_id, device_id, event_type))
    conn.commit()

Location History and Data Retention

-- Query movement history for a device over a time range.
-- Served by idx_location_device_time (device_id, recorded_at DESC).
SELECT lat, lng, accuracy_m, recorded_at
FROM LocationUpdate
WHERE device_id = $1
  AND recorded_at BETWEEN $2 AND $3
ORDER BY recorded_at ASC;

-- Partition pruning: Postgres automatically scans only the relevant day partition.
-- Create daily partitions:
-- CREATE TABLE location_updates_2026_04_17 PARTITION OF LocationUpdate
--     FOR VALUES FROM ('2026-04-17') TO ('2026-04-18');

-- Retention policy: drop partitions older than 90 days.
-- Dropping a partition is a metadata-only operation, unlike DELETE.
-- DROP TABLE location_updates_2026_01_17;  (run monthly via pg_cron)

Key Interview Points

  • Redis GEO vs PostGIS: Redis GEO is a sorted set using geohash encoding — O(log N) radius queries, sub-millisecond latency, but no persistence guarantee. PostGIS ST_DWithin on indexed geometry columns achieves similar performance for read-heavy workloads with full durability. Use Redis for live device positions; PostGIS for historical spatial analysis.
  • GPS noise filtering: Raw GPS updates include jitter of 5–50m. Without filtering, a stationary device generates thousands of meaningless inserts. Threshold: discard updates that land within 10m of the last stored position. The device-reported accuracy_m helps too: only trust updates where accuracy_m < 50.
  • Geofence scaling: With 10,000 active fences and 1M devices, evaluating all fences per update is O(fences) = slow. Mitigations: (1) spatial index on Geofence.center using PostGIS and ST_DWithin for the bounding-box prefilter; (2) only evaluate fences “near” the device using a geohash prefix lookup; (3) maintain a per-device “active fence” set in Redis to skip re-evaluation when position hasn’t changed significantly.
  • Write throughput: At 1 update every 5 seconds per device × 100K devices = 20K writes/sec (and an order of magnitude more at peak or with a larger fleet). Use: Kafka for buffering, batch inserts (executemany), and partitioned tables. Avoid per-row transactions; commit in batches of 1,000.
  • Privacy: Location is sensitive. Encrypt at rest (Postgres transparent encryption or column-level encryption for lat/lng). Retention: delete raw location history after 90 days. Anonymize aggregated traces. Require explicit user consent before sharing with third-party services.

{"@context":"https://schema.org","@type":"FAQPage","mainEntity":[{"@type":"Question","name":"Why use Redis GEO for live device positions instead of querying PostgreSQL directly?","acceptedAnswer":{"@type":"Answer","text":"Redis GEO uses a sorted set where each member is encoded with a geohash score, enabling O(log N) proximity queries (GEORADIUS/GEOSEARCH) with sub-millisecond latency. At 100,000 active devices with location updates every 5 seconds, that is 20,000 writes/second and potentially thousands of proximity reads/second. PostgreSQL with PostGIS handles proximity queries well for historical data but becomes a bottleneck under this combined write and read load. The separation is: Redis GEO holds the current live position of every device (ephemeral, fast); PostgreSQL stores the historical location log (durable, queryable with range filters). On Redis restart, Redis GEO is rebuilt from the most recent PostgreSQL row per device at startup."}},{"@type":"Question","name":"How do you prevent GPS jitter from flooding the database with meaningless updates?","acceptedAnswer":{"@type":"Answer","text":"Raw GPS on a stationary device reports positions that vary by 5–50m due to satellite geometry and atmospheric interference. Without filtering, a device sitting still generates thousands of identical-looking inserts per hour. Filter logic: compute haversine distance between the new position and the last stored position for that device. Only write to the database if the device moved more than 10 meters. Store last_known_position per device in Redis (a simple hash: HSET device:{id} lat 37.7749 lng -122.4194) to avoid a DB lookup on every update. The threshold (10m) is configurable — delivery apps need 5m precision, logistics fleet tracking is fine with 100m. Also filter by accuracy: discard GPS readings with accuracy_m > 100 (poor signal quality)."}},{"@type":"Question","name":"How does geofence evaluation scale with millions of active devices?","acceptedAnswer":{"@type":"Answer","text":"Naive approach: on every location update, iterate all active geofences and compute haversine distance — O(fences * devices_per_second) = unacceptable at scale. Scalable approach: (1) Spatial index on Geofence.center using PostGIS GEOGRAPHY type and GIST index; use ST_DWithin for bounding-box pre-filter — reduces candidates from 10,000 fences to ~10 nearby ones. (2) Geohash-bucketed fences: index fences by the geohash cells they overlap; on each location update, look up only the fences in the device's current geohash cell (and neighbors). (3) Event-driven evaluation: only evaluate geofences when the device has moved significantly (>50m from last evaluation point) — eliminates stationary device evaluation entirely."}},{"@type":"Question","name":"How do you model and query movement traces for a device over a time range?","acceptedAnswer":{"@type":"Answer","text":"Store LocationUpdate with (device_id, lat, lng, recorded_at) partitioned by day. Query: SELECT lat, lng, recorded_at FROM LocationUpdate WHERE device_id = $1 AND recorded_at BETWEEN $2 AND $3 ORDER BY recorded_at ASC. Partition pruning means only the relevant day partitions are scanned — a query for a 3-day window touches 3 partitions instead of the full table. For visualization (drawing a route on a map): the ordered lat/lng sequence is the trace. For distance traveled: sum haversine between consecutive points. For speed: distance between points divided by elapsed time. Downsample for display: for traces with thousands of points, apply the Ramer-Douglas-Peucker algorithm to reduce to 200 points while preserving the shape."}},{"@type":"Question","name":"How do you expire stale device positions from the Redis GEO set?","acceptedAnswer":{"@type":"Answer","text":"Redis GEO sorted sets don't support per-member TTLs — you cannot set a TTL on a single device's position. Workaround: maintain a separate Redis sorted set (devices:active) scored by last_seen_timestamp. On each location update, ZADD devices:active {unix_timestamp} {device_id}. A background job runs every minute: ZRANGEBYSCORE devices:active 0 {5_minutes_ago}, gets a list of stale device IDs, and removes them from both devices:active and the GEO key: ZREM devices:live {device_id}. This ensures the live map doesn't show devices that have been offline for 5+ minutes. Alternatively, use a Lua script for atomic check-and-remove."}}]}

Location tracking and GPS data pipeline design is discussed in Uber system design interview questions.

Location tracking and real-time geofencing design is covered in Lyft system design interview preparation.

Location tracking and proximity detection design is discussed in Snap system design interview guide.

Scroll to Top