Knowledge Base System Low-Level Design: Article Versioning, Full-Text Search, and Feedback Loop

Knowledge Base System Low-Level Design

A knowledge base must do three things well: let authors maintain accurate articles without losing history, let users find relevant content quickly, and continuously improve based on whether users found the content helpful. This article covers article versioning with an immutable version history, full-text search using PostgreSQL tsvector with ranking, category and tag taxonomy, helpful/unhelpful feedback aggregation, and related article suggestions via tag overlap scoring.

Article Versioning Design

The core invariant: published versions are immutable. An article is a pointer to its current published version; editors create new draft versions that replace the pointer on publish. This gives you a complete audit trail and the ability to roll back without any soft-delete trickery:

CREATE TABLE article (
    id              BIGSERIAL PRIMARY KEY,
    slug            TEXT        NOT NULL UNIQUE,
    current_version_id BIGINT,                    -- FK set after first publish
    status          TEXT        NOT NULL DEFAULT 'draft'
                                CHECK (status IN ('draft','published','archived')),
    created_by      BIGINT      NOT NULL,
    created_at      TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    published_at    TIMESTAMPTZ,
    archived_at     TIMESTAMPTZ
);

CREATE TABLE article_version (
    id              BIGSERIAL PRIMARY KEY,
    article_id      BIGINT      NOT NULL REFERENCES article(id),
    version_number  INT         NOT NULL,
    title           TEXT        NOT NULL,
    body_markdown   TEXT        NOT NULL,
    body_html       TEXT        NOT NULL,          -- pre-rendered HTML
    change_summary  TEXT,
    author_id       BIGINT      NOT NULL,
    created_at      TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    search_vector   TSVECTOR,                      -- populated by trigger
    UNIQUE (article_id, version_number)
);

-- Circular FK: article.current_version_id → article_version.id
ALTER TABLE article ADD CONSTRAINT fk_current_version
    FOREIGN KEY (current_version_id) REFERENCES article_version(id)
    DEFERRABLE INITIALLY DEFERRED;

CREATE INDEX idx_av_article ON article_version (article_id, version_number DESC);
CREATE INDEX idx_av_search  ON article_version USING gin (search_vector);

CREATE TABLE article_tag (
    article_id  BIGINT NOT NULL REFERENCES article(id) ON DELETE CASCADE,
    tag         TEXT   NOT NULL,
    PRIMARY KEY (article_id, tag)
);

CREATE INDEX idx_at_tag ON article_tag (tag);

CREATE TABLE article_feedback (
    id          BIGSERIAL PRIMARY KEY,
    article_id  BIGINT      NOT NULL REFERENCES article(id),
    version_id  BIGINT      NOT NULL REFERENCES article_version(id),
    user_id     BIGINT,                            -- NULL for anonymous
    session_id  TEXT        NOT NULL,
    helpful     BOOLEAN     NOT NULL,
    comment     TEXT,
    created_at  TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    UNIQUE (article_id, session_id)               -- one vote per session
);

CREATE INDEX idx_af_article ON article_feedback (article_id, created_at DESC);

tsvector Full-Text Search Setup

PostgreSQL’s tsvector stores pre-processed lexemes for fast GIN index lookups. Populate it via trigger so it stays current automatically:

CREATE OR REPLACE FUNCTION update_article_search_vector()
RETURNS TRIGGER LANGUAGE plpgsql AS $$
BEGIN
    NEW.search_vector :=
        setweight(to_tsvector('english', coalesce(NEW.title, '')),   'A') ||
        setweight(to_tsvector('english', coalesce(NEW.body_markdown, '')), 'B');
    RETURN NEW;
END;
$$;

CREATE TRIGGER trg_article_search_vector
BEFORE INSERT OR UPDATE OF title, body_markdown ON article_version
FOR EACH ROW EXECUTE FUNCTION update_article_search_vector();

Full-Text Search with Ranking

from typing import List, Dict, Any, Optional

def search_articles(
    db,
    query: str,
    tag: Optional[str] = None,
    limit: int = 20,
    offset: int = 0
) -> List[Dict[str, Any]]:
    """
    Full-text search over current published versions.
    Returns results sorted by ts_rank_cd (cover density ranking).
    """
    if not query.strip():
        return []

    # Convert user query to tsquery; use plainto_tsquery for natural language
    params: list = [query, query]
    tag_clause = ""
    if tag:
        tag_clause = "AND EXISTS (SELECT 1 FROM article_tag WHERE article_id=a.id AND tag=%s)"
        params.append(tag)

    params += [limit, offset]

    rows = db.fetchall(
        f"""
        SELECT
            a.id,
            a.slug,
            av.title,
            av.version_number,
            ts_rank_cd(av.search_vector, plainto_tsquery('english', %s)) AS rank,
            ts_headline('english', av.body_markdown,
                        plainto_tsquery('english', %s),
                        'MaxWords=35, MinWords=15, StartSel=, StopSel='
            ) AS excerpt,
            a.published_at,
            COALESCE(f.helpful_count, 0)   AS helpful_count,
            COALESCE(f.unhelpful_count, 0) AS unhelpful_count
        FROM article a
        JOIN article_version av ON av.id = a.current_version_id
        LEFT JOIN (
            SELECT article_id,
                   COUNT(*) FILTER (WHERE helpful=TRUE)  AS helpful_count,
                   COUNT(*) FILTER (WHERE helpful=FALSE) AS unhelpful_count
            FROM article_feedback
            GROUP BY article_id
        ) f ON f.article_id = a.id
        WHERE a.status = 'published'
          AND av.search_vector @@ plainto_tsquery('english', %s)
          {tag_clause}
        ORDER BY rank DESC, a.published_at DESC
        LIMIT %s OFFSET %s
        """,
        (query, query, query) + ((tag,) if tag else ()) + (limit, offset)
    )
    return [dict(r) for r in rows]

Publishing a New Version

def publish_version(
    db,
    article_id: int,
    title: str,
    body_markdown: str,
    author_id: int,
    change_summary: str = ''
) -> int:
    """
    Creates a new immutable article_version and updates the article pointer.
    Returns new version_id.
    """
    import markdown   # pip install markdown

    body_html = markdown.markdown(body_markdown, extensions=['tables', 'fenced_code', 'toc'])

    # Next version number
    row = db.fetchone(
        "SELECT COALESCE(MAX(version_number), 0) + 1 AS next_v FROM article_version WHERE article_id=%s",
        (article_id,)
    )
    next_v = row['next_v']

    # Insert immutable version (trigger populates search_vector)
    db.execute(
        """INSERT INTO article_version
           (article_id, version_number, title, body_markdown, body_html, author_id, change_summary)
           VALUES (%s,%s,%s,%s,%s,%s,%s)""",
        (article_id, next_v, title, body_markdown, body_html, author_id, change_summary)
    )
    version_id = db.fetchone("SELECT lastval()")['lastval']

    # Update current pointer and mark published (DEFERRABLE FK allows this order)
    db.execute(
        """UPDATE article
           SET current_version_id=%s, status='published', published_at=NOW()
           WHERE id=%s""",
        (version_id, article_id)
    )
    db.commit()
    return version_id
def get_related_articles(db, article_id: int, limit: int = 5) -> List[Dict]:
    """
    Returns articles sharing the most tags with the given article,
    excluding the article itself.
    """
    return db.fetchall(
        """
        SELECT
            a.id,
            a.slug,
            av.title,
            COUNT(*)::int AS shared_tags,
            a.published_at
        FROM article_tag source_tag
        JOIN article_tag related_tag ON related_tag.tag = source_tag.tag
                                    AND related_tag.article_id != source_tag.article_id
        JOIN article a      ON a.id = related_tag.article_id AND a.status = 'published'
        JOIN article_version av ON av.id = a.current_version_id
        WHERE source_tag.article_id = %s
        GROUP BY a.id, a.slug, av.title, a.published_at
        ORDER BY shared_tags DESC, a.published_at DESC
        LIMIT %s
        """,
        (article_id, limit)
    )

Feedback Aggregation and Usefulness Score

A simple helpful/unhelpful ratio lets you surface low-quality articles for editorial review:

def record_feedback(db, article_id: int, version_id: int,
                    session_id: str, helpful: bool,
                    user_id: Optional[int] = None,
                    comment: str = '') -> bool:
    """
    Records feedback. Returns False if session already voted (idempotent).
    """
    try:
        db.execute(
            """INSERT INTO article_feedback
               (article_id, version_id, session_id, helpful, user_id, comment)
               VALUES (%s,%s,%s,%s,%s,%s)""",
            (article_id, version_id, session_id, helpful, user_id, comment or None)
        )
        db.commit()
        return True
    except Exception:
        db.rollback()
        return False   # duplicate session_id


def get_low_quality_articles(db, min_votes: int = 20, max_helpful_ratio: float = 0.40):
    """
    Returns articles where fewer than 40% of votes are helpful
    and total votes exceed threshold — candidates for revision.
    """
    return db.fetchall(
        """
        SELECT a.id, a.slug, av.title,
               COUNT(*) AS total_votes,
               ROUND(AVG(CASE WHEN helpful THEN 1.0 ELSE 0.0 END), 3) AS helpful_ratio
        FROM article_feedback af
        JOIN article a ON a.id = af.article_id
        JOIN article_version av ON av.id = a.current_version_id
        WHERE a.status = 'published'
        GROUP BY a.id, a.slug, av.title
        HAVING COUNT(*) >= %s
           AND AVG(CASE WHEN helpful THEN 1.0 ELSE 0.0 END) <= %s
        ORDER BY helpful_ratio ASC
        """,
        (min_votes, max_helpful_ratio)
    )

See also: Atlassian Interview Guide

See also: Netflix Interview Guide 2026: Streaming Architecture, Recommendation Systems, and Engineering Excellence

See also: Airbnb Interview Guide 2026: Search Systems, Trust and Safety, and Full-Stack Engineering

Scroll to Top