In [1]:
import pandas as pd
from sqlalchemy import create_engine
from dotenv import load_dotenv
import os
from app.database import SessionLocal, Base, engine
from app.models import *
from app import models
from app.utils import hash_password, reset_db


import random
import uuid
from typing import List, Dict, Set, Tuple

import numpy as np
from faker import Faker
from app.models import (
    User,
    Group,
    GroupMembership,
    Report,
    Contest,
    ContestParticipation,
    Role,
)

In [2]:

"""devseed.py – deterministic dev database seeder

run this once after `pip install -r requirements.txt` and after your
`DATABASE_URL` env‑var points at a *local* database you are happy to wipe.

    python devseed.py

it will:
    • drop & recreate all tables (via `reset_db()`)
    • insert               5 000 users
    • insert                  30 groups (schmuckstaffel + 29 random)
    • insert             15 000 contest_participations spread over 30 contests
    • insert               1 000 reports (reporter & respondent share a group)
all random draws are behind `SEED = 42`, so reruns are identical.
"""

SEED = 42
random.seed(SEED)
np.random.seed(SEED)
Faker.seed(SEED)
faker = Faker()

NUM_USERS = 5_000
NUM_GROUPS = 30  # includes the global schmuckstaffel
NUM_CONTESTS = 30
NUM_PARTICIPATIONS = 15_000
NUM_REPORTS = 1_000

DEFAULT_PASS = "devpass"

# ---------------------------------------------------------------------------
# helpers
# ---------------------------------------------------------------------------

def unique_handles(n: int) -> List[str]:
    """return *n* unique cf‑style handles."""
    handles: Set[str] = set()
    while len(handles) < n:
        handle = faker.unique.user_name()  # faker.unique ensures no dup.
        handles.add(handle.lower())
    return list(handles)


def pick_random_subset(population: List[str], k: int) -> List[str]:
    """k distinct elements from population (k may be 0)."""
    if k == 0:
        return []
    return random.sample(population, k)


# ---------------------------------------------------------------------------
# wipe & boot db
# ---------------------------------------------------------------------------

reset_db()  # drops everything & recreates metadata
Base.metadata.create_all(bind=engine)

db = SessionLocal()

# ---------------------------------------------------------------------------
# users
# ---------------------------------------------------------------------------

print("⏳  generating users …")

admin_users = [
    User(
        user_id="shrey",
        role=Role.admin,
        cf_handle="negative-xp",
        atcoder_handle=None,
        codechef_handle=None,
        twitter_handle=None,
        trusted_score=88,
        hashed_password=hash_password(DEFAULT_PASS),
    ),
    User(
        user_id="ani",
        role=Role.admin,
        cf_handle="roomTemperatureIQ",
        atcoder_handle=None,
        codechef_handle=None,
        twitter_handle=None,
        trusted_score=88,
        hashed_password=hash_password(DEFAULT_PASS),
    ),
]

remaining = NUM_USERS - len(admin_users)
handles = unique_handles(remaining)

users: List[User] = admin_users[:]
for i in range(remaining):
    uid = f"u{i:04d}"  # u0000 … u4997
    users.append(
        User(
            user_id=uid,
            role=Role.user,
            cf_handle=handles[i],
            atcoder_handle=None if random.random() < 0.5 else handles[i] + "_ac",
            codechef_handle=None if random.random() < 0.7 else handles[i] + "_cc",
            twitter_handle=None if random.random() < 0.6 else handles[i] + "_tw",
            trusted_score=random.randint(0, 100),
            hashed_password=hash_password(DEFAULT_PASS),
        )
    )

db.add_all(users)

db.flush()  # users must exist before memberships

user_ids = [u.user_id for u in users]

# ---------------------------------------------------------------------------
# groups
# ---------------------------------------------------------------------------

print("⏳  generating groups & memberships …")

groups: List[Group] = []
memberships: List[GroupMembership] = []

# 0. the global group
schmuck = Group(
    group_id="schmuckstaffel",
    group_name="schmuckstaffel",
    group_description="global umbrella group",
    is_private=False,
)
groups.append(schmuck)

# every user is in schmuckstaffel
admin_for_schmuck = admin_users[0]  # shrey
memberships.append(
    GroupMembership(
        user_id=admin_for_schmuck.user_id,
        group_id=schmuck.group_id,
        role=Role.admin,
        user_group_rating=1500,
        user_group_max_rating=1500,
    )
)

for uid in user_ids:
    if uid == admin_for_schmuck.user_id:
        continue  # already inserted
    memberships.append(
        GroupMembership(
            user_id=uid,
            group_id=schmuck.group_id,
            role=Role.user,
            user_group_rating=1500,
            user_group_max_rating=1500,
        )
    )

# 1‑29. the remaining groups with variable sizes
size_palette = [5, 8, 13, 21, 34, 55, 89, 144, 233, 377, 610, 987, 1597]
while len(size_palette) < NUM_GROUPS - 1:
    size_palette.append(random.randint(10, 800))
random.shuffle(size_palette)

for g_idx in range(1, NUM_GROUPS):
    g_id = f"g{g_idx:02d}"
    g_name = faker.unique.catch_phrase().lower().replace(" ", "‑")
    is_priv = random.random() < 0.3
    group = Group(
        group_id=g_id,
        group_name=g_name,
        group_description=faker.sentence(nb_words=10),
        is_private=is_priv,
    )
    groups.append(group)

    # choose members
    g_size = size_palette[g_idx - 1]
    g_members = pick_random_subset(user_ids, g_size)

    # ensure there is an admin distinct per group
    admin_uid = random.choice(g_members)
    mod_count = random.randint(0, min(9, g_size - 1))
    mod_uids = random.sample([uid for uid in g_members if uid != admin_uid], mod_count)

    for uid in g_members:
        role = Role.user
        if uid == admin_uid:
            role = Role.admin
        elif uid in mod_uids:
            role = Role.moderator
        memberships.append(
            GroupMembership(
                user_id=uid,
                group_id=g_id,
                role=role,
                user_group_rating=random.randint(800, 2600),
                user_group_max_rating=random.randint(1200, 3000),
            )
        )

# persist groups & memberships

db.add_all(groups)
db.add_all(memberships)
db.flush()

# ---------------------------------------------------------------------------
# contests & participations
# ---------------------------------------------------------------------------

print("⏳  generating contests & participations …")

contests: List[Contest] = []
for c_idx in range(NUM_CONTESTS):
    contests.append(
        Contest(
            contest_id=f"c{c_idx:02d}",
            cf_contest_id=1000 + c_idx,
        )
    )

db.add_all(contests)
db.flush()

# map group_id -> [user_ids] for fast sampling
from collections import defaultdict

gid_to_users: Dict[str, List[str]] = defaultdict(list)
for m in memberships:
    gid_to_users[m.group_id].append(m.user_id)

duplicate_guard: Set[Tuple[str, str, str]] = set()
participations: List[ContestParticipation] = []

number_of_parts = dict()

while len(participations) < NUM_PARTICIPATIONS:
    contest = random.choice(contests)
    group = random.choice(groups)
    user_pool = gid_to_users[group.group_id]
    if not user_pool:
        continue  # group with zero members (unlikely but guard)
    user = random.choice(user_pool)
    key = (user, group.group_id, contest.contest_id)
    if key in duplicate_guard:
        continue
    duplicate_guard.add(key)
    participations.append(
        ContestParticipation(
            user_id=user,
            group_id=group.group_id,
            contest_id=contest.contest_id,
            user_group_rating_before=None,
            user_group_rating_after=None,
            user_group_rank = number_of_parts.get((group.group_id, contest.contest_id), 0)+1,
        )
    )
    number_of_parts[(group.group_id, contest.contest_id)] = number_of_parts.get((group.group_id, contest.contest_id), 0)+1

db.add_all(participations)

# ---------------------------------------------------------------------------
# reports
# ---------------------------------------------------------------------------

print("⏳  generating reports …")

reports: List[Report] = []
for _ in range(NUM_REPORTS):
    grp = random.choice(groups)
    members = gid_to_users[grp.group_id]
    if len(members) < 2:
        continue  # need at least two distinct users
    reporter, respondent = random.sample(members, 2)
    resolved = random.random() < 0.4
    reports.append(
        Report(
            report_id=str(uuid.uuid4()),
            group_id=grp.group_id,
            contest_id=random.choice(contests).contest_id,
            reporter_user_id=reporter,
            respondent_user_id=respondent,
            report_description=faker.sentence(nb_words=12),
            resolved=resolved,
            resolve_message="resolved" if resolved else "pending",
        )
    )

db.add_all(reports)

# ---------------------------------------------------------------------------
# commit & done
# ---------------------------------------------------------------------------

print("💾  committing to db …")
db.commit()
db.close()
print("✅  devseed loaded (deterministic, seed=42)")


dropping all tables...
all tables dropped.
creating tables from models...
schema rebuilt.
⏳  generating users …
⏳  generating groups & memberships …
⏳  generating contests & participations …
⏳  generating reports …
💾  committing to db …
✅  devseed loaded (deterministic, seed=42)


In [4]:
df = pd.read_sql("SELECT * FROM users", engine)
print(df.shape)
df.head()

(5000, 9)


Unnamed: 0,user_id,role,cf_handle,atcoder_handle,codechef_handle,twitter_handle,internal_default_rated,trusted_score,hashed_password
0,shrey,admin,negative-xp,,,,True,88,uExYGkSiyIlrSvQGeWuAvFhAbmAWBlE5Tc/EbpPeiepKPC...
1,ani,admin,roomTemperatureIQ,,,,True,88,n5dKAcwDjKVBMJIO54LMLXxKBv/YIm/lRDfJ1qPnvwT//G...
2,u0000,user,perryrobert,perryrobert_ac,,,True,28,7u9i+lkS1+r+TZCPyxXkxWJkkzwJbjKUnR7ytDbYskuwKu...
3,u0001,user,imoore,,,imoore_tw,True,69,CCpkp0Sj44Y0GOkGkWWDOLOASjOz5FLIqkGYFe3C4tHXah...
4,u0002,user,dannysmith,,,,True,27,NuNXAQV5DOXplizkgc+GaT/higCzhj9HlfNJWdI+T6mWnx...


In [5]:
reports = pd.read_sql("SELECT * FROM reports", engine)
reports.head()

Unnamed: 0,report_id,group_id,contest_id,reporter_user_id,respondent_user_id,report_description,create_date,resolved,resolve_message
0,207697f0-8711-4a86-9d8c-16010fdc0c67,g21,c06,u0645,u0173,Blood use ok teacher probably political consid...,2025-05-04 18:49:43.748048,False,pending
1,7e3f7867-362f-43e5-a81f-c97e92a1c33d,g20,c02,u1011,u2950,Heavy national result very investment enter me...,2025-05-04 18:49:43.748048,True,resolved
2,403e1152-a2b0-4a87-ab27-95e2dda56b0a,g03,c13,u0151,u0712,Image my by maybe off smile develop movement m...,2025-05-04 18:49:43.748048,False,pending
3,958a540e-e7e8-40a3-8397-3d73a94b8069,g22,c27,u1154,u1228,Knowledge office interesting perhaps as feel D...,2025-05-04 18:49:43.748048,True,resolved
4,b1859028-3c49-4a92-b9c6-9d6cc8de96ec,g16,c10,u4906,u4723,Resource blue degree rest thus game wonder ana...,2025-05-04 18:49:43.748048,False,pending
