In [17]:
import pandas as pd
from sqlalchemy import create_engine
from dotenv import load_dotenv
import os
from app.database import SessionLocal, Base, engine
from app.models import *
from app import models
from app.utils import hash_password, reset_db


import random
import uuid
from typing import List, Dict, Set, Tuple

import numpy as np
from faker import Faker
from app.models import (
    User,
    Group,
    GroupMembership,
    Report,
    Contest,
    ContestParticipation,
    Role,
)

In [18]:
def random_subset(n, k):
    return random.sample([i for i in range(n)], k)

def pick_random_subset(population: list, k: int) -> list:
    """k distinct elements from population (k may be 0)."""
    if k == 0:
        return []
    return random.sample(population, k)

def get_mock_contest_standings(cid, include_users=None):
    if include_users is None:
        include_users = []
    res = {
        'contest': {
            'id': cid,
            'name': f'Codeforces Round {cid}',
            'type': 'CF',
            'phase': 'FINISHED',
            'frozen': False,
            'durationSeconds': 7200,
            'startTimeSeconds': 1746110100,
            'relativeTimeSeconds': 267545
        },
        'standings': [
        ]
    }
    num_participants = 8000
    standings = [
        {'handle': 'random-user'+str(i+1), 'rank': (i+1), 'points': 9754 - i, 'penalty': 0}
        for i in range(num_participants)
    ]
    res['standings'] = standings
    udx = pick_random_subset([i for i in range(num_participants)], len(include_users))

    for i in range(len(udx)):
        res['standings'][udx[i]]['handle'] = include_users[i]
    
    return res


In [19]:
"""
    simulate what happens at the actual database level when:
    
        - actual users register
        - a group is created by a user
        - a group moderator invites someone to join a group
        - a user requests to join a group
        - a group moderator accepts a pending join request
        - a user accepts a group join invite
        - a user is kicked out of a group
        - a user leaves a group

        - cron job pulls an upcoming contest from codeforces (or any other platform) - contests will be available for registration AFTER it gets pulled and added to db
        - a user registers for a contest within a group
        - a user revokes his contest registration within a group (can only be performed if time.now() < contest.start_date)
        - cron job pulls contest standings from cf and initiates rating recalc per group


        - a user creates a report object
        - a moderator/admin resolves a report object
        - ratings rollback for successful reports?
        
        - an announcement is created by siteadmins
"""

'\n    simulate what happens at the actual database level when:\n\n        - actual users register\n        - a group is created by a user\n        - a group moderator invites someone to join a group\n        - a user requests to join a group\n        - a group moderator accepts a pending join request\n        - a user accepts a group join invite\n        - a user is kicked out of a group\n        - a user leaves a group\n\n        - cron job pulls an upcoming contest from codeforces (or any other platform) - contests will be available for registration AFTER it gets pulled and added to db\n        - a user registers for a contest within a group\n        - a user revokes his contest registration within a group (can only be performed if time.now() < contest.start_date)\n        - cron job pulls contest standings from cf and initiates rating recalc per group\n\n\n        - a user creates a report object\n        - a moderator/admin resolves a report object\n        - ratings rollback for 

In [20]:
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
Faker.seed(SEED)
faker = Faker()

NUM_USERS = 5_000
NUM_GROUPS = 30
NUM_CONTESTS = 5
NUM_PARTICIPATIONS = 15_000
NUM_REPORTS = 100
NUM_ANNOUNCEMENTS = 30

DEFAULT_PASS = "devpass"

In [21]:
reset_db()
Base.metadata.create_all(bind=engine)

db = SessionLocal()

dropping all tables...
all tables dropped.
creating tables from models...
schema rebuilt.


In [22]:
# POPULATE USERS
# create admins and common group
admin_users = [
    User(
        user_id="shrey",
        role=Role.admin,
        cf_handle="negative-xp",
        atcoder_handle=None,
        codechef_handle=None,
        twitter_handle=None,
        trusted_score=88,
        hashed_password=hash_password(DEFAULT_PASS),
    ),
    User(
        user_id="ani",
        role=Role.admin,
        cf_handle="roomTemperatureIQ",
        atcoder_handle=None,
        codechef_handle=None,
        twitter_handle=None,
        trusted_score=88,
        hashed_password=hash_password(DEFAULT_PASS),
    ),
]

users = admin_users

# create 5000 users
while len(users) < NUM_USERS:
    uid = 'testUser' + str(len(users)-1)
    users.append(
        User(
            user_id=uid,
            role=Role.user,
            cf_handle=uid + '_cf',
            atcoder_handle=None if random.random() < 0.5 else uid + "_ac",
            codechef_handle=None if random.random() < 0.7 else uid + "_cc",
            twitter_handle=None if random.random() < 0.6 else uid + "_tw",
            trusted_score=random.randint(0, 100),
            hashed_password=hash_password(DEFAULT_PASS),
        )
    )

db.add_all(users)
db.commit()

user_df = pd.read_sql("SELECT * FROM users", engine)
print(user_df.shape)
user_df.head()

(5000, 10)


Unnamed: 0,user_id,role,create_date,cf_handle,atcoder_handle,codechef_handle,twitter_handle,internal_default_rated,trusted_score,hashed_password
0,shrey,admin,2025-05-11 23:30:34.046342,negative-xp,,,,True,88,3mBOVhHuzUL15t75XORafCY1BWHbKFo8WAgXA6d5qu2qAZ...
1,ani,admin,2025-05-11 23:30:34.046342,roomTemperatureIQ,,,,True,88,Dhmk8tks5U6+m32DCcUX9nEr2u7Cr68nMXixLRRxS42/9k...
2,testUser1,user,2025-05-11 23:30:34.046342,testUser1_cf,testUser1_ac,,,True,28,YKFNrzcmsWSt61aYoR0z1zHShHb4kTZSK3L+tNX6DcfaiT...
3,testUser2,user,2025-05-11 23:30:34.046342,testUser2_cf,,,testUser2_tw,True,69,mFcwCnJuU1eqbH1xI275Mh9OHvMlCzkpvvYWhk2LgCCv9E...
4,testUser3,user,2025-05-11 23:30:34.046342,testUser3_cf,,,,True,27,Hwnezl61kz3VypU9NfN1JeDnHFzgsA+OmhQRn/QniGbdnC...


In [23]:
# POPULATE GROUPS

common_group = Group(
    group_id="main",
    group_name="main",
    group_description="group consisting of ALL users",
    is_private=False,
)

groups = [common_group]
for g_idx in range(1, NUM_GROUPS):
    g_id = f"g{g_idx:02d}"
    g_name = faker.unique.catch_phrase().lower().replace(" ", "‑")
    is_priv = random.random() < 0.3
    group = Group(
        group_id=g_id,
        group_name=g_name,
        group_description=faker.sentence(nb_words=10),
        is_private=is_priv,
    )
    groups.append(group)

db.add_all(groups)
db.commit()

group_df = pd.read_sql("SELECT * FROM groups", engine)
print(group_df.shape)
group_df.head()

(30, 5)


Unnamed: 0,group_id,group_name,group_description,is_private,create_date
0,main,main,group consisting of ALL users,False,2025-05-11 23:30:35.989350
1,g01,sharable‑bifurcated‑algorithm,Each cause bill scientist nation opportunity a...,False,2025-05-11 23:30:35.989350
2,g02,robust‑4thgeneration‑open‑architecture,Respond red information last everything thank ...,False,2025-05-11 23:30:35.989350
3,g03,optimized‑global‑focus‑group,Democratic shake bill here grow gas enough.,False,2025-05-11 23:30:35.989350
4,g04,balanced‑upward-trending‑knowledgebase,By two bad fall pick those gun court attorney ...,False,2025-05-11 23:30:35.989350


In [8]:
# POPULATE MEMBERSHIPS
memberships = []


# common group should have ALL users
memberships.append(GroupMembership(
    user_id=users[0].user_id,
    group_id=groups[0].group_id,
    role=Role.admin,
    user_group_rating=1500,
    user_group_max_rating=1500,
))

for i in range(1, NUM_USERS):
    memberships.append(
        GroupMembership(
            user_id=users[i].user_id,
            group_id=groups[0].group_id,
            role=Role.user,
            user_group_rating=1500,
            user_group_max_rating=1500,
        )
    )

# add memberships to other groups
size_palette = [5, 8, 13, 21, 34, 55, 89, 144, 233, 377, 610, 987, 1597]
while len(size_palette) < NUM_GROUPS - 1:
    size_palette.append(random.randint(10, 800))
random.shuffle(size_palette)

user_idx = [i for i in range(NUM_USERS)]

for i in range(1, NUM_GROUPS):
    grp_size = size_palette[i-1]
    members = pick_random_subset(user_idx, grp_size)
    # admin for this group
    memberships.append(
        GroupMembership(
            user_id = users[members[0]].user_id,
            group_id = groups[i].group_id,
            role=Role.admin,
            user_group_rating=1500,
            user_group_max_rating=1500, 
        )
    )
    for j in members[1:]:
        memberships.append(
            GroupMembership(
                user_id=users[j].user_id,
                group_id=groups[i].group_id,
                role=Role.user,
                user_group_rating=1500,
                user_group_max_rating=1500,
            )
        )

db.add_all(memberships)
db.commit()

membership_df = pd.read_sql("SELECT * FROM group_memberships", engine)
print(membership_df.shape)
membership_df.head()

(15335, 6)


Unnamed: 0,user_id,group_id,role,user_group_rating,user_group_max_rating,status
0,shrey,main,admin,1500,1500,active
1,ani,main,user,1500,1500,active
2,testUser1,main,user,1500,1500,active
3,testUser2,main,user,1500,1500,active
4,testUser3,main,user,1500,1500,active


In [9]:
# populate CONTESTS
contests = []
for i in range(NUM_CONTESTS):
    contests.append(
        Contest(
            contest_id = f"c{i+1}",
            cf_contest_id = 1000 + i,
            finished = False,
        )
    )

db.add_all(contests)
db.commit()

contest_df = pd.read_sql("SELECT * FROM contests", engine)
print(contest_df.shape)
contest_df.head()

(5, 4)


Unnamed: 0,contest_id,cf_contest_id,cf_standings,finished
0,c1,1000,,False
1,c2,1001,,False
2,c3,1002,,False
3,c4,1003,,False
4,c5,1004,,False


In [10]:
# populate contest participations
participations = []

for contest in contests:
    for group in groups:
        members = group.memberships
        n_members = len(members)
        parts = random_subset(n_members, int(3*n_members//4))
        for ii in parts:
            participations.append(
                ContestParticipation(
                    user_id = members[ii].user_id,
                    group_id = group.group_id,
                    contest_id = contest.contest_id,
                    rating_before = members[ii].user_group_rating,
                )
            )

db.add_all(participations)
db.commit()

participation_df = pd.read_sql("SELECT * FROM contest_participations", engine)
print(participation_df.shape)
participation_df.head()

(57440, 6)


Unnamed: 0,user_id,group_id,contest_id,rank,rating_before,rating_after
0,testUser1105,main,c1,,1500,
1,testUser2537,main,c1,,1500,
2,testUser3091,main,c1,,1500,
3,testUser796,main,c1,,1500,
4,testUser3208,main,c1,,1500,


In [11]:
# populate reports
n_parts = len(participations)
reports = []

while len(reports) < NUM_REPORTS:
    idx = int(random.random() * n_parts)
    rp = participations[idx]
    
    grp = db.query(Group).filter(Group.group_id == rp.group_id).all()[0]
    members = grp.memberships
    reporter = members[int(len(members) * random.random())]
    reports.append(
        Report(
            report_id = f"report{len(reports)}",
            group_id = rp.group_id,
            contest_id = rp.contest_id,
            reporter_user_id = reporter.user_id,
            respondent_user_id = rp.user_id,
            report_description = faker.sentence(nb_words=12)
        )
    )

db.add_all(reports)
db.commit()

report_df = pd.read_sql("SELECT * FROM reports", engine)
print(report_df.shape)
report_df.head()

(100, 10)


Unnamed: 0,report_id,group_id,contest_id,reporter_user_id,respondent_user_id,report_description,create_date,resolved,resolved_by,resolve_message
0,report0,main,c5,testUser2970,testUser3329,Voice boy wife condition while enter board its...,2025-05-11 19:12:01.214934,False,,
1,report1,g18,c1,testUser239,testUser1920,Tonight couple and job mind southern rather vo...,2025-05-11 19:12:01.214934,False,,
2,report2,g22,c5,testUser4662,testUser1994,Finish summer rest feel finally impact I fast ...,2025-05-11 19:12:01.214934,False,,
3,report3,main,c2,testUser4446,testUser1329,Fight decision size parent focus kid put.,2025-05-11 19:12:01.214934,False,,
4,report4,g25,c2,testUser4237,testUser4596,List top somebody college be middle plan frien...,2025-05-11 19:12:01.214934,False,,


In [12]:
# populate announcememts

announcements = []
for i in range(NUM_ANNOUNCEMENTS):
    announcements.append(
        Announcement(
            announcement_id = f"anmt{i}",
            group_id = groups[int(random.random() * len(groups))].group_id,
            title = faker.sentence(nb_words=7),
            content = faker.sentence(nb_words=100)
        )
    )

db.add_all(announcements)
db.commit()

announcement_df = pd.read_sql("SELECT * FROM announcements", engine)
print(announcement_df.shape)
announcement_df.head()

(30, 5)


Unnamed: 0,announcement_id,group_id,create_date,title,content
0,anmt0,g14,2025-05-11 19:12:02.961840,Consider whom item treat area buy check clearl...,Generation wait thus suffer economy play nearl...
1,anmt1,g06,2025-05-11 19:12:02.961840,No guy eye hit late near stay perhaps particul...,Window hour some fund voice sense current meet...
2,anmt2,g10,2025-05-11 19:12:02.961840,Machine whatever everything fear walk word sid...,First give value somebody event business quali...
3,anmt3,g24,2025-05-11 19:12:02.961840,Reduce tree serious soon stay seven quite.,Their bank land region back nor article natura...
4,anmt4,g17,2025-05-11 19:12:02.961840,Help painting always authority source onto.,Describe decade trade field training deep coup...


In [24]:
# update ratings for a contest across ALL groups

# TODO