In [16]:
import re
import json
import time
import pickle
import requests
import pandas as pd
from bs4 import BeautifulSoup as bs

MAX_API_REQUESTS = 5
API_REQUESTS_INTERVAL = 1
MIN_CONTESTS = 5


def GetRequestStatusOk(res):
    if res["status"] != "OK":
        return False
    return True


def GetRequestBody(res):
    return res["result"]


def GetRequest(method):
    BlockAPICalls()
    res = requests.get("https://codeforces.com/api/" + method)
    if not res:
        print("Unexpected status code:", str(res.status_code))
    return res.json()


def GetAllUsers():
    res = GetRequest("user.ratedList?activeOnly=false")
    if GetRequestStatusOk(res) == False:
        print("Couldn't download all users")
        quit()
    return GetRequestBody(res)


def GetActiveUsers():
    res = GetRequest("user.ratedList?activeOnly=true")
    if(GetRequestStatusOk(res) == False):
        print("Couldn't download active users")
        quit()
    return GetRequestBody(res)


def GetContests():
    res = GetRequest("contest.list?gym=false")
    if GetRequestStatusOk(res) == False:
        print("Couldn't download contest list")
        quit()
    res = GetRequestBody(res)
    return list(filter(lambda con: con["phase"] == "FINISHED" and con["type"] == "CF", res))


def GetAuthors(contestId):
    url = "http://codeforces.com/contests/" + str(contestId)
    res = requests.get(url)
    content = res.text
    soup = bs(content, "html.parser")
    return set(tag.text for tag in soup.findAll("a", {"class": re.compile("rated-user*")}))


def GetHistory(user):
    res = GetRequest("user.rating?handle=" + user)
    if GetRequestStatusOk(res) == False:
        return None
    return GetRequestBody(res)


def GetStandings(contestId):
    res = GetRequest("contest.ratingChanges?contestId=" + str(contestId))
    if GetRequestStatusOk(res) == False:
        return None
    return GetRequestBody(res)
    
    
def BlockAPICalls():
    BlockAPICalls.cnt += 1
    if BlockAPICalls.cnt >= MAX_API_REQUESTS:
        BlockAPICalls.now = time.time()
        diff =  BlockAPICalls.now - BlockAPICalls.lasttime
        if diff < API_REQUESTS_INTERVAL:
            time.sleep(diff)
        BlockAPICalls.cnt = 0
        BlockAPICalls.lasttime = BlockAPICalls.now

BlockAPICalls.cnt = 5
BlockAPICalls.lasttime = time.time()
BlockAPICalls.now = BlockAPICalls.lasttime


def FetchUsers():
    print("Fetching users ...")
    users = GetAllUsers()
    columns = ["handle", "country", "city", "organization",
               "contribution", "rating", "maxRating"]
    users_df = pd.DataFrame(users)[columns].set_index("handle")
    print("Fetched users")
    
    with open("users.pickle", "wb") as outfile:
        pickle.dump(users_df, outfile)
        
    return users_df.index

        
def FetchContests():
    print("Fetching contests ...")
    contests = GetContests()
    columns = ["id", "durationSeconds", "startTimeSeconds"]
    contests_df = pd.DataFrame(contests)[columns].set_index("id")
    contests_df.columns = ["duration", "startTime"]
    contests_df["dayTime"] = contests_df["startTime"] % (24 * 60 * 60)
    contests_df["authors"] = [GetAuthors(contestId) for contestId in contests_df.index]
    print("Fetched contests")
        
    with open("contests.pickle", "wb") as outfile:
        pickle.dump(contests_df, outfile)
        
    return contests_df.index
    
    
def FetchHistory(handle):
    history = GetHistory(handle)
    if history == None:
        print("PROBLEM WITH", handle)
        with open("error.json", "a") as outfile:
            json.dump(handle, outfile)
        return None
    if len(history) < MIN_CONTESTS:
        return None
    columns = ["rank", "oldRating", "newRating"]
    history_df = pd.DataFrame(history)[columns]
    history_df["delta"] = history_df.newRating - history_df.oldRating
    return history_df
    
    
def FetchAllHistory(handles):
    print("Fetching all history ...")
    all_history = {}
    left = len(handles)
    for handle in handles:
        history = FetchHistory(handle)
        left -= 1
        print("Contest histories left", left)
        if history is not None:
            all_history[handle] = history
    print("Fetched all history")
    
    with open("history.pickle", "wb") as outfile:
        pickle.dump(all_history, outfile)
    
    
def FetchStandings(contestId):
    standings = GetStandings(contestId)
    if standings is None:
        return None
    columns = ["handle", "rank", "oldRating", "newRating"]
    standings_df = pd.DataFrame(standings)[columns].set_index("handle")
    standings_df["delta"] = standings_df.newRating - standings_df.oldRating
    return standings_df


def FetchAllStandings(contestIds):
    print("Fetching all standings ...")
    all_standings = {}
    left = len(contestIds)
    for contestId in contestIds:
        standings = FetchStandings(contestId)
        left -= 1
        print("Standings left", left)
        if standings is not None:
            all_standings[contestId] = standings
    print("Fetched all standings")
    
    with open("standings.pickle", "wb") as outfile:
        pickle.dump(all_standings, outfile)
        
    
def FetchAll():
    handles = FetchUsers()
    contestIds = FetchContests()
    FetchAllContestHistory(handles)
    FetchAllStandings(contestIds)
    
    
class Database:
    def __init__(self, users, contests, history, standings, clean=True):
        self.users = users
        self.contests = contests
        self.history = history
        self.standings = standings
        if clean:
            self.clean()
    
    def clean(self):
        pass
    
    
def LoadDataBase(clean=True):
    users = contests = None
    history = standings = None
    with open("users.pickle", "rb") as infile:
        users = pickle.load(infile)
    with open("contests.pickle", "rb") as infile:
        contests = pickle.load(infile)
    with open("history.pickle", "rb") as infile:
        history = pickle.load(infile)
    with open("standings.pickle", "rb") as infile:
        standings = pickle.load(infile)
    return Database(users, contests, history, standings, clean=clean)

In [55]:
with open("users.pickle", "rb") as infile:
    users = pickle.load(infile)

In [56]:
users

Unnamed: 0_level_0,country,city,organization,contribution,rating,maxRating
handle,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
tourist,Belarus,Gomel,ITMO University,154,3778,3783
Retired_MiFaFaOvO,Samoa,,National University of Samoa,116,3681,3681
Benq,United States,Princeton,MIT,109,3592,3633
ecnerwala,United States,Cupertino,MIT,148,3521,3668
Um_nik,,,Nizhny Novgorod SU,182,3423,3567
...,...,...,...,...,...,...
MisterMax,,,,0,-39,1405
jh05013,,,,14,-39,1708
errorerror,,,,0,-41,1376
fourist,,,,-1,-44,1366


In [65]:
GetStandings(1438)

[]

In [64]:
FetchStandings(1438)

KeyError: "None of [Index(['handle', 'rank', 'oldRating', 'newRating'], dtype='object')] are in the [columns]"

In [20]:
class UserContestRatingClass:
    def __init__(self, contest):
        self.contestId = contest["contestId"]
        self.rank = contest["rank"]
        self.oldRating = contest["oldRating"]
        self.newRating = contest["newRating"]

    def delta(self):
        return self.newRating - self.oldRating

    def __str__(self):
        return 'contestId: %d, rank: %d, oldRating: %d, newRating: %d' % (
                self.contestId, self.rank, self.oldRating, self.newRating)

    def __repr__(self):
        return str(self)

In [21]:
with open("user-contest-history-info.pickle", "rb") as infile:
    history = pickle.load(infile)

In [32]:
users = GetActiveUsers()

In [33]:
users = users[:10]

In [34]:
columns = ["handle", "country", "city", "organization",
               "contribution", "rating", "maxRating"]
users_df = pd.DataFrame(users)[columns].set_index("handle")

In [8]:
contests = GetContests()

In [9]:
contests = contests[:10]

In [10]:
columns = ["id", "durationSeconds", "startTimeSeconds"]
contests_df = pd.DataFrame(contests)[columns].set_index("id")
contests_df.columns = ["duration", "startTime"]
contests_df["dayTime"] = contests_df["startTime"] % (24 * 60 * 60)
contests_df["authors"] = [GetAuthors(contestId) for contestId in contests_df.index]

In [11]:
contests_df

Unnamed: 0_level_0,duration,startTime,dayTime,authors
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1467,7200,1610116500,52500,"{alimq, DS007}"
1470,7200,1609857300,52500,"{kokokostya, Kapt, Karabutsa, Allvik06, AliceG..."
1471,7200,1609857300,52500,"{kokokostya, Kapt, Karabutsa, Allvik06, AliceG..."
1466,10800,1609338900,52500,"{Okrut, Anadi, gawry}"
1411,7200,1608476700,54300,"{dargelirli, neckbotov}"
1464,7200,1608476700,54300,"{dargelirli, neckbotov}"
1465,7200,1608476700,54300,"{dargelirli, neckbotov}"
1458,7200,1608370500,34500,"{amethyst0, AndreySergunin, Endagorion}"
1459,7200,1608370500,34500,"{amethyst0, AndreySergunin, Endagorion}"
1410,172800,1608300300,50700,{}


In [33]:
users_df = users_df.iloc[:10]

In [35]:
history = FetchAllHistory(users_df.index)

In [43]:
with open("user-contest-history-info.pickle", "rb") as infile:
    history = pickle.load(infile)

In [28]:
def contestHistoryToDict(cntstHist):
        return {"contestId": cntstHist.contestId,
                "rank": cntstHist.rank,
                "oldRating": cntstHist.oldRating,
                "newRating": cntstHist.newRating,
                "delta": cntstHist.delta()}

In [49]:
allh = {}
for user, hist in history.items():
    allh[user] = pd.DataFrame([contestHistoryToDict(entry) for entry in hist])

In [54]:
with open("history.pickle", "wb") as outfile:
    pickle.dump(allh, outfile)

In [31]:
pd.DataFrame(hist)

Unnamed: 0,contestId,rank,oldRating,newRating,delta
0,2,14,0,1602,1602
1,8,5,1602,1764,162
2,10,18,1764,1878,114
3,13,11,1878,1967,89
4,19,2,1967,2063,96
...,...,...,...,...,...
181,1434,1,3509,3619,110
182,1441,1,3619,3687,68
183,1450,2,3687,3712,25
184,1458,1,3712,3783,71


In [36]:
all_history = {}
for handle in users_df.index:
    history = FetchHistory(handle)
    if history is not None:
        all_history[handle] = history

In [12]:
all_standings = {}
for contestId in contests_df.index:
    standings = FetchStandings(contestId)
    if standings is not None:
        all_standings[contestId] = standings

Unexpected status code: 400


In [4]:
res = requests.get("https://codeforces.com/api/" + "contest.ratingChanges?contestId=" + str(1410))

In [5]:
res.json()

{'status': 'FAILED', 'comment': 'contestId: Contest with id 1410 not found'}