# Fetch hack attempts via the Codeforces API and insert them into a SQL database

Hack attempts made during a contest (where one user attempts to break another user's code) are not recorded in the submissions folder and must be obtained through a separate API call.

Hack attempts help identify users who are purposely "tanking" their rating. These users typically have double-digit unsuccessful hacking attempts in-contest (which leads to massive penalties).

Note that this information was ultimately not used in the final model, due to time constraints and the small number of users who purposefully lower their own rating.

In [None]:
import pandas as pd

from sqlalchemy import *
from sqlalchemy_utils import database_exists, create_database
import psycopg2
import pandas as pd

# Connection settings for the local PostgreSQL server.
dbname = 'codeforces'
username = 'Joy'

# Use the 'postgresql' dialect name: the older 'postgres' alias is rejected
# by SQLAlchemy 1.4 and later, while 'postgresql' works on every version.
engine = create_engine('postgresql://%s@localhost/%s' % (username, dbname))
# Single-argument print() gives the same output under Python 2 and Python 3.
print(engine.url)

## create a database (if it doesn't exist)
if not database_exists(engine.url):
    create_database(engine.url)
# Confirm that the database is now present (prints True on success).
print(database_exists(engine.url))

## Contest information
Get the list of contests. This only needs to be run once.

In [None]:
# Imported here because this cell runs before the later cell that imports
# requests; without it a fresh kernel fails with NameError.
import requests

# create table with schema
metadata = MetaData()
# One row per contest; the columns are the contest fields this notebook stores.
contests = Table('contests', metadata,
    Column('id', Integer, primary_key=True),
    Column('durationSeconds', Integer, nullable=False),
    Column('relativeTimeSeconds', Integer),
    Column('startTimeSeconds', Integer),
    Column('frozen', Boolean),
    Column('name', String),
    Column('type', String),
    Column('phase', String)
)
# Recreate the table from scratch so re-running this cell never duplicates rows.
contests.drop(engine, checkfirst=True)
contests.create(engine)

url = 'http://codeforces.com/api/contest.list?gym=false'
# A timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(url, timeout=30).json()
if response.get('status') == 'FAILED':
    # Fail with the API's own explanation instead of an opaque KeyError below.
    raise RuntimeError('contest.list request failed: %s' % response.get('comment'))
r = response['result']
df_contests = pd.DataFrame.from_dict(r)
#df_contests.to_csv('contests.tsv', sep='\t', index=False, header=True, encoding='utf')

# Append into the table created above rather than replacing it: 'replace'
# would drop the declared schema (including the primary key) and add a stray
# 'index' column. reindex keeps exactly the schema columns, so extra API
# fields are ignored and missing ones are stored as NULL. df_contests itself
# is left untouched for the later cells that read it.
schema_columns = [column.name for column in contests.columns]
df_contests.reindex(columns=schema_columns).to_sql(
    'contests', engine, if_exists='append', index=False)

## Get information on hacks

In [None]:
import requests
from time import sleep
def getContestHacks(contest, maxtries=5, delay=5, timeout=30):
    """Fetch the hacks of one contest from the Codeforces API.

    Parameters
    ----------
    contest : int or str
        Contest id; it is appended to the ``contest.hacks`` query string.
    maxtries : int, optional
        Maximum number of attempts when a request raises an error.
    delay : int or float, optional
        Seconds to wait between failed attempts.
    timeout : int or float, optional
        Seconds to wait for the HTTP response before treating the attempt
        as failed.

    Returns
    -------
    pandas.DataFrame or None
        A frame built from the ``result`` list of the response, or ``None``
        when the API reports ``FAILED``, the result list is empty, or every
        attempt raised an error.
    """
    url = 'http://codeforces.com/api/contest.hacks?contestId=' + str(contest)
    # Single-argument print() gives the same output under Python 2 and 3.
    print(url)
    for tries in range(maxtries):
        print("attempt %s" % tries)
        try:
            # Without a timeout a stalled connection would block forever.
            r = requests.get(url, timeout=timeout).json()
            if r['status'] == 'FAILED':
                print(r['comment'])
                return None
            r = r['result']
            if len(r) > 0:
                return pd.DataFrame.from_dict(r)
            return None
        except Exception as err:
            # Best-effort retry on any ordinary error. Catching Exception
            # (not a bare except) lets Ctrl-C / SystemExit stop the loop.
            print("error, attempt %s: %r" % (tries, err))
            # No point in waiting once the last attempt has failed.
            if tries + 1 < maxtries:
                sleep(delay)
    print("ERROR GETTING HACK INFO FOR CONTEST %s" % contest)
    return None

### Create table schema for hacks

In [None]:
# Declare the 'hacks' table explicitly so the column types and primary key
# are fixed before any rows are appended to it.
metadata = MetaData()
hacks = Table(
    'hacks',
    metadata,
    # Scalar fields kept as-is from each hack record.
    Column('id', Integer, primary_key=True),
    Column('creationTimeSeconds', Integer, nullable=False),
    Column('verdict', String),
    # Values pulled out of the nested defender / hacker / problem objects.
    Column('ghost', Boolean),
    Column('defender', String),
    Column('hacker', String),
    Column('contestID', String),
    Column('problemID', String),
    Column('problemName', String),
)
# Start from an empty table: remove any earlier copy, then create it afresh.
hacks.drop(bind=engine, checkfirst=True)
hacks.create(bind=engine)

### request information on hacks

In [None]:
contest_ids = df_contests.id
# Position in contest_ids at which to start; presumably where an earlier,
# interrupted run stopped -- TODO confirm. Use 0 to fetch every contest.
last_idx = 443

# Scalar fields of a hack record that map directly onto 'hacks' columns.
scalar_columns = ['id', 'creationTimeSeconds', 'verdict']

for i, cid in enumerate(contest_ids[last_idx:]):
    # Progress line: position in contest_ids, then the contest id.
    print("%s %s" % (last_idx + i, cid))
    df_hack = getContestHacks(cid)

    # None means the API call failed or the contest had no hacks.
    if df_hack is None:
        continue

    # format resulting dataframe: pull the needed values out of the nested
    # defender / hacker / problem dicts (first listed member of each party)
    ghost = df_hack.defender.apply(lambda x: x['ghost'])
    defender = df_hack.defender.apply(lambda x: x['members'][0]['handle'])
    hacker = df_hack.hacker.apply(lambda x: x['members'][0]['handle'])
    contestID = df_hack.problem.apply(lambda x: x['contestId'])
    problemID = df_hack.problem.apply(lambda x: x['index'])
    problemName = df_hack.problem.apply(lambda x: x['name'])

    # Keep only the scalar schema columns instead of dropping a fixed list:
    # dropping raised when the response lacked 'judgeProtocol' or 'test',
    # and any unexpected extra field would have broken the append below.
    df_hack = df_hack.reindex(columns=scalar_columns)
    df_hack['ghost'] = ghost
    df_hack['defender'] = defender
    df_hack['hacker'] = hacker
    df_hack['contestID'] = contestID
    df_hack['problemID'] = problemID
    df_hack['problemName'] = problemName

    print("writing to sql...")
    df_hack.to_sql('hacks', engine, if_exists='append', index=False)

## Test connection

In [None]:
# Open a direct psycopg2 connection to the same database, plus a cursor
# for running raw SQL statements.
connection_args = {'database': dbname, 'user': username}
con = psycopg2.connect(**connection_args)
cur = con.cursor()

###  show all tables

In [None]:
# List every user table as 'schema.table', skipping PostgreSQL's own
# catalog and information_schema tables.
cur.execute("""
SELECT
    table_schema || '.' || table_name
FROM
    information_schema.tables
WHERE
    table_type = 'BASE TABLE'
AND
    table_schema NOT IN ('pg_catalog', 'information_schema');
    """)
rows = cur.fetchall()
# Each fetched row is a one-element tuple; print it as returned.
for r in rows:
    print(r)

In [None]:
# Load every successful hack from the 'hacks' table into a DataFrame,
# reading through the psycopg2 connection opened earlier.
sql_query = """
SELECT * FROM hacks WHERE verdict='HACK_SUCCESSFUL';
"""
df_hack = pd.read_sql_query(sql=sql_query, con=con)

In [None]:
# Commit the open transaction, then close the psycopg2 connection.
con.commit()
con.close()