In [46]:
import pandas as pd
import numpy as np
from pathlib import Path
from collections import defaultdict, deque

In [26]:
"""
tid = tournament id 
pid = player id
[beers = number of beers drunk] => unnecessary, because players only appear on the list if they drank at least one beer at a tournament

players = defaultdict(float)

players["10"] = .32
players["15"] = 6.32
players["14"] = 9.32
players["13"] = 6.322
players["11"] = 79.32

print(players)

https://thispointer.com/python-pandas-how-to-create-dataframe-from-dictionary/ => use players.items()
pd.DataFrame(players.items(),columns=["pid","ema"]).sort_values("ema",ascending=False)[0:2]
"""

# Read only the two columns used downstream and order the rows by tournament id.
df = pd.read_csv("tally.csv", usecols=['pid','tid']).sort_values('tid')

In [62]:
class Classifier:
    """Predict tournament attendance from an exponential moving average (EMA).

    Every known player carries a score that is updated once per tournament:
    ``ema = alpha * attended + (1 - alpha) * ema``, where ``attended`` is 1 if
    the player appears in that tournament's rows and 0 otherwise.  The ``N``
    players with the highest score form the prediction.

    Parameters
    ----------
    N : int
        Number of rows returned by :meth:`predict`.
    alpha : float
        EMA smoothing factor, i.e. the weight given to the latest tournament.
    window_width : int, optional
        If given, only the last ``window_width`` tournaments of the training
        data are used; ``None`` (default) uses all of them.
    """

    def __init__(self, N, alpha, window_width=None):
        self.N = N
        self.alpha = alpha
        self.window_width = window_width
        # Filled in by fit(); None marks the model as not fitted yet.
        self.fitted = None

    def fit(self, df):
        """Compute the per-player EMA from a tally frame.

        Parameters
        ----------
        df : pandas.DataFrame
            Must provide the columns ``tid`` (tournament id) and ``pid``
            (player id).  Tournaments are processed in ascending ``tid``
            order (assumes ids increase chronologically -- TODO confirm).

        Returns
        -------
        Classifier
            ``self``, so calls can be chained.
        """
        # mapping from player id => ema value (unknown players start at 0.0)
        players = defaultdict(float)

        grouped = df.sort_values("tid").groupby('tid')
        if self.window_width is not None:
            # A bounded deque drops the oldest tournaments and keeps only the
            # last `window_width` (tid, rows) pairs.
            grouped = deque(grouped, maxlen=self.window_width)

        # iterate over every tournament
        for _tid, group in grouped:
            active_players = set(group.pid)

            # Update every known player: attendees are pulled towards 1,
            # everybody else decays towards 0.
            for pid in active_players | set(players.keys()):
                key = int(pid)
                players[key] = self.alpha * int(pid in active_players) + (1 - self.alpha) * players[key]

        # list(...) keeps the constructor happy on pandas versions that do not
        # accept a dict view directly.
        self.fitted = pd.DataFrame(list(players.items()), columns=["pid", "ema"])
        return self

    def predict(self):
        """Return the ``N`` rows (``pid``, ``ema``) with the highest EMA score.

        Raises
        ------
        AttributeError
            If the model has not been fitted yet.
        """
        if self.fitted is None:
            raise AttributeError(
                "Classifier is not fitted yet; call fit() before predict() or score()."
            )
        return self.fitted.sort_values("ema", ascending=False)[:self.N]

    def score(self, x):
        """Return how many distinct ids in ``x`` are missing from the prediction.

        This is a cost: 0 means every id in ``x`` was predicted, so lower is
        better.
        """
        return len(set(x) - set(self.predict().pid))

In [69]:
# Hold-out check on tournament 300: fit on all earlier tournaments, then show
# which of its attendees the top-20 prediction missed.
clf = Classifier(N=20, alpha=0.11, window_width=300)
clf.fit(df[df.tid < 300])
set(df.loc[df.tid == 300, "pid"]).difference(clf.predict().pid)

{3, 23, 28, 33, 35, 42}

In [70]:
%%time

def evaluate(alpha=.16, N=20, window_width=200, data=None):
    """Walk-forward evaluation of ``Classifier`` over every tournament.

    For each tournament id a fresh ``Classifier`` is fitted on all rows with a
    strictly smaller ``tid`` and scored on that tournament's attendees.  The
    very first tournament has no history, so all of its attendees count as
    misses.

    Parameters
    ----------
    alpha : float
        EMA smoothing factor passed to ``Classifier``.
    N : int
        Number of players predicted per tournament.
    window_width : int or None
        Number of most recent tournaments used for fitting (default 200, the
        value that used to be hard-coded); ``None`` uses the full history.
    data : pandas.DataFrame, optional
        Tally frame with ``tid`` and ``pid`` columns.  Defaults to the
        notebook-level ``df``.

    Returns
    -------
    float
        Mean ``Classifier.score`` over all tournaments (lower is better).
    """
    if data is None:
        # Fall back to the notebook-level tally frame.
        data = df

    scores = []

    for tid in sorted(set(data.tid)):
        clf = Classifier(N, alpha, window_width)
        # Only strictly earlier tournaments are visible during fitting.
        clf.fit(data[data.tid < tid])
        scores.append(clf.score(data[data.tid == tid].pid))

    return pd.Series(scores).mean()

# Baseline: mean number of missed attendees per tournament with the default arguments.
evaluate()

CPU times: user 2.28 s, sys: 3.38 ms, total: 2.28 s
Wall time: 2.28 s


3.1467391304347827

In [71]:
from smac.scenario.scenario import Scenario
from smac.facade.smac_hpo_facade import SMAC4HPO
from ConfigSpace import ConfigurationSpace, UniformFloatHyperparameter


# Search space: a single hyperparameter, the EMA smoothing factor alpha in [0, 1].
cs = ConfigurationSpace()
cs.add_hyperparameter(
    UniformFloatHyperparameter("alpha", 0, 1, default_value=.11)
)

scenario = Scenario({
    "run_obj": "quality",     # optimise solution quality rather than runtime
    "runcount-limit": 100,    # budget: maximum number of function evaluations
    "cs": cs,                 # the configuration space defined above
    "deterministic": "true",
})


def alpha_cost(config):
    """Target function for SMAC: evaluate the sampled alpha with N fixed at 26."""
    return evaluate(config['alpha'], 26)


smac = SMAC4HPO(
    scenario=scenario,
    rng=np.random.RandomState(42),
    tae_runner=alpha_cost,
)

smac.optimize()

INFO:smac.utils.io.cmd_reader.CMDReader:Output to smac3-output_2020-02-26_14:08:40_445173
INFO:smac.facade.smac_hpo_facade.SMAC4HPO:Optimizing a deterministic scenario for quality without a tuner timeout - will make SMAC deterministic and only evaluate one configuration per iteration!
INFO:smac.initial_design.sobol_design.SobolDesign:Running initial design for 10 configurations
INFO:smac.facade.smac_hpo_facade.SMAC4HPO:<class 'smac.facade.smac_hpo_facade.SMAC4HPO'>
INFO:smac.intensification.intensification.Intensifier:Challenger (1.8859) is better than incumbent (2.0380) on 1 runs.
INFO:smac.intensification.intensification.Intensifier:Changes in incumbent:
INFO:smac.intensification.intensification.Intensifier:  alpha : 0.5 -> 0.25
INFO:smac.intensification.intensification.Intensifier:Challenger (1.8750) is better than incumbent (1.8859) on 1 runs.
INFO:smac.intensification.intensification.Intensifier:Changes in incumbent:
INFO:smac.intensification.intensification.Intensifier:  alpha : 

INFO:smac.intensification.intensification.Intensifier:Challenger (1.8424) is better than incumbent (1.8478) on 1 runs.
INFO:smac.intensification.intensification.Intensifier:Changes in incumbent:
INFO:smac.intensification.intensification.Intensifier:  alpha : 0.1089730148043242 -> 0.11047497695401691
INFO:smac.intensification.intensification.Intensifier:Updated estimated cost of incumbent on 1 runs: 1.8424
INFO:smac.intensification.intensification.Intensifier:Updated estimated cost of incumbent on 1 runs: 1.8424
INFO:smac.intensification.intensification.Intensifier:Updated estimated cost of incumbent on 1 runs: 1.8424
INFO:smac.intensification.intensification.Intensifier:Updated estimated cost of incumbent on 1 runs: 1.8424
INFO:smac.intensification.intensification.Intensifier:Updated estimated cost of incumbent on 1 runs: 1.8424
INFO:smac.intensification.intensification.Intensifier:Updated estimated cost of incumbent on 1 runs: 1.8424
INFO:smac.intensification.intensification.Intensifi

Configuration:
  alpha, Value: 0.11047497695401691