In [1]:
import itertools
import pickle

import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# Print NumPy arrays with 3 decimal places and without scientific notation.
np.set_printoptions(precision=3, suppress=True)

# 予測実行クラス

In [2]:
class Ranking:
    """Predicts, per player, the probability of finishing 1st to 4th.

    One classifier is kept per value of the ``game`` column (0-7; presumably
    the round index within a match -- confirm against the data source). Each
    classifier predicts a ``game_result`` label in ``0..23``. Label ``k`` is
    interpreted as the ``k``-th lexicographic permutation of the player
    indices ``(0, 1, 2, 3)``, listing the players from 1st to 4th place.
    """

    # Number of per-game models trained by ``train``.
    N_GAMES = 8
    # Names of the score-difference feature columns built in ``train``.
    FEATURE_COLUMNS = ('0', '1', '2')
    # All 24 finishing orders; _ORDERINGS[k][rank] is the player at that rank.
    _ORDERINGS = tuple(itertools.permutations(range(4)))

    def __init__(self, models=None):
        """
        Constructor.

        Args:
            models: optional sequence of fitted classifiers, indexed by game
                number, each exposing ``predict_proba``. When omitted, call
                ``train`` before ``predict``.
        """
        self.models = models
        # Divisor applied to every score difference before it is fed to a model.
        self.coeff = 1000

    def train(self, data_path='../data/2019.csv', model_path='score_predictor.bin'):
        """
        Fit one logistic-regression model per game number and pickle them.

        Args:
            data_path: CSV with at least the columns ``east``, ``south``,
                ``west``, ``north``, ``game`` and ``game_result``.
            model_path: file the list of fitted models is pickled to.

        Side effects:
            Prints train/test accuracy for every game number, writes
            ``model_path`` and stores the fitted models on ``self.models``.
        """
        df = pd.read_csv(data_path)

        # Pre-processing: scaled score gap between east and each other seat.
        df['0'] = (df.east - df.south) / self.coeff
        df['1'] = (df.east - df.west) / self.coeff
        df['2'] = (df.east - df.north) / self.coeff
        df = df[['game', '0', '1', '2', 'game_result']]

        # Training
        models = []

        for i in range(self.N_GAMES):
            model = LogisticRegression(tol=1e-6, random_state=123, verbose=0)

            # Select the rows of this game number once and reuse them.
            rows = df[df.game == i]
            x = rows[list(self.FEATURE_COLUMNS)]
            y = rows['game_result']

            x_train, x_test, y_train, y_test = train_test_split(
                x, y, test_size=0.1, random_state=123)

            model.fit(x_train, y_train)

            print('{} Accuracy (train) :'.format(i), model.score(x_train, y_train))
            print('{} Accuracy (test)  :'.format(i), model.score(x_test, y_test))

            models.append(model)

        with open(model_path, mode='wb') as f:
            pickle.dump(models, f)

        self.models = models
        print('学習完了')

    @staticmethod
    def _align_to_labels(proba, classes, n_labels=24):
        """Return a length-``n_labels`` vector where entry ``k`` belongs to label ``k``.

        ``predict_proba`` yields one column per entry of ``classes_``. If a
        label never occurred in a model's training subset, the columns are
        fewer than ``n_labels`` and positional indexing would attribute
        probabilities to the wrong finishing orders.
        """
        proba = np.asarray(proba, dtype=float)
        if proba.shape[0] >= n_labels:
            # Full label set: column k is already label k (previous behaviour).
            return proba
        if classes is None or len(classes) != proba.shape[0]:
            raise ValueError(
                'model returned {} probabilities but {} labels are required and '
                'no matching classes_ attribute is available'.format(proba.shape[0], n_labels))
        labels = np.asarray(classes)
        index = labels.astype(int)
        if np.any(index != labels) or index.min() < 0 or index.max() >= n_labels:
            raise ValueError('class labels must be integers in [0, {})'.format(n_labels))
        aligned = np.zeros(n_labels)
        aligned[index] = proba  # labels the model never saw keep probability 0
        return aligned

    def predict(self, game_num, score):
        """
        Run a prediction.

        Args:
            game_num: index into ``self.models`` selecting the model to use.
            score: sequence of at least four scores; element 0 is compared
                against elements 1, 2 and 3 exactly as ``train`` compares
                ``east`` against ``south``, ``west`` and ``north``.

        Returns:
            4x4 ``numpy`` array; ``result[p, r]`` is the probability that
            player ``p`` finishes in place ``r + 1``.

        Raises:
            ValueError: if the model yields fewer than 24 probabilities and
                they cannot be mapped onto labels ``0..23`` via ``classes_``.
        """
        # Build the model input: scaled gaps between score[0] and the others.
        points = np.asarray(score, dtype=float)
        feature = ((points[0] - points[[1, 2, 3]]) / self.coeff).reshape(1, -1)

        # Predict
        model = self.models[game_num]
        proba = model.predict_proba(feature)[0]

        print('proba', proba)

        proba = self._align_to_labels(proba, getattr(model, 'classes_', None))

        # For every player, accumulate the probability of each place (1st-4th)
        # over all finishing orders in which the player takes that place.
        table = np.zeros((4, 4))
        for label, order in enumerate(self._ORDERINGS):
            for rank, player in enumerate(order):
                table[player, rank] += proba[label]
        return table

# テスト実行

In [3]:
# Train the per-game models, then print the placement table for a few
# hand-picked situations.
r = Ranking()
r.train()

# Each entry: (game number passed to predict, the four scores).
# Score order presumably follows east, south, west, north, matching the
# differences built in Ranking.train -- confirm.
cases = [
    (4, [300, 250, 250, 200]),
    (7, [350, 300, 200, 150]),
    (7, [450, 300, 200, 50]),
    (5, [359, 5, 535, 101]),
]

for game_num, scores in cases:
    f = np.array(scores)
    result = r.predict(game_num, f)
    print(result)

0 Accuracy (train) : 0.0582199179088356
0 Accuracy (test)  : 0.0618823910578325
1 Accuracy (train) : 0.08539055230171329
1 Accuracy (test)  : 0.08423645320197044
2 Accuracy (train) : 0.10980619141864895
2 Accuracy (test)  : 0.10485632666778692
3 Accuracy (train) : 0.12939001848428835
3 Accuracy (test)  : 0.1303378494254845
4 Accuracy (train) : 0.1477042770977819
4 Accuracy (test)  : 0.15134370579915135
5 Accuracy (train) : 0.16496723267938276
5 Accuracy (test)  : 0.16732617297908423
6 Accuracy (train) : 0.18303057155516173
6 Accuracy (test)  : 0.18779904306220097
7 Accuracy (train) : 0.1749478192513079
7 Accuracy (test)  : 0.16878048780487806
学習完了
proba [0.069 0.048 0.067 0.044 0.042 0.044 0.065 0.046 0.054 0.026 0.033 0.025
 0.063 0.043 0.05  0.027 0.033 0.024 0.044 0.044 0.03  0.025 0.032 0.023]
[[0.313 0.305 0.232 0.15 ]
 [0.248 0.249 0.264 0.239]
 [0.24  0.245 0.272 0.243]
 [0.199 0.201 0.233 0.368]]
proba [0.103 0.071 0.069 0.03  0.047 0.027 0.102 0.076 0.059 0.015 0.034 0.016
 0.

In [4]:
# Build the east-vs-seat score gap columns on the 201901 file, scaled by 100.
df = pd.read_csv('../data/201901.csv')
# Column '3' is east minus itself, so it is always 0.0.
for column, seat in (('3', 'east'), ('0', 'south'), ('1', 'west'), ('2', 'north')):
    df[column] = (df.east - df[seat]) / 100
df = df[['game', '0', '1', '2', '3', 'game_result']]
df

Unnamed: 0,game,0,1,2,3,game_result
0,0,0.00,0.00,0.00,0.0,14
1,0,0.58,0.58,1.16,0.0,14
2,1,0.58,-0.22,1.96,0.0,14
3,1,0.38,-0.22,1.66,0.0,14
4,2,1.72,0.92,2.80,0.0,14
...,...,...,...,...,...,...
151520,2,-0.75,1.25,1.00,0.0,6
151521,3,-2.11,2.51,1.00,0.0,6
151522,4,-1.24,3.38,2.74,0.0,6
151523,5,-1.24,2.58,3.54,0.0,6


In [5]:
# Load the unprocessed 201901 CSV again and display it as-is.
source_csv = '../data/201901.csv'
df = pd.read_csv(source_csv)
df

Unnamed: 0.1,Unnamed: 0,east,game,north,south,west,east_end,south_end,west_end,north_end,game_result
0,0,250,0,250,250,250,264,268,363,105,14
1,1,308,0,192,250,250,264,268,363,105,14
2,2,308,1,112,250,330,264,268,363,105,14
3,3,293,1,127,255,315,264,268,363,105,14
4,4,386,2,106,214,294,264,268,363,105,14
...,...,...,...,...,...,...,...,...,...,...,...
151520,151520,285,2,185,360,160,359,577,101,-37,6
151521,151521,285,3,185,496,34,359,577,101,-37,6
151522,151522,372,4,98,496,34,359,577,101,-37,6
151523,151523,372,5,18,496,114,359,577,101,-37,6
