# ランキングデータ作成プログラム
```
| ヘッダ名称 | 説明                                 | 
| ---------- | ------------------------------------ | 
| id         | 対戦カードID                         | 
| year       | 開催年度                             | 
| stage      | 開催大会                             | 
| match_num  | 節数                                 | 
| team       | チーム名                             | 
| point      | 今節勝ち点                           | 
| goal       | 今節得失点                           | 
| score      | 今節得点                             | 
| point_last | 前節までの勝ち点                     | 
| goal_last  | 前節までの得失点                     | 
| score_last | 前節までの得点                       |
| win_last   | 前節までの勝利数                     | 
| rank_last  | 前節までの結果を反映させたランキング | 
```
id, team が紐づけkey

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the input CSVs: train/test tables, the condition tables, and their
# supplementary "add" files.
train = pd.read_csv('../../data/JLeague/train.csv')
train_add = pd.read_csv('../../data/JLeague/train_add.csv')
test = pd.read_csv('../../data/JLeague/test.csv')
test_2014_add = pd.read_csv('../../data/JLeague/2014_add.csv')
condition = pd.read_csv('../../data/JLeague/condition.csv')
condition_add = pd.read_csv('../../data/JLeague/condition_add.csv')

### データ結合

In [3]:
# Condition table: base rows stacked with the supplementary rows.
condition_all = pd.concat([condition, condition_add])

# Training matches (base + supplementary), joined to their conditions on the
# match id (pandas' default inner join).
train_all = pd.concat([train, train_add]).merge(condition_all, on='id')

# Test matches (base + 2014 supplementary), joined the same way.
test_all = pd.concat([test, test_2014_add]).merge(condition_all, on='id')

### 前節までの順位データ作成

In [4]:
# ASCII (half-width) digits and their full-width counterparts, index-aligned.
han = list('0123456789')
zen = list('０１２３４５６７８９')


def get_matchnum_list(x):
    """Extract the round number from match labels as integers.

    x : pandas Series of match labels (the ``match`` column).

    The two characters at positions 1-2 of each label are taken as the round
    number; a '節' caught in that slice (single-digit rounds) is removed and
    full-width digits are mapped to ASCII digits before casting to int.
    """
    zen_to_han = str.maketrans(dict(zip(zen, han)))
    round_text = x.str[1:3].str.replace('節', '').str.translate(zen_to_han)
    return round_text.astype(int)

# League points earned from a match result.
def win_point(x, y):
    """Return the league points earned by the side that scored ``x``.

    x : goals scored by the team of interest, one entry per match.
    y : goals scored by the opponent, same length as ``x``.

    Entries are compared by position (not by index label). Returns an integer
    NumPy array with 3 for a win (x > y), 1 for a draw (x == y) and 0
    otherwise.

    Raises ValueError if ``x`` and ``y`` differ in length.
    """
    own = np.asarray(x)
    opp = np.asarray(y)
    if own.shape != opp.shape:
        raise ValueError('x and y must have the same length')

    # Plain ``int`` dtype: the ``np.int`` alias was removed in NumPy 1.24.
    points = np.zeros(len(own), dtype=int)
    points[own == opp] = 1
    points[own > opp] = 3
    return points



### 各試合の勝ち点、得失点データの作成（2012-2014の全試合）

In [5]:
# One row per match: training matches (without the target column 'y')
# stacked with the test matches, reduced to the columns needed for standings.
score_table = pd.concat([train_all.drop('y', axis=1), test_all])
# .copy() so the column assignments below act on an independent frame.
score_table = score_table[['id', 'year', 'stage', 'home', 'away',
                           'match', 'home_score', 'away_score']].copy()

# Round number parsed from the match label.
score_table['match_num'] = get_matchnum_list(score_table.match)

# League points earned by each side in this match.
score_table['home_point'] = win_point(score_table.home_score, score_table.away_score)
score_table['away_point'] = win_point(score_table.away_score, score_table.home_score)

# Goal difference for each side: goals scored minus goals conceded.
# (Must be computed from the scores, not from the league points.)
score_table['home_goal'] = score_table.home_score - score_table.away_score
score_table['away_goal'] = score_table.away_score - score_table.home_score

# Win flags (1 = that side won, 0 otherwise); each side uses its own
# comparison so a home win is not also counted as an away win.
score_table['home_win'] = (score_table['home_score'] > score_table['away_score']).astype(int)
score_table['away_win'] = (score_table['away_score'] > score_table['home_score']).astype(int)

# The raw match label is no longer needed once match_num exists.
score_table = score_table.drop('match', axis=1)

### 試合、チームごとのスコア

In [7]:
# Reshape the per-match table into one row per (match, team): the home and
# away views are given identical column names and then stacked.
home_columns = ['id', 'year', 'stage', 'home', 'match_num',
                'home_score', 'home_point', 'home_goal', 'home_win']
home_renames = {
    'home': 'team',
    'home_point': 'point',
    'home_score': 'score',
    'home_goal': 'goal',
    'home_win': 'win',
}
home_ranking_data = score_table[home_columns].rename(columns=home_renames)

away_columns = ['id', 'year', 'stage', 'away', 'match_num',
                'away_score', 'away_point', 'away_goal', 'away_win']
away_renames = {
    'away': 'team',
    'away_point': 'point',
    'away_score': 'score',
    'away_goal': 'goal',
    'away_win': 'win',
}
away_ranking_data = score_table[away_columns].rename(columns=away_renames)

# Stack both views and order each team's matches by season and round.
ranking_data = pd.concat([home_ranking_data, away_ranking_data])
ranking_data = ranking_data.sort_values(['team', 'year', 'match_num'])
ranking_data = ranking_data.reset_index(drop=True)

# Totals accumulated *before* each match, per team and season
# (points, goal difference, goals scored, wins).
# The running sum is taken on one stat column at a time and the current row is
# subtracted, which gives an exclusive cumulative sum: a team's first row in a
# season is 0 and nothing carries over from the preceding team/season group.
# Relies on rows being ordered by match_num within each (team, year).
for stat in ['point', 'goal', 'score', 'win']:
    running_total = ranking_data.groupby(['team', 'year'])[stat].cumsum()
    ranking_data[stat + '_last'] = (running_total - ranking_data[stat]).astype(int)

# Standing before each round, within the same season, stage and round.
# Tie-break order is points, goal difference, wins, goals scored, packed into
# one integer key. The key is built in int64 with 10**3-wide slots so that a
# negative goal difference cannot spill over into the points comparison.
ranking_data['rank_tmp'] = (ranking_data['point_last'].astype('int64') * 10**9
                            + ranking_data['goal_last'].astype('int64') * 10**6
                            + ranking_data['win_last'].astype('int64') * 10**3
                            + ranking_data['score_last'].astype('int64'))
# ascending=False: the largest key (best record) gets rank 1; tied teams share
# the best rank of their tie (method='min').
ranking_data['rank_last'] = (
    ranking_data.groupby(['year', 'stage', 'match_num'])['rank_tmp']
    .rank(method='min', ascending=False)
    .astype(int)
)
# No standings exist before the first round; mark those rows with 0.
ranking_data.loc[ranking_data['match_num'] == 1, 'rank_last'] = 0
ranking_data = ranking_data.drop('rank_tmp', axis=1)


# Order rows by stage (J1/J2 per the original note), season, round and
# pre-round rank, then renumber the index.
sort_keys = ['stage', 'year', 'match_num', 'rank_last']
ranking_data = ranking_data.sort_values(by=sort_keys)
ranking_data = ranking_data.reset_index(drop=True)

# Write the table to CSV with a header row and without the index column.
ranking_data.to_csv('new_data/ranking_data.csv', header=True, index=False)