# 5月研究発表

まず、必要なデータを読み込み、遷移確率を定義する。<br>
今回は、2021年の読売ジャイアンツの打順でシミュレーションを行う。<br>

## ライブラリとデータの読み込み

In [2]:
# ライブラリの読み込み
import numpy as np
import pandas as pd
import pprint
from tqdm import tqdm
from functools import lru_cache

# Load the pitching and batting tables from the CSV files under player_data/
pitching = pd.read_csv('player_data/pitching.csv')
batting = pd.read_csv('player_data/batting.csv')
# Last expression of the cell: display the final 20 rows of the batting table
batting.tail(20)

Unnamed: 0,Rk,Name,Age,G,PA,AB,R,H,2B,3B,...,GDP,HBP,SH,SF,IBB,League,Team,Year,PlayerID,Bats
31059,29,Shinnosuke Shigenobu*,26.0,106,174,158,25,42,7,2,...,1.0,0,2,3.0,1.0,Japan Central League,Yomiuri Giants,2019,shigen000shi,left
31060,30,Tomoyuki Sugano,29.0,22,53,46,2,4,1,0,...,0.0,0,7,0.0,0.0,Japan Central League,Yomiuri Giants,2019,sugano001tom,right
31061,31,Ginjiro Sumitani,31.0,58,138,126,17,33,4,0,...,4.0,2,2,1.0,1.0,Japan Central League,Yomiuri Giants,2019,sumita001gin,right
31062,32,Kazuto Taguchi*,23.0,55,3,2,0,0,0,0,...,0.0,0,1,0.0,0.0,Japan Central League,Yomiuri Giants,2019,taguch000kaz,left
31063,33,Kyosuke Takagi*,29.0,55,4,4,0,0,0,0,...,0.0,0,0,0.0,0.0,Japan Central League,Yomiuri Giants,2019,takagi000kyo,left
31064,34,Yuki Takahashi*,22.0,18,34,32,1,3,0,0,...,1.0,0,2,0.0,0.0,Japan Central League,Yomiuri Giants,2019,takaha000yuk,left
31065,35,Hosei Takata,20.0,2,1,1,0,0,0,0,...,0.0,0,0,0.0,0.0,Japan Central League,Yomiuri Giants,2019,takata000hos,right
31066,36,Shunta Tanaka*,25.0,62,176,156,17,35,7,0,...,2.0,0,4,2.0,0.0,Japan Central League,Yomiuri Giants,2019,tanaka002shu,left
31067,37,Soichiro Tateoka,29.0,25,16,14,4,4,2,0,...,0.0,0,0,0.0,0.0,Japan Central League,Yomiuri Giants,2019,tateok001soi,right
31068,38,Shosei Togo,19.0,2,3,3,0,0,0,0,...,0.0,0,0,0.0,0.0,Japan Central League,Yomiuri Giants,2019,togo--000sho,right


## 列名の確認

In [3]:
batting.columns

Index(['Rk', 'Name', 'Age', 'G', 'PA', 'AB', 'R', 'H', '2B', '3B', 'HR', 'RBI',
       'SB', 'CS', 'BB', 'SO', 'BA', 'OBP', 'SLG', 'OPS', 'TB', 'GDP', 'HBP',
       'SH', 'SF', 'IBB', 'League', 'Team', 'Year', 'PlayerID', 'Bats'],
      dtype='object')

In [4]:
# Column subsets used to show a compact view of each table.
pitching_mask = [
    'Name', 'W', 'L', 'ERA', 'G', 'BB', 'IBB', 'SO', 'HBP', 'WHIP',
    'Team', 'Year',
]
batting_mask = [
    'Name', 'Age', 'G', 'PA', 'H', '2B', '3B', 'HR', 'RBI', 'SO', 'OPS',
    'Team', 'Year',
]
# Every row of one pitcher, restricted to the pitching columns listed above.
pitching.loc[pitching['Name'] == 'Takahiro Norimoto', pitching_mask]

Unnamed: 0,Name,W,L,ERA,G,BB,IBB,SO,HBP,WHIP,Team,Year
14154,Takahiro Norimoto,15,8,3.34,27,51,1.0,134,6.0,1.135,Tohoku Rakuten Golden Eagles,2013
14476,Takahiro Norimoto,14,10,3.02,30,39,3.0,204,6.0,1.115,Tohoku Rakuten Golden Eagles,2014
14799,Takahiro Norimoto,10,11,2.91,28,48,0.0,215,4.0,1.151,Tohoku Rakuten Golden Eagles,2015
15129,Takahiro Norimoto,11,11,2.91,28,50,0.0,216,6.0,1.241,Tohoku Rakuten Golden Eagles,2016
15460,Takahiro Norimoto,15,7,2.57,25,48,1.0,222,3.0,1.056,Tohoku Rakuten Golden Eagles,2017
15804,Takahiro Norimoto,10,11,3.69,27,51,2.0,187,3.0,1.231,Tohoku Rakuten Golden Eagles,2018
16150,Takahiro Norimoto,5,5,2.78,12,10,0.0,67,3.0,1.0,Tohoku Rakuten Golden Eagles,2019


## データの前処理

読み込んだデータには今回の分析に必要な単打数が含まれていないため、安打数から二塁打・三塁打・本塁打の数を引いて求める。<br>
また、不要な列を削除し、選手名に含まれている空白文字を取り除く。

In [5]:
# NOTE(review): giants_2021 is not defined in any cell shown before this one
# (the recorded output of this cell is a NameError); it presumably has to be
# loaded first — confirm the data source.

# Drop the columns that are not used afterwards.
giants_2021.drop(
    columns=["打数", "打率", "試合", "得点", "塁打", "打点", "盗塁",
             "盗塁刺", "犠打", "犠飛", "併殺打", "出塁率", "長打率"],
    inplace=True,
)

# Remove every whitespace character from the player names.
giants_2021["選手名"] = giants_2021["選手名"].apply(
    lambda name: "".join(name.split())
)

# Singles are not in the data: hits minus doubles, triples and home runs.
# The new column is inserted at position 3 of the already-trimmed table.
single = giants_2021["安打"] - (
    giants_2021["二塁打"] + giants_2021["三塁打"] + giants_2021["本塁打"]
)
giants_2021.insert(3, "単打", single)

NameError: name 'giants_2021' is not defined

## 遷移確率のデータフレームを作成

前処理を行ったデータから各打撃結果の遷移確率（打席あたりの割合）を計算し、データフレームにまとめる。

In [6]:
def rate_convert(e):
    """Return column ``e`` of ``giants_2021`` divided by its "打席" column.

    ``e`` is a column label of the module-level ``giants_2021`` table; the
    division is element-wise, so the result has one rate per row.
    """
    event_counts = giants_2021[e]
    return event_counts / giants_2021["打席"]


# Per-plate-appearance rate of each batting outcome, one value per player.
single_rate = rate_convert("単打")
double_rate = rate_convert("二塁打")
triple_rate = rate_convert("三塁打")
HR_rate = rate_convert("本塁打")
# Walks, intentional walks and hit-by-pitch are pooled into a single rate.
# NOTE(review): if the "四球" column already includes intentional walks
# ("敬遠"), they are counted twice here — confirm against the data source.
BB_IBB_HBP_rate = (
    giants_2021["四球"] + giants_2021["敬遠"] + giants_2021["死球"]) / giants_2021["打席"]
# Whatever is not one of the outcomes above is treated as an out.
out_rate = 1 - (single_rate + double_rate + triple_rate +
                HR_rate + BB_IBB_HBP_rate)

team_rate = pd.DataFrame({"選手名": giants_2021["選手名"], "単打": single_rate, "二塁打": double_rate,
                          "三塁打": triple_rate, "本塁打": HR_rate, "四死球": BB_IBB_HBP_rate, "アウト": out_rate})

# Add the transition probabilities of an average NPB pitcher as one more row.
pitcher_rates = [0.076, 0.014, 0.001, 0.005, 0.0323]
pitcher = pd.Series(["投手", *pitcher_rates, 1 - sum(pitcher_rates)],
                    index=team_rate.columns.values)
# DataFrame.append was removed in pandas 2.0, so the row is concatenated as a
# one-row frame instead. The Series mixes a string with floats (object dtype);
# infer_objects() turns the rate columns back into floats so that team_rate
# keeps numeric columns after the concatenation.
team_rate = pd.concat(
    [team_rate, pitcher.to_frame().T.infer_objects()], ignore_index=True)
team_rate.head(5)

NameError: name 'giants_2021' is not defined

## 遷移確率行列を定義

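作成した遷移確率のデータフレームをもとに、遷移確率行列 $P$ を定義する。下式の $A$ はアウトにならない打席での走者状況（8通り）間の遷移を表す $8\times 8$ 行列、$B = p_{\mathrm{out}} I_8$ はアウトカウントが1つ増える遷移、$F$ は全成分が $p_{\mathrm{out}}$ の $8\times 1$ 列ベクトルで、3アウト（イニング終了）への遷移を表す。<br>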

### $$P= \begin{pmatrix}A&B&0&0\\0&A&B&0\\0&0&A&F\\0&0&0&1\end{pmatrix}$$

In [7]:
# Hard-coded per-plate-appearance probabilities (presumably single, double,
# triple, home run and walk/hit-by-pitch); the remainder is the out probability.
p_s = 0.2
p_d = 0.1
p_t = 0.001
p_h = 0.2
p_w = 0.3
p_out = 1 - (p_s + p_d + p_t + p_h + p_w)

# A[i, j]: probability of going from base state i to base state j in a plate
# appearance without an out.
# NOTE(review): the state order appears to be 0=empty, 1=1st, 2=2nd, 3=3rd,
# 4=1st+2nd, 5=1st+3rd, 6=2nd+3rd, 7=loaded — confirm.
A = np.array([
    [p_h, p_s + p_w, p_d, p_t, 0, 0, 0, 0],          # from state 0
    [p_h, 0, 0, p_t, p_s + p_w, 0, p_d, 0],          # from state 1
    [p_h, p_s, p_d, p_t, p_w, 0, 0, 0],              # from state 2
    [p_h, p_s, p_d, p_t, 0, p_w, 0, 0],              # from state 3
    [p_h, 0, 0, p_t, p_s, 0, p_d, p_w],              # from state 4
    [p_h, 0, 0, p_t, p_s, 0, p_d, p_w],              # from state 5
    [p_h, p_s, p_d, p_t, 0, 0, 0, p_w],              # from state 6
    [p_h, 0, 0, p_t, p_s, 0, p_d, p_w],              # from state 7
])
# F: 8x1 column filled with p_out; B: p_out on the diagonal of an 8x8 matrix.
F = np.full((8, 1), p_out)
B = p_out * np.eye(8)

# Transition matrix over 25 states: three 8-state groups plus one final
# absorbing state. A sits on the diagonal, B links a group to the next one,
# and F links the last group to the absorbing state.
P = np.zeros((25, 25))
P[0:8, 0:8] = A
P[8:16, 8:16] = A
P[16:24, 16:24] = A
P[0:8, 8:16] = B
P[8:16, 16:24] = B
P[16:24, 24:25] = F
P[24, 24] = 1


## 1イニングにおける得点確率分布の算出

### $$u_n = u_{n-1}P$$

In [8]:
# Initial distribution: all probability on state 0, none on the other 24.
u_0 = np.array([1] + [0] * 24)


def oneinning_sim(n):
    """Return the state distribution after ``n`` applications of ``P``.

    Starts from the module-level row vector ``u_0`` and computes
    ``u_k = u_{k-1} P`` for ``k = 1..n`` using the module-level matrix ``P``.

    The product is accumulated in a loop rather than by recursion, so large
    ``n`` does not hit Python's recursion limit. For ``n == 0`` the ``u_0``
    object itself is returned (not a copy).

    Args:
        n: Number of transitions to apply; a non-negative integer.

    Returns:
        The vector ``u_n``.

    Raises:
        ValueError: If ``n`` is negative.
    """
    if n < 0:
        raise ValueError(f"n must be non-negative, got {n}")
    state = u_0
    for _ in range(n):
        state = np.dot(state, P)
    return state


# Vector whose s-th element is the probability of being in state s once the
# n-th batter's plate appearance is over (here n = 10)
print(oneinning_sim(10))

[2.71470498e-02 2.03687602e-02 5.10066292e-03 1.35735249e-04
 2.64848338e-02 5.08371720e-05 8.47286200e-03 2.09631934e-02
 6.74439815e-02 5.06040361e-02 1.26720590e-02 3.37219908e-04
 6.57987756e-02 1.26299591e-04 2.10499318e-02 5.20808425e-02
 7.54008557e-02 5.65741752e-02 1.41670771e-02 3.77004279e-04
 7.35615525e-02 1.41200104e-04 2.35333507e-02 5.82252116e-02
 3.19182492e-01]


## 遷移確率行列を分解する

### $$P=P^{(0)} + P^{(1)} + P^{(2)} + P^{(3)} + P^{(4)}$$

ここで $P^{(k)}$ は、1打席でちょうど $k$ 点が入る遷移だけを取り出した行列である（$P$ の $k$ 乗ではない）。

In [13]:
def _embed_scoring_block(block):
    """Return a 25x25 zero matrix with ``block`` on its three 8x8 diagonal slots."""
    full = np.zeros((25, 25))
    for group in range(3):
        lo = 8 * group
        full[lo:lo + 8, lo:lo + 8] = block
    return full


# A_k keeps the entries of A that appear to correspond to transitions on which
# exactly k runs score (k = 0..4); entry by entry, A_0 + ... + A_4 equals A.
A_0 = np.array([
    [0, p_s + p_w, p_d, p_t, 0, 0, 0, 0],   # from state 0
    [0, 0, 0, 0, p_s + p_w, 0, p_d, 0],     # from state 1
    [0, 0, 0, 0, p_w, 0, 0, 0],             # from state 2
    [0, 0, 0, 0, 0, p_w, 0, 0],             # from state 3
    [0, 0, 0, 0, 0, 0, 0, p_w],             # from state 4
    [0, 0, 0, 0, 0, 0, 0, p_w],             # from state 5
    [0, 0, 0, 0, 0, 0, 0, p_w],             # from state 6
    [0, 0, 0, 0, 0, 0, 0, 0],               # from state 7
])

A_1 = np.array([
    [p_h, 0, 0, 0, 0, 0, 0, 0],             # from state 0
    [0, 0, 0, p_t, 0, 0, 0, 0],             # from state 1
    [0, p_s, p_d, p_t, 0, 0, 0, 0],         # from state 2
    [0, p_s, p_d, p_t, 0, 0, 0, 0],         # from state 3
    [0, 0, 0, 0, p_s, 0, p_d, 0],           # from state 4
    [0, 0, 0, 0, p_s, 0, p_d, 0],           # from state 5
    [0, 0, 0, 0, 0, 0, 0, 0],               # from state 6
    [0, 0, 0, 0, 0, 0, 0, p_w],             # from state 7
])

A_2 = np.array([
    [0, 0, 0, 0, 0, 0, 0, 0],               # from state 0
    [p_h, 0, 0, 0, 0, 0, 0, 0],             # from state 1
    [p_h, 0, 0, 0, 0, 0, 0, 0],             # from state 2
    [p_h, 0, 0, 0, 0, 0, 0, 0],             # from state 3
    [0, 0, 0, p_t, 0, 0, 0, 0],             # from state 4
    [0, 0, 0, p_t, 0, 0, 0, 0],             # from state 5
    [0, p_s, p_d, p_t, 0, 0, 0, 0],         # from state 6
    [0, 0, 0, 0, p_s, 0, p_d, 0],           # from state 7
])

A_3 = np.array([
    [0, 0, 0, 0, 0, 0, 0, 0],               # from state 0
    [0, 0, 0, 0, 0, 0, 0, 0],               # from state 1
    [0, 0, 0, 0, 0, 0, 0, 0],               # from state 2
    [0, 0, 0, 0, 0, 0, 0, 0],               # from state 3
    [p_h, 0, 0, 0, 0, 0, 0, 0],             # from state 4
    [p_h, 0, 0, 0, 0, 0, 0, 0],             # from state 5
    [p_h, 0, 0, 0, 0, 0, 0, 0],             # from state 6
    [0, 0, 0, p_t, 0, 0, 0, 0],             # from state 7
])

A_4 = np.array([
    [0, 0, 0, 0, 0, 0, 0, 0],               # from state 0
    [0, 0, 0, 0, 0, 0, 0, 0],               # from state 1
    [0, 0, 0, 0, 0, 0, 0, 0],               # from state 2
    [0, 0, 0, 0, 0, 0, 0, 0],               # from state 3
    [0, 0, 0, 0, 0, 0, 0, 0],               # from state 4
    [0, 0, 0, 0, 0, 0, 0, 0],               # from state 5
    [0, 0, 0, 0, 0, 0, 0, 0],               # from state 6
    [p_h, 0, 0, 0, 0, 0, 0, 0],             # from state 7
])

# P_0 additionally carries the B and F blocks and the absorbing entry, laid out
# exactly as in P; P_1..P_4 contain only the block-diagonal A_k part.
P_0 = _embed_scoring_block(A_0)
P_0[0:8, 8:16] = B
P_0[8:16, 16:24] = B
P_0[16:24, 24:25] = F
P_0[24, 24] = 1

P_1 = _embed_scoring_block(A_1)

P_2 = _embed_scoring_block(A_2)

P_3 = _embed_scoring_block(A_3)

P_4 = _embed_scoring_block(A_4)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,15,16,17,18,19,20,21,22,23,24
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## 行列$U_{n\mid r}$ を再帰関数を使って求める

In [None]:
# Initial condition.
# R_max is the largest run total that gets its own row.
R_max = 20

# One row per run total 0..R_max and one column per state (25); before any
# batter, all probability mass sits at (row 0, column 0).
U_0 = np.zeros((R_max + 1, 25), dtype=float)
U_0[0][0] = 1.0

# Recursive computation of U_{n|r}
@lru_cache(maxsize=None)
def inning_score_sim(n, r):
    """Return row ``r`` of ``U_n``, the state vector for run total ``r`` after ``n`` steps.

    Implements the recurrence

        U_{n|r} = sum_{k=0}^{min(r, 4)} U_{n-1|r-k} P_k

    with ``U_{0|r} = U_0[r]``, where ``U_0`` and ``P_0``..``P_4`` are the
    module-level arrays. The terms are added in increasing ``k``.

    The cache is unbounded because the recursion visits every pair
    ``(n', r')`` with ``n' <= n`` and ``r' <= r``; a bound smaller than that
    grid (for example 1000 entries against the 101 x 21 grid used in this
    notebook) evicts entries that are still needed and forces recomputation.
    Cached results are keyed on ``(n, r)`` only, so after changing ``U_0`` or
    any ``P_k`` call ``inning_score_sim.cache_clear()`` (or redefine the
    function). The returned array is the cached object — do not modify it in
    place.

    Args:
        n: Number of transitions applied; a non-negative integer. The
            recursion goes one level deeper per step, so ``n`` has to stay
            below Python's recursion limit.
        r: Run total; a non-negative integer that is a valid row index of
            ``U_0``.

    Returns:
        The 1-D array ``U_{n|r}``.

    Raises:
        ValueError: If ``n`` or ``r`` is negative.
    """
    if n < 0 or r < 0:
        raise ValueError(f"n and r must be non-negative, got n={n}, r={r}")
    if n == 0:
        return U_0[r]
    scoring_layers = (P_0, P_1, P_2, P_3, P_4)
    total = np.dot(inning_score_sim(n - 1, r), scoring_layers[0])
    # Only layers with k <= r can contribute: the previous total r - k must be >= 0.
    for k in range(1, min(r, 4) + 1):
        total = total + np.dot(inning_score_sim(n - 1, r - k), scoring_layers[k])
    return total


# Stand-in for n -> infinity: use a large finite number of steps.
n = 100
# One vector per run total r = 0..R_max.
U_n = [inning_score_sim(n, r) for r in range(R_max + 1)]

# Rows are run totals; the 25 state columns are labelled 1..25.
df_U = pd.DataFrame(U_n, columns=list(range(1, 26)))
df_U

In [None]:
from matplotlib import pyplot as plt
# Font used for the Japanese title and axis labels.
# NOTE(review): "MS Gothic" is presumably only available on Windows — confirm
# which font to use on other platforms.
plt.rcParams['font.family'] = "MS Gothic"

fig, ax = plt.subplots(figsize=(15, 8))
# Bar heights come from column 25 of df_U (the last state column), one bar per
# row of df_U.
ax.bar(df_U.index, df_U[25].to_numpy(), width=1)
ax.set_title('1イニングにおける得点確率分布')
ax.set_xlabel('得点')
ax.set_ylabel('確率')
plt.show()