# 라이브러리 불러오기

In [1]:
import pandas as pd
import warnings
# Install a global filter that ignores every warning raised from here on.
# NOTE(review): this also hides library warnings (e.g. pandas chained-assignment
# warnings) that could point at real problems -- confirm this is intended.
warnings.filterwarnings(action='ignore')

# 데이터 불러오기

In [2]:
# One CSV per season; read them in chronological order.
season_paths = (
    'data/2018_hitter.csv',
    'data/2019_hitter.csv',
    'data/2020_hitter.csv',
    'data/2021_hitter.csv',
)
hitter_2018, hitter_2019, hitter_2020, hitter_2021 = map(pd.read_csv, season_paths)

# Stack the four seasons row-wise into a single table (each frame keeps its
# own row labels, exactly as a plain concat does).
hitter = pd.concat([hitter_2018, hitter_2019, hitter_2020, hitter_2021])

# Display (rows, columns) of the combined table.
hitter.shape

(1098, 16)

# 컬럼명 수정

In [3]:
# Positional rename of all 16 columns to Korean stat names. The order below
# must match the column order of the loaded CSV files.
korean_headers = [
    '연도',        # year
    '선수코드',    # player code
    '경기',        # games
    '타석',        # plate appearances
    '타수',        # at-bats
    '타율',        # batting average
    '안타',        # hits
    '홈런',        # home runs
    '루타',        # total bases
    '장타율',      # slugging percentage
    '희생플라이',  # sacrifice flies
    '볼넷',        # walks
    '삼진',        # strikeouts
    '고의사구',    # intentional walks
    '사구',        # hit by pitch
    '병살타',      # double plays grounded into
]
hitter.columns = korean_headers

# 데이터 전처리

- OPS = 장타율 + 출루율
- 장타율 = (1루타 + (2*2루타) + (3*3루타) + (4*홈런)) / 타수
- 출루율 = (안타 + 사사구) / (타수 + 사사구 + 희생플라이)
- 사사구 = 볼넷 + 사구

In [4]:
# Free passes: walks plus hit-by-pitches.
free_passes = hitter['볼넷'] + hitter['사구']
hitter['사사구'] = free_passes

# On-base percentage = (hits + free passes) / (at-bats + free passes + sacrifice flies).
times_on_base = hitter['안타'] + hitter['사사구']
on_base_chances = hitter['타수'] + hitter['사사구'] + hitter['희생플라이']
hitter['출루율'] = times_on_base / on_base_chances

# OPS = slugging percentage + on-base percentage.
hitter['OPS'] = hitter['장타율'] + hitter['출루율']

In [5]:
# Model inputs: player code and year.
feature_cols = ['선수코드', '연도']
# Model outputs, in the order the prediction array will carry them:
# OPS, slugging percentage, on-base percentage.
target_cols = ['OPS', '장타율', '출루율']

X = hitter[feature_cols]
y = hitter[target_cols]

# 모델링

In [6]:
from sklearn.ensemble import RandomForestRegressor

# Seed the forest so that the fitted model, its score and the predictions
# derived from it are reproducible from run to run; without a seed every
# execution of this cell yields different numbers.
model = RandomForestRegressor(random_state=42)

# Multi-output regression: y has three target columns fitted jointly.
model.fit(X, y)

# NOTE: this is R^2 measured on the same rows the model was trained on, so it
# describes fit to the training data, not accuracy on unseen players/years.
model.score(X, y)

0.8455816280796488

# 예측하기

In [7]:
# Players to predict for, identified by player code.
target_players = [76232, 68050, 75847, 67341, 79192,
                  78224, 78513, 76290, 79215, 67872]

# Build the prediction input in one step: one row per player, with the same
# two columns (player code, year) used as model features, year fixed to 2017.
temp = pd.DataFrame({
    '선수코드': target_players,
    '연도': [2017] * len(target_players),
})

In [8]:
# Predict all targets for every row of `temp`. The result is a 2-D array with
# one row per player; its columns are consumed later as OPS, 장타율 (slugging)
# and 출루율 (on-base), i.e. the column order of the training target `y`.
pred = model.predict(temp)
# Display the raw prediction array.
pred

array([[0.98530067, 0.56977   , 0.41553067],
       [0.71454864, 0.41167   , 0.30287864],
       [0.90246416, 0.52678   , 0.37568416],
       [0.81760107, 0.44226   , 0.37534107],
       [0.84683929, 0.48928   , 0.35755929],
       [0.92161704, 0.5419    , 0.37971704],
       [0.83875963, 0.50682   , 0.33193963],
       [0.98969629, 0.58146   , 0.40823629],
       [0.84713204, 0.46292   , 0.38421204],
       [0.87870455, 0.50739   , 0.37131455]])

# 제출 파일 생성

In [9]:
# Load the submission template.
test = pd.read_excel('data/test.xlsx')

# Take template rows 1..10 (row 0 is skipped) as the submission sheet.
# .copy() makes this an independent frame: assigning columns to a bare
# iloc slice is chained assignment, which pandas flags with
# SettingWithCopyWarning because the write may not behave as intended.
submission = test.iloc[1:11, :].copy()

# Player codes in the same order as the rows passed to the model.
PCODE = [76232, 68050, 75847, 67341, 79192,
         78224, 78513, 76290, 79215, 67872]

submission['PCODE'] = PCODE

# submission : 제출 양식

In [10]:
# Copy each prediction column into the submission sheet; position i of the
# prediction array corresponds to the i-th name below.
for col_idx, col_name in enumerate(['OPS', '장타율', '출루율']):
    submission[col_name] = pred[:, col_idx]

# Display the completed submission table.
submission

Unnamed: 0,NO.,PCODE,OPS,장타율,출루율
1,1,76232,0.985301,0.56977,0.415531
2,2,68050,0.714549,0.41167,0.302879
3,3,75847,0.902464,0.52678,0.375684
4,4,67341,0.817601,0.44226,0.375341
5,5,79192,0.846839,0.48928,0.357559
6,6,78224,0.921617,0.5419,0.379717
7,7,78513,0.83876,0.50682,0.33194
8,8,76290,0.989696,0.58146,0.408236
9,9,79215,0.847132,0.46292,0.384212
10,10,67872,0.878705,0.50739,0.371315
