## Imports

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pylab as plt
import scipy.stats as stats
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.stats.outliers_influence import OLSInfluence

## Data Preprocessing

In [2]:
# Read the three training splits and the test set from ../Data.
train_paths = ['../Data/train_1.csv', '../Data/train_2.csv', '../Data/train_3.csv']
df_1, df_2, df_3 = map(pd.read_csv, train_paths)

df_test = pd.read_csv('../Data/test.csv')


In [3]:
# Merge the three training splits into a single frame.
# ignore_index=True gives the merged frame a fresh 0..n-1 index; without it
# every split keeps its own row labels, so the merged index has duplicates.
data = pd.concat([df_1, df_2, df_3], ignore_index=True)

# Give the training and test frames the same column names, defined once so
# the two frames cannot drift apart.
column_names = ["file_name",'name','height','weight','satba_time','play_time','rest_time','status','skill','move','game_num']
data.columns = column_names
df_test.columns = column_names

Unnamed: 0,file_name,name,height,weight,satba_time,play_time,rest_time,status,skill,move,game_num
0,태백_12_1_1_0,손희찬,175,84,99,3,0,0,2,3559.90600,1
1,태백_12_1_1_0,이준호,172,82,99,3,0,1,2,3559.90600,1
2,태백_12_1_2_0,손희찬,175,84,174,11,60,1,7,7353.33000,2
3,태백_12_1_2_0,이준호,172,82,174,11,60,0,7,7353.33000,2
4,태백_12_1_3_0,손희찬,175,84,232,13,120,1,8,13551.63900,3
...,...,...,...,...,...,...,...,...,...,...,...
39,한라_2_2_3_0,박민교,182,105,448,98,2966,0,26,46936.19226,8
40,한라_2_2_4_0,최성환,183,107,548,112,3240,0,30,44853.84620,10
41,한라_2_2_4_0,박민교,182,105,499,107,3026,1,29,50993.41491,9
42,한라_2_2_5_0,최성환,183,107,591,119,3360,0,32,48026.34604,11


In [4]:
# Create the `round` column from the file name.

def extract_round(file_name):
    """Return the second underscore-separated token of `file_name`, as a string."""
    return file_name.split('_')[1]


# Same derivation for the training frame and the test frame.
for frame in (data, df_test):
    frame['round'] = frame['file_name'].apply(extract_round)

Unnamed: 0,file_name,name,height,weight,satba_time,play_time,rest_time,status,skill,move,game_num,round
0,태백_12_1_1_0,손희찬,175,84,99,3,0,0,2,3559.906,1,12
1,태백_12_1_1_0,이준호,172,82,99,3,0,1,2,3559.906,1,12
2,태백_12_1_2_0,손희찬,175,84,174,11,60,1,7,7353.33,2,12
3,태백_12_1_2_0,이준호,172,82,174,11,60,0,7,7353.33,2,12
4,태백_12_1_3_0,손희찬,175,84,232,13,120,1,8,13551.639,3,12


In [5]:
# Relabel matches recorded as round-of-12 as round-of-16.

def relabel_round(round_label):
    """Map the label '12' to '16'; every other label is returned unchanged."""
    return '16' if round_label == '12' else round_label


data['round'] = data['round'].apply(relabel_round)

# Apply the same relabelling to the test frame so both frames use the same
# set of `round` labels. `round` holds strings, so the regression treats it
# as categorical, and a label that was absent when the model was fitted
# cannot be encoded at predict time.
df_test['round'] = df_test['round'].apply(relabel_round)

## Fit & Predict

In [6]:
# Multiple linear regression fitted by ordinary least squares.
#   Predictors: height, weight, satba time, play time, rest time,
#               skill count, movement distance, round
#   Response:   cumulative number of games (game_num)
formula = 'game_num ~ height+weight+satba_time+play_time+rest_time+skill+move+round'

ols_spec = smf.ols(formula, data=data)
model = ols_spec.fit()

# Last expression of the cell: renders the regression summary table.
model.summary()

0,1,2,3
Dep. Variable:,game_num,R-squared:,0.962
Model:,OLS,Adj. R-squared:,0.959
Method:,Least Squares,F-statistic:,332.2
Date:,"Sun, 24 Sep 2023",Prob (F-statistic):,6.47e-88
Time:,15:27:34,Log-Likelihood:,-102.35
No. Observations:,142,AIC:,226.7
Df Residuals:,131,BIC:,259.2
Df Model:,10,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-0.8017,2.664,-0.301,0.764,-6.072,4.468
round[T.2],1.1460,0.397,2.889,0.005,0.361,1.931
round[T.4],0.7024,0.288,2.438,0.016,0.132,1.273
round[T.8],0.1304,0.186,0.701,0.484,-0.237,0.498
height,0.0104,0.017,0.622,0.535,-0.023,0.044
weight,-0.0101,0.005,-2.222,0.028,-0.019,-0.001
satba_time,0.0042,0.001,4.305,0.000,0.002,0.006
play_time,-0.0018,0.003,-0.631,0.529,-0.007,0.004
rest_time,-6.166e-05,0.000,-0.532,0.596,-0.000,0.000

0,1,2,3
Omnibus:,2.91,Durbin-Watson:,1.57
Prob(Omnibus):,0.233,Jarque-Bera (JB):,3.05
Skew:,-0.009,Prob(JB):,0.218
Kurtosis:,3.718,Cond. No.,1550000.0


In [19]:
# Predict game_num for the test set and store it in a new `predict` column.
# Columns that are not predictors in the model formula (identifiers, status
# and the true game_num) are removed before the frame is handed to the model.
non_predictors = ["file_name", "name", "status", "game_num"]
test_features = df_test.drop(columns=non_predictors)

df_test['predict'] = model.predict(test_features)
df_test

Unnamed: 0,file_name,name,height,weight,satba_time,play_time,rest_time,status,skill,move,game_num,round,predict
0,금강_16_1_1_0,김철겸,178,93,95,24,0,1,2,5477.614,1,16,1.068032
1,금강_16_1_1_0,임경택,182,90,95,24,0,0,2,5477.614,1,16,1.140038
2,금강_16_1_2_0,김철겸,178,93,154,29,60,1,4,13080.72,2,16,2.088429
3,금강_16_1_2_0,임경택,182,90,154,29,60,0,4,13080.72,2,16,2.160436
4,금강_16_2_1_0,이민섭,188,94,121,20,0,1,4,5794.215,1,16,1.416662
5,금강_16_2_1_0,유영도,180,93,121,20,0,0,4,5794.215,1,16,1.343382
6,금강_16_2_2_0,이민섭,188,94,210,22,60,1,6,10464.132,2,16,2.30941
7,금강_16_2_2_0,유영도,180,93,210,22,60,0,6,10464.132,2,16,2.23613
8,금강_16_3_1_0,전도언,178,90,146,10,0,1,4,5471.641,1,16,1.447949
9,금강_16_3_1_0,최영원,180,90,146,10,0,0,4,5471.641,1,16,1.468795


## Export to CSV

In [20]:
# Write the test frame to a cp949-encoded CSV file, omitting the row index.
output_path = '../Output/금강_predict.csv'
df_test.to_csv(output_path, index=False, encoding='cp949')