## Import

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pylab as plt
import scipy.stats as stats
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.stats.outliers_influence import OLSInfluence

## Data Preprocessing

In [3]:
# Read the four input CSV files in a single pass; the order of the paths
# matches the order of the names on the left-hand side.
# NOTE(review): df_test is read from train_3.csv while df_3 is read from
# test.csv, and df_3 is later merged into the training frame — confirm that
# this naming is intentional and not a swap.
df_1, df_2, df_test, df_3 = map(
    pd.read_csv,
    (
        '../Data/train_1.csv',
        '../Data/train_2.csv',
        '../Data/train_3.csv',
        '../Data/test.csv',
    ),
)


In [4]:
# Merge the training data.

# Column names applied to both the merged training frame and the test frame.
# Declared once so the two frames cannot drift apart.
COLUMN_NAMES = ["file_name",'name','height','weight','satba_time','play_time','rest_time','status','skill','move','game_num']

# ignore_index=True gives the merged frame a fresh 0..n-1 index instead of
# repeating the row labels of each source frame (duplicate labels make later
# label-based selection and alignment ambiguous).
data = pd.concat([df_1, df_2, df_3], ignore_index=True)

# Overwrite the headers positionally; this assumes every input file has
# exactly these 11 columns in this order — TODO confirm against the CSVs.
data.columns = COLUMN_NAMES
df_test.columns = COLUMN_NAMES

In [5]:
# Create the `round` variable.

def _round_from_file_name(file_name):
    """Return the second underscore-separated field of ``file_name`` (a string)."""
    return file_name.split('_')[1]


# Derive the column the same way for the training and the test frame.
for frame in (data, df_test):
    frame['round'] = frame['file_name'].map(_round_from_file_name)

In [6]:
# Relabel matches recorded as the round of 12 as the round of 16.
#
# The relabelling is applied to the training frame AND the test frame: `round`
# holds strings and enters the regression as a categorical term, so a '12'
# left in df_test would be a level that never occurs in the training data.
for frame in (data, df_test):
    # Boolean-mask assignment only touches rows whose round is exactly '12'.
    frame.loc[frame['round'] == '12', 'round'] = '16'

## Fit & Predict

In [7]:
# Multiple linear regression (ordinary least squares).
#   Independent variables: height, weight, satba time, play time, rest time,
#                          skill count, move distance, round
#   Dependent variable:    cumulative number of games (game_num)
# `round` holds strings, so the formula interface expands it into dummy terms
# (round[T.2], round[T.4], ...) rather than treating it as a number.
formula = 'game_num ~ height+weight+satba_time+play_time+rest_time+skill+move+round'

model = smf.ols(formula=formula, data=data).fit()

# Last expression of the cell: render the regression summary table.
model.summary()

0,1,2,3
Dep. Variable:,game_num,R-squared:,0.961
Model:,OLS,Adj. R-squared:,0.959
Method:,Least Squares,F-statistic:,355.4
Date:,"Sun, 12 Nov 2023",Prob (F-statistic):,1.08e-95
Time:,20:47:04,Log-Likelihood:,-105.84
No. Observations:,154,AIC:,233.7
Df Residuals:,143,BIC:,267.1
Df Model:,10,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,1.6195,1.976,0.820,0.414,-2.286,5.525
round[T.2],-0.0555,0.289,-0.192,0.848,-0.626,0.515
round[T.4],-0.1737,0.215,-0.809,0.420,-0.598,0.251
round[T.8],-0.3239,0.140,-2.321,0.022,-0.600,-0.048
height,-0.0094,0.012,-0.763,0.447,-0.034,0.015
weight,0.0027,0.004,0.753,0.452,-0.004,0.010
satba_time,0.0011,0.001,2.229,0.027,0.000,0.002
play_time,-0.0043,0.002,-2.029,0.044,-0.008,-0.000
rest_time,0.0003,7.4e-05,3.580,0.000,0.000,0.000

0,1,2,3
Omnibus:,7.017,Durbin-Watson:,1.709
Prob(Omnibus):,0.03,Jarque-Bera (JB):,6.851
Skew:,0.421,Prob(JB):,0.0325
Kurtosis:,3.598,Cond. No.,1220000.0


In [8]:
# Columns that are not predictors in the fitted formula (identifiers, status
# and the target itself); they are removed before the frame is handed to the
# model.
non_predictors = ["file_name","name","status","game_num"]
test_features = df_test.drop(columns=non_predictors)

# Store the fitted model's prediction next to the observed rows.
df_test['predict'] = model.predict(test_features)

# Last expression of the cell: display the test frame with its predictions.
df_test

Unnamed: 0,file_name,name,height,weight,satba_time,play_time,rest_time,status,skill,move,game_num,round,predict
0,한라_8_1_1_0,이국희,183,106,51,6,0,0,1,4798.979413,1,8,0.475826
1,한라_8_1_1_0,김무호,183,107,51,6,0,1,1,4798.979413,1,8,0.478499
2,한라_8_1_2_0,이국희,183,106,99,12,60,0,2,11549.26283,2,8,1.28795
3,한라_8_1_2_0,김무호,183,107,99,12,60,0,2,11549.26283,2,8,1.290622
4,한라_8_2_1_0,유정훈,190,105,62,3,0,0,1,3691.158295,1,8,0.325533
5,한라_8_2_1_0,최성환,183,107,62,3,0,1,1,3691.158295,1,8,0.396428
6,한라_8_2_2_0,유정훈,190,105,112,9,60,0,4,8455.673135,2,8,1.17344
7,한라_8_2_2_0,최성환,183,107,112,9,60,0,4,8455.673135,2,8,1.244335
8,한라_8_3_1_0,박동환,193,105,50,9,0,0,3,5164.752321,1,8,0.626998
9,한라_8_3_1_0,박민교,182,105,50,9,0,1,3,5164.752321,1,8,0.730005


## TO CSV

In [9]:
# Write the test frame, including the `predict` column, to disk without the
# row index. The file is encoded as cp949 instead of the default UTF-8 —
# presumably for consumers on Korean Windows; confirm before changing.
output_path = '../Output/한라_predict.csv'
df_test.to_csv(output_path, encoding='cp949', index=False)