In [39]:
import os
import random

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold , StratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from category_encoders.target_encoder import TargetEncoder


df = pd.read_csv('K-League-data.csv').drop(columns='Unnamed: 0')
print(df.shape)
print(df)

(1332, 52)
      Rnd.        대회   H/A  득점  도움  슈팅  유효 슈팅  블락된슈팅  벗어난슈팅  PA내 슈팅  ...  경고  \
0        1  대구vs수원FC  HOME   1   1  13      4      2      7       5  ...   1   
1        1    대구vs서울  HOME   0   0  10      1      3      6       7  ...   4   
2        1    제주vs포항  HOME   0   0   9      1      3      5       5  ...   2   
3        1    서울vs인천  HOME   2   1  13      7      2      4       7  ...   3   
4        1    포항vs대구  AWAY   2   1   9      4      3      2       6  ...   3   
...    ...       ...   ...  ..  ..  ..    ...    ...    ...     ...  ...  ..   
1327    38    포항vs강원  AWAY   0   0  11      0      2      9       6  ...   3   
1328    38  수원FCvs서울  HOME   0   0  10      0      5      5       5  ...   0   
1329    38    울산vs대구  AWAY   0   0   9      4      1      4       3  ...   3   
1330    38  수원FCvs수원  HOME   2   1  12      3      4      5       5  ...   2   
1331    38    울산vs대구  HOME   2   2  19      6      9      4      10  ...   1   

      퇴장       기준    시즌  경기결

In [40]:
def seed_everything(seed):
    random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)

seed_everything(42)

In [41]:
X = df.drop(columns=['경기결과', '기준'])
X = pd.get_dummies(X, columns=['대회', 'H/A', '시즌'], drop_first=True)
# categorical_features = list(X.dtypes[X.dtypes == "object"].index)

# for i in categorical_features:
#     te = TargetEncoder(cols=[i])
#     train_x[i] = te.fit_transform(train_x[i], train_y)
#     test_x[i] = te.transform(test_x[i])
#     X[i] = te.fit_transform()
y = df['경기결과']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

print(X_train.shape, X_test.shape)
print(y_train.shape, y_test.shape)

(1198, 225) (134, 225)
(1198,) (134,)


In [75]:
# 로지스틱 회귀

model = LogisticRegression(random_state=42, solver='newton-cg')
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')
print(classification_report(y_test, y_pred))


Accuracy: 0.6194029850746269
              precision    recall  f1-score   support

           D       0.50      0.51      0.51        41
           L       0.70      0.71      0.70        52
           W       0.64      0.61      0.62        41

    accuracy                           0.62       134
   macro avg       0.61      0.61      0.61       134
weighted avg       0.62      0.62      0.62       134





In [76]:
# 다층신경망

from sklearn.neural_network import MLPClassifier

model = MLPClassifier(random_state=42, max_iter=500)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')
print(classification_report(y_test, y_pred))


Accuracy: 0.6194029850746269
              precision    recall  f1-score   support

           D       0.52      0.56      0.54        41
           L       0.65      0.75      0.70        52
           W       0.70      0.51      0.59        41

    accuracy                           0.62       134
   macro avg       0.62      0.61      0.61       134
weighted avg       0.63      0.62      0.62       134



In [53]:
# LDA

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

model = LinearDiscriminantAnalysis()
model.fit(X_train, y_train)

# 모델 평가
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')
print(classification_report(y_test, y_pred))

Accuracy: 0.6119402985074627
              precision    recall  f1-score   support

           D       0.54      0.54      0.54        41
           L       0.64      0.69      0.67        52
           W       0.65      0.59      0.62        41

    accuracy                           0.61       134
   macro avg       0.61      0.60      0.61       134
weighted avg       0.61      0.61      0.61       134



In [54]:
# QDA

from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

model = QuadraticDiscriminantAnalysis()
model.fit(X_train, y_train)

# 모델 평가
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')
print(classification_report(y_test, y_pred))

Accuracy: 0.3582089552238806
              precision    recall  f1-score   support

           D       0.38      0.80      0.52        41
           L       0.00      0.00      0.00        52
           W       0.38      0.37      0.37        41

    accuracy                           0.36       134
   macro avg       0.25      0.39      0.30       134
weighted avg       0.23      0.36      0.27       134





In [55]:
# SVM

from sklearn import svm

model = svm.SVC(random_state=42, kernel='linear')
model.fit(X_train, y_train)

# 모델 평가
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')
print(classification_report(y_test, y_pred))

Accuracy: 0.582089552238806
              precision    recall  f1-score   support

           D       0.45      0.49      0.47        41
           L       0.67      0.62      0.64        52
           W       0.62      0.63      0.63        41

    accuracy                           0.58       134
   macro avg       0.58      0.58      0.58       134
weighted avg       0.59      0.58      0.58       134



In [62]:
# 그래디언트부스팅

from sklearn.ensemble import GradientBoostingClassifier

model = GradientBoostingClassifier(random_state=42, max_depth=3, n_estimators=500, )
model.fit(X_train, y_train)

# 모델 평가
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')
print(classification_report(y_test, y_pred))

Accuracy: 0.5149253731343284
              precision    recall  f1-score   support

           D       0.38      0.32      0.35        41
           L       0.61      0.60      0.60        52
           W       0.51      0.61      0.56        41

    accuracy                           0.51       134
   macro avg       0.50      0.51      0.50       134
weighted avg       0.51      0.51      0.51       134



In [61]:
# 에이다 부스트

from sklearn.ensemble import AdaBoostClassifier

model = AdaBoostClassifier(random_state=42,)
model.fit(X_train, y_train)

# 모델 평가
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')
print(classification_report(y_test, y_pred))

Accuracy: 0.5373134328358209
              precision    recall  f1-score   support

           D       0.40      0.41      0.40        41
           L       0.66      0.56      0.60        52
           W       0.55      0.63      0.59        41

    accuracy                           0.54       134
   macro avg       0.54      0.54      0.53       134
weighted avg       0.55      0.54      0.54       134



In [72]:
# lightgbm

import lightgbm as lgb

lgb_model = lgb.LGBMClassifier(random_state=42, max_depth=3, n_estimators=500)
lgb_model.fit(X_train, y_train)

y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')
print(classification_report(y_test, y_pred))

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000718 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3646
[LightGBM] [Info] Number of data points in the train set: 1198, number of used features: 50
[LightGBM] [Info] Start training from score -1.221941
[LightGBM] [Info] Start training from score -1.055323
[LightGBM] [Info] Start training from score -1.029286
Accuracy: 0.5149253731343284
              precision    recall  f1-score   support

           D       0.38      0.32      0.35        41
           L       0.61      0.60      0.60        52
           W       0.51      0.61      0.56        41

    accuracy                           0.51       134
   macro avg       0.50      0.51      0.50       134
weighted avg       0.51      0.51      0.51       134



In [73]:
# randomforest

from sklearn.ensemble import RandomForestClassifier

model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# 모델 평가
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')
print(classification_report(y_test, y_pred))

Accuracy: 0.5597014925373134
              precision    recall  f1-score   support

           D       0.46      0.29      0.36        41
           L       0.58      0.67      0.63        52
           W       0.58      0.68      0.63        41

    accuracy                           0.56       134
   macro avg       0.54      0.55      0.54       134
weighted avg       0.55      0.56      0.54       134

