In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC

In [2]:
# Seed NumPy's global RNG so any NumPy-based randomness below is repeatable across runs.
np.random.seed(5)

In [3]:
# Load the pre-split train and test tables from the working directory.
train, test = (
    pd.read_csv("basketball_train.csv"),
    pd.read_csv("basketball_test.csv"),
)

### SVM

* `C(cost)`: 결정경계선의 마진을 결정하는 파라미터
    * `C`가 클수록 결정경계선과 서포트벡터의 간격(마진)이 좁아진다.
* `gamma`: 데이터포인트의 영향력을 결정하는 파라미터
    * `gamma`가 클수록 개별 데이터포인트의 영향 범위가 좁아져, 결정경계선이 데이터포인트에 가깝게 구부러진다. (과적합(Overfitting) 위험이 커진다)

* 종속변수(Dependent Variable): Pos
* 독립변수(Independent Variable): 3P, BLK (TRB는 데이터에 포함되어 있지만 이 노트북의 학습에는 사용하지 않는다)

In [4]:
# Preview the first rows to confirm the columns (Player, Pos, 3P, TRB, BLK) loaded as expected.
train.head()

Unnamed: 0,Player,Pos,3P,TRB,BLK
0,Wesley Matthews,SG,2.4,3.5,0.2
1,Allen Crabbe,SG,1.7,2.9,0.3
2,Andrew Bogut,C,0.0,8.1,0.9
3,Richaun Holmes,C,0.5,5.5,1.0
4,Hassan Whiteside,C,0.0,14.1,2.1


In [5]:
# Features are the 3P and BLK columns; the label to predict is the Pos column.
x_train, y_train = train[['3P', 'BLK']], train['Pos']

In [6]:
# SVM modelling: grid-search the best hyperparameters for an RBF-kernel classifier.
def svcParam(x, y, cv=10):
    """Grid-search an RBF-kernel ``SVC`` over ``C`` and ``gamma`` and return the fitted search.

    Parameters
    ----------
    x : array-like
        Feature matrix the search is fitted on.
    y : array-like
        Class labels aligned row-for-row with ``x``.
    cv : int or cross-validation splitter, default 10
        Forwarded unchanged to ``GridSearchCV``; the default keeps the
        original 10-fold behaviour.

    Returns
    -------
    GridSearchCV
        The search object after ``fit`` has been called, so
        ``best_params_``, ``best_score_`` and ``predict`` are available.
    """
    params = [
        {
            'kernel': ['rbf'],
            'gamma': [0.00001, 0.0001, 0.001, 0.01, 0.1, 1],
            'C': [0.01, 0.1, 1, 10, 100, 1000],
        }
    ]
    model = GridSearchCV(SVC(), params, cv=cv)
    # Fit on the arguments. The earlier version read the notebook-level
    # x_train / y_train here, silently ignoring whatever the caller passed.
    model.fit(x, y)
    return model

# Run the hyperparameter search on the training split; `model` is the fitted GridSearchCV it returns.
model = svcParam(x_train, y_train)

In [7]:
# Show the search configuration, the winning hyperparameters, and their mean CV score.
for summary_item in (model, model.best_params_, model.best_score_):
    print(summary_item)

GridSearchCV(cv=10, estimator=SVC(),
             param_grid=[{'C': [0.01, 0.1, 1, 10, 100, 1000],
                          'gamma': [1e-05, 0.0001, 0.001, 0.01, 0.1, 1],
                          'kernel': ['rbf']}])
{'C': 0.01, 'gamma': 1, 'kernel': 'rbf'}
0.95


In [11]:
# Same two feature columns as training; y_test stays a one-column DataFrame (double brackets).
x_test, y_test = test[["3P", "BLK"]], test[["Pos"]]

In [12]:
# Predict the position of each test player with the fitted search object.
y_pred = model.predict(x_test)

In [13]:
from sklearn.metrics import classification_report

In [14]:
# Share of test rows whose predicted position equals the true position.
accuracy = accuracy_score(y_test, y_pred)
print(f"정확도: {accuracy!s}")

정확도: 0.95


In [15]:
# Per-class precision / recall / F1 for the test predictions.
report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

           C       1.00      0.90      0.95        10
          SG       0.91      1.00      0.95        10

    accuracy                           0.95        20
   macro avg       0.95      0.95      0.95        20
weighted avg       0.95      0.95      0.95        20



In [17]:
# Predicted vs. actual position side by side, indexed like the test labels.
comparison = pd.DataFrame({"예측": y_pred, "실제": y_test['Pos']})
comparison

Unnamed: 0,예측,실제
0,SG,C
1,SG,SG
2,SG,SG
3,C,C
4,C,C
5,SG,SG
6,C,C
7,SG,SG
8,SG,SG
9,C,C
