## SVM 實作

### Load data

In [None]:
import pandas as pd
from sklearn import datasets
import seaborn as sns

In [None]:
# Load the iris dataset that ships with scikit-learn. The returned object is
# indexed below by the keys "data", "feature_names" and "target".
data = datasets.load_iris()
data

### Format data

In [None]:
# One row per sample: the measurement columns (named by "feature_names")
# followed by a trailing "target" column holding the class label.
iris = pd.DataFrame(data=data["data"], columns=data["feature_names"]).assign(
    target=data["target"]
)
iris

### Visualize

In [None]:
sns.scatterplot(x="sepal length (cm)", y="sepal width (cm)", hue="target", data=iris)

In [None]:
sns.pairplot(hue="target", data=iris)

### Training

In [None]:
from sklearn.svm import SVC, LinearSVC

In [None]:
X = iris.iloc[:, 0:4].values
y = iris["target"].values

In [None]:
model = LinearSVC(max_iter=5000)
# model = SVC(max_iter=5000)
# model = SVC(kernel="poly", max_iter=5000)

In [None]:
model.fit(X=X, y=y)

### Mean accuracy

In [None]:
# Mean accuracy on the same X, y the model was fitted on, i.e. training
# accuracy -- it does not measure performance on unseen samples.
model.score(X=X, y=y)

### Prediction

In [None]:
predict_y = model.predict(X=X)

In [None]:
predict_y

### Confusion matrix

In [None]:
from sklearn import metrics

In [None]:
confmat = metrics.confusion_matrix(y, predict_y)
confmat

In [None]:
sns.heatmap(confmat, cmap="YlGnBu_r", annot=True, fmt="d")

In [None]:
print(metrics.classification_report(y, predict_y, labels=[0, 1, 2]))

## ROC curve

In [None]:
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import label_binarize

### Transform labels into one-hot vectors

In [None]:
y = label_binarize(y, classes=[0, 1, 2])

In [None]:
model1 = OneVsRestClassifier(LinearSVC(max_iter=5000))
model2 = OneVsRestClassifier(SVC(kernel="poly", max_iter=5000))
model3 = OneVsRestClassifier(SVC(max_iter=5000))

In [None]:
model1.fit(X=X, y=y)
model2.fit(X=X, y=y)
model3.fit(X=X, y=y)

In [None]:
score_y1 = model1.decision_function(X)
score_y2 = model2.decision_function(X)
score_y3 = model3.decision_function(X)

### FPR, TPR

In [None]:
n_classes = 3

In [None]:
model1_roc = dict()
model2_roc = dict()
model3_roc = dict()
for i in range(n_classes):
    fpr = "fpr_cls_{}".format(i)
    tpr = "tpr_cls_{}".format(i)
    model1_roc[fpr], model1_roc[tpr], _ = metrics.roc_curve(y[:,i], score_y1[:,i])
    model2_roc[fpr], model2_roc[tpr], _ = metrics.roc_curve(y[:,i], score_y2[:,i])
    model3_roc[fpr], model3_roc[tpr], _ = metrics.roc_curve(y[:,i], score_y3[:,i])

In [None]:
sns.lineplot(x=model1_roc["fpr_cls_0"], y=model1_roc["tpr_cls_0"])

In [None]:
sns.lineplot(x=model1_roc["fpr_cls_1"], y=model1_roc["tpr_cls_1"])

In [None]:
sns.lineplot(x=model1_roc["fpr_cls_2"], y=model1_roc["tpr_cls_2"])

In [None]:
sns.lineplot(x=model1_roc["fpr_cls_0"], y=model1_roc["tpr_cls_0"])
sns.lineplot(x=model1_roc["fpr_cls_1"], y=model1_roc["tpr_cls_1"])
sns.lineplot(x=model1_roc["fpr_cls_2"], y=model1_roc["tpr_cls_2"])

In [None]:
metrics.auc(model1_roc["fpr_cls_0"], model1_roc["tpr_cls_0"])

In [None]:
metrics.auc(model1_roc["fpr_cls_1"], model1_roc["tpr_cls_1"])

In [None]:
metrics.auc(model1_roc["fpr_cls_2"], model1_roc["tpr_cls_2"])

In [None]:
sns.lineplot(x=model1_roc["fpr_cls_1"], y=model1_roc["tpr_cls_1"])

In [None]:
sns.lineplot(x=model2_roc["fpr_cls_1"], y=model2_roc["tpr_cls_1"])

In [None]:
sns.lineplot(x=model3_roc["fpr_cls_1"], y=model3_roc["tpr_cls_1"])

In [None]:
sns.lineplot(x=model1_roc["fpr_cls_1"], y=model1_roc["tpr_cls_1"])
sns.lineplot(x=model2_roc["fpr_cls_1"], y=model2_roc["tpr_cls_1"])
sns.lineplot(x=model3_roc["fpr_cls_1"], y=model3_roc["tpr_cls_1"])

## Handwritten digits classification

In [None]:
data = datasets.load_digits()
data

In [None]:
digits = pd.DataFrame(data["data"], columns=["feature{}".format(i) for i in range(1,65)])
digits["target"] = data["target"]
digits

In [None]:
digits.iloc[:, 0:65] = digits.iloc[:, 0:65] / 255

In [None]:
X = digits.iloc[:, 0:65].values
y = [str(i) for i in digits["target"]]

In [None]:
model1 = LinearSVC(max_iter=5000)
model2 = SVC(kernel="poly", max_iter=5000)
model3 = SVC(max_iter=5000)

In [None]:
model1.fit(X=X, y=y)
model2.fit(X=X, y=y)
model3.fit(X=X, y=y)

In [None]:
model1.score(X=X, y=y)

In [None]:
model2.score(X=X, y=y)

In [None]:
model3.score(X=X, y=y)