<a href="https://colab.research.google.com/github/sungjin-kim-data/ML/blob/master/Multiclass.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Fix NumPy's global random seed so repeated runs of the notebook match.
np.random.seed(seed=42)

In [None]:
from sklearn.datasets import load_iris

# Load the iris dataset through scikit-learn's bundled loader.
iris = load_iris()

In [None]:
# Display the whole object returned by load_iris().
iris

In [None]:
# Names of the feature columns, read through attribute access.
iris.feature_names

In [None]:
# Names of the target classes, read through attribute access.
iris.target_names

In [None]:
# Pull the feature matrix and the label vector out of the dataset object.
data = iris.data
target = iris.target

In [None]:
# Summary statistics of every feature column, labelled with the feature names.
feature_table = pd.DataFrame(data, columns=iris["feature_names"])
feature_table.describe()

In [None]:
# Count how many samples carry each class label.
class_labels = pd.Series(target)
class_labels.value_counts()

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

In [None]:
# Use 70% of the samples for training; stratifying on the labels and fixing
# random_state keeps the split class-balanced and repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    data,
    target,
    train_size=0.7,
    random_state=2,
    stratify=target,
)

In [None]:
# Report the number of rows in each split.
print("train data :", X_train.shape[0])
print("test data :", X_test.shape[0])

In [None]:
# Class distribution inside the training split.
train_labels = pd.Series(y_train)
train_labels.value_counts()

In [None]:
# Class distribution inside the test split.
test_labels = pd.Series(y_test)
test_labels.value_counts()

In [None]:
# Keep only the first two feature columns so the data can be drawn in 2-D.
X = X_train[:, 0:2]

In [None]:
# Peek at the first row of the two-column training view.
X[0, :]

In [None]:
# Scatter the training samples over the two selected features, coloured by
# their true class label.
first_feature, second_feature = X[:, 0], X[:, 1]

plt.figure(figsize=(10, 10))
plt.scatter(first_feature, second_feature, c=y_train, ec='k', cmap=plt.cm.Paired)
plt.xlabel("Sepal length")
plt.ylabel("Sepal width");

In [None]:
# Fit a one-vs-rest logistic regression on the two-column training view X.
# NOTE(review): recent scikit-learn releases deprecate the `multi_class`
# argument — confirm against the installed version.
ovr_logit = LogisticRegression(multi_class="ovr")
ovr_logit.fit(X, y_train)

In [None]:
# Pad the plotted range by 0.5 on every side so no point sits on the border.
x_min, x_max = X[:, 0].min() - 0.5, X[:, 0].max() + 0.5
y_min, y_max = X[:, 1].min() - 0.5, X[:, 1].max() + 0.5

plt.figure(figsize=(10, 10))
# Colour each training point by the class the one-vs-rest model predicts.
plt.scatter(X[:, 0], X[:, 1], c=ovr_logit.predict(X), ec='k', cmap=plt.cm.Paired)
plt.xlabel("Sepal length")
plt.ylabel("Sepal width")
plt.xlim(x_min, x_max)
plt.ylim(y_min, y_max)

coef = ovr_logit.coef_
intercept = ovr_logit.intercept_


def plot_hyperplane(c, color):
    """Draw the line where the linear score of row ``c`` equals zero.

    Solves ``coef[c, 0] * x0 + coef[c, 1] * x1 + intercept[c] = 0`` for
    ``x1`` at both ends of the x range and joins the two points with a
    dashed line. Reads ``coef``, ``intercept``, ``x_min`` and ``x_max``
    from the enclosing scope at call time.

    Args:
        c: Row index into ``coef`` / ``intercept``.
        color: Matplotlib colour used for the line.
    """
    def line(x0):
        return (-(x0 * coef[c, 0]) - intercept[c]) / coef[c, 1]

    plt.plot([x_min, x_max], [line(x_min), line(x_max)],
             ls="--", color=color)


# The class labels double as row indices into coef, which assumes the labels
# are 0..n_classes-1 in order.
for i, color in zip(ovr_logit.classes_, "bry"):
    plot_hyperplane(i, color)

In [None]:
# Fit a multinomial logistic regression on the two-column training view X.
# NOTE(review): recent scikit-learn releases deprecate the `multi_class`
# argument — confirm against the installed version.
multi_logit = LogisticRegression(multi_class="multinomial")
multi_logit.fit(X, y_train)

In [None]:
# Pad the plotted range by 0.5 on every side so no point sits on the border.
x_min, x_max = X[:, 0].min() - 0.5, X[:, 0].max() + 0.5
y_min, y_max = X[:, 1].min() - 0.5, X[:, 1].max() + 0.5

plt.figure(figsize=(10, 10))
# Colour each training point by the class the multinomial model predicts.
plt.scatter(X[:, 0], X[:, 1], c=multi_logit.predict(X), ec='k', cmap=plt.cm.Paired)
plt.xlabel("Sepal length")
plt.ylabel("Sepal width")
plt.xlim(x_min, x_max)
plt.ylim(y_min, y_max)

coef = multi_logit.coef_
intercept = multi_logit.intercept_


def plot_hyperplane(c, color):
    """Draw the line where the linear score of row ``c`` equals zero.

    Solves ``coef[c, 0] * x0 + coef[c, 1] * x1 + intercept[c] = 0`` for
    ``x1`` at both ends of the x range and joins the two points with a
    dashed line. Reads ``coef``, ``intercept``, ``x_min`` and ``x_max``
    from the enclosing scope at call time.

    Args:
        c: Row index into ``coef`` / ``intercept``.
        color: Matplotlib colour used for the line.
    """
    def line(x0):
        return (-(x0 * coef[c, 0]) - intercept[c]) / coef[c, 1]

    plt.plot([x_min, x_max], [line(x_min), line(x_max)],
             ls="--", color=color)


# The class labels double as row indices into coef, which assumes the labels
# are 0..n_classes-1 in order.
for i, color in zip(multi_logit.classes_, "bry"):
    plot_hyperplane(i, color)

In [None]:
# Logistic regression with default settings; rebinds the name multi_logit.
# NOTE(review): the name suggests the default is expected to behave as a
# multinomial model — confirm for the installed scikit-learn version.
multi_logit = LogisticRegression()

In [None]:
# Train on the full training feature matrix rather than the two-column view X.
multi_logit.fit(X_train, y_train)

In [None]:
# Per-class probability estimates for every training sample.
train_pred_proba = multi_logit.predict_proba(X_train)

In [None]:
# Probabilities predicted for the first training sample, shown as cell output.
sp_pred = train_pred_proba[0]
sp_pred

In [None]:
# Compare the complement of the first probability with the sum of the
# remaining entries of sp_pred; both are printed with four decimals.
prob_not_class0 = 1 - sp_pred[0]
prob_class1_or_2 = sp_pred[1:].sum()

print(f"class 0에 속하지 않을 확률 : {prob_not_class0:.4f}")
print(f"class 1과 2에 속할 확률 : {prob_class1_or_2:.4f}")

In [None]:
# Predict class labels for the training split and the test split, in that order.
train_pred, test_pred = map(multi_logit.predict, (X_train, X_test))

In [None]:
from sklearn.metrics import accuracy_score

In [None]:
# Accuracy of the predictions on each split.
train_acc = accuracy_score(y_true=y_train, y_pred=train_pred)
test_acc = accuracy_score(y_true=y_test, y_pred=test_pred)

In [None]:
# Print both accuracies rounded to two decimals.
for split_name, score in (("Train", train_acc), ("Test", test_acc)):
    print(f"{split_name} accuracy is : {score:.2f}")

In [None]:
# One-vs-rest logistic regression; rebinds the name ovr_logit.
# NOTE(review): recent scikit-learn releases deprecate the `multi_class`
# argument — confirm against the installed version.
ovr_logit = LogisticRegression(multi_class="ovr")

In [None]:
# Train on the full training feature matrix rather than the two-column view X.
ovr_logit.fit(X_train, y_train)

In [None]:
# Predict class labels for the training split and the test split, in that order.
ovr_train_pred, ovr_test_pred = map(ovr_logit.predict, (X_train, X_test))

In [None]:
# Accuracy of the one-vs-rest predictions on each split.
ovr_train_acc = accuracy_score(y_true=y_train, y_pred=ovr_train_pred)
ovr_test_acc = accuracy_score(y_true=y_test, y_pred=ovr_test_pred)

In [None]:
# Report the one-vs-rest accuracies with two decimals. The format spec is
# ".2f" without the sign-space flag, so exactly one space follows the colon,
# as in the earlier "Train accuracy is" / "Test accuracy is" report.
print(f"ovr Train accuracy is : {ovr_train_acc:.2f}")
print(f"ovr test accuracy is : {ovr_test_acc:.2f}")