In [None]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets
from sklearn.metrics import f1_score, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# kNN

In [None]:
from sklearn.neighbors import KNeighborsClassifier

In [None]:
# Load the Iris dataset once; later cells reuse the `iris` object.
# Only feature columns 2 onward are kept, so the problem stays two-dimensional.
iris = datasets.load_iris()
iris_x, iris_y = iris.data[:, 2:], iris.target

In [None]:
# Hold out 20% of the samples for evaluation. A fixed random_state makes the
# split, and every score derived from it, repeatable across kernel restarts.
x_train, x_test, y_train, y_test = train_test_split(
    iris_x, iris_y, test_size=0.2, random_state=42
)

# Fit the scaler on the training split only, then reuse its statistics on the
# test split so no test information leaks into preprocessing.
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

In [None]:
# k-nearest-neighbours classifier built with scikit-learn's default settings.
model = KNeighborsClassifier()

In [None]:
# Train on the scaled training split; the fitted estimator is the cell output.
model.fit(X=x_train, y=y_train)

In [None]:
# Predict the held-out samples and report the micro-averaged F1 score.
# The bare final expression is what the cell displays.
y_pred = model.predict(X=x_test)
f1_score(y_true=y_test, y_pred=y_pred, average="micro")

# Linear regression

In [None]:
from sklearn.linear_model import LinearRegression

In [None]:
# Load the diabetes regression data and keep only feature column 2, reshaped
# to a single-column 2-D array as scikit-learn estimators expect.
diabetes_x, diabetes_y = datasets.load_diabetes(return_X_y=True)
diabetes_x = diabetes_x[:, 2].reshape(-1, 1)

# Hold out 20% for evaluation; random_state pins the shuffle so the reported
# error and the plots below are reproducible on a fresh kernel.
x_train, x_test, y_train, y_test = train_test_split(
    diabetes_x, diabetes_y, test_size=0.2, random_state=42
)

In [None]:
# Ordinary least-squares fit on the single retained feature.
# The fitted estimator is the cell output.
model = LinearRegression()
model.fit(X=x_train, y=y_train)

In [None]:
# Predict the held-out targets and report the mean squared error.
y_pred = model.predict(X=x_test)
mean_squared_error(y_true=y_test, y_pred=y_pred)

In [None]:
# Held-out points (black) against the fitted regression line (blue).
# Tick marks are removed on both axes, so only the shape of the fit is shown.
fig, ax = plt.subplots()
ax.scatter(x_test, y_test, color="black")
ax.plot(x_test, y_pred, color="blue", linewidth=3)
ax.set_xticks(())
ax.set_yticks(())
plt.show()

In [None]:
from sklearn.preprocessing import PolynomialFeatures

In [None]:
# Expand the single feature into polynomial terms up to degree 4.
poly_features = PolynomialFeatures(degree=4)

# The transformer already returns a 2-D (n_samples, n_terms) array, so no
# reshape is needed. A hard-coded column count would break, or silently
# scramble rows, as soon as `degree` is changed.
x_train_poly = poly_features.fit_transform(x_train)
x_test_poly = poly_features.transform(x_test)

# A linear model on the expanded features is a polynomial regression.
model = LinearRegression()
model.fit(x_train_poly, y_train)

# Mean squared error on the held-out split; displayed as the cell output.
y_pred = model.predict(x_test_poly)
mean_squared_error(y_test, y_pred)

In [None]:
# Evaluate the fitted polynomial on an evenly spaced grid so the curve is drawn
# smoothly. The grid bounds are hard-coded, presumably to cover the feature's
# range (TODO confirm).
# NOTE: `X_test` (capital X) is the plotting grid; `x_test` is the held-out data.
X_test = np.linspace(-0.1, 0.16, 100).reshape(-1, 1)

# Reuse the transformer fitted on the training data: `transform`, not
# `fit_transform`, because evaluation inputs must never refit preprocessing.
X_test_poly = poly_features.transform(X_test)

# `y_pred` now holds predictions for the grid, not for `x_test`.
y_pred = model.predict(X_test_poly)

plt.scatter(x_test, y_test, color="black")
plt.plot(X_test, y_pred, color="blue", linewidth=3)
plt.xticks(())
plt.yticks(())
plt.show()

# Logistic regression

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
# Binary classification on the breast-cancer dataset, restricted to its first
# two feature columns.
breast_cancer = datasets.load_breast_cancer()
bc_x = breast_cancer.data[:, :2]
bc_y = breast_cancer.target

# Fixed random_state keeps the 80/20 split, and the score below, reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    bc_x, bc_y, test_size=0.2, random_state=42
)

# Standardize using statistics from the training split only.
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

model = LogisticRegression()
model.fit(x_train, y_train)

# Default (binary) F1 on the held-out split; displayed as the cell output.
y_pred = model.predict(x_test)
f1_score(y_test, y_pred)

# SVM

In [None]:
from sklearn.svm import SVC

In [None]:
# Two-feature Iris problem again (columns 2 onward).
iris_x = iris.data[:, 2:]
iris_y = iris.target

# Fixed random_state: the next cell reuses this exact split to compare kernels,
# so it should not change between runs.
x_train, x_test, y_train, y_test = train_test_split(
    iris_x, iris_y, test_size=0.2, random_state=42
)

# Standardize using statistics from the training split only.
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

# Support vector classifier with a linear kernel.
model = SVC(kernel="linear")
model.fit(x_train, y_train)

# Micro-averaged F1 on the held-out split; displayed as the cell output.
y_pred = model.predict(x_test)
f1_score(y_test, y_pred, average="micro")

## SVM with non-linear kernel

In [None]:
# Same train/test split as the linear-kernel cell, but with an RBF kernel.
model_rbf = SVC(kernel="rbf")
model_rbf.fit(x_train, y_train)

# Use the same averaging mode ("micro") as the linear-kernel score so the two
# numbers are directly comparable.
y_pred_rbf = model_rbf.predict(x_test)
f1_score(y_test, y_pred_rbf, average="micro")

# Decision tree

In [None]:
from sklearn.tree import DecisionTreeClassifier

In [None]:
# Two-feature Iris problem (columns 2 onward). No scaling is applied here.
iris_x = iris.data[:, 2:]
iris_y = iris.target

# Seed both the split and the tree so the score, and the tree exported in the
# following cell, are the same on every run.
x_train, x_test, y_train, y_test = train_test_split(
    iris_x, iris_y, test_size=0.2, random_state=42
)

# A depth limit of 2 keeps the tree small enough to read once exported.
model = DecisionTreeClassifier(max_depth=2, random_state=42)
model.fit(x_train, y_train)

# Micro-averaged F1 on the held-out split; displayed as the cell output.
y_pred = model.predict(x_test)
f1_score(y_test, y_pred, average="micro")

In [None]:
from sklearn.tree import export_graphviz

In [None]:
# Write the fitted tree as a Graphviz .dot file in the working directory.
# Feature names are sliced with [2:] to match the columns the tree was trained on.
export_graphviz(
    model,
    out_file="./decision_tree.dot",
    class_names=iris.target_names,
    feature_names=iris.feature_names[2:],
    filled=True,
    rounded=True,
)

Run `dot -Tpng decision_tree.dot -o decision_tree.png` in a terminal to render the exported tree as a PNG image (requires Graphviz to be installed).

# Voting classifier

In [None]:
from sklearn.ensemble import VotingClassifier

In [None]:
# Two-feature Iris problem (columns 2 onward).
iris_x = iris.data[:, 2:]
iris_y = iris.target

# Fixed random_state keeps the 75/25 split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    iris_x, iris_y, test_size=0.25, random_state=42
)

# Standardize using statistics from the training split only.
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

# Three base learners; the tree is seeded so its score is repeatable.
dt_model = DecisionTreeClassifier(random_state=42)
svm_model = SVC()
log_model = LogisticRegression()

# Hard voting: the ensemble predicts the majority class of its members.
voting_model = VotingClassifier(
    estimators=[
        ("dt", dt_model),
        ("svm", svm_model),
        ("lr", log_model),
    ],
    voting="hard",
)

# Fit and score each base learner and the ensemble on the same split.
# The ensemble is fitted here as well, so no separate fit call is needed before
# the loop.
for model in (dt_model, svm_model, log_model, voting_model):
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)
    print(model.__class__.__name__, f1_score(y_test, y_pred, average="micro"))

# Bagging classifier

In [None]:
from sklearn.ensemble import BaggingClassifier

In [None]:
# All four Iris features are used in this section.
iris_x = iris.data
iris_y = iris.target

# Fixed random_state keeps the 80/20 split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    iris_x, iris_y, test_size=0.2, random_state=42
)

# Standardize before logistic regression, as the other logistic-regression
# cells in this notebook do. Statistics come from the training split only.
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

# Each bagged logistic regression sees half of the samples and half of the
# features. random_state fixes which ones, so the score is repeatable.
model = BaggingClassifier(
    LogisticRegression(), max_samples=0.5, max_features=0.5, random_state=42
)
model.fit(x_train, y_train)
y_pred = model.predict(x_test)

# Micro-averaged F1 on the held-out split; displayed as the cell output.
f1_score(y_test, y_pred, average="micro")

# Random forest

In [None]:
from sklearn.ensemble import RandomForestClassifier

In [None]:
# All four Iris features are used in this section.
iris_x = iris.data
iris_y = iris.target

# Seed both the split and the forest. Random forests are stochastic, so an
# unseeded run gives a different score on every execution.
x_train, x_test, y_train, y_test = train_test_split(
    iris_x, iris_y, test_size=0.2, random_state=42
)

model = RandomForestClassifier(random_state=42)
model.fit(x_train, y_train)
y_pred = model.predict(x_test)

# Micro-averaged F1 on the held-out split; displayed as the cell output.
f1_score(y_test, y_pred, average="micro")