# Table of Contents
* [Linear Regression](#linear-regression)
* [Logistic Regression](#logistic-regression)
* [kNN Classification](#knn-classification)
* [Decision Tree Classification](#decision-tree-classification)
* [Decision Tree Classification with Pruning](#decision-tree-classification-pruning)
* [k-means Clustering](#k-means-clustering)

In [None]:
# Initialization
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline
from warnings import filterwarnings
# Install a global "ignore" filter so Python warnings (e.g. library
# deprecation or convergence notices) do not clutter the cell outputs.
filterwarnings('ignore')

## Linear Regression <a class="anchor" id="linear-regression"></a>

In [None]:
# Linear Regression
# Fit an ordinary least-squares model on a housing dataset and report the
# R^2 score on a held-out split.
# NOTE: `load_boston` was deprecated in scikit-learn 1.0 and removed in 1.2,
# so importing it fails on current releases. The California housing dataset
# is the replacement suggested by scikit-learn; it is downloaded and cached
# on first use.
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split as split
from sklearn.linear_model import LinearRegression
X, y = fetch_california_housing(return_X_y=True)
# Hold out 25% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = split(X, y, test_size=0.25, random_state=42)
lr = LinearRegression().fit(X_train, y_train)
# For regressors, `score` returns the coefficient of determination (R^2).
print(f'R2 score: {lr.score(X_test, y_test):.2f}')

## Logistic Regression <a class="anchor" id="logistic-regression"></a>

In [None]:
# Logistic Regression
# Train a logistic-regression classifier on the Pima Indians diabetes CSV and
# print its accuracy on a held-out 25% split.
from pandas import read_csv
from sklearn.model_selection import train_test_split as split
from sklearn.linear_model import LogisticRegression
# Column names are supplied explicitly, so every row of the file is read as data.
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
df = read_csv('pima-indians-diabetes.data.csv', names=names)
array = df.values
# Every column but the last is a feature; the last column ('class') is the label.
X, y = array[:, :-1], array[:, -1]
X_train, X_test, y_train, y_test = split(X, y, test_size=0.25, random_state=42)
# Build the estimator with default settings, then fit it on the training split.
lgr = LogisticRegression()
lgr.fit(X_train, y_train)
# For classifiers, `score` is the mean accuracy; scale it to a percentage.
print(f'Accuracy: {100 * lgr.score(X_test, y_test):.2f} %')

## kNN Classification <a class="anchor" id="knn-classification"></a>

In [None]:
# kNN Classification
# Train a k-nearest-neighbours classifier (default settings) on the Pima
# Indians diabetes CSV and print its accuracy on a held-out 25% split.
from pandas import read_csv
from sklearn.model_selection import train_test_split as split
from sklearn.neighbors import KNeighborsClassifier
# Column names are supplied explicitly, so every row of the file is read as data.
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
df = read_csv('pima-indians-diabetes.data.csv', names=names)
array = df.to_numpy()
# Features are all columns except the last; the label is the last column.
X, y = array[:, :-1], array[:, -1]
X_train, X_test, y_train, y_test = split(X, y, test_size=0.25, random_state=42)
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)
# `score` is the mean accuracy on the given data; scale it to a percentage.
print(f'Accuracy: {100 * knn.score(X_test, y_test):.2f} %')

## Decision Tree Classification <a class="anchor" id="decision-tree-classification"></a>

In [None]:
# Decision Tree Classification
# Train an unconstrained decision tree on the Pima Indians diabetes CSV and
# print train and test accuracy side by side so the two can be compared.
from pandas import read_csv
from sklearn.model_selection import train_test_split as split
from sklearn.tree import DecisionTreeClassifier
# Column names are supplied explicitly, so every row of the file is read as data.
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
df = read_csv('pima-indians-diabetes.data.csv', names=names)
array = df.values
# Features are all columns except the last; the label is the last column.
X = array[:,:-1]
y = array[:,-1]
X_train, X_test, y_train, y_test = split(X, y, test_size=0.25, random_state=42)
# The tree permutes features randomly at each split, so it is seeded like the
# data split; otherwise the reported accuracies can change between runs.
dtc = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)
print(f'Train accuracy: {100 * dtc.score(X_train, y_train):.2f} %')
print(f'Test accuracy: {100 * dtc.score(X_test, y_test):.2f} %')

## Decision Tree Classification with Pruning <a class="anchor" id="decision-tree-classification-pruning"></a>

In [None]:
# Decision Tree Classification with pre-pruning
# Train a decision tree capped at 10 leaf nodes on the Pima Indians diabetes
# CSV, print train and test accuracy, and draw the fitted tree.
from pandas import read_csv
from sklearn.model_selection import train_test_split as split
from sklearn.tree import DecisionTreeClassifier, plot_tree
import matplotlib.pyplot as plt
# Column names are supplied explicitly, so every row of the file is read as data.
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
df = read_csv('pima-indians-diabetes.data.csv', names=names)
array = df.values
# Features are all columns except the last; the label is the last column.
X = array[:,:-1]
y = array[:,-1]
X_train, X_test, y_train, y_test = split(X, y, test_size=0.25, random_state=42)
# `max_leaf_nodes` limits tree growth (pre-pruning). The tree is seeded like
# the data split so the accuracies and the plotted tree are reproducible.
dtc = DecisionTreeClassifier(max_leaf_nodes=10, random_state=42).fit(X_train, y_train)
print(f'Train accuracy: {100 * dtc.score(X_train, y_train):.2f} %')
print(f'Test accuracy: {100 * dtc.score(X_test, y_test):.2f} %')

plt.figure(figsize=(10, 10))
# Pass only the feature columns as names: the last entry of `names` ('class')
# is the label, not a feature the tree can split on.
plot_tree(dtc, feature_names=names[:-1], class_names=['0', '1'], rounded=True, filled=True)
plt.show()

## k-means Clustering <a class="anchor" id="k-means-clustering"></a>

In [None]:
# k-means clustering
# Generate three synthetic blobs, cluster them with k-means, and plot the
# points before and after cluster assignment.
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
import matplotlib.pyplot as plt
# `y` holds the generating blob index of each sample; it is not used below
# because clustering is unsupervised.
X, y = make_blobs(centers=3, cluster_std=2, random_state=42)
# Seed the estimator and pin `n_init` so the cluster assignment (and thus the
# colours in the plot) is reproducible across runs and scikit-learn versions,
# whose default for `n_init` has changed over time.
km = KMeans(n_clusters=3, n_init=10, random_state=42).fit(X)
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(8, 4))
fig.suptitle('k-means Clustering')
# Left panel: raw points; right panel: the same points coloured by cluster label.
ax1.scatter(X[:, 0], X[:, 1])
ax1.set_title('Before clustering')
ax2.scatter(X[:, 0], X[:, 1], c=km.labels_)
ax2.set_title('After clustering')
plt.show()