# **Pattern Recognition and Machine Learning**
> 📘 Tutorial
>
> Week 7: SVM & Decision Tree

## **Face Recognition using SVM**
### 1. Retrieve data

In [3]:
from sklearn.datasets import fetch_lfw_people

# Load the Labeled Faces in the Wild (LFW) people dataset, keeping only the
# people that have at least 88 images (min_faces_per_person=88).
# NOTE(review): scikit-learn may need to download the data on first use --
# confirm network access is available where this notebook runs.
faces = fetch_lfw_people(min_faces_per_person=88)

In [None]:
# Display the raw dataset object returned by fetch_lfw_people.
faces

### 2. Explore data

In [5]:
# List the class names, i.e. the people whose faces are in the dataset.
faces.target_names

array(['Colin Powell', 'Donald Rumsfeld', 'George W Bush',
       'Gerhard Schroeder', 'Tony Blair'], dtype='<U17')

In [6]:
# Shape of the image array -- presumably (n_images, height, width); confirm.
faces.images.shape

(1140, 62, 47)

In [9]:
# Summarise the dataset: how many images, how many features per image,
# and how many distinct people (classes) it contains.
n_samples, n_features, n_classes = (
    faces.images.shape[0],
    faces.data.shape[1],
    faces.target_names.shape[0],
)

print(
    f'Numbers of samples: {n_samples}',
    f'Numbers of features: {n_features}',
    f'Numbers of classes: {n_classes}',
    sep='\n',
)

Numbers of samples: 1140
Numbers of features: 2914
Numbers of classes: 5

In [None]:
# Shape of the feature matrix (one flattened image per row) fed to the model.
faces.data.shape

In [None]:
from matplotlib import pyplot as plt

# Preview the first 12 faces on a 3x4 grid, each captioned with the name of
# the person it belongs to; tick marks are removed to keep the grid clean.
fig, ax = plt.subplots(nrows=3, ncols=4, figsize=(8, 8))
for panel, image, label in zip(ax.flat, faces.images, faces.target):
    panel.imshow(image, cmap='bone')
    panel.set_xticks([])
    panel.set_yticks([])
    panel.set_xlabel(faces.target_names[label])

### 3. Split data into train/test sets

In [None]:
from sklearn.model_selection import train_test_split

# Use 80% of the faces for training and hold out the rest for testing;
# the fixed random_state makes the split repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    faces.data,
    faces.target,
    train_size=0.8,
    random_state=14,
)

In [None]:
# Shape of the training feature matrix.
X_train.shape

In [None]:
# Shape of the held-out test feature matrix.
X_test.shape

In [None]:
# Shape of the training label vector.
y_train.shape

In [None]:
# Shape of the held-out test label vector.
y_test.shape

### 4. Build model

In [None]:
from sklearn.svm import SVC

# Baseline support vector classifier using scikit-learn's default
# hyperparameters; tuning happens in a later section.
model = SVC()

### 5. Train and predict

In [None]:
# Fit the classifier on the training split, then label the held-out faces.
# fit() returns the fitted estimator itself, so the two steps can be chained.
y_pred = model.fit(X_train, y_train).predict(X_test)

### 6. Evaluate the result

In [None]:
from sklearn import metrics
from sklearn.metrics import classification_report, confusion_matrix

# Headline test-set scores. Precision, recall and F1 are averaged over the
# classes with 'weighted' averaging because this is a multi-class problem.
for title, scorer, extra in (
    ("Accuracy:", metrics.accuracy_score, {}),
    ("Precision:", metrics.precision_score, {'average': 'weighted'}),
    ("Recall:", metrics.recall_score, {'average': 'weighted'}),
    ("F1-score:", metrics.f1_score, {'average': 'weighted'}),
):
    print(title, scorer(y_test, y_pred, **extra))

### 7. Print classification report

In [None]:
from sklearn.metrics import classification_report

# Per-person precision / recall / F1 table, with rows labelled by name.
report = classification_report(
    y_test,
    y_pred,
    target_names=faces.target_names,
)
print(report)

### 8. Visualize the confusion matrix

In [None]:
import seaborn as sns
from sklearn.metrics import confusion_matrix

# Confusion matrix of the test predictions. It is drawn transposed so that
# columns are true labels and rows are predicted labels, as the axes state.
mat = confusion_matrix(y_test, y_pred)
class_names = faces.target_names

plt.figure(figsize=(7, 5))
sns.heatmap(
    mat.T,
    square=True,
    annot=True,
    fmt='d',
    cbar=False,
    xticklabels=class_names,
    yticklabels=class_names,
)
plt.xlabel('True label')
plt.ylabel('Predicted label')

In [None]:
# Collect the hyperparameters of the current SVC so they can be inspected
# before tuning.
params = model.get_params()

# Display the collected hyperparameters.
params

### 9. Tune hyperparameters

In [None]:
# Tune the SVC: search over C (which controls the margin hardness) and, for
# the RBF kernel, gamma (which controls the size of the radial basis
# function), then keep the best-scoring combination.
from sklearn.model_selection import GridSearchCV

model = SVC()

# Candidate values. There is one sub-grid per kernel because gamma only
# applies to the RBF kernel; both sub-grids use balanced class weights.
c_values = [0.5, 0.1, 1, 5, 10]
gamma_values = [0.0001, 0.001, 0.01, 0.1, 0.005, 0.05, 0.5]
param_grid = [
    {'C': c_values, 'kernel': ['linear'], 'class_weight': ['balanced']},
    {'C': c_values, 'gamma': gamma_values, 'kernel': ['rbf'],
     'class_weight': ['balanced']},
]

# Cross-validated grid search, fitted on the training split only.
grs = GridSearchCV(estimator=model, param_grid=param_grid)

grs.fit(X_train, y_train)

In [None]:
# Show the hyperparameter combination selected by the grid search.
print("Best Hyper Parameters:",grs.best_params_)

In [None]:
# Label the test set with the best estimator found by the grid search.
model_best = grs.best_estimator_
y_pred = model_best.predict(X_test)

# Same headline scores as for the baseline model, so the two can be compared.
weighted = {'average': 'weighted'}
print("Accuracy:", metrics.accuracy_score(y_test, y_pred))
print("Precision:", metrics.precision_score(y_test, y_pred, **weighted))
print("Recall:", metrics.recall_score(y_test, y_pred, **weighted))
print("F1-score:", metrics.f1_score(y_test, y_pred, **weighted))

In [None]:
# Recompute the confusion matrix from the tuned model's predictions. Reusing
# the matrix built earlier for the baseline SVC would only redraw the first
# heatmap and hide the effect of the tuning.
from sklearn.metrics import confusion_matrix

mat = confusion_matrix(y_test, y_pred)

plt.figure(figsize=(8,6))

# Drawn transposed so that columns are true labels and rows are predicted
# labels, matching the axis labels set below.
sns.heatmap(mat.T, square=True, annot=True, fmt='d', cbar=False,
            xticklabels=faces.target_names,
            yticklabels=faces.target_names)
plt.xlabel('True label')
plt.ylabel('Predicted label')

### 10. Display test faces and highlight misclassifications

In [None]:
# Show the first 24 test faces. The x-label is the true surname and the
# y-label is the predicted surname, drawn in red when the prediction is wrong.
fig, ax = plt.subplots(4, 6, figsize=(10,10))

# Take the per-image shape from the dataset instead of hard-coding (62, 47),
# so the cell keeps working if the faces are loaded at a different size.
image_shape = faces.images.shape[1:]

for i, axi in enumerate(ax.flat):
    axi.imshow(X_test[i].reshape(image_shape), cmap='bone')
    axi.set(xticks=[], yticks=[])
    axi.set_xlabel(faces.target_names[y_test[i]].split()[-1])
    axi.set_ylabel(faces.target_names[y_pred[i]].split()[-1],
                   color='black' if y_pred[i] == y_test[i] else 'red')

# The trailing semicolon suppresses the notebook's echo of the return value.
fig.suptitle('Predicted Names: Incorrect Labels in Red', size=14);


## **Breast Cancer Detection using Decision Tree**
### 1. Retrieve data

In [None]:
from sklearn.datasets import load_breast_cancer

# Load scikit-learn's bundled breast cancer dataset; the returned object
# exposes data, target, feature_names and target_names (all used below).
breast_cancer_dataset = load_breast_cancer()

In [None]:
# Display the raw dataset object returned by load_breast_cancer.
breast_cancer_dataset

### 2. Explore data

In [None]:
# List the names of the classes to be predicted.
breast_cancer_dataset.target_names

In [None]:
# Shape of the feature matrix -- presumably (n_samples, n_features); confirm.
breast_cancer_dataset.data.shape

In [None]:
# List the names of the input features.
breast_cancer_dataset.feature_names

In [None]:
# Shape of the label vector.
breast_cancer_dataset.target.shape

### 3. Split data into train/test sets

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing; the fixed random_state makes the
# split repeatable between runs.
features, labels = breast_cancer_dataset.data, breast_cancer_dataset.target
X_train, X_test, y_train, y_test = train_test_split(
    features, labels, test_size=0.2, random_state=14)

In [None]:
# Shape of the training feature matrix.
X_train.shape

In [None]:
# Shape of the held-out test feature matrix.
X_test.shape

In [None]:
# Shape of the training label vector.
y_train.shape

In [None]:
# Shape of the held-out test label vector.
y_test.shape

### 4. Build model

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Baseline decision tree using scikit-learn's default hyperparameters;
# tuning happens in a later section.
model = DecisionTreeClassifier()

### 5. Train and predict

In [None]:
# Fit the tree on the training split, then label the held-out samples.
# fit() returns the fitted estimator itself, so the two steps can be chained.
y_pred = model.fit(X_train, y_train).predict(X_test)

### 6. Evaluate the result

In [None]:
from sklearn import metrics
from sklearn.metrics import classification_report, confusion_matrix

# Headline test-set scores; precision, recall and F1 use 'weighted'
# averaging across the classes.
averaging = 'weighted'
print("Accuracy:", metrics.accuracy_score(y_test, y_pred))
print("Precision:", metrics.precision_score(y_test, y_pred, average=averaging))
print("Recall:", metrics.recall_score(y_test, y_pred, average=averaging))
print("F1-score:", metrics.f1_score(y_test, y_pred, average=averaging))

### 7. Print classification report

In [None]:
# Per-class precision / recall / F1 table, with rows named after the
# dataset's classes.
class_labels = breast_cancer_dataset.target_names
report = classification_report(y_test, y_pred, target_names=class_labels)

print(report)

### 8. Tune hyperparameters

In [None]:
from sklearn.model_selection import GridSearchCV

model = DecisionTreeClassifier()

# Search space: both split criteria, with tree depths from 1 to 9.
depth_candidates = range(1, 10)
params = {'criterion': ['gini', 'entropy'], 'max_depth': depth_candidates}

# Cross-validated grid search with GridSearchCV's default number of folds
# (5-fold in current scikit-learn). For 10-fold, use the line below instead:
#grs = GridSearchCV(model, param_grid=params, cv = 10)
grs = GridSearchCV(estimator=model, param_grid=params)

grs.fit(X_train, y_train)

In [None]:
# Show the hyperparameter combination selected by the grid search.
print("Best Hyper Parameters:",grs.best_params_)

In [None]:
# Label the test set with the best tree found by the grid search.
model_best = grs.best_estimator_
y_pred = model_best.predict(X_test)

# Same headline scores as for the baseline tree, so the two can be compared.
for title, scorer, extra in (
    ("Accuracy:", metrics.accuracy_score, {}),
    ("Precision:", metrics.precision_score, {'average': 'weighted'}),
    ("Recall:", metrics.recall_score, {'average': 'weighted'}),
    ("F1-score:", metrics.f1_score, {'average': 'weighted'}),
):
    print(title, scorer(y_test, y_pred, **extra))

### 9. Visualize the tree

In [None]:
from matplotlib import pyplot as plt
from sklearn import tree

# Plot the tuned tree selected by the grid search. The bare `model` created
# for the search is never fitted itself (GridSearchCV fits clones of it), so
# passing it to plot_tree would raise NotFittedError.
fig = plt.figure(figsize=(15,10))
_ = tree.plot_tree(grs.best_estimator_,
                   filled=True,
                   # Plain lists are passed because some scikit-learn releases
                   # reject NumPy arrays for these two arguments.
                   feature_names=list(breast_cancer_dataset.feature_names),
                   class_names=list(breast_cancer_dataset.target_names))