# ML Cheatsheet

In [1]:
import numpy as np
from sklearn import model_selection, preprocessing

In [2]:
from sklearn import datasets
# Toy datasets bundled with scikit-learn; the sections below pick one of them each.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in 1.2, so
# this line presumably fails on a current install — confirm the pinned version
# (fetch_california_housing is the replacement suggested by the scikit-learn docs).
dt_bh = datasets.load_boston()    # Boston house price (506, 13)
dt_ir = datasets.load_iris()      # Iris {0, 1, 2} (150, 4) flowers. The latter samples [50:150] are more difficult
dt_dg = datasets.load_digits()    # Digits {0, 1, ..., 9} (1797, 64) 8x8 images

## I. Supervised Learning

In [3]:
# Fit a supervised model and report how it scores on both data splits
def DemoSL(model):
    """Print `model`, fit it on the training split, then print both scores.

    The data is not passed in: the function reads the module-level names
    X_tn / y_tn (training split) and X_tt / y_tt (test split), i.e. whatever
    the most recently executed data-preparation cell assigned to them.
    """
    print(model)
    model.fit(X_tn, y_tn)

    # Training score first, then the held-out score
    score_train = model.score(X_tn, y_tn)
    print(score_train)

    score_test = model.score(X_tt, y_tt)
    print(score_test)

### I.1. Regression

In [None]:
# Boston housing: feature matrix and regression target
X, y = dt_bh.data, dt_bh.target

# Hold out 100 samples for testing (an integer test_size is an absolute count)
X_tn, X_tt, y_tn, y_tt = model_selection.train_test_split(
    X, y, test_size=100, random_state=27
)

# Standardize: learn the scaling statistics on the training split only ...
ss = preprocessing.StandardScaler().fit(X_tn)
X_tn = ss.transform(X_tn)
print(ss)

# ... and reuse exactly those statistics on the test split
X_tt = ss.transform(X_tt)

In [None]:
# Linear Regression
from sklearn.linear_model import LinearRegression
rgs_lin = LinearRegression()
DemoSL(rgs_lin)    # prints the model, then the train and test scores
# Note that the score is not accuracy (percentage); for a regressor it is
# presumably R^2 (coefficient of determination), per scikit-learn's convention.

# Optional inspection of the fitted model:
#rgs_lin.predict(X_tt)
#print(rgs_lin.coef_)
#print(rgs_lin.intercept_)

### I.2. Classification

In [4]:
# Digits: flattened 8x8 images and their class labels
X, y = dt_dg.data, dt_dg.target

# Hold out 30% of the samples for testing
X_tn, X_tt, y_tn, y_tt = model_selection.train_test_split(
    X, y, test_size=0.3, random_state=27
)

# Standardize: fit the scaler on the training split only ...
ss = preprocessing.StandardScaler().fit(X_tn)
X_tn = ss.transform(X_tn)

# ... and apply the same statistics to the test split
X_tt = ss.transform(X_tt)

In [7]:
# GridSearchCV : Exhaustive search of hyper-parameters for an estimator
from sklearn.metrics import classification_report

def gsCV(model, param_grid, scorings):
    """Grid-search `model` over `param_grid`, once per metric in `scorings`.

    For every scoring name a 5-fold GridSearchCV (4 parallel jobs) is fitted
    on the module-level training split (X_tn, y_tn). The mean score and twice
    its standard deviation are printed for each candidate, followed by the
    best parameters and a classification report computed on the module-level
    test split (X_tt, y_tt). After the loop, the last fitted search object is
    printed.

    Args:
        model: estimator handed to GridSearchCV.
        param_grid: parameter grid handed to GridSearchCV.
        scorings: iterable of scoring names; an empty iterable prints nothing.

    Returns:
        None. All results are reported via print().
    """
    gcv = None    # stays None when `scorings` is empty, so the final print is skipped

    for scoring in scorings:
        print('\n# Tuning hyper-parameters for %s' % scoring)
        # `scoring` is passed by keyword: recent scikit-learn releases accept only
        # the estimator and the parameter grid positionally.
        gcv = model_selection.GridSearchCV(model, param_grid, scoring=scoring, cv=5, n_jobs=4, verbose=1)
        gcv.fit(X_tn, y_tn)

        results = gcv.cv_results_
        means = results['mean_test_score']
        stds  = results['std_test_score']
        for mean, std, params in zip(means, stds, results['params']):
            print('%.3f (+/-%.3f) for %r' % (mean, std * 2, params))
        print('\n# Best parameters on development set:', gcv.best_params_)

        print('\n# Scores computed on evaluation set:\n')
        print(classification_report(y_tt, gcv.predict(X_tt), digits=3))

    # Show the configuration of the last search, if any search was run
    if gcv is not None:
        print(gcv)
    #print(gcv.cv_results_)

# Metrics that gsCV tunes for; it runs one grid search per entry.
# NOTE(review): plain 'precision' / 'recall' / 'f1' presumably assume binary targets;
# for the 10-class digits data the averaged variants (e.g. 'f1_macro') are likely needed — confirm.
scorings = ['accuracy']    # 'accuracy', 'precision', 'recall', 'f1'

In [None]:
# Logistic Regression
from sklearn.linear_model import LogisticRegression
clf_log = LogisticRegression(random_state=27)    # seeded like the other models in this notebook
DemoSL(clf_log)    # prints the model, then the train and test scores

# Optional inspection of the fitted model:
#print(clf_log.coef_)
#print(clf_log.intercept_)

In [11]:
# k-Nearest Neighbours (instance-based learning)
from sklearn.neighbors import KNeighborsClassifier
clf_knn = KNeighborsClassifier(
    n_neighbors=5,
    p=2,
)
DemoSL(clf_knn)

# Predicted class next to the rounded per-class probabilities:
#np.column_stack((clf_knn.predict(X_tt), np.round(clf_knn.predict_proba(X_tt), 3)))

# Candidate hyper-parameters for the (currently disabled) grid search
param_grid = [
    {
        'n_neighbors': [3, 5, 10, 15],
        'p': [1, 2],
    },
]
#gsCV(clf_knn, param_grid, scorings)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=3, p=2,
           weights='uniform')
0.986475735879
0.974074074074


In [None]:
# Support Vector Machine classifier
from sklearn.svm import SVC
# gamma applies to the 'rbf', 'poly' and 'sigmoid' kernels
clf_svc = SVC(
    kernel='rbf',
    gamma=0.03,
    C=1,
    random_state=27,
)
DemoSL(clf_svc)

# Candidate kernels and C values for the (currently disabled) grid search
param_grid = [
    {
        'kernel': ['rbf', 'linear', 'poly'],
        'C': [3, 10, 20],
    },
]
#gsCV(clf_svc, param_grid, scorings)

In [None]:
# Neural Network (multi-layer perceptron)
from sklearn.neural_network import MLPClassifier
clf_mlp = MLPClassifier(
    hidden_layer_sizes=(100, 50, 20),    # three hidden layers
    learning_rate_init=0.05,
    alpha=0.05,
    verbose=1,
    random_state=27,
)
DemoSL(clf_mlp)

In [None]:
# Naive Bayes
# GaussianNB is the variant used here; BernoulliNB is noted as the alternative.
from sklearn.naive_bayes import GaussianNB    # or BernoulliNB
clf_nb = GaussianNB()
DemoSL(clf_nb)    # prints the model, then the train and test scores

In [None]:
# Gaussian Process classifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
clf_gp = GaussianProcessClassifier(
    kernel=1.0 * RBF(length_scale=1.0),
    warm_start=True,
    max_iter_predict=2,
    n_jobs=4,
    random_state=27,
)
# Left disabled: fitting takes too long on dt_dg, although it is quick on dt_ir
#DemoSL(clf_gp)

In [None]:
# QDA (Quadratic Discriminant Analysis)
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
clf_qda = QuadraticDiscriminantAnalysis()    # library defaults, nothing tuned
DemoSL(clf_qda)    # prints the model, then the train and test scores

In [None]:
# Decision Tree classifier
from sklearn.tree import DecisionTreeClassifier
clf_dt = DecisionTreeClassifier(
    max_depth=7,
    criterion='entropy',    # split criterion: 'gini' or 'entropy' (info gain)
    random_state=27,
)
DemoSL(clf_dt)

# Candidate hyper-parameters for the (currently disabled) grid search
param_grid = [
    {
        'max_depth': [10, 20, 40],
        'max_features': [32, None],
        'criterion': ['gini', 'entropy'],
    },
]
#gsCV(clf_dt, param_grid, scorings)

In [None]:
# Random Forest: a collection of decision trees, each built from a random subset
# of the training data (bagging) and of the features; the trees vote by majority
from sklearn.ensemble import RandomForestClassifier
clf_rf = RandomForestClassifier(
    n_estimators=300,
    max_depth=10,
    random_state=27,
)
DemoSL(clf_rf)

# Candidate hyper-parameters for the (currently disabled) grid search
param_grid = [
    {
        'max_depth': [7, 10, 15],
        'n_estimators': [50, 100, 300],
    },
]
#gsCV(clf_rf, param_grid, scorings)

In [None]:
# AdaBoost classifier
from sklearn.ensemble import AdaBoostClassifier
clf_ab = AdaBoostClassifier(
    n_estimators=300,
    learning_rate=0.01,
    random_state=27,
)
DemoSL(clf_ab)

In [None]:
# Gradient Boosting classifier
from sklearn.ensemble import GradientBoostingClassifier
clf_gb = GradientBoostingClassifier(
    n_estimators=100,
    learning_rate=0.5,
    max_depth=5,
    random_state=27,
)
DemoSL(clf_gb)

## II. Unsupervised Learning

### II.1. Clustering

In [None]:
# Keep only the first 100 iris samples (the easier subset)
X, y = dt_ir.data[:100], dt_ir.target[:100]

# Hold out 30% of the samples for testing
X_tn, X_tt, y_tn, y_tt = model_selection.train_test_split(
    X, y, test_size=0.3, random_state=27
)

# Standardize: fit the scaler on the training split only ...
ss = preprocessing.StandardScaler().fit(X_tn)
X_tn = ss.transform(X_tn)

# ... and apply the same statistics to the test split
X_tt = ss.transform(X_tt)

In [None]:
# k-Means
from sklearn.cluster import KMeans
clu_km = KMeans(n_clusters=2, random_state=27)    # presumably one cluster per class left in the 100-sample subset
print(clu_km)

# Unsupervised: only the features are passed, the labels y_tn are not used
clu_km.fit(X_tn)
# Last expression of the cell, so the predicted cluster ids are displayed
clu_km.predict(X_tt)

In [None]:
# True labels of the test split, displayed for comparison with the k-Means output above.
# NOTE(review): cluster ids are presumably arbitrary, so the grouping should agree
# even if the 0/1 ids come out swapped relative to these labels.
y_tt    # Prediction above should cluster similarly

### II.2. Dimensionality Reduction

In [None]:
# Full iris data (all samples and classes) for the dimensionality-reduction demo
X = dt_ir.data
y = dt_ir.target

# Train test split: hold out 30% of the samples
X_tn, X_tt, y_tn, y_tt = model_selection.train_test_split(X, y, test_size=0.3, random_state=27)

# Normalization: fit the scaler on the training split only, then apply the
# same statistics to the test split
ss = preprocessing.StandardScaler()
X_tn = ss.fit_transform(X_tn)

X_tt = ss.transform(X_tt)

In [None]:
# PCA
from sklearn.decomposition import PCA
dr_pca = PCA(n_components=3, random_state=27)    # keep 3 components
print(dr_pca)

# Fit the projection on the training split only, then apply it to the test split
X_tn_reduced = dr_pca.fit_transform(X_tn)
X_tt_reduced = dr_pca.transform(X_tt)

# Last expression of the cell, so the shape of the reduced test data is displayed
X_tt_reduced.shape