In [1]:
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
from sklearn.datasets import fetch_openml

# Fetch the 'mnist_784' dataset (version 1) from OpenML as a (features, labels) pair.
# as_frame=False pins the return type to NumPy arrays: scikit-learn >= 0.24 defaults
# to as_frame='auto', which returns a pandas DataFrame/Series for this dataset, so
# without it the types of X and y depend on the installed library version.
X, y = fetch_openml('mnist_784', version=1, return_X_y=True, as_frame=False)

# Rescale every feature by 255 (assumes raw pixel intensities in 0..255, which
# would map them into [0, 1] -- TODO confirm against the dataset description).
X = X / 255

In [3]:
# Hold out a test split (train_test_split's default proportions), stratified on the
# labels so both parts keep the same class balance; random_state fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=42)

# Baseline: MLPClassifier with default hyper-parameters.
# Weight initialisation (and mini-batch shuffling for the stochastic solvers) is
# random, so the seed is fixed here like in the later cells; without it the two
# scores below change from one run to the next.
mlp = MLPClassifier(random_state=0).fit(X_train, y_train)
print("train_score:{}".format(mlp.score(X_train, y_train)))
print("test_score:{}".format(mlp.score(X_test, y_test)))

train_score:1.0
test_score:0.9784


In [4]:
# Compare hidden-layer activation functions on an otherwise fixed network:
# lbfgs solver, alpha=0.1, two hidden layers of 10 units, seed 0.
# NOTE(review): the recorded run of this cell emitted lbfgs
# "TOTAL NO. of ITERATIONS REACHED LIMIT" warnings, i.e. every fit stopped at the
# max_iter cap rather than converging -- consider a larger max_iter before
# reading too much into the ranking.

for activation_name in ('logistic', 'tanh', 'relu'):
    mlp = MLPClassifier(
        solver='lbfgs',
        activation=activation_name,
        alpha=0.1,
        hidden_layer_sizes=[10, 10],
        random_state=0,
    )
    mlp.fit(X_train, y_train)

    # Score first, report afterwards; the printed text is unchanged.
    train_acc = mlp.score(X_train, y_train)
    test_acc = mlp.score(X_test, y_test)

    print("activation function:", activation_name)
    print("train_score:{}".format(train_acc))
    print("test_score:{}".format(test_acc))
    print("------------------")
    

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


activation function: logistic
train_score:0.9613904761904762
test_score:0.9329714285714286
------------------


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


activation function: tanh
train_score:0.9623238095238095
test_score:0.9303428571428571
------------------


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


activation function: relu
train_score:0.9248571428571428
test_score:0.9115428571428571
------------------


In [5]:
# Sweep the regularization parameter alpha while holding the rest of the
# network fixed (lbfgs solver, tanh activation, two hidden layers of 10 units,
# seed 0), and report train/test accuracy for each setting.

for alpha_value in (0.01, 0.1, 1.0, 5.0):
    mlp = MLPClassifier(
        solver='lbfgs',
        activation='tanh',
        alpha=alpha_value,
        hidden_layer_sizes=[10, 10],
        random_state=0,
    )
    mlp.fit(X_train, y_train)

    train_acc = mlp.score(X_train, y_train)
    test_acc = mlp.score(X_test, y_test)

    print("the value of alpha:", alpha_value)
    print(
        "train_score:{}".format(train_acc),
        "test_score:{}".format(test_acc),
        "------------------",
        sep="\n",
    )
    

the value of alpha: 0.01
train_score:0.9615428571428571
test_score:0.9305142857142857
------------------
the value of alpha: 0.1
train_score:0.9608380952380953
test_score:0.9308
------------------
the value of alpha: 1.0
train_score:0.9612571428571428
test_score:0.9308
------------------
the value of alpha: 5.0
train_score:0.9618476190476191
test_score:0.9334857142857143
------------------


In [9]:
# Vary the width of a single hidden layer (lbfgs solver, seed 0, all other
# hyper-parameters at their defaults) and report train/test accuracy per width.
for n_units in (1, 10, 50, 100):
    mlp = MLPClassifier(
        solver='lbfgs',
        random_state=0,
        hidden_layer_sizes=[n_units],
    ).fit(X_train, y_train)

    train_acc = mlp.score(X_train, y_train)
    test_acc = mlp.score(X_test, y_test)

    print("the number of hidden units:", n_units)
    print("train_score:{}".format(train_acc))
    print("test_score:{}".format(test_acc))
    print("------------------")


the number of hidden units: 1
train_score:0.11253333333333333
test_score:0.11251428571428572
------------------
the number of hidden units: 10
train_score:0.9133523809523809
test_score:0.9028571428571428
------------------
the number of hidden units: 50
train_score:0.9999428571428571
test_score:0.9668
------------------
the number of hidden units: 100
train_score:1.0
test_score:0.9749714285714286
------------------


In [10]:
# Vary the depth of the network: one, two and three hidden layers of 10 units
# each (lbfgs solver, seed 0, all other hyper-parameters at their defaults).
for layer_sizes in [[10], [10, 10], [10, 10, 10]]:
    mlp = MLPClassifier(solver='lbfgs', random_state=0, hidden_layer_sizes=layer_sizes)
    mlp.fit(X_train, y_train)
    # Report the actual layer count next to the label, and keep the per-layer
    # sizes visible as well; printing only the size list under a "number of
    # hidden layers" label would be misleading.
    print("the number of hidden layers:", len(layer_sizes),
          "- units per layer:", layer_sizes)
    print("train_score:{}".format(mlp.score(X_train, y_train)))
    print("test_score:{}".format(mlp.score(X_test, y_test)))
    print("------------------")

the number of hidden layers: [10]
train_score:0.9133523809523809
test_score:0.9028571428571428
------------------
the number of hidden layers: [10, 10]
train_score:0.9231619047619047
test_score:0.9114857142857142
------------------
the number of hidden layers: [10, 10, 10]
train_score:0.9256190476190476
test_score:0.9153714285714286
------------------


In [6]:
# Choose the best hyper-parameters with an exhaustive, cross-validated grid search.
from sklearn.model_selection import GridSearchCV

# Search space. The solver and seed are held fixed (single-element lists) so
# only activation, alpha and the architecture vary.
# hidden_layer_sizes covers widths 1, 10 and 50 at depths 1, 2 and 3, each
# listed exactly once: GridSearchCV evaluates every list entry as its own
# candidate, so a repeated entry is re-fitted on every fold without adding
# any information.
parameters = {
    'solver': ['lbfgs'],
    'activation': ['logistic', 'tanh', 'relu'],
    'alpha': [0.01, 0.1, 1.0, 5.0],
    'hidden_layer_sizes': [
        [1], [10], [50],
        [1, 1], [10, 10], [50, 50],
        [1, 1, 1], [10, 10, 10], [50, 50, 50],
    ],
    'random_state': [0],
}

# n_jobs=-1 runs the candidate fits in parallel on all available cores.
clf_grid = GridSearchCV(MLPClassifier(), parameters, n_jobs=-1)

clf_grid.fit(X_train, y_train)

# score() here uses the estimator refitted with the best parameters found.
print("Test set score: {:.2f}".format(clf_grid.score(X_test, y_test)))
print("Best parameters: {}".format(clf_grid.best_params_))
print("Best cross-validation score: {:.2f}".format(clf_grid.best_score_))
print("Best estimator:\n{}".format(clf_grid.best_estimator_))



Test set score: 0.97
Best parameters: {'activation': 'logistic', 'alpha': 5.0, 'hidden_layer_sizes': [50, 50], 'random_state': 0, 'solver': 'lbfgs'}
Best cross-validation score: 0.97
Best estimator:
MLPClassifier(activation='logistic', alpha=5.0, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=[50, 50], learning_rate='constant',
              learning_rate_init=0.001, max_iter=200, momentum=0.9,
              n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
              random_state=0, shuffle=True, solver='lbfgs', tol=0.0001,
              validation_fraction=0.1, verbose=False, warm_start=False)
