In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from dmba import classificationSummary

In [3]:
# Load the tiny example data set and separate the predictors from the outcome.
example_df = pd.read_csv('data/TinyData.csv')
predictors = ['Fat', 'Salt']
outcome = 'Acceptance'
X = example_df[predictors]
y = example_df[outcome]
# Sorted class labels, kept as a plain list for labelling summaries.
classes = sorted(y.unique())

# One hidden layer with three nodes. Note the trailing comma: (3) is just the
# integer 3, whereas (3,) is the one-element tuple describing a single layer.
clf = MLPClassifier(hidden_layer_sizes=(3,), activation='logistic', solver='lbfgs',
                    random_state=1)
clf.fit(X, y)

# Network structure
print('Intercepts')
print(clf.intercepts_)
print('Weights')
print(clf.coefs_)

# Prediction: show the class probabilities next to the original records.
# predict_proba orders its columns like clf.classes_, so label them from the
# fitted model; reuse example_df's index so the rows line up in the concat.
print(pd.concat([
    example_df,
    pd.DataFrame(clf.predict_proba(X), columns=clf.classes_, index=example_df.index),
], axis=1))

Intercepts
[array([0.13368042, 4.07247549, 7.00768105]), array([14.3074867])]
Weights
[array([[ -1.3065648 ,  -4.20427797, -13.29587331],
       [ -0.04399729,  -4.91606921,  -6.03356987]]), array([[ -0.27348314],
       [ -9.01211576],
       [-17.63504684]])]
   Obs.  Fat  Salt Acceptance   dislike      like
0     1  0.2   0.9       like  0.000490  0.999510
1     2  0.1   0.1    dislike  0.999994  0.000006
2     3  0.2   0.4    dislike  0.999741  0.000259
3     4  0.2   0.5    dislike  0.997368  0.002632
4     5  0.4   0.5       like  0.002133  0.997867
5     6  0.3   0.8       like  0.000075  0.999925


The first part of the output shows the estimated parameters: first those connecting the input layer to the hidden layer, and then those connecting the hidden layer to the output layer. The intercepts are the bias terms of the nodes and correspond to θ3, θ4, θ5 (the three hidden nodes) and θ6 (the output node). The intercepts and weights are used to compute the outputs of the hidden-layer nodes and, from those, the output of the network. They were computed iteratively, starting from a random initial set of values.

In [4]:
# Confusion matrix for the fitted network, evaluated on the same records it was trained on.
predicted_labels = clf.predict(X)
classificationSummary(y, predicted_labels, class_names=classes)

Confusion Matrix (Accuracy 1.0000)

        Prediction
 Actual dislike    like
dislike       3       0
   like       0       3
