In [19]:
import numpy as np
import pandas as pd
from scipy import stats
import statsmodels.api as sm

import plotly
import plotly.express as px
np.set_printoptions(precision=2,linewidth=120,suppress=True)
pd.options.mode.chained_assignment = None  # default='warn'
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

In [2]:
from sklearn.neural_network import MLPClassifier
from sklearn import datasets, metrics

np.set_printoptions(precision=2,linewidth=120,suppress=True)

# Data and preprocessing

In [11]:
# The iris dataset has 4 features (sepal and petal width and length in cm) and three classes (species of flower)
iris = datasets.load_iris()
#print(iris.DESCR)

iris.data.shape    # iris.data is a numpy.ndarray object. iris.target contains the class variable (in numeric form)
iris.data[0:5,:] , iris.target[0:5]   

dm = iris.data      # data matrix    

(150, 4)

(array([[5.1, 3.5, 1.4, 0.2],
        [4.9, 3. , 1.4, 0.2],
        [4.7, 3.2, 1.3, 0.2],
        [4.6, 3.1, 1.5, 0.2],
        [5. , 3.6, 1.4, 0.2]]),
 array([0, 0, 0, 0, 0]))

In [7]:
# Splitting the data and classifications into training and test sets

nrows = dm.shape[0]         # number of rows in the data matrix
f = 2/3                     # fraction to be used for training

ntrain = int(f*nrows)     # Number of observations to be used for training
ntest = nrows - ntrain

v1 = np.random.choice(nrows,size=ntrain,replace=False) # vector of random integers from 0 to nrows-1
v2 = np.setdiff1d(np.arange(nrows),v1)


dtrain = dm[v1,:] ; ctrain = iris.target[v1]    # training data and corresponding classifications

dtest = dm[v2,:]  ; ctest = iris.target[v2]     # test data and classifications

In [20]:
# Standardizing data
dtrain2 = (dtrain - np.mean(dtrain,axis=0))/np.std(dtrain,axis=0)
dtest2 = (dtest - np.mean(dtest,axis=0))/np.std(dtest,axis=0)

# Network training

In [25]:
from sklearn.neural_network import MLPClassifier

ANN1 = MLPClassifier(solver='lbfgs', alpha=1e-5,
                    hidden_layer_sizes=(5, 3), random_state=1)

ANN1.fit(dtrain2, ctrain)

MLPClassifier(alpha=1e-05, hidden_layer_sizes=(5, 3), random_state=1,
              solver='lbfgs')

# Prediction

In [29]:
yhat = ANN1.predict(dtest2)
yhat
ctest
metrics.mean_squared_error(ctest,yhat)
metrics.accuracy_score(ctest,yhat)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

0.02

0.98

In [28]:
help(metrics.accuracy_score)

Help on function accuracy_score in module sklearn.metrics._classification:

accuracy_score(y_true, y_pred, *, normalize=True, sample_weight=None)
    Accuracy classification score.
    
    In multilabel classification, this function computes subset accuracy:
    the set of labels predicted for a sample must *exactly* match the
    corresponding set of labels in y_true.
    
    Read more in the :ref:`User Guide <accuracy_score>`.
    
    Parameters
    ----------
    y_true : 1d array-like, or label indicator array / sparse matrix
        Ground truth (correct) labels.
    
    y_pred : 1d array-like, or label indicator array / sparse matrix
        Predicted labels, as returned by a classifier.
    
    normalize : bool, optional (default=True)
        If ``False``, return the number of correctly classified samples.
        Otherwise, return the fraction of correctly classified samples.
    
    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.
 