## Neural Networks - Classification Model

In [1]:
import pandas as pd
import numpy as np
from sklearn.neural_network import MLPClassifier

In [2]:
df=pd.read_csv("RedWine.csv",sep=";")

In [3]:
df.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


In [4]:
x=df.iloc[:,0:11]
x.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4


In [5]:
y=df.iloc[:,11]
y.head()

0    5
1    5
2    5
3    6
4    5
Name: quality, dtype: int64

In [6]:
from sklearn.model_selection import train_test_split
xtrain,xtest,ytrain,ytest=train_test_split(x,y,random_state=40,test_size=0.2)

In [7]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaler.fit(xtrain)
xtrain = scaler.transform(xtrain)
scaler.fit(xtest)
xtest = scaler.transform(xtest)

In [8]:
nn=MLPClassifier()
nn.fit(xtrain,ytrain)



MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=None, shuffle=True, solver='adam', tol=0.0001,
       validation_fraction=0.1, verbose=False, warm_start=False)

In [9]:
pred=nn.predict(xtest)

In [10]:
from sklearn.metrics import accuracy_score,classification_report,confusion_matrix

In [11]:
acc=accuracy_score(ytest,pred)
acc

0.628125

In [12]:
print(classification_report(ytest,pred))

              precision    recall  f1-score   support

           3       0.00      0.00      0.00         3
           4       0.00      0.00      0.00         8
           5       0.68      0.74      0.71       136
           6       0.58      0.64      0.61       125
           7       0.59      0.48      0.53        42
           8       0.00      0.00      0.00         6

   micro avg       0.63      0.63      0.63       320
   macro avg       0.31      0.31      0.31       320
weighted avg       0.60      0.63      0.61       320



  'precision', 'predicted', average, warn_for)


In [13]:
print(confusion_matrix(ytest,pred))

[[  0   1   2   0   0   0]
 [  0   0   7   1   0   0]
 [  0   0 101  35   0   0]
 [  0   0  35  80  10   0]
 [  0   0   3  19  20   0]
 [  0   0   0   2   4   0]]


In [14]:
print("List of weight matrices",len(nn.coefs_)) 
print("Length of first row of weight matrices",len(nn.coefs_[0]))
print("List of bias vectors",len(nn.intercepts_[0]))

List of weight matrices 2
Length of first row of weight matrices 11
List of bias vectors 100


## GridSearchCV - Tuning the model

In [15]:
nn.get_params()

{'activation': 'relu',
 'alpha': 0.0001,
 'batch_size': 'auto',
 'beta_1': 0.9,
 'beta_2': 0.999,
 'early_stopping': False,
 'epsilon': 1e-08,
 'hidden_layer_sizes': (100,),
 'learning_rate': 'constant',
 'learning_rate_init': 0.001,
 'max_iter': 200,
 'momentum': 0.9,
 'n_iter_no_change': 10,
 'nesterovs_momentum': True,
 'power_t': 0.5,
 'random_state': None,
 'shuffle': True,
 'solver': 'adam',
 'tol': 0.0001,
 'validation_fraction': 0.1,
 'verbose': False,
 'warm_start': False}

In [16]:
from sklearn.model_selection import GridSearchCV

In [26]:
grid_params={'hidden_layer_sizes': ((100,),(150,),(200,)),
             'alpha': [10,1,0.1,0.001,0.0001],
            'max_iter':[100,200,300]
            }

In [27]:
g_nn=GridSearchCV(nn,param_grid=grid_params,cv=3)

In [28]:
g_nn.fit(xtrain,ytrain)









GridSearchCV(cv=3, error_score='raise-deprecating',
       estimator=MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=None, shuffle=True, solver='adam', tol=0.0001,
       validation_fraction=0.1, verbose=False, warm_start=False),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'hidden_layer_sizes': ((100,), (150,), (200,)), 'alpha': [10, 1, 0.1, 0.001, 0.0001], 'max_iter': [100, 200, 300]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [29]:
pred=g_nn.predict(xtest)
acc=accuracy_score(ytest,pred)
acc

0.621875

In [30]:
g_nn.best_params_

{'alpha': 0.1, 'hidden_layer_sizes': (150,), 'max_iter': 100}

In [37]:
## MLP Regressor

In [38]:
x=df.iloc[:,0:10]
y=df.iloc[:,10]
print(x.head())
print(y.head())

   fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \
0            7.4              0.70         0.00             1.9      0.076   
1            7.8              0.88         0.00             2.6      0.098   
2            7.8              0.76         0.04             2.3      0.092   
3           11.2              0.28         0.56             1.9      0.075   
4            7.4              0.70         0.00             1.9      0.076   

   free sulfur dioxide  total sulfur dioxide  density    pH  sulphates  
0                 11.0                  34.0   0.9978  3.51       0.56  
1                 25.0                  67.0   0.9968  3.20       0.68  
2                 15.0                  54.0   0.9970  3.26       0.65  
3                 17.0                  60.0   0.9980  3.16       0.58  
4                 11.0                  34.0   0.9978  3.51       0.56  
0    9.4
1    9.8
2    9.8
3    9.8
4    9.4
Name: alcohol, dtype: float64


In [39]:
from sklearn.model_selection import train_test_split
xtrain,xtest,ytrain,ytest=train_test_split(x,y,random_state=40,test_size=0.2)

In [40]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaler.fit(xtrain)
xtrain = scaler.transform(xtrain)
scaler.fit(xtest)
xtest = scaler.transform(xtest)

In [41]:
from sklearn.neural_network import MLPRegressor
nnr=MLPRegressor()
nnr.fit(xtrain,ytrain)



MLPRegressor(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=None, shuffle=True, solver='adam', tol=0.0001,
       validation_fraction=0.1, verbose=False, warm_start=False)

In [42]:
nnr.predict(xtest)

array([10.41337041, 10.35418151, 10.21735727, 11.16022612,  8.83375936,
        9.72976473, 10.33305912, 12.17530644,  9.75192086, 10.30925453,
       11.2093864 ,  9.87400172, 11.81243294, 11.61976686,  9.54368775,
       11.0828277 , 10.66977513,  9.69306287,  9.56685794,  9.83231541,
        9.75135248,  9.64363337, 10.25427092,  9.97287318, 11.57385555,
        9.36010905, 11.15159529,  9.39961583, 11.37921968,  9.69105623,
       10.16375625, 10.02282563,  9.32232575,  9.97460789,  9.62414315,
       10.63048898, 13.95796483, 10.16982061,  9.70314587,  9.65191923,
       10.00845622, 10.03097931, 10.11268466, 12.16328442, 10.37928734,
        9.86358787,  9.78051662, 11.16362133, 12.51665922,  9.48578928,
       12.17530644,  9.41115114,  8.50359744, 12.16046242, 10.30464253,
        9.35091779, 10.4364868 ,  9.78458732, 10.83609252, 10.07442242,
       10.99430313,  9.15853262,  9.95650913, 10.16914084, 10.45058142,
       10.00910331, 10.20788443, 10.43441541, 11.25855279, 11.67

In [44]:
from sklearn.metrics import mean_squared_error,r2_score
nnr_score = nnr.score(xtrain,ytrain)
print("Model R2:",nnr_score*100)
ypred = nnr.predict(xtest)
mse=mean_squared_error(ytest, ypred)
print('MSE',mse)
r2=r2_score(ytest, ypred)
print('Test RSquare',r2*100)

Model R2: 61.1164563619591
MSE 0.5200106833714262
Test RSquare 55.715872296262745
