1. Import Dependencies

In [55]:
import tensorflow as tf
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import RandomOverSampler

2. Data Preparation and Preprocessing

In [70]:
cols=["flength","fwidth","fsize","fconc","fcocl","fasym","fm3long","fm3trans","falpha","fdist","class"]
df=pd.read_csv("magic04.data", names=cols)
df.head()

Unnamed: 0,flength,fwidth,fsize,fconc,fcocl,fasym,fm3long,fm3trans,falpha,fdist,class
0,28.7967,16.0021,2.6449,0.3918,0.1982,27.7004,22.011,-8.2027,40.092,81.8828,g
1,31.6036,11.7235,2.5185,0.5303,0.3773,26.2722,23.8238,-9.9574,6.3609,205.261,g
2,162.052,136.031,4.0612,0.0374,0.0187,116.741,-64.858,-45.216,76.96,256.788,g
3,23.8172,9.5728,2.3385,0.6147,0.3922,27.2107,-6.4633,-7.1513,10.449,116.737,g
4,75.1362,30.9205,3.1611,0.3168,0.1832,-5.5277,28.5525,21.8393,4.648,356.462,g


In [71]:
df["class"]=(df["class"]=="g").astype(int)

In [72]:
train,valid,test=np.split(df.sample(frac=1),[int(0.6*len(df)),int(0.8*len(df))])

In [73]:
def scale_dataset(dataframe,oversample=False):
  x=dataframe[dataframe.columns[:-1]].values
  y=dataframe[dataframe.columns[-1]].values
  scaler= StandardScaler()
  x=scaler.fit_transform(x)
  if oversample:
    ros=RandomOverSampler()
    x,y=ros.fit_resample(x,y)
  data=np.hstack((x,np.reshape(y,(-1,1))))

  return data,x,y

In [74]:
print(len(train[train["class"]==1]))
print(len(train[train["class"]==0]))

7356
4056


In [75]:
train,x_train,y_train=scale_dataset(train,oversample=True)
valid,x_valid,y_valid=scale_dataset(valid,oversample=False)
test,x_test,y_test=scale_dataset(test,oversample=False)

3. Neural Network Model

In [40]:
def train_model(x_train,y_train,num_nodes,dropout_prob,lr,batch_size,epochs):
  nn_model=tf.keras.Sequential([
      tf.keras.layers.Dense(num_nodes,activation="relu",input_shape=(10,)),
      tf.keras.layers.Dropout(dropout_prob),
      tf.keras.layers.Dense(num_nodes,activation="relu"),
      tf.keras.layers.Dropout(dropout_prob),
      tf.keras.layers.Dense(1,activation="sigmoid")

  ])
  nn_model.compile(optimizer=tf.keras.optimizers.Adam(lr),loss="binary_crossentropy",
                  metrics=['accuracy'])
  history=nn_model.fit(
    x_train,y_train,epochs=epochs,batch_size=batch_size,validation_split=0.2,verbose=False
  )
  return nn_model, history

4. Graphical Representation code

In [35]:
def plot_history(history):
  fig,(ax1,ax2)=plt.subplots(1,2, figsize=(10,4))
  ax1.plot(history.history['loss'],label='loss')
  ax1.plot(history.history['val_loss'],label='val_loss')
  ax1.set_xlabel('Epoch')
  ax1.set_ylabel('Binary crossentropy')
  ax1.legend()
  ax1.grid(True)


  ax2.plot(history.history['accuracy'],label='accuracy')
  ax2.plot(history.history['val_accuracy'],label='val_accuracy')
  ax2.set_xlabel('Epoch')
  ax2.set_ylabel('accuracy')
  ax2.legend()
  ax2.grid(True)
  plt.show()

5. Different combination for neural networks

In [50]:
least_val_loss= float('inf')
least_loss_model=None
epochs=100
for num_nodes in [16,32,64]:
  for dropout_prob in [0,0.2]:
    for lr in [0.01,0.005,0.001]:
      for batch_size in [32,64,128]:
        print(f"{num_nodes} nodes ,dropout {dropout_prob} , lr {lr} , batch_size {batch_size}")
        model, history = train_model(x_train,y_train,num_nodes,dropout_prob,lr,batch_size,epochs)
        plot_history(history)
        val_loss=model.evaluate(x_valid, y_valid)
        print(type(val_loss))
        for i in val_loss:
          if i < least_val_loss :
            least_val_loss= i
            least_loss_model=model

Output hidden; open in https://colab.research.google.com to view.

In [51]:
y_pred=least_loss_model.predict(x_test)
y_pred=(y_pred> 0.5).astype(int).reshape(-1,)
y_pred



array([0, 1, 0, ..., 0, 1, 1])

In [52]:
from sklearn.metrics import classification_report

In [53]:
print(classification_report(y_pred,y_test))

              precision    recall  f1-score   support

           0       0.75      0.88      0.81      1142
           1       0.94      0.87      0.91      2662

    accuracy                           0.88      3804
   macro avg       0.85      0.88      0.86      3804
weighted avg       0.89      0.88      0.88      3804



6. conclusion

we analyze almost 54 different Neural network model combination with different parameters and hyperparameters, and with least validation loss model we check the accuracy for dataset by classification Report method.