In [0]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import classification_report, confusion_matrix

import pickle
import pandas as pd
import numpy as np

In [2]:
from keras import Sequential
from keras.layers import Dense

Using TensorFlow backend.


In [0]:
def Create_NN_Model(No_Features=300, No_Hidden_Layers=2, No_Hidden_Neurons=150, 
                    Hidden_Activation ="relu", No_OP_Neurons=1, 
                    Output_Activation="sigmoid", Kernel_Initializer="random_normal",
                    Optimizer="adam", Loss='binary_crossentropy', Metrics =['accuracy']):
  
  classifier = Sequential()

  ## Input Layer
  classifier.add(Dense(No_Hidden_Neurons, activation=Hidden_Activation, 
                       kernel_initializer=Kernel_Initializer, input_dim=No_Features))
  
  ## Hidden layers
  for i in range(No_Hidden_Layers):
    classifier.add(Dense(No_Hidden_Neurons, activation=Hidden_Activation, 
                         kernel_initializer=Kernel_Initializer))
    
  ## Output Layer
  classifier.add(Dense(No_OP_Neurons, activation=Output_Activation, 
                       kernel_initializer=Kernel_Initializer))
  
  classifier.compile(optimizer =Optimizer, loss=Loss, metrics = Metrics)

  return classifier
  

In [0]:
def Train_NN(NN_classifier, train_data, feature_list=[], Batch_Size=50, Epochs=100):

  train_data.dropna()
  train_data = pd.DataFrame(np.nan_to_num(np.array(train_data)), columns = train_data.columns)
  train_data['Label'] = pd.to_numeric(train_data['Label'], errors='coerce')
  train_data = train_data.dropna(subset=['Label'])
  
  train_features = train_data[feature_list]    
  train_labels = train_data["Label"]
  train_labels = train_labels.astype('int')

  NN_classifier.fit(train_features,train_labels, batch_size=Batch_Size, epochs=Epochs)

  eval_model=NN_classifier.evaluate(train_features, train_labels)
  print("Loss: ", eval_model[0])
  print("Accuracy of the model: ", eval_model[1])

  return NN_classifier


In [0]:
## Store trained model in a file to reuse in other codes without training again on same data

def Store_Trained_NN(NN_obj, Filepath):
  
  with open(Filepath, "wb") as file:
    pickle.dump(NN_obj, file)

In [0]:
## Load stored trained model and returns random forest model object

def Load_Trained_NN(Filepath):
  
  with open(Filepath, "rb") as file:
    NN_obj = pickle.load(file)

  return NN_obj

In [0]:
def Evaluate_NN(test_data, NN_Model_FilePath, feature_list=[], threshold=0.5):
  
  test_data.dropna()
  test_data = pd.DataFrame(np.nan_to_num(np.array(test_data)),  columns = test_data.columns)
  test_data['Label'] = pd.to_numeric(test_data['Label'], errors='coerce')
  test_data = test_data.dropna(subset=['Label'])

  test_features = test_data[feature_list]
  test_labels = test_data["Label"]
  test_labels = test_labels.astype('int')

  NN_obj = Load_Trained_NN(NN_Model_FilePath) 
  predictions = NN_obj.predict(test_features)
  predictions_list = [int(p[0]) for p in predictions]
  
  for i in range(len(predictions_list)):
    if predictions_list[i] >= threshold:
      predictions_list[i] = 1
    else:
      predictions_list[i] = 0
  
  errors = abs(predictions_list - test_labels)

  # Calculate mean absolute error (MAE)
  MAE = round(np.mean(errors), 2)
  
  ## Confusion Matrix and Classification Report
  Confusion_Matrix = confusion_matrix(test_labels,predictions_list)
  Report = classification_report(test_labels,predictions_list)
  
  return MAE, Confusion_Matrix, Report
  

In [8]:
## WORD2VEC EMBEDDINGS

Column_List = [ "Caption"]
Vector_Size = 300
Embedding_Cols = [str(i) for i in range(Vector_Size)]
Column_List.extend(Embedding_Cols)
Column_List.append("Label")

Train_Embedding_FilePath = "/content/TrainData_Word2Vec_Embeddings.csv"
Test_Embedding_FilePath = "/content/TestData_Word2Vec_Embeddings.csv"
NN_Model_FilePath = "/content/NN_Word2Vec_Train_Model.pkl"

train_data = pd.read_csv(Train_Embedding_FilePath, usecols=Column_List)
test_data = pd.read_csv(Test_Embedding_FilePath, usecols=Column_List)

## Training Phase
NN_Classifier = Create_NN_Model()
NN_obj = Train_NN(NN_Classifier, train_data, Embedding_Cols)
Store_Trained_NN(NN_obj, NN_Model_FilePath)






Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where



Epoch 1/100





Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch

In [9]:
## Testing Phase
MAE, Confusion_Matrix, Report = Evaluate_NN(test_data, NN_Model_FilePath, Embedding_Cols, 0.5)

print("============ FOR WORD2VEC EMBEDDINGS ============")

print("MEAN ABSOLUTE ERROR: ", MAE)

print("\n")
print("============ CONFUSION MATRIX ===============")
print(Confusion_Matrix)

print("\n")
print("============ CLASSIFICATION REPORT ==============")
print(Report)

tn, fp, fn, tp = Confusion_Matrix.ravel()
Accuracy = (tn+tp)/(tn + fp + fn + tp)

print("Accuracy: ", Accuracy*100)

MEAN ABSOLUTE ERROR:  0.26


[[728   5]
 [371 352]]


              precision    recall  f1-score   support

           0       0.66      0.99      0.79       733
           1       0.99      0.49      0.65       723

    accuracy                           0.74      1456
   macro avg       0.82      0.74      0.72      1456
weighted avg       0.82      0.74      0.72      1456

Accuracy:  74.17582417582418


In [11]:
## GLOVE EMBEDDINGS

Column_List = [ "Caption_Tokens"]
Vector_Size = 300
Embedding_Cols = [str(i) for i in range(Vector_Size)]
Column_List.extend(Embedding_Cols)
Column_List.append("Label")

Train_Embedding_FilePath = "/content/TrainData_Glove_Embeddings.csv"
Test_Embedding_FilePath = "/content/TestData_Glove_Embeddings.csv"
NN_Model_FilePath = "/content/NN_Glove_Train_Model.pkl"

train_data = pd.read_csv(Train_Embedding_FilePath, usecols=Column_List)
test_data = pd.read_csv(Test_Embedding_FilePath, usecols=Column_List)

## Training Phase
NN_Classifier = Create_NN_Model()
NN_obj = Train_NN(NN_Classifier, train_data, Embedding_Cols)
Store_Trained_NN(NN_obj, NN_Model_FilePath)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [12]:
## Testing Phase
MAE, Confusion_Matrix, Report = Evaluate_NN(test_data, NN_Model_FilePath, Embedding_Cols, 0.5)

print("============ FOR GLOVE EMBEDDINGS ============")

print("MEAN ABSOLUTE ERROR: ", MAE)

print("\n")
print("============ CONFUSION MATRIX ===============")
print(Confusion_Matrix)

print("\n")
print("============ CLASSIFICATION REPORT ==============")
print(Report)

tn, fp, fn, tp = Confusion_Matrix.ravel()
Accuracy = (tn+tp)/(tn + fp + fn + tp)

print("Accuracy: ", Accuracy*100)

MEAN ABSOLUTE ERROR:  0.4


[[713   4]
 [578 161]]


              precision    recall  f1-score   support

           0       0.55      0.99      0.71       717
           1       0.98      0.22      0.36       739

    accuracy                           0.60      1456
   macro avg       0.76      0.61      0.53      1456
weighted avg       0.77      0.60      0.53      1456

Accuracy:  60.027472527472526
