## Developing a Multi-Output Neural Network 

In [3]:
import pandas as pd
import os
desktop_path = os.path.expanduser("~/Desktop")
file_path = os.path.join(desktop_path, "random_subset.xlsx")
df = pd.read_excel(file_path)
print(df.head())

   UMID                                     cleaned_review sentiment  \
0  AE_1  ms files left breathless got educated got mad ...  Positive   
1  AE_2                                          fast card  Negative   
2  AE_3  heavy dutyeasy installand important easy attac...  Positive   
3  AE_4  summer breeze second book series collection ba...  Positive   
4  AE_5               outstanding value great product look  Positive   

  predicted_category  
0               Care  
1            General  
2              Leads  
3            General  
4              Leads  


In [4]:
import pandas as pd
import os
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense
from tensorflow.keras.models import Model

2023-09-05 09:48:10.310698: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-09-05 09:48:13.397986: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-09-05 09:48:13.410037: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [5]:
reviews = df['cleaned_review'].tolist()
sentiments = df['sentiment'].tolist()
categories = df['predicted_category'].tolist()

In [6]:
sentiment_mapping = {'Positive': 0, 'Negative': 1, 'Neutral': 2}
category_mapping = {'Care': 0, 'General': 1, 'Leads': 2}

In [7]:
sentiment_labels = [sentiment_mapping[label] for label in sentiments]
category_labels = [category_mapping[label] for label in categories]

In [8]:
max_words = 10000  # Choose an appropriate vocabulary size
tokenizer = Tokenizer(num_words=max_words, oov_token='<OOV>')
tokenizer.fit_on_texts(reviews)
sequences = tokenizer.texts_to_sequences(reviews)
max_sequence_length = max([len(seq) for seq in sequences])
padded_sequences = pad_sequences(sequences, maxlen=max_sequence_length, padding='post')

In [9]:
train_reviews, val_reviews, train_sentiments, val_sentiments, train_categories, val_categories = train_test_split(
    padded_sequences, sentiment_labels, category_labels, test_size=0.2, random_state=42
)

In [10]:
# Define the model
input_layer = Input(shape=(max_sequence_length,))
embedding_layer = Embedding(input_dim=max_words, output_dim=128)(input_layer)
lstm_layer = LSTM(64)(embedding_layer)

2023-09-05 09:48:31.557114: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2023-09-05 09:48:31.560545: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2023-09-05 09:48:31.563079: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus

In [11]:
# Separate output layers for sentiment and category
num_sentiments = len(sentiment_mapping)
num_categories = len(category_mapping)

In [12]:
sentiment_output = Dense(num_sentiments, activation='softmax', name='sentiment')(lstm_layer)
category_output = Dense(num_categories, activation='softmax', name='category')(lstm_layer)

In [13]:
model = Model(inputs=input_layer, outputs=[sentiment_output, category_output])

In [14]:
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])


In [15]:
train_sentiments = np.array(train_sentiments)
train_categories = np.array(train_categories)
val_sentiments = np.array(val_sentiments)
val_categories = np.array(val_categories)

In [21]:
# Train the model
model.fit(train_reviews, [train_sentiments, train_categories], epochs=10, batch_size=32,
          validation_data=(val_reviews, [val_sentiments, val_categories]))

Epoch 1/10


2023-08-22 22:30:55.132586: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2023-08-22 22:30:55.135348: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2023-08-22 22:30:55.137486: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus



2023-08-22 22:30:59.658412: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2023-08-22 22:30:59.660909: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2023-08-22 22:30:59.663002: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus

Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f61c0947700>

In [22]:
# Save the model
model.save('multi_output_model.h5')
print("Model saved successfully")

Model saved successfully


## Validation

In [37]:
excel_path = '/home/xjoex/Desktop/review_text.xlsx'
df_loaded = pd.read_excel(excel_path)
print(df_loaded.head())

                                          reviewText  \
0  This was the first time I read Garcia-Aguilera...   
1  As with all of Ms. Garcia-Aguilera's books, I ...   
2  I've not read any of Ms Aguilera's works befor...   
3  This romance novel is right up there with the ...   
4  Carolina Garcia Aguilera has done it again.  S...   

                                      cleaned_review  
0  first time read garciaaguilera came upon name ...  
1  ms garciaaguileras books think must read impos...  
2  ive read ms aguileras works finished one hot s...  
3  romance novel right rest amazing mystery novel...  
4  carolina garcia aguilera done shes written ano...  


In [38]:
# Preprocess text data from df_loaded
reviews_subset = df_loaded['cleaned_review'].tolist()[:200] 
sequences_subset = tokenizer.texts_to_sequences(reviews_subset)
padded_sequences_subset = pad_sequences(sequences_subset, maxlen=max_sequence_length, padding='post')

In [39]:
sentiment_probs, category_probs = model.predict(padded_sequences_subset)



In [40]:
# Convert probabilities to labels
predicted_sentiments = np.argmax(sentiment_probs, axis=1)
predicted_categories = np.argmax(category_probs, axis=1)

In [41]:
reverse_sentiment_mapping = {v: k for k, v in sentiment_mapping.items()}
reverse_category_mapping = {v: k for k, v in category_mapping.items()}

In [42]:
predicted_sentiment_labels = [reverse_sentiment_mapping[sentiment] for sentiment in predicted_sentiments]
predicted_category_labels = [reverse_category_mapping[category] for category in predicted_categories]

In [43]:
predictions_df = pd.DataFrame({
    'Review': reviews_subset,
    'Predicted_Sentiment': predicted_sentiment_labels,
    'Predicted_Category': predicted_category_labels
})

In [44]:
predictions_df.head(10)

Unnamed: 0,Review,Predicted_Sentiment,Predicted_Category
0,first time read garciaaguilera came upon name ...,Negative,Leads
1,ms garciaaguileras books think must read impos...,Negative,Leads
2,ive read ms aguileras works finished one hot s...,Negative,Leads
3,romance novel right rest amazing mystery novel...,Negative,Leads
4,carolina garcia aguilera done shes written ano...,Negative,Leads
5,write mysteriesbut sure write love story one h...,Negative,Leads
6,garciaaguilera written book cant put read love...,Negative,Leads
7,timing right good book actually long overdue t...,Negative,Leads
8,engaging dark reading book could almost feel t...,Negative,Leads
9,set amid backdrop new york citys impervious hi...,Negative,Leads


In [None]:
desktop_path = f"/home/xjoex/Desktop"  
predictions_excel_filename = "predictions.xlsx"
predictions_excel_path = f"{desktop_path}/{predictions_excel_filename}"

predictions_df.to_excel(predictions_excel_path, index=False)

print(f"Predictions saved to: {predictions_excel_path}")

## Developing a Semi-Supervised Neural Network

In [45]:
import pandas as pd
file_path = '/home/xjoex/Desktop/review_text.xlsx'
df_review_text = pd.read_excel(file_path)
print(df_review_text.head())

                                          reviewText  \
0  This was the first time I read Garcia-Aguilera...   
1  As with all of Ms. Garcia-Aguilera's books, I ...   
2  I've not read any of Ms Aguilera's works befor...   
3  This romance novel is right up there with the ...   
4  Carolina Garcia Aguilera has done it again.  S...   

                                      cleaned_review  
0  first time read garciaaguilera came upon name ...  
1  ms garciaaguileras books think must read impos...  
2  ive read ms aguileras works finished one hot s...  
3  romance novel right rest amazing mystery novel...  
4  carolina garcia aguilera done shes written ano...  


In [46]:
unlabeled_reviews = df_review_text['cleaned_review'].astype(str).tolist()
unlabeled_sequences = tokenizer.texts_to_sequences(unlabeled_reviews)
padded_unlabeled_sequences = pad_sequences(unlabeled_sequences, maxlen=max_sequence_length, padding='post')

In [47]:
# Pseudo-label the unlabeled data
unlabeled_sentiments_probs, unlabeled_categories_probs = model.predict(padded_unlabeled_sequences)
pseudo_sentiments = np.argmax(unlabeled_sentiments_probs, axis=1)
pseudo_categories = np.argmax(unlabeled_categories_probs, axis=1)

2023-09-05 10:01:41.854309: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 1076114512 exceeds 10% of free system memory.


 3966/32713 [==>...........................] - ETA: 17:42

KeyboardInterrupt: 

In [48]:
pseudo_labeled_data = pd.DataFrame({
    'cleaned_review': unlabeled_reviews,
    'pseudo_sentiment': pseudo_sentiments,
    'pseudo_category': pseudo_categories
})

NameError: name 'pseudo_sentiments' is not defined

In [None]:
desktop_path = os.path.expanduser("~/Desktop")

In [27]:
excel_path = os.path.join(desktop_path, "pseudo_labeled_data_full.xlsx")
pseudo_labeled_data.to_excel(excel_path, index=False)
print("Pseudo-labeled data saved to Excel on the desktop successfully.")

NameError: name 'pseudo_labeled_data' is not defined

In [None]:
# Combine labeled and pseudo-labeled data
combined_reviews = np.concatenate((train_reviews, padded_unlabeled_sequences[:6000]))
combined_sentiments = np.concatenate((train_sentiments, pseudo_sentiments[:6000]))
combined_categories = np.concatenate((train_categories, pseudo_categories[:6000]))

In [39]:
# Train the model with combined data
model.fit(combined_reviews, [combined_sentiments, combined_categories], epochs=10, batch_size=32,
          validation_data=(val_reviews, [val_sentiments, val_categories]))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f61c0909f60>

In [40]:
model.save('trained_multi_output_modelIT4.h5')
print("Trained model saved successfully")

Trained model saved successfully


#### Evaluating the Model 

In [49]:
from keras.models import load_model

In [50]:
loaded_model = load_model('trained_multi_output_modelIT4.h5')

2023-09-05 10:05:18.968938: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2023-09-05 10:05:18.973401: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2023-09-05 10:05:18.976680: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus

In [51]:
random_subset = df_review_text.sample(n=200, random_state=42)
random_subset_reviews = random_subset['cleaned_review'].tolist()
random_subset_sequences = tokenizer.texts_to_sequences(random_subset_reviews)
padded_random_subset_sequences = pad_sequences(random_subset_sequences, maxlen=max_sequence_length, padding='post')

In [52]:
predicted_sentiments_probs, predicted_categories_probs = loaded_model.predict(padded_random_subset_sequences)
predicted_sentiments = [reverse_sentiment_mapping[np.argmax(probs)] for probs in predicted_sentiments_probs]
predicted_categories = [reverse_category_mapping[np.argmax(probs)] for probs in predicted_categories_probs]

2023-09-05 10:05:27.894038: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2023-09-05 10:05:27.896731: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2023-09-05 10:05:27.898508: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus



In [53]:
random_subset['predicted_sentiment'] = predicted_sentiments
random_subset['predicted_category'] = predicted_categories

In [54]:
print(random_subset)

                                                reviewText  \
906699   Perfect case for keeping all my flash drives i...   
998468   I bought a used camera that couldn't use the n...   
466133                                       it's perfect!   
923787   Added this to my home workstation.  Very comfo...   
625731            Solid but tough to install for some subs   
...                                                    ...   
1040435  Very tight and precise mouse trackball. I also...   
151362   Got some great portraits last week - that's wh...   
815821                                          Love them!   
654771                                        Good Product   
284373                                                Anne   

                                            cleaned_review  \
906699   perfect case keeping flash drives one place ev...   
998468   bought used camera couldnt use newer version m...   
466133                                             perfect   
923787 

In [60]:
random_subset.head(10)

Unnamed: 0,reviewText,cleaned_review,predicted_sentiment,predicted_category
906699,Perfect case for keeping all my flash drives i...,perfect case keeping flash drives one place ev...,Positive,General
998468,I bought a used camera that couldn't use the n...,bought used camera couldnt use newer version m...,Positive,General
466133,it's perfect!,perfect,Positive,General
923787,Added this to my home workstation. Very comfo...,added home workstation comfortable natural wri...,Positive,General
625731,Solid but tough to install for some subs,solid tough install subs,Positive,General
245222,8910 Universal Remote Really Is Universal,universal remote really universal,Positive,General
559339,Having no problems here...,problems,Positive,General
994926,This is exactly what I expected.,exactly expected,Positive,General
537310,... price this this product works phenomenal v...,price product works phenomenal reliable distan...,Positive,General
596339,Awesome Mic. Always buy these guys never have ...,awesome mic always buy guys never issue havent...,Positive,General


In [61]:
true_sentiments = random_subset['predicted_sentiment']
true_categories = random_subset['predicted_category']

In [62]:
sentiment_accuracy = (predicted_sentiments == true_sentiments).mean()
category_accuracy = (predicted_categories == true_categories).mean()

### Hyperparameter Tuning 

In [66]:
from itertools import product
import numpy as np
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense
from tensorflow.keras.models import Model

In [67]:
# Define hyperparameter values for tuning
learning_rates = [0.001, 0.01, 0.1]
batch_sizes = [16, 32, 64]
num_lstm_units = [64, 128, 256]
embedding_dims = [50, 100, 200]
dropout_rates = [0.2, 0.3, 0.4]

In [68]:
best_accuracy = 0.0
best_hyperparameters = {}
best_model = None

In [69]:
for lr, batch_size, lstm_units, embedding_dim, dropout_rate in product(learning_rates, batch_sizes, num_lstm_units, embedding_dims, dropout_rates):
    input_layer = Input(shape=(max_sequence_length,))
    embedding_layer = Embedding(input_dim=max_words, output_dim=embedding_dim)(input_layer)
    lstm_layer = LSTM(lstm_units)(embedding_layer)
    
    sentiment_output = Dense(num_sentiments, activation='softmax', name='sentiment')(lstm_layer)
    category_output = Dense(num_categories, activation='softmax', name='category')(lstm_layer)
    
    model = Model(inputs=input_layer, outputs=[sentiment_output, category_output])
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

2023-09-05 11:34:43.965598: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2023-09-05 11:34:43.977089: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2023-09-05 11:34:43.978753: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus

In [70]:
model.fit(train_reviews, [train_sentiments, train_categories], epochs=10, batch_size=batch_size, verbose=0)

2023-09-05 11:37:19.965475: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2023-09-05 11:37:19.968305: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2023-09-05 11:37:19.970181: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus

<keras.callbacks.History at 0x7f7d47d003a0>

In [71]:
val_accuracy = np.mean(model.evaluate(val_reviews, [val_sentiments, val_categories], verbose=0)[1])

2023-09-05 11:38:10.065362: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2023-09-05 11:38:10.067864: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2023-09-05 11:38:10.069634: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus

In [73]:
# Check if this model's accuracy is better than the current best
if val_accuracy > best_accuracy:
    best_accuracy = val_accuracy
    best_hyperparameters = {
        'learning_rate': lr,
        'batch_size': batch_size,
        'num_lstm_units': lstm_units,
        'embedding_dim': embedding_dim,
        'dropout_rate': dropout_rate
    }
    best_model = model


In [74]:
# Print the best hyperparameters and accuracy
print("Best Hyperparameters:")
print(best_hyperparameters)
print("Best Validation Accuracy:", best_accuracy)

Best Hyperparameters:
{'learning_rate': 0.1, 'batch_size': 64, 'num_lstm_units': 256, 'embedding_dim': 200, 'dropout_rate': 0.4}
Best Validation Accuracy: 0.5031511783599854


In [75]:
best_model.save('tuned_model.h5')

In [76]:
#tuned_model = load_model('tuned_model.h5')

In [63]:
from sklearn.model_selection import StratifiedKFold  
cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)  

#### GridSearchCV

In [64]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV 

### Validation from Tuned Model

In [84]:
tuned_model = load_model('tuned_model.h5')

2023-09-05 21:00:43.620783: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2023-09-05 21:00:43.623053: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2023-09-05 21:00:43.624707: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus

In [85]:
random_subset.head(10)

Unnamed: 0,reviewText,cleaned_review,predicted_sentiment,predicted_category
906699,Perfect case for keeping all my flash drives i...,perfect case keeping flash drives one place ev...,Positive,General
998468,I bought a used camera that couldn't use the n...,bought used camera couldnt use newer version m...,Positive,General
466133,it's perfect!,perfect,Positive,General
923787,Added this to my home workstation. Very comfo...,added home workstation comfortable natural wri...,Positive,General
625731,Solid but tough to install for some subs,solid tough install subs,Positive,General
245222,8910 Universal Remote Really Is Universal,universal remote really universal,Positive,General
559339,Having no problems here...,problems,Positive,General
994926,This is exactly what I expected.,exactly expected,Positive,General
537310,... price this this product works phenomenal v...,price product works phenomenal reliable distan...,Positive,General
596339,Awesome Mic. Always buy these guys never have ...,awesome mic always buy guys never issue havent...,Positive,General


In [82]:
subset_sequences = tokenizer.texts_to_sequences(subset_reviews)
padded_subset_sequences = pad_sequences(subset_sequences, maxlen=max_sequence_length, padding='post')

In [87]:
sentiment_probs, category_probs = tuned_model.predict(padded_subset_sequences)

2023-09-05 21:01:04.506890: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2023-09-05 21:01:04.509494: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2023-09-05 21:01:04.511129: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus



In [88]:
predicted_sentiments = [reverse_sentiment_mapping[np.argmax(probs)] for probs in sentiment_probs]
predicted_categories = [reverse_category_mapping[np.argmax(probs)] for probs in category_probs]

In [89]:
predictions_df = pd.DataFrame({
    'Review': subset_reviews,
    'Predicted_Sentiment': predicted_sentiments,
    'Predicted_Category': predicted_categories
})

In [103]:
num_rows = random_subset.shape[0]
print("Number of Rows in random_subset:", num_rows)

Number of Rows in random_subset: 200


In [104]:
random_subset.to_excel('predictions773.xlsx', index=False)

### Evaluating the model 

In [1]:
excel_file_path = '/home/xjoex/Downloads/predictions100.xlsx'

In [4]:
data = pd.read_excel(excel_file_path)

In [5]:
data.head(10)

Unnamed: 0,reviewText,cleaned_review,predicted_sentiment,predicted_category,Ground Truth_category,Ground Truth_Sentiment
0,Perfect case for keeping all my flash drives i...,perfect case keeping flash drives one place ev...,Positive,General,General,Positive
1,I bought a used camera that couldn't use the n...,bought used camera couldnt use newer version m...,Positive,General,General,Positive
2,it's perfect!,perfect,Positive,General,General,Positive
3,Added this to my home workstation. Very comfo...,added home workstation comfortable natural wri...,Positive,General,General,Positive
4,Solid but tough to install for some subs,solid tough install subs,Positive,General,General,Positive
5,8910 Universal Remote Really Is Universal,universal remote really universal,Positive,General,General,Positive
6,Having no problems here...,problems,Positive,General,General,Negative
7,This is exactly what I expected.,exactly expected,Positive,General,General,Positive
8,... price this this product works phenomenal v...,price product works phenomenal reliable distan...,Positive,General,General,Positive
9,Awesome Mic. Always buy these guys never have ...,awesome mic always buy guys never issue havent...,Positive,General,General,Positive


In [7]:
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

In [8]:
sentiment_accuracy = accuracy_score(data['Ground Truth_Sentiment'], data['predicted_sentiment'])

In [9]:
sentiment_precision = precision_score(data['Ground Truth_Sentiment'], data['predicted_sentiment'], average='weighted')

In [10]:
sentiment_recall = recall_score(data['Ground Truth_Sentiment'], data['predicted_sentiment'], average='weighted')

In [11]:
sentiment_f1_score = f1_score(data['Ground Truth_Sentiment'], data['predicted_sentiment'], average='weighted')

In [12]:
category_accuracy = accuracy_score(data['Ground Truth_category'], data['predicted_category'])

In [13]:
category_precision = precision_score(data['Ground Truth_category'], data['predicted_category'], average='weighted')

In [14]:
category_recall = recall_score(data['Ground Truth_category'], data['predicted_category'], average='weighted')

In [15]:
category_f1_score = f1_score(data['Ground Truth_category'], data['predicted_category'], average='weighted')

In [16]:
# Calculate the loss
incorrect_sentiment_predictions = (data['Ground Truth_Sentiment'] != data['predicted_sentiment']).sum()
incorrect_category_predictions = (data['Ground Truth_category'] != data['predicted_category']).sum()
total_predictions = len(data)

In [17]:
sentiment_loss = incorrect_sentiment_predictions / total_predictions
category_loss = incorrect_category_predictions / total_predictions

In [18]:
# Print the calculated metrics
print("Sentiment Accuracy:", sentiment_accuracy)
print("Category Accuracy:", category_accuracy)
print("Sentiment Precision:", sentiment_precision)
print("Sentiment Recall:", sentiment_recall)
print("Sentiment F1 Score:", sentiment_f1_score)
print("Category Precision:", category_precision)
print("Category Recall:", category_recall)
print("Category F1 Score:", category_f1_score)
print("Sentiment Loss:", sentiment_loss)
print("Category Loss:", category_loss)

Sentiment Accuracy: 0.898989898989899
Category Accuracy: 0.8888888888888888
Sentiment Precision: 0.898989898989899
Sentiment Recall: 0.898989898989899
Sentiment F1 Score: 0.898989898989899
Category Precision: 0.8923057737872553
Category Recall: 0.8888888888888888
Category F1 Score: 0.8904087696467419
Sentiment Loss: 0.10101010101010101
Category Loss: 0.1111111111111111


In [20]:
sentiment_accuracy_percent = sentiment_accuracy * 100
category_accuracy_percent = category_accuracy * 100
sentiment_precision_percent = sentiment_precision * 100
sentiment_recall_percent = sentiment_recall * 100
sentiment_f1_score_percent = sentiment_f1_score * 100
category_precision_percent = category_precision * 100
category_recall_percent = category_recall * 100
category_f1_score_percent = category_f1_score * 100
sentiment_loss_percent = (incorrect_sentiment_predictions / total_predictions) * 100
category_loss_percent = (incorrect_category_predictions / total_predictions) * 100

print("Sentiment Accuracy (%):", sentiment_accuracy_percent)
print("Category Accuracy (%):", category_accuracy_percent)
print("Sentiment Precision (%):", sentiment_precision_percent)
print("Sentiment Recall (%):", sentiment_recall_percent)
print("Sentiment F1 Score (%):", sentiment_f1_score_percent)
print("Category Precision (%):", category_precision_percent)
print("Category Recall (%):", category_recall_percent)
print("Category F1 Score (%):", category_f1_score_percent)
print("Sentiment Loss (%):", sentiment_loss_percent)
print("Category Loss (%):", category_loss_percent)

Sentiment Accuracy (%): 89.8989898989899
Category Accuracy (%): 88.88888888888889
Sentiment Precision (%): 89.8989898989899
Sentiment Recall (%): 89.8989898989899
Sentiment F1 Score (%): 89.8989898989899
Category Precision (%): 89.23057737872553
Category Recall (%): 88.88888888888889
Category F1 Score (%): 89.04087696467418
Sentiment Loss (%): 10.1010101010101
Category Loss (%): 11.11111111111111
