# Artificial Intelligence | Deep Learning Model | Text Sentiment Analysis
### Author: Valentin Kisimov

### Importing all the libraries needed

In [None]:
import pandas as pd
import string
from keras.models import Sequential
from keras import layers
from sklearn.model_selection import train_test_split
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
import gensim
import matplotlib.pyplot as plt

from google.colab import drive


In [None]:
# Mount Google Drive at /content/drive; the CSV files are read from "My Drive" below.
drive.mount('/content/drive')

Mounted at /content/drive


## The data files (train.csv, test.csv, val.csv) must be in the root folder of Google Drive to load correctly.

In [None]:
# Read the three splits (train, test, val, in that order) from the Drive root.
# Column names are supplied explicitly as 'sentence' and 'label'.
df_train, df_test, df_val = (
    pd.read_csv(csv_path, names=['sentence', 'label'], sep=',')
    for csv_path in (
        '/content/drive/My Drive/train.csv',
        '/content/drive/My Drive/test.csv',
        '/content/drive/My Drive/val.csv',
    )
)

## Simple Pre-Process

In [None]:
# Lower-case every sentence in place, then strip punctuation into a separate
# "*_pro" Series (the original DataFrames keep their punctuation).
#
# The pattern matches any character that is neither a word character nor
# whitespace. regex=True is required: since pandas 2.0, str.replace treats the
# pattern as a literal string by default, which would leave punctuation untouched.
# The raw string avoids invalid-escape warnings for \w and \s.
df_train['sentence'] = df_train['sentence'].str.lower()
df_train_pro = df_train['sentence'].str.replace(r'[^\w\s]', '', regex=True)

df_test['sentence'] = df_test['sentence'].str.lower()
df_test_pro = df_test['sentence'].str.replace(r'[^\w\s]', '', regex=True)

df_val['sentence'] = df_val['sentence'].str.lower()
df_val_pro = df_val['sentence'].str.replace(r'[^\w\s]', '', regex=True)

## Tokenizing, preparing the data for training and assigning an integer index to each word

In [None]:
from keras import regularizers


# NOTE(review): max_words is not referenced anywhere in the visible notebook --
# confirm whether it was meant to be the tokenizer's num_words or a sequence length.
max_words = 719
# With num_words=1000 the tokenizer only emits indices below 1000 when
# converting texts to sequences; rarer words are dropped.
tokenizer = Tokenizer(num_words=1000)

# fit_on_texts updates the tokenizer in place and returns None, so its result
# is not stored. The vocabulary is learned from the training split only.
tokenizer.fit_on_texts(df_train_pro)

# NOTE(review): word_index holds every word seen during fitting regardless of
# num_words, so this is larger than the indices actually emitted -- confirm
# whether the embedding should instead be sized by num_words.
vocab_size = len(tokenizer.word_index) + 1  # Adding 1 because of reserved 0 index

sequences_train = tokenizer.texts_to_sequences(df_train_pro)
sequences_test = tokenizer.texts_to_sequences(df_test_pro)
sequences_val = tokenizer.texts_to_sequences(df_val_pro)


# Pad the training split to its own longest sequence, then pad/truncate the
# other splits to that same length so every split has the same input width
# (previously each split was padded to its own maximum).
X_train = pad_sequences(sequences_train)
max_len = X_train.shape[1]
X_test = pad_sequences(sequences_test, maxlen=max_len)
X_val = pad_sequences(sequences_val, maxlen=max_len)


y_train = df_train['label'].values
y_test = df_test['label'].values
y_val = df_val['label'].values


# Quick sanity check: one padded test sequence and the vocabulary size.
#print(X_train)
print(X_test[1])
print(vocab_size)

[  0   0   0 ...  21 727  15]
40420


In [None]:
from keras.layers import Embedding
from keras.models import Sequential
from keras import layers
from keras import regularizers
from keras import backend as K
from keras.callbacks import ModelCheckpoint

## Two functions used to display the results after training

In [None]:
import matplotlib.pyplot as plt

def plot_history(training):
  """Plot per-epoch validation loss and training accuracy on one figure.

  Args:
    training: object exposing a ``history`` dict with ``'val_loss'`` and
      ``'accuracy'`` sequences (in this notebook, the value returned by
      ``model.fit``).
  """
  # Both curves share one y-axis, hence the generic 'value' label.
  plt.plot(training.history['val_loss'], label='Validation loss')
  plt.plot(training.history['accuracy'], label='Accuracy')
  plt.title('Sentiment analysis')
  plt.ylabel('value')
  plt.xlabel('epoch')
  plt.legend(loc="lower right")
  plt.show()

def display_learning_curves(history):
    """Draw the training and validation loss curves on a 5x5 inch figure.

    Args:
        history: object exposing a ``history`` dict with ``"loss"`` and
            ``"val_loss"`` sequences (in this notebook, the value returned
            by ``model.fit``).
    """
    figure, axis = plt.subplots(figsize=(5, 5))

    # Training loss is drawn first, validation loss second; the legend
    # entries below follow that order.
    for series_key in ("loss", "val_loss"):
        axis.plot(history.history[series_key])

    axis.legend(["train", "test"], loc="upper right")
    axis.set_xlabel("Epochs")
    axis.set_ylabel("Loss")
    plt.show()

## Models

### Model 1

In [None]:

def model_1():
    """Build, compile and summarise model 1.

    Architecture: 64-dimensional embedding over the module-level
    ``vocab_size``, a bidirectional LSTM with 12 units and dropout 0.6,
    and a single sigmoid output unit. Compiled with Adam and binary
    cross-entropy, tracking accuracy.

    Returns:
        The compiled Sequential model.
    """
    embedding_dim = 64

    model = Sequential([
        layers.Embedding(vocab_size, embedding_dim),  # the embedding layer
        layers.Bidirectional(layers.LSTM(12, dropout=0.6)),
        layers.Dense(1, activation='sigmoid'),
    ])

    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    model.summary()
    return model

# Build model 1 and train it for 15 epochs; the split loaded from test.csv is
# passed to Keras as validation data.
model1 = model_1()
training_1 = model1.fit(
    X_train,
    y_train,
    epochs=15,
    validation_data=(X_test, y_test),
)



In [None]:
# Final evaluation of model 1 on the split loaded from val.csv, which is not
# used during fit(). The model is compiled with the accuracy metric, so the
# result is [loss, accuracy].
results1 = model1.evaluate(X_val, y_val)
print("test loss, test acc:", results1)

test loss, test acc: [0.5685977339744568, 0.7350000143051147]


In [None]:
# Training vs. validation loss per epoch for model 1.
display_learning_curves(training_1)

In [None]:
# Validation loss and training accuracy per epoch for model 1.
plot_history(training_1)

### Model 2

In [None]:
from keras.layers import Dense

def model_2():
    """Build, compile and summarise model 2.

    Architecture: 64-dimensional embedding over the module-level
    ``vocab_size``, dropout of 0.2 on the embedded sequence, a bidirectional
    LSTM with 24 units and dropout 0.6, and a single sigmoid output unit.
    Compiled with Adam and binary cross-entropy, tracking accuracy.

    Returns:
        The compiled Sequential model.
    """
    embedding_dim = 64

    model = Sequential()
    model.add(layers.Embedding(vocab_size, embedding_dim)) #The embedding layer
    # The Dropout layer must be added to the model; constructing it without
    # model.add() (as before) has no effect on the network.
    model.add(layers.Dropout(0.2))
    model.add(layers.Bidirectional(layers.LSTM(24, dropout=0.6)))
    model.add(layers.Dense(1,activation='sigmoid'))

    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    # summary() prints by itself and returns None, so it is called once and
    # not wrapped in print().
    model.summary()
    return model



# Build model 2 and train it for 15 epochs; the split loaded from test.csv is
# passed to Keras as validation data.
model2 = model_2()
training_2 = model2.fit(
    X_train,
    y_train,
    epochs=15,
    validation_data=(X_test, y_test),
)



In [None]:
# Final evaluation of model 2 on the split loaded from val.csv, which is not
# used during fit(). The model is compiled with the accuracy metric, so the
# result is [loss, accuracy].
results2 = model2.evaluate(X_val, y_val)
print("test loss, test acc:", results2)

In [None]:
# Training vs. validation loss per epoch for model 2.
display_learning_curves(training_2)


In [None]:
# Validation loss and training accuracy per epoch for model 2.
plot_history(training_2)

### Model 3

In [None]:
def model_3():
    """Build, compile and summarise model 3.

    Architecture: 32-dimensional embedding over the module-level
    ``vocab_size``, a bidirectional LSTM with 12 units and dropout 0.6,
    and a single sigmoid output unit. Compiled with Adam and binary
    cross-entropy, tracking accuracy.

    Returns:
        The compiled Sequential model.
    """
    embedding_dim = 32

    model = Sequential()
    # Stack the layers in order: embedding -> BiLSTM -> sigmoid output.
    for layer in (
        layers.Embedding(vocab_size, embedding_dim),
        layers.Bidirectional(layers.LSTM(12, dropout=0.6)),
        layers.Dense(1, activation='sigmoid'),
    ):
        model.add(layer)

    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    model.summary()
    return model

# Build model 3 and train it for 15 epochs; the split loaded from test.csv is
# passed to Keras as validation data.
model3 = model_3()
training_3 = model3.fit(
    X_train,
    y_train,
    epochs=15,
    validation_data=(X_test, y_test),
)

In [None]:
# Final evaluation of model 3 on the split loaded from val.csv, which is not
# used during fit(). The model is compiled with the accuracy metric, so the
# result is [loss, accuracy].
results3 = model3.evaluate(X_val, y_val)
print("test loss, test acc:", results3)

In [None]:
# Training vs. validation loss per epoch for model 3.
display_learning_curves(training_3)


In [None]:
# Validation loss and training accuracy per epoch for model 3.
plot_history(training_3)

### Model 4

In [None]:
def model_4():
    """Build, compile and summarise model 4.

    Architecture: 32-dimensional embedding over the module-level
    ``vocab_size``, a bidirectional LSTM with 16 units and dropout 0.6,
    and a single sigmoid output unit. Compiled with Adam and binary
    cross-entropy, tracking accuracy.

    Returns:
        The compiled Sequential model.
    """
    embedding_dim = 32
    lstm_units = 16

    model = Sequential()

    embedding = layers.Embedding(vocab_size, embedding_dim)
    model.add(embedding)

    recurrent = layers.Bidirectional(layers.LSTM(lstm_units, dropout=0.6))
    model.add(recurrent)

    output = layers.Dense(1, activation='sigmoid')
    model.add(output)

    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    model.summary()
    return model

# Build model 4 and train it for 15 epochs; the split loaded from test.csv is
# passed to Keras as validation data.
model4 = model_4()
training_4 = model4.fit(
    X_train,
    y_train,
    epochs=15,
    validation_data=(X_test, y_test),
)

In [None]:
# Final evaluation of model 4 on the split loaded from val.csv, which is not
# used during fit(). This must call model4: the previous code evaluated model3,
# so results4 reported the wrong model. The result is [loss, accuracy].
results4 = model4.evaluate(X_val, y_val)
print("test loss, test acc:", results4)

In [None]:
# Training vs. validation loss per epoch for model 4.
display_learning_curves(training_4)


In [None]:
# Validation loss and training accuracy per epoch for model 4.
plot_history(training_4)

Thank you for the opportunity to learn all of this! I hope you liked my models and report!