## Sentiment Analysis on Twitter COVID Data

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM,Dense, Dropout, SpatialDropout1D
from tensorflow.keras.layers import Embedding

# Load the training data from Training.csv in the current working directory.
df = pd.read_csv("./Training.csv")

In [None]:
# Preview the first rows of the raw data.
df.head()

In [None]:
# List the available column names.
df.columns

In [None]:
# Narrow the frame down to the tweet text and its sentiment label, then
# report the resulting size and show a small sample.
tweet_df = df.loc[:, ['TEXT', 'LABEL']]
print(tweet_df.shape)
tweet_df.head()

In [None]:
# Discard rows labelled 'neutral'; the remaining rows feed the classifier.
is_not_neutral = tweet_df['LABEL'].ne('neutral')
tweet_df = tweet_df[is_not_neutral]
print(tweet_df.shape)
tweet_df.head()

In [None]:
# Count how many tweets carry each label.
tweet_df["LABEL"].value_counts()

In [None]:
# factorize() yields a (codes, uniques) pair: one integer code per row plus
# the distinct label values. Element 0 is used later as the training target,
# and element 1 is used to turn a predicted code back into its label text.
sentiment_label = tweet_df.LABEL.factorize()
sentiment_label

In [None]:
tweet = tweet_df.TEXT.values

# Only the most frequent words are kept when texts are converted to
# sequences, so every emitted index is strictly below max_words.
max_words = 5000
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(tweet)

# word_index records every word seen during fitting, not just the kept ones.
# Sizing the embedding from it alone would allocate rows that can never be
# looked up, so cap the size at max_words. The +1 accounts for index 0, which
# is not assigned to any word.
vocab_size = min(len(tokenizer.word_index) + 1, max_words)

# Turn each tweet into a list of word indices, then pad to a fixed length of
# 200 so the sequences can be stacked into one array.
encoded_docs = tokenizer.texts_to_sequences(tweet)
padded_sequence = pad_sequences(encoded_docs, maxlen=200)

In [None]:
# Dump the word -> integer index mapping built by the tokenizer.
print(tokenizer.word_index)

In [None]:
# Show the first tweet next to its integer encoding, one per line.
print(tweet[0], encoded_docs[0], sep="\n")

In [None]:
# Show the first tweet's encoding after padding to the fixed length.
print(padded_sequence[0])

In [None]:
embedding_vector_length = 32

# Layer stack: embedding -> dropout on the embedded sequence -> LSTM ->
# dropout -> a single sigmoid unit. input_length matches the padding length
# used when the sequences were built (200).
model = Sequential()
model.add(Embedding(vocab_size, embedding_vector_length, input_length=200))
model.add(SpatialDropout1D(0.25))
model.add(LSTM(50, dropout=0.5, recurrent_dropout=0.5))
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))

# One sigmoid output paired with binary cross-entropy: a two-class setup.
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# summary() prints the table itself and returns None, so wrapping it in
# print() would add a stray "None" line after the table.
model.summary()

In [None]:
# Train for 5 epochs in batches of 32, holding back 20% of the samples for
# validation; the returned history feeds the plots below.
history = model.fit(
    x=padded_sequence,
    y=sentiment_label[0],
    batch_size=32,
    epochs=5,
    validation_split=0.2,
)

In [None]:
# Training vs. validation accuracy per epoch.
plt.plot(history.history['accuracy'], label='acc')
plt.plot(history.history['val_accuracy'], label='val_acc')
plt.legend()
# Save before show(): once the figure has been shown it is released, so a
# later savefig() would write out a new, empty figure.
plt.savefig("Accuracy plot.jpg")
plt.show()

In [None]:
# Training vs. validation loss per epoch.
plt.plot(history.history['loss'], label='loss')
plt.plot(history.history['val_loss'], label='val_loss')
plt.legend()
# Save before show(): once the figure has been shown it is released, so a
# later savefig() would write out a new, empty figure.
plt.savefig("Loss plot.jpg")
plt.show()

In [None]:
def predict_sentiment(text):
    """Print the sentiment label the trained model predicts for *text*.

    The text is encoded with the module-level ``tokenizer``, padded to the
    same length used for training (200), and passed to ``model``. The
    rounded model output is used as an index into the label values stored
    in ``sentiment_label[1]``. Nothing is returned.
    """
    sequences = tokenizer.texts_to_sequences([text])
    padded = pad_sequences(sequences, maxlen=200)
    score = model.predict(padded)
    # Round the single output value to 0 or 1 to get the class index.
    label_index = int(score.round().item())
    print("Predicted label: ", sentiment_label[1][label_index])

In [None]:
# Try the model on two hand-written sentences.
test_sentence1, test_sentence2 = "COVID is negative.", "This is good."

for sentence in (test_sentence1, test_sentence2):
    predict_sentiment(sentence)