In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# twitter_training.csv has no header row: with the default header=0 pandas
# promoted the first record (2401, Borderlands, Positive, "im getting on ...")
# to column names and silently dropped it from the data. Read with header=None
# so that record is kept. The legacy column names are passed explicitly because
# later cells still refer to `Positive` and to the tweet-text column by name.
data = pd.read_csv(
    'twitter_training.csv',
    header=None,
    names=[
        '2401',         # tweet id (discarded)
        'Borderlands',  # entity the tweet is about (discarded)
        'Positive',     # sentiment label
        'im getting on borderlands and i will murder you all ,',  # tweet text
    ],
).drop(columns=['2401', 'Borderlands'])

In [None]:
# Peek at two randomly chosen rows to sanity-check the loaded frame.
data.sample(n=2)

Unnamed: 0,Positive,"im getting on borderlands and i will murder you all ,"
10630,Irrelevant,Been scrolling through my feed and noticed 2 t...
32717,Positive,Awww


In [None]:
# Discard every row that has a missing value in any column.
data = data.dropna()

In [None]:
# Count rows that exactly repeat an earlier row (all columns compared).
data.duplicated().sum()

np.int64(4227)

In [None]:
# Keep only the first occurrence of each fully duplicated row.
data = data.drop_duplicates()

In [None]:
# Integer encoding of the sentiment labels (used as class ids by the model).
positive_num = {
    "Negative": 0,
    "Positive": 1,
    "Neutral": 2,
    "Irrelevant": 3,
}

encoded_labels = data['Positive'].map(positive_num)

# Series.map turns every value that is not a key of `positive_num` into NaN.
# That happens for unexpected labels and also when this cell is re-run on an
# already-encoded column; fail loudly instead of silently wiping the labels.
unmapped_labels = data.loc[encoded_labels.isna(), 'Positive'].unique()
if len(unmapped_labels):
    raise ValueError(
        "Cannot encode sentiment labels; unexpected values "
        f"(or the column is already encoded): {list(unmapped_labels)}"
    )

data['Positive'] = encoded_labels.astype('int64')

In [None]:
# The tweet-text column is named after the first tweet of the file; give it a usable name.
data = data.rename(
    columns={'im getting on borderlands and i will murder you all ,': "Text"}
)

In [None]:
import string
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
# Resources shared by every text_classify call; filled lazily on first use.
_TEXT_CLASSIFY_RESOURCES = {}


def text_classify(text):
    """Normalise one raw text into a list of lemmatised tokens.

    Steps, in order: lowercase, remove every character found in
    ``string.punctuation``, split on whitespace, drop NLTK English stopwords,
    lemmatise each remaining word with ``WordNetLemmatizer``.

    Args:
        text: Raw text of a single document (``str``).

    Returns:
        list[str]: The cleaned tokens in their original order (possibly empty).
    """
    if not _TEXT_CLASSIFY_RESOURCES:
        # Previously the stopword list was rebuilt for every single word and
        # searched linearly, and a new lemmatizer was created per call. Build
        # everything once; a frozenset gives constant-time membership tests.
        resources = {
            "stop_words": frozenset(stopwords.words("english")),
            "lemmatizer": WordNetLemmatizer(),
            "punct_table": str.maketrans("", "", string.punctuation),
        }
        # Publish only after every resource was created successfully.
        _TEXT_CLASSIFY_RESOURCES.update(resources)

    stop_words = _TEXT_CLASSIFY_RESOURCES["stop_words"]
    lemmatizer = _TEXT_CLASSIFY_RESOURCES["lemmatizer"]

    # Punctuation is stripped before stopword filtering, so contractions such
    # as "don't" become "dont" and are not matched by the stopword list.
    cleaned = text.lower().translate(_TEXT_CLASSIFY_RESOURCES["punct_table"])
    return [
        lemmatizer.lemmatize(word)
        for word in cleaned.split()
        if word not in stop_words
    ]

In [None]:
# Swap each raw tweet string for the token list produced by text_classify.
data['Text'] = data['Text'].map(text_classify)

In [None]:
from tensorflow.keras.preprocessing.text import Tokenizer
# Learn the word -> integer index vocabulary from the cleaned token lists,
# then overwrite the Text column with the corresponding integer sequences.
tok = Tokenizer()
token_lists = data['Text']
tok.fit_on_texts(token_lists)
encoded_texts = tok.texts_to_sequences(token_lists)
data['Text'] = encoded_texts
print(data.head())


   Positive                  Text
0         1      [290, 1955, 211]
1         1      [5, 91, 64, 211]
2         1    [5, 290, 64, 1466]
3         1  [5, 91, 64, 4, 1466]
4         1     [5, 91, 64, 1466]


In [None]:
# NOTE(review): bare literal with no computation attached. It matches the
# maxlen=24 used for padding and the Embedding input_length=(24) further down —
# presumably the leftover result of a sequence-length calculation. TODO confirm
# how 24 was chosen.
24

In [None]:
from tensorflow.keras.preprocessing.sequence import pad_sequences
# Bring every integer sequence to a fixed length of 24 by appending zeros
# (padding='post'). `truncating` is not set, so the Keras default applies to
# sequences longer than 24 — NOTE(review): the default is 'pre' (drops leading
# tokens); confirm that is intended alongside post-padding.
X = pad_sequences(data['Text'], maxlen=24, padding='post')

In [None]:
from sklearn.model_selection import train_test_split
# `y` was never defined anywhere in the notebook, so this cell raised NameError
# on a fresh kernel. The targets are the integer-encoded sentiment labels,
# taken from the same frame (and row order) that X was built from.
y = data['Positive'].to_numpy()

# Hold out 23% of the samples for testing; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.23, random_state=42
)

In [None]:
# Embedding table size: one entry per word in the fitted tokenizer's
# word_index, plus one extra slot because index 0 is the padding value and is
# never assigned to a word.
vocab_size_of_every_unique_words = len(tok.word_index) + 1

In [None]:
from tensorflow import keras
from tensorflow.keras import layers
# Embedding -> SimpleRNN -> Dense classifier over 24-token sequences with a
# 4-way softmax output (one unit per sentiment class).
model = keras.Sequential([
    # An explicit Input replaces Embedding's deprecated `input_length` argument
    # and builds the model right away, so model.summary() can report output
    # shapes and parameter counts.
    keras.Input(shape=(24,)),
    # mask_zero=True: index 0 is the padding value. Sequences are post-padded,
    # so without masking the RNN would read the trailing zeros as real
    # timesteps before emitting its final state.
    layers.Embedding(
        input_dim=vocab_size_of_every_unique_words,
        output_dim=60,
        mask_zero=True,
    ),
    # return_sequences=False: only the last state is passed to the dense head.
    layers.SimpleRNN(128, activation="relu", return_sequences=False),
    layers.Dense(64, activation='relu'),
    layers.Dense(4, activation="softmax"),
])



In [None]:
# sparse_categorical_crossentropy takes integer class ids as targets (no
# one-hot encoding), paired here with the 4-unit softmax output.
# NOTE(review): assumes y_train holds the 0-3 codes from `positive_num` — confirm.
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
# Print the layer-by-layer overview of the network.
model.summary()

In [None]:
# Train for 15 epochs. validation_split=0.23 makes Keras set aside 23% of
# X_train/y_train for the per-epoch val_* metrics; the returned History object
# is kept in `history`.
# NOTE(review): X_test/y_test are not evaluated in any visible cell — consider
# a final model.evaluate(X_test, y_test).
history = model.fit(X_train,y_train,epochs = 15 , validation_split=0.23)

Epoch 1/15
1293/1293 ━━━━━━━━━━━━━━━━━━━━ 11s 6ms/step - accuracy: 0.3046 - loss: 1.3602 - val_accuracy: 0.3621 - val_loss: 1.3137
Epoch 2/15
1293/1293 ━━━━━━━━━━━━━━━━━━━━ 7s 5ms/step - accuracy: 0.3792 - loss: 1.2832 - val_accuracy: 0.4724 - val_loss: 1.2270
Epoch 3/15
1293/1293 ━━━━━━━━━━━━━━━━━━━━ 6s 5ms/step - accuracy: 0.5425 - loss: 1.0596 - val_accuracy: 0.5848 - val_loss: 1.0227
Epoch 4/15
1293/1293 ━━━━━━━━━━━━━━━━━━━━ 6s 5ms/step - accuracy: 0.6665 - loss: 0.8189 - val_accuracy: 0.6407 - val_loss: 0.9280
Epoch 5/15
1293/1293 ━━━━━━━━━━━━━━━━━━━━ 6s 5ms/step - accuracy: 0.7430 - loss: 0.6664 - val_accuracy: 0.6687 - val_loss: 0.8871
Epoch 6/15
1293/1293 ━━━━━━━━━━━━━━━━━━━━ 6s 5ms/step - accuracy: 0.7852 - loss: 0.5959 - val_accuracy: 0.6840 - val_loss: 0.8477
Epoch 7/15

In [None]:
# --- label mapping ---
# Class id -> label name, used to decode the model's predicted index.
# The order must mirror the `positive_num` encoding (Negative=0 ... Irrelevant=3).
classes = dict(enumerate(("Negative", "Positive", "Neutral", "Irrelevant")))

# --- prediction function ---
def predict_sentiment(text):
    """Predict the sentiment class of one piece of free text.

    The input goes through the same pipeline as the training data:
    ``text_classify`` (lowercase, punctuation/stopword removal, lemmatisation),
    the fitted tokenizer ``tok``, then post-padding to 24 tokens. Previously
    the raw text was tokenised directly and padded with the default 'pre'
    mode, so the model saw inputs laid out differently from training.

    Args:
        text: Sentence entered by the user (``str``; may be empty or ``None``).

    Returns:
        str: ``"Sentiment: <label> (<confidence>% confidence)"``, or a short
        explanatory message when there is nothing to score.
    """
    if not text or not text.strip():
        return "Please enter some text."

    tokens = text_classify(text)
    seq = tok.texts_to_sequences([tokens])
    if not seq[0]:
        # Everything was a stopword or out of vocabulary: the model would be
        # scoring a sequence of pure padding, which is meaningless.
        return "None of the words are known to the model, so no prediction can be made."

    padded = pad_sequences(seq, maxlen=24, padding='post')
    pred = model.predict(padded)
    print("Prediction probs:", pred)   # debug aid: inspect the output distribution
    label_idx = int(pred.argmax(axis=1)[0])
    sentiment = classes[label_idx]
    confidence = round(float(pred.max()) * 100, 2)
    return f"Sentiment: {sentiment} ({confidence}% confidence)"

# --- Gradio UI ---
# `gr` was used without ever being imported, so this cell raised NameError on a
# fresh kernel; import gradio here so the cell is self-contained.
import gradio as gr

# Single text box in, single text box out, backed by predict_sentiment.
interface = gr.Interface(
    fn=predict_sentiment,
    inputs=gr.Textbox(label="Enter a sentence"),
    outputs=gr.Textbox(label="Predicted Sentiment"),
    title="Sentiment Analysis (Keras Model)",
    description="Predicts whether a text is Positive, Negative, Neutral, or Irrelevant"
)

# Start the web app.
interface.launch()


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://31878176caa856c456.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


