In [None]:
#loading the dataset
import numpy as np
import pandas as pd

# NOTE(review): absolute, machine-specific location; change DATA_PATH when running elsewhere.
DATA_PATH = r"C:\Users\Zunnurain.Badar\IMDB dataset.csv"
ds = pd.read_csv(DATA_PATH)
# Report the number of missing values per column. Printing ds.isnull() directly
# shows a frame of True/False flags rather than a summary of what is missing.
print(ds.isnull().sum())


In [None]:
# Inspect the column labels of the loaded DataFrame `ds`
ds.columns

In [None]:
# Normalise case: lower-case both the review text and the sentiment label
for text_col in ('review', 'sentiment'):
    ds[text_col] = ds[text_col].str.lower()


In [None]:
# Download NLTK's stop-word corpus; `stopwords` is imported here because a later
# cell calls stopwords.words('english')
import nltk
from nltk.corpus import stopwords
nltk.download('stopwords')
 



In [None]:
# Tokenise every review: `tokens` ends up holding one list of word tokens per review
import nltk
from nltk.tokenize import word_tokenize

nltk.download('punkt_tab')  # resource fetched before word_tokenize is used
review_text = ds['review'].astype(str)  # cast to str before tokenising
tokens = review_text.map(word_tokenize)
print(tokens.head())

In [None]:
# Load NLTK's English stop-word list into `stop_words` and print it for inspection
stop_words = stopwords.words('english')
print (stop_words)

In [None]:
# Removing the stopwords.
# `tokens` holds one list of word tokens per review, so the filter has to run on
# the words inside each list. Testing a whole token list against the stop-word
# list never matches, which would leave every review unfiltered.
stop_word_set = set(stop_words)  # set membership keeps the per-token check cheap
filtered_tokens = [
    [word for word in review_tokens if word not in stop_word_set]
    for review_tokens in tokens
]
# NOTE(review): the TF-IDF cell reads ds['review'] directly, so `filtered_tokens`
# is not consumed by the model-building cells visible in this notebook.
print(filtered_tokens[:10])

In [None]:
# Vectorization: turn the raw review strings into a TF-IDF feature matrix
from sklearn.feature_extraction.text import TfidfVectorizer

tfidf = TfidfVectorizer(max_features=20000)
text_data = ds['review'].tolist()
# NOTE(review): the vectorizer is fitted on every review here, before the
# train/test split in the next cell, so test rows contribute to the fitted
# vocabulary/IDF statistics. Confirm this is acceptable for the evaluation.
tfidf_matrix = tfidf.fit_transform(text_data)
print(tfidf_matrix.shape)

In [None]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable
from sklearn.model_selection import train_test_split

x, y = tfidf_matrix, ds['sentiment']
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)
for split_matrix in (x_train, x_test):
    print(split_matrix.shape)

In [None]:
# Train a logistic-regression classifier, then report accuracy and the
# per-class precision / recall / F1 on the held-out test split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

model = LogisticRegression()
model.fit(x_train, y_train)
y_pred = model.predict(x_test)

# scikit-learn metrics expect (y_true, y_pred) in that order. Accuracy is
# symmetric, but with the arguments reversed the report's precision and recall
# columns trade places and support counts predictions instead of true labels.
accuracy = accuracy_score(y_test, y_pred)
print("accuracy:", accuracy)
# Print the report on its own line so the table header stays aligned with its columns.
print("classification_report:")
print(classification_report(y_test, y_pred))


In [None]:
# Persist the fitted classifier and vectorizer so the deployed app can load them
import joblib

artifacts = {"model.pkl": model, "vectorizer.pkl": tfidf}
for filename, fitted_object in artifacts.items():
    joblib.dump(fitted_object, filename)
print("model and vectorizer saved successfully")

In [None]:
!pip install streamlit

In [None]:
%%writefile app.py
import streamlit as st
import joblib
model = joblib.load("model.pkl")
vectorizer = joblib.load("vectorizer.pkl")
st.title("Sentiment Analysis App 😊😠")
user_input = st.text_area("Enter your text:")
if st.button("Analyze Sentiment"):
    if user_input:
        transformed_text = vectorizer.transform([user_input])
        prediction = model.predict(transformed_text)
        sentiment = "Positive 😃" if prediction[0] == "positive" else "Negative 😠"
        st.write("Sentiment:", sentiment)
    else:
        st.warning("⚠️ Please enter some text!")


In [None]:
# Launch the Streamlit app that the %%writefile cell saved as app.py.
# NOTE(review): presumably this keeps the cell running until the server is stopped; confirm.
!streamlit run app.py
