In [None]:
import pandas as pd
import numpy as np


In [None]:
!pip install datasets


In [None]:
from datasets import load_dataset

# Load the "imdb" dataset through the `datasets` library.
dataset = load_dataset("imdb")

# Bare last expression: the notebook displays the loaded object.
dataset


In [None]:
# Materialize the "train" split as a pandas DataFrame for the steps below.
train_df = pd.DataFrame(dataset["train"])
# Preview the first rows.
train_df.head()

In [None]:
# Work on a 5000-row random subset; random_state pins the draw so it is repeatable.
# NOTE(review): this rebinds train_df, so the full frame is no longer
# reachable under that name after this cell runs.
train_df = train_df.sample(5000, random_state=42)

# Show how many sampled rows fall under each label value.
train_df["label"].value_counts()


In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

# max_features=5000 caps the vocabulary size of the TF-IDF representation.
vectorizer = TfidfVectorizer(max_features=5000)

# Fit on the "text" column and produce the feature matrix in one step.
# NOTE(review): the fit sees every sampled review, including rows that the
# later train/test split holds out for evaluation.
X = vectorizer.fit_transform(train_df["text"])

# Display the resulting matrix object.
X


In [None]:
from sklearn.model_selection import train_test_split

y = train_df["label"]

# Split the raw reviews rather than the pre-computed matrix X, so the
# held-out reviews never influence the TF-IDF vocabulary or IDF weights.
# Stratifying on y keeps the label balance equal in both parts.
text_train, text_test, y_train, y_test = train_test_split(
    train_df["text"], y,
    test_size=0.2,
    random_state=42,
    stratify=y
)

# Refit the vectorizer on the training reviews only, then apply that fitted
# transform to the test reviews. From here on `vectorizer` reflects the
# training data alone, which is also what inference on new text should use.
X_train = vectorizer.fit_transform(text_train)
X_test = vectorizer.transform(text_test)

print("Training samples:", X_train.shape[0])
print("Testing samples:", X_test.shape[0])



In [None]:
from sklearn.linear_model import LogisticRegression

# max_iter=1000 sets the iteration cap passed to the solver.
model = LogisticRegression(max_iter=1000)

# Train on the training split; as the cell's last expression, the value
# returned by fit() is what the notebook displays.
model.fit(X_train, y_train)



In [None]:
from sklearn.metrics import accuracy_score, classification_report

# Predict labels for the held-out split.
y_pred = model.predict(X_test)

# Report overall accuracy followed by the per-class report.
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))


In [None]:
!pip install gradio


In [None]:
def predict_sentiment(text):
    """Classify one movie review and format the result for display.

    Relies on the notebook-level ``vectorizer`` and ``model`` objects.

    Args:
        text: Review text. ``None``, empty, or whitespace-only input is
            treated as "nothing to classify".

    Returns:
        ``"Prediction: <Positive|Negative> (Confidence: <p>)"`` where ``<p>``
        is the predicted class's probability with two decimals, or a short
        prompt asking for input when ``text`` is blank.
    """
    # A blank review vectorizes to all zeros, so the model would answer from
    # its intercept alone; None would fail inside the vectorizer.
    if not text or not text.strip():
        return "Please enter a review to analyze."

    vectorized_text = vectorizer.transform([text])

    # One inference pass: take the most probable column and map it back to
    # its label via classes_ instead of assuming label == column index.
    probabilities = model.predict_proba(vectorized_text)[0]
    best = int(probabilities.argmax())
    prediction = model.classes_[best]
    probability = probabilities[best]

    label = "Positive" if prediction == 1 else "Negative"

    return f"Prediction: {label} (Confidence: {probability:.2f})"


In [None]:
import gradio as gr

# Four-line text area where the user types or pastes the review.
review_box = gr.Textbox(lines=4, placeholder="Enter movie review here...")

# Text-in / text-out UI that routes the review through predict_sentiment.
interface = gr.Interface(
    fn=predict_sentiment,
    inputs=review_box,
    outputs="text",
    title="IMDb Sentiment Analysis",
    description="Enter a movie review and get sentiment prediction.",
)

# Start serving the app.
interface.launch()
