<a href="https://colab.research.google.com/github/zmohammadyit-cmyk/sentiment-analysis/blob/main/SentimentAnalysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip -q install vaderSentiment==3.3.2 scikit-learn==1.4.2 pandas==2.2.2 numpy==1.26.4 joblib==1.4.2


In [None]:
import os

from google.colab import drive

# Mount Google Drive so that files under BASE_DIR live on Drive.
drive.mount('/content/drive')

# Project root on Drive, with one sub-folder for data and one for models.
# NOTE(review): later cells rebind BASE_DIR to a different location, so these
# Drive folders are created here but not used by the rest of the notebook.
BASE_DIR = "/content/drive/MyDrive/sentiment-colab"
for subdir in ("data", "models"):
    os.makedirs(f"{BASE_DIR}/{subdir}", exist_ok=True)

Mounted at /content/drive


In [None]:
import pandas as pd, os
# Work relative to the current working directory. This rebinds BASE_DIR;
# in the recorded run it resolved to /content.
BASE_DIR = os.getcwd()
os.makedirs(f"{BASE_DIR}/data", exist_ok=True)

# Destination of the demo dataset (previously assigned twice by mistake).
csv_path = f"{BASE_DIR}/data/sample.csv"

# Tiny hand-written corpus: 12 sentences, four per class, with labels listed
# in the same order as the texts (positive, negative, neutral, repeating).
df = pd.DataFrame({
    "text": [
        "I love this product, it's amazing!",
        "This is the worst purchase I've ever made.",
        "It works fine, nothing special.",
        "Absolutely fantastic experience, highly recommend.",
        "Terrible quality and very disappointed.",
        "It's okay, could be better.",
        "The service was excellent!",
        "I hate it, waste of money.",
        "Not bad, not great either.",
        "This phone is incredible for the price.",
        "The food was awful and cold.",
        "Mediocre performance overall."
    ],
    "label": [
        "positive","negative","neutral","positive","negative","neutral",
        "positive","negative","neutral","positive","negative","neutral"
    ]
})

# index=False keeps the row index out of the file, so the CSV has exactly
# the two columns "text" and "label".
df.to_csv(csv_path, index=False)
print("Saved:", csv_path)
df.head()

Saved: /content/data/sample.csv


Unnamed: 0,text,label
0,"I love this product, it's amazing!",positive
1,This is the worst purchase I've ever made.,negative
2,"It works fine, nothing special.",neutral
3,"Absolutely fantastic experience, highly recomm...",positive
4,Terrible quality and very disappointed.,negative


In [None]:
#!pip install --upgrade --force-reinstall numpy
#!pip install --upgrade --force-reinstall pandas scipy scikit-learn

In [None]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Single module-level analyzer instance; vader_predict() reuses it on every call.
analyzer = SentimentIntensityAnalyzer()

def vader_predict(text: str):
    """Classify ``text`` with the shared VADER ``analyzer``.

    The label is derived from the ``"compound"`` entry of the polarity scores:
    ``>= 0.05`` is "positive", ``<= -0.05`` is "negative", anything in
    between is "neutral".

    Returns:
        A ``(label, scores)`` tuple, where ``scores`` is the dictionary
        returned by ``analyzer.polarity_scores(text)``.
    """
    scores = analyzer.polarity_scores(text)
    compound = scores["compound"]

    if compound >= 0.05:
        return "positive", scores
    if compound <= -0.05:
        return "negative", scores
    return "neutral", scores

# Quick smoke test of vader_predict on one sentence per intended class.
# NOTE: in the recorded run the third sentence came back "negative"
# (compound -0.092), not "neutral".
tests = [
    "I absolutely love this phone!",
    "This food is terrible.",
    "It's okay, nothing special.",
]

# map() is lazy, so each sentence is scored right before it is printed.
for t, (label, scores) in zip(tests, map(vader_predict, tests)):
    print(f"Text: {t}\n→ {label} | {scores}\n")

Text: I absolutely love this phone!
→ positive | {'neg': 0.0, 'neu': 0.455, 'pos': 0.545, 'compound': 0.6989}

Text: This food is terrible.
→ negative | {'neg': 0.508, 'neu': 0.492, 'pos': 0.0, 'compound': -0.4767}

Text: It's okay, nothing special.
→ negative | {'neg': 0.367, 'neu': 0.325, 'pos': 0.309, 'compound': -0.092}



In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report, accuracy_score
from joblib import dump
import os
# Resolve paths from the working directory, the same way the cell that wrote
# data/sample.csv does, instead of hard-coding Colab's "/content".
BASE_DIR = os.getcwd()
os.makedirs(f"{BASE_DIR}/models", exist_ok=True)  # create the models folder if it does not exist
os.makedirs(f"{BASE_DIR}/data", exist_ok=True)
DATA_PATH = f"{BASE_DIR}/data/sample.csv"
MODEL_PATH = f"{BASE_DIR}/models/tfidf_logreg.pkl"

# Load the labelled sentences; both columns are coerced to plain strings.
df = pd.read_csv(DATA_PATH)
X = df["text"].astype(str).values
y = df["label"].astype(str).values

# Stratified 75/25 split with a fixed seed so the split is reproducible.
# In the recorded run the test set held 3 rows, one per class.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42, stratify=y
)

# TF-IDF over lower-cased unigrams and bigrams feeding a logistic regression.
pipe = Pipeline([
    ("tfidf", TfidfVectorizer(lowercase=True, ngram_range=(1,2), max_features=5000)),
    ("clf", LogisticRegression(max_iter=1000))
])

pipe.fit(X_train, y_train)

y_pred = pipe.predict(X_test)
acc = accuracy_score(y_test, y_pred)
print(f"Accuracy: {acc:.3f}")
# zero_division=0 reports 0.0 for classes that were never predicted, which is
# what the default does too, but without emitting UndefinedMetricWarning.
print(classification_report(y_test, y_pred, digits=3, zero_division=0))

# Persist the fitted pipeline (vectorizer + classifier) as a single artifact.
dump(pipe, MODEL_PATH)
print("Model saved to:", MODEL_PATH)


Accuracy: 0.333
              precision    recall  f1-score   support

    negative      0.000     0.000     0.000         1
     neutral      0.500     1.000     0.667         1
    positive      0.000     0.000     0.000         1

    accuracy                          0.333         3
   macro avg      0.167     0.333     0.222         3
weighted avg      0.167     0.333     0.222         3

Model saved to: /content/models/tfidf_logreg.pkl


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


In [None]:
from joblib import load

# Reload the saved pipeline from disk and rebind `pipe` to it.
# BASE_DIR must already be defined by an earlier cell.
# NOTE: joblib's load() unpickles the file, so only load model files you trust.
MODEL_PATH = f"{BASE_DIR}/models/tfidf_logreg.pkl"
pipe = load(MODEL_PATH)

def classical_predict(text: str):
    """Classify ``text`` with the module-level ``pipe``.

    Returns:
        A ``(label, proba)`` tuple. ``label`` is the first element of
        ``pipe.predict([text])``. ``proba`` maps each entry of
        ``pipe.classes_`` to its probability as a ``float``, or is ``None``
        when ``pipe`` has no ``predict_proba`` attribute.
    """
    batch = [text]  # the pipeline is called with a one-element list
    label = pipe.predict(batch)[0]

    if not hasattr(pipe, "predict_proba"):
        return label, None

    class_probs = pipe.predict_proba(batch)[0]
    proba = dict(zip(pipe.classes_, map(float, class_probs)))
    return label, proba

# Quick smoke test of classical_predict on one sentence per intended class.
samples = [
    "I absolutely love this camera.",
    "This was a terrible experience.",
    "It's okay, not bad.",
]

# map() is lazy, so each sentence is classified right before it is printed.
for s, (label, proba) in zip(samples, map(classical_predict, samples)):
    print(f"Text: {s}\n→ Predicted: {label}\n   Probabilities: {proba}\n")


Text: I absolutely love this camera.
→ Predicted: positive
   Probabilities: {'negative': 0.2789041073352131, 'neutral': 0.2663789455485547, 'positive': 0.4547169471162322}

Text: This was a terrible experience.
→ Predicted: negative
   Probabilities: {'negative': 0.36856711017808125, 'neutral': 0.2758795394808342, 'positive': 0.3555533503410844}

Text: It's okay, not bad.
→ Predicted: neutral
   Probabilities: {'negative': 0.2686198899304568, 'neutral': 0.4386792400106008, 'positive': 0.29270087005894235}



In [None]:
!pip -q install gradio==4.44.0

import gradio as gr

def predict_ui(text, engine):
    """Gradio callback: run the selected engine on ``text`` and format the result.

    ``engine == "VADER"`` routes to ``vader_predict``; any other value routes
    to ``classical_predict``. Returns a single display string containing the
    engine tag, the predicted label, and the scores/probabilities.
    """
    if engine != "VADER":
        label, proba = classical_predict(text)
        return f"[TFIDF+LR] {label} | proba={proba}"

    label, scores = vader_predict(text)
    return f"[VADER] {label} | scores={scores}"

# Build the widgets first (inputs before output, same order as they are
# passed to the interface), then wire them to predict_ui.
text_input = gr.Textbox(label="Enter text")
engine_choice = gr.Radio(["VADER","TFIDF+LR"], value="VADER")
result_box = gr.Textbox(label="Result")

demo = gr.Interface(
    fn=predict_ui,
    inputs=[text_input, engine_choice],
    outputs=result_box,
    title="Sentiment Analysis (Colab Demo)",
)

# share=True asks Gradio for a public share link; the recorded run printed a
# *.gradio.live URL. Anyone holding that URL can call predict_ui.
demo.launch(share=True)


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m18.1/18.1 MB[0m [31m24.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m318.7/318.7 kB[0m [31m15.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.5/4.5 MB[0m [31m50.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m131.2/131.2 kB[0m [31m6.7 MB/s[0m eta [36m0:00:00[0m
[0m[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
scikit-image 0.25.2 requires scipy>=1.11.4, which is not installed.
sentence-transformers 5.1.2 requires scipy, which is not installed.
fastai 2.8.5 requires scipy, which is not installed.
matplotlib-venn 1.1.2 requires scipy, which is not installed.
missingno 0.5.2 requires scipy, which is not installed.
arviz 0.22.0 requires scipy>=1.1

--------


Running on public URL: https://2a00aa6265b94d7eb9.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


