In [None]:
# Note: 'sklearn' is a deprecated PyPI alias whose installation fails; 'scikit-learn' is the real package.
#%pip install sentence-transformers scikit-learn pandas

Collecting sentence-transformers
  Using cached sentence_transformers-5.1.2-py3-none-any.whl.metadata (16 kB)
Collecting sklearn
  Downloading sklearn-0.0.post12.tar.gz (2.6 kB)
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'error'
Note: you may need to restart the kernel to use updated packages.


  error: subprocess-exited-with-error
  
  × Getting requirements to build wheel did not run successfully.
  │ exit code: 1
  ╰─> [15 lines of output]
      The 'sklearn' PyPI package is deprecated, use 'scikit-learn'
      rather than 'sklearn' for pip commands.
      
      Here is how to fix this error in the main use cases:
      - use 'pip install scikit-learn' rather than 'pip install sklearn'
      - replace 'sklearn' by 'scikit-learn' in your pip requirements files
        (requirements.txt, setup.py, setup.cfg, Pipfile, etc ...)
      - if the 'sklearn' package is used by one of your dependencies,
        it would be great if you take some time to track which package uses
        'sklearn' instead of 'scikit-learn' and report it to their issue tracker
      - as a last resort, set the environment variable
        SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True to avoid this error
      
      More information is available at
      https://github.com/scikit-learn/sklearn-

In [1]:


import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder
from sentence_transformers import SentenceTransformer


# 1. Load the dataset (text + label)

# Keep only the two columns the pipeline uses. The explicit .copy() makes `df`
# an independent frame, so the column assignments made further down
# (df['label'] = ..., df['label_name'] = ...) cannot trigger pandas'
# SettingWithCopyWarning when the CSV carries additional columns.
df = pd.read_csv("datasets/mood-data.csv")
df = df[['text', 'label']].copy()


print("Dataset Loaded:")
print(df.head())

# ---------------------------------------------------------
# 2. Convert labels to integers
# ---------------------------------------------------------

# Emotion names keyed by the raw integer label IDs (0–5) as stored in the CSV.
raw_label_names = {0: "sadness", 1: "joy", 2: "love", 3: "anger", 4: "fear", 5: "surprise"}

# LabelEncoder numbers classes by their position among the sorted unique raw
# labels. Integer labels 0–5 keep their values when all six are present, but
# string labels would be renumbered alphabetically and a missing class would
# shift every later ID down, so a fixed id -> name table cannot be trusted.
label_encoder = LabelEncoder()
df['label'] = label_encoder.fit_transform(df['label'])

# Map encoded label IDs to emotion names, derived from the fitted encoder:
#   - a raw string label is already the emotion name;
#   - a raw numeric label is looked up in raw_label_names (falling back to its
#     text form if the ID is unknown, rather than producing NaN names).
label_names = {
    encoded_id: (
        raw_label
        if isinstance(raw_label, str)
        else raw_label_names.get(int(raw_label), str(raw_label))
    )
    for encoded_id, raw_label in enumerate(label_encoder.classes_)
}

df['label_name'] = df['label'].map(label_names)

# ---------------------------------------------------------
# 3. Train-test split
# ---------------------------------------------------------

# Hold out 20% of the rows for evaluation. Stratifying on the encoded labels
# keeps the class proportions the same in both partitions, and the fixed seed
# makes the split repeatable.
texts = df['text'].values
targets = df['label'].values

X_train, X_test, y_train, y_test = train_test_split(
    texts, targets, test_size=0.2, random_state=42, stratify=targets
)

# ---------------------------------------------------------
# 4. Generate sentence embeddings
# ---------------------------------------------------------

print("\nLoading SentenceTransformer model...")
embedder = SentenceTransformer('all-MiniLM-L6-v2')


def _encode(texts):
    """Embed `texts` with the shared settings: batches of 32, NumPy array output."""
    return embedder.encode(texts, batch_size=32, convert_to_numpy=True)


print("Encoding text into embeddings...")
X_train_emb = _encode(X_train)
X_test_emb = _encode(X_test)

print("Embedding shape:", X_train_emb.shape)

# ---------------------------------------------------------
# 5. Train SVM classifier
# ---------------------------------------------------------

# Linear-kernel SVM trained on the sentence embeddings.
# probability=True makes fit() additionally calibrate class probabilities via
# an internal cross-validation whose data shuffling is random. Fixing
# random_state makes those probability estimates repeatable across runs; it
# does not affect the labels returned by predict().
# NOTE(review): nothing in this cell calls predict_proba; if no later cell needs
# it, dropping probability=True would make training noticeably faster.
svm_model = SVC(kernel='linear', probability=True, random_state=42)
svm_model.fit(X_train_emb, y_train)

# ---------------------------------------------------------
# 6. Predictions
# ---------------------------------------------------------

y_pred = svm_model.predict(X_test_emb)

# ---------------------------------------------------------
# 7. Evaluation
# ---------------------------------------------------------

# Pass the label IDs explicitly so that report rows and confusion-matrix
# rows/columns always follow the same order as the emotion names. Without
# `labels`, scikit-learn infers the classes from y_test/y_pred, and the list of
# names could then be misaligned or mismatched in length if a class never occurs.
label_ids = sorted(label_names.keys())
target_names = [label_names[i] for i in label_ids]

print("\nClassification Report:")
print(classification_report(y_test, y_pred, labels=label_ids, target_names=target_names))

print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred, labels=label_ids))


  from .autonotebook import tqdm as notebook_tqdm


Dataset Loaded:
                                                text  label
0                            i didnt feel humiliated      0
1  i can go from feeling so hopeless to so damned...      0
2   im grabbing a minute to post i feel greedy wrong      3
3  i am ever feeling nostalgic about the fireplac...      2
4                               i am feeling grouchy      3

Loading SentenceTransformer model...
Encoding text into embeddings...
Embedding shape: (12800, 384)

Classification Report:
              precision    recall  f1-score   support

     sadness       0.69      0.78      0.74       933
         joy       0.71      0.82      0.76      1072
        love       0.58      0.34      0.43       261
       anger       0.68      0.60      0.64       432
        fear       0.66      0.55      0.60       387
    surprise       0.73      0.29      0.41       115

    accuracy                           0.69      3200
   macro avg       0.68      0.56      0.60      3200
weighted av