# Music Mood Classifier — Full Workflow
This notebook performs exploratory data analysis (EDA), model training, evaluation, and model export.

In [None]:
import pandas as pd, numpy as np, matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.ensemble import RandomForestClassifier
import joblib

# Default size (width, height) applied to every figure drawn in this notebook.
plt.rcParams['figure.figsize'] = (8, 5)

# Load the song table; the path is resolved relative to the working directory.
songs_csv = '../../data/music/songs.csv'
df = pd.read_csv(songs_csv)

# Preview the first rows (rendered as the cell output).
df.head()

## Class Distribution

In [None]:
# Bar chart of the number of rows per value of the 'mood' column.
mood_counts = df['mood'].value_counts()
mood_counts.plot(kind='bar')
plt.title('Mood Class Distribution')
plt.xlabel('mood')
plt.ylabel('count')
plt.show()

## Numeric Summary

In [None]:
# Descriptive statistics restricted to the numeric columns of df.
numeric_summary = df.describe(include=np.number)
# Leave the table as the last expression so the notebook renders it.
numeric_summary

## Simple Correlation (imshow)

In [None]:
# Pairwise correlation between the numeric columns of df.
corr = df.select_dtypes(include=[np.number]).corr()

# Anchor the colour scale to the full correlation range [-1, 1] so colours
# mean the same thing regardless of which values happen to occur in the data.
plt.imshow(corr, aspect='auto', vmin=-1, vmax=1)
plt.title('Correlation')

# Label both axes with the column names; without them the cells of the
# heatmap cannot be mapped back to the features they compare.
tick_positions = np.arange(len(corr.columns))
plt.xticks(tick_positions, corr.columns, rotation=90)
plt.yticks(tick_positions, corr.columns)

plt.colorbar()
# Keep the rotated x labels from being clipped at the figure edge.
plt.tight_layout()
plt.show()

## Train/Test Split & Model

In [None]:
# Columns of df used as model inputs.
features = [
    'valence',
    'energy',
    'danceability',
    'tempo',
    'loudness',
    'speechiness',
    'acousticness',
    'instrumentalness',
    'liveness',
]
X = df[features]
# Target column, converted to pandas' categorical dtype.
y = df['mood'].astype('category')

# Hold out 20% of the rows for testing; stratify on the target so both splits
# keep the same class proportions. The fixed seed makes the split repeatable.
Xtr, Xte, ytr, yte = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Hyper-parameter combinations tried by the search (2 x 3 = 6 candidates).
param_grid = {
    'n_estimators': [100, 200],
    'max_depth': [None, 5, 10],
}
base_model = RandomForestClassifier(random_state=42)

# 3-fold cross-validated search scored by macro-averaged F1, using all cores.
grid = GridSearchCV(
    estimator=base_model,
    param_grid=param_grid,
    scoring='f1_macro',
    cv=3,
    n_jobs=-1,
)
grid.fit(Xtr, ytr)

# With the default refit=True, grid.predict delegates to best_estimator_.
pred = grid.predict(Xte)

# Report the selected parameters and the held-out performance.
print('Best params:', grid.best_params_)
print(classification_report(yte, pred))
print(confusion_matrix(yte, pred))

## Export Model

In [None]:
from pathlib import Path

# Destination of the serialized model. Defined once so the dump call and the
# confirmation message below can never disagree.
model_path = '../../models/music/model.joblib'

# joblib.dump does not create missing directories, so make sure the target
# folder exists before writing (no-op when it is already there).
Path(model_path).parent.mkdir(parents=True, exist_ok=True)

# Persist the best estimator found by the grid search.
joblib.dump(grid.best_estimator_, model_path)
print(f'Saved {model_path}')