# Day 09. Exercise 02
# Metrics

## 0. Imports

In [12]:
import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score


## 1. Preprocessing

1. Create the same dataframe as in the previous exercise.
2. Using `train_test_split` with parameters `test_size=0.2`, `random_state=21` get `X_train`, `y_train`, `X_test`, `y_test`. Use the additional parameter `stratify`.

In [13]:
df_unscaled = pd.read_csv('../data/day-of-week-not-scaled.csv')
df_original = pd.read_csv('../data/dayofweek.csv')
df_unscaled['dayofweek'] = df_original['dayofweek']


In [14]:
X = df_unscaled.drop('dayofweek', axis=1)
y = df_unscaled['dayofweek']


In [15]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=21, stratify=y
)


## 2. SVM

1. Use the best parameters from the previous exercise and train the model of SVM.
2. You need to calculate `accuracy`, `precision`, `recall`, `ROC AUC`.

 - `precision` and `recall` should be calculated for each class (use `average='weighted'`)
 - `ROC AUC` should be calculated for each class against any other class (all possible pairwise combinations) and then weighted average should be applied for the final metric
 - the code in the cell should display the result as below:

```
accuracy is 0.88757
precision is 0.89267
recall is 0.88757
roc_auc is 0.97878
```

In [None]:
svm_model = SVC(kernel='rbf', C=5, gamma='scale', class_weight='balanced', random_state=21, probability=True)
svm_model.fit(X_train, y_train)

y_pred_svm = svm_model.predict(X_test)
y_pred_proba_svm = svm_model.predict_proba(X_test)

svm_accuracy = accuracy_score(y_test, y_pred_svm)
svm_precision = precision_score(y_test, y_pred_svm, average='weighted')
svm_recall = recall_score(y_test, y_pred_svm, average='weighted')
svm_roc_auc = roc_auc_score(y_test, y_pred_proba_svm, multi_class='ovo', average='weighted')

print(f"accuracy is {svm_accuracy:.5f}")
print(f"precision is {svm_precision:.5f}")
print(f"recall is {svm_recall:.5f}")
print(f"roc_auc is {svm_roc_auc:.5f}")


accuracy is 0.39349
precision is 0.55994
recall is 0.39349
roc_auc is 0.77220


## 3. Decision tree

1. The same task for decision tree

In [None]:
dt_model = DecisionTreeClassifier(max_depth=10, class_weight=None, criterion='gini', random_state=21)
dt_model.fit(X_train, y_train)

y_pred_dt = dt_model.predict(X_test)
y_pred_proba_dt = dt_model.predict_proba(X_test)

dt_accuracy = accuracy_score(y_test, y_pred_dt)
dt_precision = precision_score(y_test, y_pred_dt, average='weighted')
dt_recall = recall_score(y_test, y_pred_dt, average='weighted')
dt_roc_auc = roc_auc_score(y_test, y_pred_proba_dt, multi_class='ovo', average='weighted')

print(f"accuracy is {dt_accuracy:.5f}")
print(f"precision is {dt_precision:.5f}")
print(f"recall is {dt_recall:.5f}")
print(f"roc_auc is {dt_roc_auc:.5f}")


accuracy is 0.73373
precision is 0.75231
recall is 0.73373
roc_auc is 0.90803


## 4. Random forest

1. The same task for random forest.

In [None]:
rf_model = RandomForestClassifier(n_estimators=50, max_depth=28, class_weight=None, criterion='gini', random_state=21)
rf_model.fit(X_train, y_train)

y_pred_rf = rf_model.predict(X_test)
y_pred_proba_rf = rf_model.predict_proba(X_test)

rf_accuracy = accuracy_score(y_test, y_pred_rf)
rf_precision = precision_score(y_test, y_pred_rf, average='weighted')
rf_recall = recall_score(y_test, y_pred_rf, average='weighted')
rf_roc_auc = roc_auc_score(y_test, y_pred_proba_rf, multi_class='ovo', average='weighted')

print(f"accuracy is {rf_accuracy:.5f}")
print(f"precision is {rf_precision:.5f}")
print(f"recall is {rf_recall:.5f}")
print(f"roc_auc is {rf_roc_auc:.5f}")


accuracy is 0.92899
precision is 0.93009
recall is 0.92899
roc_auc is 0.99033


## 5. Predictions

1. Choose the best model.
2. Analyze: for which `weekday` your model makes the most errors (in % of the total number of samples of that class in your full dataset), for which `labname` and for which `users`.
3. Save the model.

In [19]:
best_model = rf_model
y_pred_best = y_pred_rf


In [20]:
analysis_df = pd.DataFrame({'actual': y_test.values, 'predicted': y_pred_best})

error_rates_weekday = {}
for day in range(7):
    total_samples = len(analysis_df[analysis_df['actual'] == day])
    errors = len(analysis_df[(analysis_df['actual'] == day) & (analysis_df['predicted'] != day)])
    
    if total_samples > 0:
        error_rates_weekday[day] = (errors / total_samples) * 100
    else:
        error_rates_weekday[day] = 0

print("Error rates by weekday (in %):")
for day, rate in error_rates_weekday.items():
    print(f"Weekday {day}: {rate:.2f}%")

worst_weekday = max(error_rates_weekday, key=error_rates_weekday.get)
print(f"\nWorst performing weekday: {worst_weekday} with error rate {error_rates_weekday[worst_weekday]:.2f}%")


Error rates by weekday (in %):
Weekday 0: 25.93%
Weekday 1: 10.91%
Weekday 2: 6.67%
Weekday 3: 2.50%
Weekday 4: 14.29%
Weekday 5: 5.56%
Weekday 6: 1.41%

Worst performing weekday: 0 with error rate 25.93%


In [None]:
df_test = pd.DataFrame({
    'X_test': [X_test.index[i] for i in range(len(X_test))],
    'actual': y_test.values,
    'predicted': y_pred_best
})

X_test_with_indices = X_test.reset_index()
df_test_with_features = X_test_with_indices.copy()
df_test_with_features['actual'] = y_test.values
df_test_with_features['predicted'] = y_pred_best
df_test_with_features['error'] = df_test_with_features['actual'] != df_test_with_features['predicted']

if 'labname' in df_test_with_features.columns:
    print("\nError rates by labname (in %):")
    for labname in df_test_with_features['labname'].unique():
        labname_data = df_test_with_features[df_test_with_features['labname'] == labname]
        total = len(labname_data)
        errors = labname_data['error'].sum()
        error_rate = (errors / total) * 100 if total > 0 else 0
        print(f"Labname {labname}: {error_rate:.2f}%")

if 'users' in df_test_with_features.columns:
    print("\nError rates by users (in %):")
if 'users' in df_test_with_features.columns:
    print("\nError rates by users (in %):")
    for user in df_test_with_features['users'].unique()[:5]:  
        user_data = df_test_with_features[df_test_with_features['users'] == user]
        total = len(user_data)
        errors = user_data['error'].sum()
        error_rate = (errors / total) * 100 if total > 0 else 0
        print(f"User {user}: {error_rate:.2f}%")

joblib.dump(best_model, 'best_model.joblib')

['best_model.joblib']

## 6. Function

1. Write a function that takes a list of different models and a corresponding list of parameters (dicts) and returns a dict that contains all the 4 metrics for each model.

In [22]:
def evaluate_models(models_list, params_list, X_train, y_train, X_test, y_test):
    results = {}
    
    for model_class, params in zip(models_list, params_list):
        model_name = model_class.__name__
        
        model = model_class(**params)
        model.fit(X_train, y_train)
        
        y_pred = model.predict(X_test)
        y_pred_proba = model.predict_proba(X_test)
        
        accuracy = accuracy_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred, average='weighted')
        recall = recall_score(y_test, y_pred, average='weighted')
        roc_auc = roc_auc_score(y_test, y_pred_proba, multi_class='ovo', average='weighted')
        
        results[model_name] = {
            'accuracy': accuracy,
            'precision': precision,
            'recall': recall,
            'roc_auc': roc_auc
        }
    
    return results

models = [SVC, DecisionTreeClassifier, RandomForestClassifier]
params = [
    {'kernel': 'rbf', 'C': 5, 'gamma': 'scale', 'class_weight': 'balanced', 'random_state': 21, 'probability': True},
    {'max_depth': 10, 'class_weight': None, 'criterion': 'gini', 'random_state': 21},
    {'n_estimators': 50, 'max_depth': 28, 'class_weight': None, 'criterion': 'gini', 'random_state': 21}
]

results_dict = evaluate_models(models, params, X_train, y_train, X_test, y_test)

for model_name, metrics in results_dict.items():
    print(f"\n{model_name}:")
    for metric_name, value in metrics.items():
        print(f"  {metric_name}: {value:.5f}")



SVC:
  accuracy: 0.39349
  precision: 0.55994
  recall: 0.39349
  roc_auc: 0.77220

DecisionTreeClassifier:
  accuracy: 0.73373
  precision: 0.75231
  recall: 0.73373
  roc_auc: 0.90803

RandomForestClassifier:
  accuracy: 0.92899
  precision: 0.93009
  recall: 0.92899
  roc_auc: 0.99033
