# Day 09. Exercise 02
# Metrics

## 0. Imports

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score
from sklearn.preprocessing import LabelBinarizer
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
import joblib

## 1. Preprocessing

1. Create the same dataframe as in the previous exercise.
2. Using `train_test_split` with parameters `test_size=0.2`, `random_state=21` get `X_train`, `y_train`, `X_test`, `y_test`. Use the additional parameter `stratify`.

In [2]:
df_not_scaled = pd.read_csv('../ex01/data/day-of-week-not-scaled.csv')
df_dayofweek = pd.read_csv('../ex00/data/dayofweek.csv')
df_not_scaled['dayofweek'] = df_dayofweek['dayofweek']
df_not_scaled

Unnamed: 0,numTrials,hour,uid_user_0,uid_user_1,uid_user_10,uid_user_11,uid_user_12,uid_user_13,uid_user_14,uid_user_15,...,labname_lab03,labname_lab03s,labname_lab05s,labname_laba04,labname_laba04s,labname_laba05,labname_laba06,labname_laba06s,labname_project1,dayofweek
0,1,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,4
1,2,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,4
2,3,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,4
3,4,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,4
4,5,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1681,9,20,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,3
1682,6,20,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,3
1683,7,20,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,3
1684,8,20,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,3


In [3]:
X = df_not_scaled.drop('dayofweek', axis=1)
y = df_not_scaled['dayofweek']

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=21, stratify=y)

print(f"X_train shakli: {X_train.shape}")
print(f"y_train shakli: {y_train.shape}")
print(f"X_test shakli: {X_test.shape}")
print(f"y_test shakli: {y_test.shape}")

X_train shakli: (1348, 43)
y_train shakli: (1348,)
X_test shakli: (338, 43)
y_test shakli: (338,)


## 2. SVM

1. Use the best parameters from the previous exercise and train the model of SVM.
2. You need to calculate `accuracy`, `precision`, `recall`, `ROC AUC`.

 - `precision` and `recall` should be calculated for each class (use `average='weighted'`)
 - `ROC AUC` should be calculated for each class against any other class (all possible pairwise combinations) and then weighted average should be applied for the final metric
 - the code in the cell should display the result as below:

```
accuracy is 0.88757
precision is 0.89267
recall is 0.88757
roc_auc is 0.97878
```

In [5]:
best_svm_params = {
    'C': 10,
    'class_weight': None,
    'gamma': 'auto',
    'kernel': 'rbf'
}

svm_best = SVC(probability=True, random_state=21, **best_svm_params)

svm_best.fit(X_train, y_train)

y_pred_svm = svm_best.predict(X_test)

# ROC AUC uchun ehtimolliklar kerak bo'ladi
y_pred_proba_svm = svm_best.predict_proba(X_test)

In [6]:
accuracy_svm = accuracy_score(y_test, y_pred_svm)

precision_svm = precision_score(y_test, y_pred_svm, average='weighted')

recall_svm = recall_score(y_test, y_pred_svm, average='weighted')

lb = LabelBinarizer()
y_test_binarized = lb.fit_transform(y_test)


roc_auc_svm = roc_auc_score(y_test, y_pred_proba_svm, multi_class='ovo', average='weighted', labels=lb.classes_)

print(f"\naccuracy is {accuracy_svm:.5f}")
print(f"precision is {precision_svm:.5f}")
print(f"recall is {recall_svm:.5f}")
print(f"roc_auc is {roc_auc_svm:.5f}")


accuracy is 0.88757
precision is 0.89267
recall is 0.88757
roc_auc is 0.97878


## 3. Decision tree

1. The same task for decision tree

In [7]:
best_tree_params = {
    'class_weight': 'balanced',
    'criterion': 'gini',
    'max_depth': 21
}

tree_best = DecisionTreeClassifier(random_state=21, **best_tree_params)

tree_best.fit(X_train, y_train)

y_pred_tree = tree_best.predict(X_test)

y_pred_proba_tree = tree_best.predict_proba(X_test)

In [8]:
accuracy_tree = accuracy_score(y_test, y_pred_tree)
precision_tree = precision_score(y_test, y_pred_tree, average='weighted')
recall_tree = recall_score(y_test, y_pred_tree, average='weighted')
lb = LabelBinarizer()
y_test_binarized = lb.fit_transform(y_test)

roc_auc_tree = roc_auc_score(y_test, y_pred_proba_tree, multi_class='ovo', average='weighted', labels=lb.classes_)

print(f"accuracy is {accuracy_tree:.5f}")
print(f"precision is {precision_tree:.5f}")
print(f"recall is {recall_tree:.5f}")
print(f"roc_auc is {roc_auc_tree:.5f}")

accuracy is 0.88462
precision is 0.88765
recall is 0.88462
roc_auc is 0.93528


## 4. Random forest

1. The same task for random forest.

In [9]:
best_forest_params = {
    'n_estimators': 100,
    'max_depth': 24,
    'class_weight': 'balanced',
    'criterion': 'entropy'
}

forest_best = RandomForestClassifier(random_state=21, **best_forest_params)

forest_best.fit(X_train, y_train)

y_pred_forest = forest_best.predict(X_test)

y_pred_proba_forest = forest_best.predict_proba(X_test)

In [10]:
accuracy_forest = accuracy_score(y_test, y_pred_forest)
precision_forest = precision_score(y_test, y_pred_forest, average='weighted')
recall_forest = recall_score(y_test, y_pred_forest, average='weighted')

lb = LabelBinarizer()
y_test_binarized = lb.fit_transform(y_test)

roc_auc_forest = roc_auc_score(y_test, y_pred_proba_forest, multi_class='ovo', average='weighted', labels=lb.classes_)

print(f"\naccuracy is {accuracy_forest:.5f}")
print(f"precision is {precision_forest:.5f}")
print(f"recall is {recall_forest:.5f}")
print(f"roc_auc is {roc_auc_forest:.5f}")


accuracy is 0.92604
precision is 0.92754
recall is 0.92604
roc_auc is 0.98939


## 5. Predictions

1. Choose the best model.
2. Analyze: for which `weekday` your model makes the most errors (in % of the total number of samples of that class in your full dataset), for which `labname` and for which `users`.
3. Save the model.

In [11]:
best_forest_params = {
    'n_estimators': 100,
    'max_depth': 24,
    'class_weight': 'balanced',
    'criterion': 'entropy'
}
best_model = RandomForestClassifier(random_state=21, **best_forest_params)

best_model.fit(X_train, y_train)

y_pred = best_model.predict(X_test)

errors_mask = (y_test != y_pred)

errors = y_test[errors_mask]

X_test_errors = X_test[errors_mask] # Noto'g'ri bashorat qilingan X_test qatorlari

total_dayofweek_counts = y_test.value_counts().sort_index()
error_dayofweek_counts = errors.value_counts().sort_index()

for day, error_count in error_dayofweek_counts.items():
    total_count = total_dayofweek_counts.get(day, 0)
    if total_count > 0:
        error_percentage = (error_count / total_count) * 100
        print(f"  Day {day}: {error_count} ta xato / {total_count} ta umumiy namuna ({error_percentage:.2f}%)")
    else:
        print(f"  Day {day}: {error_count} ta xato (umumiy namuna yo'q - bu holat yuzaga kelmasligi kerak)")

if not error_dayofweek_counts.empty:
    most_errors_day = error_dayofweek_counts.idxmax()
    most_errors_day_percent = (error_dayofweek_counts[most_errors_day] / total_dayofweek_counts[most_errors_day]) * 100
    print(f"\n  Model eng ko'p xatolarni Haftaning kuni **{most_errors_day}** uchun qildi: **{most_errors_day_percent:.2f}%** xato.")
else:
    print("\n  Haftaning kunlari bo'yicha xato topilmadi.")

  Day 0: 6 ta xato / 27 ta umumiy namuna (22.22%)
  Day 1: 4 ta xato / 55 ta umumiy namuna (7.27%)
  Day 2: 2 ta xato / 30 ta umumiy namuna (6.67%)
  Day 3: 3 ta xato / 80 ta umumiy namuna (3.75%)
  Day 4: 3 ta xato / 21 ta umumiy namuna (14.29%)
  Day 5: 5 ta xato / 54 ta umumiy namuna (9.26%)
  Day 6: 2 ta xato / 71 ta umumiy namuna (2.82%)

  Model eng ko'p xatolarni Haftaning kuni **0** uchun qildi: **22.22%** xato.


In [12]:
# 2. Laboratoriya nomlari (`labname`) bo'yicha xatolar
labname_cols = [col for col in X.columns if 'labname_' in col]
if not X_test_errors.empty and len(labname_cols) > 0:
    print("\n2. Laboratoriya nomlari bo'yicha xatolar:")
    labname_error_counts = {}
    labname_total_counts = {}

    for lab_col in labname_cols:
        errors_in_lab = X_test_errors[lab_col].sum()
        total_in_lab = X_test[lab_col].sum()

        if errors_in_lab > 0:
            labname_error_counts[lab_col] = errors_in_lab
            labname_total_counts[lab_col] = total_in_lab

    sorted_labname_errors = sorted(labname_error_counts.items(), key=lambda item: item[1], reverse=True)

    if sorted_labname_errors:
        for lab_col, error_count in sorted_labname_errors:
            total_count = labname_total_counts.get(lab_col, 0)
            if total_count > 0:
                error_percentage = (error_count / total_count) * 100
                print(f"  **{lab_col.replace('labname_', '')}**: {int(error_count)} ta xato / {int(total_count)} ta umumiy ({error_percentage:.2f}%)")
        
        most_errors_labname = sorted_labname_errors[0][0].replace('labname_', '')
        most_errors_labname_percent = (sorted_labname_errors[0][1] / labname_total_counts[sorted_labname_errors[0][0]]) * 100
        print(f"\n  Model eng ko'p xatolarni **'{most_errors_labname}'** laboratoriya nomi uchun qildi: **{most_errors_labname_percent:.2f}%** xato.")
    else:
        print("  Laboratoriya nomlari bo'yicha xato topilmadi.")
else:
    print("\n2. Laboratoriya nomlari bo'yicha tahlil uchun xato namunalari yoki labname ustunlari topilmadi.")



2. Laboratoriya nomlari bo'yicha xatolar:
  **project1**: 10 ta xato / 186 ta umumiy (5.38%)
  **laba04**: 6 ta xato / 35 ta umumiy (17.14%)
  **laba04s**: 2 ta xato / 25 ta umumiy (8.00%)
  **laba06s**: 2 ta xato / 15 ta umumiy (13.33%)
  **code_rvw**: 1 ta xato / 13 ta umumiy (7.69%)
  **lab03**: 1 ta xato / 1 ta umumiy (100.00%)
  **lab05s**: 1 ta xato / 6 ta umumiy (16.67%)
  **laba05**: 1 ta xato / 47 ta umumiy (2.13%)
  **laba06**: 1 ta xato / 9 ta umumiy (11.11%)

  Model eng ko'p xatolarni **'project1'** laboratoriya nomi uchun qildi: **5.38%** xato.


In [13]:
# 3. Foydalanuvchilar (`users`) bo'yicha xatolar
user_cols = [col for col in X.columns if 'uid_user_' in col]

if not X_test_errors.empty and len(user_cols) > 0:
    print("\n3. Foydalanuvchilar bo'yicha xatolar:")
    user_error_counts = {}
    user_total_counts = {}

    for user_col in user_cols:
        errors_for_user = X_test_errors[user_col].sum()
        total_for_user = X_test[user_col].sum()

        if errors_for_user > 0:
            user_error_counts[user_col] = errors_for_user
            user_total_counts[user_col] = total_for_user

    sorted_user_errors = sorted(user_error_counts.items(), key=lambda item: item[1], reverse=True)

    if sorted_user_errors:
        for user_col, error_count in sorted_user_errors:
            total_count = user_total_counts.get(user_col, 0)
            if total_count > 0:
                error_percentage = (error_count / total_count) * 100
                print(f"  **{user_col.replace('uid_user_', 'User ')}**: {int(error_count)} ta xato / {int(total_count)} ta umumiy ({error_percentage:.2f}%)")
            else:
                print(f"  **{user_col.replace('uid_user_', 'User ')}**: {int(error_count)} ta xato (umumiy namuna yo'q)")

        most_errors_user = sorted_user_errors[0][0].replace('uid_user_', 'User ')
        most_errors_user_percent = (sorted_user_errors[0][1] / user_total_counts[sorted_user_errors[0][0]]) * 100
        print(f"\n  Model eng ko'p xatolarni **'{most_errors_user}'** foydalanuvchisi uchun qildi: **{most_errors_user_percent:.2f}%** xato.")
    else:
        print("  Foydalanuvchilar bo'yicha xato topilmadi.")
else:
    print("\n3. Foydalanuvchilar bo'yicha tahlil uchun xato namunalari yoki foydalanuvchi ustunlari topilmadi.")


3. Foydalanuvchilar bo'yicha xatolar:
  **User 2**: 3 ta xato / 28 ta umumiy (10.71%)
  **User 19**: 2 ta xato / 19 ta umumiy (10.53%)
  **User 25**: 2 ta xato / 22 ta umumiy (9.09%)
  **User 3**: 2 ta xato / 14 ta umumiy (14.29%)
  **User 31**: 2 ta xato / 18 ta umumiy (11.11%)
  **User 4**: 2 ta xato / 27 ta umumiy (7.41%)
  **User 6**: 2 ta xato / 4 ta umumiy (50.00%)
  **User 10**: 1 ta xato / 12 ta umumiy (8.33%)
  **User 13**: 1 ta xato / 17 ta umumiy (5.88%)
  **User 14**: 1 ta xato / 31 ta umumiy (3.23%)
  **User 16**: 1 ta xato / 5 ta umumiy (20.00%)
  **User 18**: 1 ta xato / 6 ta umumiy (16.67%)
  **User 22**: 1 ta xato / 1 ta umumiy (100.00%)
  **User 24**: 1 ta xato / 11 ta umumiy (9.09%)
  **User 27**: 1 ta xato / 6 ta umumiy (16.67%)
  **User 29**: 1 ta xato / 11 ta umumiy (9.09%)
  **User 30**: 1 ta xato / 8 ta umumiy (12.50%)

  Model eng ko'p xatolarni **'User 2'** foydalanuvchisi uchun qildi: **10.71%** xato.


In [14]:
model_filename = "best_dayofweek_random_forest_model.pkl"
joblib.dump(best_model, model_filename)

['best_dayofweek_random_forest_model.pkl']

## 6. Function

1. Write a function that takes a list of different models and a corresponding list of parameters (dicts) and returns a dict that contains all the 4 metrics for each model.

In [15]:
def evaluate_models_metrics(models: list, params_list: list) -> dict:
    """
    Berilgan modellar ro'yxatini o'rgatadi va ularning asosiy metrikalarini hisoblaydi.

    Args:
        models (list): Scikit-learn klassifikator obyektlari ro'yxati (masalan, [SVC(), DecisionTreeClassifier()]).
        params_list (list): Har bir modelga mos keladigan parametrlar lug'atlari ro'yxati.

    Returns:
        dict: Har bir model nomi uchun metrikalar lug'atini o'z ichiga olgan lug'at.
              {
                  'ModelNomi1': {'accuracy': ..., 'precision': ..., 'recall': ..., 'roc_auc': ...},
                  'ModelNomi2': {...}
              }
    """
    if X_train is None or X_test is None:
        print("Xatolik: O'quv/test ma'lumotlari mavjud emas. Funksiya ishlay olmaydi.")
        return {}

    results = {}
    lb = LabelBinarizer()
    y_test_binarized = lb.fit_transform(y_test)

    for i, model_obj in enumerate(models):
        model_name = type(model_obj).__name__ # Model nomini olish (e.g., 'SVC', 'DecisionTreeClassifier')
        current_params = params_list[i]

        print(f"\n--- Model: {model_name} ---")


        if model_name == 'SVC':
            current_model = SVC(random_state=21, probability=True, **current_params)
        elif model_name == 'DecisionTreeClassifier':
            current_model = DecisionTreeClassifier(random_state=21, **current_params)
        elif model_name == 'RandomForestClassifier':
            current_model = RandomForestClassifier(random_state=21, **current_params)
        else:
            current_model = model_obj.__class__(random_state=21, **current_params)

        current_model.fit(X_train, y_train)

        y_pred = current_model.predict(X_test)
        
        y_pred_proba = None
        if hasattr(current_model, 'predict_proba'):
            y_pred_proba = current_model.predict_proba(X_test)
        else:
            print(f"Ogohlantirish: {model_name} modelida predict_proba metodi mavjud emas. ROC AUC hisoblanmaydi.")

        accuracy = accuracy_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred, average='weighted', zero_division=0) 
        recall = recall_score(y_test, y_pred, average='weighted', zero_division=0)

        roc_auc = roc_auc_score(y_test, y_pred_proba, multi_class='ovo', average='weighted', labels=lb.classes_)

        results[model_name] = {
            'accuracy': accuracy,
            'precision': precision,
            'recall': recall,
            'roc_auc': roc_auc
        }

        print(f"  accuracy is {accuracy:.5f}")
        print(f"  precision is {precision:.5f}")
        print(f"  recall is {recall:.5f}")
        print(f"  roc_auc is {roc_auc:.5f}")

    return results

In [16]:
models_to_evaluate = [
    SVC(),
    DecisionTreeClassifier(),
    RandomForestClassifier()
]

params_for_models = [
    {'C': 10, 'class_weight': None, 'gamma': 'auto', 'kernel': 'rbf'},
    {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 21},
    {'n_estimators': 100, 'max_depth': 24, 'class_weight': 'balanced', 'criterion': 'entropy'}
]

all_metrics = evaluate_models_metrics(models_to_evaluate, params_for_models)
print("\n\n--- Barcha modellar uchun umumiy metrikalar ---")
for model_name, metrics in all_metrics.items():
    print(f"\nModel: {model_name}")
    for metric_name, value in metrics.items():
        print(f"  {metric_name}: {value:.5f}")


--- Model: SVC ---
  accuracy is 0.88757
  precision is 0.89267
  recall is 0.88757
  roc_auc is 0.97878

--- Model: DecisionTreeClassifier ---
  accuracy is 0.88462
  precision is 0.88765
  recall is 0.88462
  roc_auc is 0.93528

--- Model: RandomForestClassifier ---
  accuracy is 0.92604
  precision is 0.92754
  recall is 0.92604
  roc_auc is 0.98939


--- Barcha modellar uchun umumiy metrikalar ---

Model: SVC
  accuracy: 0.88757
  precision: 0.89267
  recall: 0.88757
  roc_auc: 0.97878

Model: DecisionTreeClassifier
  accuracy: 0.88462
  precision: 0.88765
  recall: 0.88462
  roc_auc: 0.93528

Model: RandomForestClassifier
  accuracy: 0.92604
  precision: 0.92754
  recall: 0.92604
  roc_auc: 0.98939
