# Day 09. Exercise 03
# Ensembles

## 0. Imports

In [67]:
import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, VotingClassifier, BaggingClassifier, StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score


## 1. Preprocessing

1. Create the same dataframe as in the previous exercise.
2. Using `train_test_split` with parameters `test_size=0.2`, `random_state=21` get `X_train`, `y_train`, `X_test`, `y_test` and then get `X_train`, `y_train`, `X_valid`, `y_valid` from the previous `X_train`, `y_train`. Use the additional parameter `stratify`.

In [68]:
# Features are read from the not-scaled CSV; the `dayofweek` target is copied
# over from dayofweek.csv (pandas aligns the two frames on their row index).
df_original = pd.read_csv('../data/dayofweek.csv')
df_unscaled = pd.read_csv('../data/day-of-week-not-scaled.csv').assign(
    dayofweek=df_original['dayofweek']
)


In [69]:
# Separate the target column from the feature matrix.
y = df_unscaled['dayofweek']
X = df_unscaled.drop(columns='dayofweek')

# Stratified 80/20 hold-out split; the fixed seed keeps it reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=21,
)


In [70]:
# Rebuild the outer training portion from X/y instead of reading X_train/y_train:
# the original form overwrote its own inputs, so re-running the cell shrank the
# training set a second time. The arguments match the first split exactly, and
# the fixed random_state makes it select the same rows.
outer_split = train_test_split(
    X, y, test_size=0.2, random_state=21, stratify=y
)
X_train_full, y_train_full = outer_split[0], outer_split[2]

# Carve a stratified 20% validation set out of the training portion.
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train_full, y_train_full, test_size=0.2, random_state=21, stratify=y_train_full
)


## 2. Individual classifiers

1. Train SVM, decision tree and random forest again with the best parameters that you got from the 01 exercise with `random_state=21` for all of them.
2. Evaluate `accuracy`, `precision`, and `recall` for them on the validation set.
3. The result of each cell of the section should look like this:

```
accuracy is 0.87778
precision is 0.88162
recall is 0.87778
```

In [71]:
# RBF-kernel SVM. probability=True is what lets the soft-voting ensemble
# further down call predict_proba on this model.
svm_clf = SVC(
    kernel='rbf',
    C=5,
    gamma='scale',
    class_weight='balanced',
    probability=True,
    random_state=21,
)
svm_clf.fit(X_train, y_train)
y_pred_svm = svm_clf.predict(X_valid)

# Validation metrics; precision and recall use the 'weighted' average.
svm_accuracy = accuracy_score(y_valid, y_pred_svm)
svm_precision = precision_score(y_valid, y_pred_svm, average='weighted')
svm_recall = recall_score(y_valid, y_pred_svm, average='weighted')

print(f"accuracy is {svm_accuracy:.5f}")
print(f"precision is {svm_precision:.5f}")
print(f"recall is {svm_recall:.5f}")


accuracy is 0.33333
precision is 0.50286
recall is 0.33333


In [72]:
# Depth-limited decision tree with the gini criterion.
dt_clf = DecisionTreeClassifier(
    criterion='gini',
    max_depth=10,
    class_weight=None,
    random_state=21,
)
dt_clf.fit(X_train, y_train)
y_pred_dt = dt_clf.predict(X_valid)

# Validation metrics; precision and recall use the 'weighted' average.
dt_accuracy = accuracy_score(y_valid, y_pred_dt)
dt_precision = precision_score(y_valid, y_pred_dt, average='weighted')
dt_recall = recall_score(y_valid, y_pred_dt, average='weighted')

print(f"accuracy is {dt_accuracy:.5f}")
print(f"precision is {dt_precision:.5f}")
print(f"recall is {dt_recall:.5f}")


accuracy is 0.74074
precision is 0.77392
recall is 0.74074


In [73]:
# Random forest: 50 trees, each allowed to grow to depth 28.
rf_clf = RandomForestClassifier(
    n_estimators=50,
    criterion='gini',
    max_depth=28,
    class_weight=None,
    random_state=21,
)
rf_clf.fit(X_train, y_train)
y_pred_rf = rf_clf.predict(X_valid)

# Validation metrics; precision and recall use the 'weighted' average.
rf_accuracy = accuracy_score(y_valid, y_pred_rf)
rf_precision = precision_score(y_valid, y_pred_rf, average='weighted')
rf_recall = recall_score(y_valid, y_pred_rf, average='weighted')

print(f"accuracy is {rf_accuracy:.5f}")
print(f"precision is {rf_precision:.5f}")
print(f"recall is {rf_recall:.5f}")


accuracy is 0.89259
precision is 0.89361
recall is 0.89259


## 3. Voting classifiers

1. Using `VotingClassifier` and the three models that you have just trained, calculate the `accuracy`, `precision`, and `recall` on the validation set.
2. Play with the other parameters.
3. Calculate the `accuracy`, `precision` and `recall` on the test set for the model with the best weights in terms of accuracy (if there are several of them with equal values, choose the one with the higher precision).

In [74]:
# Candidate weightings in (svm, dt, rf) order; each one is fitted on the
# training set and scored on the validation set.
voting_weights = [(1, 1, 1), (2, 1, 1), (1, 2, 1), (1, 1, 2), (2, 2, 1), (2, 1, 2), (1, 2, 2), (2, 2, 2)]
voting_results = []

base_estimators = (('svm', svm_clf), ('dt', dt_clf), ('rf', rf_clf))

for weights in voting_weights:
    # A fresh estimator list per ensemble, so no two ensembles share one.
    voting_clf = VotingClassifier(
        estimators=list(base_estimators),
        voting='soft',
        weights=weights,
    )
    voting_clf.fit(X_train, y_train)
    y_pred_voting = voting_clf.predict(X_valid)

    acc = accuracy_score(y_valid, y_pred_voting)
    prec = precision_score(y_valid, y_pred_voting, average='weighted')
    rec = recall_score(y_valid, y_pred_voting, average='weighted')

    # Keep the fitted ensemble so the winner can be re-used on the test set.
    voting_results.append(
        dict(weights=weights, accuracy=acc, precision=prec, recall=rec, model=voting_clf)
    )

    print(f"Weights {weights}: accuracy={acc:.5f}, precision={prec:.5f}, recall={rec:.5f}")


Weights (1, 1, 1): accuracy=0.84815, precision=0.85296, recall=0.84815
Weights (2, 1, 1): accuracy=0.82963, precision=0.83901, recall=0.82963
Weights (2, 1, 1): accuracy=0.82963, precision=0.83901, recall=0.82963
Weights (1, 2, 1): accuracy=0.77037, precision=0.79400, recall=0.77037
Weights (1, 2, 1): accuracy=0.77037, precision=0.79400, recall=0.77037
Weights (1, 1, 2): accuracy=0.87037, precision=0.87175, recall=0.87037
Weights (1, 1, 2): accuracy=0.87037, precision=0.87175, recall=0.87037
Weights (2, 2, 1): accuracy=0.77407, precision=0.80051, recall=0.77407
Weights (2, 2, 1): accuracy=0.77407, precision=0.80051, recall=0.77407
Weights (2, 1, 2): accuracy=0.86667, precision=0.86784, recall=0.86667
Weights (2, 1, 2): accuracy=0.86667, precision=0.86784, recall=0.86667
Weights (1, 2, 2): accuracy=0.86296, precision=0.86486, recall=0.86296
Weights (1, 2, 2): accuracy=0.86296, precision=0.86486, recall=0.86296
Weights (2, 2, 2): accuracy=0.84815, precision=0.85296, recall=0.84815
Weight

In [75]:
def _voting_rank(entry):
    """Sort key: validation accuracy first, precision as the tie-breaker."""
    return entry['accuracy'], entry['precision']


best_voting = max(voting_results, key=_voting_rank)

print(f"\nBest voting weights: {best_voting['weights']}")
print(f"Validation accuracy: {best_voting['accuracy']:.5f}")
print(f"Validation precision: {best_voting['precision']:.5f}")
print(f"Validation recall: {best_voting['recall']:.5f}")



Best voting weights: (1, 1, 2)
Validation accuracy: 0.87037
Validation precision: 0.87175
Validation recall: 0.87037


In [76]:
# Score the selected voting ensemble on the held-out test set.
voting_test_model = best_voting['model']
y_pred_voting_test = voting_test_model.predict(X_test)

weighted_avg = {'average': 'weighted'}
voting_test_acc = accuracy_score(y_test, y_pred_voting_test)
voting_test_prec = precision_score(y_test, y_pred_voting_test, **weighted_avg)
voting_test_rec = recall_score(y_test, y_pred_voting_test, **weighted_avg)

print(f"Test accuracy is {voting_test_acc:.5f}")
print(f"Test precision is {voting_test_prec:.5f}")
print(f"Test recall is {voting_test_rec:.5f}")


Test accuracy is 0.88166
Test precision is 0.88904
Test recall is 0.88166


In [77]:
# Ensemble sizes to try for the bagged SVM.
bagging_estimators = [5, 10, 20, 50, 100]
bagging_results = []

for n_est in bagging_estimators:
    # A fresh, unfitted SVM template for every ensemble.
    base_svm = SVC(kernel='rbf', C=5, gamma='scale', class_weight='balanced', probability=True)
    bagging_clf = BaggingClassifier(estimator=base_svm, n_estimators=n_est, random_state=21)
    bagging_clf.fit(X_train, y_train)
    y_pred_bagging = bagging_clf.predict(X_valid)

    acc = accuracy_score(y_valid, y_pred_bagging)
    # zero_division=0: classes that are never predicted score 0 instead of warning.
    prec = precision_score(y_valid, y_pred_bagging, average='weighted', zero_division=0)
    rec = recall_score(y_valid, y_pred_bagging, average='weighted')

    bagging_results.append(
        dict(n_estimators=n_est, accuracy=acc, precision=prec, recall=rec, model=bagging_clf)
    )

    print(f"n_estimators={n_est}: accuracy={acc:.5f}, precision={prec:.5f}, recall={rec:.5f}")

n_estimators=5: accuracy=0.37778, precision=0.47569, recall=0.37778
n_estimators=10: accuracy=0.35926, precision=0.38565, recall=0.35926
n_estimators=10: accuracy=0.35926, precision=0.38565, recall=0.35926
n_estimators=20: accuracy=0.36296, precision=0.40099, recall=0.36296
n_estimators=20: accuracy=0.36296, precision=0.40099, recall=0.36296
n_estimators=50: accuracy=0.35185, precision=0.30051, recall=0.35185
n_estimators=50: accuracy=0.35185, precision=0.30051, recall=0.35185
n_estimators=100: accuracy=0.35185, precision=0.29983, recall=0.35185
n_estimators=100: accuracy=0.35185, precision=0.29983, recall=0.35185


In [78]:
def _bagging_rank(entry):
    """Sort key: validation accuracy first, precision as the tie-breaker."""
    return entry['accuracy'], entry['precision']


best_bagging = max(bagging_results, key=_bagging_rank)

print(f"\nBest bagging n_estimators: {best_bagging['n_estimators']}")
print(f"Validation accuracy: {best_bagging['accuracy']:.5f}")
print(f"Validation precision: {best_bagging['precision']:.5f}")
print(f"Validation recall: {best_bagging['recall']:.5f}")



Best bagging n_estimators: 5
Validation accuracy: 0.37778
Validation precision: 0.47569
Validation recall: 0.37778


In [79]:
# Score the selected bagging ensemble on the held-out test set.
bagging_test_model = best_bagging['model']
y_pred_bagging_test = bagging_test_model.predict(X_test)

bagging_test_acc = accuracy_score(y_test, y_pred_bagging_test)
bagging_test_prec = precision_score(
    y_test,
    y_pred_bagging_test,
    average='weighted',
    zero_division=0,  # never-predicted classes score 0 instead of warning
)
bagging_test_rec = recall_score(y_test, y_pred_bagging_test, average='weighted')

print(f"Test accuracy is {bagging_test_acc:.5f}")
print(f"Test precision is {bagging_test_prec:.5f}")
print(f"Test recall is {bagging_test_rec:.5f}")

Test accuracy is 0.41124
Test precision is 0.26049
Test recall is 0.41124


In [80]:
stacking_n_splits = [2, 3, 4, 5, 6, 7]
stacking_passthrough = [True, False]
stacking_results = []

# Every (n_splits, passthrough) pair, with n_splits varying slowest.
stacking_grid = [
    (fold_count, use_passthrough)
    for fold_count in stacking_n_splits
    for use_passthrough in stacking_passthrough
]

for n_splits, passthrough in stacking_grid:
    # Seeded, shuffled folds so the meta-features are reproducible.
    cv = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=21)

    # NOTE(review): the exercise text asks for
    # final_estimator=LogisticRegression(solver='liblinear'); this cell uses
    # lbfgs instead — confirm the deviation is intentional.
    meta_learner = LogisticRegression(solver='lbfgs', max_iter=1000, random_state=21)
    stacking_clf = StackingClassifier(
        estimators=[('svm', svm_clf), ('dt', dt_clf), ('rf', rf_clf)],
        final_estimator=meta_learner,
        cv=cv,
        passthrough=passthrough,
    )
    stacking_clf.fit(X_train, y_train)
    y_pred_stacking = stacking_clf.predict(X_valid)

    acc = accuracy_score(y_valid, y_pred_stacking)
    prec = precision_score(y_valid, y_pred_stacking, average='weighted')
    rec = recall_score(y_valid, y_pred_stacking, average='weighted')

    stacking_results.append(
        dict(
            n_splits=n_splits,
            passthrough=passthrough,
            accuracy=acc,
            precision=prec,
            recall=rec,
            model=stacking_clf,
        )
    )

    print(f"n_splits={n_splits}, passthrough={passthrough}: accuracy={acc:.5f}, precision={prec:.5f}, recall={rec:.5f}")


n_splits=2, passthrough=True: accuracy=0.89630, precision=0.89670, recall=0.89630
n_splits=2, passthrough=False: accuracy=0.90000, precision=0.90058, recall=0.90000
n_splits=2, passthrough=False: accuracy=0.90000, precision=0.90058, recall=0.90000
n_splits=3, passthrough=True: accuracy=0.88889, precision=0.88936, recall=0.88889
n_splits=3, passthrough=True: accuracy=0.88889, precision=0.88936, recall=0.88889
n_splits=3, passthrough=False: accuracy=0.88519, precision=0.88915, recall=0.88519
n_splits=3, passthrough=False: accuracy=0.88519, precision=0.88915, recall=0.88519
n_splits=4, passthrough=True: accuracy=0.89259, precision=0.89233, recall=0.89259
n_splits=4, passthrough=True: accuracy=0.89259, precision=0.89233, recall=0.89259
n_splits=4, passthrough=False: accuracy=0.89630, precision=0.89677, recall=0.89630
n_splits=4, passthrough=False: accuracy=0.89630, precision=0.89677, recall=0.89630
n_splits=5, passthrough=True: accuracy=0.88889, precision=0.88830, recall=0.88889
n_splits=5

## 4. Bagging classifiers

1. Using `BaggingClassifier` and `SVM` with the best parameters create an ensemble, try different values of the `n_estimators`, use `random_state=21`.
2. Play with the other parameters.
3. Calculate the `accuracy`, `precision`, and `recall` for the model with the best parameters (in terms of accuracy) on the test set (if there are several of them with equal values, choose the one with the higher precision)

In [81]:
# Ensemble sizes to try. Defined here so this cell no longer depends on a
# variable that was only created in an earlier, out-of-section cell.
bagging_estimators = [5, 10, 20, 50, 100]
bagging_results = []

for n_est in bagging_estimators:
    bagging_clf = BaggingClassifier(
        estimator=SVC(kernel='rbf', C=5, gamma='scale', class_weight='balanced', probability=True),
        n_estimators=n_est,
        random_state=21
    )
    bagging_clf.fit(X_train, y_train)

    # Score each ensemble on the validation set.
    y_pred_bagging = bagging_clf.predict(X_valid)
    acc = accuracy_score(y_valid, y_pred_bagging)
    # zero_division=0: classes that are never predicted score 0 instead of warning.
    prec = precision_score(y_valid, y_pred_bagging, average='weighted', zero_division=0)
    rec = recall_score(y_valid, y_pred_bagging, average='weighted')

    # Keep the fitted ensemble so the winner can be re-used on the test set.
    bagging_results.append({
        'n_estimators': n_est,
        'accuracy': acc,
        'precision': prec,
        'recall': rec,
        'model': bagging_clf
    })

    print(f"n_estimators={n_est}: accuracy={acc:.5f}, precision={prec:.5f}, recall={rec:.5f}")

n_estimators=5: accuracy=0.37778, precision=0.47569, recall=0.37778
n_estimators=10: accuracy=0.35926, precision=0.38565, recall=0.35926
n_estimators=10: accuracy=0.35926, precision=0.38565, recall=0.35926
n_estimators=20: accuracy=0.36296, precision=0.40099, recall=0.36296
n_estimators=20: accuracy=0.36296, precision=0.40099, recall=0.36296
n_estimators=50: accuracy=0.35185, precision=0.30051, recall=0.35185
n_estimators=50: accuracy=0.35185, precision=0.30051, recall=0.35185
n_estimators=100: accuracy=0.35185, precision=0.29983, recall=0.35185
n_estimators=100: accuracy=0.35185, precision=0.29983, recall=0.35185


In [82]:
def _rank_bagging_entry(entry):
    """Sort key: validation accuracy first, precision as the tie-breaker."""
    return entry['accuracy'], entry['precision']


best_bagging = max(bagging_results, key=_rank_bagging_entry)

print(f"\nBest bagging n_estimators: {best_bagging['n_estimators']}")
print(f"Validation accuracy: {best_bagging['accuracy']:.5f}")
print(f"Validation precision: {best_bagging['precision']:.5f}")
print(f"Validation recall: {best_bagging['recall']:.5f}")



Best bagging n_estimators: 5
Validation accuracy: 0.37778
Validation precision: 0.47569
Validation recall: 0.37778


In [83]:
# Evaluate the best bagging ensemble on the held-out test set.
y_pred_bagging_test = best_bagging['model'].predict(X_test)

bagging_metric_kwargs = {'average': 'weighted'}
bagging_test_acc = accuracy_score(y_test, y_pred_bagging_test)
# zero_division=0: classes that are never predicted score 0 instead of warning.
bagging_test_prec = precision_score(
    y_test, y_pred_bagging_test, zero_division=0, **bagging_metric_kwargs
)
bagging_test_rec = recall_score(y_test, y_pred_bagging_test, **bagging_metric_kwargs)

print(f"Test accuracy is {bagging_test_acc:.5f}")
print(f"Test precision is {bagging_test_prec:.5f}")
print(f"Test recall is {bagging_test_rec:.5f}")

Test accuracy is 0.41124
Test precision is 0.26049
Test recall is 0.41124


## 5. Stacking classifiers

1. To achieve reproducibility in this case you will have to create an object of cross-validation generator: `StratifiedKFold(n_splits=n, shuffle=True, random_state=21)`, where `n` you will try to optimize (the details are below).
2. Using `StackingClassifier` and the three models that you have recently trained, calculate the `accuracy`, `precision` and `recall` on the validation set, try different values of `n_splits` `[2, 3, 4, 5, 6, 7]` in the cross-validation generator and parameter `passthrough` in the classifier itself.
3. Calculate the `accuracy`, `precision`, and `recall` for the model with the best parameters (in terms of accuracy) on the test set (if there are several of them with equal values, choose the one with the higher precision). Use `final_estimator=LogisticRegression(solver='liblinear')`.

In [84]:
def _stacking_rank(entry):
    """Sort key: validation accuracy first, precision as the tie-breaker."""
    return entry['accuracy'], entry['precision']


best_stacking = max(stacking_results, key=_stacking_rank)

print(f"\nBest stacking: n_splits={best_stacking['n_splits']}, passthrough={best_stacking['passthrough']}")
print(f"Validation accuracy: {best_stacking['accuracy']:.5f}")
print(f"Validation precision: {best_stacking['precision']:.5f}")
print(f"Validation recall: {best_stacking['recall']:.5f}")



Best stacking: n_splits=2, passthrough=False
Validation accuracy: 0.90000
Validation precision: 0.90058
Validation recall: 0.90000


In [85]:
# Score the selected stacking ensemble on the held-out test set.
stacking_test_model = best_stacking['model']
y_pred_stacking_test = stacking_test_model.predict(X_test)

stacking_test_acc = accuracy_score(y_test, y_pred_stacking_test)
stacking_test_prec = precision_score(
    y_test, y_pred_stacking_test, average='weighted'
)
stacking_test_rec = recall_score(
    y_test, y_pred_stacking_test, average='weighted'
)

print(f"Test accuracy is {stacking_test_acc:.5f}")
print(f"Test precision is {stacking_test_prec:.5f}")
print(f"Test recall is {stacking_test_rec:.5f}")


Test accuracy is 0.90237
Test precision is 0.90594
Test recall is 0.90237


In [86]:
# One row per tuned ensemble: (name, test accuracy, test precision, fitted model).
all_models = [
    ('Voting', voting_test_acc, voting_test_prec, best_voting['model']),
    ('Bagging', bagging_test_acc, bagging_test_prec, best_bagging['model']),
    ('Stacking', stacking_test_acc, stacking_test_prec, best_stacking['model']),
]

# Highest test accuracy wins; precision breaks ties.
best_model = max(all_models, key=lambda candidate: candidate[1:3])
best_name, best_acc, best_prec = best_model[:3]

print(f"\nBest overall model: {best_name}")
print(f"Test accuracy: {best_acc:.5f}")
print(f"Test precision: {best_prec:.5f}")



Best overall model: Stacking
Test accuracy: 0.90237
Test precision: 0.90594


In [87]:
# Re-predict with the winning model and pair predictions with the true labels.
best_final_model = best_model[3]
y_pred_final = best_final_model.predict(X_test)

analysis_df = pd.DataFrame({'actual': y_test.values, 'predicted': y_pred_final})
is_wrong = analysis_df['actual'] != analysis_df['predicted']

# Share of misclassified test samples per weekday label 0..6;
# a label with no test samples is reported as 0.
error_rates_weekday = {}
for day in range(7):
    is_day = analysis_df['actual'] == day
    total_samples = int(is_day.sum())
    errors = int((is_day & is_wrong).sum())
    error_rates_weekday[day] = (errors / total_samples) * 100 if total_samples > 0 else 0

print("Error rates by weekday (in %):")
for day, rate in error_rates_weekday.items():
    print(f"Weekday {day}: {rate:.2f}%")

# On ties, the first weekday with the highest rate is reported.
worst_weekday, worst_rate = max(error_rates_weekday.items(), key=lambda pair: pair[1])
print(f"\nWorst performing weekday: {worst_weekday} with error rate {worst_rate:.2f}%")


Error rates by weekday (in %):
Weekday 0: 33.33%
Weekday 1: 5.45%
Weekday 2: 10.00%
Weekday 3: 2.50%
Weekday 4: 19.05%
Weekday 5: 12.96%
Weekday 6: 7.04%

Worst performing weekday: 0 with error rate 33.33%


In [88]:
def _recover_category(frame, name, prefixes):
    """Return per-row labels for the categorical feature `name`, or None.

    Uses the raw column `name` when `frame` has it. Otherwise looks for one-hot
    indicator columns whose names start with one of `prefixes` and maps every
    row to the suffix of its largest indicator. Rows whose indicators are all
    zero become NaN. Returns None when neither form is present.
    """
    if name in frame.columns:
        return frame[name]
    for prefix in prefixes:
        indicator_cols = [col for col in frame.columns if str(col).startswith(prefix)]
        if not indicator_cols:
            continue
        indicators = frame[indicator_cols].astype(float)
        labels = indicators.idxmax(axis=1).astype(str).str[len(prefix):]
        # idxmax would otherwise report the first column for an all-zero row.
        return labels.where(indicators.sum(axis=1) > 0)
    return None


X_test_with_indices = X_test.reset_index()
df_test_with_features = X_test_with_indices.copy()
df_test_with_features['actual'] = y_test.values
df_test_with_features['predicted'] = y_pred_final
df_test_with_features['error'] = df_test_with_features['actual'] != df_test_with_features['predicted']

# The original guards only looked for raw 'labname' / 'users' columns and
# printed nothing when they were missing.
# NOTE(review): the fallback prefixes assume the features were one-hot encoded
# as `labname_*` and `uid_*` / `users_*` — confirm against the CSV header.
labname_groups = _recover_category(df_test_with_features, 'labname', ('labname_',))
if labname_groups is None:
    print("\nNo labname feature found in X_test; skipping error rates by labname.")
else:
    print("\nError rates by labname (in %):")
    labname_rates = df_test_with_features['error'].groupby(labname_groups, sort=False).mean() * 100
    for labname, error_rate in labname_rates.items():
        print(f"Labname {labname}: {error_rate:.2f}%")

user_groups = _recover_category(df_test_with_features, 'users', ('users_', 'uid_'))
if user_groups is None:
    print("\nNo users feature found in X_test; skipping error rates by users.")
else:
    print("\nError rates by users (top 10):")
    user_rates = df_test_with_features['error'].groupby(user_groups, sort=False).mean() * 100
    # "Top 10" means the ten users with the highest error rate, not the first
    # ten encountered; the stable sort keeps tied users in order of appearance.
    top_user_rates = user_rates.sort_values(ascending=False, kind='stable').head(10)
    for user, error_rate in top_user_rates.items():
        print(f"User {user}: {error_rate:.2f}%")


In [89]:
# Persist the selected ensemble (path is relative to the working directory).
model_path = 'best_model.joblib'
joblib.dump(best_final_model, model_path)
print("Model saved successfully!")


Model saved successfully!


## 6. Predictions

1. Choose the best model in terms of accuracy (if there are several of them with equal values, choose the one with the higher precision).
2. Analyze: for which weekday your model makes the most errors (in % of the total number of samples of that class in your full dataset), for which labname and for which users.
3. Save the model.

In [90]:
# Candidates as (name, test accuracy, test precision, fitted model).
voting_entry = ('Voting', voting_test_acc, voting_test_prec, best_voting['model'])
bagging_entry = ('Bagging', bagging_test_acc, bagging_test_prec, best_bagging['model'])
stacking_entry = ('Stacking', stacking_test_acc, stacking_test_prec, best_stacking['model'])
all_models = [voting_entry, bagging_entry, stacking_entry]


def _overall_rank(candidate):
    """Sort key: test accuracy first, test precision as the tie-breaker."""
    return candidate[1], candidate[2]


best_model = max(all_models, key=_overall_rank)
print(f"Best overall model: {best_model[0]}")
print(f"Test accuracy: {best_model[1]:.5f}")
print(f"Test precision: {best_model[2]:.5f}")


Best overall model: Stacking
Test accuracy: 0.90237
Test precision: 0.90594


In [91]:
# The fitted estimator is the fourth field of the winning tuple.
best_final_model = best_model[3]
y_pred_final = best_final_model.predict(X_test)

analysis_df = pd.DataFrame({'actual': y_test.values, 'predicted': y_pred_final})
misclassified = analysis_df['actual'] != analysis_df['predicted']

# Percentage of wrong predictions among the test samples of each weekday
# label 0..6; a label with no test samples is reported as 0.
error_rates_weekday = {}
for day in range(7):
    day_mask = analysis_df['actual'] == day
    total_samples = int(day_mask.sum())
    errors = int((day_mask & misclassified).sum())

    if total_samples <= 0:
        error_rates_weekday[day] = 0
        continue
    error_rates_weekday[day] = (errors / total_samples) * 100

print("Error rates by weekday (in %):")
for day, rate in error_rates_weekday.items():
    print(f"Weekday {day}: {rate:.2f}%")

# On ties, the first weekday with the highest rate is reported.
worst_weekday = max(error_rates_weekday, key=lambda weekday: error_rates_weekday[weekday])
print(f"\nWorst performing weekday: {worst_weekday} with error rate {error_rates_weekday[worst_weekday]:.2f}%")


Error rates by weekday (in %):
Weekday 0: 33.33%
Weekday 1: 5.45%
Weekday 2: 10.00%
Weekday 3: 2.50%
Weekday 4: 19.05%
Weekday 5: 12.96%
Weekday 6: 7.04%

Worst performing weekday: 0 with error rate 33.33%


In [92]:
def _recover_category(frame, name, prefixes):
    """Return per-row labels for the categorical feature `name`, or None.

    Uses the raw column `name` when `frame` has it. Otherwise looks for one-hot
    indicator columns whose names start with one of `prefixes` and maps every
    row to the suffix of its largest indicator. Rows whose indicators are all
    zero become NaN. Returns None when neither form is present.
    """
    if name in frame.columns:
        return frame[name]
    for prefix in prefixes:
        indicator_cols = [col for col in frame.columns if str(col).startswith(prefix)]
        if not indicator_cols:
            continue
        indicators = frame[indicator_cols].astype(float)
        labels = indicators.idxmax(axis=1).astype(str).str[len(prefix):]
        # idxmax would otherwise report the first column for an all-zero row.
        return labels.where(indicators.sum(axis=1) > 0)
    return None


X_test_with_indices = X_test.reset_index()
df_test_with_features = X_test_with_indices.copy()
df_test_with_features['actual'] = y_test.values
df_test_with_features['predicted'] = y_pred_final
df_test_with_features['error'] = df_test_with_features['actual'] != df_test_with_features['predicted']

# The original guards only looked for raw 'labname' / 'users' columns and
# printed nothing when they were missing.
# NOTE(review): the fallback prefixes assume the features were one-hot encoded
# as `labname_*` and `uid_*` / `users_*` — confirm against the CSV header.
error_breakdowns = (
    ('labname', ('labname_',), "\nError rates by labname (in %):", "Labname"),
    ('users', ('users_', 'uid_'), "\nError rates by users (in %):", "User"),
)

for feature, prefixes, title, row_label in error_breakdowns:
    groups = _recover_category(df_test_with_features, feature, prefixes)
    if groups is None:
        print(f"\nNo {feature} feature found in X_test; skipping error rates by {feature}.")
        continue

    print(title)
    # Mean of the boolean error flag per group = errors / total; sort=False
    # keeps groups in order of first appearance, like the original loop.
    rates = df_test_with_features['error'].groupby(groups, sort=False).mean() * 100
    for key, error_rate in rates.items():
        print(f"{row_label} {key}: {error_rate:.2f}%")


In [93]:
# Serialize the winning ensemble with joblib (path is relative to the working directory).
saved_model_file = 'best_model.joblib'
joblib.dump(best_final_model, saved_model_file)
print("Model saved successfully!")


Model saved successfully!
