In [35]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt


In [36]:
df = pd.read_csv('/content/drive/MyDrive/MAJOR PROJECT/notebook/cpu_scheduling_dataset.csv')

In [37]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 732 entries, 0 to 731
Data columns (total 16 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   num_processes         732 non-null    float64
 1   avg_burst_time        732 non-null    float64
 2   std_burst_time        732 non-null    float64
 3   min_burst_time        732 non-null    float64
 4   max_burst_time        732 non-null    float64
 5   avg_arrival_time      732 non-null    float64
 6   arrival_spread        732 non-null    float64
 7   avg_inter_arrival     732 non-null    float64
 8   avg_priority          732 non-null    float64
 9   std_priority          732 non-null    float64
 10  time_quantum          732 non-null    float64
 11  quantum_to_avg_burst  732 non-null    float64
 12  burst_variance_ratio  732 non-null    float64
 13  total_burst_time      732 non-null    float64
 14  system_load           732 non-null    float64
 15  best_algorithm        7

In [38]:
X, y = df.drop('best_algorithm', axis = 1), df.best_algorithm

In [39]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)

In [40]:
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
le = LabelEncoder()
sl = StandardScaler()
y_train = le.fit_transform(y_train)
y_test = le.transform(y_test)
X_train = sl.fit_transform(X_train)
X_test = sl.transform(X_test)

In [41]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
def evaluation(y_test, y_pred):
  print('Accuracy Score: ', accuracy_score(y_test, y_pred))
  print('Confusion Matrix: \n', confusion_matrix(y_test, y_pred))
  print('Classification Report: \n', classification_report(y_test, y_pred))

In [42]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier

models = {
    'ElasticNetCV': LogisticRegression(),
    'DecisionTreeClassifier': DecisionTreeClassifier(),
    'RandomForestClassifier': RandomForestClassifier(),
    'SVC': SVC(),
    'XGBClassifier': XGBClassifier()
}

for name, model in models.items():
  model.fit(X_train, y_train)
  y_pred = model.predict(X_test)
  print(name)
  evaluation(y_test, y_pred)
  print('_'*50)


ElasticNetCV
Accuracy Score:  0.8571428571428571
Confusion Matrix: 
 [[51  0  1 10]
 [ 0 11  1  0]
 [ 1  0 12  1]
 [ 7  0  0 52]]
Classification Report: 
               precision    recall  f1-score   support

           0       0.86      0.82      0.84        62
           1       1.00      0.92      0.96        12
           2       0.86      0.86      0.86        14
           3       0.83      0.88      0.85        59

    accuracy                           0.86       147
   macro avg       0.89      0.87      0.88       147
weighted avg       0.86      0.86      0.86       147

__________________________________________________
DecisionTreeClassifier
Accuracy Score:  0.8299319727891157
Confusion Matrix: 
 [[47  0  3 12]
 [ 0 12  0  0]
 [ 0  1 12  1]
 [ 8  0  0 51]]
Classification Report: 
               precision    recall  f1-score   support

           0       0.85      0.76      0.80        62
           1       0.92      1.00      0.96        12
           2       0.80      0.

In [43]:
from sklearn.model_selection import RandomizedSearchCV
xgb_param_grid = {
    'n_estimators': [100, 200, 300],             # Number of boosting rounds
    'max_depth': [3, 5, 7, 10],                  # Max depth of each tree
    'learning_rate': [0.01, 0.05, 0.1, 0.2],    # Step size shrinkage
    'subsample': [0.6, 0.8, 1.0],                # Fraction of samples for each tree
    'colsample_bytree': [0.6, 0.8, 1.0],        # Fraction of features for each tree
    'gamma': [0, 0.1, 0.3],                      # Min loss reduction for split
    'reg_alpha': [0, 0.01, 0.1],                 # L1 regularization
    'reg_lambda': [1, 1.5, 2],                   # L2 regularization
    'scale_pos_weight': [1]                       # Useful for imbalanced datasets
}
rf_param_grid = {
    'n_estimators': [100, 200, 300, 500],            # Number of trees
    'max_depth': [None, 10, 20, 30, 50],             # Max depth of trees
    'min_samples_split': [2, 5, 10],                 # Min samples to split internal node
    'min_samples_leaf': [1, 2, 4],                   # Min samples at leaf node
    'max_features': ['sqrt', 'log2', None],          # Max features considered for split
    'bootstrap': [True, False],                      # Use bootstrap samples or not
    'criterion': ['gini', 'entropy']                 # Impurity measure
}


In [44]:
random = [
    ('RandomForestClassifier', RandomForestClassifier(), rf_param_grid),
    ('XgboostClassifier', XGBClassifier(), xgb_param_grid),
]
best_param = {}
for name, model, param in gridcv:
  grid = RandomizedSearchCV(model, param, cv = 5, verbose = 2)
  grid.fit(X_train, y_train)
  best_param[name] = grid.best_params_
print(best_param)

Fitting 5 folds for each of 10 candidates, totalling 50 fits
[CV] END bootstrap=True, criterion=entropy, max_depth=None, max_features=None, min_samples_leaf=4, min_samples_split=2, n_estimators=100; total time=   0.6s
[CV] END bootstrap=True, criterion=entropy, max_depth=None, max_features=None, min_samples_leaf=4, min_samples_split=2, n_estimators=100; total time=   0.4s
[CV] END bootstrap=True, criterion=entropy, max_depth=None, max_features=None, min_samples_leaf=4, min_samples_split=2, n_estimators=100; total time=   0.6s
[CV] END bootstrap=True, criterion=entropy, max_depth=None, max_features=None, min_samples_leaf=4, min_samples_split=2, n_estimators=100; total time=   0.8s
[CV] END bootstrap=True, criterion=entropy, max_depth=None, max_features=None, min_samples_leaf=4, min_samples_split=2, n_estimators=100; total time=   0.7s
[CV] END bootstrap=False, criterion=gini, max_depth=10, max_features=sqrt, min_samples_leaf=4, min_samples_split=2, n_estimators=300; total time=   1.4s
[

Parameters: { "scale_pos_weight" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=1.0, gamma=0.3, learning_rate=0.01, max_depth=7, n_estimators=100, reg_alpha=0, reg_lambda=2, scale_pos_weight=1, subsample=0.6; total time=   0.3s


Parameters: { "scale_pos_weight" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=1.0, gamma=0.3, learning_rate=0.01, max_depth=7, n_estimators=100, reg_alpha=0, reg_lambda=2, scale_pos_weight=1, subsample=0.6; total time=   0.3s


Parameters: { "scale_pos_weight" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=1.0, gamma=0.3, learning_rate=0.01, max_depth=7, n_estimators=100, reg_alpha=0, reg_lambda=2, scale_pos_weight=1, subsample=0.6; total time=   0.3s


Parameters: { "scale_pos_weight" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=1.0, gamma=0.3, learning_rate=0.01, max_depth=7, n_estimators=100, reg_alpha=0, reg_lambda=2, scale_pos_weight=1, subsample=0.6; total time=   0.3s


Parameters: { "scale_pos_weight" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=1.0, gamma=0.3, learning_rate=0.01, max_depth=7, n_estimators=100, reg_alpha=0, reg_lambda=2, scale_pos_weight=1, subsample=0.6; total time=   0.3s


Parameters: { "scale_pos_weight" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=1.0, gamma=0.1, learning_rate=0.05, max_depth=5, n_estimators=100, reg_alpha=0.1, reg_lambda=1, scale_pos_weight=1, subsample=0.8; total time=   0.2s


Parameters: { "scale_pos_weight" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=1.0, gamma=0.1, learning_rate=0.05, max_depth=5, n_estimators=100, reg_alpha=0.1, reg_lambda=1, scale_pos_weight=1, subsample=0.8; total time=   0.2s


Parameters: { "scale_pos_weight" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=1.0, gamma=0.1, learning_rate=0.05, max_depth=5, n_estimators=100, reg_alpha=0.1, reg_lambda=1, scale_pos_weight=1, subsample=0.8; total time=   0.3s


Parameters: { "scale_pos_weight" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=1.0, gamma=0.1, learning_rate=0.05, max_depth=5, n_estimators=100, reg_alpha=0.1, reg_lambda=1, scale_pos_weight=1, subsample=0.8; total time=   0.2s


Parameters: { "scale_pos_weight" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=1.0, gamma=0.1, learning_rate=0.05, max_depth=5, n_estimators=100, reg_alpha=0.1, reg_lambda=1, scale_pos_weight=1, subsample=0.8; total time=   0.2s


Parameters: { "scale_pos_weight" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=1.0, gamma=0, learning_rate=0.1, max_depth=7, n_estimators=100, reg_alpha=0.1, reg_lambda=2, scale_pos_weight=1, subsample=0.6; total time=   0.2s


Parameters: { "scale_pos_weight" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=1.0, gamma=0, learning_rate=0.1, max_depth=7, n_estimators=100, reg_alpha=0.1, reg_lambda=2, scale_pos_weight=1, subsample=0.6; total time=   0.2s


Parameters: { "scale_pos_weight" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=1.0, gamma=0, learning_rate=0.1, max_depth=7, n_estimators=100, reg_alpha=0.1, reg_lambda=2, scale_pos_weight=1, subsample=0.6; total time=   0.2s


Parameters: { "scale_pos_weight" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=1.0, gamma=0, learning_rate=0.1, max_depth=7, n_estimators=100, reg_alpha=0.1, reg_lambda=2, scale_pos_weight=1, subsample=0.6; total time=   0.2s


Parameters: { "scale_pos_weight" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=1.0, gamma=0, learning_rate=0.1, max_depth=7, n_estimators=100, reg_alpha=0.1, reg_lambda=2, scale_pos_weight=1, subsample=0.6; total time=   0.2s


Parameters: { "scale_pos_weight" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=0.8, gamma=0.3, learning_rate=0.01, max_depth=5, n_estimators=200, reg_alpha=0.1, reg_lambda=2, scale_pos_weight=1, subsample=1.0; total time=   0.5s


Parameters: { "scale_pos_weight" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=0.8, gamma=0.3, learning_rate=0.01, max_depth=5, n_estimators=200, reg_alpha=0.1, reg_lambda=2, scale_pos_weight=1, subsample=1.0; total time=   0.5s


Parameters: { "scale_pos_weight" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=0.8, gamma=0.3, learning_rate=0.01, max_depth=5, n_estimators=200, reg_alpha=0.1, reg_lambda=2, scale_pos_weight=1, subsample=1.0; total time=   0.5s


Parameters: { "scale_pos_weight" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=0.8, gamma=0.3, learning_rate=0.01, max_depth=5, n_estimators=200, reg_alpha=0.1, reg_lambda=2, scale_pos_weight=1, subsample=1.0; total time=   0.5s


Parameters: { "scale_pos_weight" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=0.8, gamma=0.3, learning_rate=0.01, max_depth=5, n_estimators=200, reg_alpha=0.1, reg_lambda=2, scale_pos_weight=1, subsample=1.0; total time=   0.5s


Parameters: { "scale_pos_weight" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=1.0, gamma=0.3, learning_rate=0.1, max_depth=5, n_estimators=300, reg_alpha=0.01, reg_lambda=2, scale_pos_weight=1, subsample=1.0; total time=   0.3s


Parameters: { "scale_pos_weight" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=1.0, gamma=0.3, learning_rate=0.1, max_depth=5, n_estimators=300, reg_alpha=0.01, reg_lambda=2, scale_pos_weight=1, subsample=1.0; total time=   0.3s


Parameters: { "scale_pos_weight" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=1.0, gamma=0.3, learning_rate=0.1, max_depth=5, n_estimators=300, reg_alpha=0.01, reg_lambda=2, scale_pos_weight=1, subsample=1.0; total time=   0.3s


Parameters: { "scale_pos_weight" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=1.0, gamma=0.3, learning_rate=0.1, max_depth=5, n_estimators=300, reg_alpha=0.01, reg_lambda=2, scale_pos_weight=1, subsample=1.0; total time=   0.3s


Parameters: { "scale_pos_weight" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=1.0, gamma=0.3, learning_rate=0.1, max_depth=5, n_estimators=300, reg_alpha=0.01, reg_lambda=2, scale_pos_weight=1, subsample=1.0; total time=   0.3s


Parameters: { "scale_pos_weight" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=1.0, gamma=0.1, learning_rate=0.05, max_depth=3, n_estimators=300, reg_alpha=0.01, reg_lambda=2, scale_pos_weight=1, subsample=1.0; total time=   0.4s


Parameters: { "scale_pos_weight" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=1.0, gamma=0.1, learning_rate=0.05, max_depth=3, n_estimators=300, reg_alpha=0.01, reg_lambda=2, scale_pos_weight=1, subsample=1.0; total time=   0.4s


Parameters: { "scale_pos_weight" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=1.0, gamma=0.1, learning_rate=0.05, max_depth=3, n_estimators=300, reg_alpha=0.01, reg_lambda=2, scale_pos_weight=1, subsample=1.0; total time=   0.4s


Parameters: { "scale_pos_weight" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=1.0, gamma=0.1, learning_rate=0.05, max_depth=3, n_estimators=300, reg_alpha=0.01, reg_lambda=2, scale_pos_weight=1, subsample=1.0; total time=   0.5s


Parameters: { "scale_pos_weight" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=1.0, gamma=0.1, learning_rate=0.05, max_depth=3, n_estimators=300, reg_alpha=0.01, reg_lambda=2, scale_pos_weight=1, subsample=1.0; total time=   1.2s


Parameters: { "scale_pos_weight" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=1.0, gamma=0, learning_rate=0.01, max_depth=7, n_estimators=200, reg_alpha=0.1, reg_lambda=1, scale_pos_weight=1, subsample=1.0; total time=   1.6s


Parameters: { "scale_pos_weight" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=1.0, gamma=0, learning_rate=0.01, max_depth=7, n_estimators=200, reg_alpha=0.1, reg_lambda=1, scale_pos_weight=1, subsample=1.0; total time=   0.8s


Parameters: { "scale_pos_weight" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=1.0, gamma=0, learning_rate=0.01, max_depth=7, n_estimators=200, reg_alpha=0.1, reg_lambda=1, scale_pos_weight=1, subsample=1.0; total time=   0.8s


Parameters: { "scale_pos_weight" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=1.0, gamma=0, learning_rate=0.01, max_depth=7, n_estimators=200, reg_alpha=0.1, reg_lambda=1, scale_pos_weight=1, subsample=1.0; total time=   0.8s


Parameters: { "scale_pos_weight" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=1.0, gamma=0, learning_rate=0.01, max_depth=7, n_estimators=200, reg_alpha=0.1, reg_lambda=1, scale_pos_weight=1, subsample=1.0; total time=   0.8s


Parameters: { "scale_pos_weight" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=0.8, gamma=0.1, learning_rate=0.05, max_depth=5, n_estimators=200, reg_alpha=0.1, reg_lambda=1, scale_pos_weight=1, subsample=0.6; total time=   0.3s


Parameters: { "scale_pos_weight" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=0.8, gamma=0.1, learning_rate=0.05, max_depth=5, n_estimators=200, reg_alpha=0.1, reg_lambda=1, scale_pos_weight=1, subsample=0.6; total time=   0.3s


Parameters: { "scale_pos_weight" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=0.8, gamma=0.1, learning_rate=0.05, max_depth=5, n_estimators=200, reg_alpha=0.1, reg_lambda=1, scale_pos_weight=1, subsample=0.6; total time=   0.3s


Parameters: { "scale_pos_weight" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=0.8, gamma=0.1, learning_rate=0.05, max_depth=5, n_estimators=200, reg_alpha=0.1, reg_lambda=1, scale_pos_weight=1, subsample=0.6; total time=   0.3s


Parameters: { "scale_pos_weight" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=0.8, gamma=0.1, learning_rate=0.05, max_depth=5, n_estimators=200, reg_alpha=0.1, reg_lambda=1, scale_pos_weight=1, subsample=0.6; total time=   0.3s
[CV] END colsample_bytree=0.6, gamma=0.3, learning_rate=0.01, max_depth=3, n_estimators=100, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1.0; total time=   0.1s


Parameters: { "scale_pos_weight" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "scale_pos_weight" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=0.6, gamma=0.3, learning_rate=0.01, max_depth=3, n_estimators=100, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1.0; total time=   0.1s
[CV] END colsample_bytree=0.6, gamma=0.3, learning_rate=0.01, max_depth=3, n_estimators=100, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1.0; total time=   0.1s


Parameters: { "scale_pos_weight" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "scale_pos_weight" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=0.6, gamma=0.3, learning_rate=0.01, max_depth=3, n_estimators=100, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1.0; total time=   0.1s
[CV] END colsample_bytree=0.6, gamma=0.3, learning_rate=0.01, max_depth=3, n_estimators=100, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1.0; total time=   0.1s


Parameters: { "scale_pos_weight" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "scale_pos_weight" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=1.0, gamma=0.3, learning_rate=0.1, max_depth=5, n_estimators=200, reg_alpha=0, reg_lambda=1.5, scale_pos_weight=1, subsample=1.0; total time=   0.3s


Parameters: { "scale_pos_weight" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=1.0, gamma=0.3, learning_rate=0.1, max_depth=5, n_estimators=200, reg_alpha=0, reg_lambda=1.5, scale_pos_weight=1, subsample=1.0; total time=   0.2s


Parameters: { "scale_pos_weight" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=1.0, gamma=0.3, learning_rate=0.1, max_depth=5, n_estimators=200, reg_alpha=0, reg_lambda=1.5, scale_pos_weight=1, subsample=1.0; total time=   0.2s


Parameters: { "scale_pos_weight" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=1.0, gamma=0.3, learning_rate=0.1, max_depth=5, n_estimators=200, reg_alpha=0, reg_lambda=1.5, scale_pos_weight=1, subsample=1.0; total time=   0.2s


Parameters: { "scale_pos_weight" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=1.0, gamma=0.3, learning_rate=0.1, max_depth=5, n_estimators=200, reg_alpha=0, reg_lambda=1.5, scale_pos_weight=1, subsample=1.0; total time=   0.2s


Parameters: { "scale_pos_weight" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


{'RandomForestClassifier': {'n_estimators': 500, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': None, 'criterion': 'gini', 'bootstrap': False}, 'XgboostClassifier': {'subsample': 0.6, 'scale_pos_weight': 1, 'reg_lambda': 2, 'reg_alpha': 0, 'n_estimators': 100, 'max_depth': 7, 'learning_rate': 0.01, 'gamma': 0.3, 'colsample_bytree': 1.0}}


In [45]:
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier

# Define models with given hyperparameters
models = {
    'RandomForestClassifier': RandomForestClassifier(
        n_estimators=200,
        min_samples_split=2,
        min_samples_leaf=2,
        max_features='log2',
        max_depth=30,
        criterion='gini',
        bootstrap=False
    ),
    'XGBoostClassifier': XGBClassifier(
        subsample=0.6,
        scale_pos_weight=1,
        reg_lambda=2,
        reg_alpha=0,
        n_estimators=100,
        max_depth=10,
        learning_rate=0.05,
        gamma=0.3,
        colsample_bytree=0.8,
        use_label_encoder=False,
        eval_metric='logloss'
    )
}

# Train and evaluate each model
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print(f"Model: {name}")
    evaluation(y_test, y_pred)
    print('_'*50)


Model: RandomForestClassifier
Accuracy Score:  0.8775510204081632
Confusion Matrix: 
 [[53  0  1  8]
 [ 0 12  0  0]
 [ 1  0 12  1]
 [ 7  0  0 52]]
Classification Report: 
               precision    recall  f1-score   support

           0       0.87      0.85      0.86        62
           1       1.00      1.00      1.00        12
           2       0.92      0.86      0.89        14
           3       0.85      0.88      0.87        59

    accuracy                           0.88       147
   macro avg       0.91      0.90      0.90       147
weighted avg       0.88      0.88      0.88       147

__________________________________________________


Parameters: { "scale_pos_weight", "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Model: XGBoostClassifier
Accuracy Score:  0.8707482993197279
Confusion Matrix: 
 [[52  0  2  8]
 [ 0 12  0  0]
 [ 1  0 12  1]
 [ 7  0  0 52]]
Classification Report: 
               precision    recall  f1-score   support

           0       0.87      0.84      0.85        62
           1       1.00      1.00      1.00        12
           2       0.86      0.86      0.86        14
           3       0.85      0.88      0.87        59

    accuracy                           0.87       147
   macro avg       0.89      0.89      0.89       147
weighted avg       0.87      0.87      0.87       147

__________________________________________________


In [47]:
rfc = RandomForestClassifier(
        n_estimators=200,
        min_samples_split=2,
        min_samples_leaf=2,
        max_features='log2',
        max_depth=30,
        criterion='gini',
        bootstrap=False
    )
rfc.fit(X_train, y_train)
y_pred = rfc.predict(X_test)
evaluation(y_test, y_pred)

Accuracy Score:  0.8843537414965986
Confusion Matrix: 
 [[54  0  0  8]
 [ 0 12  0  0]
 [ 1  0 12  1]
 [ 7  0  0 52]]
Classification Report: 
               precision    recall  f1-score   support

           0       0.87      0.87      0.87        62
           1       1.00      1.00      1.00        12
           2       1.00      0.86      0.92        14
           3       0.85      0.88      0.87        59

    accuracy                           0.88       147
   macro avg       0.93      0.90      0.92       147
weighted avg       0.89      0.88      0.88       147



In [48]:
import pickle
pickle.dump(rfc, open('RadonForest.pkl', 'wb'))
pickle.dump(sl, open('scaler.pkl', 'wb'))