In [None]:
# Optional setup: uncomment to install XGBoost into the running kernel's environment.
# %pip install xgboost

In [None]:
#XGBoost Regression

from xgboost import XGBRegressor

from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

In [None]:
# Synthetic regression data: 1000 samples, 2 features, noise=10, fixed seed.
X, y = make_regression(
    n_samples=1000,
    n_features=2,
    noise=10,
    random_state=1,
)

In [None]:
# Inspect the generated feature matrix (two columns, one per feature).
X

array([[-0.91542437, -1.94504696],
       [-2.30490794, -0.59246129],
       [-0.43750898, -0.78191168],
       ...,
       [-0.46867382,  0.02186284],
       [-0.44265541,  0.55753264],
       [-0.13971173,  0.85328219]])

In [None]:
# Inspect the continuous regression targets produced by make_regression.
y

array([-1.98367142e+02, -7.18777056e+01, -8.69703239e+01, -6.96492401e+01,
        1.82745760e+01, -3.77114686e+01,  3.92726728e+01, -9.78831863e+01,
       -9.79418533e+01, -1.18326213e+02,  7.37176065e+01,  1.15947984e+01,
       -1.14639116e+02,  1.18554758e+02, -1.09479268e+02,  4.55822523e+01,
        1.22163385e+01, -7.55247901e+01, -1.07584299e+02, -1.52546032e+02,
        6.76376929e+00,  9.63814831e+00, -2.16279129e+01,  7.04567440e+01,
        1.07700199e+02,  1.09477518e+01,  1.17165036e+02,  2.78449353e+01,
        3.24248766e+01, -1.55808165e+01,  9.64321593e+01, -2.68980403e+02,
        3.04398725e+01, -5.03731238e+01,  1.83865434e+01, -4.20020243e+01,
        2.60722898e+01, -4.24419694e+01, -2.84245422e+01,  7.87732991e+01,
        6.38527251e+01,  1.05600811e+02,  2.54784577e+01, -4.37001825e+01,
       -4.56639834e+01,  2.25475298e+01, -2.44159979e+01,  2.14189935e+00,
       -7.55772986e+01,  9.71903066e+01, -1.01696566e+02,  1.10063672e+02,
        3.74493102e+01,  

In [None]:
# Hold out 30% of the rows for testing; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=1,
)

In [None]:
# Sanity-check the split sizes: 700 training rows and 300 test rows, 2 features each.
X_train.shape, X_test.shape

((700, 2), (300, 2))

In [None]:
# Baseline regressor: XGBRegressor constructed with no arguments (library defaults).
regressor = XGBRegressor()
# Bare expression so the notebook displays the estimator.
regressor

In [None]:
# Train the baseline regressor on the training split only.
regressor.fit(X_train, y_train)

In [None]:
# Predict on the held-out rows with the baseline regressor.
y_pred = regressor.predict(X_test)

# Report R2, MAE and MSE on the test split, one metric per line.
print(
    "Current model performance:",
    f"R2 score: {r2_score(y_test, y_pred)}",
    f"Mean Absolute Error: {mean_absolute_error(y_test, y_pred)}",
    f"Mean Squared Error: {mean_squared_error(y_test, y_pred)}",
    sep="\n",
)

Current model performance:
R2 score: 0.9766041606160056
Mean Absolute Error: 10.46115674730029
Mean Squared Error: 193.37775910652263


In [None]:
from sklearn.model_selection import GridSearchCV

# Search space for the regressor: 3 tree counts x 4 learning rates = 12 candidates.
# Additional dimensions that are currently disabled (add them as keyword
# arguments below to widen the search):
#   max_depth        = [3, 4, 5, 6]
#   subsample        = [0.8, 0.9, 1.0]
#   colsample_bytree = [0.8, 0.9, 1.0]
#   reg_alpha        = [0, 0.1, 0.5, 1]
#   reg_lambda       = [1, 1.5, 2, 3]
param_grid = dict(
    n_estimators=[100, 200, 300],
    learning_rate=[0.01, 0.05, 0.1, 0.2],
)

In [None]:
# 5-fold cross-validated grid search over param_grid for the regressor.
# No `scoring` is passed, so the search uses its default scoring behaviour.
grid_search = GridSearchCV(
    estimator=regressor,
    param_grid=param_grid,
    cv=5,
    n_jobs=-1,
    verbose=3,
)

In [None]:
# Display the configured (not yet fitted) search object.
grid_search

In [None]:
# Run the search on the training split only (12 candidates x 5 folds = 60 fits);
# the test split is kept aside for the evaluation cell that follows.
grid_search.fit(X_train, y_train)

Fitting 5 folds for each of 12 candidates, totalling 60 fits


In [None]:
# Parameter combination selected by the cross-validated search.
grid_search.best_params_

{'learning_rate': 0.05, 'n_estimators': 100}

In [None]:
# Take the estimator selected by the grid search and predict on the held-out rows.
best_model = grid_search.best_estimator_
y_pred_tuned = best_model.predict(X_test)

# Same three metrics as for the baseline model, now for the tuned one.
print(
    "Tuned model performance:",
    f"R2 score: {r2_score(y_test, y_pred_tuned)}",
    f"Mean Absolute Error: {mean_absolute_error(y_test, y_pred_tuned)}",
    f"Mean Squared Error: {mean_squared_error(y_test, y_pred_tuned)}",
    sep="\n",
)

Tuned model performance:
R2 score: 0.9794754547745094
Mean Absolute Error: 9.595064400384274
Mean Squared Error: 169.64514490132566


In [None]:
# XGBoost classifier
from xgboost import XGBClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [None]:
# Synthetic binary-classification data: 1000 samples, 20 features, fixed seed.
# NOTE: this rebinds X and y, replacing the regression data created earlier.
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_classes=2,
    random_state=1,
)

In [None]:
# Inspect the generated classification feature matrix.
X

array([[-2.04582165, -0.13791624, -0.08071423, ...,  2.48194524,
         0.74236675,  0.23154789],
       [-0.98726024,  1.30120189,  2.37734888, ...,  0.55445754,
        -0.21892143, -0.37608578],
       [ 0.57335921,  0.09375582,  0.4662521 , ..., -0.6088508 ,
         0.79903499, -0.17121177],
       ...,
       [-0.70737159,  1.07650943,  0.58510456, ..., -1.51337602,
         0.90239871, -0.69230951],
       [-0.20706849,  1.17319848, -1.94478665, ..., -0.32820676,
         1.5711921 ,  1.14877729],
       [-2.16769231, -2.54871672,  2.89359255, ...,  0.71535366,
         0.34329241,  1.07350284]])

In [None]:
# Inspect the class labels (0 or 1) for each sample.
y

array([0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1,
       1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0,
       0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1,
       0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1,
       1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0,
       0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0,
       1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1,
       0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0,
       0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
       0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1,
       0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0,
       1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0,
       0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

In [None]:
# Hold out 30% of the classification rows for testing (seeded for repeatability).
# NOTE: this rebinds the train/test variables used in the regression section.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.30,
    random_state=1,
)

In [None]:
# Sanity-check the split sizes: 700 training rows and 300 test rows, 20 features each.
X_train.shape, X_test.shape

((700, 20), (300, 20))

In [None]:
# Baseline classifier: XGBClassifier constructed with no arguments (library defaults).
clf = XGBClassifier()
# Bare expression so the notebook displays the estimator.
clf

In [None]:
# Train the baseline classifier on the training split only.
clf.fit(X_train, y_train)

In [None]:
# Predict class labels for the held-out rows with the baseline classifier.
y_pred = clf.predict(X_test)

# Report accuracy, the per-class precision/recall/F1 table and the confusion matrix.
print(
    "Current model performance:",
    f"Accuracy: {accuracy_score(y_test, y_pred)}",
    "Classification Report:",
    classification_report(y_test, y_pred),
    "Confusion Matrix:",
    confusion_matrix(y_test, y_pred),
    sep="\n",
)


Current model performance:
Accuracy: 0.85
Classification Report:
              precision    recall  f1-score   support

           0       0.82      0.87      0.84       139
           1       0.88      0.83      0.86       161

    accuracy                           0.85       300
   macro avg       0.85      0.85      0.85       300
weighted avg       0.85      0.85      0.85       300

Confusion Matrix:
[[121  18]
 [ 27 134]]


In [None]:
# Hyperparameter tuning for the classifier
from sklearn.model_selection import GridSearchCV

# Search space: 3 tree counts x 4 learning rates = 12 candidates.
# Additional dimensions that are currently disabled (add them as keyword
# arguments below to widen the search):
#   max_depth        = [3, 4, 5, 6]
#   subsample        = [0.8, 0.9, 1.0]
#   colsample_bytree = [0.8, 0.9, 1.0]
#   reg_alpha        = [0, 0.1, 0.5, 1]
#   reg_lambda       = [1, 1.5, 2, 3]
param_grid = dict(
    n_estimators=[100, 200, 300],
    learning_rate=[0.01, 0.05, 0.1, 0.2],
)

In [None]:
# 5-fold cross-validated grid search over param_grid for the classifier.
# NOTE: this rebinds grid_search, replacing the regression search object.
grid_search = GridSearchCV(
    estimator=clf,
    param_grid=param_grid,
    cv=5,
    n_jobs=-1,
    verbose=3,
)

In [None]:
# Display the configured (not yet fitted) classifier search object.
grid_search

In [None]:
# Run the search on the training split only (12 candidates x 5 folds = 60 fits);
# the test split is kept aside for the evaluation cell that follows.
grid_search.fit(X_train, y_train)

Fitting 5 folds for each of 12 candidates, totalling 60 fits


In [None]:
# Parameter combination selected by the cross-validated search.
grid_search.best_params_

{'learning_rate': 0.05, 'n_estimators': 100}

In [None]:
# Take the estimator selected by the grid search and predict on the held-out rows.
best_model = grid_search.best_estimator_
y_pred_tuned = best_model.predict(X_test)

# Same report as for the baseline classifier, now for the tuned one.
print(
    "Tuned model performance:",
    f"Accuracy: {accuracy_score(y_test, y_pred_tuned)}",
    "Classification Report:",
    classification_report(y_test, y_pred_tuned),
    "Confusion Matrix:",
    confusion_matrix(y_test, y_pred_tuned),
    sep="\n",
)


Tuned model performance:
Accuracy: 0.8633333333333333
Classification Report:
              precision    recall  f1-score   support

           0       0.84      0.88      0.86       139
           1       0.89      0.85      0.87       161

    accuracy                           0.86       300
   macro avg       0.86      0.86      0.86       300
weighted avg       0.86      0.86      0.86       300

Confusion Matrix:
[[122  17]
 [ 24 137]]
