<a href="https://colab.research.google.com/github/vaibhavnarute/TE_Task_2_MLSC/blob/main/TE_Task_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd

# Read the semicolon-separated CSV into a DataFrame
file_path = 'bank-additional-full.csv'
data = pd.read_csv(file_path, sep=';')

# Preview the first five rows as the cell's output
data.head(5)


Unnamed: 0,age,job,marital,education,default,housing,loan,contact,month,day_of_week,...,campaign,pdays,previous,poutcome,emp.var.rate,cons.price.idx,cons.conf.idx,euribor3m,nr.employed,y
0,56,housemaid,married,basic.4y,no,no,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
1,57,services,married,high.school,unknown,no,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
2,37,services,married,high.school,no,yes,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
3,40,admin.,married,basic.6y,no,no,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
4,56,services,married,high.school,no,no,yes,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Preprocess the loaded frame into standardized train/test matrices.
# All encoding is done on a copy, so `data` keeps the values loaded from the
# CSV and re-running this cell produces the same result every time (mapping
# an already-encoded 0/1 target through {'yes': 1, 'no': 0} would turn every
# label into NaN).

# Check for missing values. A bare expression in the middle of a cell is not
# displayed, so print the per-column NaN counts explicitly.
missing_counts = data.isnull().sum()
print(f'Missing values per column:\n{missing_counts}')

# Encode categorical variables (every object-dtype column except the target)
# NOTE(review): LabelEncoder yields integer codes, which a linear model treats
# as ordered values; consider one-hot encoding for nominal columns - confirm.
data_encoded = data.copy()
label_encoders = {}
for column in data_encoded.select_dtypes(include=['object']).columns:
    if column != 'y':
        label_encoders[column] = LabelEncoder()
        data_encoded[column] = label_encoders[column].fit_transform(data_encoded[column])

# Encode the target variable
data_encoded['y'] = data_encoded['y'].map({'yes': 1, 'no': 0})

# Series.map leaves NaN for any value missing from the dict; fail here with a
# clear message instead of passing a NaN target on to the models.
if data_encoded['y'].isnull().any():
    raise ValueError(
        "Target column 'y' contains values other than 'yes'/'no'; "
        "reload the raw data before running this cell."
    )

# Split the data into features and target variable
X = data_encoded.drop('y', axis=1)
y = data_encoded['y']

# Split the dataset into training and testing sets (70/30, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Standardize the features: the scaler is fitted on the training split only
# and the same fitted transform is then applied to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, roc_auc_score, classification_report

# Fit a logistic-regression model on the training split
# (fit() returns the estimator, so construction and training are chained).
model = LogisticRegression(random_state=42).fit(X_train, y_train)

# Score the test split: column 1 of predict_proba, plus the hard labels
y_prob = model.predict_proba(X_test)[:, 1]
y_pred = model.predict(X_test)

# Compute the evaluation metrics
report = classification_report(y_test, y_pred)
roc_auc = roc_auc_score(y_test, y_prob)
accuracy = accuracy_score(y_test, y_pred)

# Emit accuracy, ROC-AUC and the per-class report, one after another
print(f'Accuracy: {accuracy}', f'ROC-AUC: {roc_auc}', report, sep='\n')


Accuracy: 0.9101723719349356
ROC-AUC: 0.9320378439746703
              precision    recall  f1-score   support

           0       0.93      0.97      0.95     10968
           1       0.66      0.41      0.50      1389

    accuracy                           0.91     12357
   macro avg       0.80      0.69      0.73     12357
weighted avg       0.90      0.91      0.90     12357



In [None]:
from sklearn.ensemble import GradientBoostingClassifier

# Fit the gradient-boosting classifier on the training split
# (fit() returns the estimator, so construction and training are chained).
gbm = GradientBoostingClassifier(random_state=42).fit(X_train, y_train)

# Hard-label predictions for both splits
y_pred_train_gbm = gbm.predict(X_train)
y_pred_test_gbm = gbm.predict(X_test)

# ROC-AUC on each split, computed from column 1 of predict_proba
proba_train_gbm = gbm.predict_proba(X_train)[:, 1]
proba_test_gbm = gbm.predict_proba(X_test)[:, 1]
roc_auc_train_gbm = roc_auc_score(y_train, proba_train_gbm)
roc_auc_test_gbm = roc_auc_score(y_test, proba_test_gbm)

# Report train vs. test ROC-AUC, then the per-class report for the test split
print(
    f"Gradient Boosting Training ROC-AUC: {roc_auc_train_gbm}",
    f"Gradient Boosting Testing ROC-AUC: {roc_auc_test_gbm}",
    classification_report(y_test, y_pred_test_gbm),
    sep='\n',
)


Gradient Boosting Training ROC-AUC: 0.9520057039044221
Gradient Boosting Testing ROC-AUC: 0.9483873894027209
              precision    recall  f1-score   support

           0       0.94      0.97      0.95     10968
           1       0.68      0.51      0.58      1389

    accuracy                           0.92     12357
   macro avg       0.81      0.74      0.77     12357
weighted avg       0.91      0.92      0.91     12357

