In [45]:
from sklearn.preprocessing import LabelEncoder
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
import joblib

In [46]:
# Load the dataset: the file is semicolon-delimited, so the separator is given explicitly
data = pd.read_csv(
    '/content/bank-full.csv',
    sep=';',
)

In [47]:

# Preview the first rows to sanity-check that the columns were parsed as expected
data.head()

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
0,58,management,married,tertiary,no,2143,yes,no,unknown,5,may,261,1,-1,0,unknown,no
1,44,technician,single,secondary,no,29,yes,no,unknown,5,may,151,1,-1,0,unknown,no
2,33,entrepreneur,married,secondary,no,2,yes,yes,unknown,5,may,76,1,-1,0,unknown,no
3,47,blue-collar,married,unknown,no,1506,yes,no,unknown,5,may,92,1,-1,0,unknown,no
4,33,unknown,single,unknown,no,1,no,no,unknown,5,may,198,1,-1,0,unknown,no


In [48]:
# Data preprocessing: integer-encode each categorical column (the target 'y' included).
# One fitted encoder is kept per column so the same mapping can be reapplied to new
# records and inverted to recover the original labels.
categorical_columns = ['job', 'marital', 'education', 'default', 'housing', 'loan', 'contact', 'month', 'poutcome', 'y']
label_encoders = {col: LabelEncoder() for col in categorical_columns}
for col, le in label_encoders.items():
    # The column is overwritten in place with its integer codes
    data[col] = le.fit_transform(data[col])


In [49]:
# Separate the encoded target from the predictors
y = data['y']               # Target variable
X = data.drop(columns='y')  # Features: every remaining column

In [50]:
# Hold out 30% of the rows for testing; the fixed random_state keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [51]:
# Standardize every feature column (the label-encoded categoricals included).
# The scaler is fitted on the training split only, so no test-set statistics leak in.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)  # Training data, scaled with its own statistics
X_test = scaler.transform(X_test)    # Test data, scaled with the training statistics


In [52]:
# Signed decision scores of the SVM for each test sample.
# NOTE(review): hidden state — in the cells visible here `svm_model` is first assigned
# in a later cell (In [56]), so this cell raises NameError on Restart & Run All. The
# output shown below presumably comes from an earlier fit; move this cell after the
# training cell and re-run it.
print(svm_model.decision_function(X_test))

[-1.00040594 -1.00025721 -1.00014321 ... -1.00027125 -1.00048831
 -0.99995334]


In [53]:
# Class balance of the encoded target; the counts printed below show class 1 is the minority
print(y.value_counts())

y
0    39922
1     5289
Name: count, dtype: int64


In [54]:
# Confusion matrix of true test labels against predicted labels.
# NOTE(review): hidden state — in the cells visible here `y_pred` is first assigned in
# a later cell (In [58]), so this cell fails on a fresh kernel. The matrix printed below
# contains no class-1 predictions, which disagrees with the classification report
# further down, so it appears to come from an earlier model run; re-run after predicting.
from sklearn.metrics import confusion_matrix
print(confusion_matrix(y_test, y_pred))

[[11966     0]
 [ 1598     0]]


In [56]:
# RBF-kernel SVM. class_weight='balanced' reweights the classes to offset the skewed
# target, and probability=True is what makes predict_proba available afterwards.
svm_model = SVC(
    kernel='rbf',
    class_weight='balanced',
    probability=True,
    random_state=42,
).fit(X_train, y_train)
print(svm_model)

SVC(class_weight='balanced', probability=True, random_state=42)


In [57]:
# Per-sample class probabilities for the test set (available because the SVC was
# constructed with probability=True)
probas = svm_model.predict_proba(X_test)
print(probas)

[[9.92544508e-01 7.45549224e-03]
 [9.94692969e-01 5.30703132e-03]
 [8.91103510e-01 1.08896490e-01]
 ...
 [9.73265389e-01 2.67346109e-02]
 [9.99680804e-01 3.19196163e-04]
 [8.32607633e-01 1.67392367e-01]]


In [58]:
# Make predictions: hard class labels (the encoded target values) for the held-out test set
y_pred = svm_model.predict(X_test)

In [59]:
# Evaluate the model on the held-out test set
from sklearn.metrics import accuracy_score, classification_report

# Overall accuracy, shown to two decimals
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.2f}")

# Per-class precision / recall / F1, which matters more than accuracy for a skewed target
report_text = classification_report(y_test, y_pred)
print("\nClassification Report:\n", report_text)

Model Accuracy: 0.82

Classification Report:
               precision    recall  f1-score   support

           0       0.97      0.82      0.89     11966
           1       0.38      0.83      0.53      1598

    accuracy                           0.82     13564
   macro avg       0.68      0.83      0.71     13564
weighted avg       0.90      0.82      0.85     13564



In [85]:
# Persist the fitted scaler and model, then one file per label encoder (target 'y' included)
for artifact, filename in ((scaler, 'scaler.pkl'), (svm_model, 'svm_model.pkl')):
    joblib.dump(artifact, filename)
for col in label_encoders:
    encoder = label_encoders[col]
    joblib.dump(encoder, f'label_encoder_{col}.pkl')

In [86]:
# Categorical columns that were label-encoded during training.
# The target 'y' is deliberately last so that `[:-1]` selects the feature columns only.
categorical_columns = [
    'job',
    'marital',
    'education',
    'default',
    'housing',
    'loan',
    'contact',
    'month',
    'poutcome',
    'y',
]


In [87]:
# Reload every persisted label encoder from disk, including the one for the target
# 'y'. The decoding step later looks up label_encoders['y'], so skipping it here
# (or relying on a dict left over from training) only works through hidden kernel
# state; rebuilding the dict makes the inference cells self-contained.
# NOTE: joblib.load unpickles arbitrary objects — only load files you created or trust.
label_encoders = {
    col: joblib.load(f'label_encoder_{col}.pkl')
    for col in categorical_columns
}


In [88]:
# One raw customer record, listed in the same order as the feature columns
new_data = [
    [
        55, "entrepreneur", "married", "tertiary", "no", 4500, "yes", "no",
        "cellular", 10, "aug", 900, 1, 0, 2, "success",
    ],
]

In [89]:
# Wrap the raw record in a DataFrame with explicit feature column names
new_data_df = pd.DataFrame(
    new_data,
    columns=[
        'age', 'job', 'marital', 'education', 'default', 'balance', 'housing', 'loan',
        'contact', 'day', 'month', 'duration', 'campaign', 'pdays', 'previous', 'poutcome',
    ],
)


In [90]:
# Now, encode categorical columns using the saved label encoders.
# The whole column is transformed so each row receives its own code; encoding only
# row 0 and assigning the scalar would silently copy that one code onto every row
# as soon as more than one record is scored.
for col in categorical_columns[:-1]:  # Exclude 'y' (target column)
    new_data_df[col] = label_encoders[col].transform(new_data_df[col])

In [91]:
# Scale the new data with the scaler fitted on the training split.
# NOTE(review): this uses the in-memory `scaler`; no cell visible here reloads
# 'scaler.pkl', so the persisted file is not what is being exercised.
new_data_scaled = scaler.transform(new_data_df)

In [92]:
# Make a prediction using the trained SVM model (the in-memory `svm_model`);
# the result is an array of encoded class labels, one per input row
prediction = svm_model.predict(new_data_scaled)

In [93]:
# Output the prediction for the first (only) record; this is still the encoded class value
print(f"Prediction: {prediction[0]}")

Prediction: 1


In [95]:
if 'y' in categorical_columns:
    # Decoding the prediction: map the encoded class back to its original string label
    y_encoder = label_encoders['y']
    prediction_label = y_encoder.inverse_transform([prediction[0]])[0]
    print(f"Prediction: {prediction_label}")

    # Report the outcome in plain language: 'yes' means the customer subscribes,
    # anything else is treated as not subscribing
    print(
        "The customer will subscribe to a term deposit."
        if prediction_label == 'yes'
        else "The customer will not subscribe to a term deposit."
    )

Prediction: yes
The customer will subscribe to a term deposit.


In [96]:
# A second raw customer record, listed in the same order as the feature columns
new_data = [
    [
        35, "management", "married", "primary", "yes", 1500, "yes", "no",
        "unknown", 5, "may", 250, 1, -1, 0, "unknown",
    ],
]


In [98]:
# Wrap the raw record in a DataFrame with explicit feature column names
new_data_df = pd.DataFrame(
    new_data,
    columns=[
        'age', 'job', 'marital', 'education', 'default', 'balance', 'housing', 'loan',
        'contact', 'day', 'month', 'duration', 'campaign', 'pdays', 'previous', 'poutcome',
    ],
)

In [99]:
# Encode the categorical feature columns with the fitted label encoders.
# The whole column is transformed so each row receives its own code; encoding only
# row 0 and assigning the scalar would silently copy that one code onto every row
# as soon as more than one record is scored.
for col in categorical_columns[:-1]:  # Exclude 'y' (target column)
    new_data_df[col] = label_encoders[col].transform(new_data_df[col])

In [100]:
# Apply the scaler fitted on the training split to the encoded record
new_data_scaled = scaler.transform(new_data_df)

In [101]:
# Predict the encoded class label for the scaled record
prediction = svm_model.predict(new_data_scaled)

In [102]:
# Show the encoded class predicted for the first (only) record
print(f"Prediction: {prediction[0]}")

Prediction: 0


In [103]:
if 'y' in categorical_columns:
    # Decoding the prediction: map the encoded class back to its original string label
    y_encoder = label_encoders['y']
    prediction_label = y_encoder.inverse_transform([prediction[0]])[0]
    print(f"Prediction: {prediction_label}")

    # Report the outcome in plain language: 'yes' means the customer subscribes,
    # anything else is treated as not subscribing
    print(
        "The customer will subscribe to a term deposit."
        if prediction_label == 'yes'
        else "The customer will not subscribe to a term deposit."
    )

Prediction: no
The customer will not subscribe to a term deposit.
