<a href="https://colab.research.google.com/github/wmezadev/CSE-450-TEAM-4/blob/austinsChanges/Low_consumer_confidence_model_w04.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Trained Decision Tree Classifier for Bank Term Deposit Subscription Prediction Using Low and High Consumer Confidence

## Import Libraries and Read Data

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import math

# Location of the campaign CSV in the course's GitHub repository.
CAMPAIGN_CSV_URL = 'https://raw.githubusercontent.com/byui-cse/cse450-course/master/data/bank.csv'

# Pull the CSV over HTTP into a DataFrame.
campaign = pd.read_csv(CAMPAIGN_CSV_URL)

## Prepare and Preprocess the Data


In [None]:
# We need to separate the low from the high consumer-confidence rows for these two models.

# Order the rows from the highest to the lowest consumer confidence index.
campaignSorted = campaign.sort_values('cons.conf.idx', ascending=False)

# Row position of the midpoint (floor division, so an odd row count puts the
# extra row in the lower half).
campaignSortedMiddle = len(campaignSorted) // 2

# Everything from the midpoint onward in the descending sort is the
# lower-confidence half. The cut is positional, so rows that share the same
# index value at the midpoint can land on either side of it.
LowestConfidenceData = campaignSorted[campaignSortedMiddle:]



In [None]:
# Hold out 20% of the low-confidence rows for testing; the fixed seed keeps
# the split repeatable between runs.
train_data_low, test_data_low = train_test_split(
    LowestConfidenceData,
    test_size=0.2,
    random_state=42,
)

# Integer encoding applied to the 'y' label column.
label_codes = {'yes': 1, 'no': 0}

# Training split: features are every column except 'y'; the target is 'y' as 1/0.
low_X_train = train_data_low.drop(columns='y')
low_y_train = train_data_low['y'].map(label_codes)

# Test split, prepared the same way.
low_X_test = test_data_low.drop(columns='y')
low_y_test = test_data_low['y'].map(label_codes)

# Columns passed through StandardScaler.
numeric_features = [
    'age', 'campaign', 'pdays', 'previous',
    'emp.var.rate', 'cons.price.idx', 'cons.conf.idx',
    'euribor3m', 'nr.employed',
]

# Columns passed through OneHotEncoder.
categorical_features = [
    'job', 'marital', 'education', 'default', 'housing',
    'loan', 'contact', 'month', 'day_of_week', 'poutcome',
]

# One (name, transformer, columns) entry per column group.
numeric_step = ('num', StandardScaler(), numeric_features)
categorical_step = (
    'cat',
    OneHotEncoder(drop='first', handle_unknown='ignore'),
    categorical_features,
)

# Apply each transformer to its own column group. No `remainder` is set, so
# columns that appear in neither list are left to ColumnTransformer's default.
preprocessor = ColumnTransformer(transformers=[numeric_step, categorical_step])

## Create and train the model

In [None]:
# Decision tree capped at depth 5, seeded so repeated fits give the same tree.
low_model = DecisionTreeClassifier(random_state=42, max_depth=5)

# Preprocessing and classifier chained so both are fit and applied together.
pipeline_steps = [
    ('preprocessor', preprocessor),
    ('classifier', low_model),
]
model_low = Pipeline(pipeline_steps)

# Fit on the low-confidence training split. Left as the cell's last
# expression so the notebook displays the fitted pipeline.
model_low.fit(low_X_train, low_y_train)

## Evaluate the model's performance:

In [None]:


# --- Training split ---------------------------------------------------------
# Score the rows the model was fit on.
low_y_train_pred = model_low.predict(low_X_train)

train_accuracy = accuracy_score(low_y_train, low_y_train_pred)
train_report = classification_report(low_y_train, low_y_train_pred)
train_matrix = confusion_matrix(low_y_train, low_y_train_pred)

print("Training accuracy:", train_accuracy)
print("Training classification report:\n", train_report)
print("Training confusion matrix:\n", train_matrix)

# --- Test split -------------------------------------------------------------
# Score the held-out rows.
low_y_test_pred = model_low.predict(low_X_test)

test_accuracy = accuracy_score(low_y_test, low_y_test_pred)
test_report = classification_report(low_y_test, low_y_test_pred)
test_matrix = confusion_matrix(low_y_test, low_y_test_pred)

print("\nTest accuracy:", test_accuracy)
print("Test classification report:\n", test_report)
print("Test confusion matrix:\n", test_matrix)

# Quick look at the first few predicted labels.
print("First 10 test predictions:", low_y_test_pred[:10])




Training accuracy: 0.9132721877528999
Training classification report:
               precision    recall  f1-score   support

           0       0.92      1.00      0.95     13459
           1       0.71      0.10      0.18      1369

    accuracy                           0.91     14828
   macro avg       0.81      0.55      0.57     14828
weighted avg       0.90      0.91      0.88     14828

Training confusion matrix:
 [[13402    57]
 [ 1229   140]]

Test accuracy: 0.9020771513353115
Test classification report:
               precision    recall  f1-score   support

           0       0.91      0.99      0.95      3343
           1       0.51      0.07      0.13       364

    accuracy                           0.90      3707
   macro avg       0.71      0.53      0.54      3707
weighted avg       0.87      0.90      0.87      3707

Test confusion matrix:
 [[3318   25]
 [ 338   26]]
First 10 test predictions: [0 0 0 0 0 0 0 0 0 0]


## Test the Holdout Data and Export Results

In [None]:
# The download helper ships only with Google Colab. Guard the import so this
# cell still writes predictions.csv when the notebook runs anywhere else.
try:
    from google.colab import files
except ImportError:
    files = None

# Load the holdout dataset
holdout_data = pd.read_csv('https://raw.githubusercontent.com/byui-cse/cse450-course/master/data/bank_holdout_test.csv')

# Make predictions using the trained model.
# NOTE(review): model_low was fit only on the lower-confidence half of the
# campaign data, yet it scores every holdout row here - confirm this is intended.
holdout_predictions = model_low.predict(holdout_data)

# Create a single-column DataFrame holding the predicted labels
predictions_df = pd.DataFrame(holdout_predictions, columns=['predictions'])

# Save the predictions to a CSV file (no index column)
predictions_df.to_csv('predictions.csv', index=False)

# Trigger a browser download when running in Colab; otherwise the file simply
# stays in the working directory.
if files is not None:
    files.download('predictions.csv')
else:
    print("google.colab is unavailable; predictions were saved to predictions.csv")



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>