# In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder
from zipfile import ZipFile
import io
import requests

# URL of the ZIP archive
zip_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00222/bank-additional.zip"

# Download the archive into memory. The timeout keeps a stalled connection
# from hanging the run indefinitely, and raise_for_status() reports an HTTP
# error here rather than letting an error page reach ZipFile and fail there
# with a misleading BadZipFile.
with requests.get(zip_url, timeout=60) as response:
    response.raise_for_status()
    archive_bytes = response.content

# Read the ';'-separated CSV straight out of the in-memory ZIP archive
with ZipFile(io.BytesIO(archive_bytes)) as zip_file:
    with zip_file.open('bank-additional/bank-additional-full.csv') as csv_file:
        df = pd.read_csv(csv_file, sep=';')

# Display the first few rows of the dataset
print("First few rows of the dataset:")
print(df.head())
# Data preprocessing
# Convert categorical variables to numerical using Label Encoding.
# Each column gets its own fitted encoder, stored in `encoders`, so the
# integer codes can be mapped back to the original labels later with
# encoders[column].inverse_transform(...).
categorical_columns = [
    'job', 'marital', 'education', 'default', 'housing', 'loan',
    'contact', 'month', 'day_of_week', 'poutcome', 'y',
]
encoders = {}
for column in categorical_columns:
    encoders[column] = LabelEncoder()
    df[column] = encoders[column].fit_transform(df[column])

# Backward compatibility: `label_encoder` still ends up fitted on the target
# column 'y', exactly as it did when a single encoder was reused.
label_encoder = encoders['y']

# Separate the label column 'y' from the remaining predictor columns
y = df['y']
X = df.drop(columns=['y'])

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

# Fit a seeded decision tree on the training split (fit() returns the estimator)
clf = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# Predict labels for the held-out test rows
y_pred = clf.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print("\nAccuracy:", accuracy)

# sep="" stops print() from inserting a space in front of the report, which
# would push its header row one column out of line with the rows beneath it.
classification_rep = classification_report(y_test, y_pred)
print("\nClassification Report:\n", classification_rep, sep="")
