In [1]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

# Default dataset locations. Override them through main()'s arguments to run
# the pipeline on another machine or against other files.
_DEFAULT_TRAIN_CSV = r"C:\Users\vishn\Downloads\training_dataset_03.csv"
_DEFAULT_TEST_CSV = r"C:\Users\vishn\Downloads\testing_dataset_03_.csv"


def _missing_columns(df, required):
    """Return the names in *required* that are not columns of *df*."""
    return [col for col in required if col not in df.columns]


def main(train_path=_DEFAULT_TRAIN_CSV, test_path=_DEFAULT_TEST_CSV,
         target_column="y"):
    """
    Train a decision tree on one CSV and evaluate it on another.

    Loads both CSV files, uses every column except ``target_column`` as a
    feature, fits a ``DecisionTreeClassifier`` and prints a classification
    report and confusion matrix for the test set.

    Args:
        train_path: Path to the training CSV.
        test_path: Path to the testing CSV. It must contain every feature
            column found in the training CSV plus ``target_column``.
        target_column: Name of the label column in both files.

    Returns:
        None. Results and any error messages are printed; the function does
        not raise, so it is safe to call as a top-level notebook/script entry.
    """
    try:
        train_df = pd.read_csv(train_path)
        test_df = pd.read_csv(test_path)

        # Validate the schema up front so a mismatch is reported clearly
        # instead of surfacing as a bare KeyError from pandas indexing.
        if target_column not in train_df.columns:
            print(f"Error: training data is missing required column(s): "
                  f"{[target_column]}")
            return

        # Every non-target column of the training set is used as a feature.
        feature_columns = [col for col in train_df.columns
                           if col != target_column]

        absent_in_test = _missing_columns(
            test_df, [*feature_columns, target_column])
        if absent_in_test:
            print(f"Error: testing data is missing required column(s): "
                  f"{absent_in_test}")
            return

        X_train = train_df[feature_columns]
        y_train = train_df[target_column]

        X_test = test_df[feature_columns]
        y_test = test_df[target_column]

        # Fixed random_state keeps the fitted tree reproducible between runs.
        # NOTE(review): the tree is unconstrained and unweighted; tuning
        # max_depth / min_samples_leaf / class_weight may help if one class
        # is rare -- confirm against the data before changing.
        dtree = DecisionTreeClassifier(random_state=42)

        print("Training the Decision Tree Classifier...")
        dtree.fit(X_train, y_train)

        print("Model training complete. Making predictions on the test data.")
        y_pred = dtree.predict(X_test)

        print("\n--- Model Evaluation ---")

        # Precision, recall and F1-score per class; zero_division=0 reports 0
        # (instead of warning) for a class that is never predicted.
        print("\nClassification Report:\n")
        print(classification_report(y_test, y_pred, zero_division=0))

        # Rows are true labels, columns are predicted labels. For a binary
        # 0/1 target this is:
        # [[True Negatives, False Positives],
        #  [False Negatives, True Positives]]
        print("\nConfusion Matrix:\n")
        print(confusion_matrix(y_test, y_pred))

    except FileNotFoundError as e:
        # Name the paths that were actually tried so the user can fix them.
        print(f"Error: One of the data files was not found. Please ensure both "
              f"'{train_path}' and '{test_path}' exist. Details: {e}")
    except Exception as e:
        # Top-level boundary: report and return rather than crash the cell.
        print(f"An unexpected error occurred: {e}")

# Run the pipeline only when this file is executed directly (script or
# notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    main()


Training the Decision Tree Classifier...
Model training complete. Making predictions on the test data.

--- Model Evaluation ---

Classification Report:

              precision    recall  f1-score   support

           0       0.90      0.84      0.87     11105
           1       0.09      0.15      0.11      1251

    accuracy                           0.77     12356
   macro avg       0.50      0.49      0.49     12356
weighted avg       0.82      0.77      0.79     12356


Confusion Matrix:

[[9290 1815]
 [1064  187]]
