In [18]:
# Import necessary libraries
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.preprocessing import OneHotEncoder, LabelEncoder

# Define the file ID from the Google Drive sharing link
file_id = '1Gb3AiXXEKhkaZZtIXxqcV0jIdVckcrLB'

# Build the direct-download URL and read the CSV straight into a DataFrame
# (pandas fetches the URL itself, so this step needs network access).
download_link = f'https://drive.google.com/uc?id={file_id}'
dataset = pd.read_csv(download_link)

# Ask how many train/test splits to run; one Decision Tree is trained per
# split. int() raises ValueError if the answer is not a whole number.
num_splits = int(input("Enter the number of splits for Decision Trees: "))

# Fail fast on a non-positive count: the training loop would not run at all,
# so the train/test variables that the Random Forest step reuses would never
# be defined (or would be stale leftovers from an earlier run of this cell).
if num_splits < 1:
    raise ValueError(f"Number of splits must be at least 1, got {num_splits}")

# Columns that are label-encoded to integer codes before training
categorical_columns = ['Sex', 'ChestPainType', 'FastingBS', 'RestingECG', 'ExerciseAngina', 'ST_Slope']

# Collects one fitted Decision Tree per split
individual_trees = []

# Encode the categorical columns once, on a copy of the raw data, before any
# splitting. Doing it here instead of inside the loop:
#   * avoids assigning into the frames returned by train_test_split, which
#     triggers pandas' SettingWithCopyWarning;
#   * guarantees train and test rows share one category -> integer mapping, so
#     a category that lands only in the test split cannot make transform()
#     fail on an unseen label;
#   * removes repeated work, since the encoding does not depend on the split.
# `dataset` itself is left untouched.
encoded_dataset = dataset.copy()
for col in categorical_columns:
    # A fresh encoder per column keeps each column's mapping independent
    label_encoder = LabelEncoder()
    encoded_dataset[col] = label_encoder.fit_transform(encoded_dataset[col])

# Split and train Decision Trees
for i in range(num_splits):
    # Using the loop index as random_state gives every tree a different,
    # reproducible 70/30 train/test split.
    train_data, test_data = train_test_split(encoded_dataset, test_size=0.3, random_state=i)

    # Separate the features from the 'HeartDisease' target column
    X_train = train_data.drop('HeartDisease', axis=1)
    y_train = train_data['HeartDisease']
    X_test = test_data.drop('HeartDisease', axis=1)
    y_test = test_data['HeartDisease']

    # Create and train a Decision Tree
    decision_tree = DecisionTreeClassifier(random_state=42)
    decision_tree.fit(X_train, y_train)

    # Append the Decision Tree to the list
    individual_trees.append(decision_tree)

    # Calculate and print accuracy for the current Decision Tree on the
    # held-out part of its own split
    y_pred = decision_tree.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy for Decision Tree {i + 1}: {accuracy}")

# Train a Random Forest for comparison with the individual Decision Trees.
# NOTE: the forest grows its own `num_splits` trees from scratch; it does NOT
# reuse the trees collected in `individual_trees`.
# It is fitted and scored on X_train / X_test exactly as the LAST iteration of
# the training loop left them, so only the final Decision Tree's accuracy was
# measured on the same test rows as the forest.
if not individual_trees:
    # The loop did not run, so X_train / X_test are undefined or stale
    raise ValueError(
        "No Decision Trees were trained, so there is no train/test split for "
        "the Random Forest; run the training loop with at least one split."
    )

random_forest = RandomForestClassifier(n_estimators=num_splits, random_state=42)
random_forest.fit(X_train, y_train)

# Make predictions and calculate accuracy for the Random Forest
rf_predictions = random_forest.predict(X_test)
rf_accuracy = accuracy_score(y_test, rf_predictions)

# Confusion matrix for the Random Forest
# (scikit-learn convention: rows are true classes, columns are predicted)
conf_matrix = confusion_matrix(y_test, rf_predictions)

# Print the results
print(f"Accuracy of Random Forest: {rf_accuracy}")
print("Confusion Matrix for Random Forest:")
print(conf_matrix)


Enter the number of splits for Decision Trees: 6
Accuracy for Decision Tree 1: 0.782608695652174
Accuracy for Decision Tree 2: 0.7753623188405797
Accuracy for Decision Tree 3: 0.8007246376811594
Accuracy for Decision Tree 4: 0.8007246376811594
Accuracy for Decision Tree 5: 0.8260869565217391
Accuracy for Decision Tree 6: 0.7717391304347826
Accuracy of Random Forest: 0.8405797101449275
Confusion Matrix for Random Forest:
[[101  18]
 [ 26 131]]
