In [None]:
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import plot_tree

Load datasets

In [None]:
# Remote locations of the pre-split training and test CSV files.
train_url = "https://raw.githubusercontent.com/zetomic/dataset/main/train.csv"
test_url = "https://raw.githubusercontent.com/zetomic/dataset/main/test.csv"

# Download and parse each split into its own DataFrame (train first, then test).
train_data, test_data = (pd.read_csv(url) for url in (train_url, test_url))



Preprocessing: Drop non-numeric columns and rows with NaN values for simplicity

In [None]:
# Keep only the float64/int64 columns of each split, then discard every row
# that still contains at least one missing value.
train_data = (
    train_data
    .select_dtypes(include=['float64', 'int64'])
    .dropna(axis=0, how='any')
)
test_data = (
    test_data
    .select_dtypes(include=['float64', 'int64'])
    .dropna(axis=0, how='any')
)



Splitting train data into features and target

In [None]:

# Target vector: the 'hospital_death' column of the cleaned training frame.
y = train_data['hospital_death']
# Feature matrix: every remaining column.
X = train_data.drop(columns='hospital_death')



Train DecisionTree model

In [None]:
# Fit a decision tree on the full training set with default hyperparameters.
# random_state is pinned so that a Restart & Run All reproduces the same tree:
# scikit-learn randomly permutes the candidate features at each split, so an
# unseeded fit can pick different splits (and give different accuracy,
# importances and plots) from one run to the next.
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X, y)


Test the model

In [None]:
# Separate the cleaned test frame into labels and features, mirroring the
# training split, then predict a label for every test row.
y_test = test_data['hospital_death']
X_test = test_data.drop(columns='hospital_death')
y_pred = clf.predict(X_test)


Calculate accuracy

In [None]:

# Fraction of test rows whose predicted label equals the true label,
# reported as a percentage with two decimals.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")



Feature importance

In [None]:

# Per-feature importance scores of the fitted tree. The fitted attribute is
# spelled with a trailing underscore (`feature_importances_`); without it the
# lookup raises AttributeError.
importances = clf.feature_importances_
features = X.columns

# Horizontal bar chart: one bar per training column, bar length = importance.
plt.barh(features, importances)
plt.xlabel('Importance')
plt.ylabel('Feature')
plt.title('Feature Importance')
plt.show()



Visualize the Decision Tree

In [None]:

# Draw the fitted tree on a wide canvas.
plt.figure(figsize=(20, 10))
plot_tree(
    clf,
    filled=True,
    # Read the column names straight from the training matrix and pass them as
    # a plain list: this keeps the cell independent of variables created in
    # other plotting cells, and some scikit-learn releases reject a pandas
    # Index for `feature_names`.
    feature_names=list(X.columns),
    # class_names are matched to the class labels in ascending order;
    # assumes hospital_death is coded 0 = survived, 1 = died (TODO confirm).
    class_names=['Alive', 'Deceased'],
    rounded=True,
)
plt.show()