# Decision Tree Model

In [None]:
# import required libraries
from sklearn import tree
import pandas as pd
import os

In [None]:
# read the subset CSV into a DataFrame (relative path: resolved against the kernel's working directory)
stroke_positive_subset = pd.read_csv(filepath_or_buffer="data/combined_subsets.csv")

# preview the first five rows as the cell output
stroke_positive_subset.head(n=5)

In [None]:
# split the dataset into the target variable and the feature matrix

# target: the "stroke" column
# NOTE(review): the display names below assume the column is encoded 0 = no, 1 = yes — confirm
target = stroke_positive_subset["stroke"]
target_names = ["no", "yes"]

# features: every column except the target
stroke_positive_data = stroke_positive_subset.drop(columns=["stroke"])

# take the names from the feature frame (not the full subset) so the target
# column "stroke" is not listed as a feature and the names line up one-to-one
# with the columns the model is trained on
feature_names = stroke_positive_data.columns

stroke_positive_data.head()

In [None]:
# hold out part of the data for evaluation; no test_size is passed, so
# scikit-learn's default train/test proportion applies
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    stroke_positive_data,
    target,
    random_state=42,  # fixed seed: the same rows land in each split on every run
)

In [None]:
# baseline model: Decision Tree Classifier with scikit-learn's default hyperparameters
# random_state is fixed because the tree builder shuffles the candidate features
# at every split; without a seed the fitted tree (and both scores below) can
# change from run to run
clf = tree.DecisionTreeClassifier(random_state=42)

# train the Decision Tree Classifier on the training split only
clf = clf.fit(X_train, y_train)

# mean accuracy on the data the tree was fitted on vs. on the held-out data
train_score = clf.score(X_train, y_train)
test_score = clf.score(X_test, y_test)

In [None]:
# report the baseline accuracies, one per line
print(
    f"Accuracy using the Decision Tree Model for training: {train_score}",
    f"Accuracy using the Decision Tree Model for testing: {test_score}",
    sep="\n",
)

### Used *Entropy* Criterion

In [None]:
# create Decision Tree Classifier object with the entropy criterion
# random_state is fixed so the fitted tree and its scores are reproducible
clf = tree.DecisionTreeClassifier(criterion="entropy", random_state=42)

# train the Decision Tree Classifier
clf = clf.fit(X_train, y_train)

# store the scores under entropy-specific names only: also assigning to
# train_score here would overwrite the baseline model's training accuracy
entropy_train_score = clf.score(X_train, y_train)
entropy_test_score = clf.score(X_test, y_test)

print(f"Accuracy using the Decision Tree Model with entropy criterion for training: {entropy_train_score}")
print(f"Accuracy using the Decision Tree Model with entropy criterion for testing: {entropy_test_score}")

### Used *Entropy* Criterion with *Best* Splitter

In [None]:
# create Decision Tree Classifier object with the entropy criterion and the "best" splitter
# random_state is fixed because features are still shuffled at each split with
# splitter="best", so an unseeded fit is not guaranteed to be reproducible
clf = tree.DecisionTreeClassifier(criterion="entropy", splitter="best", random_state=42)

# train the Decision Tree Classifier
clf = clf.fit(X_train, y_train)

# mean accuracy on the held-out test split
entropy_best_score = clf.score(X_test, y_test)

print(f"Accuracy using the Decision Tree Model with entropy criterion and best split: {entropy_best_score}")

### Used *Entropy* Criterion with *Random* Splitter

In [None]:
# create Decision Tree Classifier object with the entropy criterion and the "random" splitter
# splitter="random" draws candidate splits at random, so the seed is what makes
# this score repeatable across runs and comparable with the other variants
clf = tree.DecisionTreeClassifier(criterion="entropy", splitter="random", random_state=42)

# train the Decision Tree Classifier
clf = clf.fit(X_train, y_train)

# mean accuracy on the held-out test split
entropy_random_score = clf.score(X_test, y_test)

print(f"Accuracy using the Decision Tree Model with entropy criterion and random split: {entropy_random_score}")

### Used *Entropy* Criterion with *Max Depth*

In [None]:
# create Decision Tree Classifier object with the entropy criterion, limited to a depth of 3
# random_state is fixed so the fitted tree and its score are reproducible
clf = tree.DecisionTreeClassifier(criterion="entropy", max_depth=3, random_state=42)

# train the Decision Tree Classifier
clf = clf.fit(X_train, y_train)

# mean accuracy on the held-out test split
entropy_max_depth3_score = clf.score(X_test, y_test)

print(f"Accuracy using the Decision Tree Model with entropy criterion and max depth of 3: {entropy_max_depth3_score}")

### Used *Gini* Criterion

In [None]:
# create Decision Tree Classifier object with the gini criterion
# random_state is fixed so the fitted tree and its score are reproducible;
# without it, repeated runs of this same configuration can report different accuracies
clf = tree.DecisionTreeClassifier(criterion="gini", random_state=42)

# train the Decision Tree Classifier
clf = clf.fit(X_train, y_train)

# mean accuracy on the held-out test split
gini_score = clf.score(X_test, y_test)

print(f"Accuracy using the Decision Tree Model with gini criterion: {gini_score}")

### Used *Gini* Criterion with *Best* Splitter

In [None]:
# create Decision Tree Classifier object with gini and best splitter
# random_state is fixed because features are still shuffled at each split with
# splitter="best", so an unseeded fit is not guaranteed to be reproducible
clf = tree.DecisionTreeClassifier(criterion="gini", splitter="best", random_state=42)

# train the Decision Tree Classifier
clf = clf.fit(X_train, y_train)

# mean accuracy on the held-out test split
gini_best_score = clf.score(X_test, y_test)

print(f"Accuracy using the Decision Tree Model with gini criterion and best split: {gini_best_score}")

### Used *Gini* Criterion with *Random* Splitter

In [None]:
# create Decision Tree Classifier object with gini and random splitter
# splitter="random" draws candidate splits at random, so the seed is what makes
# this score repeatable across runs and comparable with the other variants
clf = tree.DecisionTreeClassifier(criterion="gini", splitter="random", random_state=42)

# train the Decision Tree Classifier
clf = clf.fit(X_train, y_train)

# mean accuracy on the held-out test split
gini_random_score = clf.score(X_test, y_test)

print(f"Accuracy using the Decision Tree Model with gini criterion and random split: {gini_random_score}")

### Used *Gini* Criterion with *Max Depth*

In [None]:
# create Decision Tree Classifier object with gini and max depth of 3
# random_state is fixed so the fitted tree and its score are reproducible
clf = tree.DecisionTreeClassifier(criterion="gini", max_depth=3, random_state=42)

# train the Decision Tree Classifier
clf = clf.fit(X_train, y_train)

# mean accuracy on the held-out test split
gini_max_depth3_score = clf.score(X_test, y_test)

print(f"Accuracy using the Decision Tree Model with gini criterion and max depth of 3: {gini_max_depth3_score}")