## Build a decision tree using scikit-learn

In [11]:
import monkdata as m
import numpy as np

### Data Pre-Processing

In [12]:
def dataset(data):
    """Convert an iterable of samples into NumPy feature and label arrays.

    Each sample must expose an ``attribute`` mapping (only its ``values()``
    are used, in the mapping's iteration order) and a ``positive`` flag
    that can be converted with ``int()``.

    Returns:
        A ``(values, targets)`` tuple of NumPy arrays: ``values`` holds one
        entry per sample built from its attribute values, ``targets`` holds
        the integer-cast ``positive`` flags.
    """
    # Materialise once so one-shot iterables are only consumed a single time.
    samples = list(data)

    features = np.array([list(sample.attribute.values()) for sample in samples])
    labels = np.array([int(sample.positive) for sample in samples])

    return features, labels

### Build Tree

In [13]:
from sklearn.tree import DecisionTreeClassifier

In [14]:
def build_tree(data, random_state=0):
    """Fit an entropy-criterion decision tree on the given samples.

    Args:
        data: Iterable of samples accepted by ``dataset``.
        random_state: Seed forwarded to ``DecisionTreeClassifier``.
            scikit-learn randomly permutes the features at every split,
            so without a fixed seed equally good splits may be chosen
            differently between runs and the reported accuracies drift.
            Pass ``None`` to get the unseeded behaviour.

    Returns:
        The fitted ``DecisionTreeClassifier``.
    """
    values, targets = dataset(data)
    clf = DecisionTreeClassifier(criterion='entropy', random_state=random_state)
    # fit() returns the estimator itself, so it can be returned directly.
    return clf.fit(values, targets)

In [15]:
def evaluate_tree(tree, test_data):
    """Return the fraction of samples in ``test_data`` that ``tree`` classifies correctly.

    ``test_data`` is converted with ``dataset``; ``tree`` only needs a
    ``predict`` method that accepts the resulting feature array.
    """
    features, labels = dataset(test_data)
    # Element-wise comparison: True wherever the prediction matches the label.
    hits = tree.predict(features) == labels
    return sum(hits) / len(labels)

### Visualize

In [16]:
from pydotplus import graph_from_dot_data
from sklearn.tree import export_graphviz

In [17]:
def visualize_tree(tree, file_name, dot_path=None):
    """Render a fitted decision tree to a PNG file via Graphviz.

    Args:
        tree: Fitted estimator accepted by ``export_graphviz``.
        file_name: Path of the PNG file to write.
        dot_path: Optional explicit path to the Graphviz ``dot`` executable.
            When omitted, ``dot`` is looked up on ``PATH``; if it is not
            found there, the default Windows install location is used.
    """
    import shutil

    dot_data = export_graphviz(tree, out_file=None,
                               filled=True, rounded=True)
    graph = graph_from_dot_data(dot_data)

    if dot_path is None:
        # Prefer whatever Graphviz is installed on this machine; keep the
        # original hard-coded location only as a last-resort fallback so the
        # notebook is not tied to one Windows installation.
        dot_path = shutil.which('dot') or "C:\\Program Files\\Graphviz\\bin\\dot.exe"

    graph.progs = {'dot': dot_path}
    graph.write_png(file_name)

## Run the Code

In [18]:
# [train, test] pairs for the three MONK problems. Kept as a list of
# two-element lists so other cells can keep indexing entries as pair[0]/pair[1].
datasets = [[m.monk1, m.monk1test], [m.monk2, m.monk2test], [m.monk3, m.monk3test]]

# enumerate() supplies the 1-based problem number directly; looking it up with
# datasets.index(...) is a linear search and returns the wrong number if two
# entries ever compare equal.
for monk_number, (train_set, test_set) in enumerate(datasets, start=1):
    tree = build_tree(train_set)
    # Named `accuracy` rather than `eval` so the built-in eval() is not shadowed.
    accuracy = evaluate_tree(tree, test_set)
    print(accuracy)

    visualize_tree(tree, f'monk{monk_number}.png')



0.8935185185185185
0.8194444444444444
0.9004629629629629


In [19]:
# Evaluate each tree on the same data it was trained on (training accuracy).
for train_set, _test_set in datasets:
    tree = build_tree(train_set)
    # Named `train_accuracy` rather than `eval` so the built-in eval() is not shadowed.
    train_accuracy = evaluate_tree(tree, train_set)
    print(train_accuracy)

1.0
1.0
1.0
