In [20]:
from anytree import Node, RenderTree
from anytree.exporter import DotExporter
import pandas as pd

# defaults
adjacency_path = ("flightdelay.csv")
target_column = 'DEP_DEL15'
learning_set = []
testing_set = []

# input
input_data = [1, 7, '0800-0859', 2, 1, 25, 143, 'Southwest Airlines Co.', 13056, 107363, 5873, 1903352, 13382999, 6.178236301460919e-05,
              9.889412309998219e-05, 8, 'McCarran International', 36.08, -115.152, 'NONE', 0, 0, 0, 65, 2.91]

# classes
class Tree:
    def __init__(self, label, children=None):
        self.label = label
        self.children = {}
        if children is not None:
            for child in children:
                self.add_child(child)

    def add_child(self, key, node):
        assert isinstance(node, Tree), 'A child must be a Tree type'
        self.children[key] = node
    
    def to_anytree(self, parent=None):
        root = Node(self.label, parent=parent)
        for key, child in self.children.items():
            child.to_anytree(root)
        return root

# end classes

def read_data(): # takes ~ 10 seconds 
    return pd.read_csv(adjacency_path, low_memory = False) 

def divide_datasets(data, target_column): # will divide the dataset into a learning set and a test set
    x = data.drop(columns=[target_column])
    y = data[target_column]
    return x, y

def get_attributes(data): # returns the tree attributes <=> data columns \ target column
    attributes = data.columns.drop(target_column)
    return attributes

def pre_proccess(ratio): # reads the data and divides it into a learning set and a testing set and returns the attributes (columns \ target)
    assert 0 <= ratio <= 1, 'Ratio must be between 0 and 1'
    data = read_data()
    learning_set = data.sample(frac = ratio, random_state = 42)
    testing_set = data.drop(learning_set.index)
    attributes = get_attributes(learning_set)
    return learning_set, testing_set, attributes

def plurality_value(exs): # returns most common value in the target column (is the flight delayed)
    target_values = exs[target_column]
    return target_values.mode()[0]

def same_classification(exs): # will return a tuple of (T, classification) or (F, None)
    target_values = exs[target_column]
    if len(target_values.unique()) == 1:
        return True, target_values.iloc[0]
    else:
        return False, None

def importance(attribute, examples): # entropy
    return len(examples[attribute].unique())

def get_values(examples, attribute): # returns the unique values under a specific attribute in a df
    return examples[attribute].unique()

def decision_tree_learning(examples, attributes, parent_examples): # builds the decision tree base on decision_tree_learning algo.
    if examples.empty:
        return Tree(label=plurality_value(parent_examples))
    elif same_classification(examples)[0]: 
        return Tree(label=same_classification(examples)[1])
    elif len(attributes) == 0:
        return Tree(label=plurality_value(examples))
    else:
        A = max(attributes, key=lambda a: importance(a, examples))
        tree = Tree(label=A)
        for v in get_values(examples, A):
            exs = examples[examples[A] == v]
            subtree = decision_tree_learning(exs, attributes.drop(A), examples)
            tree.add_child(f'{A} = {v}', subtree)
        return tree

def build_tree(ratio):
    global learning_set, testing_set
    learning_set, testing_set, attributes = pre_proccess(ratio)
    tree = decision_tree_learning(learning_set, attributes, None)
    return tree

def tree_error(k):
    pass

def is_late(row_input):
    pass

if __name__ == '__main__':
    tree = build_tree(ratio = 0.0001)
    root = tree.to_anytree()
    for pre, fill, node in RenderTree(root):
        print("%s%s" % (pre, node.name))
    
    DotExporter(root).to_picture("decision_tree.png")

AIRLINE_AIRPORT_FLIGHTS_MONTH
├── 0
├── 0
├── 0
├── DAY_OF_WEEK
│   ├── 1
│   └── 0
├── DAY_OF_WEEK
│   ├── 0
│   └── 1
├── 0
├── 1
├── 0
├── 0
├── 0
├── 1
├── 0
├── 0
├── 0
├── 0
├── 0
├── 0
├── DAY_OF_WEEK
│   ├── 0
│   └── 1
├── 1
├── 0
├── 0
├── 1
├── 0
├── 0
├── 0
├── 0
├── 0
├── 1
├── 0
├── 1
├── 0
├── 0
├── 1
├── 0
├── 0
├── 0
├── 1
├── 1
├── 0
├── 0
├── 0
├── 0
├── 0
├── 0
├── 1
├── 0
├── 0
├── 0
├── 0
├── 1
├── DEP_TIME_BLK
│   ├── 0
│   └── 1
├── 0
├── 0
├── 0
├── 0
├── 0
├── 0
├── 0
├── 0
├── 1
├── 1
├── 0
├── 0
├── 0
├── 0
├── 0
├── 0
├── 0
├── 0
├── 0
├── 0
├── 0
├── 0
└── 0


FileNotFoundError: [Errno 2] No such file or directory: 'dot'