In [48]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import math
import copy

In [49]:
# Load the play-tennis table.
# NOTE(review): absolute path (looks like a Colab working directory) -- adjust when running elsewhere.
dataset = pd.read_csv('/content/tennis.csv')
# Drop the first CSV column (presumably a day/row id -- TODO confirm) and keep the rest as a
# NumPy array. The functions below read the last column of X as the 'Yes'/'No' label.
X = dataset.iloc[:, 1:].values
# Display names for the feature columns of X, in column order; buildTree looks names up here by column index.
attribute = ['outlook', 'temp', 'humidity', 'wind']

In [50]:
class Node(object):
    """One node of the decision tree.

    Attributes:
        value: name of the attribute tested at an internal node, or the class
            label ('Yes'/'No') at a leaf.
        decision: value of the parent's attribute that leads to this node
            (e.g. 'Sunny', 'High'); None until it is assigned.
        childs: list of child nodes; empty for a leaf.
    """

    def __init__(self, value=None, decision=None, childs=None):
        self.value = value
        self.decision = decision
        # Build a fresh list per instance (never a shared mutable default) so a
        # newly created node can be traversed or appended to without extra setup.
        self.childs = [] if childs is None else childs

In [40]:
def findEntropy(data, rows):
    """Compute the entropy of the 'Yes'/'No' label over the selected rows.

    Args:
        data: 2-D table (sequence of rows) whose last column holds the label.
        rows: indices into ``data`` of the rows to consider.

    Returns:
        A pair ``(entropy, ans)``. ``entropy`` is the Shannon entropy in bits
        (0 for a pure or empty selection). ``ans`` flags a leaf: 1 when every
        selected row is 'Yes', 0 when none is, and -1 otherwise (mixed labels
        or no rows at all).
    """
    yes = 0
    no = 0
    for i in rows:
        if data[i][-1] == 'Yes':
            yes += 1
        else:
            no += 1  # any label other than 'Yes' is counted as a "no"

    total = yes + no
    if total == 0:
        # Empty selection: there is no distribution to measure, and dividing
        # by the row count would raise ZeroDivisionError.
        return 0, -1

    # Decide purity from the integer counts rather than comparing float ratios to 1.
    if no == 0:
        return 0, 1
    if yes == 0:
        return 0, 0

    x = yes / total
    y = no / total
    entropy = -1 * (x * math.log2(x) + y * math.log2(y))
    return entropy, -1

In [41]:
def findMaxGain(data, rows, columns):
    """Pick the attribute column with the highest information gain.

    Args:
        data: 2-D table whose last column is the 'Yes'/'No' label.
        rows: indices of the rows that form the current subset (sized and
            re-iterable, e.g. a list).
        columns: candidate attribute column indices.

    Returns:
        A triple ``(maxGain, retidx, ans)``: the best gain found (0 when no
        column gives a positive gain), the column index achieving it (-1 when
        none does), and the leaf flag from findEntropy (1 = all 'Yes',
        0 = all 'No', -1 = mixed).

    Side effects:
        Prints, for every candidate column, a dict mapping each attribute value
        to its row count within the subset.
    """
    maxGain = 0
    retidx = -1

    total = len(rows)
    if total == 0:
        # Nothing to split; also avoids dividing by zero further down.
        return maxGain, retidx, -1

    entropy, ans = findEntropy(data, rows)
    if entropy == 0:  # pure subset: splitting cannot gain anything
        return maxGain, retidx, ans

    for j in columns:
        # Single pass over the subset: attribute value -> [yes count, no count].
        tallies = {}
        for i in rows:
            tally = tallies.setdefault(data[i][j], [0, 0])
            if data[i][-1] == 'Yes':
                tally[0] += 1
            else:
                tally[1] += 1

        print({key: yes + no for key, (yes, no) in tallies.items()})

        # gain = H(subset) - sum_v (|subset_v| / |subset|) * H(subset_v)
        gain = entropy
        for yes, no in tallies.values():
            if yes and no:  # a pure partition contributes zero entropy
                size = yes + no
                x = yes / size
                y = no / size
                # Weight by the size of the *current* subset. A fixed 14 is only
                # right at the root of the 14-row dataset and overstates gain below it.
                gain += (size * (x * math.log2(x) + y * math.log2(y))) / total

        if gain > maxGain:
            maxGain = gain
            retidx = j

    return maxGain, retidx, ans

In [42]:
def buildTree(data, rows, columns):
    """Recursively build a decision tree for the given subset of the data.

    Args:
        data: 2-D table whose last column is the 'Yes'/'No' label.
        rows: indices of the rows belonging to this subtree.
        columns: attribute column indices still available for splitting.

    Returns:
        The root Node of the subtree. Internal nodes carry the attribute name
        (looked up in the module-level ``attribute`` list) in ``value``; leaves
        carry 'Yes' or 'No'. Each child's ``decision`` is the attribute value
        that selects it.
    """
    # Split on the data that was passed in, not on the global X.
    maxGain, idx, ans = findMaxGain(data, rows, columns)
    root = Node()
    root.childs = []

    if maxGain == 0:  # no attribute improves purity: this node is a leaf
        if ans == 1:
            root.value = 'Yes'
        elif ans == 0:
            root.value = 'No'
        else:
            # Mixed labels but nothing left to split on: take the majority label.
            # Ties (and an empty subset) stay 'No', as before.
            yes = sum(1 for i in rows if data[i][-1] == 'Yes')
            root.value = 'Yes' if 2 * yes > len(rows) else 'No'
        return root

    root.value = attribute[idx]  # best attribute for this subset

    # Partition the rows by their value of the chosen attribute. Dict insertion
    # order keeps children in order of first appearance in `rows`.
    groups = {}
    for i in rows:
        groups.setdefault(data[i][idx], []).append(i)

    # The chosen attribute is no longer a candidate further down this branch.
    newcolumns = [c for c in columns if c != idx]

    for key, newrows in groups.items():
        temp = buildTree(data, newrows, newcolumns)
        temp.decision = key
        root.childs.append(temp)
    return root

In [43]:
def traverse(root):
    """Print the tree in pre-order: a node's decision and value, then its children.

    Args:
        root: node exposing ``decision``, ``value`` and ``childs``. A node whose
            ``childs`` is None or empty is treated as a leaf.
    """
    print('Decision:     ', root.decision)
    print('Value:       ', root.value)

    # Guard against childs being None so a bare leaf does not fail on len(None).
    children = root.childs if root.childs is not None else ()
    for child in children:
        traverse(child)


def calculate():
    """Build the decision tree from the module-level ``X`` and print it.

    Row and column indices are derived from the loaded data instead of being
    fixed at 14 rows and 4 attributes, so other dataset sizes work as well.
    """
    rows = list(range(len(X)))              # every row of the dataset
    columns = list(range(len(attribute)))   # one index per named attribute column
    root = buildTree(X, rows, columns)
    root.decision = 'Start'  # label for the root, which has no parent decision
    traverse(root)

In [44]:

calculate()  # Entry point: builds the decision tree from X and prints every node.

{'Sunny': 5, 'Overcast': 4, 'Rain': 5}
{'Hot': 4, 'Mild': 6, 'Cool': 4}
{'High': 7, 'Normal': 7}
{'Weak': 8, 'Strong': 6}
{'Hot': 2, 'Mild': 2, 'Cool': 1}
{'High': 3, 'Normal': 2}
{'Weak': 3, 'Strong': 2}
{'Mild': 3, 'Cool': 2}
{'High': 2, 'Normal': 3}
{'Weak': 3, 'Strong': 2}
Decision:      Start
Value:        outlook
Decision:      Sunny
Value:        humidity
Decision:      High
Value:        No
Decision:      Normal
Value:        Yes
Decision:      Overcast
Value:        Yes
Decision:      Rain
Value:        wind
Decision:      Weak
Value:        Yes
Decision:      Strong
Value:        No
