In [6]:
import pandas as pd
import numpy as np
import math

class Node:
    def __init__(self):
        self.value = None
        self.children = {}
        self.isLeaf = False
        self.pred = None

def entropy(examples):
    pos = 0.0
    neg = 0.0
    for _, row in examples.iterrows():
        if row["PlayTennis"] == "yes":
            pos += 1
        else:
            neg += 1
    if pos == 0.0 or neg == 0.0:
        return 0.0
    else:
        p = pos / (pos + neg)
        n = neg / (pos + neg)
        return -(p * math.log(p, 2) + n * math.log(n, 2))

def info_gain(examples, attr):
    uniq = np.unique(examples[attr])
    gain = entropy(examples)
    for u in uniq:
        subdata = examples[examples[attr] == u]
        sub_e = entropy(subdata)
        gain -= (float(len(subdata)) / float(len(examples))) * sub_e
    return gain

def ID3(examples, attrs):
    root = Node()

    if len(np.unique(examples["PlayTennis"])) == 1:
        root.isLeaf = True
        root.pred = examples["PlayTennis"].iloc[0]
        return root

    if len(attrs) == 0:
        root.isLeaf = True
        root.pred = examples["PlayTennis"].value_counts().idxmax()
        return root

    max_gain = -1
    max_feat = ""
    for attr in attrs:
        gain = info_gain(examples, attr)
        if gain > max_gain:
            max_gain = gain
            max_feat = attr

    root.value = max_feat
    uniq = np.unique(examples[max_feat])
    for u in uniq:
        subdata = examples[examples[max_feat] == u]
        new_attrs = attrs.copy()
        new_attrs.remove(max_feat)
        root.children[u] = ID3(subdata, new_attrs)

    return root

def printTree(root: Node, depth=0):
    for i in range(depth):
        print("\t", end="")
    if root.isLeaf:
        print(root.pred)
    else:
        print(root.value)
        for value, child in root.children.items():
            print("\t" * (depth + 1), value)
            printTree(child, depth + 2)

def classify(root: Node, new):
    if root.isLeaf:
        return root.pred
    else:
        value = new[root.value]
        if value in root.children:
            return classify(root.children[value], new)
        else:
            return None

data = pd.read_csv("lab3.csv")
features = [feat for feat in data]
features.remove("PlayTennis")
root = ID3(data, features)
print("Decision Tree is:")
printTree(root)
print("------------------")
#new = {"Outlook": "sunny", "Temperature": "hot", "Humidity": "normal", "Wind": "strong"}
#print("Classification:", classify(root, new))


Decision Tree is:
Outlook
	 Overcast
		Yes
	 Rainy
		Temperature
			 Cool
				Humidity
					 Normal
						Windy
							 Strong
								No
							 Weak
								Yes
			 Mild
				Humidity
					 High
						Windy
							 Strong
								No
							 Weak
								Yes
					 Normal
						Yes
	 Sunny
		Temperature
			 Cool
				Yes
			 Hot
				No
			 Mild
				Humidity
					 High
						No
					 Normal
						Yes
------------------


In [4]:
data.describe()

Unnamed: 0,Outlook,Temperature,Humidity,Windy,PlayTennis
count,14,14,14,14,14
unique,3,3,2,2,2
top,Sunny,Mild,High,Weak,Yes
freq,5,6,7,8,9


In [7]:
data.head(14)

Unnamed: 0,Outlook,Temperature,Humidity,Windy,PlayTennis
0,Sunny,Hot,High,Weak,No
1,Sunny,Hot,High,Strong,No
2,Overcast,Hot,High,Weak,Yes
3,Rainy,Mild,High,Weak,Yes
4,Rainy,Cool,Normal,Weak,Yes
5,Rainy,Cool,Normal,Strong,No
6,Overcast,Cool,Normal,Strong,Yes
7,Sunny,Mild,High,Weak,No
8,Sunny,Cool,Normal,Weak,Yes
9,Rainy,Mild,Normal,Weak,Yes
