In [1]:
import pandas as pd
import math
from collections import Counter

# Toy "play tennis" dataset, written one observation per row.
# The last column (PlayTennis) is the class label; the others are attributes.
_columns = ['Outlook', 'Temperature', 'Humidity', 'Wind', 'PlayTennis']
_rows = [
    ('Sunny', 'Hot', 'High', 'Weak', 'No'),
    ('Sunny', 'Hot', 'High', 'Strong', 'No'),
    ('Overcast', 'Hot', 'High', 'Weak', 'Yes'),
    ('Rain', 'Mild', 'High', 'Weak', 'Yes'),
    ('Rain', 'Cool', 'Normal', 'Weak', 'Yes'),
    ('Rain', 'Cool', 'Normal', 'Strong', 'No'),
    ('Overcast', 'Cool', 'Normal', 'Strong', 'Yes'),
    ('Sunny', 'Mild', 'High', 'Weak', 'No'),
    ('Sunny', 'Cool', 'Normal', 'Weak', 'Yes'),
    ('Rain', 'Mild', 'Normal', 'Weak', 'Yes'),
]
data = pd.DataFrame(_rows, columns=_columns)

# Entropy function
def entropy(col):
    """Return the Shannon entropy, in bits, of the values in ``col``.

    Args:
        col: A sized iterable of hashable values (e.g. a list or a pandas
            Series of class labels).

    Returns:
        ``-sum(p * log2(p))`` over the relative frequency ``p`` of each
        distinct value. An empty ``col`` yields ``0``.
    """
    frequencies = Counter(col)
    size = len(col)

    terms = []
    for occurrences in frequencies.values():
        probability = occurrences / size
        terms.append(probability * math.log2(probability))

    return -sum(terms)

# Information Gain
def info_gain(data, attr, target):
    """Return the information gain obtained by splitting ``data`` on ``attr``.

    The gain is the entropy of the ``target`` column minus the size-weighted
    average entropy of ``target`` within each group of rows sharing the same
    ``attr`` value.

    Args:
        data: DataFrame holding both the attribute and the target column.
        attr: Name of the column to split on.
        target: Name of the class-label column.

    Returns:
        The information gain in bits.
    """
    total_entropy = entropy(data[target])
    n_rows = len(data)

    partition_terms = []
    for value in data[attr].unique():
        # Filter once per value and keep only the target column, instead of
        # building the full filtered frame twice (once for the weight, once
        # for the entropy).
        subset_target = data.loc[data[attr] == value, target]
        weight = len(subset_target) / n_rows
        partition_terms.append(weight * entropy(subset_target))

    return total_entropy - sum(partition_terms)

# ID3 algorithm
def id3(data, target, attributes):
    """Build a decision tree for ``target`` using the ID3 algorithm.

    Args:
        data: DataFrame of training rows containing ``target`` and every
            column named in ``attributes``.
        target: Name of the class-label column.
        attributes: List of column names that may still be used for splits.

    Returns:
        * ``None`` when ``data`` has no rows (nothing to learn from).
        * A single label when every row shares it, or the most common label
          when no attributes remain.
        * Otherwise a nested dict ``{best_attr: {value: subtree, ...}}`` where
          ``best_attr`` is the attribute with the highest information gain
          (the first such attribute in ``attributes`` order on ties).
    """
    # Guard against empty input; without it the majority vote below would
    # index into an empty list.
    if len(data) == 0:
        return None

    labels = data[target]
    if len(set(labels)) == 1:
        return labels.iloc[0]

    majority_label = Counter(labels).most_common(1)[0][0]
    if not attributes:
        return majority_label

    gains = {attr: info_gain(data, attr, target) for attr in attributes}
    best_attr = max(gains, key=gains.get)

    # The same reduced attribute list applies to every branch, so build it once.
    remaining = [a for a in attributes if a != best_attr]

    branches = {}
    for value in data[best_attr].unique():
        subset = data[data[best_attr] == value]
        if len(subset) == 0:
            # A value reported by unique() can match no row under `==`
            # (e.g. a missing value). Use the textbook ID3 fallback: a leaf
            # carrying the majority label of the current node.
            branches[value] = majority_label
        else:
            branches[value] = id3(subset, target, remaining)

    return {best_attr: branches}

# Every column except the last one is offered as a candidate split attribute.
attributes = data.columns[:-1].tolist()
tree = id3(data, 'PlayTennis', attributes)

# Heading and tree are printed on separate lines.
print("Decision Tree:", tree, sep="\n")


Decision Tree:
{'Outlook': {'Sunny': {'Temperature': {'Hot': 'No', 'Mild': 'No', 'Cool': 'Yes'}}, 'Overcast': 'Yes', 'Rain': {'Wind': {'Weak': 'Yes', 'Strong': 'No'}}}}
