In [1]:
import math
from collections import Counter

In [2]:
def entropy(data, target_attr):
    """Calculate the entropy of a dataset for a target attribute."""
    values = [example[target_attr] for example in data]
    value_counts = Counter(values)
    total = len(data)
    
    ent = 0.0
    for count in value_counts.values():
        probability = count / total
        ent -= probability * math.log2(probability)
    return ent

def information_gain(data, attr, target_attr):
    """Calculate the information gain of an attribute."""
    total_entropy = entropy(data, target_attr)
    attr_values = [example[attr] for example in data]
    value_counts = Counter(attr_values)
    
    weighted_entropy = 0.0
    total = len(data)
    
    for val in value_counts:
        subset = [example for example in data if example[attr] == val]
        weighted_entropy += (len(subset) / total) * entropy(subset, target_attr)
    
    return total_entropy - weighted_entropy

def majority_class(data, target_attr):
    """Return the most common target value in the data."""
    values = [example[target_attr] for example in data]
    return Counter(values).most_common(1)[0][0]

def id3(data, attributes, target_attr):
    """Build the decision tree using ID3."""
    # Base case 1: If all examples have same target value, return it
    targets = [example[target_attr] for example in data]
    if len(set(targets)) == 1:
        return targets[0]

    # Base case 2: If no more attributes to split on, return majority
    if not attributes:
        return majority_class(data, target_attr)

    # Choose best attribute based on info gain
    best_attr = max(attributes, key=lambda attr: information_gain(data, attr, target_attr))

    # Create the tree as a dictionary
    tree = {best_attr: {}}
    attr_values = set(example[best_attr] for example in data)

    for val in attr_values:
        subset = [example for example in data if example[best_attr] == val]
        if not subset:
            tree[best_attr][val] = majority_class(data, target_attr)
        else:
            remaining_attrs = [attr for attr in attributes if attr != best_attr]
            tree[best_attr][val] = id3(subset, remaining_attrs, target_attr)
    
    return tree

def print_tree(tree, indent=""):
    """Recursively print the decision tree."""
    if isinstance(tree, dict):
        for attr, branches in tree.items():
            for val, subtree in branches.items():
                print(f"{indent}{attr} = {val}:")
                print_tree(subtree, indent + "  ")
    else:
        print(f"{indent}→ {tree}")

In [3]:
# Example dataset
dataset = [
    {'Outlook': 'Sunny', 'Temperature': 'Hot', 'Humidity': 'High', 'Windy': False, 'Play': 'No'},
    {'Outlook': 'Sunny', 'Temperature': 'Hot', 'Humidity': 'High', 'Windy': True, 'Play': 'No'},
    {'Outlook': 'Overcast', 'Temperature': 'Hot', 'Humidity': 'High', 'Windy': False, 'Play': 'Yes'},
    {'Outlook': 'Rain', 'Temperature': 'Mild', 'Humidity': 'High', 'Windy': False, 'Play': 'Yes'},
    {'Outlook': 'Rain', 'Temperature': 'Cool', 'Humidity': 'Normal', 'Windy': False, 'Play': 'Yes'},
    {'Outlook': 'Rain', 'Temperature': 'Cool', 'Humidity': 'Normal', 'Windy': True, 'Play': 'No'},
    {'Outlook': 'Overcast', 'Temperature': 'Cool', 'Humidity': 'Normal', 'Windy': True, 'Play': 'Yes'},
    {'Outlook': 'Sunny', 'Temperature': 'Mild', 'Humidity': 'High', 'Windy': False, 'Play': 'No'},
    {'Outlook': 'Sunny', 'Temperature': 'Cool', 'Humidity': 'Normal', 'Windy': False, 'Play': 'Yes'},
    {'Outlook': 'Rain', 'Temperature': 'Mild', 'Humidity': 'Normal', 'Windy': False, 'Play': 'Yes'},
    {'Outlook': 'Sunny', 'Temperature': 'Mild', 'Humidity': 'Normal', 'Windy': True, 'Play': 'Yes'},
    {'Outlook': 'Overcast', 'Temperature': 'Mild', 'Humidity': 'High', 'Windy': True, 'Play': 'Yes'},
    {'Outlook': 'Overcast', 'Temperature': 'Hot', 'Humidity': 'Normal', 'Windy': False, 'Play': 'Yes'},
    {'Outlook': 'Rain', 'Temperature': 'Mild', 'Humidity': 'High', 'Windy': True, 'Play': 'No'},
]

attributes = ['Outlook', 'Temperature', 'Humidity', 'Windy']
target_attr = 'Play'

tree = id3(dataset, attributes, target_attr)
print("Decision Tree:")
print_tree(tree)

Decision Tree:
Outlook = Rain:
  Windy = False:
    → Yes
  Windy = True:
    → No
Outlook = Overcast:
  → Yes
Outlook = Sunny:
  Humidity = Normal:
    → Yes
  Humidity = High:
    → No
