<a href="https://colab.research.google.com/github/varekarprajwal/Fuzzylogic/blob/DecisionTree/ID3_Algorithm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
import math

In [2]:
# Load dataset
data = pd.read_csv("3-dataset.csv")
# Extract features and target
target = "answer"
# Fail here, with a readable message, if the label column is absent.
# Without this check `features` would silently contain every column and the
# problem would only surface later as a KeyError when the label is looked up.
assert target in data.columns, (
    f"label column {target!r} not found; available columns: {list(data.columns)}"
)
# Every column other than the label is treated as a candidate split attribute.
features = [col for col in data.columns if col != target]

In [3]:
# Node class for Decision Tree
class Node:
    """One vertex of the decision tree.

    An internal node stores the name of the attribute it splits on in
    ``value`` and maps each attribute value to a child node in
    ``children``. A leaf has ``is_leaf`` set and holds the label to return
    in ``prediction``.
    """

    def __init__(self, value=None, is_leaf=False, prediction=None):
        self.value, self.is_leaf, self.prediction = value, is_leaf, prediction
        # Fresh dict per instance; the tree builder fills in the branches.
        self.children = dict()

# Calculate entropy
def entropy(examples, target_col=None):
    """Return the Shannon entropy (base 2, in bits) of the class labels.

    Parameters
    ----------
    examples : pandas.DataFrame
        Rows whose label column is measured.
    target_col : str, optional
        Name of the label column. When omitted, the notebook-level
        ``target`` variable is used, so existing ``entropy(examples)``
        calls keep working unchanged.

    Returns
    -------
    float
        ``-sum(p * log2(p))`` over the labels present in ``examples``;
        ``0.0`` when there are no labelled rows.
    """
    label_col = target if target_col is None else target_col
    counts = examples[label_col].value_counts()
    # A categorical column reports unused categories with a count of 0.
    # They contribute nothing to the entropy, and log(0) would raise.
    counts = counts[counts > 0]
    # value_counts() skips missing labels, so normalise by the number of
    # labelled rows (not len(examples)) to keep the probabilities summing to 1.
    total = int(counts.sum())
    entropy_value = 0.0
    for count in counts:
        p = count / total
        entropy_value -= p * math.log(p, 2)
    return entropy_value

# Information Gain
def info_gain(examples, attr):
    """Information gain obtained by partitioning ``examples`` on ``attr``.

    Computed as the entropy of the full set minus the size-weighted entropy
    of the subsets that share each distinct value of ``attr``.
    """
    baseline = entropy(examples)
    n_rows = len(examples)
    column = examples[attr]

    # Accumulate the weighted entropy one partition at a time, in the
    # order the values first appear in the column.
    remainder = 0.0
    for level in column.unique():
        part = examples[column == level]
        remainder += (len(part) / n_rows) * entropy(part)

    return baseline - remainder

In [4]:
# ID3 Algorithm
def ID3(examples, attrs):
    """Recursively build a decision tree with the ID3 algorithm.

    Parameters
    ----------
    examples : pandas.DataFrame
        Training rows; must contain the label column named by the
        notebook-level ``target`` plus every column listed in ``attrs``.
    attrs : list
        Names of the attributes still available for splitting.

    Returns
    -------
    Node
        A leaf when the rows all share one label, when no attribute is
        left (majority label), or when there are no rows at all
        (prediction ``None``). Otherwise an internal node whose ``value``
        is the attribute with the highest information gain and whose
        ``children`` map each observed value of that attribute to a subtree.
    """
    labels = examples[target]

    # No rows: there is nothing to learn from and no majority class to fall
    # back on. Return a leaf predicting None rather than a childless split
    # node (attrs left) or a lookup error from mode()[0] (no attrs left).
    if labels.empty:
        return Node(is_leaf=True, prediction=None)

    # If all examples have the same target value
    if len(labels.unique()) == 1:
        return Node(is_leaf=True, prediction=labels.iloc[0])

    # If no attributes left, fall back to the most frequent label
    if not attrs:
        return Node(is_leaf=True, prediction=labels.mode()[0])

    # Find attribute with max information gain (the first one wins ties)
    best_attr = max(attrs, key=lambda attr: info_gain(examples, attr))

    root = Node(value=best_attr)

    # Every branch receives the same reduced attribute list, so build it
    # once instead of once per attribute value.
    remaining_attrs = [a for a in attrs if a != best_attr]
    best_column = examples[best_attr]

    for val in best_column.unique():
        subset = examples[best_column == val]
        if subset.empty:
            # Reached when the equality mask selects nothing, e.g. for a
            # missing value (NaN), which never compares equal to itself.
            root.children[val] = Node(is_leaf=True, prediction=labels.mode()[0])
        else:
            root.children[val] = ID3(subset, remaining_attrs)

    return root

In [5]:
# Print Tree
def print_tree(node, depth=0):
    """Print the subtree rooted at ``node`` as tab-indented text.

    A leaf is printed as ``Predict -> <label>``. An internal node is printed
    as ``[<attribute>]``, followed by one ``(<value>)`` line per branch and
    that branch's subtree two indentation levels deeper.

    ``depth`` is the number of leading tab characters for this node.
    """
    pad = "\t" * depth

    # Guard clause: a leaf is a single line and ends the recursion.
    if node.is_leaf:
        print(f"{pad}Predict -> {node.prediction}")
        return

    print(f"{pad}[{node.value}]")
    child_depth = depth + 2
    for branch_value, subtree in node.children.items():
        print(f"{pad}  ({branch_value})")
        print_tree(subtree, child_depth)


In [6]:
# Predict for a single instance
def predict(node, instance):
    """Classify ``instance`` by walking the tree from ``node`` down to a leaf.

    At each internal node the instance's value for that node's attribute
    selects the branch to follow. Returns the leaf's ``prediction``, or
    ``None`` if the instance has a value with no matching branch.
    """
    current = node
    while not current.is_leaf:
        branch_key = instance[current.value]
        if branch_key not in current.children:
            # No branch for this value at this split: no prediction.
            return None
        current = current.children[branch_key]
    return current.prediction

In [7]:
# Fit the decision tree on the full dataset
root = ID3(data, features)

# Show the learned structure
print_tree(root)

# Sanity check: classify the first row of the training data
test_instance = data.iloc[0]
predicted_label = predict(root, test_instance)
print("Prediction:", predicted_label)

[outlook]
  (sunny)
		[humidity]
		  (high)
				Predict -> no
		  (normal)
				Predict -> yes
  (overcast)
		Predict -> yes
  (rain)
		[wind]
		  (weak)
				Predict -> yes
		  (strong)
				Predict -> no
Prediction: no
