In [1]:
import pandas as pd
import math

In [3]:
# Load the training table from an Excel workbook into a DataFrame.
# NOTE(review): the absolute "/content/..." path looks like a Colab upload
# location — confirm, and consider a configurable data directory so the
# notebook also runs elsewhere.
data = pd.read_excel("/content/3rd data set.xlsx")
# Print the whole frame as plain text so every row is visible in the output.
print(data)

     Outlook Temperature Humidity    Wind PlayTennis
0      Sunny         Hot     High    Weak         No
1      Sunny         Hot     High  Strong         No
2   Overcast         Hot     High    Weak        Yes
3       Rain        Mild     High    Weak        Yes
4       Rain        Cool   Normal    Weak        Yes
5       Rain        Cool   Normal  Strong         No
6   Overcast        Cool   Normal  Strong        Yes
7      Sunny        Mild     High    Weak         No
8      Sunny        Cool   Normal    Weak        Yes
9       Rain        Mild   Normal    Weak        Yes
10     Sunny        Mild   Normal  Strong        Yes
11  Overcast        Mild     High  Strong        Yes
12  Overcast         Hot   Normal    Weak        Yes
13      Rain        Mild     High  Strong         No


In [4]:
def entropy(target):
    """Return the Shannon entropy, in bits, of the labels in ``target``.

    Parameters
    ----------
    target : pandas.Series
        Class labels of the samples at a node.

    Returns
    -------
    float
        ``-sum(p * log2(p))`` over the labels that actually occur.
        ``0.0`` for an empty series or a series with a single class.
    """
    counts = target.value_counts()
    # A categorical Series reports every declared category, including those
    # with a count of 0 (typical after filtering rows). log2(0) is undefined
    # and such classes contribute nothing to the entropy, so drop them.
    counts = counts[counts > 0]
    # value_counts() leaves out missing labels; normalise by the number of
    # labels that were counted so the probabilities sum to 1.
    total = int(counts.sum())
    ent = 0.0
    for count in counts:
        p = count / total
        ent -= p * math.log2(p)
    return ent

In [5]:
def information_gain(data, feature, target_name):
    """Compute how much splitting ``data`` on ``feature`` reduces label entropy.

    Parameters
    ----------
    data : pandas.DataFrame
        Samples at the current node.
    feature : str
        Column to split on.
    target_name : str
        Column holding the class labels.

    Returns
    -------
    float
        Entropy of the labels before the split minus the size-weighted
        average entropy of the labels within each distinct feature value.
    """
    parent_entropy = entropy(data[target_name])
    column = data[feature]
    n_rows = len(data)

    def branch_contribution(level):
        # Entropy of one child node, weighted by its share of the rows.
        child = data[column == level]
        return (len(child) / n_rows) * entropy(child[target_name])

    children_entropy = sum(branch_contribution(level) for level in column.unique())
    return parent_entropy - children_entropy

In [6]:
def id3(data, features, target_name):
    """Recursively build an ID3 decision tree as nested dictionaries.

    Parameters
    ----------
    data : pandas.DataFrame
        Training samples reaching the current node.
    features : list
        Column names still available for splitting.
    target_name : str
        Column holding the class labels.

    Returns
    -------
    dict or label
        A leaf is a bare class label. An internal node is
        ``{feature: {value: subtree, ...}}`` with one entry per distinct
        value of the chosen feature present in ``data``.
    """
    labels = data[target_name]

    # Pure node: every sample carries the same label.
    if len(labels.unique()) == 1:
        return labels.iloc[0]

    # Nothing left to split on: fall back to the most frequent label.
    if len(features) == 0:
        return labels.mode()[0]

    # Score every candidate and keep the one with the highest gain
    # (the first such feature wins on ties).
    gains = {}
    for candidate in features:
        gains[candidate] = information_gain(data, candidate, target_name)
    split_on = max(gains, key=lambda name: gains[name])

    # The chosen feature is consumed for the whole subtree below this node.
    leftover = [name for name in features if name != split_on]
    split_column = data[split_on]

    branches = {
        level: id3(data[split_column == level], leftover, target_name)
        for level in split_column.unique()
    }
    return {split_on: branches}

In [7]:
# Column holding the class label to predict.
target = "PlayTennis"
# Every other column is a candidate split attribute. Selecting by name rather
# than by position keeps the label out of the feature list (and keeps every
# real attribute in it) even when the label is not the last column.
features = [column for column in data.columns if column != target]

# Build the tree from the full dataset and show its nested-dict form.
decision_tree = id3(data, features, target)
print("Decision Tree:")
print(decision_tree)

Decision Tree:
{'Outlook': {'Sunny': {'Humidity': {'High': 'No', 'Normal': 'Yes'}}, 'Overcast': 'Yes', 'Rain': {'Wind': {'Weak': 'Yes', 'Strong': 'No'}}}}
