<a href="https://colab.research.google.com/github/vindhya-sri/machine-learning-lab/blob/main/Untitled20.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
class CandidateElimination:
    def __init__(self, attributes):
        self.attributes = attributes
        self.S = []  # Specific hypotheses
        self.G = []  # General hypotheses

    def initialize(self):
        # Initialize S with the most specific hypothesis
        self.S = [['0'] * len(self.attributes)]
        # Initialize G with the most general hypothesis
        self.G = [['?'] * len(self.attributes)]

    def is_consistent(self, hypothesis, example):
        for h, e in zip(hypothesis, example):
            if h != '?' and h != e:
                return False
        return True

    def update_hypotheses(self, example, target):
        if target == '1':  # Positive example
            # Update S
            new_S = []
            for s in self.S:
                if self.is_consistent(s, example):
                    new_S.append(s)
                else:
                    # Generalize s
                    new_hypothesis = s[:]
                    for i in range(len(s)):
                        if s[i] != example[i]:
                            new_hypothesis[i] = '?'
                    new_S.append(new_hypothesis)
            self.S = new_S

            # Update G
            new_G = []
            for g in self.G:
                if self.is_consistent(g, example):
                    new_G.append(g)
            self.G = new_G

        elif target == '0':  # Negative example
            # Update G
            new_G = []
            for g in self.G:
                if not self.is_consistent(g, example):
                    new_G.append(g)
                else:
                    # Specialize g
                    for i in range(len(g)):
                        if g[i] == '?' and example[i] == '1':
                            new_hypothesis = g[:]
                            new_hypothesis[i] = '0'
                            new_G.append(new_hypothesis)
            self.G = new_G

    def fit(self, training_data):
        self.initialize()
        for example, target in training_data:
            self.update_hypotheses(example, target)

    def get_hypotheses(self):
        return self.S, self.G


# Example usage
if __name__ == "__main__":
    # Define the attributes
    attributes = ['color', 'size', 'shape']

    # Create an instance of the Candidate Elimination algorithm
    ce = CandidateElimination(attributes)

    # Define training data: (example, target)
    training_data = [
        (['1', '1', '0'], '1'),  # Positive example
        (['0', '1', '0'], '0'),  # Negative example
        (['1', '0', '1'], '1'),  # Positive example
        (['0', '0', '1'], '0'),  # Negative example
    ]

    # Fit the model to the training data
    ce.fit(training_data)

    # Get the specific and general hypotheses
    S, G = ce.get_hypotheses()
    print("Specific Hypotheses (S):", S)
    print("General Hypotheses (G):", G)

Specific Hypotheses (S): [['?', '?', '?']]
General Hypotheses (G): [['?', '0', '0']]


In [2]:
import pandas as pd
import numpy as np

# Function to calculate entropy
def entropy(target):
    values, counts = np.unique(target, return_counts=True)
    probabilities = counts / len(target)
    return -np.sum(probabilities * np.log2(probabilities))

# Function to calculate information gain
def information_gain(data, attribute, target_name):
    total_entropy = entropy(data[target_name])
    values, counts = np.unique(data[attribute], return_counts=True)
    weighted_entropy = np.sum((counts[i] / np.sum(counts) *
                               entropy(data[data[attribute] == values[i]][target_name])
                               for i in range(len(values))))
    return total_entropy - weighted_entropy

# ID3 algorithm to build the decision tree
def id3(data, target_name, attributes):
    if len(np.unique(data[target_name])) == 1:
        return np.unique(data[target_name])[0]

    if len(attributes) == 0:
        return data[target_name].mode()[0]

    gains = [information_gain(data, attr, target_name) for attr in attributes]
    best_attr = attributes[np.argmax(gains)]

    tree = {best_attr: {}}
    values = np.unique(data[best_attr])

    for value in values:
        subset = data[data[best_attr] == value]
        subtree = id3(subset, target_name, [attr for attr in attributes if attr != best_attr])
        tree[best_attr][value] = subtree

    return tree

# Function to classify a new sample using the decision tree
def classify(tree, sample):
    if not isinstance(tree, dict):
        return tree  # Leaf node
    attribute = next(iter(tree))  # Get the attribute to split on
    attribute_value = sample[attribute]
    if attribute_value in tree[attribute]:
        return classify(tree[attribute][attribute_value], sample)
    else:
        return None  # Unknown attribute value

# Sample dataset
data = pd.DataFrame({
    'Outlook': ['Sunny', 'Sunny', 'Overcast', 'Rainy', 'Rainy', 'Rainy', 'Overcast', 'Sunny', 'Sunny', 'Rainy'],
    'Temperature': ['Hot', 'Hot', 'Hot', 'Mild', 'Cool', 'Cool', 'Mild', 'Mild', 'Hot', 'Mild'],
    'Humidity': ['High', 'High', 'High', 'Normal', 'Normal', 'High', 'Normal', 'Normal', 'High', 'Normal'],
    'Windy': [False, True, False, False, False, True, True, False, True, True],
    'Play': ['No', 'No', 'Yes', 'Yes', 'Yes', 'No', 'Yes', 'No', 'Yes', 'Yes']
})

# Define the target variable and attributes
target_name = 'Play'
attributes = list(data.columns[:-1])

# Build the decision tree
decision_tree = id3(data, target_name, attributes)

# Print the decision tree
print("Decision Tree:")
print(decision_tree)

# Classify a new sample
new_sample = {
    'Outlook': 'Sunny',
    'Temperature': 'Cool',
    'Humidity': 'High',
    'Windy': False
}

# Classify the new sample
result = classify(decision_tree, new_sample)
print("\nClassification Result for the new sample:", result)

Decision Tree:
{'Outlook': {'Overcast': 'Yes', 'Rainy': {'Humidity': {'High': 'No', 'Normal': 'Yes'}}, 'Sunny': {'Windy': {np.False_: 'No', np.True_: {'Temperature': {'Hot': {'Humidity': {'High': 'No'}}}}}}}}

Classification Result for the new sample: No


  weighted_entropy = np.sum((counts[i] / np.sum(counts) *
