<a href="https://colab.research.google.com/github/xeeteex/Data-Mining-ACHS/blob/main/DWDM_lab3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Lab 3
1. Write a program to implement ID3.
(use the attached laptop_buy_data.csv)


In [2]:
import pandas as pd
import math
from collections import Counter

# Load CSV file
def load_csv(file_path):
    """Read the CSV at *file_path* and return its contents as a DataFrame.

    The argument is passed straight to ``pandas.read_csv`` with default
    options, so anything that function accepts (path, URL, file-like
    object) is accepted here as well.
    """
    frame = pd.read_csv(file_path)
    return frame

# Calculate entropy
def entropy(data, target_attr):
    """Return the base-2 Shannon entropy of column *target_attr* in *data*.

    Args:
        data: table whose rows are the examples (indexable by column name,
            supports ``len``).
        target_attr: name of the column holding the class labels.

    Returns:
        The entropy in bits as a float; ``0.0`` when *data* has no rows or
        only one distinct label.
    """
    label_counts = Counter(data[target_attr])
    n_rows = len(data)

    bits = 0.0
    # Accumulate term by term; each distinct label contributes -p * log2(p).
    for share in (count / n_rows for count in label_counts.values()):
        bits -= share * math.log2(share)
    return bits

# Calculate information gain
def info_gain(data, attr, target_attr):
    """Return the information gain obtained by splitting *data* on *attr*.

    The gain is the entropy of *target_attr* over all rows minus the
    size-weighted average entropy of the partitions induced by each
    distinct value of *attr*.

    Args:
        data: table of examples.
        attr: name of the candidate split column.
        target_attr: name of the class-label column.

    Returns:
        The gain in bits as a float.
    """
    before_split = entropy(data, target_attr)

    n_rows = len(data)
    after_split = 0.0
    for value in data[attr].unique():
        partition = data[data[attr] == value]
        # Each partition counts in proportion to the share of rows it holds.
        after_split += (len(partition) / n_rows) * entropy(partition, target_attr)

    return before_split - after_split

# Choose the best attribute
def choose_best_attr(data, attributes, target_attr):
    """Return the attribute in *attributes* with the highest information gain.

    Ties are resolved in favour of the attribute that appears first.

    Args:
        data: table of examples.
        attributes: candidate column names.
        target_attr: name of the class-label column.

    Returns:
        The winning attribute name, or ``None`` when *attributes* is empty.
    """
    def gain_of(candidate):
        return info_gain(data, candidate, target_attr)

    # max() keeps the first maximal element, matching a strict ">" scan.
    return max(attributes, key=gain_of, default=None)

# Return the majority class
def majority_class(data, target_attr):
    """Return the most frequent value of column *target_attr* in *data*.

    ``Series.mode`` returns the modal values in sorted order, so when several
    labels are equally frequent the smallest one is returned.
    """
    modal_values = data[target_attr].mode()
    return modal_values[0]

# Recursive ID3 algorithm
def id3(data, attributes, target_attr):
    """Build an ID3 decision tree for *data* by recursive partitioning.

    Args:
        data: table of training examples.
        attributes: column names still available for splitting.
        target_attr: name of the class-label column.

    Returns:
        Either a class label (leaf) or a nested dict of the form
        ``{attribute: {value: subtree, ...}}``. Branches are created only
        for attribute values that occur in *data*.
    """
    labels = data[target_attr].unique()

    # Leaf: every remaining example carries the same label.
    if len(labels) == 1:
        return labels[0]

    # Leaf: nothing left to split on, so fall back to the most common label.
    if len(attributes) == 0:
        return majority_class(data, target_attr)

    split_attr = choose_best_attr(data, attributes, target_attr)
    # The chosen attribute is not reused further down this path.
    remaining = [name for name in attributes if name != split_attr]

    branches = {}
    for value in data[split_attr].unique():
        partition = data[data[split_attr] == value]
        if partition.empty:
            # No row compares equal to this value (e.g. a missing value).
            branches[value] = majority_class(data, target_attr)
        else:
            branches[value] = id3(partition, remaining, target_attr)

    return {split_attr: branches}

# Nicely print the decision tree
def print_tree(tree, indent=""):
    """Print a decision tree to stdout, one line per branch or leaf.

    Args:
        tree: a leaf value, or a nested dict of the form
            ``{attribute: {value: subtree, ...}}``.
        indent: prefix for every line printed at this level; each nesting
            level adds two spaces.
    """
    if isinstance(tree, dict):
        child_indent = indent + "  "
        for attr, branches in tree.items():
            for val, subtree in branches.items():
                print(f"{indent}[{attr} = {val}]")
                print_tree(subtree, child_indent)
    else:
        # Anything that is not a dict is a leaf holding the predicted class.
        print(indent + "➤ " + str(tree))

# Main function
if __name__ == "__main__":
    # Label column; every other column of the CSV is treated as a predictor.
    target_attr = 'Class'  # Change if your label column is different

    # Training data location.
    file_path = "/content/laptop_buy_data.csv"
    df = load_csv(file_path)
    attributes = [name for name in df.columns if name != target_attr]

    # Grow the tree on the full dataset, then show it.
    tree = id3(df, attributes, target_attr)
    print("Decision Tree:")
    print_tree(tree)


Decision Tree:
[Age = Youth]
  [Student = Yes]
    ➤ Buy
  [Student = No]
    [Credit_Rating = Excellent]
      ➤ No
    [Credit_Rating = Fair]
      ➤ Buy
[Age = Middle_Aged]
  [Income = Low]
    [Credit_Rating = Excellent]
      ➤ No
    [Credit_Rating = Fair]
      [Student = Yes]
        ➤ Buy
  [Income = Medium]
    ➤ No
  [Income = High]
    ➤ No
[Age = Senior]
  [Credit_Rating = Fair]
    ➤ No
  [Credit_Rating = Excellent]
    [Income = Low]
      ➤ Buy
    [Income = High]
      [Student = No]
        ➤ Buy
    [Income = Medium]
      ➤ Buy


2. Write a program to implement Naive Bayesian algorithm.
(use the attached laptop_buy_data.csv)


In [3]:
import pandas as pd
from collections import defaultdict, Counter

class NaiveBayesClassifier:
    """Naive Bayes classifier for categorical attributes.

    Conditional probabilities are estimated with add-one (Laplace)
    smoothing. Attribute values that were never seen during training get a
    small constant likelihood at prediction time.

    Attributes:
        class_probs: maps each class label to its prior P(Class).
        cond_probs: maps ``(attribute, value)`` to a dict
            ``{class: P(attribute=value | class)}``.
        attributes: names of the feature columns learned by ``fit``.
    """

    def __init__(self):
        self.class_probs = {}                # P(Class)
        self.cond_probs = defaultdict(dict)  # P(Attribute=value | Class)
        self.attributes = []                 # feature columns, filled in by fit()

    def fit(self, data, target_attr):
        """Estimate priors and smoothed conditional probabilities from *data*.

        Calling ``fit`` again discards everything learned previously.

        Args:
            data: DataFrame of training examples.
            target_attr: name of the class-label column; all other columns
                are used as attributes.
        """
        total_count = len(data)
        self.attributes = [col for col in data.columns if col != target_attr]

        # Prior probabilities P(Class)
        class_counts = data[target_attr].value_counts()
        self.class_probs = {cls: count / total_count for cls, count in class_counts.items()}

        # Start from an empty table so a refit cannot keep (attribute, value)
        # entries that only existed in an earlier training set.
        self.cond_probs = defaultdict(dict)

        # The rows belonging to each class do not depend on the attribute,
        # so select them once instead of once per attribute.
        rows_by_class = {cls: data[data[target_attr] == cls] for cls in class_counts.index}

        # Conditional probabilities P(Attribute=value | Class)
        for attr in self.attributes:
            values = data[attr].unique()
            n_values = len(values)
            for cls, subset in rows_by_class.items():
                value_counts = subset[attr].value_counts()
                total_cls = len(subset)
                for val in values:
                    # Laplace smoothing: add one to every count and the
                    # number of distinct values to the denominator.
                    count = value_counts.get(val, 0)
                    self.cond_probs[(attr, val)][cls] = (count + 1) / (total_cls + n_values)

    def predict(self, instance):
        """Return the most probable class for *instance*.

        Args:
            instance: mapping-like object with a ``get`` method (for example
                a dict or a DataFrame row) keyed by attribute name.

        Returns:
            The class label with the largest unnormalised posterior.

        Raises:
            ValueError: if the model holds no classes, i.e. ``fit`` has not
                been called or was called with no rows.
        """
        if not self.class_probs:
            raise ValueError("NaiveBayesClassifier has no classes; call fit() with data first")

        posteriors = {}
        for cls, prior in self.class_probs.items():
            prob = prior
            for attr in self.attributes:
                val = instance.get(attr)
                prob *= self.cond_probs.get((attr, val), {}).get(cls, 1e-6)  # handle unseen values
            posteriors[cls] = prob
        return max(posteriors, key=posteriors.get)

    def predict_all(self, data):
        """Return a list with the predicted class for every row of *data*."""
        return [self.predict(row) for _, row in data.iterrows()]


if __name__ == "__main__":
    # Example to classify once the model has been trained.
    test_instance = {
        'Age': 'Senior',
        'Income': 'Medium',
        'Student': 'No',
        'Credit_Rating': 'Excellent'
    }

    # Read the training data.
    df = pd.read_csv("/content/laptop_buy_data.csv")

    # Build the classifier and learn from the 'Class' column.
    nb = NaiveBayesClassifier()
    nb.fit(df, target_attr='Class')

    # Classify the example and report the result.
    prediction = nb.predict(test_instance)
    print("Prediction for test instance:", prediction)


Prediction for test instance: No


3. Write a program to implement classification by backpropagation on the following data.

| X1 | X2 | t  |
|----|----|----|
| -1 | -1 | -1 |
| -1 |  1 |  1 |
|  1 | -1 |  1 |
|  1 |  1 | -1 |




In [4]:
import numpy as np

# Sigmoid activation and its derivative
def sigmoid(x):
    """Return the logistic function 1 / (1 + e^-x), applied element-wise.

    Accepts a scalar or a NumPy array.
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator

def sigmoid_derivative(x):
    """Return the sigmoid's slope given the sigmoid's *output* ``x``.

    The callers pass activations that have already been through
    ``sigmoid``, so the derivative s * (1 - s) is evaluated directly on
    that value rather than on the pre-activation input.
    """
    complement = 1 - x
    return x * complement

# Train a 2-2-1 sigmoid network with full-batch backpropagation on the
# four-sample table above, then print thresholded predictions.

# Input data (4 samples, 2 features), one sample per row
X = np.array([
    [-1, -1],
    [-1,  1],
    [ 1, -1],
    [ 1,  1]
])

# Target values as a (4, 1) column vector: +1 when the two inputs differ,
# -1 when they are equal (XOR in bipolar encoding)
y = np.array([[-1], [1], [1], [-1]])

# Map targets from {-1, 1} to {0, 1} so they lie in the sigmoid's output range
y_scaled = (y + 1) / 2

# Seed NumPy's global RNG so the initial weights are the same on every run
np.random.seed(1)

# Layer sizes: 2 inputs -> 2 hidden units -> 1 output unit
input_size = 2
hidden_size = 2
output_size = 1

# Weights start uniformly in [-1, 1); biases start at zero.
# W1 is drawn before W2, so the draw order determines the initial values.
W1 = 2 * np.random.rand(input_size, hidden_size) - 1
b1 = np.zeros((1, hidden_size))

W2 = 2 * np.random.rand(hidden_size, output_size) - 1
b2 = np.zeros((1, output_size))

# Training parameters
epochs = 10000
learning_rate = 0.1

# Training loop: every epoch uses all four samples at once (full batch)
for epoch in range(epochs):
    # --- Forward Pass ---
    # Hidden layer: pre-activation z1 and activation a1, shape (4, hidden_size)
    z1 = np.dot(X, W1) + b1
    a1 = sigmoid(z1)

    # Output layer: pre-activation z2 and activation a2, shape (4, output_size)
    z2 = np.dot(a1, W2) + b2
    a2 = sigmoid(z2)

    # --- Backward Pass ---
    # Output delta: (target - output) times the sigmoid slope at the output.
    # sigmoid_derivative takes the activation, not the pre-activation.
    error = y_scaled - a2
    d_output = error * sigmoid_derivative(a2)

    # Hidden delta: output delta propagated back through W2. This must be
    # computed before W2 is modified below.
    d_hidden = d_output.dot(W2.T) * sigmoid_derivative(a1)

    # --- Weight Update ---
    # error is (target - output), so adding these terms is a gradient-descent
    # step on 0.5 * sum((y_scaled - a2) ** 2). Updates are in place.
    W2 += a1.T.dot(d_output) * learning_rate
    b2 += np.sum(d_output, axis=0, keepdims=True) * learning_rate

    W1 += X.T.dot(d_hidden) * learning_rate
    b1 += np.sum(d_hidden, axis=0, keepdims=True) * learning_rate

# Final prediction: one more forward pass with the trained parameters,
# thresholded at 0.5 to give a 0/1 label (0 corresponds to t = -1, 1 to t = 1)
output = sigmoid(np.dot(sigmoid(np.dot(X, W1) + b1), W2) + b2)
predicted = (output > 0.5).astype(int)
true_label = (y_scaled > 0.5).astype(int)

print("Predicted outputs (0 or 1):\n", predicted)
print("Actual outputs:\n", true_label)


Predicted outputs (0 or 1):
 [[0]
 [1]
 [1]
 [0]]
Actual outputs:
 [[0]
 [1]
 [1]
 [0]]
