In [1]:
import pandas as pd 
import numpy as np

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_squared_error

iris = load_iris()
X, y = iris.data, iris.target


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [8]:
class DecisionTree:
    def __init__(self, max_depth=None):
        self.max_depth = max_depth
        self.tree = None

    def fit(self, X, y):
        self.tree = self._fit(X, y, depth=0)

    def _fit(self, X, y, depth):
        unique_classes, counts = np.unique(y, return_counts=True)

        # If only one class or max depth reached, create a leaf node
        if len(unique_classes) == 1 or (self.max_depth is not None and depth == self.max_depth):
            return {'class': unique_classes[0], 'count': counts[0]}

        # Find the best split
        best_split = self._find_best_split(X, y)

        if best_split is None:
            return {'class': unique_classes[np.argmax(counts)], 'count': counts[0]}

        feature_index, threshold = best_split
        left_mask = X[:, feature_index] <= threshold
        right_mask = ~left_mask

        # Recursively build the left and right subtrees
        left_subtree = self._fit(X[left_mask], y[left_mask], depth + 1)
        right_subtree = self._fit(X[right_mask], y[right_mask], depth + 1)

        return {'feature_index': feature_index, 'threshold': threshold,
                'left': left_subtree, 'right': right_subtree}

    def _find_best_split(self, X, y):
        m, n = X.shape
        if m <= 1:
            return None

        num_classes = len(np.unique(y))
        if num_classes == 1:
            return None

        # Calculate the impurity before the split
        base_impurity = self._calculate_impurity(y)

        best_impurity_reduction = 0
        best_split = None

        for feature_index in range(n):
            thresholds = np.unique(X[:, feature_index])
            for threshold in thresholds:
                left_mask = X[:, feature_index] <= threshold
                right_mask = ~left_mask

                if np.sum(left_mask) > 0 and np.sum(right_mask) > 0:
                    left_impurity = self._calculate_impurity(y[left_mask])
                    right_impurity = self._calculate_impurity(y[right_mask])

                    # Weighted impurity reduction
                    impurity_reduction = base_impurity - (np.sum(left_mask) / m) * left_impurity \
                                         - (np.sum(right_mask) / m) * right_impurity

                    # Update the best split if needed
                    if impurity_reduction > best_impurity_reduction:
                        best_impurity_reduction = impurity_reduction
                        best_split = (feature_index, threshold)

        return best_split

    def _calculate_impurity(self, y):
        _, counts = np.unique(y, return_counts=True)
        probabilities = counts / len(y)
        impurity = 1 - np.sum(probabilities ** 2)
        return impurity

    def predict(self, X):
        predictions = [self._predict_single(x) for x in X]
        return np.array(predictions)

    def _predict_single(self, x, node=None):
        if node is None:
            node = self.tree

        if 'class' in node:
            return node['class']
        else:
            if x[node['feature_index']] <= node['threshold']:
                return self._predict_single(x, node['left'])
            else:
                return self._predict_single(x, node['right'])

from sklearn.metrics import accuracy_score

# Create and train the Decision Tree model
tree_model = DecisionTree(max_depth=3)
tree_model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = tree_model.predict(X_test)
print(("The predicted value using Decision tree{}"),y_pred)
# Evaluate the accuracy of the model
accuracy = accuracy_score(y_test, y_pred)
print("Accuracyof the model:", accuracy)


The predicted value using Decision tree{} [1 0 1 1 1 0 1 1 1 1 1 0 0 0 0 1 1 1 1 1 0 1 0 1 1 1 1 1 0 0]
Accuracyof the model: 0.6333333333333333


In [10]:

class SVM:
    def __init__(self, C=1.0):
        self.C = C
        self.alpha = None
        self.b = None

    def _linear_kernel(self, X1, X2):
        return np.dot(X1, X2.T)

    def _calculate_error(self, X, y, alpha, b, kernel):
        predictions = np.dot((alpha * y).T, kernel(X, X)) + b
        errors = predictions - y
        return errors

    def _select_random_pair(self, m, i):
        j = i
        while j == i:
            j = np.random.randint(m)
        return j

    def _clip_alpha(self, alpha, L, H):
        return max(L, min(alpha, H))
    
    
    def _take_step(self, i, j, X, y, alpha, b, errors, kernel):
        if i == j:
            return 0, alpha, b

        alpha_i, alpha_j = alpha[i], alpha[j]
        y_i, y_j = y[i], y[j]
        E_i, E_j = self._calculate_error(X, y, alpha, b, kernel)[i], self._calculate_error(X, y, alpha, b, kernel)[j]

    # Compute L and H
        if y_i != y_j:
            L = max(0, alpha_j - alpha_i)
            H= min(self.C, self.C + alpha_j - alpha_i)
        else:
            L = max(0, alpha_i + alpha_j - self.C)
            H = min(self.C, alpha_i + alpha_j)

        if L == H:
            return 0, alpha, b

    # Compute kernel values
        k_ij = kernel(X[i], X[i]) + kernel(X[j], X[j]) - 2 * kernel(X[i], X[j])

        if k_ij <= 0:
            return 0, alpha, b

    # Update alpha_j
        new_alpha_j = alpha_j + y_j * (E_i - E_j) / k_ij
        new_alpha_j = self._clip_alpha(new_alpha_j, L, H)

        if np.abs(new_alpha_j - alpha_j) < 1e-5:
            return 0, alpha, b

    # Update alpha_i
        new_alpha_i = alpha_i + y_i * y_j * (alpha_j - new_alpha_j)

    # Update b
        b_i = E_i + y_i * (new_alpha_i - alpha_i) * kernel(X[i], X[i]) \
              + y_j * (new_alpha_j - alpha_j) * kernel(X[i], X[j]) + b
        b_j = E_j + y_i * (new_alpha_i - alpha_i) * kernel(X[i], X[j]) \
              + y_j * (new_alpha_j - alpha_j) * kernel(X[j], X[j]) + b
        b = (b_i + b_j) / 2

    # Update alpha values
        alpha[i] = new_alpha_i
        alpha[j] = new_alpha_j

        return 1, alpha, b


    
    def fit(self, X, y, max_iter=100):
        m, n = X.shape
        self.alpha = np.zeros(m)
        self.b = 0
        self.X_train = X  # Add this line to store X_train
        self.y_train = y  # Add this line to store y_train

        kernel = self._linear_kernel

        for _ in range(max_iter):
            alpha_changed = 0
            for i in range(m):
                E_i = self._calculate_error(X, y, self.alpha, self.b, kernel)[i]
                if ((y[i] * E_i < -1e-5) and (self.alpha[i] < self.C)) or \
                      ((y[i] * E_i > 1e-5) and (self.alpha[i] > 0)):
                    j = self._select_random_pair(m, i)
                    change, self.alpha, self.b = self._take_step(i, j, X, y, self.alpha, self.b, E_i, kernel)
                    alpha_changed += change

            if alpha_changed == 0:
                break

        return self.alpha, self.b

    def predict(self, X, alpha, b):
        kernel = self._linear_kernel
        predictions = np.dot(alpha * self.y_train, kernel(X, self.X_train).T) + b

        return np.sign(predictions)

# Convert labels to binary for a two-class SVM
y_binary = np.where(y == 0, -1, 1)

# Create and train the SVM model
svm_model = SVM(C=1.0)
alpha, b = svm_model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = svm_model.predict(X_test, alpha, b)
print(("The predicted value using SVM{}"),y_pred)

# Evaluate the accuracy of the model
accuracy = accuracy_score(y_test, y_pred)
print("The Accuracy of the model:", accuracy)


The predicted value using SVM{} [-1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
 -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.]
The Accuracy of the model: 0.0


In [11]:
class LinearRegression:
    def __init__(self):
        self.theta = None

    def fit(self, X, y):
        X_b = np.c_[np.ones((X.shape[0], 1)), X]  # Add a bias term (intercept)
        self.theta = np.linalg.inv(X_b.T.dot(X_b)).dot(X_b.T).dot(y)

    def predict(self, X):
        X_b = np.c_[np.ones((X.shape[0], 1)), X]
        return X_b.dot(self.theta)

# Train the linear regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)
print(("The predicted value using Linear Regression{}"),y_pred)

mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)

The predicted value using Linear Regression{} [ 1.23071715 -0.04010441  2.21970287  1.34966889  1.28429336  0.02248402
  1.05726124  1.82403704  1.36824643  1.06766437  1.70031437 -0.07357413
 -0.15562919 -0.06569402 -0.02128628  1.39659966  2.00022876  1.04812731
  1.28102792  1.97283506  0.03184612  1.59830192  0.09450931  1.91807547
  1.83296682  1.87877315  1.78781234  2.03362373  0.03594506  0.02619043]
Mean Squared Error: 0.03711379440797648


In [13]:
# Define a function to calculate the Pearson correlation coefficient
def pearson_correlation(x, y):
    n = len(x)

    mean_x = np.mean(x)
    mean_y = np.mean(y)

    numerator = np.sum((x - mean_x) * (y - mean_y))
    denominator_x = np.sum((x - mean_x)**2)
    denominator_y = np.sum((y - mean_y)**2)

    correlation = numerator / np.sqrt(denominator_x * denominator_y)

    return correlation

# Calculate the Pearson correlation coefficient between two columns of the Iris dataset
feature1 = X[:, 0]  # Choose the first feature
feature2 = X[:, 1]  # Choose the second feature

correlation_coefficient = pearson_correlation(feature1, feature2)
print(f"Pearson Correlation Coefficient: {correlation_coefficient}")


Pearson Correlation Coefficient: -0.11756978413300201


In [15]:

def fisher_discriminant_ratio(X, y):
    # Calculate class means
    class_means = []
    classes = np.unique(y)
    for c in classes:
        class_means.append(np.mean(X[y == c], axis=0))
    
    # Calculate the overall mean
    overall_mean = np.mean(X, axis=0)
    
    # Calculate within-class scatter matrix (Sw) and between-class scatter matrix (Sb)
    S_within = np.zeros((X.shape[1], X.shape[1]))
    S_between = np.zeros((X.shape[1], X.shape[1]))
    for i, c_mean in zip(classes, class_means):
        # Within-class scatter matrix
        diff_within = X[y == i] - c_mean
        S_within += np.dot(diff_within.T, diff_within)
        
        # Between-class scatter matrix
        diff_between = (c_mean - overall_mean).reshape(-1, 1)
        S_between += np.dot(diff_between, diff_between.T)
    
    # Calculate eigenvalues and eigenvectors of Sw^-1 * Sb
    eigen_values, eigen_vectors = np.linalg.eig(np.dot(np.linalg.inv(S_within), S_between))
    
    # Sort eigenvalues and eigenvectors in descending order
    idx = np.argsort(eigen_values)[::-1]
    eigen_values = eigen_values[idx]
    eigen_vectors = eigen_vectors[:, idx]
    
    return eigen_vectors[:, 0]  # Return the eigenvector corresponding to the largest eigenvalue

# Example usage:
# Assuming X is your feature matrix and y is the corresponding labels
# X and y should be appropriately prepared before using this function
# eigenvector = fisher_discriminant_ratio(X, y)