In [79]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import KFold


In [80]:
# Load the Iris dataset once; `iris` is kept around so the full dataset object
# stays available, while X / y are the arrays used by the rest of the notebook.
iris = load_iris()
X, y = iris.data, iris.target

In [81]:
from sklearn.model_selection import train_test_split
# NOTE: an 80/20 split used to be computed here, but every variable it produced
# (X_train, X_test, y_train, y_test) is overwritten by the test_size=0.1 split
# that runs right before the K-fold loop, and nothing in between reads them.
# The redundant call was removed so the notebook has a single source of truth
# for the train/test partition.

In [82]:
def one_hot_encode(labels, num_classes):
    """Convert integer class labels into one-hot rows.

    Args:
        labels: Integer label(s) used to index the rows of an identity matrix.
        num_classes: Number of classes; determines the width of each row.

    Returns:
        Array in which every label has been replaced by the matching row of
        the ``num_classes`` x ``num_classes`` identity matrix.
    """
    # Row k of the identity matrix is exactly the one-hot vector for class k,
    # so indexing with the labels selects the right row for every sample.
    identity = np.eye(num_classes)
    one_hot = identity[labels]
    return one_hot


def sigmoid(z):
    """Compute the logistic sigmoid ``1 / (1 + exp(-z))`` element-wise.

    The textbook formula evaluates ``exp(-z)``, which overflows for large
    negative ``z``. This version only ever exponentiates ``-|z|`` (a value
    that is <= 0), so it cannot overflow, and it picks the algebraically
    equivalent expression for each sign of ``z``.

    Args:
        z: Scalar or NumPy array of pre-activations.

    Returns:
        Sigmoid of ``z`` with the same shape as ``z`` (a NumPy scalar for
        scalar input).
    """
    z = np.asarray(z)
    # exp(-|z|) lies in (0, 1], so it can underflow to 0 but never overflow.
    decay = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + exp(-z))
    # z <  0: exp(z) / (1 + exp(z))   (same value, rewritten to avoid exp(+big))
    result = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with () unwraps a 0-d result into a NumPy scalar and leaves
    # n-d arrays unchanged, matching what the plain formula returned.
    return result[()]


def softmax(z, axis=0):
    """Compute a numerically stable softmax along ``axis``.

    Subtracting the per-slice maximum before exponentiating leaves the result
    mathematically unchanged (the factor cancels in the ratio) but keeps every
    exponent <= 0, so large logits no longer overflow to ``inf`` and produce
    ``nan`` probabilities.

    Args:
        z: Array of logits.
        axis: Axis along which the probabilities are normalised. Defaults to
            0, i.e. each column of a 2-D input sums to one.

    Returns:
        Array with the same shape as ``z`` whose entries along ``axis`` are
        non-negative and sum to one.
    """
    z = np.asarray(z)
    if z.size == 0:
        # Nothing to normalise; np.max would raise on an empty reduction.
        return np.exp(z)
    shifted = z - np.max(z, axis=axis, keepdims=True)
    exp_z = np.exp(shifted)
    # keepdims=True makes the normaliser broadcast correctly for any axis.
    return exp_z / np.sum(exp_z, axis=axis, keepdims=True)


def initialize_parameters(dim):
    """Create zero-initialised logistic-regression parameters.

    Args:
        dim: Number of weights to allocate.

    Returns:
        Tuple ``(w, b)`` where ``w`` is a ``(dim, 1)`` array of zeros and
        ``b`` is the scalar ``0``.
    """
    weights = np.zeros(shape=(dim, 1))
    bias = 0
    return weights, bias

def propagate(w, b, X, y):
    """Run one forward and backward pass of binary logistic regression.

    Args:
        w: Weight array; it is transposed and multiplied with ``X``.
        b: Bias added to every pre-activation.
        X: Input array; ``X.shape[1]`` is used as the number of examples.
        y: Binary targets, combined element-wise with the activations.

    Returns:
        Tuple ``(grads, cost)``. ``grads`` is a dict with keys ``"dw"`` and
        ``"db"`` holding the cost gradients with respect to ``w`` and ``b``;
        ``cost`` is the averaged cross-entropy.
    """
    n_examples = X.shape[1]
    stabilizer = 1e-8  # keeps the arguments of log() strictly away from zero

    # Forward pass: linear scores -> sigmoid activations -> cross-entropy.
    activations = sigmoid(np.dot(w.T, X) + b)
    log_terms = y * np.log(activations + stabilizer) + (1 - y) * np.log(1 - activations + stabilizer)
    cost = (-1 / n_examples) * np.sum(log_terms)

    # Backward pass: both gradients share the same residual (A - y).
    residual = activations - y
    grads = {
        "dw": (1 / n_examples) * np.dot(X, residual.T),
        "db": (1 / n_examples) * np.sum(residual),
    }
    return grads, cost

def optimize(w, b, X, y, num_iterations, learning_rate):
    """Fit logistic-regression parameters with batch gradient descent.

    Two defects of the earlier version are fixed here: the ``costs`` list was
    never filled (so it was always returned empty), and ``w`` was updated in
    place, silently modifying the array owned by the caller.

    Args:
        w: Initial weights; copied, so the caller's array is left untouched.
        b: Initial bias.
        X: Training inputs, forwarded unchanged to ``propagate``.
        y: Training targets, forwarded unchanged to ``propagate``.
        num_iterations: Number of gradient-descent steps to perform.
        learning_rate: Step size applied to both gradients.

    Returns:
        Tuple ``(params, costs)``. ``params`` is a dict with keys ``"w"`` and
        ``"b"`` holding the learned parameters; ``costs`` is a list with one
        entry per iteration, each being the cost evaluated just before that
        iteration's update.
    """
    # Private float copy: in-place updates below can no longer leak to the caller.
    w = np.array(w, dtype=float)
    costs = []
    for _ in range(num_iterations):
        grads, cost = propagate(w, b, X, y)
        costs.append(cost)
        w -= learning_rate * grads["dw"]
        b = b - learning_rate * grads["db"]
    params = {"w": w, "b": b}
    return params, costs

def predict(w, b, X):
    """Return the sigmoid activations of the linear model for ``X``.

    Note that the result is the raw sigmoid output, not a thresholded label.

    Args:
        w: Weight array; it is transposed and multiplied with ``X``.
        b: Bias added to every pre-activation.
        X: Input array to score.

    Returns:
        ``sigmoid(w.T @ X + b)``.
    """
    logits = np.dot(w.T, X) + b
    return sigmoid(logits)

In [83]:
# Hold out 10% of the samples; the fixed seed keeps the partition reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=0,
)

In [84]:
# Training hyperparameters shared by every fold and every one-vs-rest model
# (hoisted out of the inner loop, where they were re-assigned on each pass).
num_iterations = 10000
learning_rate = 0.06

kf = KFold(n_splits=5,random_state=None)
print(kf)

accuracies = []
best_accuracy = 0
best_fold = 0

for fold, (train_index, test_index) in enumerate(kf.split(X_train)):
    X_train_fold, X_test_fold = X_train[train_index], X_train[test_index]
    y_train_fold, y_test_fold = y_train[train_index], y_train[test_index]

    # One-vs-rest: train one binary logistic-regression model per class that
    # is present in this fold's training data.
    classes = np.unique(y_train_fold)
    models = []
    for class_label in classes:
        y_binary = (y_train_fold == class_label).astype(int)
        w, b = initialize_parameters(X_train_fold.shape[1])
        params, _ = optimize(w, b, X_train_fold.T, y_binary.reshape(1, -1), num_iterations, learning_rate)
        models.append(params)

    # Row i holds the linear scores of model i for every validation sample.
    z_values = np.zeros((len(models), X_test_fold.shape[0]))
    for i, params in enumerate(models):
        w, b = params["w"], params["b"]
        z_values[i] = np.dot(w.T, X_test_fold.T) + b

    class_probabilities = softmax(z_values)
    # argmax yields a row index into `models`; map it back to the actual class
    # label so the comparison stays correct even when the labels present in
    # this fold are not exactly 0..k-1.
    predictions = classes[np.argmax(class_probabilities, axis=0)]
    accuracy = (predictions == y_test_fold).mean()
    accuracies.append(accuracy)
    print(f"accuracy of {fold} : {accuracy}")

    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_fold = fold

print(f"Best Fold: {best_fold} & Accuracy: {best_accuracy}")

KFold(n_splits=5, random_state=None, shuffle=False)
accuracy of 0 : 0.9629629629629629
accuracy of 1 : 0.7407407407407407
accuracy of 2 : 0.9259259259259259
accuracy of 3 : 0.8888888888888888
accuracy of 4 : 0.9259259259259259
Best Fold: 0 & Accuracy: 0.9629629629629629
