In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.cluster import KMeans
from sklearn.metrics import accuracy_score

# Read the data
Data = pd.read_csv("/content/Iris.csv")
Data
# Separate features and target variable
X = Data.drop("Species", axis=1)
y = Data["Species"]

# Encode the target variable
y_encoded = LabelEncoder().fit_transform(y)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.33, random_state=4)

# Scale the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

K_cent = 8
km = KMeans(n_clusters=K_cent, max_iter=100)
km.fit(X_train_scaled)
cent = km.cluster_centers_

# Compute sigma
max_distance = 0
for i in range(K_cent):
    for j in range(K_cent):
        distance = np.linalg.norm(cent[i] - cent[j])
        if distance > max_distance:
            max_distance = distance

sigma = max_distance / np.sqrt(2 * K_cent)

# Compute Gaussian kernel matrix for training data
row_train = X_train_scaled.shape[0]
column_train = K_cent
G_train = np.empty((row_train, column_train), dtype=float)
for i in range(row_train):
    for j in range(column_train):
        dist = np.linalg.norm(X_train_scaled[i] - cent[j])
        G_train[i][j] = np.exp(-np.power(dist, 2) / np.power(2 * sigma, 2))

# Compute Gaussian kernel matrix for testing data
row_test = X_test_scaled.shape[0]
column_test = K_cent
G_test = np.empty((row_test, column_test), dtype=float)
for i in range(row_test):
    for j in range(column_test):
        dist = np.linalg.norm(X_test_scaled[i] - cent[j])
        G_test[i][j] = np.exp(-np.power(dist, 2) / np.power(2 * sigma, 2))

# Compute pseudo-inverse of GTG
GTG = np.dot(G_train.T, G_train)
GTG_inv = np.linalg.inv(GTG)
fac = np.dot(GTG_inv, G_train.T)

# Compute weights
W = np.dot(fac, y_train)

# Predict on test data
prediction = np.dot(G_test, W)
prediction = 0.5 * (np.sign(prediction - 0.5) + 1)

# Calculate accuracy
score = accuracy_score(prediction, y_test)
print("Accuracy Score:", score)


Accuracy Score: 0.7


