In [1]:
import numpy as np
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split 
from scipy.stats import uniform

class EnhancedPerceptron(BaseEstimator, ClassifierMixin):
    """Binary linear classifier with a sigmoid output, trained one sample at a time.

    A bias column of ones is prepended to the inputs, so ``weights[0]`` is the
    bias and ``weights[1:]`` are the per-feature weights. Targets are expected
    to be 0/1, matching what ``predict`` returns.

    Parameters
    ----------
    learning_rate : float, default=0.01
        Step size applied to the error term of each per-sample update.
    epochs : int, default=1000
        Number of full passes over the training data.
    regularization : float, default=0.01
        Weight-decay factor. It is subtracted as ``regularization * weights``
        on every sample update; it is NOT multiplied by ``learning_rate`` and
        it also shrinks the bias weight.

    Attributes
    ----------
    weights : numpy.ndarray of shape (n_features + 1,)
        Learned weights (bias first). Only available after ``fit``.
    """

    def __init__(self, learning_rate=0.01, epochs=1000, regularization=0.01):
        # Only store the arguments: the scikit-learn base class inspects this
        # signature to implement get_params()/set_params()/clone().
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.regularization = regularization

    def sigmoid(self, x):
        """Return the logistic function ``1 / (1 + exp(-x))`` of ``x``."""
        # Clip the exponent so np.exp cannot overflow to inf (RuntimeWarning);
        # beyond +/-500 the result is already 0 or 1 to within float precision.
        return 1 / (1 + np.exp(-np.clip(x, -500, 500)))

    def fit(self, X, y):
        """Learn the weights from training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training features.
        y : array-like of shape (n_samples,)
            Binary targets (0 or 1).

        Returns
        -------
        self : EnhancedPerceptron
            The fitted estimator, as the scikit-learn API requires.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D or ``X`` and ``y`` disagree on the sample count.
        """
        # Plain ndarrays guarantee positional indexing; a pandas Series with a
        # shuffled index would otherwise be looked up by label.
        X = np.asarray(X, dtype=float)
        y = np.asarray(y).ravel()
        if X.ndim != 2:
            raise ValueError(f"X must be 2-dimensional, got {X.ndim} dimension(s)")
        if X.shape[0] != y.shape[0]:
            raise ValueError(
                f"X and y have inconsistent sample counts: "
                f"{X.shape[0]} != {y.shape[0]}"
            )

        X = np.insert(X, 0, 1, axis=1)  # prepend the bias column of ones
        self.weights = np.zeros(X.shape[1])

        for _ in range(self.epochs):
            for sample, target in zip(X, y):
                output = self.sigmoid(np.dot(sample, self.weights))
                error = target - output

                # Error-driven step minus weight decay. The decay term is
                # computed from the weights as they were before this update.
                regularization_term = self.regularization * self.weights
                self.weights += self.learning_rate * error * sample - regularization_term

        return self

    def predict(self, X):
        """Predict 0/1 labels for ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Samples to classify; must have the feature count used in ``fit``.

        Returns
        -------
        numpy.ndarray of int, shape (n_samples,)
            1 where the sigmoid output is at least 0.5, otherwise 0.
        """
        X = np.insert(np.asarray(X, dtype=float), 0, 1, axis=1)  # same bias column as in fit
        output = self.sigmoid(np.dot(X, self.weights))
        return (output >= 0.5).astype(int)

def load_preprocessed_txt(file_path, n_features=9):
    """Load a sparse ``label index:value ...`` text file into dense arrays.

    Each non-blank line starts with an integer label followed by
    whitespace-separated ``index:value`` pairs, where ``index`` is 1-based.
    Features that are absent from a line are left at 0.

    Parameters
    ----------
    file_path : str or path-like
        Path of the text file to read.
    n_features : int, default=9
        Width of the dense feature vector built for every sample.

    Returns
    -------
    data : numpy.ndarray of shape (n_samples, n_features)
        Dense feature matrix.
    labels : numpy.ndarray of shape (n_samples,)
        1 where the file's label equals 1, otherwise 0.

    Raises
    ------
    IndexError
        If a feature index falls outside ``1..n_features``.
    ValueError
        If a label, index or value cannot be parsed as a number.
    """
    data = []
    labels = []

    with open(file_path, 'r') as file:
        for line_number, line in enumerate(file, start=1):
            parts = line.split()
            if not parts:
                # Blank line (e.g. a trailing newline at end of file): no sample.
                continue

            label = int(parts[0])
            labels.append(1 if label == 1 else 0)

            feature_vector = np.zeros(n_features)
            for feature in parts[1:]:
                index, value = feature.split(':')
                index = int(index) - 1  # file indices are 1-based
                # Reject index 0 explicitly: it would become -1 and silently
                # overwrite the last column through negative indexing.
                if not 0 <= index < n_features:
                    raise IndexError(
                        f"line {line_number}: feature index {index + 1} "
                        f"is outside 1..{n_features}"
                    )
                feature_vector[index] = float(value)

            data.append(feature_vector)

    if not data:
        # Keep the documented 2-D shape even when the file holds no samples.
        return np.empty((0, n_features)), np.array([], dtype=int)
    return np.array(data), np.array(labels)


# ---------------------------------------------------------------------------
# Data: read the sparse text file and hold out 20% of the samples for testing.
# ---------------------------------------------------------------------------
file_path = r'F:\AUSTRALIA\EDU\UOA\TRI 4\DLF\A1\data\diabetes_scale.txt'
X_preprocessed, y_preprocessed = load_preprocessed_txt(file_path)

X_train_pre, X_test_pre, y_train_pre, y_test_pre = train_test_split(
    X_preprocessed,
    y_preprocessed,
    test_size=0.2,
    random_state=42,  # fixed seed so the split is reproducible
)

# ---------------------------------------------------------------------------
# Search space. scipy's uniform(loc, scale) samples from [loc, loc + scale],
# so both continuous ranges below extend slightly past 1.0.
# ---------------------------------------------------------------------------
param_distributions = {
    'learning_rate': uniform(loc=0.001, scale=1.0),
    'epochs': [100, 500, 1000, 2000],
    'regularization': uniform(loc=0.01, scale=1.0),
}

# ---------------------------------------------------------------------------
# Randomised search: 20 sampled configurations, each scored by 5-fold
# cross-validated accuracy on the training split.
# ---------------------------------------------------------------------------
perceptron = EnhancedPerceptron()
random_search = RandomizedSearchCV(
    estimator=perceptron,
    param_distributions=param_distributions,
    scoring='accuracy',
    n_iter=20,
    cv=5,
    random_state=42,
)
random_search.fit(X_train_pre, y_train_pre)

# ---------------------------------------------------------------------------
# Evaluate the refitted best configuration on the held-out test split.
# ---------------------------------------------------------------------------
best_random_model = random_search.best_estimator_
y_pred_best_random = best_random_model.predict(X_test_pre)
conf_matrix = confusion_matrix(y_test_pre, y_pred_best_random)

print("Best Hyperparameters (Random Search):", random_search.best_params_)
print(
    "Test Accuracy (Random Search):",
    accuracy_score(y_test_pre, y_pred_best_random),
)
print(
    "Classification Report (Random Search):\n",
    classification_report(y_test_pre, y_pred_best_random),
)
print("Confusion Matrix (Random Search):\n", conf_matrix)

# Shapes of the two splits, for a quick sanity check.
print(f"Training data shape: X_train_pre = {X_train_pre.shape}, y_train_pre = {y_train_pre.shape}")
print(f"Test data shape: X_test_pre = {X_test_pre.shape}, y_test_pre = {y_test_pre.shape}")

Best Hyperparameters (Random Search): {'epochs': 500, 'learning_rate': 0.3864165025399161, 'regularization': 0.025966252220214196}
Test Accuracy (Random Search): 0.7337662337662337
Classification Report (Random Search):
               precision    recall  f1-score   support

           0       0.73      0.40      0.52        55
           1       0.73      0.92      0.82        99

    accuracy                           0.73       154
   macro avg       0.73      0.66      0.67       154
weighted avg       0.73      0.73      0.71       154

Confusion Matrix (Random Search):
 [[22 33]
 [ 8 91]]
Training data shape: X_train_pre = (614, 9), y_train_pre = (614,)
Test data shape: X_test_pre = (154, 9), y_test_pre = (154,)
