### Logistic Regression Model with Ridge (L2) Regularization

In [1]:
import numpy as np
import matplotlib.pyplot as plt

In [41]:
class LogReg:
    """Binary logistic regression with an L2 (ridge) penalty, trained by batch gradient descent.

    Parameters
    ----------
    learning_rate : float
        Step size applied to every gradient update.
    num_iterations : int
        Number of full-batch gradient steps performed by ``fit``.
    lambda_l2 : float
        Strength of the ridge penalty. Only ``weights`` are penalized; the
        bias is left unregularized.

    Attributes
    ----------
    weights : numpy.ndarray or None
        One coefficient per feature; ``None`` until ``fit`` is called.
    bias : float or None
        Intercept term; ``None`` until ``fit`` is called.
    """

    # Probabilities are kept inside [_EPS, 1 - _EPS] before taking logs so the
    # cross-entropy stays finite when the model saturates.
    _EPS = 1e-15
    # |z| is capped before exponentiation: exp(500) is still representable in
    # float64, whereas exp(710) and above overflows to inf.
    _MAX_LOGIT = 500

    # initialization
    def __init__(self, learning_rate=0.01, num_iterations=100, lambda_l2=0.1):
        self.learning_rate = learning_rate
        self.num_iterations = num_iterations
        self.lambda_l2 = lambda_l2
        self.weights = None
        self.bias = None

    # sigmoid function
    def sigmoid(self, z):
        """Return the logistic function 1 / (1 + exp(-z)) element-wise.

        ``z`` is clipped to [-500, 500] first so ``np.exp`` cannot overflow;
        inside that range the result is the same as the unclipped formula.
        """
        z = np.clip(z, -self._MAX_LOGIT, self._MAX_LOGIT)
        return 1 / (1 + np.exp(-z))

    # cost function with binary cross-entropy
    def cost_function(self, X, y):
        """Return the mean binary cross-entropy plus the ridge penalty.

        Parameters
        ----------
        X : numpy.ndarray of shape (m, n_features)
        y : numpy.ndarray of shape (m,) holding 0/1 labels

        Returns
        -------
        float
            ``-(1/m) * sum(y*log(p) + (1-y)*log(1-p)) + lambda_l2/(2m) * ||w||^2``
        """
        m = X.shape[0]
        y_hat = self.sigmoid(np.dot(X, self.weights) + self.bias)
        # Without this clip a saturated prediction (exactly 0 or 1) produces
        # 0 * log(0) = nan and poisons the whole cost.
        y_hat = np.clip(y_hat, self._EPS, 1 - self._EPS)
        cross_entropy = (-1 / m) * np.sum(
            y * np.log(y_hat) + (1 - y) * np.log(1 - y_hat)
        )
        ridge_regularization = (self.lambda_l2 / (2 * m)) * np.sum(np.square(self.weights))
        return cross_entropy + ridge_regularization

    # optimization with gradient descent
    def gradient_descent(self, X, y):
        """Perform one full-batch gradient step, updating ``weights`` and ``bias`` in place.

        ``y`` must be 1-D with one label per row of ``X``; ``fit`` takes care
        of flattening it before calling this method.
        """
        m = X.shape[0]
        y_hat = self.sigmoid(np.dot(X, self.weights) + self.bias)
        residual = y_hat - y
        # The ridge term contributes (lambda / m) * w to the weight gradient only.
        dw = (1 / m) * np.dot(X.T, residual) + (self.lambda_l2 / m) * self.weights
        db = (1 / m) * np.sum(residual)
        self.weights = self.weights - self.learning_rate * dw
        self.bias = self.bias - self.learning_rate * db

    # fit function
    def fit(self, X, y):
        """Train the model from zero-initialized parameters.

        Parameters
        ----------
        X : array-like of shape (m, n_features)
        y : array-like with m entries (0/1 labels); a column vector of shape
            (m, 1) is accepted and flattened.

        Returns
        -------
        list
            Cost measured after each gradient step (length ``num_iterations``).

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, has no rows, or its row count differs from
            the number of labels.
        """
        X = np.asarray(X, dtype=float)
        # Flatten so an (m, 1) label column cannot broadcast against the (m,)
        # prediction vector into an (m, m) matrix.
        y = np.asarray(y, dtype=float).ravel()
        if X.ndim != 2:
            raise ValueError(f"X must be 2-D (samples, features), got shape {X.shape}")
        if X.shape[0] == 0:
            raise ValueError("X must contain at least one sample")
        if X.shape[0] != y.shape[0]:
            raise ValueError(
                f"X has {X.shape[0]} samples but y has {y.shape[0]} labels"
            )

        self.weights = np.zeros(X.shape[1])
        self.bias = 0.0
        costs = []
        for i in range(self.num_iterations):
            self.gradient_descent(X, y)
            cost = self.cost_function(X, y)
            costs.append(cost)
            if i % 50 == 0:
                print(f"Cost after iteration {i}: {cost}")
        return costs

    # predict function
    def predict(self, X):
        """Return 0/1 class labels for ``X`` as an integer array.

        A sample is labelled 1 only when its predicted probability is strictly
        greater than 0.5; a probability of exactly 0.5 maps to 0, which is
        what rounding half-to-even produced before.
        """
        y_hat = self.sigmoid(np.dot(X, self.weights) + self.bias)
        return (y_hat > 0.5).astype(int)

### Train and Evaluate the Model

In [42]:
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [43]:
# load the breast cancer dataset; `data` is not defined anywhere else in the
# notebook, so without this line the cell fails on a fresh kernel
data = load_breast_cancer()

# split the data into features and labels
X = data.data
y = data.target

# split the data into training and testing sets (80/20, fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [44]:
# standardize the features: the scaling statistics are learned from the
# training split only, then the same transform is applied to both splits
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [51]:
# Instantiate the model with the chosen hyperparameters
model = LogReg(
    learning_rate=0.1,
    num_iterations=300,
    lambda_l2=0.01,
)

# Train on the training split; fit() returns the cost recorded after each iteration
training = model.fit(X_train, y_train)

Cost after iteration 0: 0.5275331668594314
Cost after iteration 50: 0.1331572244348908
Cost after iteration 100: 0.10639096221781891
Cost after iteration 150: 0.09453071848806319
Cost after iteration 200: 0.0874684367248958
Cost after iteration 250: 0.08264524345471312


In [52]:
# Predict 0/1 class labels for the held-out test split and display them
y_pred = model.predict(X_test)
y_pred

array([1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1,
       0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1,
       1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1,
       0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0,
       1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1,
       0, 1, 0, 0])

In [53]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Evaluate every metric against the same (y_test, y_pred) pair in one pass;
# the generator's loop variable does not leak into the notebook namespace.
accuracy, precision, recall, f1 = (
    metric(y_test, y_pred)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)

# Report the scores on the test split
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 score:", f1)

Accuracy: 0.9824561403508771
Precision: 0.9859154929577465
Recall: 0.9859154929577465
F1 score: 0.9859154929577465
