In [22]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [23]:
# Load the Social Network Ads dataset; the path is relative to the
# notebook's working directory.
df = pd.read_csv(filepath_or_buffer='./dataset/Social_Network_Ads.csv')

In [24]:
# Features are every column except the last; the target is the last column.
# Both are pulled out as plain NumPy arrays.
X, Y = df.iloc[:, :-1].values, df.iloc[:, -1].values

In [25]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the rows for evaluation; the fixed random_state keeps the
# split identical from run to run.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.25,
    random_state=0,
)

In [26]:
# Shape of the training feature matrix as (rows, feature columns).
X_train.shape

(300, 2)

In [27]:
# Shape of the training label vector; its length should equal the number of
# rows in X_train.
Y_train.shape

(300,)

In [28]:
from sklearn.preprocessing import StandardScaler

# Learn the per-feature mean and standard deviation from the training split
# only, then apply that same scaling to both splits so that no test-set
# statistics leak into training.
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [29]:
def sigmoid(z):
    """Return the logistic function 1 / (1 + exp(-z)), evaluated element-wise.

    Parameters
    ----------
    z : scalar or array-like
        Input value(s).

    Returns
    -------
    numpy.float64 or numpy.ndarray
        Values in [0, 1]; a scalar for scalar input, otherwise an array with
        the same shape as ``z``.

    Notes
    -----
    Only ``exp(-|z|)`` is ever evaluated, so inputs of large magnitude cannot
    overflow. The naive form computes ``exp(1000)`` for ``z = -1000``, which
    overflows to ``inf`` and emits a RuntimeWarning.
    """
    z = np.asarray(z, dtype=float)
    decay = np.exp(-np.abs(z))  # always within [0, 1]
    # z >= 0: 1 / (1 + e^-z);  z < 0: e^z / (1 + e^z). Same value, no overflow.
    result = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    return result[()]  # unwrap 0-d arrays so scalar input yields a scalar

In [30]:
# Quick sanity check (disabled): sigmoid(4) should be about 0.982
# z = 4
# h = sigmoid(z)
# print(h)

In [32]:
# Initialize the weights to zero, one per feature column of the training
# matrix that gradient descent will actually see.
# NOTE: no separate bias/intercept is modelled. Predictions are
# sigmoid(np.dot(X, theta)) with nothing added, so learning an intercept would
# require appending a column of ones to both X_train and X_test.
theta = np.zeros(X_train.shape[1])

In [37]:
def cost_function(X, Y, theta):
    """Mean binary cross-entropy of a logistic model with weights ``theta``.

    Parameters
    ----------
    X : numpy.ndarray
        Feature matrix; ``np.dot(X, theta)`` must yield one score per sample.
    Y : numpy.ndarray
        Binary labels (0 or 1), one per sample.
    theta : numpy.ndarray
        Weight vector.

    Returns
    -------
    numpy.float64
        ``-(1/m) * sum(Y*log(p) + (1-Y)*log(1-p))`` with ``p = sigmoid(X @ theta)``.

    Raises
    ------
    ValueError
        If ``Y`` is empty (the mean over zero samples is undefined).

    Notes
    -----
    The loss is evaluated directly from the scores ``z = X @ theta`` as
    ``log(1 + e^z) - Y*z``, which is algebraically the same quantity. Going
    through ``log(sigmoid(z))`` instead yields ``log(0) = -inf`` (and then
    ``nan`` from ``0 * inf``) as soon as a prediction saturates to exactly
    0 or 1.
    """
    m = len(Y)
    if m == 0:
        raise ValueError("cost_function requires at least one sample")
    z = np.dot(X, theta)
    # np.logaddexp(0, z) == log(e^0 + e^z) == log(1 + e^z), computed stably.
    per_sample = np.logaddexp(0, z) - Y * z
    return np.sum(per_sample) / m

In [41]:
def gradient_descent(X, Y, theta, alpha, iterations):
    """Fit logistic-regression weights with batch gradient descent.

    Parameters
    ----------
    X : numpy.ndarray
        Feature matrix with one row per sample.
    Y : numpy.ndarray
        Binary labels (0 or 1), one per sample.
    theta : array-like
        Initial weight vector. It is copied, so the caller's array is left
        untouched.
    alpha : float
        Learning rate (step size).
    iterations : int
        Number of update steps to perform.

    Returns
    -------
    theta : numpy.ndarray
        Float weight vector after ``iterations`` updates.
    costs : list
        Value of ``cost_function`` after each update, in order.
    """
    m = len(Y)
    # Work on a float copy: the in-place update below must not overwrite the
    # caller's initial guess, and an integer-typed initial guess must not make
    # the float-valued step fail with a casting error.
    theta = np.array(theta, dtype=float)
    costs = []

    for _ in range(iterations):
        y_pred = sigmoid(np.dot(X, theta))
        # Gradient of the mean cross-entropy with respect to theta.
        gradient = np.dot(X.T, (y_pred - Y)) / m
        theta -= alpha * gradient
        costs.append(cost_function(X, Y, theta))
    return theta, costs

In [42]:
# Hyperparameters for batch gradient descent:
# learning rate (step size) and number of update steps.
alpha, iterations = 0.01, 1000

In [43]:
# Fit the weights on the training split; `costs` records the cost after each
# update step.
# NOTE: `theta` is rebound to the fitted weights here, so re-running this cell
# continues training from them unless the initialisation cell is run again.
theta, costs = gradient_descent(
    X=X_train,
    Y=Y_train,
    theta=theta,
    alpha=alpha,
    iterations=iterations,
)

In [52]:
from sklearn.metrics import confusion_matrix, accuracy_score

In [53]:
# Turn the predicted probabilities into 0/1 labels by rounding; np.round
# rounds half to even, so a probability of exactly 0.5 maps to 0.
predictions = np.round(sigmoid(X_test @ theta))

In [54]:
# Show the rounded test-set predictions (0.0 or 1.0 for each test sample).
print(predictions)

[0. 0. 0. 0. 0. 0. 0. 1. 0. 1. 0. 0. 0. 1. 0. 1. 1. 0. 1. 0. 0. 1. 0. 1.
 0. 1. 0. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 1. 0. 1. 0. 0. 1. 0. 1. 1.
 0. 1. 1. 0. 0. 1. 1. 1. 0. 0. 1. 0. 0. 1. 0. 1. 0. 1. 0. 0. 0. 1. 1. 0.
 0. 0. 0. 0. 1. 1. 1. 1. 1. 1. 0. 0. 1. 1. 1. 1. 1. 1. 1. 1. 0. 0. 0. 0.
 0. 1. 1. 1.]


In [55]:
# Calculate accuracy: the share of test samples whose predicted label equals
# the true label.
accuracy = np.mean(predictions == Y_test)
print("Accuracy:", accuracy)

# Confusion matrix: rows index the true class, columns the predicted class.
# It is stored under its own name so that the `confusion_matrix` function
# imported from sklearn.metrics is not overwritten by an array.
conf_matrix = np.zeros((2, 2))
for i in range(2):
    for j in range(2):
        conf_matrix[i, j] = np.sum((Y_test == i) & (predictions == j))

print("Confusion Matrix:")
print(conf_matrix)

Accuracy: 0.84
Confusion Matrix:
[[54. 14.]
 [ 2. 30.]]
