In [18]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import (StandardScaler, 
                                   OneHotEncoder)

In [19]:
# Load the campaign responses, discard the customer identifier column,
# and preview the first ten rows.
data = pd.read_csv("campaign_responses.csv").drop(columns='customer_id')
print(data.head(10))


   age  gender  annual_income  credit_score employed marital_status  \
0   35    Male          65000           720      Yes        Married   
1   28  Female          45000           680       No         Single   
2   42    Male          85000           750      Yes        Married   
3   31  Female          55000           710      Yes         Single   
4   47    Male          95000           790      Yes        Married   
5   25  Female          38000           630       No         Single   
6   39    Male          72000           740      Yes        Married   
7   33  Female          48000           670      Yes         Single   
8   51    Male         110000           820      Yes        Married   
9   27  Female          40000           620       No         Single   

   no_of_children responded  
0               2       Yes  
1               0        No  
2               3       Yes  
3               1        No  
4               2       Yes  
5               0        No  
6       

In [20]:
# Data processing - separate the target from the predictors, then one-hot
# encode the categorical columns and standardise the numerical ones.
response_map = {'Yes': 1, 'No': 0}

# Target: 'responded' translated through response_map and cast to int.
y = data['responded'].map(response_map).astype(int)
# Predictors: every remaining column.
X = data.drop(columns='responded')

# Partition the predictor columns by dtype so each group gets its own transformer.
categoricalPredictors = X.select_dtypes(include=['category', 'object']).columns.to_list()
numericalPredictors = X.select_dtypes(include=['int', 'float']).columns.to_list()

# (name, transformer, columns) triples consumed by ColumnTransformer.
transformer = [
    (
        'ohe',
        OneHotEncoder(drop='first', handle_unknown='ignore', sparse_output=False),
        categoricalPredictors,
    ),
    (
        'scaler',
        StandardScaler(),
        numericalPredictors,
    ),
]

preprocessor = ColumnTransformer(
    transformers=transformer,
    remainder='passthrough',
    n_jobs=-1,
    verbose_feature_names_out=False,
)
# Ask for DataFrame output so the transformed columns keep readable names.
preprocessor.set_output(transform='pandas')

# From here on X holds the transformed predictors.
X = preprocessor.fit_transform(X)
print(X)
print(y)

    gender_Male  employed_Yes  marital_status_Single       age  annual_income  \
0           1.0           1.0                    0.0 -0.172859      -0.177936   
1           0.0           0.0                    1.0 -1.169337      -1.063659   
2           1.0           1.0                    0.0  0.823620       0.707788   
3           0.0           1.0                    1.0 -0.742275      -0.620797   
4           1.0           1.0                    0.0  1.535390       1.150650   
5           0.0           0.0                    1.0 -1.596399      -1.373663   
6           1.0           1.0                    0.0  0.396558       0.132068   
7           0.0           1.0                    1.0 -0.457567      -0.930801   
8           1.0           1.0                    0.0  2.104807       1.814943   
9           0.0           0.0                    1.0 -1.311691      -1.285090   
10          1.0           1.0                    0.0  1.108328       0.929219   
11          0.0           1.

In [26]:
def sigmoidFunction(z):
    """Return the logistic sigmoid ``1 / (1 + exp(-z))`` of ``z``, element-wise.

    The textbook formula evaluates ``np.exp(-z)``, which overflows (and makes
    NumPy emit a RuntimeWarning) for large negative ``z``. This version only
    ever exponentiates a non-positive number, so it cannot overflow:

    * ``z >= 0``: ``1 / (1 + exp(-z))``
    * ``z <  0``: ``exp(z) / (1 + exp(z))``

    Parameters
    ----------
    z : scalar or numpy.ndarray
        Input value(s).

    Returns
    -------
    Sigmoid of ``z``, with the same shape as ``z`` and values in ``[0, 1]``.
    """
    # exp(-|z|) never exceeds 1, whatever the sign or magnitude of z.
    e = np.exp(-np.abs(z))
    # The numerator is 1 on the non-negative branch and exp(z) on the negative one;
    # the denominator 1 + exp(-|z|) is shared by both branches.
    g = np.where(z >= 0, 1.0, e) / (1.0 + e)
    return g

def predictY(X, w, b):
    """Return the predicted probability for each row of ``X``.

    The linear score ``X . w + b`` is computed first and then passed
    through ``sigmoidFunction``.
    """
    linear_score = np.dot(X, w) + b
    probabilities = sigmoidFunction(linear_score)
    return probabilities

def cost_function(y_pred, y, eps=1e-15):
    """Return the mean binary cross-entropy between ``y_pred`` and ``y``.

    Parameters
    ----------
    y_pred : array-like
        Predicted probabilities, one per sample.
    y : array-like
        True labels (0 or 1), same length as ``y_pred``.
    eps : float, optional
        Predictions are clipped to ``[eps, 1 - eps]`` before the logarithm is
        taken. Without this, a prediction of exactly 0 or 1 produces
        ``log(0) = -inf`` and the cost becomes ``inf`` or ``nan``.

    Returns
    -------
    The cost ``-(1/m) * sum(y*log(p) + (1-y)*log(1-p))`` where ``m = len(y)``
    and ``p`` is the clipped prediction.
    """
    m = len(y)
    # Keep probabilities strictly inside (0, 1) so both logarithms stay finite.
    p = np.clip(y_pred, eps, 1 - eps)
    cost = -1 / m * np.sum(y * np.log(p) + (1 - y) * np.log(1 - p))
    return cost

def gradientDescent(X, y, iterations=100, alpha=0.01):
    """Fit logistic-regression parameters by batch gradient descent.

    Weights and bias start at zero. On every iteration the cost of the current
    parameters is recorded, then both are moved against the gradient.

    Parameters
    ----------
    X : array-like of shape (m, n)
        Feature matrix; converted to a float NumPy array once up front.
    y : array-like of shape (m,)
        Target values; converted to a float NumPy array once up front.
    iterations : int, optional
        Number of gradient steps. Defaults to 100.
    alpha : float, optional
        Learning rate. Defaults to 0.01.

    Returns
    -------
    w : numpy.ndarray of shape (n,)
        Learned weights.
    b : float
        Learned bias.
    iteration_hist : list of int
        Iteration indices ``0 .. iterations - 1``.
    cost_hist : list
        Cost measured at the start of each iteration (before the update).
    """
    # Convert once so the loop works on plain arrays rather than re-converting
    # a DataFrame/Series on every step.
    X_arr = np.asarray(X, dtype=float)
    y_arr = np.asarray(y, dtype=float)

    m, n_features = X_arr.shape
    w = np.zeros(n_features)
    b = 0.0

    iteration_hist = []
    cost_hist = []
    for iteration in range(iterations):
        y_pred = predictY(X_arr, w, b)
        cost_hist.append(cost_function(y_pred, y_arr))

        # Gradient of the cross-entropy cost with respect to w and b.
        error = y_pred - y_arr
        delta_w = (1 / m) * np.dot(X_arr.T, error)
        delta_b = np.mean(error)

        w -= alpha * delta_w
        b -= alpha * delta_b
        iteration_hist.append(iteration)
    return w, b, iteration_hist, cost_hist

def predictionAccuracy(y_pred_binary, y_binary):
    """Return the fraction of predictions that equal the true labels.

    Parameters
    ----------
    y_pred_binary : array-like
        Predicted class labels.
    y_binary : array-like
        True class labels, same shape as ``y_pred_binary``.

    Returns
    -------
    numpy.float64
        Number of matching positions divided by the number of labels.

    Raises
    ------
    ValueError
        If the two inputs do not have the same shape. Pairing them with
        ``zip`` would silently ignore the surplus elements and still divide
        by the full label count, giving a misleading score.
    """
    predicted = np.asarray(y_pred_binary)
    actual = np.asarray(y_binary)
    if predicted.shape != actual.shape:
        raise ValueError(
            f"shape mismatch: predictions {predicted.shape} vs labels {actual.shape}"
        )
    # The mean of the element-wise equality mask is the share of correct predictions.
    return np.mean(predicted == actual)

In [27]:
# Fit on the full preprocessed dataset and report the learned parameters.
w, b, iteration_hist, cost_hist = gradientDescent(X, y)
print(f"w = {w}, b = {b}")

# Score the same rows the model was fitted on, so the number printed below
# is training accuracy rather than a held-out estimate.
y_pred = predictY(X, w, b)
y_pred_binary = (y_pred >= 0.5).astype(int)  # threshold probabilities at 0.5
y_binary = (y == 1).astype(int)
result = predictionAccuracy(y_pred_binary, y_binary)
print(result)

w = [ 0.17016481  0.06417441 -0.17534414  0.26848132  0.29103447  0.27035385
  0.29485787], b = -0.005179325656765166
1.0
