In [1]:
import numpy as np
import pandas as pd

In [2]:
# Membuat fungsi Standarisasi
def standarisasi(X):
    # X = data variabel independen.
    # m =  jumlah data variabel independen
    # n = jumlah variabel independen
    m, n = X.shape
    # iterasi untuk Standarisasi data variabel independen.
    for i in range(n):
        X = (X - X.mean(axis=0))/X.std(axis=0)
        
    return X

In [3]:
# Fungsi Aktivasi Sigmoid
def sigmoid(z):
    return 1.0/(1 + np.exp(-z))

In [4]:
# Membuat Loss Function untuk menghitung nilai loss
def loss(y, y_hat):
    loss = -np.mean(y*(np.log(y_hat)) - (1-y)*np.log(1-y_hat))
    return loss

In [5]:
#menghitung gradien dari loss koefisien dan konstanta
def gradients(X, y, y_hat):
    # X = data variabel independen
    # y = data variabel dependen
    # y_hat = data prediksi variabel dependen
    # w = koefisien variabel independen
    # c = konstanta
    # m =  jumlah data variabel independen
    m = X.shape[0]
    # Gradien Loss Koefisien
    dw = (1/m)*np.dot(X.T, (y_hat - y))
    # Gradien Loss Konstanta
    dc = (1/m)*np.sum((y_hat - y)) 
    return dw, dc

In [6]:
# Membuat fungsi untuk melatih data
def train(X, y, batch, iter, lr):
    # batch = Batch Size.
    # iter = jumlah iterasi.
    # lr = Learning rate.
    m, n = X.shape
    # Inisiasi Koefisien variabel dan Konstanta
    w = np.zeros((n,1))
    c = 0
    # Reshaping y
    y = y.reshape(m,1)
    # Standarisasi variabel independen
    x = standarisasi(X)
    # Empty list untuk menyimpan loss.
    losses = []
    # Training loop.
    for i in range(iter):
        for i in range((m-1)//batch + 1):
            # Mendefinisikan Ukuran Data Yang digunakan setiap iterasi
            start_i = i*batch
            end_i = start_i + batch
            xb = X[start_i:end_i]
            yb = y[start_i:end_i]
            # menghitung data prediksi variabel dependen.
            y_hat = sigmoid(np.dot(xb, w) + c)
            # mendapatkan gradien loss dari koefisien dan kosntanta.
            dw, dc = gradients(xb, yb, y_hat)
            # memperbaharui nilai parameter.
            w -= lr*dw
            c -= lr*dc
        # menghitung nilai loss dari setiap iterasi.
        l = loss(y, sigmoid(np.dot(X, w) + c))
        losses.append(l)  
    # mengembalikan nilai koefisien, konstanta and loss(List).
    return w, c, losses

In [7]:
# Mendefinisikan fungsi untuk memprediksi variabel dependen
def predict(X):
    # Standarisasi data variabel independen.
    x = standarisasi(X)
    # menghitung data prediksi variabel dependen.
    preds = sigmoid(np.dot(X, w) + c)
    # Empty List untuk menyimpan hasil prediksi.
    pred_class = []
    # jika y_hat >= 0.5 y dibulatkan ke 1
    # jika y_hat < 0.5 y dibulatkan ke 0
    pred_class = [1 if i > 0.5 else 0 for i in preds]
    # mengembalikan hasil prediksi
    return np.array(pred_class)

In [8]:
# Mendefinisikan fungsi menghitung akurasi
def accuracy(y, y_hat):
    # menghitung akurasi, yaitu banyaknya data benar dibagi panjang data
    accuracy = np.sum(y == y_hat)/len(y)
    return accuracy

# Contoh Implementasi 

In [9]:
# Read data
data = pd.read_csv("/content/data.csv")

In [10]:
# Cek data
data.head()

Unnamed: 0,id,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,...,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst,Unnamed: 32
0,842302,M,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,...,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,
1,842517,M,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,...,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,
2,84300903,M,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,...,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,
3,84348301,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,
4,84358402,M,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,...,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,


In [11]:
# info dari data
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 569 entries, 0 to 568
Data columns (total 33 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   id                       569 non-null    int64  
 1   diagnosis                569 non-null    object 
 2   radius_mean              569 non-null    float64
 3   texture_mean             569 non-null    float64
 4   perimeter_mean           569 non-null    float64
 5   area_mean                569 non-null    float64
 6   smoothness_mean          569 non-null    float64
 7   compactness_mean         569 non-null    float64
 8   concavity_mean           569 non-null    float64
 9   concave points_mean      569 non-null    float64
 10  symmetry_mean            569 non-null    float64
 11  fractal_dimension_mean   569 non-null    float64
 12  radius_se                569 non-null    float64
 13  texture_se               569 non-null    float64
 14  perimeter_se             5

In [12]:
# Membuang variabel bernama 'Unnamed: 32'
data = data.drop(['Unnamed: 32'], axis = 1)

In [13]:
# Membuang variabel bernama 'id'
data = data.drop(['id'], axis = 1)

In [14]:
# Check data
data.head()

Unnamed: 0,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,symmetry_mean,...,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
0,M,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,M,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,M,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,M,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


In [15]:
# Membuat variabel data diagnosis sebagai variabel dependen 
data.diagnosis = [1 if each == "M" else 0 for each in data.diagnosis]

In [16]:
# Check data.diagnosis
data.diagnosis.head()

0    1
1    1
2    1
3    1
4    1
Name: diagnosis, dtype: int64

In [17]:
# Membuat data X berisi variabel prediktor
X = data.loc[:, data.columns != 'diagnosis']

In [18]:
# Membuat variabel dependen
y = data.diagnosis

In [19]:
# Check data X
X.head()

Unnamed: 0,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,symmetry_mean,fractal_dimension_mean,...,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


In [20]:
# Check data y
y.head()

0    1
1    1
2    1
3    1
4    1
Name: diagnosis, dtype: int64

In [21]:
# Merubah ke dalam bentuk array
X = X.values
y = y.values

In [22]:
# Check x
X

array([[1.799e+01, 1.038e+01, 1.228e+02, ..., 2.654e-01, 4.601e-01,
        1.189e-01],
       [2.057e+01, 1.777e+01, 1.329e+02, ..., 1.860e-01, 2.750e-01,
        8.902e-02],
       [1.969e+01, 2.125e+01, 1.300e+02, ..., 2.430e-01, 3.613e-01,
        8.758e-02],
       ...,
       [1.660e+01, 2.808e+01, 1.083e+02, ..., 1.418e-01, 2.218e-01,
        7.820e-02],
       [2.060e+01, 2.933e+01, 1.401e+02, ..., 2.650e-01, 4.087e-01,
        1.240e-01],
       [7.760e+00, 2.454e+01, 4.792e+01, ..., 0.000e+00, 2.871e-01,
        7.039e-02]])

In [23]:
# Check y
y

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
       1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1,
       0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1,
       0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1,
       1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0,
       0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1,
       1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1,
       0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0,
       0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1,

In [24]:
# melatih Algoritma terhadap data
w, c, l = train(X, y, batch=1, iter=10000, lr=0.0001)

  This is separate from the ipykernel package so we can avoid doing imports until
  This is separate from the ipykernel package so we can avoid doing imports until


In [25]:
# Check koefisien
print(w)

[[-11.68863368]
 [  0.34627353]
 [-16.4899809 ]
 [  0.59176407]
 [  0.33732876]
 [  1.63602918]
 [  2.32996945]
 [  0.92926489]
 [  0.50302526]
 [  0.11775072]
 [ -0.29540927]
 [ -0.65797062]
 [  2.08238987]
 [  1.8453409 ]
 [  0.04065618]
 [  0.34475706]
 [  0.50406588]
 [  0.11481808]
 [  0.14610503]
 [  0.02850465]
 [-12.46740961]
 [  5.95937391]
 [  8.94152246]
 [  0.32882636]
 [  0.58593786]
 [  4.92137936]
 [  6.21971691]
 [  1.66600738]
 [  1.47951845]
 [  0.46283829]]


In [26]:
# Check Konstanta
print(c)

-1.569245115898879


In [27]:
# Check Akurasi
accuracy(y,predict(X))

0.9209138840070299

In [28]:
# Fungsi RegresiLogistik dan Output
def RegresiLogistik(X,y):
  w, c, l = train(X, y, batch=1, iter=10000, lr=0.0001)
  print("Koefisien dari variabel x1 = {0}, x2 = {1}, x3 = {2}, x4 = {3}, x5 = {4}".format(w[0,0],w[1,0],w[2,0],w[3,0],w[4,0]))
  print("Koefisien dari variabel x6 = {0}, x7 = {1}, x8 = {2}, x9 = {3}, x10 = {4}".format(w[5,0],w[6,0],w[7,0],w[8,0],w[9,0]))
  print("Koefisien dari variabel x11 = {0}, x12 = {1}, x13 = {2}, x14 = {3}, x15 = {4}".format(w[10,0],w[11,0],w[12,0],w[13,0],w[14,0]))
  print("Koefisien dari variabel x16 = {0}, x17 = {1}, x18 = {2}, x19 = {3}, x20 = {4}".format(w[15,0],w[16,0],w[17,0],w[18,0],w[19,0]))
  print("Koefisien dari variabel x21 = {0}, x22 = {1}, x23 = {2}, x24 = {3}, x25 = {4}".format(w[20,0],w[21,0],w[22,0],w[23,0],w[24,0]))
  print("Koefisien dari variabel x26 = {0}, x27 = {1}, x28 = {2}, x29 = {3}, x30 = {4}".format(w[25,0],w[26,0],w[27,0],w[28,0],w[29,0]))
  print("Konstanta dari persamaan logistik ={0}".format(c))
  print("Accuracy dari model = ",accuracy(y,predict(X)))

In [29]:
# Panggil Fungsinya
RegresiLogistik(X,y)

  This is separate from the ipykernel package so we can avoid doing imports until
  This is separate from the ipykernel package so we can avoid doing imports until


Koefisien dari variabel x1 = -11.68863367556556, x2 = 0.3462735255731345, x3 = -16.48998090273542, x4 = 0.5917640695320338, x5 = 0.33732876121901934
Koefisien dari variabel x6 = 1.636029175835698, x7 = 2.3299694454356263, x8 = 0.9292648927769442, x9 = 0.50302525806211, x10 = 0.11775071731644662
Koefisien dari variabel x11 = -0.2954092682850008, x12 = -0.6579706167054378, x13 = 2.082389874616541, x14 = 1.845340901513078, x15 = 0.04065617922751525
Koefisien dari variabel x16 = 0.34475706421046126, x17 = 0.504065877845447, x18 = 0.11481808087885616, x19 = 0.1461050273094117, x20 = 0.028504653849867948
Koefisien dari variabel x21 = -12.46740960666672, x22 = 5.9593739080797565, x23 = 8.941522456947931, x24 = 0.3288263583123243, x25 = 0.5859378564491273
Koefisien dari variabel x26 = 4.92137935869433, x27 = 6.219716914355003, x28 = 1.666007377005377, x29 = 1.4795184526488516, x30 = 0.462838287514781
Konstanta dari persamaan logistik =-1.569245115898879
Accuracy dari model =  0.920913884007029