## Logistic Regression
#### Training/ Fit
- Initialization
    - Initialize weights to be 0
    - Initialize bias to be 0
- Ingest data x and calculate z = w*x + b 
- Calculate prediction as sigmoid(z)
- Calculate the gradient of weights and bias
- Updata weights and bias
 
#### Test
Given a data points:
- Calculate the z 
- Calculate the a
- With defined threshold, choose label

#### Test points:
- Sigmoid/ Derivative of sigmoid
- Initialization
- When to use `np.dot` or `*`
- Partial derivatives  
- (if possible) CE function

In [4]:
import numpy as np


class LogisticRegression():

    def __init__(self, lr=0.001, n_iters=1000, threshold = 0.5):
        self.lr = lr
        self.n_iters = n_iters
        self.weights = None
        self.bias = None
        self.threshold = threshold

    @staticmethod
    def sigmoid(x):
        return 1/(1+np.exp(-x))

    def fit(self, X, y):
        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features)
        self.bias = 0

        for _ in range(self.n_iters):
            linear_pred = np.dot(X, self.weights) + self.bias
            predictions = self.sigmoid(linear_pred)

            dw = (1/n_samples) * np.dot(X.T, (predictions - y))
            db = (1/n_samples) * np.sum(predictions-y)

            self.weights -= self.lr*dw
            self.bias -= self.lr*db


    def predict(self, X):
        linear_pred = np.dot(X, self.weights) + self.bias
        y_pred = self.sigmoid(linear_pred)
        class_pred = [0 if p <= self.threshold else 1 for p in y_pred]
        return class_pred

#### Test 

In [5]:
if __name__ == "__main__":
    import numpy as np
    from sklearn.model_selection import train_test_split
    from sklearn import datasets
    import matplotlib.pyplot as plt

    def accuracy(y_pred, y_test):
        return np.sum(y_pred==y_test)/len(y_test)

    bc = datasets.load_breast_cancer()
    X, y = bc.data, bc.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1234)

    clf = LogisticRegression(lr=0.01)
    clf.fit(X_train,y_train)
    y_pred = clf.predict(X_test)

    acc = accuracy(y_pred, y_test)
    print(acc)

0.9210526315789473


  return 1/(1+np.exp(-x))


#### Extra:
1. Derive dw, dz 
![image](./image/lr_gradient_weights.png)

2. CE 

In [3]:
def cross_entropy(y, y_pred, binary = True, eps=1e-15):
    y_pred = np.clip(y_pred, eps, 1 - eps)  # Clip predictions to avoid log(0)
    if binary:
        loss = -np.mean(y * np.log(y_pred) + (1 - y) * np.log(1 - y_pred))
    else:
        loss = -np.mean(np.sum(y * np.log(y_pred), axis = 1))
    return loss