<a href="https://colab.research.google.com/github/trprince21/STATS101C_notes/blob/main/Week4_Discussion.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Logistic Loss = $$\min_{f \in \mathcal{F}} \frac{1}{100} \sum_{i=1}^{100} \log \left(1 + \exp(-f(x_i)y_i)\right)$$

$f(x_i) = x_i^T \beta = \beta_0 + \beta_1 x_1 + \beta_2 x_2$ [and] $y_i \in \{-1, 1\}$

[when] $y_i = -1$:  $\log \left(1 + \exp(x_i^T \beta)\right)$
 [we want] $x_i^T \beta \rightarrow -\infty$

[when] $y_i = 1$:  $\log \left(1 + \exp(-x_i^T \beta)\right)$
 [we want] $x_i^T \beta \rightarrow \infty$

[so] we need a map from the score $x_i^T \beta \in (-\infty, \infty)$ to a probability in $(0,1)$: $(-\infty, \infty) \rightarrow (0,1)$

Log loss = -log (Likelihood)
\begin{cases}
P(y_i = 1 | x_i) = \frac{1}{1 + \exp(-x_i^T \beta)} \\
P(y_i = -1 | x_i) = 1 - P(y_i = 1 | x_i) = 1 - \frac{1}{1 + \exp(-x_i^T \beta)}
\end{cases}
$\Rightarrow \log(1 + \exp(-x_i^T \beta y_i))$

\begin{align*}
\frac{\exp(-x_i^T \beta)}{1 + \exp(-x_i^T \beta)} \cdot \frac{\exp(x_i^T \beta)}{\exp(x_i^T \beta)}
& = \frac{\exp(0)}{\exp(x_i^T \beta) + \exp(0)} \\
& = \frac{1}{1 + \exp(x_i^T \beta)}
\end{align*}

$$-\log\left( \frac{1}{1 + \exp(x_i^T \beta)} \right) = \log(1 + \exp(x_i^T \beta)) = \log(1 + \exp(-x_i^T \beta y_i)) \quad \text{for } y_i = -1$$

$y_i \in \{0, 1\}$

**(REFER TO PHOTOS)**

K-Fold CV: ...




In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.utils import shuffle
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis as QDA
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression

In [None]:
# Load the banknote-authentication table from a CSV file in the working directory.
# NOTE(review): the preview printed below shows a leading 'Unnamed: 0' column
# next to the four features and the 'class' label -- presumably a row index
# written out when the CSV was saved; confirm before slicing columns by position.
Data = pd.read_csv('BankNote_Authentication.csv')
Data.head()  # last expression of the cell, so the notebook renders the first rows

Unnamed: 0,variance,skewness,curtosis,entropy,class
0,3.6216,8.6661,-2.8073,-0.44699,0
1,4.5459,8.1674,-2.4586,-1.4621,0
2,3.866,-2.6383,1.9242,0.10645,0
3,3.4566,9.5228,-4.0112,-3.5944,0
4,0.32924,-4.4552,4.5718,-0.9888,0


In [None]:
# Pick the two predictors and the label by column name instead of by position.
# The first CSV column ('Unnamed: 0') is a running row counter, not a
# measurement, so the earlier positional slice iloc[:, 0:2] handed the row
# number to every classifier as a feature. Accuracy figures recorded with that
# slice have to be regenerated by re-running the cells that follow.
Data_X = Data[['variance', 'skewness']].to_numpy()
Data_Y = Data['class'].to_numpy()

In [None]:
def createFolds(X, y, k=5):
    """Split the row indices of ``X`` into ``k`` shuffled, disjoint folds.

    Every index in ``range(len(X))`` is placed in exactly one fold. When
    ``len(X)`` is not a multiple of ``k``, the first ``len(X) % k`` folds
    receive one extra index, so no sample is silently left out of
    cross-validation.

    Parameters
    ----------
    X : array-like
        Feature rows; only the number of rows is used.
    y : array-like
        Labels; must have the same length as ``X``.
    k : int, default 5
        Number of folds, with ``1 <= k <= len(X)``.

    Returns
    -------
    list of numpy.ndarray
        ``k`` arrays of row indices (the test indices of each fold).

    Raises
    ------
    ValueError
        If ``X`` and ``y`` differ in length, or ``k`` is outside
        ``[1, len(X)]``.
    """
    n = len(X)
    if len(y) != n:
        raise ValueError(
            f"X and y must have the same number of samples, got {n} and {len(y)}"
        )
    if not 1 <= k <= n:
        raise ValueError(f"k must satisfy 1 <= k <= {n}, got {k}")

    # Only the index order is needed, so permute the indices directly instead
    # of shuffling copies of X and y. The seed is fixed (1) so that repeated
    # calls produce the same fold assignment.
    indices = np.random.RandomState(1).permutation(n)

    # array_split handles uneven division: leading folds absorb the remainder
    # rather than the trailing n % k samples being dropped.
    return np.array_split(indices, k)
# Show the five index arrays produced for the banknote data.
print(createFolds(X=Data_X, y=Data_Y, k=5))

[array([1240,  703,  821, 1081,   37,  167,  223,  647,  325,  558,  341,
       1218,  302, 1124,  793,   80,  607,  538,  255,  236, 1209,  108,
         48, 1109, 1338, 1126,  336,  186, 1259,    3,  426,   19,  938,
       1051,  259,  962, 1182,  596, 1115,   91,  409,  181,  310,  309,
       1108, 1322,   60,  575,  757, 1314,  248,  521, 1307,  446,  729,
       1369, 1043,  759,  375, 1121, 1005, 1159,  990, 1331,  798, 1083,
        480, 1076,  853,  383,  494,  921,  862,  424, 1066,  311,  693,
       1068, 1087,  301,  649,  496,  982,  894,  531,  101,  584,  885,
        280,   88,  641,  686,  833,  918, 1048, 1186, 1189,  824,  453,
       1003, 1298,  782,  503,  992,  190,  414,  368,  159,  512,  201,
         65,  797, 1233,  419, 1146,  111,  351,  288,  529,  428, 1352,
        177, 1358, 1045,  708,  989, 1192, 1296, 1071, 1058, 1179,  304,
        335,  808,  669,  697,  115, 1017,  403,  330,  945,   75,  599,
        498,  604,  612, 1114,  443,  473,  826,  

In [None]:
def kfoldCV(X, y, model, k=5):
    """Estimate classification accuracy with hand-rolled k-fold CV.

    The folds come from ``createFolds``. Each fold is held out once while
    ``model`` is refit on the remaining folds and scored on the held-out rows.

    Parameters
    ----------
    X : numpy.ndarray
        Feature matrix, indexed by integer arrays.
    y : numpy.ndarray
        Label vector aligned with ``X``.
    model : estimator
        Object exposing ``fit(X, y)`` and ``predict(X)``; it is refit in place
        on every fold.
    k : int, default 5
        Number of folds.

    Returns
    -------
    tuple
        ``(mean accuracy, standard deviation of accuracy)`` over the k folds.
    """
    folds = createFolds(X, y, k)
    fold_scores = []
    for held_out in range(k):
        test_idx = folds[held_out]
        # Training rows are every fold except the one currently held out.
        train_idx = np.concatenate(
            [folds[other] for other in range(k) if other != held_out]
        )
        train_features, test_features = X[train_idx], X[test_idx]
        train_labels, test_labels = y[train_idx], y[test_idx]

        model.fit(train_features, train_labels)
        predicted = model.predict(test_features)
        fold_scores.append(accuracy_score(test_labels, predicted))

    return np.mean(fold_scores), np.std(fold_scores)

In [None]:
# Classifiers to compare, each constructed with its default settings and
# keyed by the label used in the printed report.
models = {
    'LDA': LDA(),
    'QDA': QDA(),
    'KNN': KNeighborsClassifier(),
    'Logistic Regression': LogisticRegression()
}

# Score every classifier with the hand-written 5-fold CV (kfoldCV) and print
# the mean and standard deviation of its per-fold accuracies.
for name, model in models.items():
    mean_acc, std_acc = kfoldCV(Data_X, Data_Y, model, k=5)
    print(name, "Mean Accuracy = ", mean_acc, "Std = ", std_acc)

LDA Mean Accuracy =  0.881021897810219 Std =  0.014525364045352134
QDA Mean Accuracy =  0.894890510948905 Std =  0.011859910079760512
KNN Mean Accuracy =  0.937956204379562 Std =  0.009517083803215518
Logistic Regression Mean Accuracy =  0.8861313868613138 Std =  0.009058155945978705


In [None]:
from sklearn.model_selection import KFold
def kfold(X, y, model, k=5):
    """Estimate classification accuracy using scikit-learn's ``KFold`` splitter.

    Parameters
    ----------
    X : numpy.ndarray
        Feature matrix, indexed by integer arrays.
    y : numpy.ndarray
        Label vector aligned with ``X``.
    model : estimator
        Object exposing ``fit(X, y)`` and ``predict(X)``; it is refit in place
        on every split.
    k : int, default 5
        Number of splits.

    Returns
    -------
    tuple
        ``(mean accuracy, standard deviation of accuracy)`` over the splits.
    """
    # Shuffled splits with a fixed seed (42) so the partition is repeatable.
    splitter = KFold(n_splits=k, shuffle=True, random_state=42)

    def _score_split(train_idx, test_idx):
        # Fit on the training rows, then score predictions on the held-out rows.
        model.fit(X[train_idx], y[train_idx])
        return accuracy_score(y[test_idx], model.predict(X[test_idx]))

    split_scores = [
        _score_split(train_idx, test_idx)
        for train_idx, test_idx in splitter.split(X)
    ]
    return np.mean(split_scores), np.std(split_scores)

In [None]:
# Repeat the comparison with the KFold-based helper `kfold` (same model
# objects, same data, k=5) and print each mean and standard deviation.
for name, model in models.items():
    mean_acc, std_acc = kfold(Data_X, Data_Y, model, k=5)
    print(name, "Mean Accuracy = ", mean_acc, "Std = ", std_acc)

LDA Mean Accuracy =  0.8826463171864631 Std =  0.02190877463732849
QDA Mean Accuracy =  0.893584605175846 Std =  0.022834099543830137
KNN Mean Accuracy =  0.9446131386861314 Std =  0.005285974515724446
Logistic Regression Mean Accuracy =  0.8826463171864631 Std =  0.02446316952469506
