In [None]:
from sklearn import datasets
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn import model_selection
import numpy as np
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Notebook-wide seaborn look: "paper" plotting context on a "darkgrid" style.
sns.set_context(context="paper")
sns.set_style(style="darkgrid")

## Data

In [None]:
# Seed NumPy's global RNG right before sampling; make_classification is called
# without an explicit random_state below.
# (The "#7" that trailed this line is presumably another seed that was tried.)
np.random.seed(1)

# Two informative features, one cluster per class, and class weights of
# 0.99 / 0.01 to produce a heavily imbalanced binary target.
x, y = datasets.make_classification(
    n_samples=1000,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_classes=2,
    n_clusters_per_class=1,
    weights=[0.99, 0.01],
)

# Features and target side by side in a single frame.
df = pd.DataFrame(x, columns=["x1", "x2"]).assign(y=y)

df.shape

In [None]:
# Histograms of the first two columns of df (the feature columns).
df.iloc[:, :2].hist(figsize=(15, 5))
plt.show()

# Training a logistic regression model

In [None]:
# Baseline classifier, scored by accuracy with 10-fold cross-validation.
LogisticModel = LogisticRegression(solver="liblinear")

# random_state only has an effect when shuffle=True; scikit-learn raises a
# ValueError if random_state is set while shuffle is left at False.
# Stratified folds keep the class ratio in every fold, so the rare positive
# class is represented in each test split (needed for the F1 scoring that
# reuses this splitter further down).
kfold = model_selection.StratifiedKFold(n_splits=10, shuffle=True, random_state=10)

accuracy = model_selection.cross_val_score(LogisticModel, x, y, cv=kfold, scoring="accuracy")

accuracy.mean()

In [None]:
# Trivial baseline: predict class 0 for every sample and measure the accuracy
# that this constant prediction achieves.
df["yhat"] = 0
accuracy_score(y_true=y, y_pred=df["yhat"])

In [None]:
# Share of each class in the target (as a fraction of all rows); every label
# names the class whose rows are counted in the value that follows it.
(
    "%y=0:", df[df["y"] == 0].shape[0] / df.shape[0],
    "%y=1:", df[df["y"] == 1].shape[0] / df.shape[0],
)

## Confusion Matrix

In [None]:
from sklearn.metrics import confusion_matrix

In [None]:
# Transposed confusion matrix: rows are predicted classes, columns are actual
# classes.
cm = np.transpose(confusion_matrix(y_true=y, y_pred=df["yhat"]))
cm

In [None]:
# Pos: y=1
# Neg: y=0

#           Neg.  Pos.
# Neg. hat [983,  17],
# Pos. hat [  0,   0]

In [None]:
# Number of actual negatives (y == 0) and actual positives (y == 1).
Count_neg = int(np.count_nonzero(y == 0))
Count_pos = int(np.count_nonzero(y == 1))
Count_neg, Count_pos

In [None]:
# cm is indexed [predicted class][actual class].
# Row 0 = predicted negative: true negatives, then false negatives
# (predicted Neg. but actually Pos.).
TN, FN = cm[0]
# Row 1 = predicted positive: false positives (predicted Pos. but actually
# Neg.), then true positives.
FP, TP = cm[1]

In [None]:
# Sensitivity: how many sick people (actual positives) are identified as sick.
Sensitivity = TP / Count_pos
# Specificity: how many healthy people (actual negatives) are identified as healthy.
Specificity = TN / Count_neg
# Precision = TP / (TP + FP). It is undefined when nothing is predicted
# positive (TP + FP == 0, as with an all-negative prediction); use 0 in that
# case instead of hard-coding the value.
Precision = TP / (TP + FP) if (TP + FP) > 0 else 0
Sensitivity, Specificity

In [None]:
# F1 is the harmonic mean of precision and sensitivity. When both are 0 the
# ratio is undefined, so report 0 instead of dividing by zero.
F1_score = (
    2 * (Precision * Sensitivity) / (Precision + Sensitivity)
    if (Precision + Sensitivity) > 0
    else 0
)
F1_score

In [None]:
# Re-seed NumPy's global RNG, then fit a logistic regression on the full data
# set and score it on that same data (in-sample accuracy).
np.random.seed(582)
LogisticModel = LogisticRegression(solver="liblinear")
LogisticModel.fit(x, y)
df["yhat"] = LogisticModel.predict(x)
accuracy_score(y_true=y, y_pred=df["yhat"])

In [None]:
# Transposed confusion matrix for the current predictions in df["yhat"]:
# rows are predicted classes, columns are actual classes.
cm = np.transpose(confusion_matrix(y_true=y, y_pred=df["yhat"]))
cm

In [None]:
# cm is indexed [predicted class][actual class].
# Predicted negative: true negatives, then false negatives (actually Pos.).
TN, FN = cm[0]
# Predicted positive: false positives (actually Neg.), then true positives.
FP, TP = cm[1]

In [None]:
# Sensitivity: how many sick people (actual positives) are identified as sick.
Sensitivity = TP / Count_pos
# Specificity: how many healthy people (actual negatives) are identified as healthy.
Specificity = TN / Count_neg
# Precision = TP / (TP + FP). If the model predicts no positives at all the
# denominator is 0; report 0 instead of NaN in that case.
Precision = TP / (TP + FP) if (TP + FP) > 0 else 0
Sensitivity, Specificity

In [None]:
# Harmonic mean of precision and sensitivity. The comparison is False both for
# a zero sum and for NaN, so either case yields 0 instead of NaN.
F1_score = (
    2 * (Precision * Sensitivity) / (Precision + Sensitivity)
    if (Precision + Sensitivity) > 0
    else 0
)
F1_score

In [None]:
from sklearn.metrics import f1_score

In [None]:
# F1 of the current predictions as computed by scikit-learn, for comparison
# with the hand-computed F1_score.
f1_score(y_true=y, y_pred=df["yhat"])

In [None]:
# Cross-validated F1 of a fresh (unfitted) logistic regression, using the
# kfold splitter defined earlier in the notebook.
LogisticModel = LogisticRegression(solver="liblinear")
f1 = model_selection.cross_val_score(
    estimator=LogisticModel,
    X=x,
    y=y,
    scoring="f1",
    cv=kfold,
)
f1.mean()

In [None]:
from sklearn.neural_network import MLPClassifier
import numpy as np
import matplotlib.pyplot as plt

def plot_decision_boundary(pred_func, X, y, step=0.01, padding=0.15):
    """Draw a classifier's predicted regions with the samples plotted on top.

    Parameters
    ----------
    pred_func : callable
        Called once with a 2-column array of grid coordinates; must return one
        prediction per row.
    X : array
        Sample coordinates, indexed as ``X[:, 0]`` (horizontal axis) and
        ``X[:, 1]`` (vertical axis).
    y : array-like
        Values used to colour the scattered samples.
    step : float, default 0.01
        Spacing of the evaluation grid along both axes.
    padding : float, default 0.15
        Margin added around the data range on every side.

    The plot is drawn with pyplot calls (so on the current figure) and shown
    with ``plt.show()``; nothing is returned.
    """
    # Bounding box of the data, widened by `padding` on every side.
    x_min, x_max = X[:, 0].min() - padding, X[:, 0].max() + padding
    y_min, y_max = X[:, 1].min() - padding, X[:, 1].max() + padding

    # Dense grid covering the box; each grid node is classified once.
    xx, yy = np.meshgrid(np.arange(x_min, x_max, step), np.arange(y_min, y_max, step))
    Z = pred_func(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)  # back to the grid layout expected by contourf

    plt.contourf(xx, yy, Z, cmap=plt.cm.Spectral)
    plt.scatter(X[:, 0], X[:, 1], c=y, s=100)
    plt.show()
    
def predict(model, x):
    """Return ``model.predict(x)``: the model's predictions for the inputs ``x``."""
    predictions = model.predict(x)
    return predictions

In [None]:
# Multi-layer perceptron with two hidden layers (32 and 10 units), logistic
# activations and the L-BFGS solver, fitted on the full data set.
clf = MLPClassifier(
    hidden_layer_sizes=(32, 10),
    activation="logistic",
    solver="lbfgs",
    max_iter=10000,
    random_state=1,
)
clf.fit(x, y)


In [None]:
# Transposed confusion matrix of clf's predictions on x: rows are predicted
# classes, columns are actual classes.
cm = np.transpose(confusion_matrix(y_true=y, y_pred=predict(clf, x)))
cm

In [None]:
# cm is indexed [predicted class][actual class].
TN = cm[0][0]  # True Neg.
FN = cm[0][1]  # False Neg.: predicted as Neg., but it is Pos.
FP = cm[1][0]  # False Pos.: predicted as Pos., but it is Neg.
TP = cm[1][1]  # True Pos.

# Sensitivity: how many sick people (actual positives) are identified as sick.
Sensitivity = TP / Count_pos
# Specificity: how many healthy people (actual negatives) are identified as healthy.
Specificity = TN / Count_neg
# Precision = TP / (TP + FP). If the model predicts no positives at all the
# denominator is 0; report 0 instead of NaN in that case.
Precision = TP / (TP + FP) if (TP + FP) > 0 else 0
Sensitivity, Specificity

In [None]:
# Harmonic mean of precision and sensitivity. The comparison is False both for
# a zero sum and for NaN, so either case yields 0 instead of NaN.
F1_score = (
    2 * (Precision * Sensitivity) / (Precision + Sensitivity)
    if (Precision + Sensitivity) > 0
    else 0
)
F1_score

In [None]:
# Open a large square figure first; plot_decision_boundary draws with pyplot
# calls, so it lands on this figure.
plt.figure(figsize=(20, 20))
# The callback hands each batch of grid points to clf via predict().
plot_decision_boundary(lambda grid: predict(clf, grid), x, y)

In [None]:
# Compare hidden-layer layouts by mean cross-validated F1 (same kfold splitter
# as the logistic-regression runs).
# Single-layer layouts are written as one-element tuples: "(8)" is just the
# int 8, whereas every entry here is meant to be a tuple of layer widths.
# Note: this loop rebinds clf and f1; after it, clf is the last (unfitted)
# configuration, not the model fitted earlier.
for nn in [(8,), (16,), (32,),
           (8, 8), (8, 16), (8, 32),
           (16, 8), (16, 16), (16, 32),
           (32, 8), (32, 16), (32, 32)]:
    clf = MLPClassifier(solver='lbfgs', hidden_layer_sizes=nn,
                        activation="logistic", random_state=1, max_iter=10000)

    f1 = model_selection.cross_val_score(clf, x, y, cv=kfold, scoring="f1")
    print(nn, f1.mean())

In [None]:
# Logistic-regression reference point: mean cross-validated F1 on the same
# kfold splitter, for comparison with the MLP results.
LogisticModel = LogisticRegression(solver="liblinear")
f1 = model_selection.cross_val_score(
    estimator=LogisticModel,
    X=x,
    y=y,
    scoring="f1",
    cv=kfold,
)
f1.mean()

In [None]:
# https://scikit-learn.org/stable/modules/model_evaluation.html 