# SDA - lecture 7 - Neural discrimination

In [None]:
import logging
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(asctime)s: %(message)s')

import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

## Example 1 - ROC of two Poisson distributions

In [None]:
# Generate samples - positive and negative responses, each drawn from a
# Poisson distribution with its own mean.

mneg, mpos = 5, 10   # Poisson mean (lam) of the negative / positive responses
nsamp = 100          # number of samples drawn for each response type

# Draw from a dedicated, seeded generator so that a fresh
# "Restart Kernel -> Run All" reproduces exactly the same samples.
SEED = 0
rng = np.random.default_rng(SEED)

spos = rng.poisson(mpos, nsamp)
sneg = rng.poisson(mneg, nsamp)

### Plot histograms

In [None]:
# Normalised histograms of both sample sets on a shared set of unit-width bins.
# Edges are 0, 1, ..., up to (but excluding) 2.5 * mpos; np.histogram ignores
# samples that fall outside the outermost edges, and only the last bin is
# closed on both sides.
count_edges = np.arange(0, mpos*2.5)
hist_neg, hist_bins = np.histogram(sneg, bins=count_edges, density=True)
hist_pos, hist_bins = np.histogram(spos, bins=count_edges, density=True)

# Draw the two bars of every bin side by side around the bin's left edge k:
# the positive bar spans [k, k+0.4] and the negative bar (negative width with
# align='edge') spans [k-0.4, k]. Using align='center' for one of them would
# make the bars overlap and hide part of the first-drawn series.
plt.bar(hist_bins[:-1], hist_pos, color='b', label='Positive', align='edge', width=0.4)
plt.bar(hist_bins[:-1], hist_neg, color='r', label='Negative', align='edge', width=-0.4)
plt.xlabel('Count')
plt.ylabel('Density')
plt.legend();

In [None]:
# d-prime: difference between the two sample means, divided by the square root
# of the average of the two sample variances (ndarray.var() defaults to ddof=0).
mean_gap = spos.mean() - sneg.mean()
pooled_sd = np.sqrt((spos.var() + sneg.var()) / 2)
dprime = mean_gap / pooled_sd

logging.info(f'd-prime of the responses is {dprime:.2f}')

### ROC curve

In [None]:
# Empirical ROC curve built from the two normalised histograms.
# A density histogram sums to 1 only after weighting by the bin widths, so
# weight explicitly instead of relying on the bins being one unit wide.
bin_widths = np.diff(hist_bins)
cdf_neg = np.cumsum(hist_neg * bin_widths)   # P(negative sample <= bin)
cdf_pos = np.cumsum(hist_pos * bin_widths)   # P(positive sample <= bin)

# 1 - CDF is the probability of exceeding each bin. Prepend the most lenient
# threshold (everything is called positive), so the curve always starts at
# (1, 1) even when some samples fall into the first bin.
roc_fpr = np.concatenate(([1.0], 1 - cdf_neg))
roc_tpr = np.concatenate(([1.0], 1 - cdf_pos))

plt.plot(roc_fpr, roc_tpr, '*:')
plt.plot([0,1], [0,1], '-')   # chance-level diagonal for reference
plt.xlabel('P (False positive)')
plt.ylabel('P (True positive)');

## Example 2 - ROC using sklearn
ROC curve and AUC of a logistic-regression classifier, compared against a constant-score baseline (contributed by Yuval Samoilov)

In [None]:
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_curve, roc_auc_score

# Synthetic two-class dataset; the fixed random_state keeps it reproducible.
X, y = make_classification(
    n_samples=1000,
    n_classes=2,
    random_state=1,
)

# Hold out a quarter of the samples for evaluation (again with a fixed seed).
x_train, x_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.25,
    random_state=1,
)

# fit() returns the estimator itself, so construction and training are chained.
model = LogisticRegression(solver='lbfgs').fit(x_train, y_train)

# predict probabilities
# Baseline scores: the same constant for every test sample.
ns_pred = [0] * len(y_test)
# predict_proba returns one column per class; keep column 1 only
# (the second class - presumably label 1 here; verify against model.classes_).
lr_pred = model.predict_proba(x_test)[:, 1]

# calculate scores
ns_auc = roc_auc_score(y_test, ns_pred)
lr_auc = roc_auc_score(y_test, lr_pred)

# Report both AUC values (spelling of "curve" corrected in the messages).
logging.info(f'No test: ROC area under the curve (AUC) = {ns_auc:.3f}')
logging.info(f'Logistic regression: ROC area under the curve (AUC) = {lr_auc:.3f}')

# ROC curve (false-positive rate, true-positive rate) for each score vector;
# the third value returned by roc_curve (the thresholds) is not used.
ns_fpr, ns_tpr, _ = roc_curve(y_test, ns_pred)
lr_fpr, lr_tpr, _ = roc_curve(y_test, lr_pred)

# Draw both curves on the current axes through the explicit Axes interface.
roc_ax = plt.gca()
roc_ax.plot(ns_fpr, ns_tpr, label='Naive classifier', linestyle='--')
roc_ax.plot(lr_fpr, lr_tpr, label='Logistic regression', color='k', linestyle=":", marker='.')
roc_ax.set_xlabel('False Positive Rate')
roc_ax.set_ylabel('True Positive Rate')
roc_ax.legend();