In [82]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation

from animation import Animation

from statsmodels.formula.api import logit
import sklearn.metrics

In [85]:
# Turn interactive mode off so figures are not drawn after every plotting call.
plt.ioff()

# Global look: larger text, and black outlines on patches such as histogram bars.
plt.rc('font', size=14)
plt.rc('patch', edgecolor='black')

# Render animations inline as HTML5 video.
# Background: http://louistiao.me/posts/notebooks/save-matplotlib-animations-as-gifs/
# Alternative JavaScript player, see
# https://stackoverflow.com/questions/43445103/inline-animations-in-jupyter
# plt.rcParams["animation.html"] = "jshtml"
plt.rc('animation', html='html5')

In [None]:
# Fixed seed so the simulated data (and therefore the animation) is reproducible.
np.random.seed(123)

n = 5000

# Simulate a binary outcome y and a single predictor x whose mean depends on
# the class: N(10, 1) when y == 1 and N(8.5, 1) when y == 0.
df = pd.DataFrame({'y': np.random.choice([0, 1], n)})
df['x'] = np.where(df.y == 1, np.random.normal(10, 1, n), np.random.normal(8.5, 1, n))

# Fit a logistic regression and store the in-sample predicted P(y = 1).
model = logit('y ~ x', df).fit()
df['p_y'] = model.predict()

# One row per decision threshold, ordered from the lowest threshold to the
# highest so the animation can sweep the threshold left to right.
# NOTE: roc_curve prepends a sentinel threshold above every score (see the
# scikit-learn docs), so after sorting the last row lies outside [0, 1].
fpr, tpr, thresholds = sklearn.metrics.roc_curve(df.y, df.p_y)
metrics = pd.DataFrame({
    'fpr': fpr,
    'tpr': tpr,
    'threshold': thresholds
}).sort_values(by='threshold').reset_index(drop=True)

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 9))

# Left panel: distribution of predicted probabilities, split by actual class.
ax1.hist(df[df.y == 0].p_y, label='0', ec='black', bins=25, alpha=.6)
ax1.hist(df[df.y == 1].p_y, label='1', ec='black', bins=25, alpha=.6)
ax1.legend(title='y =')
# Vertical marker for the current threshold; repositioned by the animation.
vline = ax1.vlines(0, *ax1.get_ylim(), lw=2)
ax1.set(ylabel='frequency', xlabel='$P(y = 1)$', title='P(y = 1) by actual y value')

# Right panel: the ROC curve starts empty and is filled in frame by frame.
lines, = ax2.plot([], [], ls=':', marker='.')
# Line2D.set_data does not rescale the axes, so the limits must be fixed up
# front; otherwise the view stays at the range derived from the empty line
# and most of the curve is drawn out of sight.
ax2.set(
    title='ROC Curve',
    xlabel='FPR',
    ylabel='TPR',
    xlim=(-.05, 1.05),
    ylim=(-.05, 1.05),
)

# Arrow annotation pointing at the threshold marker; read after the marker
# has been added so the y-limits match what later frames will see.
y_top = ax1.get_ylim()[1]
text = ax1.annotate(
    'Threshold',
    (0, y_top * .8),
    xytext=(.5, y_top * .9),
    arrowprops={'arrowstyle': '->'}
)

def animate(i):
    """Draw frame ``i`` of the threshold sweep.

    Moves the threshold marker and its annotation on the histogram panel to
    the ``i``-th threshold in ``metrics`` and extends the ROC curve up to and
    including the point for that threshold.

    Parameters
    ----------
    i : int
        Frame number supplied by ``FuncAnimation``. Frames at or beyond
        ``len(metrics)`` leave the figure untouched, which holds the finished
        picture for the remainder of the animation.
    """
    if i >= metrics.shape[0]:
        return

    threshold = metrics.threshold.iloc[i]
    # Use the real axis range rather than a hard-coded height so the marker
    # always spans the whole histogram panel.
    y_low, y_high = ax1.get_ylim()

    # Re-point the existing annotation instead of removing and recreating it;
    # its text position (xytext) never changes between frames.
    text.xy = (threshold, y_high * .8)
    vline.set_paths((np.array([[threshold, y_low], [threshold, y_high]]), ))

    # Include row i itself so the curve shows the point for the threshold
    # currently marked (and the final ROC point is eventually drawn).
    lines.set_data(metrics.fpr.iloc[:i + 1], metrics.tpr.iloc[:i + 1])

# About 30 frames per second (interval is in milliseconds). Twice as many
# frames as thresholds are scheduled; animate() returns early for the surplus
# frames, so the completed picture is held before the animation loops.
anim = FuncAnimation(fig, animate, frames=2 * len(metrics), interval=1000 / 30)
anim

Optimization terminated successfully.
         Current function value: 0.476647
         Iterations 6
