In [None]:
# Import packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

# Floating-point positive and negative infinity, shared across the notebook.
POS_INF, NEG_INF = float("inf"), float("-inf")

In [None]:
def get_ROC_data(thresholds, df):
    """Build a table of ROC operating points: threshold, P_d and P_fa.

    For every threshold ``t`` the function counts the rows of ``df`` whose
    ``lambda`` value is ``>= t``, separately for rows with ``truth == 1``
    and rows with ``truth == 0``, and divides by the size of that class.
    Two extra thresholds, ``+inf`` and ``-inf``, are always appended after
    the supplied ones.

    Parameters:
        thresholds (pd.Series or 1-D sequence): candidate thresholds. The
            original index is discarded; the result uses a fresh 0..N-1 index.
        df (pd.DataFrame): must contain a ``"truth"`` column (1 for H1 rows,
            0 for H0 rows) and a ``"lambda"`` column holding the decision
            statistic that is compared against each threshold.

    Returns:
        A 3-column dataframe with first column as thresholds, second column as
        P_d (fraction of ``truth == 1`` rows at or above the threshold), and
        third column as P_fa (same fraction for ``truth == 0`` rows). If a
        class has no rows in ``df`` its rate is undefined and the whole
        column is NaN.
    """
    # Wrapping in pd.Series lets callers pass a list or array; an existing
    # Series passes through unchanged.
    sentinels = pd.Series([np.inf, -np.inf])
    all_thresholds = pd.concat([pd.Series(thresholds), sentinels], ignore_index=True)
    ROC_data = pd.DataFrame({"thresholds": all_thresholds})

    cutoffs = all_thresholds.to_numpy(dtype=float)
    scores = df["lambda"].to_numpy(dtype=float)

    for column, label in (("P_d", 1), ("P_fa", 0)):
        in_class = (df["truth"] == label).to_numpy(dtype=bool)
        class_size = int(in_class.sum())
        if class_size == 0:
            # No rows of this class: the rate has no meaningful value.
            ROC_data[column] = np.nan
            continue

        # Sort the class's scores once, then locate every threshold with a
        # binary search instead of re-scanning all rows per threshold.
        # NaN scores never satisfy ``>=``, so they are dropped from the
        # numerator but still counted in ``class_size``.
        class_scores = scores[in_class]
        class_scores = np.sort(class_scores[~np.isnan(class_scores)])

        # side="left" returns the index of the first score >= cutoff, so the
        # remainder of the array is exactly the set of scores >= cutoff.
        first_at_or_above = np.searchsorted(class_scores, cutoffs, side="left")
        ROC_data[column] = (class_scores.size - first_at_or_above) / class_size

    return ROC_data

In [None]:
def draw_ROC(ROC_data, linecolor):
    """Scatter-plot ROC operating points with a dashed reference diagonal.

    Parameters:
        ROC_data (pd.DataFrame): table with "P_fa" and "P_d" columns; P_fa is
            drawn on the x-axis and P_d on the y-axis.
        linecolor: matplotlib color used for the scatter markers.

    Returns:
        None. A new figure is created and shown via ``plt.show()``.
    """
    _fig, ax = plt.subplots()

    # Fix both axes slightly beyond [0, 1] so points on the border stay visible.
    ax.set_xlabel("P_fa")
    ax.set_ylabel("P_d")
    ax.set_xlim(-0.01, 1.01)
    ax.set_ylim(-0.01, 1.01)

    ax.scatter(
        ROC_data["P_fa"],
        ROC_data["P_d"],
        color=linecolor,
        s=8,
        marker="o",
    )

    # Dashed line from the lower-left to the upper-right corner of the axes.
    x_span = ax.get_xlim()
    y_span = ax.get_ylim()
    ax.plot(x_span, y_span, c="Black", alpha=0.6, ls='--')

    # Square plotting box.
    ax.set_box_aspect(1)
    plt.show()

In [None]:
# Read the file as headerless (its first line is treated as data) and name
# the two columns explicitly.
df = pd.read_csv(
    "Data/moderateData.csv",
    header=None,
    names=["truth", "lambda"],
)

In [None]:
# Method 1: use every decision statistic as a threshold
thresholds_1 = df["lambda"].copy()
ROC_data_1 = get_ROC_data(thresholds_1, df)
draw_ROC(ROC_data_1, "SteelBlue")

# Method 2: 99 thresholds spaced linearly between the smallest and the
# largest decision statistic
min_lambda, max_lambda = df["lambda"].min(), df["lambda"].max()
thresholds_2 = pd.Series(np.linspace(min_lambda, max_lambda, 99))
ROC_data_2 = get_ROC_data(thresholds_2, df)
draw_ROC(ROC_data_2, "Red")

# Method 3: sort the decision statistics and keep every n-th one
n = max(len(df) // 99, 1)  # step is 1 when there are fewer than 99 rows
thresholds_3 = df["lambda"].sort_values().iloc[::n]
ROC_data_3 = get_ROC_data(thresholds_3, df)
draw_ROC(ROC_data_3, "Green")

# Method 4: use every decision statistic from an H0 (truth == 0) row
thresholds_4 = df.loc[df["truth"] == 0, "lambda"].copy()
ROC_data_4 = get_ROC_data(thresholds_4, df)
draw_ROC(ROC_data_4, "Orange")

# Method 5: thresholds selected so that PFA is sampled roughly linearly from
# 0 to 1 at an interval of about 0.01. Taking every n-th sorted H0 statistic
# moves P_fa by n / (number of H0 rows) per step.
h0_lambdas = df.loc[df["truth"] == 0, "lambda"].sort_values()
# The step is derived from the H0 count (not the total row count) and clamped
# to 1: with fewer than 100 H0 rows the integer division gives 0, and a zero
# slice step raises ValueError.
n = max(len(h0_lambdas) // 100, 1)
thresholds_5 = h0_lambdas.iloc[::n]
ROC_data_5 = get_ROC_data(thresholds_5, df)
draw_ROC(ROC_data_5, "Purple")