In [None]:
import matplotlib.colors as mcolors
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.cluster.hierarchy import dendrogram
from sklearn.cluster import AgglomerativeClustering, KMeans
from sklearn.datasets import make_classification
from sklearn.inspection import DecisionBoundaryDisplay
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC

In [None]:
# Global plotting configuration shared by the figure cells of this notebook.
#
# matplotlib 3.6 renamed its bundled seaborn style sheets from "seaborn-<name>"
# to "seaborn-v0_8-<name>" and matplotlib 3.8 removed the old names, so the
# legacy spelling raises OSError on current releases. Use the new name when the
# installed matplotlib provides it and fall back to the legacy name otherwise;
# the order ("white" first, then "paper") is kept because later styles override
# earlier ones.
plt.style.use([
    f"seaborn-v0_8-{name}"
    if f"seaborn-v0_8-{name}" in plt.style.available
    else f"seaborn-{name}"
    for name in ("white", "paper")
])
# Two-colour default palette for seaborn plots.
sns.set_palette(["#57b6a2", "#f0b67f"])
sns.set_context("talk")
# Presumably a (width, height) figure size in inches for half-width figures;
# it is not referenced in this cell - confirm against the cells that use it.
half_size = (5.4, 4.04)

In [None]:
# Synthetic two-group dataset: `n_examples` cases followed by `n_examples`
# controls, each with two normally distributed scores (unit standard deviation).
rng = np.random.default_rng(42)
n_examples = 20

df = pd.DataFrame(
    {
        # Cases occupy the first n_examples rows, controls the remainder.
        "label": [flag for flag in (True, False) for _ in range(n_examples)],
        "disease": [
            group for group in ("Case", "Control") for _ in range(n_examples)
        ],
        # `rng` is consumed in this exact order (score_1 case, score_1 control,
        # score_2 case, score_2 control); reordering would change the values.
        "score_1": np.concatenate(
            [rng.normal(mean, 1, size=n_examples) for mean in (3.5, 2)]
        ),
        "score_2": np.concatenate(
            [rng.normal(mean, 1, size=n_examples) for mean in (4, 2)]
        ),
    }
)

In [None]:
# Swarm plot of score_1 per disease group, with the decision function of a
# linear SVM trained on score_1 drawn as a filled contour, saved to
# "decision_threshold.png".
fig, ax = plt.subplots(figsize=(5, 4))

sns.swarmplot(x=df["score_1"], y=df["disease"], size=9, ax=ax)

# The classifier is trained on score_1 only. An all-zero second column is
# appended so the fitted model accepts the 2-D (x, y) grid points built below.
X = df["score_1"].values.reshape(-1, 1)
X = np.hstack((X, np.zeros_like(X)))
y = df["label"].values.astype(np.uint8)
clf = LinearSVC(random_state=42)
clf.fit(X, y)

# Grid over which the decision function is evaluated: the observed score range
# padded by 0.1 horizontally, and a fixed -1..2 span vertically (presumably
# chosen to extend past the two category rows of the swarm plot - confirm).
x_min, x_max = X[:, 0].min() - .1, X[:, 0].max() + .1
y_min, y_max = -1, 2
h = .01  # grid step in both directions
xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                     np.arange(y_min, y_max, h))
Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])

# Put the result into a color plot.
Z = Z.reshape(xx.shape)
ax.contourf(xx, yy, Z, alpha=0.5)

ax.set_xlabel("Protein Abundance")
ax.set_ylabel("")

# Act on `fig` explicitly instead of pyplot's implicit "current figure" so the
# cell keeps working if another figure happens to be active.
fig.tight_layout(pad=0.01)

fig.savefig("decision_threshold.png", dpi=300, bbox_inches="tight")
plt.show()
plt.close(fig)