In [None]:
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.datasets import make_classification, make_s_curve
from sklearn.decomposition import PCA
from sklearn.manifold import MDS, TSNE
from sklearn.metrics import pairwise_distances
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

In [None]:
# Select seaborn's "talk" plotting context for every figure in this notebook.
sns.set_context(context="talk")

In [None]:
# Synthetic two-feature data set: both features follow the same linear trend
# (0 to 10) with independent Gaussian noise added to each of them.
np.random.seed(13)
n_samples = 20
x = np.linspace(0, 10, n_samples) + np.random.normal(scale=0.5, size=n_samples)
y = np.linspace(0, 10, n_samples) + np.random.normal(scale=0.5, size=n_samples)

# Layout for the two-panel figures: a tall panel "A" (three rows) stacked on
# top of a short panel "B" (one row).
mosaic = """
A
A
A
B"""


def _save_pca_intro_figure(path, show_axis=False, show_projection=False):
    """Draw one build-up step of the introductory PCA figure and save it.

    Panel "A" always shows the 2-D scatter of ``x`` against ``y``; panel "B"
    is the (initially empty) 1-D ``$z_1$`` axis.

    Parameters
    ----------
    path : str
        Output file passed to ``Figure.savefig``.
    show_axis : bool, default False
        Draw the dashed diagonal (the ``$z_1$`` direction) in panel "A".
    show_projection : bool, default False
        Colour every point individually and show its projection onto the
        diagonal in panel "B".
    """
    fig, axes = plt.subplot_mosaic(mosaic, figsize=(7.5, 5))

    # Individual colours are only needed once points have to be matched
    # between the two panels; `cmap` must not be passed with a single colour.
    if show_projection:
        point_style = {"c": np.arange(len(x)), "cmap": "tab20"}
    else:
        point_style = {"c": "#404040"}

    ax = axes["A"]
    ax.scatter(x, y, **point_style)
    if show_axis:
        ax.plot([-1, 11], [-1, 11], ls="--", c="#57b6a2")
    ax.set_xlabel("Cholesterol abundance")
    ax.set_ylabel("Bile acids abundance")
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_xlim([-1.5, 11.5])
    ax.set_ylim([-1.5, 11.5])

    ax = axes["B"]
    if show_projection:
        # Orthogonal projection of (x, y) onto the diagonal y = x lands on
        # ((x + y) / 2, (x + y) / 2). Plotting that shared coordinate (the z1
        # score up to a constant factor of sqrt(2)) keeps every point directly
        # below its projection in panel "A". Plotting the raw `x` values here
        # would show the first original feature, not the component.
        ax.scatter((x + y) / 2, np.zeros(len(x)), **point_style)
        ax.set_xlim([-1.5, 11.5])
    ax.set_xlabel("$z_1$")
    ax.set_xticks([])
    ax.set_yticks([])

    fig.tight_layout()
    fig.savefig(path, dpi=300, bbox_inches="tight")
    plt.close(fig)


# Three successive build-up steps of the same figure.
_save_pca_intro_figure("fig/pca_1.png")
_save_pca_intro_figure("fig/pca_2.png", show_axis=True)
_save_pca_intro_figure("fig/pca_3.png", show_axis=True, show_projection=True)

In [None]:
# Load the food count table; its "filename" column becomes the row index.
food_counts = pd.read_csv("pca_ibd_fecal_food_count.csv", index_col="filename")

# Drop the last six characters of every row label
# (presumably a file extension -- TODO confirm against the CSV).
trimmed_labels = food_counts.index.str.slice(stop=-6)
food_counts.index = trimmed_labels

In [None]:
# Preview the first nine rows of the table.
food_counts.iloc[:9]

In [None]:
food_counts_pca = PCA().fit(food_counts.values.T)
pd.DataFrame(food_counts_pca.components_.T, index=food_counts.index).head(9)[[0, 1]]

In [None]:
fig, ax = plt.subplots(figsize=(7.5, 5))

ax.scatter(
    food_counts_pca.components_[0],
    food_counts_pca.components_[1],
    c="#404040",
)

ax.set_xlabel("$z_1$")
ax.set_ylabel("$z_2$")

plt.savefig("fig/pca_4.png", dpi=300, bbox_inches="tight")
# plt.show()
plt.close()

In [None]:
# Fit a full PCA on the feature matrix of a synthetic classification data set
# (10 features, 5 of them informative, none redundant).
variance_features = make_classification(
    n_features=10, n_informative=5, n_redundant=0, random_state=1
)[0]
pca_variance = PCA().fit(variance_features)

# 1-based component numbers for the bar labels.
component_numbers = np.arange(1, len(pca_variance.explained_variance_) + 1)

fig, ax = plt.subplots(figsize=(7.5, 5))
sns.barplot(
    x=component_numbers,
    y=pca_variance.explained_variance_ratio_,
    color="#404040",
    ax=ax,
)

# The ratios are fractions of 1; render the tick labels as whole percentages.
percent_ticks = mticker.PercentFormatter(xmax=1, decimals=0)
ax.yaxis.set_major_formatter(percent_ticks)
ax.set(xlabel="Principal component", ylabel="Explained variance ratio")

fig.savefig("fig/pca_explained_variance.png", dpi=300, bbox_inches="tight")
plt.close(fig)

In [None]:
# Five fixed x1 values; x2 is the same sequence plus seeded Gaussian noise.
# `x`, `y` and `n_samples` are reused by the following figure cells.
np.random.seed(13)
x = np.asarray([-8, -5, 4, 6, 7])
n_samples = 5
y = x + np.random.normal(scale=2, size=n_samples)

fig, ax = plt.subplots(figsize=(5, 5))
ax.scatter(x, y, c="#404040")

# NOTE(review): both component labels are drawn in white here, presumably as
# invisible placeholders so this figure crops like the later ones -- confirm.
for label_x, label in ((10.25, "$z_1$"), (-11.5, "$z_2$")):
    ax.text(label_x, 10, label, c="white")

ax.set_xlabel("$x_1$", loc="right")
ax.set_ylabel("$x_2$", loc="top")
ax.set(
    xticks=[],
    yticks=[],
    xticklabels=[],
    yticklabels=[],
    xlim=(-11, 11),
    ylim=(-11, 11),
)

# Draw the axes as a cross through the origin, each ending in an arrow head.
sns.despine(ax=ax)
ax.spines[["left", "bottom"]].set_position(("data", 0))
for tip, marker, tip_transform in (
    ((1, 0), ">k", ax.get_yaxis_transform()),
    ((0, 1), "^k", ax.get_xaxis_transform()),
):
    ax.plot(*tip, marker, transform=tip_transform, clip_on=False)

fig.tight_layout()
fig.savefig("fig/pca_5.png", dpi=300, bbox_inches="tight")
plt.close(fig)

In [None]:
# Same scatter as the previous figure, now with the z1 direction drawn in.
fig, ax = plt.subplots(figsize=(5, 5))
ax.scatter(x, y, c="#404040")

# z1 label is visible; the z2 label stays white.
for label_x, label, label_color in (
    (10.25, "$z_1$", "#57b6a2"),
    (-11.5, "$z_2$", "white"),
):
    ax.text(label_x, 10, label, c=label_color)

# Arrow along the diagonal; "<|-" puts the head at the `xytext` end (10, 10).
ax.annotate(
    "",
    xy=(-10, -10),
    xytext=(10, 10),
    c="#57b6a2",
    arrowprops=dict(arrowstyle="<|-", color="#57b6a2"),
)

ax.set_xlabel("$x_1$", loc="right")
ax.set_ylabel("$x_2$", loc="top")
ax.set(
    xticks=[],
    yticks=[],
    xticklabels=[],
    yticklabels=[],
    xlim=(-11, 11),
    ylim=(-11, 11),
)

# Draw the axes as a cross through the origin, each ending in an arrow head.
sns.despine(ax=ax)
ax.spines[["left", "bottom"]].set_position(("data", 0))
for tip, marker, tip_transform in (
    ((1, 0), ">k", ax.get_yaxis_transform()),
    ((0, 1), "^k", ax.get_xaxis_transform()),
):
    ax.plot(*tip, marker, transform=tip_transform, clip_on=False)

fig.tight_layout()
fig.savefig("fig/pca_6.png", dpi=300, bbox_inches="tight")
plt.close(fig)

In [None]:
# Scatter with the z1 direction and each point's projection onto it.
fig, ax = plt.subplots(figsize=(5, 5))
ax.scatter(x, y, c="#404040")

# z1 label is visible; the z2 label stays white.
for label_x, label, label_color in (
    (10.25, "$z_1$", "#57b6a2"),
    (-11.5, "$z_2$", "white"),
):
    ax.text(label_x, 10, label, c=label_color)

# Arrow along the diagonal; "<|-" puts the head at the `xytext` end (10, 10).
ax.annotate(
    "",
    xy=(-10, -10),
    xytext=(10, 10),
    c="#57b6a2",
    arrowprops=dict(arrowstyle="<|-", color="#57b6a2"),
)

# Dashed segment from every point to (foot, foot), its projection on y = x.
z1_foot = x + (y - x) / 2
for xi, yi, foot in zip(x, y, z1_foot):
    ax.plot([xi, foot], [yi, foot], "--", c="#57b6a2", zorder=0.5)

ax.set_xlabel("$x_1$", loc="right")
ax.set_ylabel("$x_2$", loc="top")
ax.set(
    xticks=[],
    yticks=[],
    xticklabels=[],
    yticklabels=[],
    xlim=(-11, 11),
    ylim=(-11, 11),
)

# Draw the axes as a cross through the origin, each ending in an arrow head.
sns.despine(ax=ax)
ax.spines[["left", "bottom"]].set_position(("data", 0))
for tip, marker, tip_transform in (
    ((1, 0), ">k", ax.get_yaxis_transform()),
    ((0, 1), "^k", ax.get_xaxis_transform()),
):
    ax.plot(*tip, marker, transform=tip_transform, clip_on=False)

fig.tight_layout()
fig.savefig("fig/pca_7.png", dpi=300, bbox_inches="tight")
plt.close(fig)

In [None]:
# Scatter with both component directions and the projections onto z1.
fig, ax = plt.subplots(figsize=(5, 5))
ax.scatter(x, y, c="#404040")

# Each component gets a coloured label and an arrow. "<|-" puts the arrow
# head at the `xytext` end, i.e. next to the label.
for label_x, label, component_color, arrow_tail, arrow_head in (
    (10.25, "$z_1$", "#57b6a2", (-10, -10), (10, 10)),
    (-11.5, "$z_2$", "#ee266d", (10, -10), (-10, 10)),
):
    ax.text(label_x, 10, label, c=component_color)

# Arrows are added after both labels, in the same order as the labels.
for component_color, arrow_tail, arrow_head in (
    ("#57b6a2", (-10, -10), (10, 10)),
    ("#ee266d", (10, -10), (-10, 10)),
):
    ax.annotate(
        "",
        xy=arrow_tail,
        xytext=arrow_head,
        c=component_color,
        arrowprops=dict(arrowstyle="<|-", color=component_color),
    )

# Dashed segment from every point to (foot, foot), its projection on y = x.
z1_foot = x + (y - x) / 2
for xi, yi, foot in zip(x, y, z1_foot):
    ax.plot([xi, foot], [yi, foot], "--", c="#57b6a2", zorder=0.5)

ax.set_xlabel("$x_1$", loc="right")
ax.set_ylabel("$x_2$", loc="top")
ax.set(
    xticks=[],
    yticks=[],
    xticklabels=[],
    yticklabels=[],
    xlim=(-11, 11),
    ylim=(-11, 11),
)

# Draw the axes as a cross through the origin, each ending in an arrow head.
sns.despine(ax=ax)
ax.spines[["left", "bottom"]].set_position(("data", 0))
for tip, marker, tip_transform in (
    ((1, 0), ">k", ax.get_yaxis_transform()),
    ((0, 1), "^k", ax.get_xaxis_transform()),
):
    ax.plot(*tip, marker, transform=tip_transform, clip_on=False)

fig.tight_layout()
fig.savefig("fig/pca_8.png", dpi=300, bbox_inches="tight")
plt.close(fig)

In [None]:
# Scatter with both component directions and the projections onto each.
fig, ax = plt.subplots(figsize=(5, 5))
ax.scatter(x, y, c="#404040")

ax.text(10.25, 10, "$z_1$", c="#57b6a2")
ax.text(-11.5, 10, "$z_2$", c="#ee266d")

# One arrow per component; "<|-" puts the head at the `xytext` end.
for component_color, arrow_tail, arrow_head in (
    ("#57b6a2", (-10, -10), (10, 10)),
    ("#ee266d", (10, -10), (-10, 10)),
):
    ax.annotate(
        "",
        xy=arrow_tail,
        xytext=arrow_head,
        c=component_color,
        arrowprops=dict(arrowstyle="<|-", color=component_color),
    )

# Projection onto the diagonal y = x ends at (foot, foot).
z1_foot = x + (y - x) / 2
for xi, yi, foot in zip(x, y, z1_foot):
    ax.plot([xi, foot], [yi, foot], "--", c="#57b6a2", zorder=0.5)

# Projection onto the anti-diagonal y = -x ends at (-half_gap, half_gap).
half_gap = (y - x) / 2
for xi, yi, gap in zip(x, y, half_gap):
    ax.plot([xi, -gap], [yi, gap], "--", c="#ee266d", zorder=0.5)

ax.set_xlabel("$x_1$", loc="right")
ax.set_ylabel("$x_2$", loc="top")
ax.set(
    xticks=[],
    yticks=[],
    xticklabels=[],
    yticklabels=[],
    xlim=(-11, 11),
    ylim=(-11, 11),
)

# Draw the axes as a cross through the origin, each ending in an arrow head.
sns.despine(ax=ax)
ax.spines[["left", "bottom"]].set_position(("data", 0))
for tip, marker, tip_transform in (
    ((1, 0), ">k", ax.get_yaxis_transform()),
    ((0, 1), "^k", ax.get_xaxis_transform()),
):
    ax.plot(*tip, marker, transform=tip_transform, clip_on=False)

fig.tight_layout()
fig.savefig("fig/pca_9.png", dpi=300, bbox_inches="tight")
plt.close(fig)

In [None]:
# The same points expressed in the rotated (z1, z2) coordinate system.
fig, ax = plt.subplots(figsize=(5, 5))

# Coordinates along the two component directions, using the same 1/2 scaling
# as the projection segments drawn in the preceding figures:
#   z1 points towards (+1, +1)  ->  (x + y) / 2
#   z2 points towards (-1, +1)  ->  (y - x) / 2
# The z2 sign follows the arrow drawn in the preceding figures (head at
# (-10, 10)), so a point above the diagonal there appears above the z1 axis
# here. The previous version used -(y - x) / 2, which mirrored the picture.
z1 = (x + y) / 2
z2 = (y - x) / 2
# zorder=5 keeps the markers on top of the axis lines through the origin.
ax.scatter(z1, z2, c="#404040", zorder=5)

ax.set_xlabel("$z_1$", loc="right")
ax.set_ylabel("$z_2$", loc="top")
ax.set_xticks([])
ax.set_yticks([])
ax.set_xlim(-11, 11)
ax.set_ylim(-11, 11)

# Axes cross at the origin; each axis, its label and its arrow head use the
# colour of the corresponding component in the preceding figures.
sns.despine(ax=ax)
ax.spines[["left", "bottom"]].set_position(("data", 0))
ax.spines["bottom"].set_color("#57b6a2")
ax.xaxis.label.set_color("#57b6a2")
ax.plot(
    1, 0, ">", transform=ax.get_yaxis_transform(), clip_on=False, c="#57b6a2"
)
ax.spines["left"].set_color("#ee266d")
ax.yaxis.label.set_color("#ee266d")
ax.plot(
    0, 1, "^", transform=ax.get_xaxis_transform(), clip_on=False, c="#ee266d"
)

fig.tight_layout()

fig.savefig("fig/pca_10.png", dpi=300, bbox_inches="tight", facecolor="white")
plt.close(fig)

In [None]:
# Two-panel figure: the 2-D data with its projection onto z1 (panel "A") and
# the resulting one-dimensional representation (panel "B").
fig, axes = plt.subplot_mosaic(mosaic, figsize=(5, 6.7))

# One colour per point so that points can be matched between the panels.
point_ids = np.arange(len(x))
# Orthogonal projection of (x, y) onto the diagonal y = x is (foot, foot).
z1_foot = (x + y) / 2

ax = axes["A"]
ax.scatter(x, y, c=point_ids, cmap="Set1")
ax.text(10.25, 10, "$z_1$", c="#57b6a2")
# "<|-" places the arrow head at the `xytext` end, i.e. at (10, 10).
ax.annotate(
    "",
    xy=[-10, -10],
    xytext=[10, 10],
    c="#57b6a2",
    arrowprops={"arrowstyle": "<|-", "color": "#57b6a2"},
)
for xi, yi, foot in zip(x, y, z1_foot):
    ax.plot([xi, foot], [yi, foot], "--", c="#57b6a2", zorder=0.5)

ax.set_xlabel("$x_1$", loc="right")
ax.set_ylabel("$x_2$", loc="top")
ax.set_xticks([])
ax.set_yticks([])
ax.set_xlim(-11, 11)
ax.set_ylim(-11, 11)

sns.despine(ax=ax)
ax.spines[["left", "bottom"]].set_position(("data", 0))
ax.plot(1, 0, ">k", transform=ax.get_yaxis_transform(), clip_on=False)
ax.plot(0, 1, "^k", transform=ax.get_xaxis_transform(), clip_on=False)

ax = axes["B"]
# Plot the projected coordinate, not the raw x1 value. The axis is labelled
# z1 and panel "A" shows exactly these projection points. The previous
# version plotted `x`, i.e. the first original feature.
ax.scatter(z1_foot, np.zeros(len(x)), c=point_ids, cmap="Set1")

ax.set_xlabel("$z_1$", loc="right")
ax.set_xticks([])
ax.set_yticks([])
ax.set_xlim([-11.5, 11.5])

# Keep only a coloured horizontal axis through y = 0 with an arrow head.
sns.despine(ax=ax, left=True)
ax.spines["bottom"].set_position(("data", 0))
ax.spines["bottom"].set_color("#57b6a2")
ax.xaxis.label.set_color("#57b6a2")
ax.plot(
    1, 0, ">", transform=ax.get_yaxis_transform(), clip_on=False, c="#57b6a2"
)

fig.tight_layout()

fig.savefig("fig/pca_11.png", dpi=300, bbox_inches="tight")
plt.close(fig)

In [None]:
# Two-panel figure in which every point lies exactly on the z1 diagonal
# (x is used for both coordinates); panel "B" is left empty.
fig, axes = plt.subplot_mosaic(mosaic, figsize=(5, 6.7))

top_panel = axes["A"]
top_panel.scatter(x, x, c=np.arange(len(x)), cmap="Set1")
top_panel.text(10.25, 10, "$z_1$", c="#57b6a2")
# "<|-" places the arrow head at the `xytext` end, i.e. at (10, 10).
top_panel.annotate(
    "",
    xy=(-10, -10),
    xytext=(10, 10),
    c="#57b6a2",
    arrowprops=dict(arrowstyle="<|-", color="#57b6a2"),
)

top_panel.set_xlabel("$x_1$", loc="right")
top_panel.set_ylabel("$x_2$", loc="top")
top_panel.set(
    xticks=[],
    yticks=[],
    xticklabels=[],
    yticklabels=[],
    xlim=(-11, 11),
    ylim=(-11, 11),
)

# Draw the axes as a cross through the origin, each ending in an arrow head.
sns.despine(ax=top_panel)
top_panel.spines[["left", "bottom"]].set_position(("data", 0))
for tip, marker, tip_transform in (
    ((1, 0), ">k", top_panel.get_yaxis_transform()),
    ((0, 1), "^k", top_panel.get_xaxis_transform()),
):
    top_panel.plot(*tip, marker, transform=tip_transform, clip_on=False)

# Bottom panel: only the axis label, no ticks and no data.
bottom_panel = axes["B"]
bottom_panel.set_xlabel("$z_1$")
bottom_panel.set(xticks=[], yticks=[], xticklabels=[], yticklabels=[])

# Keep `ax` bound to the last panel, as the cell did before.
ax = bottom_panel

fig.tight_layout()
fig.savefig("fig/pca_12.png", dpi=300, bbox_inches="tight")
plt.close(fig)

In [None]:
# 3-D view of the S-curve data set; the second array returned by
# `make_s_curve` is used to colour the points.
pcoa_data, color = make_s_curve(1000, random_state=1)

fig = plt.figure(figsize=(7.5, 7.5))
ax = fig.add_subplot(projection="3d")

first_axis, second_axis, third_axis = pcoa_data.T
ax.scatter(first_axis, second_axis, third_axis, c=color, cmap="viridis")
# Fixed camera position: elevation 4 degrees, azimuth -72 degrees.
ax.view_init(4, -72)

fig.savefig("fig/pcoa_0.png", dpi=300, bbox_inches="tight")
plt.close(fig)

In [None]:
# S-curve data set: 1000 three-dimensional points, with the second returned
# array used to colour the points in every embedding below.
pcoa_data, color = make_s_curve(1000, random_state=1)


def _save_embedding(embedding, title, path):
    """Scatter a two-dimensional embedding and save the figure.

    Parameters
    ----------
    embedding : array of shape (n_points, 2)
        One row per point of ``pcoa_data``; column 0 is plotted as ``$z_1$``
        and column 1 as ``$z_2$``.
    title : str
        Axes title.
    path : str
        Output file passed to ``Figure.savefig``.
    """
    fig, ax = plt.subplots(figsize=(7.5, 5))
    ax.scatter(x=embedding[:, 0], y=embedding[:, 1], c=color, cmap="viridis")
    ax.set_xlabel("$z_1$")
    ax.set_ylabel("$z_2$")
    ax.set_title(title)
    fig.savefig(path, dpi=300, bbox_inches="tight")
    plt.close(fig)


# PCA scores of the points. The data are passed untransposed (one row per
# point) and the projected coordinates come from `fit_transform`. The previous
# version fitted on the transposed data and plotted `components_`, which
# centres each point across its three coordinates and is not a PCA projection.
pcoa_euclidean = PCA(2).fit_transform(pcoa_data)
_save_embedding(pcoa_euclidean, "PCA", "fig/pcoa_1.png")

# Metric MDS on precomputed Manhattan distances. `random_state` is fixed so
# that the figure is reproducible from one run to the next.
pcoa_manhattan = MDS(dissimilarity="precomputed", random_state=1).fit_transform(
    pairwise_distances(pcoa_data, metric="manhattan")
)
_save_embedding(pcoa_manhattan, "PCoA (Manhattan)", "fig/pcoa_2.png")

# t-SNE with the library's default perplexity.
tsne = TSNE(
    learning_rate="auto", init="pca", random_state=1, n_jobs=-1
).fit_transform(pcoa_data)
_save_embedding(tsne, "t-SNE", "fig/tsne_1.png")

# t-SNE with perplexity 5. This is the only fit with this setting; the
# earlier duplicate fit was overwritten before it was ever plotted.
tsne = TSNE(
    perplexity=5, learning_rate="auto", init="pca", random_state=1, n_jobs=-1
).fit_transform(pcoa_data)
_save_embedding(tsne, "t-SNE", "fig/tsne_2.png")