In [1]:
import pandas as pd
import plotly.express as px
from plotly.subplots import make_subplots

# Image 1

In [42]:
# Load the per-cohort haplotype carrier counts used for image 1.
df = pd.read_excel("~/local/populations.xlsx", sheet_name="img1")

# Convert each raw count column into a fraction of the row total ("sum"),
# stored next to the count as "<name>_f".
for col in ["hapA", "hapB", "none"]:
    df[f"{col}_f"] = df[col] / df["sum"]

# Long format: one row per (phenotype, cohort, haplotype fraction), with the
# row order reversed.
# NOTE(review): the reversal presumably controls the top-to-bottom order of the
# horizontal bars -- confirm against the rendered figure.
pats = df.melt(
    id_vars=["phenotype", "cohort"], value_vars=["hapA_f", "hapB_f", "none_f"]
).iloc[::-1]

# Drop only the trailing "_f" marker. The pattern is anchored and the regex
# mode is explicit so the result does not depend on the pandas version's
# default for `regex`, and a "_f" elsewhere in a name is left untouched.
pats["variable"] = pats["variable"].str.replace(r"_f$", "", regex=True)

# Ordered categorical so that sorting follows hapA < hapB < none instead of
# alphabetical order.
pats["variable"] = pd.Categorical(
    pats["variable"], categories=["hapA", "hapB", "none"], ordered=True
)

# aFTLD-U rows are sorted by haplotype; Controls rows keep the reversed melt
# order. A stable sort keeps the cohorts within one haplotype in their
# existing relative order (the default quicksort gives no such guarantee).
pats = pd.concat(
    [
        pats[pats["phenotype"] == "aFTLD-U"].sort_values(
            by="variable", kind="stable"
        ),
        pats[pats["phenotype"] == "Controls"],
    ]
)

# Horizontal stacked bars of carrier fractions per cohort, one facet column
# per phenotype with no gap between the facets.
haplotype_colors = {"hapA": "red", "hapB": "orange", "none": "green"}
axis_labels = {"value": "Proportion", "variable": "Haplotype", "cohort": "Cohort"}

fig = px.bar(
    pats,
    x="value",
    y="cohort",
    color="variable",
    orientation="h",
    color_discrete_map=haplotype_colors,
    labels=axis_labels,
    facet_col="phenotype",
    facet_col_spacing=0,
)


def _retitle_facet(annotation):
    """Keep only the text after the last "=" in a facet title and enlarge it."""
    facet_value = annotation.text.split("=")[-1]
    annotation.update(text=facet_value, font=dict(size=36), xanchor="left")


fig.for_each_annotation(_retitle_facet)

# Overall look: empty figure/x-axis titles, white plot background, large font,
# zero left/right/bottom margins, and a horizontal legend centred below the
# plotting area.
fig.update_layout(
    title="",
    title_x=0.5,
    xaxis_title="",
    xaxis2_title="",
    yaxis_title="Cohort<br>",
    plot_bgcolor="white",
    font={"size": 24},
    margin={"l": 0, "r": 0, "b": 0},
    legend={
        "orientation": "h",
        "yanchor": "bottom",
        "y": -0.15,
        "xanchor": "center",
        "x": 0.5,
        "title": "Carrier frequency:",
    },
)

# Hide the x tick labels on every facet.
fig.update_xaxes(showticklabels=False)

# Draw the y ticks outside the axis in white.
# NOTE(review): white ticks on a white background are invisible; presumably
# they only add spacing between the cohort names and the bars -- confirm.
fig.update_yaxes(ticks="outside", tickcolor="white")

def _with_block_centers(counts):
    """Return a copy of ``counts`` with the x centre of every stacked block.

    The formulas assume the blocks are stacked left to right as hapA, hapB,
    none: a block's centre is the summed fraction of the blocks before it plus
    half of its own fraction. The results are stored in the new columns
    ``hapA_loc``, ``hapB_loc`` and ``none_loc``.
    """
    centered = counts.copy()
    centered["hapA_loc"] = centered["hapA_f"] / 2
    centered["hapB_loc"] = centered["hapA_f"] + centered["hapB_f"] / 2
    centered["none_loc"] = (
        centered["hapA_f"] + centered["hapB_f"] + centered["none_f"] / 2
    )
    return centered


def _annotate_facet(figure, rows, facet_col, block_styles, total_x):
    """Write the raw counts onto one facet column of ``figure``.

    Parameters
    ----------
    figure : plotly figure whose annotations are extended in place.
    rows : frame produced by ``_with_block_centers`` (one row per cohort).
    facet_col : 1-based facet column the annotations belong to (row is 1).
    block_styles : mapping ``haplotype -> add_annotation keyword arguments``;
        iterated in insertion order, one annotation per haplotype and cohort.
    total_x : x position of the "N=<sum>" label written for every cohort.
    """
    for _, row in rows.iterrows():
        for hap, style in block_styles.items():
            figure.add_annotation(
                x=row[f"{hap}_loc"],
                y=row["cohort"],
                text=f"{row[hap]}",
                col=facet_col,
                row=1,
                **style,
            )
        figure.add_annotation(
            x=total_x,
            y=row["cohort"],
            text=f"N={row['sum']}",
            font=dict(color="black", size=24),
            col=facet_col,
            row=1,
            showarrow=False,
        )


def _inside_label():
    """Style of a count written in white inside its block, without an arrow."""
    return dict(showarrow=False, font=dict(color="white", size=24))


# aFTLD-U facet (column 1): every count is written inside its block.
pats_vals = _with_block_centers(df[df["phenotype"] == "aFTLD-U"])
_annotate_facet(
    fig,
    pats_vals,
    facet_col=1,
    block_styles={
        "hapA": _inside_label(),
        "hapB": _inside_label(),
        "none": _inside_label(),
    },
    total_x=-0.08,
)

# Controls facet (column 2): the hapA count is drawn in black with an arrow
# pointing at the block, and the hapB count is rotated by -60 degrees.
# NOTE(review): presumably because these blocks are too narrow for a
# horizontal label -- confirm against the rendered figure.
con_vals = _with_block_centers(df[df["phenotype"] == "Controls"])
_annotate_facet(
    fig,
    con_vals,
    facet_col=2,
    block_styles={
        "hapA": dict(
            showarrow=True,
            arrowhead=1,
            font=dict(color="black", size=24),
            yshift=0,
            ax=-20,
            ay=-15,
        ),
        "hapB": dict(
            showarrow=False,
            font=dict(color="white", size=24),
            textangle=-60,
        ),
        "none": _inside_label(),
    },
    total_x=-0.18,
)

# Each GWAS phase gets a dotted rectangle (in paper coordinates) and a
# coloured text label placed at the y position of the named cohort.
# NOTE(review): the y spans are hard-coded paper fractions; presumably they
# frame the bar of the cohort named in the same entry -- confirm.
gwas_phases = [
    {
        "label": "GWAS Phase I",
        "cohort": "Cohort 1",
        "color": "pink",
        "line_width": 2,
        "y_span": (0.8, 1),
    },
    {
        "label": "GWAS Phase II",
        "cohort": "Cohort 1+2",
        "color": "purple",
        "line_width": 3,
        "y_span": (0.4, 0.6),
    },
]

for phase in gwas_phases:
    y_low, y_high = phase["y_span"]
    fig.add_shape(
        type="rect",
        x0=-0.12,
        x1=0.98,
        y0=y_low,
        y1=y_high,
        line=dict(color=phase["color"], width=phase["line_width"], dash="dot"),
        xref="paper",
        yref="paper",
    )
    fig.add_annotation(
        x=-0.23,
        y=phase["cohort"],
        yshift=20,
        text=phase["label"],
        showarrow=False,
        font=dict(size=20, color=phase["color"]),
    )


fig.show()

# Image 2

In [30]:
# Load the counts for image 2 and add, for every count column, its fraction
# of the row total ("sum") as "<name>_f".
count_columns = ["hapA", "hapB", "none", "none2"]
df = pd.read_excel("~/local/populations.xlsx", sheet_name="img2")
df = df.assign(**{f"{name}_f": df[name] / df["sum"] for name in count_columns})

# The "none2" fraction is exposed under the label used in the legend.
df["none+hapB"] = df["none2_f"]

# Long format (one row per cohort and series), in reversed row order.
melted = df.melt(
    id_vars=["cohort"],
    value_vars=["hapA_f", "hapB_f", "none_f", "none+hapB"],
)
df2 = melted.iloc[::-1]

# Strip the "_f" marker and impose the custom series order in one step.
series_order = ["hapA", "hapB", "none", "none+hapB"]
df2["variable"] = pd.Categorical(
    df2["variable"].str.replace("_f", ""),
    categories=series_order,
    ordered=True,
)

# Horizontal stacked bars of carrier fractions per cohort (single panel).
series_order = ["hapA", "hapB", "none", "none+hapB"]
series_colors = dict(zip(series_order, ["red", "orange", "green", "#022b0f"]))

fig = px.bar(
    df2,
    x="value",
    y="cohort",
    color="variable",
    orientation="h",
    color_discrete_map=series_colors,
    category_orders={"variable": series_order},
    labels={"value": "Proportion", "variable": "Haplotype", "cohort": "Cohort"},
)

# Title, background, fonts, margins and fixed width, plus a horizontal legend
# placed below the plotting area.
fig.update_layout(
    title="Non-aFTLD-U populations",
    title_x=0.55,
    title_xanchor="left",
    xaxis_title="",
    yaxis_title="Cohort<br>",
    plot_bgcolor="white",
    font={"size": 24},
    margin={"l": 0, "r": 0, "b": 0},
    width=1200,
    legend={
        "orientation": "h",
        "yanchor": "bottom",
        "y": -0.15,
        "xanchor": "center",
        "x": 0.3,
        "title": "Carrier frequency:",
    },
)

# Hide the x tick labels; draw the y ticks outside the axis in white.
fig.update_xaxes(showticklabels=False)
fig.update_yaxes(ticks="outside", tickcolor="white")

# add annotations
# Centre of every stacked block: the summed fraction of the blocks before it
# plus half of its own fraction, accumulated left to right.
df_vals = df.copy()
left_edge = 0
for fraction_col, center_col in [
    ("hapA_f", "hapA_loc"),
    ("hapB_f", "hapB_loc"),
    ("none_f", "none_loc"),
    ("none2_f", "none+hapB_loc"),
]:
    df_vals[center_col] = left_edge + df_vals[fraction_col] / 2
    left_edge = left_edge + df_vals[fraction_col]


def _inside(x_pos, cohort, count):
    """Keyword arguments for a white count label written inside a block."""
    return dict(
        x=x_pos,
        y=cohort,
        text=f"{count}",
        showarrow=False,
        font=dict(color="white", size=24),
    )


for _, rec in df_vals.iterrows():
    cohort = rec["cohort"]

    # hapA: always labelled, in black, with an arrow pointing at the block.
    fig.add_annotation(
        x=rec["hapA_loc"],
        y=cohort,
        text=f"{rec['hapA']}",
        arrowhead=1,
        font=dict(color="black", size=24),
        yshift=0,
        ax=-20,
        ay=-15,
    )

    # hapB: counts above 10 are written inside the block; other non-zero
    # counts get a black label with an arrow; a zero count gets no label.
    if rec["hapB"] > 10:
        fig.add_annotation(**_inside(rec["hapB_loc"], cohort, rec["hapB"]))
    elif rec["hapB"] != 0:
        fig.add_annotation(
            x=rec["hapB_loc"],
            y=cohort,
            text=f"{rec['hapB']}",
            arrowhead=1,
            font=dict(color="black", size=24),
            yshift=0,
            ax=-15,
            ay=-35,
        )

    # none / none2: labelled inside the block unless the count is zero.
    if rec["none"] != 0:
        fig.add_annotation(**_inside(rec["none_loc"], cohort, rec["none"]))
    if rec["none2"] != 0:
        fig.add_annotation(**_inside(rec["none+hapB_loc"], cohort, rec["none2"]))

    # Row total, right-aligned to the left of the bar.
    fig.add_annotation(
        x=-0.06,
        y=cohort,
        text=f"N={rec['sum']}",
        xanchor="right",
        font=dict(color="black", size=24),
        showarrow=False,
    )

fig.show()

# Image 3


In [20]:
# Load the repeat-classification counts for image 3 and add, for every count
# column, its fraction of the row total ("sum") as "<name>_f".
repeat_columns = [
    "CT repeat",
    "Short CT repeat",
    "CnT repeat",
    "12-mer repeat",
    "CCCTCT repeat",
    "no repeat",
]
df = pd.read_excel("~/local/populations.xlsx", sheet_name="img3")
df = df.assign(**{f"{name}_f": df[name] / df["sum"] for name in repeat_columns})

# Same fraction under a name carrying HTML subscript markup; this is the
# spelling that is melted below and therefore shown in the figure.
df["C<sub>n</sub>T repeat_f"] = df["CnT repeat_f"]

display_names = [
    "CT repeat",
    "Short CT repeat",
    "C<sub>n</sub>T repeat",
    "12-mer repeat",
    "CCCTCT repeat",
    "no repeat",
]
melted = df.melt(
    id_vars=["haplotype", "phenotype", "classification"],
    value_vars=[f"{name}_f" for name in display_names],
)
df2 = melted.iloc[::-1]

df2["variable"] = df2["variable"].str.replace("_f", "")

# "Controls" is shown as "non-aFTLD-U subjects"; the ordered categorical puts
# aFTLD-U first when sorting.
renamed_phenotype = df2["phenotype"].mask(
    df2["phenotype"] == "Controls", "non-aFTLD-U subjects"
)
df2["phenotype"] = pd.Categorical(
    renamed_phenotype,
    categories=["aFTLD-U", "non-aFTLD-U subjects"],
    ordered=True,
)

# Row order: hapA (sorted by phenotype), then hapB, then hapA+B.
by_haplotype = {
    hap: df2[df2["haplotype"] == hap] for hap in ("hapA", "hapB", "hapA+B")
}
df2 = pd.concat(
    [
        by_haplotype["hapA"].sort_values("phenotype"),
        by_haplotype["hapB"],
        by_haplotype["hapA+B"],
    ]
)

# Repeat classes in the intended stacking/legend order, mapped to their bar
# colours. The names must match the values of the melted "variable" column
# exactly, including the HTML-subscript spelling "C<sub>n</sub>T repeat".
# The previous category order listed "CnT repeat", which does not occur in
# that column, so the requested position was never applied to that series.
repeat_colors = {
    "CT repeat": "red",
    "Short CT repeat": "pink",
    "C<sub>n</sub>T repeat": "purple",
    "12-mer repeat": "orange",
    "CCCTCT repeat": "yellow",
    "no repeat": "grey",
}

# Horizontal stacked bars of repeat-class fractions, faceted by phenotype
# (columns, no gap) and haplotype (rows).
fig = px.bar(
    df2,
    x="value",
    y="classification",
    color="variable",
    orientation="h",
    color_discrete_map=repeat_colors,
    labels={"value": "Proportion", "variable": "Repeat", "haplotype": "Haplotype"},
    facet_col="phenotype",
    facet_col_spacing=0,
    facet_row="haplotype",
    facet_row_spacing=0.1,
    # Derived from the colour map so the two can never disagree on a name.
    category_orders={"variable": list(repeat_colors)},
)

# Replacement text for the facet-row titles, keyed by haplotype.
# NOTE(review): the N values are hard-coded; presumably they are the
# right-hand (non-aFTLD-U) totals -- keep them in sync with the spreadsheet.
annotation_right = {
    "hapA": "hapA<br>N=19",
    "hapB": "hapB<br>N=27",
    "hapA+B": "hapA+B<br>N=46",
}


def _restyle_facet_title(annotation):
    """Strip the "name=" prefix of a facet title and relabel haplotype titles.

    Every facet title ends up at font size 24; titles starting with "hap" are
    replaced by the matching entry of ``annotation_right``.
    """
    label = annotation.text.split("=")[-1]
    if label.startswith("hap"):
        label = annotation_right[label]
    annotation.update(text=label, font=dict(size=24))


fig.for_each_annotation(_restyle_facet_title)


def _row_title(text):
    """Axis settings that show ``text`` as a size-24 y-axis title."""
    return dict(title=dict(text=text, font=dict(size=24)))


# The odd-numbered y axes carry a hard-coded haplotype/N title, the
# even-numbered ones listed here get an empty title.
fig.update_layout(
    title="",
    title_x=0.5,
    xaxis_title="",
    xaxis2_title="",
    yaxis=_row_title("hapA+B<br>N=31<br>"),
    yaxis2_title="",
    yaxis3=_row_title("hapB<br>N=9<br>"),
    yaxis4_title="",
    yaxis5=_row_title("hapA<br>N=22<br>"),
    plot_bgcolor="white",
    font={"size": 24},
    margin={"l": 0, "r": 40, "b": 0},
    height=600,
    legend={
        "orientation": "h",
        "yanchor": "bottom",
        "y": -0.25,
        "xanchor": "center",
        "x": 0.4,
        "title": "Repeat classification:",
    },
)

fig.update_xaxes(showticklabels=False)

# add annotations
# (count column, label colour) in the order the centre formulas stack them.
repeat_labels = [
    ("CT repeat", "white"),
    ("Short CT repeat", "black"),
    ("CnT repeat", "white"),
    ("12-mer repeat", "white"),
    ("CCCTCT repeat", "black"),
    ("no repeat", "white"),
]

# Centre of every block: the summed fraction of the blocks before it plus half
# of its own fraction, accumulated left to right into "<name>_loc".
df_vals = df.copy()
stack_offset = 0
for repeat_name, _ in repeat_labels:
    fraction = df_vals[f"{repeat_name}_f"]
    df_vals[f"{repeat_name}_loc"] = stack_offset + fraction / 2
    stack_offset = stack_offset + fraction

# map haplotype and phenotype to col and row
subplot_dict = {
    ("hapA", "aFTLD-U"): (1, 3),
    ("hapA", "Controls"): (2, 3),
    ("hapB", "aFTLD-U"): (1, 2),
    ("hapB", "Controls"): (2, 2),
    ("hapA+B", "aFTLD-U"): (1, 1),
    ("hapA+B", "Controls"): (2, 1),
}

for _, row in df_vals.iterrows():
    col_num, row_num = subplot_dict[(row["haplotype"], row["phenotype"])]
    for repeat_name, text_color in repeat_labels:
        # A zero count gets no label.
        if row[repeat_name] == 0:
            continue
        fig.add_annotation(
            x=row[f"{repeat_name}_loc"],
            y=row["classification"],
            text=f"{row[repeat_name]}",
            showarrow=False,
            font=dict(color=text_color, size=24),
            col=col_num,
            row=row_num,
        )


fig.show()