In [1]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from scipy.stats import ttest_ind



In [46]:
# NOTE: if rerunning this on a fresh export, make sure to remove the first row
# of the Excel file first, as it cannot be properly parsed.
COHORT_XLSX = "/home/wdecoster/local/fus-assoc/FTLD-FUS cohort 26-06-2023.xlsx"
COHORT_SHEET = "Cases in cohort"
FLAGGED_SAMPLE = "rr_PIDN4463"  # sample that gets issue == "Y" below

cohort_raw = pd.read_excel(COHORT_XLSX, sheet_name=COHORT_SHEET)

# Keep rows with a positive age at onset, case type "path" and pathology
# "aFTLD-U". The explicit .copy() gives an independent frame, so the column
# assignments below do not trigger pandas' SettingWithCopyWarning.
df = cohort_raw[
    (cohort_raw["AGEATONSET"] > 0)
    & (cohort_raw["Type of case"] == "path")
    & (cohort_raw["Path"] == "aFTLD-U")
].copy()

# Derive the haplotype group from the two chr15 genotype columns.
# Order matters: later assignments overwrite earlier ones, so a row matching
# both the HapB and the HapA rule ends up as "HapA". Rows matching no rule keep
# a missing haplotype and are dropped further down.
df.loc[
    (df["chr15:rs148687709"] == "TT") & (df["chr15: rs549846383"] == "AT/AT"),
    "haplotype",
] = "none"
df.loc[df["chr15:rs148687709"].isin(["TC", "CC"]), "haplotype"] = "HapB"
df.loc[df["chr15: rs549846383"].isin(["AT/A", "A/A"]), "haplotype"] = "HapA"

# df.loc[df["Gentli_ID"] == "rr_PIDN4463", "haplotype"] = "none"

# Build the analysis frame in one chain so no intermediate slice is mutated:
#   1. keep only the columns used downstream,
#   2. add the disease duration (age at death minus age at onset),
#   3. drop rows without an assigned haplotype,
#   4. mark the flagged sample with issue == "Y" (everything else "N").
df = (
    df[["Gentli_ID", "AGEATONSET", "AGEATDEATH", "haplotype", "SEXatBirth"]]
    .assign(DISEASEDURATION=lambda d: d["AGEATDEATH"] - d["AGEATONSET"])
    .loc[lambda d: (d["haplotype"] != "nan") & d["haplotype"].notna()]
    .assign(
        issue=lambda d: d["Gentli_ID"].eq(FLAGGED_SAMPLE).map({True: "Y", False: "N"})
    )
)

In [57]:
def age_strip_plot(variable, data=None):
    """Show a horizontal strip plot of an age-related variable per haplotype.

    One row is drawn per haplotype group. Samples whose ``issue`` column is
    ``"Y"`` are drawn in grey, all others in red. For each of ``HapA`` and
    ``HapB`` a Welch t-test (``equal_var=False``) against the ``none`` group is
    printed; every comparison with p < 0.05 is marked on the figure with a
    grey bracket and a ``*``.

    Parameters
    ----------
    variable : str
        Column plotted on the x-axis; one of ``"AGEATDEATH"``,
        ``"AGEATONSET"`` or ``"DISEASEDURATION"``.
    data : pandas.DataFrame, optional
        Frame providing the columns ``haplotype``, ``issue``, ``Gentli_ID``
        and ``variable``. Defaults to the notebook-level ``df``.

    Returns
    -------
    None
        The figure is displayed via ``fig.show``.

    Raises
    ------
    KeyError
        If ``variable`` is not one of the supported columns.
    """
    variable_labels = {
        "AGEATDEATH": "Age at death",
        "AGEATONSET": "Age at onset",
        "DISEASEDURATION": "Disease duration",
    }
    # Looked up first so an unsupported variable fails before any plotting.
    variable_label = variable_labels[variable]

    if data is None:
        data = df  # fall back to the notebook-level cohort frame

    fig = px.strip(
        data_frame=data.sort_values("haplotype"),
        y="haplotype",
        x=variable,
        labels={
            **variable_labels,
            "SEXatBirth": "Sex",
            # NOTE(review): the plotted column is "haplotype" (lower case), so
            # this entry does not match it and the y-axis title stays
            # "haplotype". Confirm whether "Risk factor" was intended.
            "Haplotype": "Risk factor",
        },
        hover_data=["Gentli_ID"],
        title=f"{variable_label} vs. haplotype",
        color="issue",
        color_discrete_map={"Y": "grey", "N": "red"},
        stripmode="overlay",
    )
    # Figure size is set here only; a size passed to px.strip would be
    # overridden by this call anyway.
    fig.update_layout(
        width=800,
        height=800,
        plot_bgcolor="rgba(0, 0, 0, 0)",
        font=dict(size=20),
        showlegend=False,
        margin=dict(l=0, r=0, t=50, b=0),
    )

    minage = data[variable].min()
    maxage = data[variable].max()

    axis_frame = dict(showline=True, linewidth=2, linecolor="black", mirror=True)

    # Extra room on the right (maxage + 10) holds the significance brackets.
    fig.update_xaxes(range=[minage - 3, maxage + 10], **axis_frame)

    # Pin the category order so the row index of every haplotype is known when
    # the significance stars are positioned below.
    haplotype_order = sorted(data["haplotype"].dropna().unique())
    fig.update_yaxes(
        categoryorder="array", categoryarray=haplotype_order, **axis_frame
    )

    fig.update_traces(marker=dict(size=12, opacity=0.5), jitter=1)

    # Welch t-test of each risk haplotype against the "none" group. Missing
    # values are dropped first; otherwise a single NaN turns the result into NaN.
    reference = data.loc[data["haplotype"] == "none", variable].dropna()
    n_significant = 0
    for hap in ["HapA", "HapB"]:
        carriers = data.loc[data["haplotype"] == hap, variable].dropna()
        ttest = ttest_ind(carriers, reference, equal_var=False)

        print(f"T-test for {hap} vs none: {ttest}")
        if ttest.pvalue < 0.05:
            # Each significant comparison gets its own horizontal slot, so two
            # brackets/stars never land on top of each other.
            bracket_x = maxage + 3 + 3 * n_significant
            n_significant += 1

            # Star halfway between the two compared rows. On a category axis a
            # numeric annotation position is the category's serial number.
            fig.add_annotation(
                x=bracket_x + 2,
                y=(haplotype_order.index("none") + haplotype_order.index(hap)) / 2,
                xref="x",
                yref="y",
                text="*",
                showarrow=False,
                font=dict(size=20),
            )
            # Bracket connecting the "none" row with the tested haplotype row.
            fig.add_trace(
                go.Scatter(
                    x=[bracket_x, bracket_x + 1, bracket_x + 1, bracket_x],
                    y=["none", "none", hap, hap],
                    fill=None,
                    mode="lines",
                    showlegend=False,
                    line=dict(color="grey", width=1),
                )
            )

    fig.show(
        config={
            "toImageButtonOptions": {
                "format": "png",
                "filename": "",
                "height": 800,
                "width": 800,
                "scale": 5,
            }
        }
    )

In [58]:
# Age at death per haplotype group; the t-test results are printed with the figure.
age_strip_plot("AGEATDEATH")

T-test for HapA vs none: TtestResult(statistic=2.0519224844627035, pvalue=0.04373596542507367, df=73.57712673868988)
T-test for HapB vs none: TtestResult(statistic=1.0550168399126014, pvalue=0.30394930169292916, df=20.07630855935336)


In [59]:
# Age at onset per haplotype group; the t-test results are printed with the figure.
age_strip_plot("AGEATONSET")

T-test for HapA vs none: TtestResult(statistic=2.101784140161494, pvalue=0.03899497651926687, df=73.59301028904439)
T-test for HapB vs none: TtestResult(statistic=0.5996534118884191, pvalue=0.5556145489168454, df=19.577593105434186)


In [60]:
# Disease duration (age at death minus age at onset) per haplotype group.
age_strip_plot("DISEASEDURATION")

T-test for HapA vs none: TtestResult(statistic=-0.46954780209199276, pvalue=0.6400629288465931, df=73.77808420726853)
T-test for HapB vs none: TtestResult(statistic=0.9108537749845814, pvalue=0.37675015422120717, df=15.029357169892329)


In [55]:
# Stacked bar chart with the number of males and females in each haplotype.

# Name the count column explicitly: a bare reset_index() would call it 0 (an
# integer), and the axis label would then depend on that implicit name.
sex_counts = (
    df.groupby(["haplotype", "SEXatBirth"]).size().reset_index(name="Count")
)

fig = px.bar(
    sex_counts,
    x="haplotype",
    y="Count",
    color="SEXatBirth",
    color_discrete_map={"M": "blue", "F": "red"},
    barmode="stack",
    labels={"SEXatBirth": "Sex at birth"},
    title="Sex distribution in haplotypes",
)

fig.update_layout(
    width=800,
    height=800,
    plot_bgcolor="rgba(0, 0, 0, 0)",
    font=dict(size=20),
    margin=dict(l=0, r=0, t=50, b=0),
    # Horizontal legend, centred along the top edge of the plot area.
    legend=dict(
        title="Sex at birth",
        yanchor="top",
        y=0.99,
        xanchor="center",
        x=0.50,
        orientation="h",
        font=dict(size=16),
    ),
)

# Same black, mirrored frame on both axes.
axis_frame = dict(showline=True, linewidth=2, linecolor="black", mirror=True)
fig.update_xaxes(**axis_frame)
fig.update_yaxes(**axis_frame)

fig.show()