In [1]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from scipy.stats import ttest_ind

In [82]:
# Load the cohort sheet and derive the per-case haplotype labels used by every plot below.
# NOTE: before rerunning, remove the first row of the Excel file; it cannot be parsed properly.
cohort_raw = pd.read_excel(
    "/home/wdecoster/local/fus-assoc/FTLD-FUS cohort 26-06-2023.xlsx",
    sheet_name="Cases in cohort",
)

# Keep rows with a positive age at onset, "Type of case" == "path" and "Path" == "aFTLD-U".
# .copy() makes the subset an independent frame, so the .loc assignments below do not
# write into a slice of the raw sheet (which triggers SettingWithCopyWarning).
df = cohort_raw[
    (cohort_raw["AGEATONSET"] > 0)
    & (cohort_raw["Type of case"] == "path")
    & (cohort_raw["Path"] == "aFTLD-U")
].copy()

# Haplotype label from the two chr15 genotype columns. Order matters: later
# assignments overwrite earlier ones, so a row matching both the HapB and the
# HapA rule ends up labelled "HapA". Rows matching no rule stay missing.
df.loc[
    (df["chr15:rs148687709"] == "TT") & (df["chr15: rs549846383"] == "AT/AT"), "haplotype"
] = "none"
df.loc[df["chr15:rs148687709"].isin(["TC", "CC"]), "haplotype"] = "HapB"
df.loc[df["chr15: rs549846383"].isin(["AT/A", "A/A"]), "haplotype"] = "HapA"

# rr_PIDN4463 keeps its genotype-derived haplotype; it is only flagged via "issue" below.

keep_cols = ["Gentli_ID", "AGEATONSET", "AGEATDEATH", "haplotype"]
# The sex-stratified strip plots further down read "SEXatBirth"; dropping it here made
# them fail on a fresh Restart & Run All. It is kept whenever the sheet provides it.
if "SEXatBirth" in df.columns:
    keep_cols.append("SEXatBirth")

# Drop cases without a haplotype call (missing value or the literal string "nan").
has_haplotype = df["haplotype"].notna() & (df["haplotype"] != "nan")
df = df.loc[has_haplotype, keep_cols].copy()

# "Y" marks the single flagged sample so it can be greyed out / excluded later.
df["issue"] = df["Gentli_ID"].eq("rr_PIDN4463").map({True: "Y", False: "N"})
# Coarser grouping: both haplotypes collapse into "chr15q14", non-carriers become "Unknown".
df["Haplotype"] = df["haplotype"].replace(
    {"HapA": "chr15q14", "none": "Unknown", "HapB": "chr15q14"}
)

In [30]:
# Strip plot of age at symptom onset: one row and one colour per sex.
fig = px.strip(
    df,
    x="AGEATONSET",
    y="SEXatBirth",
    color="SEXatBirth",
    hover_data=["Gentli_ID"],
    labels={"AGEATONSET": "Age at symptom onset", "SEXatBirth": "Sex"},
    title="Age at symptom onset",
    width=600,
    height=300,
)

# Final canvas is 800x800 (this replaces the size given to px.strip), with a
# transparent plot background and a larger font.
fig.update_layout(
    font={"size": 20},
    plot_bgcolor="rgba(0, 0, 0, 0)",
    height=800,
    width=800,
)

# Identical black, mirrored axis line of width 2 on both axes.
axis_frame = {"showline": True, "linewidth": 2, "linecolor": "black", "mirror": True}
fig.update_xaxes(**axis_frame)
fig.update_yaxes(**axis_frame)

# Large, semi-transparent markers with full jitter.
fig.update_traces(jitter=1, marker={"size": 12, "opacity": 0.5})

# Settings for the figure's image-export button: 800x800 PNG at 5x scale.
png_export = {
    "format": "png",
    "filename": "",
    "height": 800,
    "width": 800,
    "scale": 5,
}
fig.show(config={"toImageButtonOptions": png_export})

In [53]:
# Discard rows whose haplotype is the literal string "nan".
df = df.loc[df["haplotype"].ne("nan")]

# Strip plot of age at symptom onset: one row per sex, coloured by haplotype.
# Sorting by haplotype first fixes the order in which the colour groups appear.
fig = px.strip(
    df.sort_values("haplotype"),
    x="AGEATONSET",
    y="SEXatBirth",
    color="haplotype",
    hover_data=["Gentli_ID"],
    labels={
        "AGEATONSET": "Age at symptom onset",
        "SEXatBirth": "Sex",
        "haplotype": "Chr15q14 haplotype",
    },
    title="Age at symptom onset",
    width=600,
    height=300,
)

# Final canvas is 800x800 (this replaces the size given to px.strip), with a
# transparent plot background and a larger font.
fig.update_layout(
    font={"size": 20},
    plot_bgcolor="rgba(0, 0, 0, 0)",
    height=800,
    width=800,
)

# Identical black, mirrored axis line of width 2 on both axes.
axis_frame = {"showline": True, "linewidth": 2, "linecolor": "black", "mirror": True}
fig.update_xaxes(**axis_frame)
fig.update_yaxes(**axis_frame)

# Large, semi-transparent markers with full jitter.
fig.update_traces(jitter=1, marker={"size": 12, "opacity": 0.5})

# Settings for the figure's image-export button: 800x800 PNG at 5x scale.
png_export = {
    "format": "png",
    "filename": "",
    "height": 800,
    "width": 800,
    "scale": 5,
}
fig.show(config={"toImageButtonOptions": png_export})

In [77]:
# Age at symptom onset per chr15q14 haplotype, with Welch t-tests (equal_var=False)
# between the groups. ttest_ind is imported in the notebook's import cell.
df = df[df["haplotype"] != "nan"]

# Threshold below which the HapA-vs-none comparison is marked with "*" on the figure.
ALPHA = 0.05

fig = px.strip(
    data_frame=df.sort_values("haplotype"),
    y="haplotype",
    x="AGEATONSET",
    labels={
        "AGEATONSET": "Age at symptom onset",
        "haplotype": "Chr15q14 haplotype",
    },
    hover_data=["Gentli_ID"],
    title="Age at symptom onset",
    color="issue",
    # The flagged sample ("Y") is drawn in grey, all others in red.
    color_discrete_map={"Y": "grey", "N": "red"},
    stripmode="overlay",
)
# Figure size is set once here; a size passed to px.strip would be overridden anyway.
fig.update_layout(
    width=800,
    height=800,
    plot_bgcolor="rgba(0, 0, 0, 0)",
    font=dict(size=20),
    showlegend=False,
)

# Start the x-axis slightly below the youngest onset; the upper bound leaves
# room for the significance bracket drawn at x = 76-77.
minage = df["AGEATONSET"].min()

axis_frame = dict(showline=True, linewidth=2, linecolor="black", mirror=True)
fig.update_xaxes(range=[minage - 3, 80], **axis_frame)
fig.update_yaxes(**axis_frame)

# Must run before the bracket trace is added: that trace is a go.Scatter line,
# and these strip-marker settings are only meant for the px.strip traces.
fig.update_traces(marker=dict(size=12, opacity=0.5), jitter=1)

# Group selections reused by the three pairwise tests.
age_at_onset = df["AGEATONSET"]
is_hap_a = df["haplotype"] == "HapA"
is_hap_b = df["haplotype"] == "HapB"
is_none = df["haplotype"] == "none"

ttest_major_none = ttest_ind(age_at_onset[is_hap_a], age_at_onset[is_none], equal_var=False)
print(f"T-test for major vs none: {ttest_major_none}")

ttest_minor_none = ttest_ind(age_at_onset[is_hap_b], age_at_onset[is_none], equal_var=False)
print(f"T-test for minor vs none: {ttest_minor_none}")

ttest_major_minor = ttest_ind(age_at_onset[is_hap_a], age_at_onset[is_hap_b], equal_var=False)
print(f"T-test for major vs minor: {ttest_major_minor}")

# Mark HapA vs none as significant only when the test above supports it, so the
# figure cannot claim significance after the data change.
if ttest_major_none.pvalue < ALPHA:
    # Star in paper coordinates, placed beside the bracket.
    fig.add_annotation(
        x=0.98,
        y=0.5,
        xref="paper",
        yref="paper",
        text="*",
        showarrow=False,
        font=dict(size=20),
    )
    # Bracket in data coordinates connecting the "none" and "HapA" rows.
    fig.add_trace(
        go.Scatter(
            x=[76, 77, 77, 76],
            y=["none", "none", "HapA", "HapA"],
            fill=None,
            mode="lines",
            showlegend=False,
            line=dict(color="grey", width=1),
        )
    )

fig.show(
    config={
        "toImageButtonOptions": {
            "format": "png",
            "filename": "",
            "height": 800,
            "width": 800,
            "scale": 5,
        }
    }
)

T-test for major vs none: TtestResult(statistic=2.101784140161494, pvalue=0.03899497651926687, df=73.59301028904439)
T-test for minor vs none: TtestResult(statistic=0.5996534118884191, pvalue=0.5556145489168454, df=19.577593105434186)
T-test for major vs minor: TtestResult(statistic=1.0747382291248397, pvalue=0.2945877519470608, df=21.17266211899469)


In [80]:
# Age at death per chr15q14 haplotype, with Welch t-tests (equal_var=False)
# between the groups. ttest_ind is imported in the notebook's import cell.
df = df[df["haplotype"] != "nan"]

# Threshold below which the HapA-vs-none comparison is marked with "*" on the figure.
ALPHA = 0.05

fig = px.strip(
    data_frame=df.sort_values("haplotype"),
    y="haplotype",
    x="AGEATDEATH",
    labels={
        "AGEATDEATH": "Age at death",
        "haplotype": "Chr15q14 haplotype",
    },
    hover_data=["Gentli_ID"],
    title="Age at death",
    color="issue",
    # The flagged sample ("Y") is drawn in grey, all others in red.
    color_discrete_map={"Y": "grey", "N": "red"},
    stripmode="overlay",
)
# Figure size is set once here; a size passed to px.strip would be overridden anyway.
fig.update_layout(
    width=800,
    height=800,
    plot_bgcolor="rgba(0, 0, 0, 0)",
    font=dict(size=20),
    showlegend=False,
)

# Start the x-axis slightly below the youngest age at death; the upper bound
# leaves room for the significance bracket drawn at x = 79-80.
minage = df["AGEATDEATH"].min()

axis_frame = dict(showline=True, linewidth=2, linecolor="black", mirror=True)
fig.update_xaxes(range=[minage - 3, 85], **axis_frame)
fig.update_yaxes(**axis_frame)

# Must run before the bracket trace is added: that trace is a go.Scatter line,
# and these strip-marker settings are only meant for the px.strip traces.
fig.update_traces(marker=dict(size=12, opacity=0.5), jitter=1)

# Group selections reused by the tests below.
age_at_death = df["AGEATDEATH"]
is_hap_a = df["haplotype"] == "HapA"
is_hap_b = df["haplotype"] == "HapB"
is_none = df["haplotype"] == "none"
not_flagged = df["issue"] == "N"  # everything except rr_PIDN4463

ttest_major_none = ttest_ind(age_at_death[is_hap_a], age_at_death[is_none], equal_var=False)
print(f"T-test for HapA vs none: {ttest_major_none}")

ttest_minor_none = ttest_ind(age_at_death[is_hap_b], age_at_death[is_none], equal_var=False)
print(f"T-test for minor vs none: {ttest_minor_none}")

# Same minor-vs-none test with rr_PIDN4463 removed; stored under its own name
# so the all-samples result above is not overwritten.
ttest_minor_none_excl = ttest_ind(
    age_at_death[is_hap_b & not_flagged],
    age_at_death[is_none & not_flagged],
    equal_var=False,
)
print(f"T-test for minor vs none (excluding rr_PIDN4463): {ttest_minor_none_excl}")

ttest_major_minor = ttest_ind(age_at_death[is_hap_a], age_at_death[is_hap_b], equal_var=False)
print(f"T-test for major vs minor: {ttest_major_minor}")

ttest_chr15q14_none = ttest_ind(
    age_at_death[is_hap_a | is_hap_b],
    age_at_death[is_none],
    equal_var=False,
)
print(f"T-test for HapA+HapB vs none: {ttest_chr15q14_none}")

# Mark HapA vs none as significant only when the test above supports it, so the
# figure cannot claim significance after the data change.
# HapB vs none is intentionally not annotated (p > 0.05 in the recorded run,
# with and without rr_PIDN4463).
if ttest_major_none.pvalue < ALPHA:
    # Star in paper coordinates, placed beside the bracket.
    fig.add_annotation(
        x=0.95,
        y=0.5,
        xref="paper",
        yref="paper",
        text="*",
        showarrow=False,
        font=dict(size=20),
    )
    # Bracket in data coordinates connecting the "none" and "HapA" rows.
    fig.add_trace(
        go.Scatter(
            x=[79, 80, 80, 79],
            y=["none", "none", "HapA", "HapA"],
            fill=None,
            mode="lines",
            showlegend=False,
            line=dict(color="grey", width=1),
        )
    )

fig.show(
    config={
        "toImageButtonOptions": {
            "format": "png",
            "filename": "",
            "height": 800,
            "width": 800,
            "scale": 5,
        }
    }
)

T-test for HapA vs none: TtestResult(statistic=2.0519224844627035, pvalue=0.04373596542507367, df=73.57712673868988)
T-test for minor vs none: TtestResult(statistic=1.0550168399126014, pvalue=0.30394930169292916, df=20.07630855935336)
T-test for minor vs none (excluding rr_PIDN4463): TtestResult(statistic=2.0384380052561064, pvalue=0.05164733944172774, df=26.377161442909017)
T-test for major vs minor: TtestResult(statistic=0.6188170956248046, pvalue=0.5424552363303341, df=21.781277731750126)
T-test for HapA+HapB vs none: TtestResult(statistic=2.058458184281454, pvalue=0.04278715330066002, df=80.34601803828934)


In [79]:
# Age at death for chr15q14 carriers (HapA and HapB pooled) versus "Unknown",
# with Welch t-tests (equal_var=False). ttest_ind is imported in the notebook's import cell.

# Threshold below which the carriers-vs-unknown comparison is marked with "*".
ALPHA = 0.05

fig = px.strip(
    data_frame=df.sort_values("Haplotype"),
    y="Haplotype",
    x="AGEATDEATH",
    labels={
        "AGEATDEATH": "Age at death",
        "Haplotype": "Risk factor",
    },
    hover_data=["Gentli_ID"],
    title="Age at death",
    color="issue",
    # The flagged sample ("Y") is drawn in grey, all others in red.
    color_discrete_map={"Y": "grey", "N": "red"},
    stripmode="overlay",
)

# TODO: within the chr15q14 category, distinguish HapA and HapB by marker symbol.

# Figure size is set once here; a size passed to px.strip would be overridden anyway.
fig.update_layout(
    width=800,
    height=800,
    plot_bgcolor="rgba(0, 0, 0, 0)",
    font=dict(size=20),
    showlegend=False,
)

# Start the x-axis slightly below the youngest age at death; the upper bound
# leaves room for the significance bracket drawn at x = 79-80.
minage = df["AGEATDEATH"].min()

axis_frame = dict(showline=True, linewidth=2, linecolor="black", mirror=True)
fig.update_xaxes(range=[minage - 3, 85], **axis_frame)
fig.update_yaxes(**axis_frame)

# Must run before the bracket trace is added: that trace is a go.Scatter line,
# and these strip-marker settings are only meant for the px.strip traces.
fig.update_traces(marker=dict(size=12, opacity=0.5), jitter=1)

# Group selections reused by both tests.
age_at_death = df["AGEATDEATH"]
is_carrier = df["Haplotype"] == "chr15q14"
is_unknown = df["Haplotype"] == "Unknown"
not_flagged = df["issue"] == "N"  # everything except rr_PIDN4463

ttest_carriers_unknown = ttest_ind(
    age_at_death[is_carrier],
    age_at_death[is_unknown],
    equal_var=False,
)
print(f"T-test for carriers vs unknown: {ttest_carriers_unknown}")

# Same comparison with rr_PIDN4463 excluded; stored under its own name so the
# all-samples result above is not overwritten.
ttest_carriers_unknown_excl = ttest_ind(
    age_at_death[is_carrier & not_flagged],
    age_at_death[is_unknown & not_flagged],
    equal_var=False,
)
print(f"T-test for carriers vs unknown (excluding rr_PIDN4463): {ttest_carriers_unknown_excl}")

# Mark the comparison as significant only when the test supports it. The
# all-samples test is used because the figure shows every sample.
# NOTE(review): confirm this, not the exclusion test, is the one the star should reflect.
if ttest_carriers_unknown.pvalue < ALPHA:
    # Star in paper coordinates, placed beside the bracket.
    fig.add_annotation(
        x=0.95,
        y=0.5,
        xref="paper",
        yref="paper",
        text="*",
        showarrow=False,
        font=dict(size=20),
    )
    # Bracket in data coordinates connecting the "chr15q14" and "Unknown" rows.
    fig.add_trace(
        go.Scatter(
            x=[79, 80, 80, 79],
            y=["chr15q14", "chr15q14", "Unknown", "Unknown"],
            fill=None,
            mode="lines",
            showlegend=False,
            line=dict(color="grey", width=1),
        )
    )

fig.show(
    config={
        "toImageButtonOptions": {
            "format": "png",
            "filename": "",
            "height": 800,
            "width": 800,
            "scale": 5,
        }
    }
)

T-test for carriers vs unknown: TtestResult(statistic=2.058458184281454, pvalue=0.04278715330066002, df=80.34601803828934)
T-test for carriers vs unknown (excluding rr_PIDN4463): TtestResult(statistic=2.2645008798629416, pvalue=0.02628160080065872, df=79.07531405253856)
