## Plot results of fGWAS analysis 

In [1]:
# Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

### 3. Run fGWAS analysis

### 4. Plot output of fGWAS 

This currently plots all cell types. Consider removing cell types whose log(OR) is negative (as done by the Teichmann lab, including in their gut paper: https://www.nature.com/articles/s41586-021-03852-1/figures/19).

In [None]:
# Load the tab-separated fGWAS results table and preview its first rows.
results_path = '/nfs/team292/vl6/FetalReproductiveTract/fGWAS/adult_endometrium/GCST90205183/forest_GCST90205183_noatac.tsv'
res = pd.read_csv(results_path, sep='\t')
res.head()

In [None]:
# Sort rows from the largest FDR to the smallest (descending order).
# The forest plots further down draw row i at height i, so this ordering puts
# the rows with the smallest FDR at the top of the y-axis.
res = res.sort_values('FDR', ascending=False)

# Turn the row index into a regular column named 'celltype'.
# NOTE(review): presumably the TSV header has one field fewer than the data
# rows, so pandas read the cell-type names as the index - confirm against the file.
res = res.reset_index()
res = res.rename(columns={'index': 'celltype'})
res.head()

In [None]:
# Forest plot of the fGWAS results in `res`: one row per cell type, showing the
# log odds ratio and its 95% confidence interval. Intervals whose FDR is below
# `fdr_threshold` are highlighted in orange, all others are dark gray.
# Reads the columns 'celltype', 'log OR', 'CI (lower 95%)', 'CI (upper 95%)'
# and 'FDR' from `res`, and writes the figure to a PDF in the working directory.

# Single cut-off that drives the interval colours, the title and the file name.
fdr_threshold = 0.001

# Set the default font size before the figure is created so that text without
# an explicit fontsize (e.g. the x tick labels) also uses it on the first run.
plt.rcParams['font.size'] = 14
fig, ax = plt.subplots(figsize=(8, 16))

y_pos = range(len(res))
ci_lower = res['CI (lower 95%)']
ci_upper = res['CI (upper 95%)']

# Confidence intervals, coloured by whether the row passes the FDR cut-off.
colors = ['orange' if fdr < fdr_threshold else 'darkgray' for fdr in res['FDR']]
for y, lo, hi, color in zip(y_pos, ci_lower, ci_upper, colors):
    ax.plot([lo, hi], [y, y], color=color, lw=3)

# Point estimates. Scatter collections default to a lower zorder than lines,
# so raise it explicitly; otherwise the interval lines are painted across
# the markers.
ax.scatter(res['log OR'], y_pos, marker='o', color='black', s=30, zorder=3)

# Label each row with its cell type.
ax.set_yticks(y_pos)
ax.set_yticklabels(res['celltype'], fontsize=15)
ax.set_ylabel('Cell type', fontsize=18)

# Dashed reference line at the null value, log(OR) = 0.
ax.axvline(x=0, color='black', linestyle='--')

# Optional fixed x-axis range:
#ax.set_xlim([-3, 3])
ax.set_xlabel('log(OR)', fontsize=18)

ax.set_title(f'Forest Plot (FDR < {fdr_threshold} in orange)', fontsize=20)

# Write the PDF before showing, so saving does not depend on how the active
# backend treats the figure after show(). '0.001' becomes '0point001'.
threshold_tag = str(fdr_threshold).replace('.', 'point')
fig.savefig(f'ForestPlotEndometriosis_FDR{threshold_tag}.pdf', bbox_inches='tight')
plt.show()

In [None]:
# Forest plot of the fGWAS results in `res`: one row per cell type, showing the
# log odds ratio and its 95% confidence interval. Intervals whose FDR is below
# `fdr_threshold` are highlighted in orange, all others are dark gray.
# Reads the columns 'celltype', 'log OR', 'CI (lower 95%)', 'CI (upper 95%)'
# and 'FDR' from `res`, and writes the figure to a PDF in the working directory.

# Single cut-off that drives the interval colours, the title and the file name.
fdr_threshold = 0.05

# Set the default font size before the figure is created so that text without
# an explicit fontsize (e.g. the x tick labels) also uses it on the first run.
plt.rcParams['font.size'] = 14
fig, ax = plt.subplots(figsize=(8, 16))

y_pos = range(len(res))
ci_lower = res['CI (lower 95%)']
ci_upper = res['CI (upper 95%)']

# Confidence intervals, coloured by whether the row passes the FDR cut-off.
colors = ['orange' if fdr < fdr_threshold else 'darkgray' for fdr in res['FDR']]
for y, lo, hi, color in zip(y_pos, ci_lower, ci_upper, colors):
    ax.plot([lo, hi], [y, y], color=color, lw=3)

# Point estimates. Scatter collections default to a lower zorder than lines,
# so raise it explicitly; otherwise the interval lines are painted across
# the markers.
ax.scatter(res['log OR'], y_pos, marker='o', color='black', s=30, zorder=3)

# Label each row with its cell type.
ax.set_yticks(y_pos)
ax.set_yticklabels(res['celltype'], fontsize=15)
ax.set_ylabel('Cell type', fontsize=18)

# Dashed reference line at the null value, log(OR) = 0.
ax.axvline(x=0, color='black', linestyle='--')

# Optional fixed x-axis range:
#ax.set_xlim([-3, 3])
ax.set_xlabel('log(OR)', fontsize=18)

ax.set_title(f'Forest Plot (FDR < {fdr_threshold} in orange)', fontsize=20)

# Write the PDF before showing, so saving does not depend on how the active
# backend treats the figure after show(). '0.05' becomes '0point05'.
threshold_tag = str(fdr_threshold).replace('.', 'point')
fig.savefig(f'ForestPlotEndometriosis_FDR{threshold_tag}.pdf', bbox_inches='tight')
plt.show()