## Notebook to fix GWAS Endometriosis summary statistics and plot results of fGWAS analysis 

In [1]:
# Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

### 1. Fix summary statistics 

The file **GCST90205183_buildGRCh38.tsv** does not appear to be properly tab-separated, so it has to be fixed first (otherwise it cannot be read correctly in Python).

Now we can read the properly tab-separated file **GCST90205183_buildGRCh38_prova.tsv** in Python and reformat it into a layout that fGWAS can take as input.

In [None]:
# Load the re-delimited summary statistics (tab separated, first row is the header)
endo = pd.read_csv('/nfs/team292/vl6/FetalReproductiveTract/fGWAS/adult_endometrium/GCST90205183_buildGRCh38_prova.tsv', sep = '\t', header = 0)

# Drop every row whose chromosome label contains an underscore
# (presumably alt/unplaced contigs rather than primary chromosomes -- TODO confirm).
# The labels are compared as strings so that a mixed-type column is handled uniformly.
endo['chromosome'] = endo['chromosome'].astype(str)
# .copy() makes the filtered frame independent of the original, so the column
# assignments below do not raise pandas' SettingWithCopyWarning.
endo = endo[~endo['chromosome'].str.contains('_')].copy()

# Convert the labels to integers so the sort below is numeric (1, 2, ..., 10)
# rather than lexicographic ('1', '10', '2').
# NOTE: this raises ValueError if any remaining label is not a plain integer string.
endo['chromosome'] = endo['chromosome'].astype(int)

# Duplicate the base pair location column
endo['base_pair_location_2'] = endo['base_pair_location']

# Keep only the columns needed downstream, in the required order.
# Every other column (e.g. effect_allele_frequency, p_value) is discarded by this
# selection, so no separate drop step is required.
endo = endo[['chromosome', 'base_pair_location', 'base_pair_location_2', 'other_allele', 'effect_allele', 'beta', 'standard_error']]

# Sort dataframe by chromosome and base pair position
endo = endo.sort_values(by=['chromosome', 'base_pair_location'])

# Map old column names to new ones.
# Both position columns are deliberately given the same name 'hm_pos', so the
# written header contains that name twice.
column_map = {
    'chromosome' : 'hm_chrom',
    'base_pair_location': 'hm_pos',
    'base_pair_location_2': 'hm_pos',
    'other_allele': 'hm_other_allele',
    'effect_allele' : 'hm_effect_allele',
    'beta' : 'hm_beta',
    'standard_error' : 'standard_error',
}

# Rename the columns based on the dictionary
endo = endo.rename(columns=column_map)

# Save the dataframe as a TSV file without the index; the header row IS written
# (to_csv defaults to header=True).
# NOTE(review): the previous comment said "without the index and header" -- confirm
# whether the downstream bed/tabix step expects a header line; if not, pass header=False.
endo.to_csv('/nfs/team292/vl6/FetalReproductiveTract/fGWAS/adult_endometrium/GCST90205183_buildGRCh38_corrected.tsv', sep='\t', index=False)


### 2. Convert to bed and tabix indexed file 