### Maximum PhyloP conservation score over each structural variant (SV)

In [1]:
from pathlib import Path 
import pyBigWig
import numpy as np
import pandas as pd 

In [2]:
# Project working directory; every other path in this notebook hangs off it.
wkdir = "/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3/"
wkdir_path = Path(wkdir)

# Inputs: the PhyloP BigWig track and the BED file of variant intervals.
phylop_scores_bw = wkdir_path / "reference/conservation/phylop/hg38.phyloP100way.bw"
sv_bed_path = wkdir_path / "5_misexp_vrnts/test_cntrl_sets/vrnt_id_in_windows_misexp_genes.bed"

# Output directory for the PhyloP metrics (created if it does not exist yet).
out_dir = wkdir_path / "5_misexp_vrnts/scores/phylop"
out_dir.mkdir(parents=True, exist_ok=True)

In [3]:
# The BED file is read without a header row, so columns arrive numbered 0..N;
# give the first four their names.
bed_columns = {0: "chrom", 1: "start", 2: "end", 3: "vrnt_id"}
sv_bed_raw = pd.read_csv(sv_bed_path, sep="\t", header=None)
sv_bed_df = sv_bed_raw.rename(columns=bed_columns)

In [4]:
def _max_ignoring_nan(scores):
    """Return the largest non-NaN entry of ``scores``, or NaN if there is none.

    Parameters
    ----------
    scores : sequence of float or None
        Per-base scores for one interval; entries may be NaN.

    Returns
    -------
    float
        Maximum over the non-NaN entries. NaN when ``scores`` is None, empty,
        or entirely NaN, so that an interval without any score is reported as
        missing instead of raising or emitting numpy's "All-NaN slice"
        RuntimeWarning.
    """
    if scores is None:
        return np.nan
    score_arr = np.asarray(scores, dtype=float)
    if score_arr.size == 0 or np.isnan(score_arr).all():
        return np.nan
    return np.nanmax(score_arr)


# Maximum PhyloP score per variant interval, keyed by the row label of sv_bed_df.
# Each value is [chrom, start, end, vrnt_id, phylop_max].
phylop_consv_vrnt_results = {}
phylop_bw = pyBigWig.open(str(phylop_scores_bw))
try:
    # Iterate the four needed columns directly instead of building a Series per row.
    # NOTE(review): assumes start/end follow the same 0-based half-open convention
    # as pyBigWig's values() - confirm against how the BED file was produced.
    for index, chrom, start, end, vrnt_id in zip(
        sv_bed_df.index,
        sv_bed_df["chrom"],
        sv_bed_df["start"],
        sv_bed_df["end"],
        sv_bed_df["vrnt_id"],
    ):
        # query BigWig file: one score per base in [start, end)
        values = phylop_bw.values(chrom, start, end)
        # report variants whose returned window does not span the whole interval
        n_values = 0 if values is None else len(values)
        if n_values != end - start:
            print(vrnt_id)
        # ignores NaNs; NaN when the interval has no score at all
        max_value = _max_ignoring_nan(values)
        # add max to dictionary
        phylop_consv_vrnt_results[index] = [chrom, start, end, vrnt_id, max_value]
finally:
    # close the BigWig file even if a query fails part-way through
    phylop_bw.close()

  from ipykernel import kernelapp as app


In [5]:
# Column order mirrors the per-variant lists stored in phylop_consv_vrnt_results.
pylop_results_columns = [
    "chrom",
    "start",
    "end",
    "vrnt_id",
    "phylop_max",
]
# One row per dictionary entry; the dictionary keys become the row index.
phylop_consv_vrnt_df = pd.DataFrame.from_dict(
    phylop_consv_vrnt_results,
    orient="index",
    columns=pylop_results_columns,
)

In [6]:
# Distinct variant IDs present in the results table.
vrnt_id_in_phylop = phylop_consv_vrnt_df["vrnt_id"].unique()
print(f"Number of varints annotated: {len(vrnt_id_in_phylop)}")

# Rows whose maximum PhyloP score is missing (this counts rows, not distinct IDs).
missing_score_mask = phylop_consv_vrnt_df["phylop_max"].isna()
vrnt_id_in_phylop_nan = phylop_consv_vrnt_df[missing_score_mask]
print(f"Number of variants with no PhyloP score: {len(vrnt_id_in_phylop_nan)}")

Number of varints annotated: 20255
Number of variants with no PhyloP score: 193


In [7]:
# Write the per-variant PhyloP table as a tab-separated file without the row index.
phylop_consv_vrnt_path = out_dir / "sv_in_windows_phylop_metrics.tsv"
phylop_consv_vrnt_df.to_csv(
    phylop_consv_vrnt_path,
    sep="\t",
    index=False,
)