# Drosophila melanogaster polymorphism

This notebook reports the subsetting of the Zambia (DPGP3) functional SNPs (synonymous and non-synonymous).

In [2]:
# Import python libraries
import pandas as pd

In [3]:
# Read the four per-arm annotation tables (tab-separated) into DataFrames,
# in the order chr2L, chr2R, chr3L, chr3R
ZIchr2L_table, ZIchr2R_table, ZIchr3L_table, ZIchr3R_table = map(
    pd.read_table,
    (
        "ZI_chr2L_ann_table.tsv",
        "ZI_chr2R_ann_table.tsv",
        "ZI_chr3L_ann_table.tsv",
        "ZI_chr3R_ann_table.tsv",
    ),
)

In [5]:
# Subset to retain only coding SNPs, i.e. rows whose 'effect' annotation is
# NON_SYNONYMOUS_CODING or SYNONYMOUS_CODING (not introns).
# Note: despite the "_exons" suffix, rows are selected by effect label,
# not by exon coordinates.
coding_effects = ["NON_SYNONYMOUS_CODING", "SYNONYMOUS_CODING"]
ZIchr2L_exons = ZIchr2L_table[ZIchr2L_table['effect'].isin(coding_effects)]
ZIchr2R_exons = ZIchr2R_table[ZIchr2R_table['effect'].isin(coding_effects)]
ZIchr3L_exons = ZIchr3L_table[ZIchr3L_table['effect'].isin(coding_effects)]
ZIchr3R_exons = ZIchr3R_table[ZIchr3R_table['effect'].isin(coding_effects)]


In [7]:
# Report how many SNPs were kept per chromosome arm (row count of each subset)
"chr2L = {}, chr2R = {}, chr3L = {}, chr3R = {} SNPs!".format(
    *(len(exons) for exons in (ZIchr2L_exons, ZIchr2R_exons, ZIchr3L_exons, ZIchr3R_exons))
)

'chr2L = 258554, chr2R = 238496, chr3L = 228226, chr3R = 272780 SNPs!'

In [8]:
# Preview the first five rows of the chr2L subset
ZIchr2L_exons.head(n=5)

Unnamed: 0,chrom,pos,id,ref,alt,refcount,altcount,refflank,altflank,refcodon,...,snpeff_trnscid,sift_trnscid,sift_geneid,sift_genename,sift_region,sift_vartype,sifts_core,sift_median,sift_pred,deleteriousness
228,chr2L,11414,.,T,G,176,5,CTTTTCGAT,CTTTGCGAT,Aaa,...,FBtr0078170,FBtr0078167,FBgn0002121,l(2)gl,CDS,NONSYNONYMOUS,0.535,3.05,TOLERATED,tolerated
229,chr2L,11464,.,A,G,183,1,ACTCATGTT,ACTCGTGTT,aTg,...,FBtr0078170,FBtr0078167,FBgn0002121,l(2)gl,CDS,NONSYNONYMOUS,0.527,2.99,TOLERATED,tolerated
230,chr2L,11479,.,C,G,177,10,AGTACTGGC,AGTAGTGGC,aGt,...,FBtr0078170,FBtr0078166,FBgn0002121,l(2)gl,CDS,NONSYNONYMOUS,0.951,3.0,TOLERATED,tolerated
231,chr2L,11513,.,G,A,182,3,GATAGGCGA,GATAAGCGA,Cta,...,FBtr0078170,FBtr0078166,FBgn0002121,l(2)gl,CDS,SYNONYMOUS,0.828,3.01,TOLERATED,
236,chr2L,11824,.,T,A,180,2,GTCATATTT,GTCAAATTT,Atg,...,FBtr0078170,FBtr0330655,FBgn0002121,l(2)gl,CDS,NONSYNONYMOUS,0.287,2.96,TOLERATED,tolerated


In [9]:
# Reduce every per-arm table to the same fixed set of columns
columns_to_subset = [
    "chrom", "pos",
    "ref", "alt",
    "refcount", "altcount",
    "refcodon", "altcodon",
    "effect",
    "sift_geneid", "sift_genename",
]
ZIchr2L_exons_subset = ZIchr2L_exons.loc[:, columns_to_subset]
ZIchr2R_exons_subset = ZIchr2R_exons.loc[:, columns_to_subset]
ZIchr3L_exons_subset = ZIchr3L_exons.loc[:, columns_to_subset]
ZIchr3R_exons_subset = ZIchr3R_exons.loc[:, columns_to_subset]

In [18]:
# Export tables
%cd ../data

ZIchr2L_exons_subset.to_csv("Zambia_chr2L_exons_subset.csv", sep="\t")
ZIchr2R_exons_subset.to_csv("Zambia_chr2R_exons_subset.csv", sep="\t")
ZIchr3L_exons_subset.to_csv("Zambia_chr3L_exons_subset.csv", sep="\t")
ZIchr3R_exons_subset.to_csv("Zambia_chr3R_exons_subset.csv", sep="\t")