Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
urmi-21 committed Jan 5, 2021
1 parent ed68cce commit 8ffa22f
Showing 1 changed file with 23 additions and 16 deletions.
39 changes: 23 additions & 16 deletions case_studies/Covid_RNA-Seq/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ salmon=quant.Salmon(index="human_data/salmon_index",transcriptome=Tr,threads=15)
rule all:
input:
#expand("{wd}/{sample}/salmon_out/quant.sf",wd=DIR,sample=SRR)
expand("{wd}/results_TPM.tsv",wd=DIR)
expand("{wd}/results_TPM_tx.tsv",wd=DIR)

rule quant:
output:
Expand All @@ -37,28 +37,35 @@ rule merge:
input:
["{wd}/{sample}/salmon_out/quant.sf".format(wd=DIR,sample=s) for s in SRR]
output:
outfile="{wd}/results_TPM.tsv"
outfile="{wd}/results_TPM_tx.tsv"
run:
#read tx names
with open(input[0]) as f:
thisdata=f.read().splitlines()
thisdata=f.read().splitlines()
thisdata.pop(0)
ids=[]

names=[]
txids=[]
geneids=[]
for l in thisdata:
ids.append((l.split('\t'))[0].split('|')[0])
df=pd.DataFrame({'TranscriptID':ids})
thistx=l.split('\t')[0].split('|')[0]
thisgene=l.split('\t')[0].split('|')[1]
txids.append(thistx)
geneids.append(thisgene)
df=pd.DataFrame({'TranscriptID':txids,'GeneID':geneids})

#read files in memory
for qf in input:
name=qf.split('/')[1]
thisdata=pd.read_csv(qf,sep='\t',usecols=[3],skiprows=0)
df[name]=thisdata['TPM']
name=qf.split('/')[1]
names.append(name)
thisdata=pd.read_csv(qf,sep='\t',usecols=[3],skiprows=0)
df[name]=thisdata['TPM']


#transcript counts
df_tx=df[['TranscriptID']+names].copy()
#write to file
df.to_csv(output.outfile,sep='\t',index=False)





df_tx.to_csv(output.outfile,sep='\t',index=False)
#gene counts
df_gene=df[['GeneID']+names].copy()
df_gene = df_gene.groupby(['GeneID'],as_index = False).sum()
df_gene.to_csv(DIR+'/results_TPM_gene.tsv',sep='\t',index=False)

0 comments on commit 8ffa22f

Please sign in to comment.