Skip to content

Commit

Permalink
update valid args
Browse files Browse the repository at this point in the history
  • Loading branch information
urmi-21 committed Jan 17, 2021
1 parent 5c767a2 commit 6c3f9fe
Show file tree
Hide file tree
Showing 4 changed files with 113 additions and 40 deletions.
14 changes: 7 additions & 7 deletions case_studies/Covid_RNA-Seq/Snakemake/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -68,11 +68,11 @@ rule merge:
#gene counts
df_gene=df[['GeneID']+names].copy()
df_gene = df_gene.groupby(['GeneID'],as_index = False).sum()
df_gene['GeneID']=df_gene['GeneID'].str.split('.').str[0]
#add gene metadata
md=pd.read_csv('Ens_gene_metadata.txt',sep='\t',skiprows=0)
md.rename(columns={ md.columns[0]: "GeneID" }, inplace = True)
df_gene=md.merge(df_gene, on=['GeneID'], how='right')
#reorder
df_gene = df_gene[ ['Gene name'] + [ col for col in df_gene.columns if col != 'Gene name' ] ]
df_gene['GeneID']=df_gene['GeneID'].str.split('.').str[0]
#add gene metadata
md=pd.read_csv('Ens_gene_metadata.txt',sep='\t',skiprows=0)
md.rename(columns={ md.columns[0]: "GeneID" }, inplace = True)
df_gene=md.merge(df_gene, on=['GeneID'], how='right')
#reorder
df_gene = df_gene[ ['Gene name'] + [ col for col in df_gene.columns if col != 'Gene name' ] ]
df_gene.to_csv(DIR+'/results_TPM_gene.tsv',sep='\t',index=False)
5 changes: 2 additions & 3 deletions pyrpipe/mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,9 +141,8 @@ def build_index(self,index_path,genome,objectid="NA"):
raise OSError("Error creating hisat2 index. Failed to create index directory.")


hisat2Buildvalid_args=['-c','--large-index','-a','-p','--bmax','--bmaxdivn','--dcv','--nodc','-r','-3','-o',
'-t','--localoffrate','--localftabchars','--snp','--haplotype','--ss','--exon',
'--seed','-q','-h','--usage','--version']
hisat2Buildvalid_args=valid_args._args_HISAT2BUILD


args=(genome,index_path)
internal_kwargs={"-p":self._threads}
Expand Down
7 changes: 3 additions & 4 deletions pyrpipe/quant.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ def build_index(self,index_path,transcriptome,objectid="NA"):
#add positional args
internal_kwargs['--']=args

validArgsIndex=['-i','--index','-k','--kmer-size','--make-unique']
validArgsIndex=valid_args._args_KALLISTO_INDEX

kallisto_cmd=['kallisto','index']
kallisto_cmd.extend(pu.parse_unix_args(validArgsIndex,internal_kwargs))
Expand Down Expand Up @@ -280,9 +280,8 @@ def build_index(self,index_path,transcriptome,objectid="NA"):
raise OSError("Error creating salmon index. Failed to create index directory.")


validArgsIndex=['-v','-t','--transcripts','-k','--kmerLen','-i',
'--index','--gencode','--keepDuplicates','-p','--threads','--perfectHash',
'--type','-s','--sasamp','-d','--decoys']
validArgsIndex=valid_args._args_SALMON_INDEX


internal_kwargs={"--threads":_threads,"-t":transcriptome,"-i":index_path}
#read build parameters
Expand Down
127 changes: 101 additions & 26 deletions pyrpipe/valid_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,12 @@
This module contains a list of valid arguments for the tools
"""

# Valid arguments for fasterq-dump (list compiled against version 2.10.0).
_args_FASTERQDUMP = [
    # single-letter switches
    "-e", "-f", "-t", "-s", "-N", "-X", "-a", "-p", "-c", "-o", "-O", "-h", "-V",
    "-L", "-v", "-q", "-b", "-m", "-x", "-S", "-3", "-P", "-M", "-B",
    # long-form switches
    "--option-file", "--strict", "--table", "--include-technical",
    "--skip-technical", "--concatenate-reads",
]

#version 0.6.6
_args_TRIM_GALORE=['--cores','-v','-q','--phred33','--phred64','--fastqc','--fastqc_args','-a','-a2',
'--illumina','--nextera','--small_rna','--consider_already_trimmed',
'--max_length','--stringency','-e','--gzip','--dont_gzip','--length',
Expand All @@ -22,7 +23,7 @@
'--clock','--polyA','--rrbs','--non_directional','--keep','--paired','-t',
'--retain_unpaired','-r1','-r2']


#version 38.76
_args_BBDUK=['in','in2','ref','literal','touppercase','interleaved','qin','reads','copyundefined',
'samplerate','samref','out','out2','outm','outm2','outs','stats','refstats','rpkm',
'dump','duk','nzo','overwrite','showspeed','ziplevel','fastawrap','qout','statscolumns',
Expand All @@ -42,11 +43,15 @@
'entropyk','minbasefrequency','entropytrim','entropymask','entropymark','cardinality',
'cardinalityout','loglogk','loglogbuckets','-Xmx','-eoom','-da']

_args_STAR=['--parametersFiles','--sysShell','--runMode','--runThreadN','--runDirPerm','--runRNGseed','--quantMode','--quantTranscriptomeBAMcompression','--quantTranscriptomeBan','--twopassMode','--twopass1readsN',
'--genomeDir','--genomeLoad','--genomeFastaFiles','--genomeChrBinNbits','--genomeSAindexNbases','--genomeSAsparseD','--genomeSuffixLengthMax','--genomeChainFiles','--genomeFileSizes',
#STAR version 2.7.6a
_args_STAR=['--help','--parametersFiles','--sysShell','--runMode','--runThreadN','--runDirPerm','--runRNGseed','--quantMode','--outFilterIntronStrands','--alignInsertionFlush','--peOverlapMMp',
'--quantTranscriptomeBAMcompression','--quantTranscriptomeBan','--twopassMode','--twopass1readsN','--chimMultimapScoreRange','--chimNonchimScoreDropMin','--chimOutJunctionFormat',
'--genomeDir','--genomeLoad','--genomeFastaFiles','--genomeType','--genomeTransformType','--genomeTransformVCF','--outSAMtlen','--outBAMsortingBinsN','--peOverlapNbasesMin','--waspOutputMode',
'--genomeChrBinNbits','--genomeSAindexNbases','--genomeSAsparseD','--genomeSuffixLengthMax','--readFilesManifest','--readFilesPrefix','--readQualityScoreBase','--seedMapMin',
'--genomeChainFiles','--genomeFileSizes','--genomeConsensusFile','--sjdbGTFtagExonParentGeneName','--sjdbGTFtagExonParentGeneType','--varVCFfile','--readFilesType','--limitNreadsSoft','--seedSplitMin',
'--sjdbFileChrStartEnd','--sjdbGTFfile','--sjdbGTFchrPrefix','--sjdbGTFfeatureExon','--sjdbGTFtagExonParentTranscript','--sjdbGTFtagExonParentGene','--sjdbOverhang','--sjdbScore','--sjdbInsertSave',
'--inputBAMfile','--readFilesIn','--readFilesCommand','--readMapNumber','--readMatesLengthsIn','--readNameSeparator','--clip3pNbases','--clip5pNbases','--clip3pAdapterSeq','--clip3pAdapterMMp','--clip3pAfterAdapterNbases',
'--limitGenomeGenerateRAM','--limitIObufferSize','--limitOutSAMoneReadBytes','--limitOutSJoneRead','--limitOutSJcollapsed','--limitBAMsortRAM','--limitSjdbInsertNsj','--outFileNamePrefix','--outTmpDir','--outTmpKeep',
'--limitGenomeGenerateRAM','--limitIObufferSize','--limitOutSAMoneReadBytes','--limitOutSJoneRead','--limitOutSJcollapsed','--limitBAMsortRAM','--limitSjdbInsertNsj','--outFileNamePrefix','--outTmpDir','--outTmpKeep',
'--outStd','--outReadsUnmapped','--outQSconversionAdd','--outMultimapperOrder','--outSAMtype','--outSAMmode','--outSAMstrandField','--outSAMattributes','--outSAMattrIHstart','--outSAMunmapped','--outSAMorder',
'--outSAMprimaryFlag','--outSAMreadID','--outSAMmapqUnique','--outSAMflagOR','--outSAMflagAND','--outSAMattrRGline','--outSAMheaderHD','--outSAMheaderPG','--outSAMheaderCommentFile','--outSAMfilter','--outSAMmultNmax',
'--outBAMcompression','--outBAMsortingThreadN','--bamRemoveDuplicatesType','--bamRemoveDuplicatesMate2basesN','--outWigType','--outWigStrand','--outWigReferencesPrefix','--outWigNorm','--outFilterType',
Expand All @@ -56,8 +61,11 @@
'--seedSearchStartLmax','--seedSearchStartLmaxOverLread','--seedSearchLmax','--seedMultimapNmax','--seedPerReadNmax','--seedPerWindowNmax','--seedNoneLociPerWindow','--alignIntronMin','--alignIntronMax','--alignMatesGapMax',
'--alignSJoverhangMin','--alignSJstitchMismatchNmax','--alignSJDBoverhangMin','--alignSplicedMateMapLmin','--alignSplicedMateMapLminOverLmate','--alignWindowsPerReadNmax','--alignTranscriptsPerWindowNmax','--alignTranscriptsPerReadNmax',
'--alignEndsType','--alignEndsProtrude','--alignSoftClipAtReferenceEnds','--winAnchorMultimapNmax','--winBinNbits','--winAnchorDistNbins','--winFlankNbins','--winReadCoverageRelativeMin','--winReadCoverageBasesMin',
'--chimOutType','--chimSegmentMin','--chimScoreMin','--chimScoreDropMax','--chimScoreSeparation','--chimScoreJunctionNonGTAG','--chimJunctionOverhangMin','--chimSegmentReadGapMax','--chimFilter','--chimMainSegmentMultNmax']
'--chimOutType','--chimSegmentMin','--chimScoreMin','--chimScoreDropMax','--chimScoreSeparation','--chimScoreJunctionNonGTAG','--chimJunctionOverhangMin','--chimSegmentReadGapMax','--chimFilter','--chimMainSegmentMultNmax',
'--soloCBwhitelist','--soloType','--soloCBstart','--soloCBlen','--soloUMIstart','--soloUMIlen','--soloBarcodeReadLength','--soloCBposition','--soloUMIposition','--soloAdapterSequence','--soloAdapterMismatchesNmax','--soloCBmatchWLtype',
'--soloStrand','--soloFeatures','--soloUMIdedup','--soloUMIfiltering','--soloOutFileNames','--soloCellFilter','--soloOutFormatFeaturesGeneField3','--soloClusterCBfile']

#HISAT2 version 2.2.1
_args_HISAT2=['-x','-1','-2','-U','--sra-acc','-S','-q','--qseq','-f','-r','-c','-s',
'-u','-5','-3','--phred33','--phred64','--int-quals',
'--sra-acc','--n-ceil','--ignore-quals','--nofw','--norc','--pen-cansplice',
Expand All @@ -72,7 +80,11 @@
'--met','--no-head','--no-sq','--rg-id','--rgit-sec-seq','-o','-p',
'--reorder','--mm','--qc-filter','--seed','--non-deterministic',
'--remove-chrname','--add-chrname','--version']
# Valid arguments for hisat2-build (index construction).
_args_HISAT2BUILD = [
    "-c", "--large-index", "-a", "-p", "--bmax", "--bmaxdivn", "--dcv", "--nodc",
    "-r", "-3", "-o", "-t", "--localoffrate", "--localftabchars",
    "--snp", "--haplotype", "--ss", "--exon",
    "--repeat-ref", "--repeat-info", "--repeat-snp", "--repeat-haplotype",
    "--seed", "-q", "-h", "--usage", "--version",
]

#bowtie2 version 2.3.5.1
_args_BOWTIE2=['-x','-1','-2','-U','--interleaved','-S','-b','-q','--tab5','--tab6','--qseq','-f','-r','-F','-c','-s','-u','-5','-3',
'--trim-to','--phred33','--phred64','--int-quals','--very-fast','--fast',
'--sensitive','--very-sensitive','--very-fast-local','--fast-local',
Expand All @@ -87,10 +99,12 @@
'-p','--threads','--reorder','--mm','--qc-filter','--seed','--non-deterministic',
]

# Valid arguments for stringtie (list compiled against version 2.1.4).
_args_STRINGTIE = [
    "-G", "--version", "--conservative", "--rf", "--fr", "-o", "-l",
    "-f", "-L", "-m", "-a", "-j", "-t", "-c", "-s", "-v", "-g", "-M",
    "-p", "-A", "-B", "-b", "-e", "-x", "-u", "-h",
    "--merge", "-F", "-T", "-i",
]

#cufflinks version 2.2.1
_args_CUFFLINKS=['-o','--output-dir','-p','--num-threads','--seed','-G','--GTF','-g','--GTF-guide','-M','--mask-file','-b','--frag-bias-correct','-u','--multi-read-correct','--library-type','--library-norm-method',
'-m','--frag-len-mean','-s','--frag-len-std-dev','--max-mle-iterations','--compatible-hits-norm','--total-hits-norm','--num-frag-count-draws','--num-frag-assign-draws','--max-frag-multihits','--no-effective-length-correction',
'--no-length-correction','-N','--upper-quartile-norm','--raw-mapped-norm','-L','--label','-F','--min-isoform-fraction','-j','--pre-mrna-fraction','-I','--max-intron-length','-a','--junc-alpha','-A','--small-anchor-fraction',
Expand All @@ -108,23 +122,84 @@
'--normalize_by_read_set','--genome_guided_max_intron','--genome_guided_min_coverage','--genome_guided_min_reads_per_partition',
'--grid_conf','--grid_node_CPU','--grid_node_max_memory']

# NOTE(review): flat list of kallisto options. The name _args_KALLISTO is
# assigned again further down in this file as a dict keyed by sub-command, so
# this looks like the pre-change definition shown by the diff -- confirm.
_args_KALLISTO=['-i','--index','-o','--output-dir','--bias','-b','--bootstrap-samples',
'--seed','--plaintext','--fusion','--single','--fr-stranded','--rf-stranded',
'-l','--fragment-length','-s','--sd','-t','--threads','--pseudobam']

# NOTE(review): flat list of salmon options. _args_SALMON is likewise assigned
# a dict keyed by sub-command further down in this file -- confirm which
# definition is meant to survive.
_args_SALMON=['--help-reads','-i','--index','-l','--libType','-r','--unmatedReads',
'-1','--mates1','-2','--mates2','-o','--output','--discardOrphansQuasi',
'--allowOrphansFMD','--seqBias','--gcBias','-p','--threads','--incompatPrior',
'-g','--geneMap','-z','--writeMappings','--meta','--alternativeInitMode',
'--auxDir','-c','--consistentHits','--dumpEq','-d','--dumpEqWeights',
'--fasterMapping','--minAssignedFrags','--reduceGCMemory','--biasSpeedSamp',
'--strictIntersect','--fldMax','--fldMean','--fldSD','-f','--forgettingFactor',
'-m','--maxOcc','--initUniform','-w','--maxReadOcc','--noLengthCorrection',
'--noEffectiveLengthCorrection','--noFragLengthDist','--noBiasLengthThreshold',
'--numBiasSamples','--numAuxModelSamples','--numPreAuxModelSamples','--useVBOpt',
'--rangeFactorizationBins','--numGibbsSamples','--numBootstraps','--thinningFactor',
'-q','--perTranscriptPrior','--vbPrior','--writeOrphanLinks','--writeUnmappedNames',
'-x','--quasiCoverage','--validateMappings']


# No samtools option table is defined at this point; _args_SAMTOOLS is
# assigned a dict further down in this file.
_args_SAMTOOLS=None




# Valid arguments for kallisto (lists compiled against version 0.46.2),
# one table per sub-command.
_args_KALLISTO_INDEX = ["-i", "--index", "-k", "--kmer-size", "--make-unique"]
_args_KALLISTO_QUANT = [
    "-i", "--index", "-o", "--output-dir", "--bias",
    "-b", "--bootstrap-samples", "--genomebam", "--verbose",
    "--seed", "--plaintext", "--fusion", "--single",
    "--fr-stranded", "--rf-stranded", "-g", "-c",
    "-l", "--fragment-length", "-s", "--sd",
    "-t", "--threads", "--pseudobam", "--single-overhang",
]
_args_KALLISTO_BUS = ["-i", "-o", "-x", "-l", "-t", "-b", "-n", "--verbose"]
_args_KALLISTO_H5DUMP = ["-o", "--output-dir"]
_args_KALLISTO_INSPECT = ["-g", "-G", "-b"]
_args_KALLISTO_MERGE = ["-i", "-o", "--output-dir"]
_args_KALLISTO_PSEUDO = [
    "-i", "--index", "-o", "--output-dir", "-u", "--umi", "-b", "--batch",
    "--single", "-l", "--fragment-length", "-s", "--sd", "-t", "--threads",
]

# Lookup from kallisto sub-command name to its table; each value is the very
# same list object as the corresponding module-level name above.
_args_KALLISTO = {
    "index": _args_KALLISTO_INDEX,
    "quant": _args_KALLISTO_QUANT,
    "bus": _args_KALLISTO_BUS,
    "pseudo": _args_KALLISTO_PSEUDO,
    "h5dump": _args_KALLISTO_H5DUMP,
    "inspect": _args_KALLISTO_INSPECT,
    "merge": _args_KALLISTO_MERGE,
}



# Valid arguments for salmon (lists compiled against version 0.14.1),
# one table per sub-command.
_args_SALMON_ALEVIN = [
    "-l", "-i", "-r", "-1", "-2", "-v", "-h", "-o", "-p",
    "--tgMap", "--hash",
    "--dropseq", "--chromiumV3", "--chromium", "--gemcode", "--celseq", "--celseq2",
    "--whitelist", "--noQuant", "--numCellBootstraps", "--forceCells", "--expectCells",
    "--mrna", "--rrna", "--keepCBFraction",
    "--dumpfq", "--dumpBfh", "--dumpUmiGraph", "--dumpFeatures", "--dumpMtx",
    "--lowRegionMinNumBarcodes", "--maxNumBarcodes",
]
_args_SALMON_INDEX = [
    "-v", "--version", "-h", "--help",
    "-t", "--transcripts", "-k", "--kmerLen", "-i", "--index",
    "--gencode", "--keepDuplicates", "-p", "--threads", "--perfectHash",
    "--type", "-d", "--decoys",
]
_args_SALMON_QUANT = [
    "--help-reads", "-i", "--index", "-l", "--libType", "-r", "--unmatedReads",
    "-1", "--mates1", "-2", "--mates2", "-o", "--output",
    "--discardOrphansQuasi", "--allowOrphansFMD",
    "--seqBias", "--gcBias", "-p", "--threads", "--incompatPrior",
    "-g", "--geneMap", "-z", "--writeMappings", "--meta",
    "--alternativeInitMode", "--auxDir", "-c", "--consistentHits",
    "--dumpEq", "-d", "--dumpEqWeights",
    "--fasterMapping", "--minAssignedFrags", "--reduceGCMemory", "--biasSpeedSamp",
    "--strictIntersect", "--fldMax", "--fldMean", "--fldSD",
    "-f", "--forgettingFactor", "-m", "--maxOcc", "--initUniform",
    "-w", "--maxReadOcc", "--noLengthCorrection",
    "--noEffectiveLengthCorrection", "--noFragLengthDist", "--noBiasLengthThreshold",
    "--numBiasSamples", "--numAuxModelSamples", "--numPreAuxModelSamples", "--useVBOpt",
    "--rangeFactorizationBins", "--numGibbsSamples", "--numBootstraps", "--thinningFactor",
    "-q", "--perTranscriptPrior", "--vbPrior", "--writeOrphanLinks", "--writeUnmappedNames",
    "-x", "--quasiCoverage", "--validateMappings", "--consensusSlack", "--minScoreFraction",
    "--maxMMPExtension", "--ma", "--mp", "--go", "--ge", "--bandwidth",
    "--allowDovetail", "--recoverOrphans",
    "--mimicBT2", "--mimicStrictBT2", "--hardFilter", "--skipQuant", "--useEM", "--noGammaDraw",
    "--bootstrapReproject", "--sigDigits",
]
_args_SALMON_QUANTMERGE = [
    "--quants", "--names", "-c", "--column", "-o", "--output", "--genes", "--missing",
]

# Lookup from salmon sub-command name to its table; each value is the very
# same list object as the corresponding module-level name above.
_args_SALMON = {
    "index": _args_SALMON_INDEX,
    "quant": _args_SALMON_QUANT,
    "alevin": _args_SALMON_ALEVIN,
    "quantmerge": _args_SALMON_QUANTMERGE,
}


# Valid arguments for samtools (lists compiled against version 1.9),
# one table per sub-command.
_args_SAMTOOLS_SORT = [
    '-l', '-m', '-n', '-t', '-o', '-T', '-O', '-@',
    '--input-fmt-option', '--output-fmt', '--output-fmt-option',
    '--reference', '--threads',
]
# The short forms -O, -T and -@ previously appeared here as 'O', 'T' and '@'
# (no leading dash), which can never match an option as the user writes it.
# They now carry the dash, consistent with the sort and merge tables.
_args_SAMTOOLS_VIEW = [
    '-b', '-C', '-1', '-u', '-h', '-H', '-c', '-o', '-U', '-t', '-L', '-r', '-R',
    '-q', '-l', '-m', '-f', '-F', '-G', '-s', '-M', '-x', '-B', '-?', '-S',
    '-O', '-T', '-@',
    '--input-fmt-option', '--output-fmt', '--output-fmt-option',
    '--reference', '--threads',
]
_args_SAMTOOLS_MERGE = [
    '-n', '-t', '-r', '-u', '-f', '-1', '-l', '-R', '-h', '-c', '-p', '-s', '-b',
    '-O', '-@',
    '--input-fmt-option', '--output-fmt', '--output-fmt-option',
    '--reference', '--threads',
]

# Lookup from samtools sub-command name to its table; each value is the very
# same list object as the corresponding module-level name above.
_args_SAMTOOLS = {
    'sort': _args_SAMTOOLS_SORT,
    'view': _args_SAMTOOLS_VIEW,
    'merge': _args_SAMTOOLS_MERGE,
}












0 comments on commit 6c3f9fe

Please sign in to comment.