Skip to content

Commit

Permalink
Update to 1.0.0
Browse files Browse the repository at this point in the history
Updated README, and restructured the console scripts
  • Loading branch information
Zachary Sethna authored and Zachary Sethna committed Jul 13, 2018
1 parent 7ff1809 commit 40c012c
Show file tree
Hide file tree
Showing 8 changed files with 1,122 additions and 1,449 deletions.
269 changes: 172 additions & 97 deletions README.md

Large diffs are not rendered by default.

782 changes: 782 additions & 0 deletions olga/compute_pgen.py

Large diffs are not rendered by default.

358 changes: 0 additions & 358 deletions olga/compute_single_sequence_pgen.py

This file was deleted.

278 changes: 130 additions & 148 deletions olga/generate_synthetic_sequences.py → olga/generate_sequences.py

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion olga/generation_probability.py
Expand Up @@ -564,7 +564,7 @@ def list_seqs_from_regex(self, regex_seq, print_warnings = True, raise_overload_
new_expression = [int(ex.strip('{}').split(',')[0]), default_max_reps, syms]
if new_expression[0] > new_expression[1]:
if print_warnings:
print 'Check regex syntax --- should be {min, max}'
print 'Check regex syntax --- should be {min,max}'
return []
max_num_seqs *= sum([len(syms)**n for n in range(new_expression[0], new_expression[1]+1)])/len(syms)
#print new_expression
Expand Down
841 changes: 0 additions & 841 deletions olga/run_pgen.py

This file was deleted.

34 changes: 34 additions & 0 deletions olga/utils.py
Expand Up @@ -302,6 +302,40 @@ def generate_sub_codons_right(codons_dict):

return sub_codons_right

def determine_seq_type(seq, aa_alphabet):
    """Determine the type of a sequence.

    Parameters
    ----------
    seq : str
        Sequence to be typed.
    aa_alphabet : str
        String of all characters recognized as 'amino acids'. (i.e. the keys
        of codons_dict: aa_alphabet = ''.join(codons_dict.keys()) )

    Returns
    -------
    seq_type : str or None
        The type of sequence seq is: 'ntseq', 'aaseq', or 'regex'. None is
        returned if seq contains a character outside all three alphabets.

    Notes
    -----
    The checks run in order (nucleotide, amino acid, regex), so a sequence
    made only of A/C/G/T is typed as 'ntseq' even if those characters are
    also in aa_alphabet. An empty seq is typed as 'ntseq' because the first
    check is vacuously true.

    Examples
    --------
    >>> determine_seq_type('TGTGCCAGCAGTTCCGAAGGGGCGGGAGGGCCCTCCCTGAGAGGTCATGAGCAGTTCTTC', aa_alphabet)
    'ntseq'
    >>> determine_seq_type('CSARDX[TV]GNX{0,}', aa_alphabet)
    'regex'
    """

    if all(x in 'ACGTacgt' for x in seq):
        return 'ntseq'
    if all(x in aa_alphabet for x in seq):
        return 'aaseq'

    # Regex sequences may additionally use bracket/brace syntax, digits and
    # commas. Every character of seq must be checked here; testing only a
    # single leftover character would misclassify arbitrary strings.
    regex_alphabet = aa_alphabet + '[]{}0123456789,'
    if all(x in regex_alphabet for x in seq):
        return 'regex'

    return None

#%%
#If using the steady-state distribution for first nucleotide probabilities we include a function to compute it
def calc_steady_state_dist(R):
"""Calculate the steady state dist of a 4 state markov transition matrix.
Expand Down
7 changes: 3 additions & 4 deletions setup.py
Expand Up @@ -7,7 +7,7 @@ def readme():
data_files_to_include = [('', ['README.md', 'LICENSE', 'example_expanded_amino_acid_alphabet.txt'])]

setup(name='olga',
version='0.1.0',
version='1.0.0',
description='Compute generation probability of CDR3 sequences',
long_description=readme(),
url='https://github.com/zsethna/OLGA',
Expand Down Expand Up @@ -37,8 +37,7 @@ def readme():
data_files = data_files_to_include,
include_package_data=True,
entry_points = {'console_scripts': [
'olga-compute_single_sequence_pgen=olga.compute_single_sequence_pgen:main',
'olga-run_pgen=olga.run_pgen:main',
'olga-generate_synthetic_sequences=olga.generate_synthetic_sequences:main'
'olga-compute_pgen=olga.compute_pgen:main',
'olga-generate_sequences=olga.generate_sequences:main'
], },
zip_safe=False)

0 comments on commit 40c012c

Please sign in to comment.