Update to 1.0.0

Updated README, and restructured the console scripts
statbiophys · Jul 13, 2018 · 40c012c · 40c012c
1 parent 7ff1809
commit 40c012c
Show file tree

Hide file tree

Showing 8 changed files with 1,122 additions and 1,449 deletions.
diff --git a/README.md b/README.md
diff --git a/olga/compute_pgen.py b/olga/compute_pgen.py
diff --git a/olga/compute_single_sequence_pgen.py b/olga/compute_single_sequence_pgen.py
diff --git a/olga/generate_synthetic_sequences.py → olga/generate_sequences.py b/olga/generate_synthetic_sequences.py → olga/generate_sequences.py
diff --git a/olga/generation_probability.py b/olga/generation_probability.py
@@ -564,7 +564,7 @@ def list_seqs_from_regex(self, regex_seq, print_warnings = True, raise_overload_
                         new_expression = [int(ex.strip('{}').split(',')[0]), default_max_reps, syms]
                     if new_expression[0] > new_expression[1]:
                         if print_warnings:
-                            print 'Check regex syntax --- should be {min, max}'
+                            print 'Check regex syntax --- should be {min,max}'
                         return []
                     max_num_seqs *= sum([len(syms)**n for n in range(new_expression[0], new_expression[1]+1)])/len(syms)
                 #print new_expression

diff --git a/olga/run_pgen.py b/olga/run_pgen.py
diff --git a/olga/utils.py b/olga/utils.py
@@ -302,6 +302,40 @@ def generate_sub_codons_right(codons_dict):
 
     return sub_codons_right        
 
+def determine_seq_type(seq, aa_alphabet):
+    """Determine the type of a sequence.
+
+    The checks are applied in order (nucleotide, amino acid, regex) and the
+    first one that every character of seq satisfies decides the result. A
+    sequence made only of 'ACGT' characters is therefore always typed as
+    'ntseq', even if those characters are also in aa_alphabet. An empty seq
+    trivially satisfies the first check and is typed as 'ntseq'.
+
+    Parameters
+    ----------
+    seq : str
+        Sequence to be typed.
+    aa_alphabet : str
+        String of all characters recognized as 'amino acids'. (i.e. the keys
+        of codons_dict: aa_alphabet = ''.join(codons_dict.keys())  )
+
+    Returns
+    -------
+    seq_type : str or None
+        The type of sequence seq is: 'ntseq' if every character is in
+        'ACGTacgt', 'aaseq' if every character is in aa_alphabet, 'regex' if
+        every character is in aa_alphabet or one of '[]{}0123456789,', and
+        None if seq matches none of these.
+
+    Examples
+    --------
+    >>> determine_seq_type('TGTGCCAGCAGTTCCGAAGGGGCGGGAGGGCCCTCCCTGAGAGGTCATGAGCAGTTCTTC', aa_alphabet)
+    'ntseq'
+    >>> determine_seq_type('CSARDX[TV]GNX{0,}', aa_alphabet)
+    'regex'
+
+    """
+
+    # Characters allowed in a regex-style sequence: the amino acid symbols
+    # plus the bracket, brace, digit and comma syntax characters.
+    regex_alphabet = aa_alphabet + '[]{}0123456789,'
+
+    if all(x in 'ACGTacgt' for x in seq):
+        return 'ntseq'
+    elif all(x in aa_alphabet for x in seq):
+        return 'aaseq'
+    elif all(x in regex_alphabet for x in seq):
+        # Every character of seq is checked here. Iterating explicitly (and
+        # using generator expressions, which do not leak their loop variable)
+        # avoids testing only a stale 'x' left over from an earlier check.
+        return 'regex'
+
+    # seq contains at least one character outside all recognized alphabets.
+    return None
+
+#%%
 #If using the steady-state distribution for first nucleotide probabilities we include a function to compute it
 def calc_steady_state_dist(R):
     """Calculate the steady state dist of a 4 state markov transition matrix.

diff --git a/setup.py b/setup.py
@@ -7,7 +7,7 @@ def readme():
 data_files_to_include = [('', ['README.md', 'LICENSE', 'example_expanded_amino_acid_alphabet.txt'])]
 
 setup(name='olga',
-      version='0.1.0',
+      version='1.0.0',
       description='Compute generation probability of CDR3 sequences',
       long_description=readme(),
       url='https://github.com/zsethna/OLGA',
@@ -37,8 +37,7 @@ def readme():
       data_files = data_files_to_include,
       include_package_data=True,
       entry_points = {'console_scripts': [
-            'olga-compute_single_sequence_pgen=olga.compute_single_sequence_pgen:main',
-            'olga-run_pgen=olga.run_pgen:main',
-            'olga-generate_synthetic_sequences=olga.generate_synthetic_sequences:main'
+            'olga-compute_pgen=olga.compute_pgen:main',
+            'olga-generate_sequences=olga.generate_sequences:main'
             ], },
       zip_safe=False)