In [1]:
def reverse_complement(seq):
    """
    Return the reverse complement of a DNA sequence.

    Parameters
    ----------
    seq : str
        Sequence over A, T, G, C and N, in upper or lower case.

    Returns
    -------
    str
        `seq` reversed, with A<->T and G<->C swapped. N maps to itself,
        case is preserved, and any other character passes through unchanged.
    """
    # Lowercase bases are listed explicitly so that lowercase input is
    # complemented instead of being reversed only.
    complement = str.maketrans('ATGCNatgcn', 'TACGNtacgn')
    return seq[::-1].translate(complement)

In [2]:
# Sanity check: 'ATGC' reversed is 'CGTA'; complementing each base gives 'GCAT'.
reverse_complement('ATGC')

'GCAT'

In [3]:
def longest_common_substring(str1, str2):
    """
    Find the longest substring shared by two strings.

    Candidate lengths are tried from longest to shortest, and for each length
    the windows of `str1` are scanned left to right, so among equally long
    matches the one that starts earliest in `str1` is returned.

    Parameters
    ----------
    str1, str2 : str
        The strings to compare.

    Returns
    -------
    str
        The longest common substring, or '' if the strings share no character.
    """
    # A common substring cannot be longer than the shorter input.
    longest_possible = min(len(str1), len(str2))

    # The stop value is 0 (not 1) so single-character matches are considered.
    for k in range(longest_possible, 0, -1):
        for i in range(len(str1) - k + 1):
            candidate = str1[i:i+k]
            if candidate in str2:
                return candidate

    # Nothing in common (this also covers empty input).
    return ''

In [6]:
# Example call: 'ACGT' is the longest run of characters that occurs in both strings.
longest_common_substring('ACGTGGAAAGCCA', 'GTACACACGTTTTGAGAGACAC')

'ACGT'

In [41]:
def parens_count(struc):
    """
    Report whether a structure has as many '(' as ')' characters.

    Only the totals are compared; the order of the parentheses is not checked.
    """
    n_open = struc.count("(")
    n_close = struc.count(")")
    return n_open == n_close

In [43]:
def dot_parens_to_bp(struc):
    """
    Translate a dot-parens structure into base-pair index tuples.

    Each ')' is matched with the most recent unmatched '('; every other
    character is skipped. Returns a sorted tuple of (open, close) index
    pairs, or False (after printing a message) when the parentheses do
    not balance.
    """
    # Cheap rejection first: unequal numbers of '(' and ')'
    balanced = parens_count(struc)
    if not balanced:
        print("Error in input structure.")
        return False

    unmatched = []  # indices of '(' still waiting for a partner
    pairs = []

    for pos, char in enumerate(struc):
        if char == '(':
            unmatched.append(pos)
            continue
        if char != ')':
            continue

        # A ')' with nothing left to close means the order is wrong
        if not unmatched:
            print("Error in input structure")
            return False
        pairs.append((unmatched.pop(), pos))

    # Hand back an immutable, sorted collection of pairs
    pairs.sort()
    return tuple(pairs)

In [44]:
# Example: each tuple in the result is (index of '(', index of its matching ')').
dot_parens_to_bp('..(((...)))(((((....)))).)..')

((2, 10), (3, 9), (4, 8), (11, 25), (12, 23), (13, 22), (14, 21), (15, 20))

In [45]:
def hairpin_check(bps):
    """
    Verify that no base pair closes a hairpin that is too short.

    A pair (i, j) fails when j - i is less than 4. On the first failing pair
    a message is printed and False is returned; otherwise True is returned.
    """
    too_short = any(pair[1] - pair[0] < 4 for pair in bps)
    if too_short:
        print("A hairpin is too short.")
        return False

    return True

In [46]:
def rna_ss_validator(seq, sec_struc, wobble=True):
    """
    Validate an RNA secondary structure against its sequence.

    Parameters
    ----------
    seq : str
        RNA sequence; bases are compared case-insensitively.
    sec_struc : str
        Structure in dot-parens notation, one character per base of `seq`.
    wobble : bool, default True
        If True, G-U pairs are accepted in addition to G-C and A-U.

    Returns
    -------
    bool
        True if the lengths match, the parentheses balance, no hairpin is
        too short, and every paired position holds an allowed base pair.
        Otherwise a message is printed and False is returned.
    """
    # The structure must annotate every base exactly once. Without this
    # check a too-short sequence raises IndexError in the pairing loop and
    # a too-long one may be silently accepted.
    if len(seq) != len(sec_struc):
        print("Sequence and structure lengths differ.")
        return False

    # Convert structure to base pairs
    bps = dot_parens_to_bp(sec_struc)

    # dot_parens_to_bp signals failure with False. Compare by identity:
    # a structure with no pairs yields an empty tuple, which is also falsy
    # but is a perfectly valid structure.
    if bps is False:
        return False

    # Do the hairpin check
    if not hairpin_check(bps):
        return False

    # Possible base pairs
    if wobble:
        bps_check = ('gc', 'cg', 'au', 'ua', 'gu', 'ug')
    else:
        bps_check = ('gc', 'cg', 'au', 'ua')

    # Check complementarity of every paired position
    for i, j in bps:
        if (seq[i] + seq[j]).lower() not in bps_check:
            print("Invalid base pair.")
            return False

    return True

In [47]:
# Cases that should validate. Indices in the comments below are 0-based.
print('Should be True:')
# Canonical pairs only: G-C, C-G, A-U.
print(rna_ss_validator('GCAUCUAUGC', '(((....)))'))
# Outermost pair is G-U, accepted because wobble defaults to True.
print(rna_ss_validator('GCAUCUAUGU', '(((....)))'))
# Same sequence, with positions 1 and 8 left unpaired.
print(rna_ss_validator('GCAUCUAUGU', '(.(....).)'))
# Longer example with several stems.
print(rna_ss_validator('AUUGAUGCACGUGCAUCCCCAGCGGGUCCCGCGAGCUCACCCCCUUCCAAAAGCACCACGUGCCAGGCCUCGCCCCCGGAAGUAUACCUGUGAGCCAGA',
                       '...(((((....)))))....((((...))))..((((((...(((((....((((...))))..(((...)))...))))).......))))))....'))

# Cases that should be rejected; the validator prints the reason before
# the False result is printed.
print('\nShould be False:')
# Innermost pair (2, 7) is A-C.
print(rna_ss_validator('GCAUCUACGC', '(((....)))'), '\n')
# Outermost pair is G-U, which is not allowed when wobble=False.
print(rna_ss_validator('GCAUCUAUGU', '(((....)))', wobble=False), '\n')
# The shifted closing parens pair position 0 with position 8 (G-G).
print(rna_ss_validator('GCAUCUAUGU', '(.(....)).'), '\n')
# Innermost pair (3, 6) spans fewer than 4 positions.
print(rna_ss_validator('GCCCUUGGCA', '(.((..))).'),'\n')
# The long valid example with the U at index 36 changed to C, which breaks
# its pair with the A at index 92.
print(rna_ss_validator('AUUGAUGCACGUGCAUCCCCAGCGGGUCCCGCGAGCCCACCCCCUUCCAAAAGCACCACGUGCCAGGCCUCGCCCCCGGAAGUAUACCUGUGAGCCAGA',
                       '...(((((....)))))....((((...))))..((((((...(((((....((((...))))..(((...)))...))))).......))))))....'))

Should be True:
True
True
True
True

Should be False:
Invalid base pair.
False 

Invalid base pair.
False 

Invalid base pair.
False 

A hairpin is too short.
False 

Invalid base pair.
False
