# In [None]:
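"""
Helpers for building 25-nucleotide probe sequences by extending k-mers.

Sequences are strings over the nucleotides A, C, G, and T.
"""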
import pandas as pd
import numpy as np
import os


def extRight(seq, k, prefSet, seenSet, genSeqList):
    """
    Extend a sequence to the right along kmers that have not been used yet.

    Given:
        1. seq - sequence to extend
        2. k - kmer length
        3. prefSet - set of kmers that may be used for an extension
        4. seenSet - set of kmers already used; every kmer consumed here is
            added to it, so the caller's set is modified in place
        5. genSeqList - list that finished sequences are appended to
    At each step the last k-1 characters of the sequence are combined with
        A, C, G, and T (in that order). Every resulting kmer that is in
        prefSet and not in seenSet is marked as seen and followed depth-first.
        A sequence for which no nucleotide could be added is appended to
        genSeqList. Nothing is returned.
    The walk uses an explicit stack rather than recursion, so long extension
        paths cannot exceed Python's recursion limit. Visit order, seenSet
        updates, and the order of genSeqList match a recursive depth-first
        walk.
    """
    bases = "ACGT"
    # Frame layout: [sequence, its (k-1)-suffix, index of next base to try,
    #                whether any extension from this sequence succeeded]
    stack = [[seq, seq[len(seq) - k + 1:], 0, False]]
    while stack:
        frame = stack[-1]
        current, suffix = frame[0], frame[1]
        child = None
        while frame[2] < len(bases):
            base = bases[frame[2]]
            frame[2] += 1
            kmer = suffix + base
            if kmer in prefSet and kmer not in seenSet:
                seenSet.add(kmer)
                frame[3] = True
                child = current + base
                break
        if child is not None:
            # Descend first; the remaining bases of this frame are tried
            # only after the whole subtree below the child is finished.
            stack.append([child, child[len(child) - k + 1:], 0, False])
        else:
            if not frame[3]:
                genSeqList.append(current)
            stack.pop()
        
def extLeft(seq, k, prefSet, seenSet, genSeqLeftList):
    """
    Extend a sequence to the left along kmers that have not been used yet.

    Given:
        1. seq - sequence to extend
        2. k - kmer length
        3. prefSet - set of kmers that may be used for an extension
        4. seenSet - set of kmers already used; every kmer consumed here is
            added to it, so the caller's set is modified in place
        5. genSeqLeftList - list that finished sequences are appended to
    At each step A, C, G, and T (in that order) are placed in front of the
        first k-1 characters of the sequence. Every resulting kmer that is in
        prefSet and not in seenSet is marked as seen and followed depth-first.
        A sequence for which no nucleotide could be added is appended to
        genSeqLeftList. Nothing is returned.
    The walk uses an explicit stack rather than recursion, so long extension
        paths cannot exceed Python's recursion limit. Visit order, seenSet
        updates, and the order of genSeqLeftList match a recursive depth-first
        walk.
    """
    bases = "ACGT"
    # Frame layout: [sequence, its (k-1)-prefix, index of next base to try,
    #                whether any extension from this sequence succeeded]
    stack = [[seq, seq[:k - 1], 0, False]]
    while stack:
        frame = stack[-1]
        current, prefix = frame[0], frame[1]
        child = None
        while frame[2] < len(bases):
            base = bases[frame[2]]
            frame[2] += 1
            kmer = base + prefix
            if kmer in prefSet and kmer not in seenSet:
                seenSet.add(kmer)
                frame[3] = True
                child = base + current
                break
        if child is not None:
            # Descend first; the remaining bases of this frame are tried
            # only after the whole subtree below the child is finished.
            stack.append([child, child[:k - 1], 0, False])
        else:
            if not frame[3]:
                genSeqLeftList.append(current)
            stack.pop()

def extCaniR(seq, kSet,kDictNonSpecDiff, k=6):
    """
    Helper function for extend25: add at most one nucleotide on the right.

    Given:
        1. seq - sequence to extend
        2. kSet - set of kmers that may be used
        3. kDictNonSpecDiff - dictionary mapping a kmer to its score difference
        4. k - kmer length (default 6)
    The last k-1 characters of seq are combined with A, C, G, and T in turn.
        Among the resulting kmers that are in kSet, the one with the highest
        dictionary value is chosen; it must be strictly greater than -0.5,
        and on a tie the earlier nucleotide wins.
    Returns seq with the chosen nucleotide appended, or seq unchanged when no
        candidate qualifies. A kmer that is in kSet but missing from the
        dictionary raises KeyError.
    """
    tail = seq[len(seq) - k + 1:]
    chosen, topScore = '', -0.5
    for base in ('A', 'C', 'G', 'T'):
        kmer = tail + base
        if kmer not in kSet:
            continue
        score = kDictNonSpecDiff[kmer]
        if score > topScore:
            chosen, topScore = base, score
    return seq + chosen

def extCaniL(seq, kSet,kDictNonSpecDiff, k=6):
    """
    Helper function for extend25: add at most one nucleotide on the left.

    Given:
        1. seq - sequence to extend
        2. kSet - set of kmers that may be used
        3. kDictNonSpecDiff - dictionary mapping a kmer to its score difference
        4. k - kmer length (default 6)
    A, C, G, and T are placed in turn in front of the first k-1 characters of
        seq. Among the resulting kmers that are in kSet, the one with the
        highest dictionary value is chosen; it must be strictly greater than
        -0.5, and on a tie the earlier nucleotide wins.
    Returns seq with the chosen nucleotide prepended, or seq unchanged when no
        candidate qualifies. A kmer that is in kSet but missing from the
        dictionary raises KeyError.
    """
    head = seq[:k - 1]
    chosen, topScore = '', -0.5
    for base in ('A', 'C', 'G', 'T'):
        kmer = base + head
        if kmer not in kSet:
            continue
        score = kDictNonSpecDiff[kmer]
        if score > topScore:
            chosen, topScore = base, score
    return chosen + seq

def extend25(seq, kSet,kDictNonSpecDiff):
    """
    Greedy extension of a sequence to exactly 25 characters.

    Given a sequence to extend, a set of kmers, and a dictionary with the
        differences between conditions.
    Each round tries extCaniL first and extCaniR second, both with their
        default k. The extended sequence is returned as soon as its length
        equals 25. If a full round (left, then right) adds nothing, the
        string "N" is returned instead.
    The length test is an exact equality, so an input that is already 25
        characters or longer can only end through the "N" branch.
    """
    probe = seq
    lengthBefore = len(probe)
    while True:
        # Left is attempted before right within every round.
        for grow in (extCaniL, extCaniR):
            probe = grow(probe, kSet, kDictNonSpecDiff)
            if len(probe) == 25:
                return probe
        if len(probe) == lengthBefore:
            return "N"
        lengthBefore = len(probe)
        
def windowSearchKmer(seq, k):
    """
    Collect every length-k window of a sequence.

    Given a sequence and a length k, slides a window of width k across the
        sequence one position at a time and returns the distinct windows as a
        set. A sequence shorter than k yields an empty set.
    """
    lastStart = len(seq) - k
    return {seq[start:start + k] for start in range(lastStart + 1)}


def extendKmers(kmerList, prefSet, k=6):
    """
    Generate probes by extending each seed kmer as far as possible.

    Given:
        1. kmerList - seed kmers to start from
        2. prefSet - set of kmers that may be used for extension
        3. k - kmer length passed to extRight and extLeft (default 6, the
            value that was previously hard-coded)
    For every seed, extRight produces the right-extended sequences and each of
        those is then passed to extLeft. Both directions share one seen set
        per seed, so a kmer is used at most once while extending that seed.
        The seed itself is not pre-marked as seen.
    Returns all distinct generated probes as a list. The list is built from a
        set, so its order is not defined.
    """
    uniqueProbes = set()
    for seed in kmerList:
        usedKmers = set()  # kmers already consumed while extending this seed
        rightExtended = []
        extRight(seed, k, prefSet=prefSet, seenSet=usedKmers,
                 genSeqList=rightExtended)
        fullyExtended = []
        for partial in rightExtended:
            extLeft(partial, k, prefSet=prefSet, seenSet=usedKmers,
                    genSeqLeftList=fullyExtended)
        uniqueProbes.update(fullyExtended)
    return list(uniqueProbes)


def all25mersLongProbes(totalProbesList):
    """
    Collect the 25mer windows of every sufficiently long probe.

    Given a list of probes generated by extendKmers
    Probes shorter than 25 characters are ignored. For each remaining probe,
        all of its 25-character substrings (from windowSearchKmer) are added
        to one set.
    Returns the distinct 25mers as a list; the order is not defined.
    """
    windows = set()
    longProbes = (probe for probe in totalProbesList if len(probe) >= 25)
    for probe in longProbes:
        windows.update(windowSearchKmer(probe, 25))
    return list(windows)


def extendShortProbes(totalProbesList, nonSpecWCSet,kDictNonSpecDiff):
    """
    Extend probes that are too short up to 25 characters.

    Given:
        1. totalProbesList - list of probes generated by extendKmers
        2. nonSpecWCSet - set of kmers handed to extend25
        3. kDictNonSpecDiff - dictionary of kmer score differences handed to
            extend25
    Only probes longer than 6 and shorter than 25 characters are processed.
        Each is passed to extend25; a result of "N" means the probe could not
        reach 25 characters and is dropped.
    Returns the distinct extended probes as a list; the order is not defined.
    """
    shortExtProbeSet = set()
    for probe in totalProbesList:
        if 6 < len(probe) < 25:
            extSeq = extend25(probe, nonSpecWCSet, kDictNonSpecDiff)
            if extSeq != "N":
                # add(), not update(): update() would iterate the string and
                # insert its individual characters instead of the probe.
                shortExtProbeSet.add(extSeq)
    return list(shortExtProbeSet)