In [22]:
class Node:
    """A graph vertex identified by ``label``.

    The two edge lists are stored exactly as passed in (no copy is made):
    ``inEdges`` holds whatever the caller supplies as incoming edges and
    ``outEdges`` whatever it supplies as outgoing edges.
    """

    def __init__(self, inEdges, outEdges, label):
        self.label = label
        self.inEdges, self.outEdges = inEdges, outEdges

class DirectedGraph:
    """Directed multigraph built from an adjacency dictionary.

    ``dirGraph`` maps a node label to the list of labels its outgoing edges
    point to. A label may appear several times in a list (parallel edges).
    Every label seen, either as a key or as an edge target, gets a ``Node``
    in ``self.nodes``.
    """

    def __init__(self, dirGraph):
        self.paths = []
        self.startNodes = []
        self.dirGraph = dirGraph

        self.nodes = self.genNodes(dirGraph)

    def genNodes(self, dirGraph):
        """Build the ``{label: Node}`` dictionary for ``dirGraph``.

        Each node's ``outEdges`` lists the labels it points to and its
        ``inEdges`` lists the labels pointing to it, one entry per edge.
        The lists are fresh copies, so the input dictionary is never
        aliased or modified by later operations on the nodes.
        """
        nodes = {}  # label -> Node
        for key, value in dirGraph.items():
            if key not in nodes:
                # Copy instead of storing dirGraph[key] itself; otherwise any
                # mutation of the node's edge list would also change the
                # caller's dictionary.
                nodes[key] = Node([], list(value), key)
            else:
                # Node was created earlier as the target of another edge.
                nodes[key].outEdges.extend(value)
            for edge in value:
                if edge not in nodes:
                    nodes[edge] = Node([key], [], edge)
                else:
                    nodes[edge].inEdges.append(key)
        return nodes

    def findEulerPath(self):
        """Return the nodes of an Eulerian path, in visiting order.

        The walk starts at a node that has exactly one more outgoing than
        incoming edge (if several qualify, the last one in ``self.nodes``
        order is used) and follows each node's outgoing edges in list order.
        Connectivity of the graph is not verified.

        The graph is left untouched, so the method can be called repeatedly.

        Raises:
            SystemExit: with the message ``'No Eulerian Path'`` when no node
                has one more outgoing than incoming edge. This includes
                graphs in which every node is balanced.
        """
        start = None  # None (not a dummy label) so that '' stays a valid label
        for node in self.nodes.values():
            if len(node.inEdges) + 1 == len(node.outEdges):
                start = node

        if start is None:
            # Same exception type and message that quit('...') produced, but
            # without depending on the interactive helper injected by `site`.
            raise SystemExit('No Eulerian Path')

        # Index of the next unused outgoing edge per node label. Using a
        # cursor instead of popping keeps the node lists intact and avoids
        # the O(n) cost of list.pop(0) for every edge.
        nextEdge = {}
        path = []
        stack = [start]
        while stack:
            current = stack[-1]
            position = nextEdge.get(current.label, 0)
            if position < len(current.outEdges):
                nextEdge[current.label] = position + 1
                stack.append(self.nodes[current.outEdges[position]])
            else:
                # Dead end: this node is final in the remaining walk.
                path.append(stack.pop())
        # Nodes were collected from the end of the path backwards.
        return path[::-1]

def deBrujin(kmers):
    """Build a de Bruijn adjacency dictionary from an iterable of k-mers.

    For every k-mer, its prefix (all but the last character) is mapped to a
    list containing its suffix (all but the first character). Suffixes are
    appended in input order and duplicates are kept.
    """
    adjacency = {}
    for kmer in kmers:
        prefix, suffix = kmer[:-1], kmer[1:]
        adjacency.setdefault(prefix, []).append(suffix)
    return adjacency

def main(fName):
    """Reconstruct a string from the k-mers in ``fName`` and print it.

    The first line of the file must hold an integer; every following
    non-blank line is taken as one k-mer, with surrounding whitespace
    stripped. The k-mers are turned into a de Bruijn graph, an Eulerian path
    is found through it, and the sequence spelled by that path is printed.

    Does nothing when ``fName`` is the empty string.
    """
    if fName == '':
        return

    with open(fName) as inFile:
        lines = inFile.readlines()

    # The header value is not needed afterwards, but parsing it still rejects
    # files whose first line is not an integer (ValueError), as before.
    int(lines[0])

    # Skip blank lines (e.g. a trailing newline at end of file) so they do
    # not turn into empty k-mers and a bogus '' node in the graph.
    kmers = [stripped for stripped in (line.strip() for line in lines[1:]) if stripped]

    graph = DirectedGraph(deBrujin(kmers))
    seqList = graph.findEulerPath()

    # The first node contributes its whole label; each later node extends
    # the sequence by its last character only.
    seq = seqList[0].label + ''.join(node.label[-1] for node in seqList[1:])
    print(seq)

# Script entry point: the empty file name makes main() return immediately.
if __name__ == '__main__':
    main('')

In [23]:
# Run the reconstruction on 'rosalind_ba3h.txt'; main() prints the resulting sequence.
main('rosalind_ba3h.txt')

CCTTACCCGTGCCGTTGATTCACACGTCCCATACCCCTCGACTGTGGGGTGCCTAGTGTTGGAGTAGTAACTGGACGACAACTCCGGCCTCAAACATTCGTGTGTCGACAGTTTCTAATAGCGCCCAGATGCCCCTGATGGGTTCCTGCGAACTCAGCATGTAAGCGCCGCCATGAGTGTAGAAGCATGGGTGGCACCGTCGAGCTGGCGGTGGGTAAGGTCAACCTCATTCAATAATCACAGCTAGGAACACGGGCAAGCTGAGTTCGGGGCAGGAGTAGAGGGCGATATCCGGTCGATCCCTGCCCAGCAAATAAAAGCACTGTTCCACTCCATTAAAACGGCAGCACTGATCCGCAAGTCGATGACTACGTGAAGCGTCATGGGCTACGTGTTCGCACCCTAGAGGGATAACTAGAATACTTTTATCTCGTCGGACGGTCTAGGCCGCGTAGTGGGTATATCCGCTCCACGATCTGTACACTTACAGTCGCCCTTAATGTTAGGTAGCCCCTCGTCGTTTAGTGTCCCTTAAAGACTCATCTGGGTATGACTCTCCATTTGTCGAGCTCCACCTAACTTAGAGATCTCGGGACTACAACTATATTTTCCCTGTGTAGACAACATGTGATGCTTCCGCTTGTCCAACGGTCGACCGATATGCACAATAAGAAGACGCTGCTCTCATTTATGGACTTTCGTTTATGCCCCGCTCCGAGTGCCCTTAACTCTCGTGTGATACTCAAACCCAACTGGTGAGCTTTATGCGACCCGTTTTCGCTACAATCGGCCCTGGCCCCTGACTCATTACGGTGGGATTCCGACATTCTCACCACAGGGGGCCGAGGACCCTTTCTGGTTGCCCAGCCGGGTATGCTGTAGTAACGGTGGGTTTGAGTTATTTCTGGCTAATCCTGTACACTTTACAGGTTACCCCACAGGCGGCAAATATATGGTGGTCGCCCGCGCTCCCATGTAAAATGGTTGGGCTTCTGAACAT