### Construct the De Bruijn Graph of a String

https://rosalind.info/problems/ba3d/

In [4]:
def DeBruijnGraphFromAString(k, text):
    """Build the De Bruijn graph of ``text`` as an adjacency dict.

    Each length-``k`` window of ``text`` contributes one edge from the window
    without its last character to the window without its first character.

    Args:
        k: Length of the windows (k-mers) taken from ``text``.
        text: The string to decompose.

    Returns:
        A dict mapping each prefix to the list of suffixes it points to.
        Keys appear in order of first occurrence, targets in order of
        appearance in ``text``; a repeated k-mer yields a repeated edge.
    """
    adjacency = {}
    # All consecutive windows of length k, left to right.
    windows = (text[pos:pos + k] for pos in range(len(text) - k + 1))
    for window in windows:
        adjacency.setdefault(window[:-1], []).append(window[1:])
    return adjacency

In [5]:
# BA3D driver: the dataset holds k on the first line and the text on the
# second. The adjacency list is printed sorted by source node.
with open("data/rosalind_ba3d.txt") as f:
    k, text = int(f.readline().strip()), f.readline().strip()
    result = DeBruijnGraphFromAString(k, text)
    for key in sorted(result):
        print(f"{key} -> {','.join(result[key])}")

AAAAAGTACAG -> AAAAGTACAGC
AAAACAAGAGA -> AAACAAGAGAG
AAAACGACCAG -> AAACGACCAGT
AAAAGTACAGC -> AAAGTACAGCT
AAAATAAAACG -> AAATAAAACGA
AAAATATTGCT -> AAATATTGCTA
AAAATCAGCGA -> AAATCAGCGAT
AAAATTGACCC -> AAATTGACCCG
AAACAAGAGAG -> AACAAGAGAGC
AAACATTATTA -> AACATTATTAA
AAACCTCATGA -> AACCTCATGAA
AAACCTCATGT -> AACCTCATGTT
AAACCTTCGTC -> AACCTTCGTCA
AAACGACCAGT -> AACGACCAGTA
AAACGACCTCG -> AACGACCTCGG
AAACGTATAGC -> AACGTATAGCC
AAACGTCACAC -> AACGTCACACG
AAACTCGTATT -> AACTCGTATTA
AAAGAATTTTG -> AAGAATTTTGC
AAAGCGAAAAC -> AAGCGAAAACA
AAAGCGAAGCA -> AAGCGAAGCAA
AAAGGACCTGA -> AAGGACCTGAT
AAAGGCCCCGT -> AAGGCCCCGTT
AAAGTACAGCT -> AAGTACAGCTT
AAAGTGTAAAC -> AAGTGTAAACC
AAATAAAACGA -> AATAAAACGAC
AAATAAGCATT -> AATAAGCATTT
AAATAGTCCTG -> AATAGTCCTGC
AAATATTGCTA -> AATATTGCTAA
AAATCAGCGAT -> AATCAGCGATG
AAATGACCTAG -> AATGACCTAGA
AAATTATCTGT -> AATTATCTGTC
AAATTGACCCG -> AATTGACCCGA
AACAAGAGAGC -> ACAAGAGAGCA
AACAAGGCTCA -> ACAAGGCTCAC
AACACATACCT -> ACACATACCTC
AACAGGACTTC -> ACAGGACTTCC
A

### Construct the De Bruijn Graph of a Collection of k-mers

https://rosalind.info/problems/ba3e/

In [6]:
def DeBruijnGraphFromKmers(kmers):
    """Build the De Bruijn graph of a collection of k-mers.

    Each k-mer contributes one edge from the k-mer without its last character
    to the k-mer without its first character.

    Args:
        kmers: Iterable of strings.

    Returns:
        A dict mapping each prefix to the list of suffixes it points to, in
        the order the k-mers were supplied; duplicate k-mers yield duplicate
        edges.
    """
    adjacency = {}
    for pattern in kmers:
        head, tail = pattern[:-1], pattern[1:]
        adjacency.setdefault(head, []).append(tail)
    return adjacency

In [7]:
# BA3E driver: one k-mer per line. Blank lines (for example a trailing empty
# line at the end of the dataset) are skipped so they do not become an
# empty-string k-mer and a spurious " -> " edge in the output.
with open("data/rosalind_ba3e.txt") as f:
    kmers = [line.strip() for line in f if line.strip()]

# The file is no longer needed once the k-mers are in memory.
result = DeBruijnGraphFromKmers(kmers)
for key in sorted(result):
    print(f"{key} -> {','.join(result[key])}")

AAAAAGTCTCTTATTACAC -> AAAAGTCTCTTATTACACG
AAAACCATAGTGCAAGGTA -> AAACCATAGTGCAAGGTAT
AAAACGCAAATAGGCTATC -> AAACGCAAATAGGCTATCG
AAAACTAAAAGAACTATCC -> AAACTAAAAGAACTATCCC
AAAACTGGGTCCTCTCCAC -> AAACTGGGTCCTCTCCACT
AAAAGAACTATCCCCAGTA -> AAAGAACTATCCCCAGTAA
AAAAGAACTATCCCCCATT -> AAAGAACTATCCCCCATTG
AAAAGAACTATCCCCGAAA -> AAAGAACTATCCCCGAAAT
AAAAGAACTATCCCCTCAA -> AAAGAACTATCCCCTCAAC
AAAAGGATTATAGAGGGCT -> AAAGGATTATAGAGGGCTT
AAAAGTCTCTTATTACACG -> AAAGTCTCTTATTACACGT
AAAATACGGTGTTTATAAG -> AAATACGGTGTTTATAAGC
AAAATGCGATGTGCAGTCT -> AAATGCGATGTGCAGTCTC
AAAATTCTACTTCGTGCAG -> AAATTCTACTTCGTGCAGA
AAACAAACGATCGGTAACT -> AACAAACGATCGGTAACTG
AAACACTCACGGATAAAAA -> AACACTCACGGATAAAAAG
AAACAGTAATCATCCGCAG -> AACAGTAATCATCCGCAGC
AAACCATAGTGCAAGGTAT -> AACCATAGTGCAAGGTATT
AAACCGTAAGTCGGGTACT -> AACCGTAAGTCGGGTACTC
AAACCGTCGGCAGAATAGG -> AACCGTCGGCAGAATAGGA
AAACGACGGTTCATCACAT -> AACGACGGTTCATCACATA
AAACGATCGGTAACTGACC -> AACGATCGGTAACTGACCT
AAACGCAAATAGGCTATCG -> AACGCAAATAGGCTATCGC
AAACGCGGGTT

### Find an Eulerian Cycle in a Graph

https://rosalind.info/problems/ba3f/

In [8]:
def EulerianCycle(graph):
    """Trace a closed walk that uses every edge of ``graph`` exactly once.

    The walk is built with an explicit stack: edges are followed (and
    consumed) until the current node has none left, at which point the node
    is emitted and the walk backtracks.

    Args:
        graph: Dict mapping each node to an iterable of its successor nodes.
            The input is not modified.

    Returns:
        List of nodes along the walk, starting from the first node in
        ``graph`` that has an outgoing edge. Returns ``[]`` when the graph
        has no edges. The result is a true Eulerian cycle only if the graph
        actually admits one; this function does not verify that.
    """
    # Work on a copy so the caller's adjacency lists are left untouched.
    remaining = {node: list(edges) for node, edges in graph.items()}

    # Start from the first node that has an outgoing edge; with no edges at
    # all there is nothing to traverse.
    for node, edges in remaining.items():
        if edges:
            stack = [node]
            break
    else:
        return []

    cycle = []
    while stack:
        current_node = stack[-1]
        # Nodes that only ever appear as successors have no adjacency entry;
        # treat them as having no outgoing edges instead of raising KeyError.
        outgoing = remaining.get(current_node)
        if outgoing:
            stack.append(outgoing.pop())
        else:
            cycle.append(stack.pop())

    # Nodes were emitted while backtracking, i.e. in reverse walk order.
    cycle.reverse()
    return cycle

In [9]:
# BA3F driver: each line has the form "node -> n1,n2,...".
graph = {}
with open("data/rosalind_ba3f.txt") as f:
    for line in f:
        if not line.strip():
            # A blank/trailing line has no " -> " separator and would make
            # the unpacking below raise ValueError.
            continue
        node, edges = line.strip().split(" -> ")
        graph[node] = edges.split(",")

cycle = EulerianCycle(graph)
print("->".join(cycle))

0->4->60->58->649->651->650->2080->2081->2082->2419->2420->2421->2082->650->1742->1741->2550->2549->2548->1741->1743->650->58->507->505->765->764->2299->2301->2300->764->763->2207->2206->2208->763->505->1774->1776->1775->505->506->1576->1577->2048->2047->2049->1577->1578->506->58->273->978->976->977->273->2942->2941->2943->273->271->305->306->304->271->272->803->1098->1097->1096->803->802->1068->1066->1067->802->804->1476->1474->1475->804->272->58->59->4->134->231->301->303->2872->2873->2874->303->1801->1802->1803->303->302->231->1441->1443->1442->231->229->1012->1369->1370->1371->1012->1013->1014->1558->1876->2849->2848->2850->1876->1878->1877->1558->1559->1560->1014->229->230->673->1832->2741->2742->2740->1832->1831->1833->673->674->1452->1451->1450->674->675->1136->1137->1135->1707->1706->2079->2077->2078->1706->1705->1135->675->230->134->224->364->2070->2069->2068->364->366->732->2909->2910->2908->732->730->731->366->365->1262->1261->1263->1399->1401->2928->2926->2927->1401->1400->

### Find an Eulerian Path in a Graph

https://rosalind.info/problems/ba3g/

In [10]:
def EulerianPath(graph):
    """Trace a walk that uses every edge of ``graph`` exactly once.

    The node with one more outgoing than incoming edge is taken as the start
    and the node with one more incoming than outgoing edge as the end. An
    extra edge end -> start is added, ``EulerianCycle`` is run on the result,
    and the cycle is rotated so that the extra edge is dropped.

    Args:
        graph: Dict mapping each node to a sequence of its successor nodes.
            The input is not modified.

    Returns:
        List of nodes along the path. If no start/end pair is found, the
        cycle returned by ``EulerianCycle`` is returned unchanged. Returns
        ``[]`` when the graph has no edges.
    """
    # Work on a copy so the caller's adjacency lists are left untouched.
    g = {node: list(edges) for node, edges in graph.items()}

    # balance[node] = out-degree - in-degree, covering nodes that only
    # appear as successors as well.
    balance = {}
    for node, edges in g.items():
        balance[node] = balance.get(node, 0) + len(edges)
        for neighbor in edges:
            balance[neighbor] = balance.get(neighbor, 0) - 1

    # Find start and end nodes.
    start_node = None
    end_node = None
    for node, diff in balance.items():
        if diff == 1:
            start_node = node
        elif diff == -1:
            end_node = node

    # Compare against None rather than relying on truthiness, so falsy node
    # labels (such as the empty string) are not mistaken for "not found".
    if start_node is None:
        # No explicit start node: pick any node with outgoing edges.
        start_node = next((node for node in g if g[node]), None)
        if start_node is None:
            return []  # no edges at all

    # Add an edge from end_node to start_node to close the path into a cycle.
    if end_node is not None:
        g.setdefault(end_node, []).append(start_node)

    cycle = EulerianCycle(g)

    # Locate the added edge and rotate the cycle so the path begins right
    # after it. cycle[0] equals cycle[-1], so the wrapped part starts at
    # index 1 to avoid repeating that node.
    if end_node is not None:
        for i in range(len(cycle) - 1):
            if cycle[i] == end_node and cycle[i + 1] == start_node:
                return cycle[i + 1:] + cycle[1:i + 1]

    return cycle

In [11]:
# BA3G driver: each line has the form "node -> n1,n2,...".
graph = {}
with open("data/rosalind_ba3g.txt") as f:
    for line in f:
        if not line.strip():
            # A blank/trailing line has no " -> " separator and would make
            # the unpacking below raise ValueError.
            continue
        node, edges = line.strip().split(" -> ")
        graph[node] = edges.split(",")

path = EulerianPath(graph)
print("->".join(path))

37->39->375->968->969->967->1635->1634->1726->1728->1727->1634->1633->967->375->1175->1176->1174->375->373->1022->2613->2611->2612->1022->1023->2776->2777->2778->1023->2103->2101->2102->1023->1021->2112->2111->2110->1021->1042->1043->1070->1071->1069->1043->1044->1021->373->374->432->1034->1035->1033->432->430->431->374->1063->1064->1065->374->39->38->4->31->2738->2737->2739->31->229->231->733->1230->1228->1229->2055->2054->2053->1229->733->735->734->231->230->905->1700->1699->1701->905->904->906->230->31->116->575->753->752->785->784->786->752->751->575->574->576->116->2764->2766->2765->116->115->308->307->309->1600->1602->1601->309->115->117->485->484->1733->1732->1734->484->1491->1490->1489->484->486->117->31->32->143->144->142->660->658->659->142->367->970->972->971->367->369->1111->1112->1113->369->368->825->823->979->980->981->823->2133->2500->2502->2501->2133->2140->2142->2141->2133->2131->2132->823->1395->1394->1393->2156->2155->2157->1393->823->824->368->142->194->493->758->25

### Reconstruct a String from its k-mer Composition

https://rosalind.info/problems/ba3h/