### Find the Minimum Number of Coins Needed to Make Change

https://rosalind.info/problems/ba5a

In [1]:
def TheChangeProblem(coins, amount):
    """Return the fewest coins needed to pay exactly ``amount``.

    Args:
        coins: iterable of coin denominations; each may be used any
            number of times.
        amount: non-negative target value.

    Returns:
        The minimum number of coins summing to ``amount``, or ``-1`` when
        no combination of the given denominations reaches it.
    """
    unreachable = float('inf')

    # fewest[t] holds the best coin count found so far for the value t.
    fewest = [unreachable] * (amount + 1)
    fewest[0] = 0  # paying nothing takes no coins

    for denomination in coins:
        # Walking upwards lets the same denomination be reused within a pass.
        for target in range(denomination, amount + 1):
            candidate = fewest[target - denomination] + 1
            if candidate < fewest[target]:
                fewest[target] = candidate

    best = fewest[amount]
    if best == unreachable:
        return -1
    return best

In [4]:
# BA5A driver: the first line of the dataset is the target amount, the second
# line is the comma-separated list of coin denominations.
with open('data/rosalind_ba5a.txt') as f:
    lines = f.readlines()

amount = int(lines[0].strip())
coins = [int(token) for token in lines[1].strip().split(',')]
result = TheChangeProblem(coins, amount)
print(result)

770


### Find the Length of a Longest Path in a Manhattan-like Grid

https://rosalind.info/problems/ba5b

In [5]:
def ManhattanTourist(n, m, Down, Right):
    """Return the length of a longest path from (0, 0) to (n, m).

    Moves go only down or right through an (n + 1) x (m + 1) grid of nodes.

    Args:
        n: number of downward steps on any full path.
        m: number of rightward steps on any full path.
        Down: ``Down[i][j]`` is the weight of the edge (i, j) -> (i + 1, j);
            indexed with i in 0..n-1 and j in 0..m.
        Right: ``Right[i][j]`` is the weight of the edge (i, j) -> (i, j + 1);
            indexed with i in 0..n and j in 0..m-1.

    Returns:
        The maximum total edge weight over all paths to node (n, m).
    """
    best = [[0] * (m + 1) for _ in range(n + 1)]

    # Left column: the only way in is straight down.
    for row in range(n):
        best[row + 1][0] = best[row][0] + Down[row][0]

    # Top row: the only way in is straight right.
    for col in range(m):
        best[0][col + 1] = best[0][col] + Right[0][col]

    # Interior nodes take the better of arriving from above or from the left.
    for row in range(1, n + 1):
        above, current = best[row - 1], best[row]
        for col in range(1, m + 1):
            from_above = above[col] + Down[row - 1][col]
            from_left = current[col - 1] + Right[row][col - 1]
            current[col] = max(from_above, from_left)

    return best[n][m]


In [6]:
def read_rosalind_ba5b(filename):
    """Parse a BA5B dataset file.

    Blank lines are ignored. The first remaining line holds ``n`` and ``m``;
    the rows before the ``-`` separator line form the ``Down`` matrix and the
    rows after it form the ``Right`` matrix.

    Args:
        filename: path of the dataset file.

    Returns:
        A tuple ``(n, m, Down, Right)`` where both matrices are lists of
        lists of ints.
    """
    with open(filename) as handle:
        rows = [text for text in map(str.strip, handle) if text]

    def parse_matrix(block):
        # One whitespace-separated row of integers per line.
        return [[int(token) for token in row.split()] for row in block]

    height, width = (int(token) for token in rows[0].split())

    # The single "-" line splits the two weight matrices.
    divider = rows.index('-')
    Down = parse_matrix(rows[1:divider])
    Right = parse_matrix(rows[divider + 1:])

    return height, width, Down, Right


def main():
    """Solve BA5B for data/rosalind_ba5b.txt and print the path length."""
    grid = read_rosalind_ba5b('data/rosalind_ba5b.txt')
    # The reader returns (n, m, Down, Right), exactly the solver's arguments.
    print(ManhattanTourist(*grid))


if __name__ == "__main__":
    main()


85


### Find a Longest Common Subsequence of Two Strings

https://rosalind.info/problems/ba5c

In [9]:
def LongestCommonSubsequence(v, w):
    """Return one longest common subsequence of ``v`` and ``w``.

    Args:
        v: first string.
        w: second string.

    Returns:
        A string that is a subsequence of both inputs and has maximum
        length. When several exist, the one found by the traceback below
        (preferring to step back in ``v`` on ties) is returned.
    """
    rows, cols = len(v), len(w)

    # table[i][j] = LCS length of the prefixes v[:i] and w[:j].
    table = [[0] * (cols + 1) for _ in range(rows + 1)]
    for i, a in enumerate(v, start=1):
        previous, current = table[i - 1], table[i]
        for j, b in enumerate(w, start=1):
            if a == b:
                current[j] = previous[j - 1] + 1
            else:
                current[j] = max(previous[j], current[j - 1])

    # Walk back from the bottom-right corner, collecting matched characters.
    picked = []
    i, j = rows, cols
    while i and j:
        if v[i - 1] == w[j - 1]:
            picked.append(v[i - 1])
            i, j = i - 1, j - 1
        elif table[i - 1][j] < table[i][j - 1]:
            j -= 1
        else:
            i -= 1

    picked.reverse()
    return ''.join(picked)


In [11]:
def read_rosalind_ba5c(filename):
    """Read the two input strings of a BA5C dataset.

    Args:
        filename: path of the dataset file.

    Returns:
        A tuple with the first and second non-blank lines, stripped of
        surrounding whitespace.
    """
    with open(filename) as handle:
        nonblank = [text for text in map(str.strip, handle) if text]
    first, second = nonblank[0], nonblank[1]
    return first, second


def main():
    """Solve BA5C for data/rosalind_ba5c.txt and print the subsequence."""
    strings = read_rosalind_ba5c('data/rosalind_ba5c.txt')
    # The reader returns the pair (v, w) in the order the solver expects.
    print(LongestCommonSubsequence(*strings))


if __name__ == "__main__":
    main()

GAACTGTCCTATTAGTACGGATATAGCACGGTCCTCGACTTTTTCAAAACTAAACAGGAAAGCCAGCGTCGTTTGGGTGTTGACATGCCGTATCCGTAACAGGAATTAGCCGGGCGTCTCGAAGTCTTTGCGACAGGGAATATTTTTAAGGAGCGGGAGGTCACCCCCGTAGGAGTGGGGTTCTAACCGTAATCCCACTCAAGACGTTTCATCAGGAAATCGCCCCTGGGAATCGTGTAACGGATTCGGTACTGTAGTTAGATCTAAGGGGACCCACGTGCAGCCGTGATCGCCAAGCTTTGACGGGCGATGTTTCCATGCAAGGAAAAGACTCCCTTCCAGTCGAGAGGCCGCGCGCGCAACCTTAGTTTAACTCTTGGAATTTCCAGAACGCGACGTGGGGGAAGCACATGCAACCAGGTTCCTCCATTGTAATGGAGGGGCACCCATAGACCCTGTGGGCAAGTTTGACCTAGGACACTGTACGCAACCCGTTCCTCGGGGAAACCCCTTAGACCCAGCTATATTTTGGTCTCCTATGAAGTAGACCCCCCTTTACTAGGGCCGTTTACCCCGCAAAGCCCGCGGATAAACACA


### Find the Longest Path in a DAG (Directed Acyclic Graph)

https://rosalind.info/problems/ba5d

In [12]:
from collections import defaultdict, deque

def topological_sort(adj):
    """Return the nodes of a directed graph in topological order.

    Uses Kahn's algorithm: repeatedly emit a node with no remaining
    incoming edges and remove its outgoing edges.

    Args:
        adj: mapping ``node -> list of (neighbor, weight)`` pairs. Nodes
            that occur only as edge targets do not need their own key, and
            ``adj`` is never modified.

    Returns:
        A list of nodes in which every edge points from an earlier to a
        later entry. Nodes that never reach in-degree zero (i.e. the graph
        has a cycle) are left out; no error is raised.
    """
    # Seed every key with 0 first so sources keep the iteration order of adj.
    indegree = {u: 0 for u in adj}
    for edges in adj.values():
        for v, _ in edges:
            indegree[v] = indegree.get(v, 0) + 1

    # Nodes without incoming edges are ready to be emitted.
    zero_indegree = deque(u for u, degree in indegree.items() if degree == 0)

    order = []
    while zero_indegree:
        u = zero_indegree.popleft()
        order.append(u)
        # .get() tolerates nodes without a key (pure sinks) and avoids
        # inserting keys into a defaultdict as a side effect.
        for v, _ in adj.get(u, ()):
            indegree[v] -= 1
            if indegree[v] == 0:
                zero_indegree.append(v)
    return order


def longest_path_in_DAG(source, sink, adj):
    """Return the length and nodes of a longest path from source to sink.

    Args:
        source: node the path starts at.
        sink: node the path ends at.
        adj: mapping ``node -> list of (neighbor, weight)`` pairs describing
            a directed acyclic graph.

    Returns:
        A tuple ``(length, path)`` where ``length`` is the total weight and
        ``path`` is the list of nodes from ``source`` to ``sink``.

    Raises:
        KeyError: if ``sink`` cannot be reached from ``source``.
    """
    order = topological_sort(adj)

    # -inf marks nodes not known to be reachable from the source. Since
    # -inf + w is still -inf, such nodes never improve their successors.
    dist = defaultdict(lambda: float('-inf'))
    dist[source] = 0
    backtrack = {}

    # Relax outgoing edges in topological order so each node's distance is
    # final before it is used.
    for u in order:
        for v, w in adj.get(u, ()):
            if dist[v] < dist[u] + w:
                dist[v] = dist[u] + w
                backtrack[v] = u

    # A predecessor is recorded only for nodes reachable from the source.
    if sink != source and sink not in backtrack:
        raise KeyError(
            f"sink {sink!r} is not reachable from source {source!r}"
        )

    # Follow predecessors from the sink back to the source.
    path = [sink]
    while path[-1] != source:
        path.append(backtrack[path[-1]])
    path.reverse()

    return dist[sink], path


In [13]:
def read_rosalind_ba5d(filename):
    """Parse a BA5D dataset file into a source, a sink and an adjacency map.

    Blank lines are ignored. The first two remaining lines are the source
    and sink node numbers; every further line has the form ``u->v:w``.

    Args:
        filename: path of the dataset file.

    Returns:
        A tuple ``(source, sink, adj)`` where ``adj`` is a
        ``defaultdict(list)`` mapping each node to its list of
        ``(neighbor, weight)`` pairs. Every node mentioned in an edge has a
        key, even if it has no outgoing edges.
    """
    with open(filename) as handle:
        records = [text for text in map(str.strip, handle) if text]

    source = int(records[0])
    sink = int(records[1])

    adj = defaultdict(list)
    for record in records[2:]:
        tail_text, head_text = record.split('->')
        tail = int(tail_text)
        head, weight = (int(part) for part in head_text.split(':'))
        adj[tail].append((head, weight))
        # Register the target too so pure sinks still appear as keys.
        adj.setdefault(head, [])
    return source, sink, adj


def main():
    """Solve BA5D for data/rosalind_ba5d.txt; print the length, then the path."""
    problem = read_rosalind_ba5d('data/rosalind_ba5d.txt')
    # The reader returns (source, sink, adj), exactly the solver's arguments.
    length, path = longest_path_in_DAG(*problem)
    print(length)
    print('->'.join(str(node) for node in path))


if __name__ == "__main__":
    main()


58
1->9->31


### Find a Highest-Scoring Alignment of Two Strings

https://rosalind.info/problems/ba5e

In [17]:
def read_blosum62(filename="data/BLOSUM62.txt"):
    """Load a scoring matrix from a whitespace-separated text file.

    Blank lines are ignored. The first remaining line lists the column
    symbols; each further line starts with a row symbol followed by one
    integer score per column.

    Args:
        filename: path of the matrix file.

    Returns:
        A dict mapping ``(row_symbol, column_symbol)`` to the integer score.
    """
    with open(filename) as handle:
        rows = [text for text in map(str.strip, handle) if text]

    columns = rows[0].split()
    blosum = {}
    for row in rows[1:]:
        label, *cells = row.split()
        scores = [int(cell) for cell in cells]
        for column, score in zip(columns, scores):
            blosum[(label, column)] = score
    return blosum


def global_alignment(v, w, blosum, sigma=5):
    """Compute a highest-scoring global alignment of ``v`` and ``w``.

    Args:
        v: first string.
        w: second string.
        blosum: dict mapping ``(symbol_of_v, symbol_of_w)`` to a score.
        sigma: penalty subtracted for every gap symbol.

    Returns:
        A tuple ``(score, aligned_v, aligned_w)``; the aligned strings have
        equal length and use ``-`` for gaps. On ties a diagonal move is
        preferred over a gap in ``w``, which is preferred over a gap in
        ``v``.
    """
    rows, cols = len(v), len(w)
    score = [[0] * (cols + 1) for _ in range(rows + 1)]
    move = [[None] * (cols + 1) for _ in range(rows + 1)]

    # Borders: aligning a prefix against nothing costs one gap per symbol.
    for i in range(1, rows + 1):
        score[i][0], move[i][0] = -sigma * i, "down"
    for j in range(1, cols + 1):
        score[0][j], move[0][j] = -sigma * j, "right"

    # Interior cells: best of substituting, or gapping either string.
    for i, a in enumerate(v, start=1):
        for j, b in enumerate(w, start=1):
            diag = score[i - 1][j - 1] + blosum[(a, b)]
            down = score[i - 1][j] - sigma
            right = score[i][j - 1] - sigma
            if diag >= down and diag >= right:
                score[i][j], move[i][j] = diag, "diag"
            elif down >= right:
                score[i][j], move[i][j] = down, "down"
            else:
                score[i][j], move[i][j] = right, "right"

    # Trace the recorded moves back from the bottom-right corner.
    top, bottom = [], []
    i, j = rows, cols
    while i or j:
        step = move[i][j]
        if step == "diag":
            di, dj = 1, 1
        elif step == "down":
            di, dj = 1, 0
        else:  # "right"
            di, dj = 0, 1
        top.append(v[i - 1] if di else "-")
        bottom.append(w[j - 1] if dj else "-")
        i -= di
        j -= dj

    return score[rows][cols], "".join(top[::-1]), "".join(bottom[::-1])


In [18]:
def main():
    """Solve BA5E for data/rosalind_ba5e.txt and print the result.

    Prints the alignment score followed by the two aligned strings.

    Raises:
        ValueError: if the dataset does not consist of exactly two lines.
    """
    # Use a context manager so the file handle is closed deterministically
    # instead of being left to the garbage collector.
    with open("data/rosalind_ba5e.txt") as f:
        v, w = f.read().splitlines()
    blosum = read_blosum62("data/BLOSUM62.txt")
    score, align_v, align_w = global_alignment(v, w, blosum, sigma=5)
    print(score)
    print(align_v)
    print(align_w)


if __name__ == "__main__":
    main()

10435
EIEKGMTAYTMAQRGDYKTIHKCHNTCGNGPSQSEMHWEEVALTKGVMWQTATESGHKEMRVPGKIRKANRKWMFPCDKWPPIQYYHSACRSSIININKLPQ-V-IWPWGRTAMCMEKDHTVPPAIIHKGCWPTHYHEGWPDAKTQNRMAATIFQNSELSQILTKCAES--SCNCLQQDLRLGFPEAGVIDMHLFGASLQDMFDGMRDH-ASHPTPKHTLKPNCLGVPFRYNFPIFYPGGTSFNCFYHCDMPDAAGLYVTKKYEKTMFKSGVISSLQKSPF-LR-S-GDLNELFKADSYWIVSRPVMNEWSWWWCVTFCPSRIQMFHILFNPPFQFKTDANEWTMVFIETSTKPLMSGDKWHYLCARRRFLQSAFLYKKTMWASAYCTRPYQYKTLAKDLMKARPITYNSVIQNHNRDSTDTIIFPAREIVGWRRDPAFTTVWTNVIFMNIHREIMYFVVILWAACTSFVPHLSINLHPDIRIFSGIKIWPSLPWNEKGSCSKFWNWDVYSHQYN-----SHRE-M------DDYMLNCDHFVHEHYKLTQTDEPCHTPKQTFASGAIWPKPWI--WFNSKATHDKSVSGYKHSADDFKMANCGMHPTQVCGYDDDMKCRWWINTQSGFVID-F----LRPSMYLRMHNAQDDAIYDSRNDINTAHDPHKGYTETYVRMEEMNMNWQELDRCRKTAKLMRRVQCEETWPQCHWTNFKITWDCRQCTKSFDIYHVGKTIFKVYGVIVQKPWSEMFGIEEVSNATTMIWCMT-------YPDKEDSRIDY-H-HMLD--M--GYEWLGMKDNWTQESKRAHQRRFNNGFEFWDQSPMIAWYKDWTFQWG--PEKPEIGQHGIISKG--HKATLVIAAN-RC-LGNC-D--PTAMGFWSYYELYMYGELPFMCNCR-YSNVTSWVRSWLKVFCWYCHNECWYFHMHERAKLA-----YMVTADSHDMKPFNDNEWQTKLSSSPDQIP-WKF-VRRWD

### Find a Highest-Scoring Local Alignment of Two Strings

https://rosalind.info/problems/ba5f

In [19]:
from collections import defaultdict

def read_pam250(filename="data/PAM250.txt"):
    """Read the PAM250 scoring matrix from a whitespace-separated file.

    Blank lines are skipped. The first remaining line names the columns;
    every later line is a row symbol followed by its integer scores.

    Args:
        filename: path of the matrix file.

    Returns:
        A dict mapping ``(row_symbol, column_symbol)`` to the integer score.
    """
    with open(filename) as source:
        table = [entry.strip() for entry in source]
    table = [entry for entry in table if entry]

    header = table[0].split()
    pam = {}
    for entry in table[1:]:
        fields = entry.split()
        row_symbol = fields[0]
        values = [int(field) for field in fields[1:]]
        for column_symbol, value in zip(header, values):
            pam[(row_symbol, column_symbol)] = value
    return pam

def read_sequences(filename="data/rosalind_ba5f.txt"):
    """Read the two input strings of a Rosalind dataset file.

    Args:
        filename: path of the dataset file.

    Returns:
        A tuple with the first and second non-blank lines, stripped of
        surrounding whitespace.
    """
    with open(filename) as source:
        kept = [entry for entry in map(str.strip, source) if entry]
    first, second = kept[0], kept[1]
    return first, second

def local_alignment(v, w, pam, sigma=5):
    """Compute a highest-scoring local alignment of ``v`` and ``w``.

    Args:
        v: first string.
        w: second string.
        pam: dict mapping ``(symbol_of_v, symbol_of_w)`` to a score.
        sigma: penalty subtracted for every gap symbol.

    Returns:
        A tuple ``(score, aligned_v, aligned_w)`` for the best-scoring pair
        of substrings; the aligned strings have equal length and use ``-``
        for gaps. When no cell scores above zero the result is
        ``(0, "", "")``.
    """
    n, m = len(v), len(w)
    s = [[0] * (m + 1) for _ in range(n + 1)]
    # backtrack[i][j] is "diag", "up", "left", or None where the alignment
    # restarts from score zero.
    backtrack = [[None] * (m + 1) for _ in range(n + 1)]
    max_score, max_pos = 0, (0, 0)

    for i in range(1, n + 1):
        for j in range(1, m + 1):
            match = s[i - 1][j - 1] + pam[(v[i - 1], w[j - 1])]
            delete = s[i - 1][j] - sigma
            insert = s[i][j - 1] - sigma
            # The extra 0 lets an alignment start fresh at any cell.
            best = max(0, match, delete, insert)
            s[i][j] = best

            if best == match:
                backtrack[i][j] = "diag"
            elif best == delete:
                backtrack[i][j] = "up"
            elif best == insert:
                backtrack[i][j] = "left"

            # Strict comparison keeps the first best cell in row-major order.
            if best > max_score:
                max_score = best
                max_pos = (i, j)

    # Trace back from the best cell until the score drops to zero. Columns
    # are collected in reverse and joined once at the end, which avoids
    # repeatedly copying a growing string.
    i, j = max_pos
    v_parts, w_parts = [], []
    while i > 0 and j > 0 and s[i][j] != 0:
        step = backtrack[i][j]
        if step == "diag":
            v_parts.append(v[i - 1])
            w_parts.append(w[j - 1])
            i, j = i - 1, j - 1
        elif step == "up":
            v_parts.append(v[i - 1])
            w_parts.append("-")
            i -= 1
        else:  # "left" -- a non-zero cell always has one of the three moves
            v_parts.append("-")
            w_parts.append(w[j - 1])
            j -= 1

    v_parts.reverse()
    w_parts.reverse()
    return max_score, "".join(v_parts), "".join(w_parts)

def main():
    """Solve BA5F for data/rosalind_ba5f.txt and print the local alignment."""
    first, second = read_sequences("data/rosalind_ba5f.txt")
    scoring = read_pam250("data/PAM250.txt")
    # The result is (score, aligned v, aligned w); print one item per line.
    for item in local_alignment(first, second, scoring, sigma=5):
        print(item)

if __name__ == "__main__":
    main()


3510
LWMGNSFYDTITH--WHPWPFACWTLCVAETT-QKTRRSP-HLKRIFSVVFKYHLE-CD-EEQDGDRQQIGRIHRLR-DVR-WPSDKIHPTMLTAWLIYQRLSAWVNARRIIEVATRDGWKLG-NATFKL-RN-KWIICQGHLETQAPSAVMWTPWLNSPWQRYAARSCSGCWHTEYEHKQDHPQQNQQFVMID-PE--AGEC--AMSA-LECPFIC-WK-NLYLPLHAFDCYVTDEDDNDELEQW-KVK-CNQPSGIVYTNKQRHAGM-TM-YAPGCDKPSRQQNND-MG-HT---HASAI-PGE-IRFAPN-FGSSIALNPCKPEFGAYLTGCYHSSAICSMRPF-GIGSDRENPPSDPHWG---VCNDQHILGWLWLGKYVF-ANLSHNPTAHPHQLPPFGVGVPK-IQITHGGTMEDSHQYDYMFNQRWVVD-QLRYIFIMLFQHYNKCCWRLWMSGAWSNNPALRMNMIWT--PLSGGSADEKREKQRAGIVAYWL-DGMFMFFNFPNQCTQSHRYDKWGPHYNSWE-GIACKHDHF-AGEVTSSTGNQPPNWSWGNLDFVWGD-MYFVMLDMCEGGEIMVCSQHFIEEWNVSFHTCLPKDQHGFLVNQEYTPLPKEEQHLYLAKLQPGCFQRIQSIDTLQFNWLWWGMW-LCVFQWLK--F--N-HRTTNKHKRMAAPVKHKDPRKMKIKNGSFKFPLMEHVMCLSTFDYIEFSWANPEYASSL-C-TKCYNTITIETIKEFAAS-C--VN-NE-QQNNLGTSKCAHLPNNTC--ST-YSHRLHM-TAGISYKRLRHVTE-VYIHLCTVS-CHQWIKPMQWFWGPCAEISGLFPINTMDWFNANCTFVTWVPRYSADDADPDMKIRTNCFHGWDTDNWSPQRKEFQRDYVRGYTSANCYIMPKNIPDVCNNMRSERTDHIPHDRENLNQWVKFYNLPMAFTARLPSKYHG-KFKEMVAHGQHQAM-----L--MQHFMA--N-------