In [8]:
import pandas as pd
from collections import namedtuple

In [9]:
# One input ladder: its fragment table, its intact (full) mass and its
# orientation value, in that order.
Ladder = namedtuple('Ladder', ['dataframe', 'fullmass', 'ori'])

In [10]:
def ladder_complementation(ladders):
    """Run the Ladder Complementation algorithm over several ladders.

    :param ladders: an iterable of ``Ladder``-like records exposing the
        attributes ``dataframe``, ``fullmass`` and ``ori``.
    :return: a pandas DataFrame holding the combined result; an empty
        DataFrame when ``ladders`` is empty.

    Every ladder is first passed through ``base_calling_random`` and its
    output is handed to ``process_single_ladder`` together with the ladder's
    full mass and orientation. The same result frame is threaded through all
    calls, and the ladder's position in ``ladders`` is passed as ``idx`` so
    that each ladder gets its own result columns, side by side.

    Please note that the result may require further manual filtering.
    """
    combined = pd.DataFrame()
    for position, entry in enumerate(ladders):
        frame, total_mass, direction = entry.dataframe, entry.fullmass, entry.ori
        # The helper's output is unpacked as the leading positional arguments
        # of process_single_ladder (presumably ``(df, mass_pairs)`` -- confirm
        # against base_calling_random).
        called = base_calling_random(frame)
        combined = process_single_ladder(
            *called,
            total_mass,
            orientation=direction,
            df_res=combined,
            idx=position,
        )

    return combined

In [7]:
def process_single_ladder(df, mass_pairs, full_mass,
                          orientation=5, df_res=None, idx=-1):
    """Put each fragment of one ladder into its own position (seat).

    :param df: ladder fragments as a pandas DataFrame with a ``Mass`` column.
        Missing values are treated as 0 and rows with ``Mass <= 0`` are
        skipped. Together with ``mass_pairs`` this is effectively the result
        of base_calling_random; please refer to modules/homology_search.ipynb.
    :param mass_pairs: iterable of list-likes. The members of each one are
        matched against ``df.Mass`` and its element at index 2 is written
        into the base column.
    :param full_mass: the intact mass value of the ladder.
    :param orientation: ladder orientation, 3 or 5. Any value other than 3 is
        handled like 5.
    :param df_res: a pandas DataFrame that accumulates the results of several
        ladders. ``None`` or an empty frame starts a fresh result indexed by
        ``position``. A non-empty frame is modified in place and returned.
    :param idx: the index of this ladder among multiple ladders. Default -1,
        which indicates that only one ladder needs to be processed.
    :return: ``df_res`` with the columns ``Base``/``Mass`` (or
        ``Base{idx}``/``Mass{idx}`` when ``idx >= 0``) added, or reset if
        they already existed.

    For each ladder, we disperse the fragments into different seats, based on
    their mass. Each seat represents a position on a full-length tRNA. For
    example, if the tRNA is 76 nt long, there will be 76 seats for this type
    of tRNA. A seat that receives several values keeps all of them, joined
    with ``', '``.
    """
    # Mass span covered by one seat (presumably the approximate mass of a
    # single nucleotide -- TODO confirm).
    seat_width = 320
    seats = int(full_mass // seat_width)
    col_base = 'Base' if idx < 0 else f'Base{idx}'
    col_mass = 'Mass' if idx < 0 else f'Mass{idx}'

    if df_res is None or df_res.empty:
        # One row more than the number of seats, labelled 1..seats+1.
        df_res = pd.DataFrame()
        df_res['position'] = range(1, seats + 1 + 1)
        df_res.set_index('position', inplace=True)
    # Explicit object dtype: the cells start as '' but later hold numbers as
    # well as strings, which a dedicated string dtype would not accept.
    df_res[col_base] = pd.Series('', index=df_res.index, dtype=object)
    df_res[col_mass] = pd.Series('', index=df_res.index, dtype=object)

    df = df.fillna(0)

    # Pass 1: drop every fragment mass into its seat.
    for _, row in df.iterrows():
        if row.Mass <= 0:
            continue
        pos = int(row.Mass // seat_width)
        if orientation == 3:
            # Orientation 3 is counted from the opposite end.
            pos = seats + 1 - pos
        current = df_res.loc[pos, col_mass]
        if not current:
            df_res.loc[pos, col_mass] = row.Mass
        else:
            df_res.loc[pos, col_mass] = '{}, {}'.format(current, row.Mass)

    # Pass 2: place the base of every mass pair.
    for pair in mass_pairs:
        pair_masses = df.loc[df.Mass.isin(pair), 'Mass']
        if pair_masses.empty:
            continue

        # The seat is derived from the smaller matching mass for
        # orientation 3 and from the larger one otherwise.
        anchor = pair_masses.min() if orientation == 3 else pair_masses.max()
        pos = int(anchor // seat_width)
        if orientation == 3:
            # NOTE(review): one seat lower than the mirroring used for the
            # masses in pass 1 (extra ``- 1``); kept exactly as in the
            # original -- confirm this offset is intended.
            pos = seats + 1 - pos
            pos -= 1
        current = df_res.loc[pos, col_base]
        if not current:
            df_res.loc[pos, col_base] = pair[2]
        else:
            df_res.loc[pos, col_base] = '{}, {}'.format(current, pair[2])

    return df_res