In [None]:
import matplotlib
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns

In [None]:
from IPython.display import Audio, display

In [None]:
# from jupyterthemes import jtplot
# jtplot.style(theme='monokai', context='notebook', ticks=True, grid=False)

In [None]:
# Mass constants used by the helpers below (the plots label the mass axis in Da).
# NOTE(review): A/C/G/U look like monoisotopic nucleotide residue masses --
# confirm against the reference table the values were taken from.
A = 329.0525
C = 305.0413
G = 345.0474
U = 306.0253
H2O = 18.0106
# Offset added to a base mass for the lower-case entries of `bases`.
# NOTE(review): presumably a methylation shift -- confirm the intended precision.
M = 14.01
# NOTE(review): Na / K / CO look like adduct mass shifts -- confirm.
Na = 21.9819
K = 37.9559
CO = 27.9949
# The three shifts above, grouped in one list.
adducts = [Na, K, CO]

In [None]:
# Letter -> mass lookup. Upper-case letters map to the plain base mass,
# lower-case letters to the same base mass plus M.
bases = {'A': A, 'C': C, 'G': G, 'U': U,
        'a': A+M, 'c': C+M, 'g': G+M, 'u': U+M}

def generate_base_sequence(seq):
    """Build per-base mass tables for a sequence, read in both directions.

    Parameters
    ----------
    seq : iterable of str
        Base letters; each one is looked up in the module-level ``bases``
        dict. Letters missing from ``bases`` get a mass of ``None``.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        Two frames with columns ``Base`` and ``Mass``: the first in the given
        order, the second in reversed order.
    """
    def _mass_table(letters):
        # One row per letter, paired with its looked-up mass.
        return pd.DataFrame({'Base': letters,
                             'Mass': [bases.get(letter) for letter in letters]})

    forward = list(seq)
    backward = forward[::-1]
    return _mass_table(forward), _mass_table(backward)

In [None]:
# Core column names; they match the columns thermo_df() always keeps.
features = ['Mass', 'RT', 'Vol']

In [None]:
def transfer_ThermoFisher_MFE(mfe_file):
    """Rewrite an Excel file in place using the short column names.

    Reads ``mfe_file``, renames 'Monoisotopic Mass' -> 'Mass',
    'Sum Intensity' -> 'Vol' and 'Apex RT' -> 'RT', then writes the result
    back to the same path.

    NOTE(review): ``to_excel`` is called with its defaults, so the row index
    is written as an extra leading column.
    """
    short_names = {'Monoisotopic Mass': 'Mass', 'Sum Intensity': 'Vol', 'Apex RT': 'RT'}
    renamed = pd.read_excel(mfe_file).rename(columns=short_names)
    renamed.to_excel(mfe_file)

In [None]:
def plot_zones(df3p, df5p, trend=False, y='RT', figsize=(16, 12)):
    """Plot two ladders (Mass vs. ``y``) on a new matplotlib figure.

    Parameters
    ----------
    df3p, df5p : pandas.DataFrame
        Frames with a ``Mass`` column and a ``y`` column.
    trend : bool
        When True, draw seaborn regression plots (first frame with the
        default linear fit, second frame with ``order=2``) instead of plain
        scatter plots.
    y : str
        Column plotted on the vertical axis. The axis label is always
        'Retention Time (min)'.
    figsize : tuple
        Figure size passed to ``plt.figure``.

    Returns
    -------
    module
        The ``matplotlib.pyplot`` module, so callers can keep drawing.
    """
    plt.figure(figsize=figsize)
    plt.xlabel('Monoisotopic Mass (Da)', fontname="Arial", fontsize=15, color='black')
    plt.ylabel('Retention Time (min)', fontname="Arial", fontsize=15, color='black')
    plt.xticks(fontname="Arial", size=13, color='black')
    plt.yticks(fontname="Arial", size=13, color='black')
    if trend:
        # x / y must be passed by keyword: seaborn >= 0.12 no longer accepts
        # them positionally.
        sns.regplot(x=df3p.Mass, y=df3p[y])
        sns.regplot(x=df5p.Mass, y=df5p[y], order=2)
    else:
        plt.scatter(df3p.Mass, df3p[y])
        plt.scatter(df5p.Mass, df5p[y])

    return plt

def plot_zone(df, trend=False, order=1, y='RT', figsize=(16, 12)):
    """Plot a single ladder (Mass vs. ``y``) on a new matplotlib figure.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``Mass`` column and a ``y`` column.
    trend : bool
        When True, draw a seaborn regression plot of polynomial ``order``
        instead of a plain scatter plot.
    order : int
        Polynomial order handed to ``sns.regplot`` (only used with ``trend``).
    y : str
        Column plotted on the vertical axis. The axis label is always
        'Retention Time (min)'.
    figsize : tuple
        Figure size passed to ``plt.figure``.

    Returns
    -------
    module
        The ``matplotlib.pyplot`` module.
    """
    plt.figure(figsize=figsize)
    plt.xlabel('Monoisotopic Mass (Da)', fontname="Arial", fontsize=15, color='black')
    plt.ylabel('Retention Time (min)', fontname="Arial", fontsize=15, color='black')
    plt.xticks(fontname="Arial", size=13, color='black')
    plt.yticks(fontname="Arial", size=13, color='black')
    if trend:
        # x / y must be passed by keyword: seaborn >= 0.12 no longer accepts
        # them positionally.
        sns.regplot(x=df.Mass, y=df[y], order=order)
    else:
        plt.scatter(df.Mass, df[y])

    return plt

def plot_multi_zones(dfs, y='RT', figsize=(16, 12)):
    """Scatter any number of ladders (Mass vs. ``y``) on one new figure.

    Parameters
    ----------
    dfs : iterable of pandas.DataFrame
        Each frame needs a ``Mass`` column and a ``y`` column; every frame is
        drawn as its own scatter series.
    y : str
        Column plotted on the vertical axis.
    figsize : tuple
        Figure size passed to ``plt.figure``.

    Returns
    -------
    module
        The ``matplotlib.pyplot`` module.
    """
    label_style = dict(fontname="Arial", fontsize=15, color='black')
    tick_style = dict(fontname="Arial", size=13, color='black')

    plt.figure(figsize=figsize)
    plt.xlabel('Monoisotopic Mass (Da)', **label_style)
    plt.ylabel('Retention Time (min)', **label_style)
    plt.xticks(**tick_style)
    plt.yticks(**tick_style)
    for ladder in dfs:
        plt.scatter(ladder.Mass, ladder[y])

    return plt

In [None]:
def plotly_zones(df_a, df_b, x='Mass', y='RT', title=None, names=None, light_bg=False):
    """Show two ladders in one interactive plotly scatter plot.

    Parameters
    ----------
    df_a, df_b : pandas.DataFrame
        Frames holding the ``x`` and ``y`` columns. They are not modified.
    x, y : str
        Columns used for the horizontal / vertical axis.
    title : str, optional
        Figure title; omitted when falsy.
    names : sequence of str, optional
        Legend labels for the two ladders; defaults to 'ladder_a' and
        'ladder_b'.
    light_bg : bool
        When True the ladders are coloured 'lightblue' and 'red'; otherwise
        plotly picks the colours.
    """
    if names:
        label_a, label_b = names[0], names[1]
    else:
        label_a, label_b = 'ladder_a', 'ladder_b'

    # assign() returns labelled copies, leaving the inputs untouched.
    combined = pd.concat([df_a.assign(type=label_a), df_b.assign(type=label_b)])

    if light_bg:
        palette = {label_a: 'lightblue', label_b: 'red'}
    else:
        palette = {}
    fig = px.scatter(combined, x=x, y=y, color='type',
                     color_discrete_map=palette)
    if title:
        fig.update_layout(title=title)
    fig.show()
    
def plotly_zone(df, x='Mass', y='RT', title=None):
    """Show one ladder as an interactive plotly scatter plot.

    ``x`` and ``y`` name the columns to plot; ``title`` is applied only when
    truthy.
    """
    figure = px.scatter(df, x=x, y=y)
    if title:
        figure.update_layout(title=title)
    figure.show()

def plotly_multi_zones(dfs, y='RT', title=None, names=None):
    """Show any number of ladders in one interactive plotly scatter plot.

    Parameters
    ----------
    dfs : iterable of pandas.DataFrame
        Frames with a ``Mass`` column and a ``y`` column. They are not
        modified.
    y : str
        Column used for the vertical axis.
    title : str, optional
        Figure title; omitted when falsy.
    names : sequence of str, optional
        Legend label per frame (indexed by position). Without it the frames
        are labelled 'ladder_1', 'ladder_2', ...
    """
    def _label(position):
        # Caller-supplied names win; otherwise number the ladders from 1.
        return names[position] if names else 'ladder_{}'.format(position + 1)

    labelled = [frame.assign(type=_label(position))
                for position, frame in enumerate(dfs)]
    combined = pd.concat(labelled)
    fig = px.scatter(combined, x='Mass', y=y, color='type')
    if title:
        fig.update_layout(title=title)
    fig.show()
    

In [None]:
def plot_basecalling(df, mass_pairs, endpoints=None, annotate=False, plt=None, y='RT', figsize=(12, 9)):
    """Draw a base-calling ladder with matplotlib.

    All dots of ``df`` are scattered, then every pair in ``mass_pairs`` whose
    masses occur in ``df`` is connected with a black line and labelled with
    the pair's third element.

    Parameters
    ----------
    df : pandas.DataFrame
        Dots to plot; needs a ``Mass`` column and a ``y`` column.
    mass_pairs : iterable of tuple
        ``(mass_a, mass_b, label)`` tuples; dots are matched by exact
        membership of ``df.Mass`` in the tuple.
    endpoints : pandas.DataFrame, optional
        Extra dots drawn in red and printed (columns Mass, RT, Vol). ``None``
        or an empty frame draws nothing.
    annotate : bool
        When True, also write the mass (two decimals) next to the first two
        matched dots of every pair.
    plt : module, optional
        pyplot-like module to draw with; defaults to ``matplotlib.pyplot``.
    y : str
        Column plotted on the vertical axis.
    figsize : tuple
        Figure size passed to ``plt.figure``.

    Returns
    -------
    tuple
        ``(plt, fig)`` -- the pyplot module used and the created figure.
    """
    if not plt:
        plt = matplotlib.pyplot
    fig = plt.figure(figsize=figsize)
    plt.xlabel('Monoisotopic Mass (Da)', fontname="Arial", fontsize=15, color='black')
    plt.ylabel('Retention Time (min)', fontname="Arial", fontsize=15, color='black')

    plt.scatter(df.Mass, df[y], color='C0')

    for pair in mass_pairs:
        df_pair = df[df.Mass.isin(pair)]
        if df_pair.empty:
            continue
        plt.plot(df_pair.Mass, df_pair[y], marker='o', color='black')

        # The pair label sits at the midpoint of the matched dots.
        plt.annotate(text=pair[2], size=15, xy=(df_pair.Mass.mean(), df_pair[y].mean()),
                     textcoords="offset points", xytext=(-10, 10), ha='center', color='black')

        if not annotate:
            continue

        # head(2) instead of iloc[0] / iloc[1]: when only one mass of the
        # pair is present in df there is no second row to annotate.
        for _, dot in df_pair.head(2).iterrows():
            plt.annotate(text='{:.2f}'.format(dot.Mass), size=13, xy=(dot.Mass, dot[y]),
                         textcoords="offset points", xytext=(10, -20), ha='center')

    if endpoints is not None and not endpoints.empty:
        # Endpoints use the same vertical column as every other dot.
        plt.scatter(endpoints.Mass, endpoints[y], color='r')
        print(endpoints[['Mass', 'RT', 'Vol']])
    return plt, fig

In [None]:
def plotly_basecalling(df, mass_pairs, annotate=False, endpoints=None,
                       df_ori=None, y='RT', mark_vol=False):
    """Draw a base-calling ladder as an interactive plotly figure.

    Parameters
    ----------
    df : pandas.DataFrame
        Dots to plot; needs a ``Mass`` column and a ``y`` column (and ``Vol``
        when ``mark_vol`` is set).
    mass_pairs : iterable of tuple
        ``(mass_a, mass_b, label)`` tuples; dots are matched by exact
        membership of ``df.Mass`` in the tuple, connected with a pink line
        and labelled at their midpoint.
    annotate : bool
        When True, write every dot's mass (two decimals) below it.
    endpoints : pandas.DataFrame, optional
        Extra dots added as their own trace and printed (columns Mass, RT,
        Vol). ``None`` or an empty frame adds nothing.
    df_ori : pandas.DataFrame, optional
        Additional dots added as their own trace. ``None`` or an empty frame
        adds nothing.
    y : str
        Column plotted on the vertical axis for every trace.
    mark_vol : bool
        When True, write every dot's ``Vol`` (two decimals) below it.
    """
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=df.Mass, y=df[y], mode='markers'))

    if annotate:
        for _, row in df.iterrows():
            # '{:.2f}' (two decimals); the previous '{:2f}' was a width
            # specifier and printed six decimals.
            fig.add_annotation(x=row.Mass, y=row[y], yshift=-10,
                text='{:.2f}'.format(row.Mass),
                showarrow=False,
                arrowhead=1)

    if mark_vol:
        for _, row in df.iterrows():
            fig.add_annotation(x=row.Mass, y=row[y], yshift=-10,
                text='{:.2f}'.format(row.Vol),
                showarrow=False,
                arrowhead=1)

    if df_ori is not None and not df_ori.empty:
        # Index with the ``y`` parameter, not the literal column name 'y'.
        fig.add_trace(go.Scatter(x=df_ori.Mass, y=df_ori[y], mode='markers'))

    for pair in mass_pairs:
        df_pair = df[df.Mass.isin(pair)]
        if df_pair.empty:
            continue
        fig.add_trace(go.Scatter(x=df_pair.Mass, y=df_pair[y], mode='lines+markers',
                                 name=pair[2], line=go.scatter.Line(color="pink")))

        # The pair label sits at the midpoint of the matched dots.
        fig.add_annotation(x=df_pair.Mass.mean(), y=df_pair[y].mean(), yshift=5,
            text=pair[2],
            showarrow=False,
            arrowhead=1)

    if endpoints is not None and not endpoints.empty:
        fig.add_trace(go.Scatter(x=endpoints.Mass, y=endpoints[y], mode='markers'))
        print(endpoints[['Mass', 'RT', 'Vol']])

    fig.show()

In [None]:
def thermo_df(df, key_rows_only=True):
    """Normalise a raw peak table to the short column names.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose columns may include 'Monoisotopic Mass', 'Apex RT',
        'Sum Intensity', 'Relative Abundance' and 'Fractional Abundance'.
        The input frame is not modified.
    key_rows_only : bool
        When True, keep only Mass / RT / Vol / RA / FA, drop rows containing
        NaN and cast to float64. If RA or FA is missing, fall back to
        Mass / RT / Vol (no rows are dropped in that case). When False, only
        the renaming is applied.

    Returns
    -------
    pandas.DataFrame
    """
    short_names = {
        'Monoisotopic Mass': 'Mass',
        'Apex RT': 'RT',
        'Sum Intensity': 'Vol',
        'Relative Abundance': 'RA',
        'Fractional Abundance': 'FA',
    }
    renamed = df.rename(columns=short_names)
    if not key_rows_only:
        return renamed

    try:
        selected = renamed[['Mass', 'RT', 'Vol', 'RA', 'FA']].dropna()
    except KeyError:
        # Abundance columns are absent: keep the three core columns as-is.
        selected = renamed[['Mass', 'RT', 'Vol']]
    return selected.astype('float64')

def load_data(fpath, csv_format=False):
    """Read a peak table from disk and normalise it with ``thermo_df``.

    ``fpath`` is read with ``pd.read_csv`` when ``csv_format`` is truthy and
    with ``pd.read_excel`` otherwise.
    """
    if csv_format:
        raw = pd.read_csv(fpath)
    else:
        raw = pd.read_excel(fpath)
    return thermo_df(raw)

def load_excel(fpath, sheet=0):
    """Read one sheet of an Excel workbook and normalise it with ``thermo_df``.

    ``sheet`` is forwarded to ``pd.read_excel`` as its second positional
    argument (the sheet selector); the default picks the first sheet.
    """
    return thermo_df(pd.read_excel(fpath, sheet))

In [None]:
import os
from itertools import permutations, product, filterfalse
def _home_dir():
    """Return the working-directory prefix that ends with 'Notebooks'.

    The current working directory is cut right after the first occurrence of
    'Notebooks'. When the working directory does not contain that marker the
    working directory itself is returned, instead of an arbitrary 8-character
    prefix (the result of ``str.find`` returning -1).
    """
    cwd = os.getcwd()
    marker = 'Notebooks'
    head, found, _ = cwd.partition(marker)
    if not found:
        return cwd
    return head + marker

def _modifications_df():
    """Load the modification table and expose its mass column as 'Mass'.

    Reads ``statics/bases_methyl.csv`` below the directory returned by
    ``_home_dir()`` and renames the 'Exact Mass' column to 'Mass'. A fresh
    frame is returned on every call.
    """
    table_path = os.path.join(_home_dir(), 'statics/bases_methyl.csv')
    return pd.read_csv(table_path).rename(columns={'Exact Mass': 'Mass'})

def _get_product(m, n):
    """Enumerate count tuples of length ``n`` whose first ``n-1`` entries sum to ``m``.

    Parameters
    ----------
    m : int
        Required sum of the first ``n-1`` counts.
    n : int
        Length of every returned tuple.

    Returns
    -------
    list of tuple
        Every ``(c_1, ..., c_{n-1}, k)`` with ``c_i`` in ``0..r`` (``r`` is
        ``m`` below 20, otherwise ``m // 2``), ``sum(c) == m`` and trailing
        count ``k`` in ``range(max(m, 3))``, kept only when the sum of the
        first four counts is at least ``k - 1``. Tuples are grouped by
        ascending ``k``.
    """
    per_slot_max = m if m < 20 else int(m // 2)
    leading = [combo
               for combo in product(range(per_slot_max + 1), repeat=n - 1)
               if sum(combo) == m]

    # NOTE(review): a cap of 8 used to be computed here and was immediately
    # overwritten by this value; only the effective limit is kept.
    trailing_limit = 3 if m < 3 else m

    # allow a base has two methylations
    return [combo + (k,)
            for k in range(trailing_limit)
            for combo in leading
            if sum(combo[:4]) >= k - 1]

def _get_permutations(m, n):
    """Enumerate count tuples of length ``n`` as a numpy array.

    Candidates are all tuples over ``range(m + 5)``. A tuple is kept when the
    sum of all entries except the last lies in ``m-1 .. m+1`` and the last
    entry is at most ``m/2 + 1``.

    Returns
    -------
    numpy.ndarray
        One row per kept tuple, in ``itertools.product`` order.
    """
    ceiling_last = m / 2 + 1
    kept = [combo
            for combo in product(range(m + 5), repeat=n)
            if m - 1 <= sum(combo[:-1]) <= m + 1 and combo[-1] <= ceiling_last]
    return np.array(kept)

def _handle_bases(mass, df_mod):
    """Enumerate component combinations whose total mass does not exceed ``mass + 1``.

    Parameters
    ----------
    mass : float
        Mass gap to explain.
    df_mod : pandas.DataFrame
        Modification table with ``Name`` and ``Mass`` columns. The code treats
        its last row specially and writes into a 'Methyl' column.
        NOTE(review): presumably the last row is the methyl entry -- confirm
        against statics/bases_methyl.csv.

    Returns
    -------
    pandas.DataFrame
        One row per combination: a count per ``df_mod.Name`` plus the summed
        ``Mass``. Empty when a small gap is not a near multiple of the last
        row's mass.
    """
    # Rough component count from the gap size.
    # NOTE(review): 320 looks like an approximate mass per base -- confirm.
    size = round(mass/320)
    if 200 < mass < 320:
        size = 1
    elif size < 1:
        # Gap too small for a whole component: accept it only if it is within
        # 0.2 of an integer multiple of the last row's mass.
        rounded_count = int(round(mass / df_mod.Mass.iloc[-1]))
        if abs(rounded_count  * df_mod.Mass.iloc[-1] - mass) < 0.2:
            df = pd.DataFrame(columns=df_mod.Name)
#             df.loc[0] = [0, 0, 0, 0, 0, rounded_count]
            # Creates row 0; all other name columns start as NaN and are
            # zero-filled below.
            df.loc[0, 'Methyl'] = rounded_count
            df['Mass'] = rounded_count * df_mod.Mass.iloc[-1]
            df.fillna(0, inplace=True)
            return df
        return pd.DataFrame()
#     if size > 20:
#         size = size // 2
    # One count per row of df_mod for every candidate combination.
    perms = _get_product(size, df_mod.shape[0])
    # Total mass per combination: counts (rows) times the per-row masses.
    seq_masses = np.matmul(perms, np.array(df_mod.Mass))
    seq_masses_pd = pd.Series(seq_masses)
    res_masses = perms.copy()
    res_masses = pd.DataFrame(res_masses, columns=df_mod.Name)
    res_masses['Mass'] = seq_masses_pd
    # Keep combinations that are not heavier than the gap (1 unit of slack).
    return res_masses[res_masses.Mass <= mass+1]

def _calc_bass_perms_and_remainder(mass, df_mod):
    """Return the component combinations that match ``mass`` within 0.1.

    Parameters
    ----------
    mass : float
        Mass gap to explain.
    df_mod : pandas.DataFrame
        Modification table forwarded to ``_handle_bases``.

    Returns
    -------
    pandas.DataFrame
        The rows produced by ``_handle_bases`` plus a ``MassDiff`` column
        (``mass`` minus the row's summed ``Mass``), restricted to rows with
        ``-0.1 < MassDiff < 0.1``. Empty when nothing matches.
    """
    seq_masses = _handle_bases(mass, df_mod)
    if seq_masses.empty:
        return pd.DataFrame()

    # assign() builds a new frame rather than writing a column into the
    # filtered frame returned by _handle_bases, which avoids chained-assignment
    # (SettingWithCopy) warnings and leaves the helper's result untouched.
    diff = seq_masses.assign(MassDiff=mass - seq_masses['Mass'])
    return diff[(diff.MassDiff > -0.1) & (diff.MassDiff < 0.1)]

def components(mass):
    """Find component combinations explaining the absolute value of ``mass``.

    Gaps larger than 10000 are not searched and yield an empty frame.
    Otherwise the modification table is loaded via ``_modifications_df()``
    and the matching combinations from ``_calc_bass_perms_and_remainder`` are
    returned.

    NOTE(review): the modification table is re-read from disk on every call.
    """
    gap = abs(mass)
    if gap > 10000: ### !!! We simply CANNOT process such big gaps
        return pd.DataFrame()
    return _calc_bass_perms_and_remainder(gap, _modifications_df())

In [None]:
def gap_rect(df_ends, mode='all'):
    """Compute the search rectangle between two gap end dots.

    Parameters
    ----------
    df_ends : pandas.DataFrame
        The two end dots of a gap (columns ``Mass`` and ``RT``). The rows are
        ordered by ``Mass`` internally, so either row order is accepted.
    mode : str
        'left' trims 300 from the lower mass bound, 'right' trims 300 from
        the upper mass bound, anything else trims 200 from both.

    Returns
    -------
    tuple
        ``(l, r, b, t)``: lower / upper mass bound and the RT values of the
        straight line through the end dots at ``l - 150`` and ``r + 150``.
        When ``l`` exceeds 10000 the RT window is widened by 0.2 on each side.
    """
    # Order by mass so the first row is always the lower end of the gap; the
    # line fit itself does not depend on row order.
    ends = df_ends.astype('float').sort_values('Mass')
    slope, intercept = np.polyfit(ends.Mass, ends.RT, 1)

    def rt_on_line(x):
        return slope * x + intercept

    low_mass = ends.iloc[0].Mass
    high_mass = ends.iloc[1].Mass
    if mode == 'left':
        l = low_mass + 300
        r = high_mass
    elif mode == 'right':
        l = low_mass
        r = high_mass - 300
    else:
        l = low_mass + 200
        r = high_mass - 200

    b = rt_on_line(l - 150)
    t = rt_on_line(r + 150)
    if l > 10000:
        b = b - 0.2
        t = t + 0.2
    return l, r, b, t

def all_dots_in_gap(df, df_ends, mode='all'):
    """Select the dots of ``df`` inside the rectangle from ``gap_rect``.

    The mass window ``l < Mass < r`` always applies. For mode 'left' only the
    lower RT bound is enforced, for 'right' only the upper one, otherwise
    both. The rectangle ``l r b t`` is printed before returning.
    """
    l, r, b, t = gap_rect(df_ends, mode)
    inside = (df.Mass > l) & (df.Mass < r)
    if mode == 'left':
        inside = inside & (df.RT > b)
    elif mode == 'right':
        inside = inside & (df.RT < t)
    else:
        inside = inside & (df.RT > b) & (df.RT < t)
    df_gap = df[inside]
    print(l, r, b, t)
    return df_gap

def standalone_dots_in_gap(df_gap, df_ends, mode='all'):
    """Keep the gap dots whose distance to the gap ends is explainable.

    Parameters
    ----------
    df_gap : pandas.DataFrame
        Candidate dots inside the gap (needs a ``Mass`` column).
    df_ends : pandas.DataFrame
        The two end dots; row 0 is the left end, row 1 the right end.
    mode : str
        'left'  -- the distance to the left end must have components;
        'right' -- the distance to the right end must have components;
        'all'   -- both distances must have components.
        Any other value keeps nothing.

    Returns
    -------
    pandas.DataFrame
        The rows of ``df_gap`` that passed the check.
    """
    left_mass = df_ends.iloc[0]['Mass']
    right_mass = df_ends.iloc[1]['Mass']

    def explainable(delta):
        return components(abs(delta)).shape[0] > 0

    # components() is expensive, so it is evaluated only for the end(s) the
    # mode actually needs ('all' short-circuits when the left side fails).
    idxs = list()
    for idx, row in df_gap.iterrows():
        if mode == 'left':
            keep = explainable(row.Mass - left_mass)
        elif mode == 'right':
            keep = explainable(row.Mass - right_mass)
        elif mode == 'all':
            keep = explainable(row.Mass - left_mass) and explainable(row.Mass - right_mass)
        else:
            keep = False
        if keep:
            idxs.append(idx)

    return df_gap.loc[idxs]

def standalone_dots(df, df_ends, mode='all'):
    """Collect the explainable dots of every gap described by ``df_ends``.

    ``df_ends`` is consumed two rows at a time; each pair of rows is one gap
    (left end, right end). For every gap the candidate dots come from
    ``all_dots_in_gap`` and are filtered by ``standalone_dots_in_gap``.

    Returns
    -------
    pandas.DataFrame
        De-duplicated concatenation of the dots found in all gaps, or an
        empty frame (after printing a notice) when no gap yielded any dot.
    """
    per_gap_hits = []
    for start in range(0, df_ends.shape[0], 2):
        gap_ends = df_ends.iloc[start:start + 2]
        candidates = all_dots_in_gap(df, gap_ends, mode)
        print('Processing the gap {}-{}, {} dots'.format(gap_ends.iloc[0]['Mass'], gap_ends.iloc[1]['Mass'], candidates.shape[0]))
        hits = standalone_dots_in_gap(candidates, gap_ends, mode)
        if not hits.empty:
            per_gap_hits.append(hits)

    if not per_gap_hits:
        print('No dots found in these gaps.')
        return pd.DataFrame()
    return pd.concat(per_gap_hits).drop_duplicates()

In [None]:
# Mass offsets used to place the virtual start / end of a ladder.
# NOTE(review): presumably the 5' phosphate tag and the 3' hydroxyl (water)
# contribution -- confirm against the chemistry notes.
TAG_5P = 79.9663
TAG_3OH = 18.0106

def dots_in_left_gap(df, mass, orientation=5):
    """Find explainable dots between the ladder start and the dot at ``mass``.

    Parameters
    ----------
    df : pandas.DataFrame
        All dots (needs ``Mass`` and ``RT``).
    mass : float
        Mass of the right end dot; the first row of ``df`` within +/-0.1 of
        it is used.
    orientation : int
        5 places the virtual start at ``TAG_5P + TAG_3OH``, 3 at
        ``TAG_3OH - TAG_5P``; its RT is 0.0.

    Returns
    -------
    pandas.DataFrame
        Result of ``standalone_dots`` for this single gap.

    Raises
    ------
    IndexError
        If no dot lies within +/-0.1 of ``mass``.
    """
    columns = ['Mass', 'RT']
    left_mass = TAG_5P + TAG_3OH
    if orientation == 3:
        left_mass = TAG_3OH - TAG_5P
    right_dot = df[(df.Mass < mass+0.1) & (df.Mass > mass-0.1)].iloc[0]
    # Build the frame in one step: chained writes such as
    # ``df_ends.loc[0][columns] = ...`` do not reach the frame under pandas
    # Copy-on-Write.
    df_ends = pd.DataFrame([[left_mass, 0.0],
                            [right_dot['Mass'], right_dot['RT']]],
                           index=[0, 1], columns=columns, dtype=float)
    return standalone_dots(df, df_ends, mode='all')

def dots_in_right_gap(df, mass, full_mass, orientation=5):
    """Find explainable dots between the dot at ``mass`` and the full-mass end.

    Parameters
    ----------
    df : pandas.DataFrame
        All dots (needs ``Mass`` and ``RT``).
    mass : float
        Mass of the left end dot (first row of ``df`` within +/-0.1).
    full_mass : float
        Mass of the full-length dot (first row of ``df`` within +/-0.1).
    orientation : int
        5 shifts the right end mass by ``+TAG_5P``, 3 by ``-TAG_5P``; any
        other value leaves it unchanged.

    Returns
    -------
    pandas.DataFrame
        Result of ``standalone_dots`` for this single gap.

    Raises
    ------
    IndexError
        If no dot lies within +/-0.1 of ``mass`` or ``full_mass``.
    """
    columns = ['Mass', 'RT']
    left_dot = df[(df.Mass < mass+0.1) & (df.Mass > mass-0.1)].iloc[0]
    full_dot = df[(df.Mass < full_mass+0.1) & (df.Mass > full_mass-0.1)].iloc[0]

    right_mass = full_dot['Mass']
    if orientation == 5:
        right_mass += TAG_5P
    elif orientation == 3:
        right_mass -= TAG_5P

    # Build the frame in one step: chained writes such as
    # ``df_ends.loc[1]['Mass'] += ...`` do not reach the frame under pandas
    # Copy-on-Write.
    df_ends = pd.DataFrame([[left_dot['Mass'], left_dot['RT']],
                            [right_mass, full_dot['RT']]],
                           index=[0, 1], columns=columns, dtype=float)
    return standalone_dots(df, df_ends, mode='all')

def dots_in_mid_gap(df, left_mass, right_mass):
    """Find explainable dots between two existing dots.

    Parameters
    ----------
    df : pandas.DataFrame
        All dots (needs ``Mass`` and ``RT``).
    left_mass, right_mass : float
        Masses of the two end dots; for each, the first row of ``df`` within
        +/-0.1 is used.

    Returns
    -------
    pandas.DataFrame
        Result of ``standalone_dots`` for this single gap.

    Raises
    ------
    IndexError
        If no dot lies within +/-0.1 of either mass.
    """
    columns = ['Mass', 'RT']
    left_dot = df[(df.Mass < left_mass+0.1) & (df.Mass > left_mass-0.1)].iloc[0]
    right_dot = df[(df.Mass < right_mass+0.1) & (df.Mass > right_mass-0.1)].iloc[0]
    # Build the frame in one step: chained writes such as
    # ``df_ends.loc[0][columns] = ...`` do not reach the frame under pandas
    # Copy-on-Write.
    df_ends = pd.DataFrame([[left_dot['Mass'], left_dot['RT']],
                            [right_dot['Mass'], right_dot['RT']]],
                           index=[0, 1], columns=columns, dtype=float)
    return standalone_dots(df, df_ends, mode='all')

In [None]:
def find_gap_dots(df, df_skeleton, fullmass_dot, orientation=5, major=False, start_at=0, stop_at=0):
    """Walk a skeleton ladder by ascending mass and collect dots that fill its gaps.

    For every skeleton dot the gap to the previously kept dot (or to the
    virtual ladder start) is examined. Gaps below 600 are skipped unless the
    dot is the last one; gaps above the threshold keep the dot without
    searching; otherwise the dot is kept only if ``components`` can explain
    the gap, and the dots of ``df`` inside that gap are searched with
    ``standalone_dots`` and pruned with ``filter_dots_in_mid_gap``. After the
    last skeleton dot, the gap up to ``fullmass_dot`` is searched as well.

    Parameters
    ----------
    df : pandas.DataFrame
        All dots (columns Mass, RT, Vol).
    df_skeleton : pandas.DataFrame
        Dots already assigned to the ladder (same columns, index labels
        shared with ``df``).
    fullmass_dot : pandas.DataFrame
        Frame whose first row is the full-length dot; may be empty.
    orientation : int
        5 or 3; selects the virtual start mass and the sign of the tag shift
        applied to the full-mass end.
    major : bool
        Use a gap threshold of 3200 instead of 10000.
    start_at, stop_at : float
        When positive, skeleton dots below ``start_at`` are skipped and the
        walk stops at the first dot above ``stop_at``.

    Returns
    -------
    pandas.DataFrame
        Rows of ``df`` found in the gaps, excluding rows already present in
        ``df_skeleton``.
    """
    TAG_5P = 79.9663
    TAG_3OH = 18.0106

    THRESHOLD = 10000
    if major:
        THRESHOLD = 3200
    df_skeleton = df_skeleton.sort_values('Mass')
    columns = ['Mass', 'RT', 'Vol']

    idxs = list()      # index labels of skeleton dots kept so far
    gap_idxs = list()  # index labels of dots found inside gaps
    for idx, row in df_skeleton.iterrows():
        if start_at > 0 and row.Mass < start_at:
            continue
        if stop_at > 0 and row.Mass > stop_at:
            break
        print('Processing forward {}'.format(row.Mass))
        df_ends = pd.DataFrame(index=[0, 1], columns=columns)
        if not idxs:
            # handle gap between TAG and the first dot
            pre = TAG_5P + TAG_3OH
            if orientation == 3:
                pre = TAG_3OH - TAG_5P
            df_ends.iloc[0] = [pre, 0.001, 0]
        else:
            pre = df_skeleton.loc[idxs[-1]].Mass
            df_ends.iloc[0] = df_skeleton.loc[idxs[-1]][columns]
        is_last = idx == df_skeleton.index[-1]
        if not is_last and row.Mass - pre < 600:
            continue
        if row.Mass - pre > THRESHOLD:
            # Gap too large to search: keep the dot and move on.
            idxs.append(idx)
            continue
        print('components {}-{}'.format(row.Mass, pre))
        res = components(row.Mass - pre)
        if not res.empty:
            # keep this dot, and fill in the gap
            print('keep this dot {}'.format(row.Mass))
            idxs.append(idx)
            df_ends.iloc[1] = row[columns]
            df_ends = df_ends.astype(float)
            df_alones = standalone_dots(df, df_ends, mode='all')
            df_alones = filter_dots_in_mid_gap(df_alones)
            if not df_alones.empty:
                print('extend dots ', df_alones)
                gap_idxs.extend(df_alones.index)

        if is_last and row.Mass - pre < THRESHOLD and not fullmass_dot.empty:
            print('====>', row.Mass, pre, THRESHOLD)
            if idxs:
                df_ends.iloc[0] = df_skeleton.loc[idxs[-1]][columns]
            # else: no skeleton dot was kept, so row 0 still holds the virtual
            # ladder start set above (indexing idxs[-1] would raise).
            df_ends.iloc[1] = fullmass_dot.iloc[0][columns]
            if stop_at > 0 and df_ends.iloc[1].Mass > stop_at:
                break
            # Write through a single iloc call: the chained form
            # ``df_ends.iloc[1]['Mass'] += ...`` does not reach the frame
            # under pandas Copy-on-Write.
            mass_col = df_ends.columns.get_loc('Mass')
            if orientation == 5:
                df_ends.iloc[1, mass_col] += TAG_5P
            elif orientation == 3:
                df_ends.iloc[1, mass_col] -= TAG_5P
            df_alones = standalone_dots(df, df_ends, mode='all')
            if not df_alones.empty:
                print('extend dots ', df_alones)
                gap_idxs.extend(df_alones.index)

            # append the fullmass dot
            gap_idxs.append(fullmass_dot.index[0])

    # remove duplicated dots from df_gap
    df_gap = df.loc[gap_idxs]
    df_gap_dup = df_gap[df_gap.index.isin(df_skeleton.index)]
    df_gap = df_gap.drop(df_gap_dup.index)
    return df_gap

In [None]:
def soft_connected(mass_diff):
    """Tell whether a positive mass difference can be explained by components.

    Non-positive differences are never connected; otherwise the answer is
    whether ``components(mass_diff)`` returns any rows.
    """
    if mass_diff <= 0:
        return False
    return not components(mass_diff).empty


# Element-wise version of soft_connected for numpy arrays.
comp_vec = np.vectorize(soft_connected)

def zip_list(G):
    """List the graph's nodes ordered by edge count, then by 'Vol'.

    Returns
    -------
    list of tuple
        ``(node, edge_count, vol)`` per node, sorted ascending by edge count;
        nodes with equal edge count are ordered by ascending 'Vol'. An empty
        list for a graph without nodes.
    """
    if not G.nodes:
        return []
    # Two stable passes: the secondary key (Vol) first, the primary key
    # (edge count) second.
    by_volume = sorted(
        ((node, len(G.edges(node)), G.nodes[node]['Vol']) for node in G.nodes),
        key=lambda entry: entry[2],
    )
    return sorted(by_volume, key=lambda entry: entry[1])

def filter_dots_in_mid_gap(df):
    """Reduce gap dots to a mutually connected subset.

    Two dots are connected when the positive difference of their masses is
    explainable (``comp_vec`` / ``soft_connected``). The connections form an
    undirected graph keyed by mass; the node with the fewest edges (lowest
    'Vol' among ties) is removed repeatedly until every remaining node has
    the same number of edges.

    Parameters
    ----------
    df : pandas.DataFrame
        Candidate dots (columns ``Mass`` and ``Vol``).

    Returns
    -------
    pandas.DataFrame
        ``df`` itself when it has exactly one row, an empty frame when ``df``
        is empty or no two dots are connected, otherwise the rows of ``df``
        whose mass survived the pruning. The surviving masses are printed.
    """
    if df.empty:
        return pd.DataFrame()
    if df.shape[0] == 1:
        return df

    masses = np.array(df.Mass)
    # pairwise_diff[i, j] = mass_i - mass_j
    pairwise_diff = masses[:, np.newaxis] - masses
    connected = comp_vec(pairwise_diff)

    # Positions (row, col) of connected pairs in row-major order. argwhere is
    # used because DataFrame.stack's NaN-dropping differs across pandas
    # versions.
    idx_pairs = [tuple(pair) for pair in np.argwhere(connected)]
    if not idx_pairs:
        return pd.DataFrame()

    G = nx.Graph()
    for first, second in idx_pairs:
        dot_a = df.iloc[first]
        dot_b = df.iloc[second]
        G.add_edge(dot_a.Mass, dot_b.Mass)
        G.nodes[dot_a.Mass]['Vol'] = dot_a.Vol
        G.nodes[dot_b.Mass]['Vol'] = dot_b.Vol

    while True:
        ranked = zip_list(G)
        if not ranked:
            break
        # Stop once the weakest node has as many edges as the strongest one.
        if ranked[0][1] >= ranked[-1][1]:
            break
        G.remove_node(ranked[0][0])

    kept_masses = [entry[0] for entry in zip_list(G)]
    print('reserved masses: ', kept_masses)
    return df[df.Mass.isin(kept_masses)]

In [None]:
def play_beep():
  """Play a notification sound in the notebook by displaying an autoplaying Audio widget."""
  beep = Audio(url='https://sound.peal.io/ps/audios/000/000/537/original/woo_vu_luvub_dub_dub.wav', autoplay=True)
  display(beep)