In [None]:
import pandas as pd
import numpy as np

In [None]:
# Mass constants used to build the expected mass shift between paired compounds.
# MASS_P is presumably a phosphate group and MASS_H2O a water molecule
# (monoisotopic masses) -- TODO confirm against the chemistry notes.
# The tag mass itself is passed per call (see the `tag` argument of endpoints);
# the expected shift is tag + MASS_P - MASS_H2O.
MASS_P = 79.96633
MASS_H2O = 18.0106


def endpoints(df, tag=694.2397, min_mass=6000, min_vol=5E6, tol=0.1):
    """Find rows whose masses differ by the tag shift ``tag + MASS_P - MASS_H2O``.

    ``df`` is first restricted to rows with ``Mass > min_mass`` and
    ``Vol > min_vol``. Every ordered pair (i, j) of the remaining rows is then
    tested for ``abs(Mass[i] - Mass[j] - shift) < tol``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must expose ``Mass`` and ``Vol`` columns.
    tag : float
        Tag mass added to the expected shift.
    min_mass, min_vol : float
        Strict lower bounds applied to ``Mass`` and ``Vol`` before pairing.
    tol : float
        Strict upper bound on the absolute deviation from the expected shift.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(rows_i, rows_j)``: the filtered rows that appear as the first
        member (``Mass[i] ~ Mass[j] + shift``) and as the second member of at
        least one matching pair, each sorted by ``Mass``. The printed summary
        labels them 3' and 5' ladder ends respectively.
    """
    mass_diff = tag + MASS_P - MASS_H2O
    df_top = df[(df.Mass > min_mass) & (df.Vol > min_vol)]
    print("Input shape {} filtered shape {} by min mass {} and min Vol {}".format(df.shape, df_top.shape, min_mass, min_vol))

    masses = np.array(df_top.Mass)
    # Pairwise |m_i - m_j - shift|; this is an absolute mass deviation, not ppm.
    deviation = np.abs(masses[:, np.newaxis] - masses - mass_diff)

    # Read the matching (i, j) positions straight from the boolean mask rather
    # than via DataFrame.stack(), whose NaN-dropping behaviour is version
    # dependent. np.unique also yields a deterministic, sorted row order.
    rows, cols = np.nonzero(deviation < tol)
    df3_idxs = np.unique(rows)
    df5_idxs = np.unique(cols)
    print("found 3' ladder ends {} 5' ladder ends {}".format(len(df3_idxs), len(df5_idxs)))
    return df_top.iloc[df3_idxs].sort_values('Mass'), df_top.iloc[df5_idxs].sort_values('Mass')

In [None]:
def get_end5p_points(df5p, sum_value, tag=694.2397):
    """Return the highest-``Vol`` row of ``df5p`` near the expected end mass.

    The target mass is ``sum_value - 80 - tag``; rows whose ``Mass`` lies
    within +/- 0.3 of it (inclusive) are candidates. The target and the
    candidate index are printed.

    Parameters
    ----------
    df5p : pandas.DataFrame
        Must expose ``Mass`` and ``Vol`` columns.
    sum_value : float
        Mass from which the fixed offset 80 and ``tag`` are subtracted.
    tag : float
        Tag mass.

    Returns
    -------
    pandas.Series
        The candidate row with the largest ``Vol``.

    Raises
    ------
    IndexError
        If no row falls inside the window (``iloc[0]`` on an empty frame).
    """
    BIAS = 0.3  # half-width of the accepted mass window
    end_5p = sum_value - 80 - tag
    masses = df5p.Mass
    print(end_5p)

    lower = end_5p - BIAS
    upper = end_5p + BIAS
    in_window = (masses >= lower) & (masses <= upper)
    end5p_idxs = masses[in_window].index
    print("end5p_idxs {}".format(end5p_idxs))

    # Rank the candidates by Vol, largest first, and keep the top one.
    candidates = df5p.loc[end5p_idxs].sort_values('Vol', ascending=False)
    return candidates.iloc[0]

# df5_idxs.extend(list(end5p_idxs))

In [None]:
def computational_data_seperation(df3p, df5p, full_mass, error=0.1, ignore_endpoints=False, has_tag=False, tag=826.3184):
    """Select rows of ``df3p`` / ``df5p`` whose masses add up to a target sum.

    All pairwise sums ``df3p.Mass[i] + df5p.Mass[j]`` are rounded to one
    decimal and compared with the target (also rounded to one decimal):

    * ``has_tag=True``:  ``full_mass + tag + 79.9663 - 18.0106``
    * ``has_tag=False``: ``full_mass + 18.0106``

    A pair matches when its rounded sum lies within ``+/- error`` (inclusive)
    of the target.

    Parameters
    ----------
    df3p, df5p : pandas.DataFrame
        Must expose a ``Mass`` column. ``df5p`` additionally needs ``RT`` and
        ``Vol`` when ``ignore_endpoints`` is False.
    full_mass : float
        Mass the target sum is derived from.
    error : float
        Half-width of the accepted window around the target sum.
    ignore_endpoints : bool
        When False, ``get_end5p_points`` is called on ``df5p`` and its result
        is printed; that call fails if no row lies near the expected end mass.
    has_tag : bool
        Chooses which target formula is used (see above).
    tag : float
        Tag mass. Note the default here (826.3184) differs from the 694.2397
        default used by the other helpers in this notebook.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(df3p_selected, df5p_selected)``: rows taking part in at least one
        matching pair, in their original positional order.
    """
    df3p_mass_np = np.array(df3p['Mass'])
    df5p_mass_np = np.array(df5p['Mass'])
    mass_sum_1 = np.round(df3p_mass_np[:, np.newaxis] + df5p_mass_np, 1)

    if has_tag:
        sum_value = round(full_mass + tag + 79.9663 - 18.0106, 1)
    else:
        sum_value = round(full_mass + 18.0106, 1)
    print('full_mass {} sum_value {}'.format(full_mass, sum_value))

    # Read the matching (i, j) positions from the boolean mask directly instead
    # of going through DataFrame.stack(), whose NaN-dropping behaviour is
    # version dependent. np.unique gives a deterministic, sorted row order.
    in_window = (mass_sum_1 >= sum_value - error) & (mass_sum_1 <= sum_value + error)
    rows, cols = np.nonzero(in_window)

    df3p_selected = df3p.iloc[np.unique(rows)]
    df5p_selected = df5p.iloc[np.unique(cols)]

    if not ignore_endpoints:
        df5p_endpoints = get_end5p_points(df5p, full_mass + 18.0106, tag)
        print("df5p_endpoints {}".format(df5p_endpoints[['Mass', 'RT', 'Vol']]))

        # NOTE(review): df5p_endpoints is a single row (a Series).
        # DataFrame.update aligns a Series by its *name* as a column label, so
        # these calls appear to leave both selections unchanged. Confirm whether
        # the endpoint row was meant to be appended instead.
        df3p_selected.update(df5p_endpoints)
        df5p_selected.update(df5p_endpoints)

    return df3p_selected, df5p_selected

In [None]:
def label_unlabel_comparison(df3p, df5p, tag=694.2397, error=0.1):
    """Select rows whose mass difference equals ``tag + 61.9557``.

    All pairwise differences ``df3p.Mass[i] - df5p.Mass[j]`` are rounded to
    one decimal and compared with ``round(tag + 61.9557, 1)``. A pair matches
    when the rounded difference lies within ``+/- error`` (inclusive).

    Parameters
    ----------
    df3p, df5p : pandas.DataFrame
        Must expose a ``Mass`` column.
    tag : float
        Tag mass added to the fixed 61.9557 offset.
    error : float
        Half-width of the accepted window around the expected difference.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(df3p_selected, df5p_selected)``: rows taking part in at least one
        matching pair, in their original positional order.
    """
    diff = tag + 61.9557

    df3p_mass_np = np.array(df3p['Mass'])
    df5p_mass_np = np.array(df5p['Mass'])
    mass_diff_1 = np.round(df3p_mass_np[:, np.newaxis] - df5p_mass_np, 1)

    comparee = round(diff, 1)

    # Read the matching (i, j) positions from the boolean mask directly instead
    # of going through DataFrame.stack(), whose NaN-dropping behaviour is
    # version dependent. np.unique gives a deterministic, sorted row order.
    in_window = (mass_diff_1 >= comparee - error) & (mass_diff_1 <= comparee + error)
    rows, cols = np.nonzero(in_window)
    df3_idxs = np.unique(rows)
    df5_idxs = np.unique(cols)
    print("df3_idxs {} df5_idxs {}".format(len(df3_idxs), len(df5_idxs)))

    df3p_selected = df3p.iloc[df3_idxs]
    df5p_selected = df5p.iloc[df5_idxs]
    return df3p_selected, df5p_selected

In [1]:
def process_base_seats(df, mass_pairs, full_mass, col_name='Base', orientation=5):
    """Lay out the base labels of ``mass_pairs`` on a table of numbered seats.

    The table has ``int(full_mass // 320)`` rows indexed 1..seats (index name
    ``position``) and a single column ``col_name`` initialised to ``''``.
    (320 is presumably an approximate per-position mass -- TODO confirm.)

    For each entry ``t`` of ``mass_pairs``, the rows of ``df`` whose ``Mass``
    is contained in ``t`` are looked up; entries without any match are
    skipped. The seat is ``Mass // 320`` of the heaviest matched row, or, when
    ``orientation == 3``, ``seats - (Mass // 320)`` of the lightest matched
    row. ``t[2]`` is written to that seat; several labels landing on the same
    seat are joined with ``', '``.

    Returns the seat table. A seat outside 1..seats raises ``KeyError``.
    """
    seats = int(full_mass // 320)
    df_res = pd.DataFrame({'position': range(1, seats + 1)})
    df_res[col_name] = ''
    df_res = df_res.set_index('position')

    from_3p = orientation == 3
    for pair in mass_pairs:
        matched = df[df.Mass.isin(pair)]
        if matched.empty:
            continue

        pair_masses = matched['Mass']
        anchor = pair_masses.idxmin() if from_3p else pair_masses.idxmax()
        bucket = int(matched.loc[anchor].Mass // 320)
        # Orientation 3 counts seats from the opposite end of the table.
        pos = seats - bucket if from_3p else bucket

        current = df_res.loc[pos, col_name]
        if current:
            label = '{}, {}'.format(current, pair[2])
        else:
            label = pair[2]
        df_res.loc[pos, col_name] = label

    return df_res

def process_mass_seats(df, full_mass, col_name='Mass', orientation=5):
    """Lay out every ``Mass`` of ``df`` on a table of numbered seats.

    The table has ``int(full_mass // 320)`` rows indexed 1..seats (index name
    ``position``) and a single column ``col_name`` initialised to ``''``.
    (320 is presumably an approximate per-position mass -- TODO confirm.)

    Each row of ``df`` is placed at seat ``Mass // 320``, or at
    ``seats - (Mass // 320)`` when ``orientation == 3``. The first mass on a
    seat is stored as-is; further masses on the same seat are appended as
    ``'<previous>, <mass>'`` text.

    Returns the seat table. A seat outside 1..seats raises ``KeyError``.
    """
    seats = int(full_mass // 320)
    df_res = pd.DataFrame({'position': range(1, seats + 1)})
    df_res[col_name] = ''
    df_res = df_res.set_index('position')

    from_3p = orientation == 3
    for _, record in df.iterrows():
        bucket = int(record.Mass // 320)
        # Orientation 3 counts seats from the opposite end of the table.
        pos = seats - bucket if from_3p else bucket

        existing = df_res.loc[pos, col_name]
        if existing:
            entry = '{}, {}'.format(existing, record.Mass)
        else:
            entry = record.Mass
        df_res.loc[pos, col_name] = entry

    return df_res

def process_mass_base_seats(df, mass_pairs, full_mass, orientation=5):
    """Build a seat table holding both the masses and the base labels.

    The table has ``int(full_mass // 320)`` rows indexed 1..seats (index name
    ``position``) and two columns, ``Base`` and ``Mass``, initialised to
    ``''``. (320 is presumably an approximate per-position mass -- TODO
    confirm.) Missing values in ``df`` are replaced by 0 first.

    Mass pass: every row with ``Mass > 0`` is placed at seat ``Mass // 320``,
    or ``seats + 1 - (Mass // 320)`` when ``orientation == 3``.

    Base pass: for each entry ``t`` of ``mass_pairs`` the rows whose ``Mass``
    is contained in ``t`` are looked up (entries without a match are skipped);
    ``t[2]`` goes to seat ``Mass // 320`` of the heaviest matched row, or to
    ``seats - (Mass // 320)`` of the lightest matched row when
    ``orientation == 3``.

    Multiple values landing on the same seat are joined with ``', '``.
    Returns the seat table. A seat outside 1..seats raises ``KeyError``.
    """
    seats = int(full_mass // 320)
    col_base, col_mass = 'Base', 'Mass'
    df_res = pd.DataFrame({'position': range(1, seats + 1)})
    df_res[col_base] = ''
    df_res[col_mass] = ''
    df_res = df_res.set_index('position')

    from_3p = orientation == 3
    filled = df.fillna(0)

    # Mass pass.
    for _, record in filled.iterrows():
        if record.Mass <= 0:
            continue
        bucket = int(record.Mass // 320)
        # NOTE(review): for orientation 3 this pass mirrors with seats + 1 - bucket
        # while the base pass below uses seats - bucket; kept as in the
        # original -- confirm the one-seat difference is intended.
        pos = seats + 1 - bucket if from_3p else bucket

        existing = df_res.loc[pos, col_mass]
        if existing:
            entry = '{}, {}'.format(existing, record.Mass)
        else:
            entry = record.Mass
        df_res.loc[pos, col_mass] = entry

    # Base pass.
    for pair in mass_pairs:
        matched = filled[filled.Mass.isin(pair)]
        if matched.empty:
            continue

        pair_masses = matched['Mass']
        anchor = pair_masses.idxmin() if from_3p else pair_masses.idxmax()
        bucket = int(matched.loc[anchor].Mass // 320)
        pos = seats - bucket if from_3p else bucket

        current = df_res.loc[pos, col_base]
        if current:
            label = '{}, {}'.format(current, pair[2])
        else:
            label = pair[2]
        df_res.loc[pos, col_base] = label

    return df_res