In [None]:
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import plotly.express as px

In [None]:
# Execute the shared helper notebooks so the names they define are available in this session.
# NOTE(review): helpers called below (thermo_df, plotly_zone, plot_zones, find_gap_dots, ...)
# and the `sns` alias are not defined or imported in this notebook, so they presumably
# come from these three notebooks — confirm.
%run ../modules/utils.ipynb
%run ../modules/cds.ipynb
%run ../modules/preprocessing.ipynb

In [None]:
def local_plot_zone(df, trend=False, order=1, y='RT'):
    """Plot ``df[y]`` against ``df.Mass`` on a new 8.5 x 4 inch figure.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide a ``Mass`` column and the column named by ``y``.
    trend : bool, default False
        When True the points are drawn with ``sns.regplot`` (scatter plus a
        fitted curve); otherwise a plain ``plt.scatter`` is used.
    order : int, default 1
        Polynomial order handed to ``sns.regplot``; ignored when ``trend`` is False.
    y : str, default 'RT'
        Column plotted on the vertical axis. ``'RT'`` labels the axis
        "Retention Time (min)"; any other value labels it "Intensity".

    Returns
    -------
    tuple
        ``(plt, fig)`` - the pyplot module and the figure that was created.
    """
    fig = plt.figure(figsize=(8.5, 4))
    plt.xlabel('Monoisotopic Mass (Da)', fontname="Arial", fontsize=15, color='black')
    y_label = 'Retention Time (min)' if y == 'RT' else 'Intensity'
    plt.ylabel(y_label, fontname="Arial", fontsize=15, color='black')
    plt.xticks(fontname="Arial", size=13, color='black')
    plt.yticks(fontname="Arial", size=13, color='black')
    if trend:
        # x/y are passed by keyword: seaborn >= 0.12 no longer accepts them
        # positionally (the first positional slot is `data`).
        # NOTE(review): `sns` is not imported in this notebook; it is presumably
        # provided by the %run helper notebooks - confirm.
        sns.regplot(x=df.Mass, y=df[y], order=order)
    else:
        plt.scatter(df.Mass, df[y])

    return plt, fig

def local_plot_zones(df3p, df5p, trend=False, y='RT'):
    """Overlay two data sets (``df[y]`` against ``df.Mass``) on one new figure.

    Parameters
    ----------
    df3p, df5p : pandas.DataFrame
        Each must provide a ``Mass`` column and the column named by ``y``.
        ``df3p`` is drawn first, ``df5p`` second.
    trend : bool, default False
        When True both sets are drawn with ``sns.regplot``: ``df3p`` with the
        default fit and ``df5p`` with a second-order polynomial fit. Otherwise
        both are drawn with ``plt.scatter``.
    y : str, default 'RT'
        Column plotted on the vertical axis. ``'RT'`` labels the axis
        "Retention Time (min)"; any other value labels it "Intensity",
        matching ``local_plot_zone``.

    Returns
    -------
    tuple
        ``(plt, fig)`` - the pyplot module and the figure that was created.
    """
    fig = plt.figure(figsize=(8.5, 4))
    plt.xlabel('Monoisotopic Mass (Da)', fontname="Arial", fontsize=15, color='black')
    # The label follows `y` so that a non-RT column is not mislabelled as retention time.
    y_label = 'Retention Time (min)' if y == 'RT' else 'Intensity'
    plt.ylabel(y_label, fontname="Arial", fontsize=15, color='black')
    plt.xticks(fontname="Arial", size=13, color='black')
    plt.yticks(fontname="Arial", size=13, color='black')
    if trend:
        # x/y are passed by keyword: seaborn >= 0.12 no longer accepts them
        # positionally (the first positional slot is `data`).
        # NOTE(review): `sns` is not imported in this notebook; it is presumably
        # provided by the %run helper notebooks - confirm.
        sns.regplot(x=df3p.Mass, y=df3p[y])
        sns.regplot(x=df5p.Mass, y=df5p[y], order=2)
    else:
        plt.scatter(df3p.Mass, df3p[y])
        plt.scatter(df5p.Mass, df5p[y])

    return plt, fig

In [None]:
def output(df_ds, plt, fig, idx, svg_name,
           out_dir='/Users/bryan/Documents/BioPharmaFinder/ZhangLab/MassSum/GraphsOutput'):
    """Save a figure as SVG and its underlying data as an Excel sheet.

    Writes ``Fig{idx}_{svg_name}.svg`` and ``S{idx}_{svg_name}.xlsx`` into
    ``out_dir``. An existing file with the same name is overwritten.

    Parameters
    ----------
    df_ds : object with ``to_excel(path)``
        The data behind the figure (a pandas DataFrame in this notebook).
    plt : module
        Kept for backward compatibility with existing callers; not used.
    fig : matplotlib.figure.Figure
        The figure to lay out and save.
    idx : int or str
        Figure / supplementary-table number used in the file names.
    svg_name : str
        Descriptive suffix used in both file names.
    out_dir : str or path-like, optional
        Destination directory. Defaults to the original hard-coded location;
        it is created if it does not exist yet.
    """
    from pathlib import Path

    target_dir = Path(out_dir)
    target_dir.mkdir(parents=True, exist_ok=True)

    fig.tight_layout()
    # Save through the figure object itself: plt.savefig would write whichever
    # figure happens to be current, which is not necessarily `fig`.
    fig.savefig(str(target_dir / f'Fig{idx}_{svg_name}.svg'), dpi=300, transparent=True)
    df_ds.to_excel(str(target_dir / f'S{idx}_{svg_name}.xlsx'))

In [None]:
# Load the tRNA-Phe Excel export, pass it through thermo_df, plot it and show its shape.
# NOTE(review): absolute, machine-specific path — this cell fails on any other machine.
df_phe = pd.read_excel('/Users/bryan/Documents/BioPharmaFinder/ZhangLab/Data/Excels/tRNA_Phe_0724.xlsx')
# df_phe = pd.read_excel('/Users/bryan/Documents/BioPharmaFinder/BaoLab/FirstBatch/Excels/S2to5combined_20ul_40min.xlsx')
# thermo_df / plotly_zone are defined outside this notebook; later cells rely on the
# result having Mass, Vol and RT columns.
df_phe = thermo_df(df_phe)
plotly_zone(df_phe)
df_phe.shape

In [None]:
# Keep rows with 24000 < Mass < 26000 and Vol > 1e5, plot Vol against Mass,
# then save the figure and the filtered table as Fig1 / S1 "intact_mass".
df_phe_25k = df_phe[(df_phe.Mass > 24000) & (df_phe.Mass < 26000) & (df_phe.Vol > 1E5)]
# Note: this rebinds the global name `plt` to the value returned by local_plot_zone
# (which is the pyplot module itself).
plt, fig = local_plot_zone(df_phe_25k, y='Vol')
# Fig 1
# Earlier inline version of the save step, superseded by output() below:
# fig.tight_layout()
# plt.savefig('/Users/bryan/Documents/BioPharmaFinder/ZhangLab/MassSum/GraphsOutput/Fig1_intact_mass.svg', dpi=300, transparent=True)
# df_phe_25k.to_excel('/Users/bryan/Documents/BioPharmaFinder/ZhangLab/MassSum/GraphsOutput/intact_mass.xlsx')
output(df_phe_25k, plt, fig, 1, 'intact_mass')

In [None]:
# Same filtered intact-mass window as above, drawn with the plotly_zone helper and y='Vol'.
plotly_zone(df_phe_25k, y='Vol')

In [None]:
# Load the previously exported phe5p / phe3p tables and show their shapes.
# NOTE(review): absolute, machine-specific paths; how these two files were produced
# is not shown in this notebook.
df_5p = pd.read_excel('/Users/bryan/Documents/BioPharmaFinder/ZhangLab/MassSum/GraphsOutput/phe5p.xlsx')
df_3p = pd.read_excel('/Users/bryan/Documents/BioPharmaFinder/ZhangLab/MassSum/GraphsOutput/phe3p.xlsx')
df_5p.shape, df_3p.shape

In [None]:
# Keep only rows with Vol > 1e5 in both tables and show the resulting shapes.
# The names df_5p / df_3p are rebound, so the unfiltered tables are no longer
# reachable after this cell runs (re-run the load cell to get them back).
df_5p = df_5p[df_5p.Vol > 1E5]
df_3p = df_3p[df_3p.Vol > 1E5]
df_5p.shape, df_3p.shape

In [None]:
# Split the 3'/5' tables with computational_data_seperation for full_mass = 24252.31,
# plot the two results, then combine them (duplicates dropped) and show the shape.
# computational_data_seperation / plot_zones are defined outside this notebook.
full_mass = 24252.31
# Results are unpacked in (3p, 5p) order ...
df_3p_24252, df_5p_24252 = computational_data_seperation(df_3p, df_5p, full_mass, ignore_endpoints=True)
# ... but handed to plot_zones in (5p, 3p) order.
plot_zones(df_5p_24252, df_3p_24252)
df_24252 = pd.concat([df_3p_24252, df_5p_24252]).drop_duplicates()
df_24252.shape

In [None]:
# Same separation as the previous cell, for full_mass = 24581.374101.
# Rebinds the global full_mass, so later cells see this value until it is reassigned.
full_mass = 24581.374101
df_3p_24581, df_5p_24581 = computational_data_seperation(df_3p, df_5p, full_mass, ignore_endpoints=True)
plot_zones(df_5p_24581, df_3p_24581)
df_24581 = pd.concat([df_3p_24581, df_5p_24581]).drop_duplicates()
df_24581.shape

In [None]:
# Plot the two 24581 sets together and save figure + data as Fig2 / S2 "mass_sum".
# NOTE(review): local_plot_zones is declared as (df3p, df5p) but receives (5p, 3p) here;
# with trend=False this only affects drawing order — confirm it is intended.
plt, fig = local_plot_zones(df_5p_24581, df_3p_24581)
# local_plot_zones(df_5p_24581, df_3p_24581)
# Fig 2 MassSum
# Work on copies so that adding the is5p flag does not modify the source frames.
df_a = df_5p_24581.copy()
df_b = df_3p_24581.copy()
df_a['is5p'] = True
df_b['is5p'] = False
df_merge = pd.concat([df_a, df_b])
output(df_merge, plt, fig, 2, 'mass_sum')

In [None]:
# Plot the 3' set obtained for full mass 24252.31 with the plotly_zone helper.
plotly_zone(df_3p_24252)

In [None]:
# Look up gap dots for the 24252 skeleton with find_gap_dots (orientation=3).
# NOTE(review): df_skeleton_3p_24252_core is only assigned in a later cell (after the
# interactive selection), so this cell raises NameError on a fresh top-to-bottom run.
# NOTE(review): full_mass is 24252.32 here but 24252.31 in the separation cell — confirm.
full_mass = 24252.32
df_skeleton = df_skeleton_3p_24252_core #df_3p_24252 #df_5p_24252 #df_5p_24597
# NOTE(review): `group` is not used in this cell, and its value names the 24581 group
# although this cell works on 24252.
group = 'Group_24581'

# Rows of df_phe whose Mass lies strictly within 0.1 of full_mass.
fullmass_dot = df_phe[(df_phe.Mass>full_mass-0.1) & (df_phe.Mass<full_mass+0.1)]
df_gap_dots = find_gap_dots(df_3p, df_skeleton, fullmass_dot, orientation=3)
df_gap_dots.shape

In [None]:
# Re-run the utils notebook (picks up edits made to it), then apply
# filter_dots_in_mid_gap to three hand-picked rows of df_5p.
%run ../modules/utils.ipynb
# Label-based selection: 699, 915 and 932 are index labels of df_5p, so this raises
# KeyError if the input file or the volume filter changes which rows are present.
df_tmp = df_5p.loc[[699, 915, 932]]
filter_dots_in_mid_gap(df_tmp)

In [None]:
# Plot whatever df_tmp currently holds (it is reassigned in several cells).
plotly_zone(df_tmp)

In [None]:
# Plot the 24252 skeleton together with the gap dots found for it.
plotly_zones(df_skeleton, df_gap_dots)

In [None]:
# Re-run the utils notebook, then query df_3p with dots_in_mid_gap for the
# interval bounded by masses 11855.6515 and 12702.7379.
%run ../modules/utils.ipynb
dots_in_mid_gap(df_3p, 11855.6515, 12702.7379)
# Alternative gap queries kept for reference:
# dots_in_right_gap(df_3p, 22197.1107, full_mass=24252.31, orientation=3)
# dots_in_left_gap(df_3p, 1856.32, orientation=3)

In [None]:
# Apply filter_dots_in_mid_gap to three hand-picked rows of df_3p.
# Label-based selection: raises KeyError if these index labels are absent from df_3p.
df_tmp = df_3p.loc[[1802, 1816, 1818]]
filter_dots_in_mid_gap(df_tmp)

In [None]:
# Call the components helper on the mass difference 12702.7379 - 12373.685147.
components(12702.7379 - 12373.685147)

In [None]:
# Plot the rows of df_3p with 11855.6515 < Mass < 12702.7379 and show how many there are.
# df_tmp = df_5p[(df_5p.Mass > 1767.213) & (df_5p.Mass < 2685.288)]
df_tmp = df_3p[(df_3p.Mass > 11855.6515) & (df_3p.Mass < 12702.7379)]
plotly_zone(df_tmp)
df_tmp.shape

In [None]:
# Interactive point selection: clicked point indices are accumulated in `idxs`.
# Re-running this cell resets `idxs` to an empty list.
# NOTE(review): the cells that follow depend on manual clicks made here, so their
# results cannot be reproduced from code alone.
idxs = list()

def on_click(trace, points, selector):
    """Click callback: append the clicked points' indices (points.point_inds) to idxs."""
#     print('points {}'.format(points.point_inds) )
    idxs.extend(points.point_inds)

# The frame to select from; swap this assignment to work on a different data set.
df_sample = df_5p_24252
f = zone_selection(df_sample, on_click=on_click)
f

In [None]:
# Remove the clicked rows from df_sample and keep the remainder as the 3' 24252 core.
# idxs holds positional indices, hence .iloc; drop() then works on the index labels.
# NOTE(review): the selection cell as written assigns df_sample = df_5p_24252, so running
# the notebook in order stores a 5' frame under this 3' name — presumably df_sample was
# df_3p_24252 when this cell was originally run; confirm.
df_chosen = df_sample.iloc[idxs]
# df_skeleton_3p_24581_core = df_sample.drop(df_chosen.index)
# df_skeleton_3p_24581.shape, df_skeleton_24581_core.shape

df_skeleton_3p_24252_core = df_sample.drop(df_chosen.index)
df_3p_24252.shape, df_skeleton_3p_24252_core.shape

In [None]:
# Remove the clicked rows from df_sample and keep the remainder as the 5' 24252 core.
# Uses the same df_sample and idxs as the previous cell unless the selection cell
# was re-run in between.
df_chosen = df_sample.iloc[idxs]
# df_skeleton_3p_24581_core = df_sample.drop(df_chosen.index)
# df_skeleton_3p_24581.shape, df_skeleton_24581_core.shape

df_skeleton_5p_24252_core = df_sample.drop(df_chosen.index)
df_5p_24252.shape, df_skeleton_5p_24252_core.shape

In [None]:
# Look up gap dots for the 24581 skeleton with find_gap_dots.
# NOTE(review): df_skeleton_3p_24581_core is never assigned in the visible notebook (only
# in commented-out lines of the selection cells), so this raises NameError on a fresh run.
full_mass = 24581.3741
df_skeleton_24581 = df_skeleton_3p_24581_core #df_5p_24252 #df_5p_24597
# NOTE(review): `group` is not used in this cell.
group = 'Group_24581'
# Also read by the later process_mass_base_seats cells.
orientation = 3

# Rows of df_phe whose Mass lies strictly within 0.1 of full_mass.
fullmass_dot_24581 = df_phe[(df_phe.Mass>full_mass-0.1) & (df_phe.Mass<full_mass+0.1)]
df_gap_dots_24581 = find_gap_dots(df_3p, df_skeleton_24581, fullmass_dot_24581, orientation=orientation)
df_gap_dots_24581.shape

In [None]:
# Plot the 24581 skeleton together with the gap dots found for it.
plotly_zones(df_skeleton_24581, df_gap_dots_24581)

In [None]:
# Call the components helper on the mass difference 8946.1978 - 8268.082.
# components(8641.175070-8268.082)
components(8946.1978-8268.082)

In [None]:
# Plot the 24252 skeleton and its gap dots, then save figure + data as Fig3 / S3 "gap_fill".
# NOTE(review): the next cell saves under the same idx and name, so running both
# overwrites these files.
plt, fig = local_plot_zones(df_skeleton, df_gap_dots)

# Work on copies so that adding the isSkeleton flag does not modify the source frames.
df_a = df_skeleton.copy()
df_b = df_gap_dots.copy()
df_a['isSkeleton'] = True
df_b['isSkeleton'] = False
df_merge = pd.concat([df_a, df_b])
output(df_merge, plt, fig, 3, 'gap_fill')

In [None]:
# Plot the 24581 skeleton and its gap dots, then save figure + data as Fig3 / S3 "gap_fill".
# NOTE(review): same idx and name as the 24252 cell before it, so this overwrites
# Fig3_gap_fill.svg / S3_gap_fill.xlsx — confirm only one of the two is meant to be kept.
plt, fig = local_plot_zones(df_skeleton_24581, df_gap_dots_24581)

# Work on copies so that adding the isSkeleton flag does not modify the source frames.
df_a = df_skeleton_24581.copy()
df_b = df_gap_dots_24581.copy()
df_a['isSkeleton'] = True
df_b['isSkeleton'] = False
df_merge = pd.concat([df_a, df_b])
output(df_merge, plt, fig, 3, 'gap_fill')

In [None]:
# Combine the 24252 skeleton and gap dots (duplicates dropped) and compute df_seats.
# Rebinds df_merge, bcr, full_mass and df_seats, which the following cells also use.
df_merge = pd.concat([df_skeleton, df_gap_dots]).drop_duplicates()

# Only the second element of base_calling_random's result is used below.
bcr = base_calling_random(df_merge, silence=True)
full_mass = 24252.32
# `orientation` is the global set in the 24581 gap-dots cell (value 3 there).
df_seats = process_mass_base_seats(df_merge, bcr[1], full_mass=full_mass, orientation=orientation)

In [None]:
# Combine the 24581 skeleton and gap dots (duplicates dropped) and compute df_seats.
# Overwrites the df_merge / bcr / df_seats produced by the 24252 cell, so the final
# figure cell uses whichever of the two cells ran last.
df_merge = pd.concat([df_skeleton_24581, df_gap_dots_24581]).drop_duplicates()

# Only the second element of base_calling_random's result is used below.
bcr = base_calling_random(df_merge, silence=True)
full_mass = 24581.3741
df_seats = process_mass_base_seats(df_merge, bcr[1], full_mass=full_mass, orientation=orientation)

In [None]:
# Final ladder figure: scatter of RT against Mass for the current df_merge on a wide
# (17 x 4 inch) canvas, saved together with df_seats as Fig4 / S4 "seats".
# NOTE(review): this import sits mid-notebook; it belongs in the import cell at the top.
import matplotlib.ticker as ticker

fig = plt.figure(figsize=(17, 4))
ax = plt.subplot()
plt.xlabel('Mass (Da)', fontname="Arial", fontsize=15, color='black')
plt.ylabel('Retention Time (min)', fontname="Arial", fontsize=15, color='black')
# plt.xticks(range(0, 25001, 2500), fontname="Arial", size=13, color='black')
# plt.xticks(range(0, int(df_merge.Mass.max()), 320), fontname="Arial", size=13, color='black')
# Upper bound for the tick ranges: just above the largest mass in df_merge.
mass_max = int(df_merge.Mass.max()) + 1
# Major ticks every 2500, minor ticks every 320 along the mass axis.
major_loc = ticker.FixedLocator(range(0, mass_max, 2500))
minor_loc = ticker.FixedLocator(range(0, mass_max, 320))
ax.xaxis.set_major_locator(major_loc)
ax.xaxis.set_minor_locator(minor_loc)
ax.tick_params(which='major', colors='black')
ax.tick_params(which='minor', colors='black')
# Fixed y ticks 0, 2, ..., 16.
plt.yticks(range(0, 17, 2), fontname="Arial", size=13, color='black')
# plt.grid(True, which='minor', axis='y')
# plt.scatter(df_3p_24252.Mass, df_3p_24252.RT, color='red', marker='s')
plt.scatter(df_merge.Mass, df_merge.RT, color='royalblue')
# plt.xlim(320, 25000)
# Dashed vertical grid lines at the minor (320-spaced) ticks only.
plt.grid(True, axis='x', linestyle='--', which='minor')
# plt.grid(b=True,which='both')
# plt.scatter(df_skeleton.Mass, df_skeleton.RT, color='royalblue')
# plt.scatter(df_phe_big.iloc[1].Mass, df_phe_big.iloc[1].RT, color='royalblue')
# fig.tight_layout()
# plt.savefig('/Users/bryan/Downloads/tRNA_phe_ladders_sep28.svg', transparent=True, dpi=300)
# The figure shows df_merge, while the table written alongside it is df_seats.
output(df_seats, plt, fig, 4, 'seats')