In [1]:
import os

import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import plotly.express as px
from google.colab import drive
# Mount Google Drive at /content/drive; the Drive paths used by the cells below
# (helper notebooks and sample workbooks) all live under this mount point.
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
%run ../content/drive/MyDrive/MLC-Seq-main/modules/utils.ipynb
%run ../content/drive/MyDrive/MLC-Seq-main/modules/ladder_separation.ipynb
%run ../content/drive/MyDrive/MLC-Seq-main/modules/homology_search.ipynb
%run ../content/drive/MyDrive/MLC-Seq-main/modules/mass_sum.ipynb
%run ../content/drive/MyDrive/MLC-Seq-main/modules/gap_fill.ipynb
%run ../content/drive/MyDrive/MLC-Seq-main/modules/ladder_complementation.ipynb

In [3]:
# Path of the control-sample workbook on the mounted Drive.
# Replace ctrl_sample to switch to another control sample.
ctrl_sample = '/content/drive/MyDrive/MLC-Seq-main/samples/total_tRNA/Control1/20230428_total_ctrl_04192306_filtered.xlsx'

# Load the control sample and plot it. plotly_zone is the last expression of the cell,
# so any value it returns is displayed as the cell output.
df_ctrl = load_data(ctrl_sample)
plotly_zone(df_ctrl)

In [5]:
# Run the homology search on the control data. The result is unpacked positionally into
# plotly_basecalling together with y='Vol' and the plot title.
# NOTE(review): the layout of the returned sequence is defined in homology_search.ipynb --
# confirm there what each element is.
bcr = homology_search(df_ctrl)
plotly_basecalling(*bcr, y='Vol', title='Homology Search Result')

In [16]:
# Load two sample workbooks from the mounted Drive (the file names suggest a 30 min and a
# 0 min sample -- confirm against the experiment notes).
# NOTE(review): only `df` is referenced by the cells visible below; `df2` appears unused --
# confirm before removing it.
df = load_data('/content/drive/MyDrive/MLC-Seq-main/samples/20230428_total_30min_04202303.xlsx') # sample 1
df2 = load_data('/content/drive/MyDrive/MLC-Seq-main/samples/20230428_total_0min_04192308.xlsx') # sample 2
# Disabled debug preview of sample 2. The output saved under this cell looks like it came
# from a run with this line enabled, so it will not be reproduced by a fresh run.
#print(df2.head())

          Mass         RT           Vol          RA         FA
0   731.935309  19.899562  1.792981e+09  100.000000  15.318711
3   573.989319   9.599520  1.187675e+09   66.240259  10.147154
6   915.901849  19.899562  1.055336e+09   58.859309   9.016488
9   741.990509   9.599520  6.282201e+08   35.037738   5.367330
12  921.918799  19.899562  4.144928e+08   23.117521   3.541306


In [23]:
"""iloc[:10] takes the first 10 compounds from the list sorted in the descending order of
intensity. Replace 10 with other numbers if more or less compounds are desired.
"""
# Tunable selection parameters, exposed as named variables like sampling_num and
# isoform_idx in the later cells.
deg_min_mass = 23000  # only compounds with Mass strictly above this value are considered
deg_top_n = 10        # number of compounds kept; this replaces the literal 10 mentioned in the note above

# Keep the deg_top_n compounds with the largest 'Vol' among those above deg_min_mass,
# then run the homology search on that subset and plot the result.
df_deg_top = df[df.Mass > deg_min_mass].sort_values('Vol', ascending=False).iloc[:deg_top_n]
homo_deg = homology_search(df_deg_top)
plotly_basecalling(*homo_deg, y='Vol', title="Homology Search Result of Degraded Sample")
# The first element of the search result is kept for the later cell that picks full_mass.
df_homo_deg = homo_deg[0]

In [None]:
# To easily demonstrate the concept, here we just load the data we selected in advance.
# NOTE(review): these paths are relative to the kernel's working directory, unlike the
# absolute Drive paths used for the other samples -- confirm that ./data exists where this
# notebook is run.
df_5p = load_data('./data/phe5p.xlsx')
df_3p = load_data('./data/phe3p.xlsx')

# match_dfs combines the two frames; its result is plotted below under the label
# "Overlapped Data" (presumably the rows shared by both -- confirm in utils.ipynb).
df_common = match_dfs(df_5p, df_3p)
plotly_multi_zones([df_5p, df_3p, df_common],
                   names=["5´ Data", "3´ Data", "Overlapped Data"],
                   title="Manually Divided Data")

In [None]:
# Maximum number of rows kept from each data set.
sampling_num = 1000

# Rank every data set by 'Vol' (largest first) and keep only the leading sampling_num rows.
df_5p_top = df_5p.sort_values(by='Vol', ascending=False).head(sampling_num)
df_3p_top = df_3p.sort_values(by='Vol', ascending=False).head(sampling_num)

# Plot both filtered sets; as the last expression, any returned value is shown as the cell output.
plotly_zones(
    df_5p_top,
    df_3p_top,
    names=["5´ Filtered Data", "3´ Filtered Data"],
    title='Filtered Data',
)

In [None]:
# Position of the isoform to analyse within the homology-search result ordered by
# descending 'Vol' (0 selects the row with the largest 'Vol').
isoform_idx = 0

# Mass of the selected isoform; handed to mass_sum as full_mass.
full_mass = (
    df_homo_deg
    .sort_values(by='Vol', ascending=False)
    .iloc[isoform_idx]['Mass']
)

# Note the ordering: the call passes the 3´ frame first and unpacks the result as (3´, 5´).
df_masssum_3p, df_masssum_5p = mass_sum(df_3p_top, df_5p_top, full_mass=full_mass)

plotly_zones(
    df_masssum_5p,
    df_masssum_3p,
    names=["5´ Data", "3´ Data"],
    title='MassSum Result',
)

In [None]:
%%capture

# GapFill for 5´ ladder
fullmass_dot = df_5p[(df_5p.Mass>full_mass-0.1) & (df_5p.Mass<full_mass+0.1)]
df_gap_5p = gap_fill(df_5p, df_masssum_5p, fullmass_dot, major=True, orientation=5)

# GapFill for 3´ ladder
fullmass_dot = df_3p[(df_3p.Mass>full_mass-0.1) & (df_3p.Mass<full_mass+0.1)]
df_gap_3p = gap_fill(df_3p, df_masssum_3p, fullmass_dot, major=True, orientation=3)

In [None]:
# Both ladders are available at this point: each one is its MassSum rows followed by the
# rows produced by GapFill. reset_index keeps the previous index as a regular column
# (drop=False) and renumbers the rows.
df_ladder_5p = pd.concat([df_masssum_5p, df_gap_5p], axis=0).reset_index(drop=False)
df_ladder_3p = pd.concat([df_masssum_3p, df_gap_3p], axis=0).reset_index(drop=False)

plotly_zones(
    df_ladder_5p,
    df_ladder_3p,
    names=["5´ Data", "3´ Data"],
    title='GapFill Result',
)

In [None]:
# Wrap each assembled ladder in a Ladder object. The third argument (5 / 3) presumably is
# the ladder orientation, mirroring gap_fill's orientation argument -- confirm in
# ladder_complementation.ipynb.
ladder1 = Ladder(df_ladder_5p, full_mass, 5)
ladder2 = Ladder(df_ladder_3p, full_mass, 3)
ladders = [ladder1, ladder2]

# Combine the two ladders into a single result table.
df_ladder_comp = ladder_complementation(ladders)
print('Ladder Complementation Result')

# to_excel does not create missing parent directories, so make sure the output folder
# exists before writing; exist_ok=True leaves an existing folder untouched.
output_path = '../outputs/ladder_comp_res_lite.xlsx'
os.makedirs(os.path.dirname(output_path), exist_ok=True)
df_ladder_comp.to_excel(output_path)

# Show rows 10..69 of the result as the cell output.
df_ladder_comp.iloc[10:70]