In [None]:
from tools import *
from models import *
import plotly.graph_objects as go
import plotly.figure_factory as ff
from Bio.SeqUtils import GC
from Bio import SeqIO
import os
from random import sample
from plotly.subplots import make_subplots
import pickle
from scipy import stats
from collections import Counter

# Switch off interactive plotting. `plt` is not imported explicitly in this
# notebook; presumably it is matplotlib.pyplot re-exported by one of the star
# imports above - TODO confirm.
plt.ioff()

import warnings
# NOTE(review): this silences *every* warning for the rest of the session,
# including deprecation warnings raised by the libraries used below.
warnings.filterwarnings('ignore')

In [None]:
#for reproducibility
# Seed NumPy's global RNG and PyTorch with the same fixed value.
# `np` and `torch` are not imported explicitly in this notebook; presumably
# they come from the star imports at the top - TODO confirm.
# NOTE(review): `sample` from the stdlib `random` module is imported above,
# but the stdlib RNG is not seeded here - confirm whether it is used.
seed = 42
np.random.seed(seed)
torch.manual_seed(seed)

In [None]:
# Read the list of TF names: one name per line, surrounding whitespace removed.
with open("../data/Analyzed_TFs.txt", "r") as tf_file:
    TFs = [raw_line.strip() for raw_line in tf_file]

In [None]:
#RECORDING THE PERFORMANCE
results = {}

# Accumulators: each maps a TF name to the list of values collected for it,
# one value per iteration in which the TF was present in the loaded file.
new_model = {}
new_model_TL = {}
new_model_no_TL = {}
new_model_TL_freezed = {}


def _load_pickle(path):
    """Return the object unpickled from `path`.

    The file is opened in a `with` block so the handle is closed even when
    unpickling raises.
    """
    # NOTE: unpickling can execute arbitrary code - only load trusted files.
    with open(path, 'rb') as pkl_file:
        return pickle.load(pkl_file)


for i in range(1,11):

    # Each loaded object is used as a dict keyed by TF name.
    mccoef_new_model = _load_pickle(
        "../RESULTS_50_SORTED/iterat_TL_"+str(i)+"/mccoef_old.pkl")
    mccoef_new_model_TL = _load_pickle(
        "../RESULTS_50_SORTED/iterat_TL_"+str(i)+"/mccoef.pkl")
    mccoef_new_model_no_TL = _load_pickle(
        "../RESULTS_50_SORTED/iterat_noTL_"+str(i)+"/mccoef.pkl")
    mccoef_new_model_TL_freezed = _load_pickle(
        "../RESULTS_50_SORTED_BN_FR_LAYERS/iterat_TL_"+str(i)+"/mccoef.pkl")

    # Pair every accumulator with the dict loaded for this iteration.
    for collected, loaded in (
        (new_model, mccoef_new_model),
        (new_model_TL, mccoef_new_model_TL),
        (new_model_no_TL, mccoef_new_model_no_TL),
        (new_model_TL_freezed, mccoef_new_model_TL_freezed),
    ):
        for TF in TFs:
            # Membership is always tested against the *loaded* dict. The
            # frozen-layer branch used to test the accumulator instead, which
            # raised KeyError when an already-seen TF was missing from a
            # later iteration's file.
            if TF in loaded:
                collected.setdefault(TF, []).append(loaded[TF])

In [None]:
# Turn each {TF: [values...]} accumulator into a pandas Series indexed by TF.
# Sizes noted by the original author:
#   new_model       -> 49 (no ARNT, because it didn't have enough data)
#   new_model_TL    -> 148
#   new_model_no_TL -> 148
new_model, new_model_TL, new_model_no_TL, new_model_TL_freezed = (
    pd.Series(values_by_tf)
    for values_by_tf in (new_model, new_model_TL,
                         new_model_no_TL, new_model_TL_freezed)
)

In [None]:
# Mean of the collected values for every TF, per training setup.
new_model_TL_mean = new_model_TL.apply(lambda scores: np.mean(scores))
new_model_no_TL_mean = new_model_no_TL.apply(lambda scores: np.mean(scores))
new_model_TL_freezed_mean = new_model_TL_freezed.apply(lambda scores: np.mean(scores))

# Restrict to the TFs present in new_model, then take "TL minus noTL".
multi_tfs = new_model.index
no_TL_mean_multi = new_model_no_TL_mean[multi_tfs]

TL_diff_multi = new_model_TL_mean[multi_tfs].subtract(no_TL_mean_multi)

TL_fr_diff_multi = new_model_TL_freezed_mean[multi_tfs].subtract(no_TL_mean_multi)

In [None]:
# Same "TL minus noTL" differences, but for the TFs that are NOT in
# new_model.index. Each Series is filtered with its own boolean mask.
no_TL_mean_not_multi = new_model_no_TL_mean[
    ~np.isin(new_model_no_TL_mean.index, new_model.index)]
TL_mean_not_multi = new_model_TL_mean[
    ~np.isin(new_model_TL_mean.index, new_model.index)]
TL_fr_mean_not_multi = new_model_TL_freezed_mean[
    ~np.isin(new_model_TL_freezed_mean.index, new_model.index)]

TL_diff_not_multi = TL_mean_not_multi.subtract(no_TL_mean_not_multi)

TL_fr_diff_not_multi = TL_fr_mean_not_multi.subtract(no_TL_mean_not_multi)

In [None]:
# One x-axis group label per plotted value. The group sizes are taken from
# the data instead of being hard-coded (previously 49 and 99), so the labels
# keep the same length as the y values if the set of analysed TFs changes.
labels = (["in_multi"] * len(TL_diff_multi)
          + ["not_in_multi"] * len(TL_diff_not_multi))

In [None]:
# Grouped box plot of the per-TF differences, split by the labels
# ("in_multi" / "not_in_multi"): one box series per TL variant.
fig = go.Figure()

# (trace name, marker colour, differences in multi, differences not in multi)
box_specs = [
    ('Original_TL_vs_noTL', '#3D9970', TL_diff_multi, TL_diff_not_multi),
    ('Freezed_TL_vs_noTL', '#FF4136', TL_fr_diff_multi, TL_fr_diff_not_multi),
]
for trace_name, colour, diff_multi, diff_not_multi in box_specs:
    fig.add_trace(go.Box(
        y=list(diff_multi.values)+list(diff_not_multi.values),
        x=labels,
        name=trace_name,
        marker_color=colour
    ))

# Disabled: horizontal reference line at y = 0.
#fig['layout'].update(shapes=[{'type': 'line','y0':0,
#                              'y1': 0,'x0':"in_multi", 
#                              'x1':"not_in_multi",'xref':'x1','yref':'y1',
#                              'line': {'color': 'black','width': 2.5}}])

# Transparent background, boxes of the two traces drawn side by side.
fig.update_layout(
    title='TL minus noTL',
    yaxis_title='Change in performance',
    plot_bgcolor='rgba(0,0,0,0)',
    paper_bgcolor='rgba(0,0,0,0)',
    boxmode='group',
)

# Draw both axis lines in black.
fig.update_xaxes(showline=True, linewidth=2, linecolor='black')
fig.update_yaxes(showline=True, linewidth=2, linecolor='black')

fig.show()

In [None]:
# Two-sample t-test without the equal-variance assumption (equal_var=False):
# original-TL differences vs frozen-TL differences, for the TFs in new_model.
# NOTE(review): both samples are indexed by the same TFs - confirm that an
# unpaired test is what is intended here.
stats.ttest_ind(TL_diff_multi.values, TL_fr_diff_multi.values, equal_var = False)

In [None]:
# Two-sample t-test without the equal-variance assumption (equal_var=False):
# original-TL differences vs frozen-TL differences, for the TFs that are not
# in new_model.
# NOTE(review): both samples are indexed by the same TFs - confirm that an
# unpaired test is what is intended here.
stats.ttest_ind(TL_diff_not_multi.values, TL_fr_diff_not_multi.values, equal_var = False)

In [None]:
# One x-axis label per individual value: each TF name is repeated once per
# value collected for it in new_model_TL. The repeat count was hard-coded to
# 10 (the number of iterations loaded above); using the real list length
# keeps labels aligned with the values even if a TF has fewer entries.
tfs_labels = [tf for tf in new_model_TL.index
              for _ in range(len(new_model_TL[tf]))]

In [None]:
# Flatten the per-TF value lists into one long list per training setup.
# All three follow the TF order of new_model_TL.index so they line up with
# each other. Flat comprehensions replace the repeated `list = list + list`
# concatenation, which copied the growing list on every step (quadratic).
new_model_TL_tfs = [value
                    for tf_name in new_model_TL.index
                    for value in new_model_TL[tf_name]]

new_model_no_TL_tfs = [value
                       for tf_name in new_model_TL.index
                       for value in new_model_no_TL[tf_name]]

new_model_TL_fr_tfs = [value
                       for tf_name in new_model_TL.index
                       for value in new_model_TL_freezed[tf_name]]

In [None]:
# Per-TF grouped box plot comparing the three training setups.
fig = go.Figure()

# Only entries from this offset onward are plotted. With 10 values per TF
# this corresponds to skipping the first 120 TFs - TODO confirm that every
# TF really contributes 10 values.
plot_start = 120*10

# (trace name, marker colour, flattened values)
for trace_name, colour, values in (
    ('Original_TL', 'red', new_model_TL_tfs),
    ('Freezed_TL', 'goldenrod', new_model_TL_fr_tfs),
    ('No_TL', 'green', new_model_no_TL_tfs),
):
    fig.add_trace(go.Box(
        y=values[plot_start:],
        x=tfs_labels[plot_start:],
        name=trace_name,
        marker_color=colour,
        showlegend=True
    ))

# Axis titles use the nested `title=dict(text=..., font=...)` form. The
# former `titlefont` shorthand is deprecated and rejected by recent plotly
# releases; the nested form is accepted by old and new versions alike.
axis_title_font = dict(
    family = 'Courier New, monospace',
    size = 18,
    color = 'black'
)
layout = go.Layout(
   title = "",
   xaxis = dict(title = dict(text = '', font = axis_title_font)),
   yaxis = dict(title = dict(text = 'Mcor value', font = axis_title_font))
)

fig.update_yaxes(range=[0, 1])
# Transparent background, monospace font, boxes of the traces side by side.
fig.update_layout(title='',
                 plot_bgcolor='rgba(0,0,0,0)', paper_bgcolor='rgba(0,0,0,0)',
                 font=dict(
                     family="Courier New, monospace",
                     size=14,
                     color="black"
                 ), boxmode='group')
fig.update_layout(layout)

# Legend to the right of the plot area; fixed figure size.
fig.update_layout(legend=dict(x=1.1, y=1))
fig.update_layout(autosize=False,width=1000,height=500)

# Draw both axis lines in black.
fig.update_xaxes(showline=True, linewidth=2, linecolor='black')
fig.update_yaxes(showline=True, linewidth=2, linecolor='black')

fig.show()