**<center><h1>T-SNE Notebook</h1></center>**

In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
import warnings
warnings.filterwarnings("ignore")

import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import FenicsATL as FATL
from sklearn.cluster import KMeans
from sklearn.cluster import DBSCAN
from sklearn.manifold import TSNE

from multiprocessing import Process


# **I. DATA INITIALIZATION**

I.2 FENICS 02

In [3]:
irrad = [106,107,108,109,110,116,117,118,119,120,126,127,128,129,130,136,137,138,139,140,1021,1022,1023,1024,1025,101,102,103,104,105,111,112,114,115,121,122,123,124,125,131,132,133,134,135]
LTT_id = [29,30,31,32,33,34,35,36,37,38,39,40,1041,1042,1043,1044,1045,1046,1047]
All = irrad + LTT_id

In [4]:
FENICS2 = FATL.FenicsVers_filter(FATL.FenicsData_read('/AtlasDisk/home2/sisaid/data/FENICS2_data.json'),irrad)

FastFeaturesToDrop = [("NoiseHG","Mean"),("NoiseHG","Mu"),
                      ("NoiseHGIG1","Mean"),("NoiseHGIG1","Mu"),
                      ("NoiseHGIG2","Mean"),("NoiseHGIG2","Mu"),
                      ("NoiseHGIG6","Mean"),("NoiseHGIG6","Mu"),
                      ("NoiseLG","Mean"),("NoiseLG","Mu"),
                      ("NoiseLGIG1","Mean"),("NoiseLGIG1","Mu"),
                      ("NoiseLGIG2","Mean"),("NoiseLGIG2","Mu"),
                      ("NoiseLGIG6","Mean"),("NoiseLGIG6","Mu")]

# FENICS2 = FENICS2[FENICS2.Board.burnTime.notna()]

# temp_allindex = []
# for id in FENICS2.Board.id.unique() :
#     temp_index = FENICS2[FENICS2.Board.id == id][FENICS2.Board.channel == FENICS2[FENICS2.Board.id == id].Board.channel.value_counts().index[0]].index.tolist()
#     temp_allindex = temp_allindex + temp_index
    
# FENICS2 = FENICS2.loc[temp_allindex]
FENICS2 = FENICS2[(FENICS2.Board.StatusFast != -1) | (FENICS2.Board.StatusSlow != -1)]

Fast2 = FATL.FenicsTestTable(FENICS2,'fast')
Slow2 = FATL.FenicsTestTable(FENICS2,'slow')


# Slow2 = FATL.Normalizer(Slow2)
# Fast2 = FATL.Normalizer(Fast2)

for feature in FastFeaturesToDrop:
    Fast2 = Fast2.drop(feature, axis=1) 

slow2_index = Slow2.index

Slow2.dropna(subset=[('Gain0','Linearity','SlopeLin'),('Gain1','Linearity','SlopeLin'),('Gain3','Linearity','SlopeLin'),('Gain4','Linearity','SlopeLin'),('Gain5','Linearity','SlopeLin'),
                     ('Gain0','Linearity','ConstLin'),('Gain1','Linearity','ConstLin'),('Gain3','Linearity','ConstLin'),('Gain4','Linearity','ConstLin'),('Gain5','Linearity','ConstLin'),], inplace=True)
Slow2.dropna(inplace=True,axis=1)
Fast2.drop(index=slow2_index.difference(Slow2.index), inplace=True)
FENICS2.drop(index=slow2_index.difference(Slow2.index), inplace=True)



FastSlow2 = pd.concat([Slow2, Fast2], axis=1)

In [5]:
FEN2_batch01_ids = [29,30,31,32,33,34,35,36,37,38,39,40]
FEN2_batch02_ids = [1041,1042,1043,1044,1045,1046,1047]
NIEL_ids = [101,102,103,104,105,111,112,114,115,121,122,123,124,125,131,132,133,134,135]
TID_ids = [106,107,108,109,110,116,117,118,119,120,126,127,128,129,130,136,137,138,139,140,1021,1022,1023,1024,1025]

AllFen2 = list(set(FENICS2.Board.id[(FENICS2.Board.StatusFast==0 ) | (FENICS2.Board.StatusSlow==0)])
               .union(
                set(FEN2_batch01_ids),
                set(FEN2_batch02_ids),
                set(NIEL_ids),
                set(TID_ids))
                )

AllFen2LTT = list(set(FEN2_batch01_ids).union(set(FEN2_batch02_ids)))


In [6]:
main_dir = '/users/divers/atlas/sisaid/home2/'

# **III. FENICS 2**

III.1 FENICS 02 : FAST DATA

In [7]:
tuples = [('Fast', 'NonNormed'),('Fast', 'Normed'),('Slow', 'NonNormed'),('Slow', 'Normed'),('FastSlow', 'NonNormed'),('FastSlow', 'Normed')]
mux = pd.MultiIndex.from_tuples(tuples)
Labels2 = pd.DataFrame(index=FENICS2.index,columns=mux)

In [8]:
CanalStatus = {
    'StatusSlow' : FENICS2.Board.StatusSlow,
    'StatusFast' :  FENICS2.Board.StatusFast
}

Normalization = {
    'NonNormed' : False,
    'Normed' : True,
                 }


for norm in Normalization.keys() :
    tsne_param = TSNE()
    tsne_param.fit(Fast2)

    tsne_bokeh = FATL.FenBokehTSNE(Fast2,normalize=Normalization[norm],perplexity=tsne_param.perplexity,n_iter=tsne_param.n_iter,learning_rate=tsne_param.learning_rate)

    OutputPath = main_dir+'fen2_output/t-sne/fast/HTML/'

    FATL.checkFolderAt(OutputPath)

    custom_filter = [('Combo',dict(labels_filter=irrad,
                                radio_categories=[dict(name = 'All',indexes=irrad),
                                                    dict(name = 'TID',indexes=TID_ids),
                                                    dict(name = 'NIEL',indexes=NIEL_ids),
                                                    ]))]

    tsne= FATL.FenBokehGrapher(tsne_bokeh.tsne_x,tsne_bokeh.tsne_y,FENICS2.Board.version,FENICS2.Board.burnTime,FENICS2.Board.StatusFast,FENICS2.Board.StatusSlow,
                            colors=FENICS2.Board.burnTime ,labels=FENICS2.Board.id,
                            tooltips=[("card","FENICS0@version @labels"),("burnTime","@burnTime"),
                                        ("StatusFast","@StatusFast"),("StatusSlow","@StatusSlow")])

    tsne.plotter(plotType='histogram',filters=custom_filter,
                title=f't-SNE : FENICS02 FAST; normed {tsne_bokeh.normed}; t-SNE params : perplexity={tsne_param.perplexity},n_iter={tsne_param.n_iter},learning_rate={tsne_param.learning_rate}')

    # tsne.save_as_html(path=OutputPath,filename=f'FEN2_FAST_t-SNE_perplexity_{tsne_param.perplexity}_wburn_normed_{tsne_bokeh.normed}.html')

    epsilon = 5
    min_samples = 2
    dbscan = DBSCAN(eps=epsilon, min_samples=min_samples)
    dbscan.fit(tsne_bokeh.embedding)

    Labels2[('Fast',norm)]= dbscan.labels_   

    Cluster_Filtred= FATL.FenBokehGrapher(tsne_bokeh.tsne_x,tsne_bokeh.tsne_y,
                                        colors=Labels2[('Fast',norm)] ,labels=FENICS2.Board.id)

    Cluster_Filtred.plotter(plotType='histogram',filters=custom_filter,title='t-SNE : FENICS02 FAST (DBSCAN)')


    for status in CanalStatus.keys() :
        Cluster_Filtred= FATL.FenBokehGrapher(tsne_bokeh.tsne_x,tsne_bokeh.tsne_y,FENICS2.Board.version,FENICS2.Board.burnTime,FENICS2.Board.StatusFast,FENICS2.Board.StatusSlow,
                                            colors=CanalStatus[status] ,labels=FENICS2.Board.id,
                                            tooltips=[("card","FENICS0@version @labels"),("burnTime","@burnTime"),
                                                        ("StatusFast","@StatusFast"),("StatusSlow","@StatusSlow")])

        Cluster_Filtred.plotter(plotType='histogram',filters=custom_filter,
                                title=f't-SNE : FENICS02 FAST (Fail : {status}); normed {tsne_bokeh.normed}; t-SNE params : perplexity={tsne_param.perplexity}')
        
        # Cluster_Filtred.save_as_html(path=OutputPath,filename=f'FEN2_FAST_t-SNE_perplexity_{tsne_param.perplexity}_wSStatusSlow_normed_{tsne_bokeh.normed}.html')


III.2 FENICS 02 : SLOW DATA

In [9]:
Slow2_filtred = pd.DataFrame()
Slow2_filtred = Slow2
removed_features_lienarity = ['DACforPed']
Slow2_filtred = Slow2_filtred.drop([('Gain0',   'NoisePhys'),('Gain1',  'NoisePhys'),
                                    ('Gain2',   'NoisePhys'),('Gain3',   'NoisePhys'),
                                    ('Gain4',   'NoisePhys'),('Gain5',  'NoisePhys')], axis=1) 

Slow2_filtred = Slow2_filtred.drop([('Gain0',   'NoisePosPed'),('Gain1',    'NoisePosPed'),
                                    ('Gain2',   'NoisePosPed'),('Gain3',   'NoisePosPed'),
                                    ('Gain4',   'NoisePosPed'),('Gain5',  'NoisePosPed')], axis=1) 

for feature in removed_features_lienarity :
    Slow2_filtred = Slow2_filtred.drop([('Gain0',   'Linearity',   feature),('Gain1',   'Linearity',    feature),
                                        ('Gain2',   'Linearity',    feature),('Gain3',   'Linearity',    feature),
                                        ('Gain4',   'Linearity',    feature),('Gain5',   'Linearity',    feature)], axis=1)

In [10]:
tsne_param = TSNE()
tsne_param.fit(Slow2_filtred)

for norm in Normalization.keys() :
    tsne_bokeh = FATL.FenBokehTSNE(Slow2_filtred,normalize=Normalization[norm],perplexity=tsne_param.perplexity,n_iter=tsne_param.n_iter,learning_rate=tsne_param.learning_rate)

    OutputPath = main_dir+'fen2_output/t-sne/slow/HTML/'

    FATL.checkFolderAt(OutputPath)

    tsne= FATL.FenBokehGrapher(tsne_bokeh.tsne_x,tsne_bokeh.tsne_y,FENICS2.Board.version,FENICS2.Board.burnTime,FENICS2.Board.StatusFast,FENICS2.Board.StatusSlow,
                            colors=FENICS2.Board.burnTime ,labels=FENICS2.Board.id,
                            tooltips=[("card","FENICS0@version @labels"),("burnTime","@burnTime"),
                                        ("StatusFast","@StatusFast"),("StatusSlow","@StatusSlow")])

    tsne.plotter(plotType='histogram',filters=custom_filter,
                title=f't-SNE : FENICS02 SLOW; normed {tsne_bokeh.normed}; t-SNE params : perplexity={tsne_param.perplexity},n_iter={tsne_param.n_iter},learning_rate={tsne_param.learning_rate}')

    # tsne.save_as_html(path=OutputPath,filename=f'FEN2_SLOW_t-SNE_perplexity_{tsne_param.perplexity}_wburn_normed_{tsne_bokeh.normed}.html')

    epsilon = 5
    min_samples = 2
    dbscan = DBSCAN(eps=epsilon, min_samples=min_samples)
    dbscan.fit(tsne_bokeh.embedding)

    Labels2[('Slow',norm)]= dbscan.labels_   

    Cluster_Filtred= FATL.FenBokehGrapher(tsne_bokeh.tsne_x,tsne_bokeh.tsne_y,
                                        colors=Labels2[('Slow',norm)] ,labels=FENICS2.Board.id)

    Cluster_Filtred.plotter(plotType='histogram',filters=custom_filter,title='t-SNE : FENICS02 SLOW (DBSCAN)')
    
    
    
    for status in CanalStatus.keys() :
        Cluster_Filtred= FATL.FenBokehGrapher(tsne_bokeh.tsne_x,tsne_bokeh.tsne_y,FENICS2.Board.version,FENICS2.Board.burnTime,FENICS2.Board.StatusFast,FENICS2.Board.StatusSlow,
                                            colors=CanalStatus[status] ,labels=FENICS2.Board.id,
                                            tooltips=[("card","FENICS0@version @labels"),("burnTime","@burnTime"),
                                                        ("StatusFast","@StatusFast"),("StatusSlow","@StatusSlow")])

        Cluster_Filtred.plotter(plotType='histogram',filters=custom_filter,
                                title=f't-SNE : FENICS02 SLOW (Fail : {status}); normed {tsne_bokeh.normed}; t-SNE params : perplexity={tsne_param.perplexity}')

        # Cluster_Filtred.save_as_html(path=OutputPath,filename=f'FEN2_SLOW_t-SNE_perplexity_{tsne_param.perplexity}_wStatusSlow_normed_{tsne_bokeh.normed}.html')
        

III.3 FENICS 02 : FAST + SLOW DATA

In [11]:
FastSlow2_filtred = pd.concat([Slow2_filtred, Fast2], axis=1)

In [12]:
tsne_param = TSNE()
tsne_param.fit(FastSlow2_filtred)

for norm in Normalization.keys() :
    tsne_bokeh = FATL.FenBokehTSNE(FastSlow2_filtred,normalize=Normalization[norm],perplexity=20,n_iter=200000,learning_rate=200)

    OutputPath = main_dir+'fen2_output/t-sne/fastslow/HTML/'

    FATL.checkFolderAt(OutputPath)


    tsne= FATL.FenBokehGrapher(tsne_bokeh.tsne_x,tsne_bokeh.tsne_y,FENICS2.Board.version,FENICS2.Board.burnTime,FENICS2.Board.StatusFast,FENICS2.Board.StatusSlow,
                            colors=FENICS2.Board.burnTime ,labels=FENICS2.Board.id,
                            tooltips=[("card","FENICS0@version @labels"),("burnTime","@burnTime"),
                                        ("StatusFast","@StatusFast"),("StatusSlow","@StatusSlow")])

    tsne.plotter(plotType='histogram',filters=custom_filter,
                title=f't-SNE : FENICS02 FAST + SLOW; normed {tsne_bokeh.normed}; t-SNE params : perplexity={tsne_param.perplexity},n_iter={tsne_param.n_iter},learning_rate={tsne_param.learning_rate}')

    # tsne.save_as_html(path=OutputPath,filename=f'FEN2_FASTSLOW_t-SNE_perplexity_{tsne_param.perplexity}_wburn_normed_{tsne_bokeh.normed}.html')

    epsilon = 5
    min_samples = 2
    dbscan = DBSCAN(eps=epsilon, min_samples=min_samples)
    dbscan.fit(tsne_bokeh.embedding)

    Labels2[('FastSlow',norm)]= dbscan.labels_   

    Cluster_Filtred= FATL.FenBokehGrapher(tsne_bokeh.tsne_x,tsne_bokeh.tsne_y,
                                        colors=Labels2[('FastSlow',norm)] ,labels=FENICS2.Board.id)

    Cluster_Filtred.plotter(plotType='histogram',filters=custom_filter,title='t-SNE : FENICS02 FastSlow (DBSCAN)')
    
    
    for status in CanalStatus.keys() :
        Cluster_Filtred= FATL.FenBokehGrapher(tsne_bokeh.tsne_x,tsne_bokeh.tsne_y,FENICS2.Board.version,FENICS2.Board.burnTime,FENICS2.Board.StatusFast,FENICS2.Board.StatusSlow,
                                            colors=CanalStatus[status] ,labels=FENICS2.Board.id,
                                            tooltips=[("card","FENICS0@version @labels"),("burnTime","@burnTime"),
                                                        ("StatusFast","@StatusFast"),("StatusSlow","@StatusSlow")])

        Cluster_Filtred.plotter(plotType='histogram',filters=custom_filter,
                                title=f't-SNE : FENICS02 Fast + SLOW (Fail : {status}); normed {tsne_bokeh.normed}; t-SNE params : perplexity={tsne_param.perplexity}')
        # Cluster_Filtred.save_as_html(path=OutputPath,filename=f'FEN2_FASTSLOW_t-SNE_perplexity_{tsne_param.perplexity}_wStatusSlow_normed_{tsne_bokeh.normed}.html')


# Clusters comparaison

In [None]:
F1022 = Fast2[FENICS2.Board.id ==1022].describe() ; F1023 = Fast2[FENICS2.Board.id ==1023].describe()
F1022.compare(F1023)

NameError: name 'Fast2' is not defined

In [None]:
import pandas as pd
import numpy as np
from scipy.stats import f_oneway

data = pd.concat([Slow2, Labels2, FENICS2.Board.StatusFast, FENICS2.Board.StatusSlow], axis=1)

# Group Fast2 by the labels in Labels2
groups = data.groupby(('Slow','Normed'))

import pandas as pd
from scipy.stats import f_oneway

def perform_anova(df1, df2):
    combined_data = pd.concat([df1, df2], keys=['df1', 'df2'])
    group_labels = combined_data.index.get_level_values(0)

    groups = []
    for label in combined_data.index.unique():
        groups.append(combined_data.xs(label))

    f_statistic, p_value = f_oneway(*groups)
    
    
    return f_statistic, p_value


for ii in list(groups.groups.keys()) :
    for jj in list(groups.groups.keys()) :

        f_statistic, p_value = perform_anova(groups.get_group(ii), groups.get_group(jj))

        print("F-statistic:", f_statistic)
        print("p-value:", p_value)

F-statistic: 1.6807641828231124
p-value: 2.996033434059801e-24
F-statistic: 1.6833986095381919
p-value: 4.107683693991107e-18
F-statistic: 1.416930510705475
p-value: 6.452573386669612e-08
F-statistic: 1.6833986094485478
p-value: 2.3618735508634425e-13
F-statistic: 1.6833986094857312
p-value: 6.079994016406177e-15
F-statistic: 1.6833986094775562
p-value: 9.127183643734065e-15
F-statistic: 1.6833986095381925
p-value: 4.107683693991107e-18
F-statistic: 5.895139653010906
p-value: 3.868087674865415e-185
F-statistic: 1.0000000007027754
p-value: 0.4878415566766204
F-statistic: 5.9226423753756645
p-value: 4.032090714194351e-98
F-statistic: 5.188593968783757
p-value: 4.493056229097041e-104
F-statistic: 5.150823575199968
p-value: 2.9856827554021496e-100
F-statistic: 1.4169305107054753
p-value: 6.452573386669612e-08
F-statistic: 1.0000000007027758
p-value: 0.4878415566766204
F-statistic: 0.9944751388133385
p-value: 0.5071393418776328
F-statistic: 1.0000000006989849
p-value: 0.48113616805018355
F-