In [1]:
import matplotlib.pyplot as plt
plt.style.use('ggplot')

import pandas as pd
import numpy as np
from timeit import default_timer as timer
import scipy.io as sio
from scipy.special import comb

from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import cross_val_score
from sklearn.decomposition import PCA
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics.pairwise import rbf_kernel

from sklearn.svm import SVC
from scipy.stats import expon
from sklearn.model_selection import RandomizedSearchCV
from sklearn.manifold import spectral_embedding
import math


import ensemble_learning as el

# Functions

In [2]:
def _best_onegram_bins(group, classe, dim, bin_counts, kf):
    """Pick the 1-D histogram bin count with the best cross-validated 1-NN accuracy.

    For every candidate in ``bin_counts`` each series of ``group`` is summarised by
    a density-normalised ``np.histogram`` of the column at position ``dim``. The
    candidate whose median ``cross_val_score`` is highest wins; on ties the first
    candidate tried is kept. Returns ``None`` if no candidate gave a comparable
    (non-NaN) score.
    """
    best_bins, best_score = None, -np.inf
    for bins in bin_counts:
        hist = group.apply(lambda x: np.histogram(
            x.iloc[:, dim].values, bins=bins, density=True)[0].flatten().tolist())
        hist_tr = pd.DataFrame(hist.values.tolist())

        # cross_val_score clones and fits the estimator on each fold by itself,
        # so fitting on the whole set beforehand is unnecessary.
        scores = cross_val_score(KNeighborsClassifier(1), hist_tr, classe, cv=kf)

        median_score = np.median(scores)
        if best_score < median_score:
            best_score, best_bins = median_score, bins
    return best_bins


def onegramsTransform(data,dim1,dim2,minb=2,maxbh=30,cv = 5):
    """Select the best number of 1-D histogram bins for two columns.

    Parameters
    ----------
    data : DataFrame with a 'Series' column (series identifier) and a 'classe'
        column (label); the remaining columns are addressed by position.
    dim1, dim2 : positional indices of the two columns to tune.
    minb, maxbh : bin counts ``minb .. maxbh - 1`` are tried (2..29 by default).
    cv : number of shuffled ``KFold`` splits; the same splitter object is used
        for both columns.

    Returns
    -------
    (ubh1, ubh2) : best bin count for ``dim1`` and for ``dim2``.

    Raises
    ------
    ValueError
        If the candidate range is empty or no candidate could be scored
        (previously this surfaced as an UnboundLocalError).
    """
    # Work on a NaN-free copy instead of dropping rows from the caller's frame.
    data = data.dropna()
    # One group per series; the label of a series is the label of its first row.
    group = data.groupby('Series')
    classe = group['classe'].apply(lambda x : x.iloc[0]).reset_index(drop=True)
    kf = KFold(n_splits=cv,shuffle=True)

    bin_counts = range(minb,maxbh)
    if len(bin_counts) == 0:
        raise ValueError("empty bin range: minb must be smaller than maxbh")

    ubh1 = _best_onegram_bins(group, classe, dim1, bin_counts, kf)
    ubh2 = _best_onegram_bins(group, classe, dim2, bin_counts, kf)
    if ubh1 is None or ubh2 is None:
        raise ValueError("no bin count produced a valid cross-validation score")

    return ubh1,ubh2

In [3]:
def bigramsTransform(data,dim1,dim2,minb=2,maxb=11,cv = 5):
    """Select the best 2-D histogram bin counts for a pair of columns.

    Every combination ``(b1, b2)`` with ``minb <= b1, b2 < maxb`` is evaluated:
    each series is summarised by a flattened, density-normalised
    ``np.histogram2d`` of the columns at positions ``dim1`` and ``dim2``, and a
    1-nearest-neighbour classifier is scored with ``cv`` shuffled K-folds. The
    combination with the highest median score is returned; on ties the first
    combination tried (b1 outer loop, b2 inner loop) is kept.

    Parameters
    ----------
    data : DataFrame with a 'Series' column (series identifier) and a 'classe'
        column (label); the remaining columns are addressed by position.
    dim1, dim2 : positional indices of the two columns.
    minb, maxb : bounds of the bin counts tried on each axis (2..10 by default).
    cv : number of ``KFold`` splits.

    Returns
    -------
    (ub1, ub2) : best bin counts for ``dim1`` and ``dim2``.

    Raises
    ------
    ValueError
        If the candidate range is empty or no candidate could be scored
        (previously this surfaced as an UnboundLocalError).
    """
    # Work on a NaN-free copy instead of dropping rows from the caller's frame.
    data = data.dropna()
    group = data.groupby('Series')
    # The label of a series is the label of its first row.
    classe = group['classe'].apply(lambda x : x.iloc[0]).reset_index(drop=True)
    kf = KFold(n_splits=cv,shuffle=True)

    bin_counts = range(minb,maxb)
    if len(bin_counts) == 0:
        raise ValueError("empty bin range: minb must be smaller than maxb")

    best, best_score = None, -np.inf
    for b1 in bin_counts:
        for b2 in bin_counts:
            hist = group.apply(lambda x : np.histogram2d(
                x.iloc[:,dim1].values, x.iloc[:,dim2].values,
                bins=[b1,b2], density=True)[0].flatten().tolist())
            hist_tr = pd.DataFrame(hist.values.tolist())

            # cross_val_score clones and fits the estimator on each fold by
            # itself, so fitting on the whole set beforehand is unnecessary.
            scores = cross_val_score(KNeighborsClassifier(1), hist_tr, classe, cv=kf)

            median_score = np.median(scores)
            if best_score < median_score:
                best_score, best = median_score, (b1, b2)

    if best is None:
        raise ValueError("no bin combination produced a valid cross-validation score")
    ub1, ub2 = best
    return ub1,ub2

# Datasets

### Text

In [4]:
# Catalogue of the text datasets. Each entry is
#   [path prefix relative to "MTS_Datasets/", column separator, ...]
# and the trailing comment gives the entry's index in the list.
dataset = [
    ["ArabicDigits/ArabicDigits", " "],                              # 0
    ["AUSLAN/AUSLAN", " ", "\t"],                                    # 1
    ["CharacterTrajectories/CharacterTrajectories", "\t"],           # 2
    ["CMUsubject16/CMUsubject16", "\t"],                             # 3
    ["ECG/ECG", " "],                                                # 4
    ["Libras/Libras", " "],                                          # 5
    ["PenDigits/PenDigits", "\t"],                                   # 6
    ["UWave/uWave", " "],                                            # 7
    ["RobotFailure/LP1", " "],                                       # 8
    ["RobotFailure/LP2", " "],                                       # 9
    ["RobotFailure/LP3", " "],                                       # 10
    ["RobotFailure/LP4", " "],                                       # 11
    ["RobotFailure/LP5", " "],                                       # 12
    ["Wafer/Wafer", " "],                                            # 13
    ["JapaneseVowels/JapaneseVowels", " "],                          # 14
    ["ArticularyWordRecognition/ArticularyWordRecognition", " "],    # 15
    ["BasicMotions/BasicMotions", " "],                              # 16
    ["Cricket/Cricket", " "],                                        # 17
    ["DuckDuckGeese/DuckDuckGeese", " "],                            # 18
    ["EigenWorms/EigenWorms", " "],                                  # 19
    ["FingerMovements/FingerMovements", " "],                        # 20
    ["HandMovementDirection/HandMovementDirection", " "],            # 21
    ["Heartbeat/Heartbeat", " "],                                    # 22
    ["LSST/LSST", " "],                                              # 23
    ["MotorImagery/MotorImagery", " "],                              # 24
    ["NATOPS/NATOPS", " "],                                          # 25
    ["RacketSports/RacketSports", " "],                              # 26
    ["SelfRegulationSCP1/SelfRegulationSCP1", " "],                  # 27
    ["SelfRegulationSCP2/SelfRegulationSCP2", " "],                  # 28
]

In [16]:
# Position of the dataset to load in the `dataset` list (24 is MotorImagery).
i=24


def load_mts_split(path, sep):
    """Read one split of a text dataset and put it in the layout used below.

    The file has no header. Its first three columns are renamed to "Series"
    (series number), "index" (observation index inside the series) and
    "classe" (label). The observation index is discarded, the label and the
    series number are moved to the end of the frame, and every column label is
    converted to a string. The row index is converted to strings as well.
    """
    frame = pd.read_table(path, sep=sep, header=None)
    frame = frame.rename(index=str, columns={0: "Series", 1: "index", 2: "classe"})
    # The per-series observation index is not needed by the later steps.
    frame = frame.drop(columns=['index'])
    # Remove the label and series columns, then re-append them at the end.
    labels = frame.pop('classe')
    series_ids = frame.pop('Series')
    frame['classe'] = labels
    frame['Series'] = series_ids
    frame.columns = frame.columns.astype(str)
    return frame


train = load_mts_split("MTS_Datasets/"+dataset[i][0]+"_TRAIN", dataset[i][1])
print(train.head())

test = load_mts_split("MTS_Datasets/"+dataset[i][0]+"_TEST", dataset[i][1])

         3         4         5         6         7         8         9  \
0  6.59375  13.00000  11.93750  16.40625  18.87500  18.90625  17.12500   
1  6.59375  12.71875  12.43750  16.50000  19.56250  18.34375  17.03125   
2  6.18750  12.84375  12.96875  16.06250  20.31250  17.75000  16.87500   
3  6.03125  13.21875  13.12500  16.31250  20.84375  17.06250  16.50000   
4  5.50000  13.31250  12.06250  16.46875  21.75000  16.56250  16.12500   

         10        11       12  ...       59       60       61        62  \
0  13.90625  10.15625  2.06250  ... -0.12500 -10.4375 -8.12500 -11.37500   
1  14.78125  10.03125  1.31250  ...  0.00000 -10.1250 -7.28125 -11.03125   
2  14.62500  10.18750  1.18750  ... -0.53125  -9.9375 -6.53125 -10.87500   
3  14.21875  10.18750  0.78125  ... -1.90625  -9.8125 -5.90625 -10.81250   
4  14.03125   9.84375  0.40625  ... -2.56250  -9.8750 -5.62500 -10.68750   

        63        64       65       66  classe  Series  
0 -6.31250 -10.12500 -6.46875 -7.68750   

### Matlab

In [None]:
mat = sio.loadmat('MTS_Datasets/PEMS/PEMS.mat')
mdata = mat['mts']
mdtype = mdata.dtype
# One entry per field of the 'mts' structure.
ndata = {n: mdata[n][0, 0] for n in mdtype.names}
columns = list(ndata)


def mat_split_to_frame(series_cells, label_cells):
    """Stack the series of one split into a single long DataFrame.

    Each element of ``series_cells`` is transposed into a frame, tagged with
    its label (``classe``, taken from ``label_cells`` at the same position) and
    its position in the split (``Series``). All frames are concatenated once at
    the end, with a fresh 0..n-1 row index, instead of growing the result
    inside the loop.
    """
    frames = []
    for position in range(len(series_cells)):
        series = pd.DataFrame(np.transpose(series_cells[position]))
        series['classe'] = label_cells[position]
        series['Series'] = position
        frames.append(series)
    if not frames:
        # pd.concat refuses an empty list; keep the old "empty frame" result.
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)


train = mat_split_to_frame(ndata['train'][0], ndata['trainlabels'][0])
test = mat_split_to_frame(ndata['test'][0], ndata['testlabels'][0])

# Preprocessing

In [17]:
# Number of feature columns: everything except the class label and the
# series identifier.
nb_col = train.drop(columns=['classe', 'Series']).shape[1]
print(nb_col)
print(train.head())

64
         3         4         5         6         7         8         9  \
0  6.59375  13.00000  11.93750  16.40625  18.87500  18.90625  17.12500   
1  6.59375  12.71875  12.43750  16.50000  19.56250  18.34375  17.03125   
2  6.18750  12.84375  12.96875  16.06250  20.31250  17.75000  16.87500   
3  6.03125  13.21875  13.12500  16.31250  20.84375  17.06250  16.50000   
4  5.50000  13.31250  12.06250  16.46875  21.75000  16.56250  16.12500   

         10        11       12  ...       59       60       61        62  \
0  13.90625  10.15625  2.06250  ... -0.12500 -10.4375 -8.12500 -11.37500   
1  14.78125  10.03125  1.31250  ...  0.00000 -10.1250 -7.28125 -11.03125   
2  14.62500  10.18750  1.18750  ... -0.53125  -9.9375 -6.53125 -10.87500   
3  14.21875  10.18750  0.78125  ... -1.90625  -9.8125 -5.90625 -10.81250   
4  14.03125   9.84375  0.40625  ... -2.56250  -9.8750 -5.62500 -10.68750   

        63        64       65       66  classe  Series  
0 -6.31250 -10.12500 -6.46875 -7.68750

## Normalisation

In [18]:
def _min_max_scale(values):
    # Rescale to [0, 1] using the minimum and maximum of the values received
    # (one series at a time, because it is applied per 'Series' group).
    # A constant column yields 0/0, i.e. NaN.
    lowest = values.min()
    return (values - lowest) / (values.max() - lowest)


# Only the first nb_col columns (the features) are overwritten.
train.iloc[:, 0:nb_col] = train.groupby('Series').transform(_min_max_scale).iloc[:, 0:nb_col]
test.iloc[:, 0:nb_col] = test.groupby('Series').transform(_min_max_scale).iloc[:, 0:nb_col]

In [19]:
# Show the training frame after the per-series normalisation.
print(train)

               3         4         5         6         7         8         9  \
0       0.387556  0.642804  0.617712  0.730216  0.799322  1.000000  0.966193   
1       0.387556  0.636162  0.629520  0.732374  0.814237  0.987887  0.964165   
2       0.376000  0.639114  0.642066  0.722302  0.830508  0.975101  0.960784   
3       0.371556  0.647970  0.645756  0.728058  0.842034  0.960296  0.952671   
4       0.356444  0.650185  0.620664  0.731655  0.861695  0.949529  0.944557   
...          ...       ...       ...       ...       ...       ...       ...   
833995  0.569044  0.083141  0.052348  0.212163  0.181755  0.049067  0.057360   
833996  0.549317  0.080062  0.056197  0.212854  0.197650  0.022115  0.043538   
833997  0.547800  0.082371  0.056197  0.185902  0.199724  0.017277  0.048376   
833998  0.552352  0.077752  0.051578  0.173462  0.203870  0.018659  0.046994   
833999  0.564492  0.058507  0.043110  0.165169  0.185211  0.019350  0.061507   

              10        11        12  .

In [20]:
# Remember which columns are entirely NaN in the training set.
mask = train.isna().all()

# Drop those all-NaN columns, fill the remaining gaps with pandas' default
# interpolation, then discard any row that still contains a missing value.
train = (
    train
    .dropna(axis=1, how='all')
    .interpolate()
    .dropna()
)

In [21]:
# Keep only the columns that were not entirely NaN in the training set
# (`mask` comes from the training frame), then apply the same cleaning steps as
# for the training data. The steps are chained on new objects rather than run
# in place on a column-sliced frame, which pandas may flag with a
# SettingWithCopyWarning.
test = (
    test[test.columns[~mask]]
    .dropna(axis=1, how='all')
    .interpolate()
    .dropna()
)

# Manifold learning

In [None]:
grouped_df = train.groupby("Series")

# Number of rows in every series, sorted in increasing order.
aaaaa = sorted(grouped_df.size().tolist())
print(aaaaa)

In [11]:
!pip install tslearn
from tslearn.metrics import dtw



In [None]:
def reduce(data, nbcol, dim=3, repetition=False):
  """Embed one series into ``dim`` dimensions with a temporal spectral embedding.

  Parameters
  ----------
  data : DataFrame holding a single series; its first ``nbcol`` columns are the
      features, the remaining columns are carried over unchanged.
  nbcol : number of leading feature columns.
  dim : number of embedding dimensions to keep.
  repetition : when True, a "repetition" neighbourhood matrix R, built from
      DTW-similar 21-point fragments, is added to the adjacency matrix.

  Returns
  -------
  A new DataFrame (the input is left untouched) in which feature columns
  ``dim .. nbcol-1`` are removed and the first ``dim`` columns hold the
  embedding. If the series has ``dim`` rows or fewer, an empty frame with that
  same reduced column layout is returned, so that concatenating the per-series
  results does not bring the removed columns back.
  """
  size = data.shape[0]
  dropped = data.columns[dim:nbcol]  # feature columns that do not survive

  # A series with too few points cannot be embedded: drop it.
  if dim >= size:
    return data.drop(columns=dropped).iloc[0:0]

  end = size - 1 # Last index of A
  # Only the feature columns take part in the similarity computations.
  features = data.iloc[:, :nbcol]

  R = np.zeros((size, size)) # Repetition neighbourhood matrix

  # Adjacent temporal neighbourhood: RBF similarity between consecutive points,
  # placed on the first super- and sub-diagonal (the main diagonal stays 0).
  A = np.diag(np.diag(rbf_kernel(features, gamma=1), 1), 1)
  A = A + A.transpose()

  if repetition:
    endRepetition = end - 10 # Last point that can be the centre of a fragment
    fragmentNumbers = endRepetition - 9 # Number of fragments
    if fragmentNumbers > 0: # Series shorter than one fragment have no repetitions
      M = np.zeros((fragmentNumbers, fragmentNumbers)) # DTW distances between fragments
      M_mod = np.zeros((fragmentNumbers, fragmentNumbers)) # M after windowing

      # Fragment k holds the 21 points centred on point k+10.
      fragments = [features.iloc[c-10:c+11] for c in range(10, endRepetition + 1)]

      # Computing the M matrix
      for i in range(0, fragmentNumbers):
        for j in range(0, i):
          M[i,j] = dtw(fragments[i], fragments[j])
          M[j,i] = M[i,j]

      # Performing temporal windowing of M: average along the diagonal.
      # NOTE(review): terms with i-b == 0 or j-b == 0 are skipped and the sum is
      # always divided by 20 - the original author flagged this as an open
      # question; confirm the intended handling of the border.
      for i in range(0, fragmentNumbers):
        for j in range(i+1, fragmentNumbers):
          for b in range(0, 20):
            if i-b > 0 and j-b > 0:
              M_mod[i,j] = M_mod[i,j] + M[i-b,j-b]
          M_mod[i,j] = M_mod[i,j] / 20
          M_mod[j,i] = M_mod[i,j]

      # Searching for similar fragments and building the matrix R
      b = 0.75
      # NOTE(review): the per-row statistics broadcast along the column axis, so
      # entry (i, j) is compared with the threshold of row j - confirm intent.
      Bool = (M_mod < M_mod.mean(axis=1) - b * M_mod.std(axis=1))
      for i in range(0, fragmentNumbers):
        for j in range(i+1, fragmentNumbers):
          pointI = i+10 # Center of the fragment
          pointJ = j+10
          if Bool[i,j]:
            if A[pointI,pointJ] != 0 and A[pointI,pointJ] != 1:
              R[pointI,pointJ] = A[pointI,pointJ]
            elif A[pointI,pointJ] == 0:
              R[pointI,pointJ] = math.exp(
                  -np.linalg.norm(features.iloc[pointI] - features.iloc[pointJ])**2)
            R[pointJ,pointI] = R[pointI,pointJ]

  # Build the result on a new frame: functions passed to GroupBy.apply should
  # not mutate the group they receive.
  reduced = data.drop(columns=dropped)
  # n_components is passed by keyword, which works on every scikit-learn version.
  reduced.iloc[:, :dim] = spectral_embedding(A + R, n_components=dim)
  return reduced



def dimensionReduction(data, nbcol, dim=3):
  """Run ``reduce`` on every series of ``data`` (grouped by the "Series" column).

  ``nbcol`` and ``dim`` are forwarded to ``reduce`` as positional arguments; the
  combined result of ``GroupBy.apply`` is returned.
  """
  return data.groupby("Series").apply(reduce, nbcol, dim)

# Extra names bound to the current frames (plain references, not copies).
train2, test2 = train, test

# Number of embedding dimensions kept for every series.
dim = 6

# Reduce the training set and record how long it takes.
start = timer()
train_r = dimensionReduction(train, nb_col, dim)
end = timer()
time1 = end - start
print(train_r)

In [None]:
# Four neighbours

def reduce(data, nbcol, dim=3, repetition=False):
  """Embed one series into ``dim`` dimensions with a temporal spectral embedding.

  The affinity between two points is ``exp(-||x_i - x_j||)`` on the first
  ``nbcol`` columns, with 1 on the diagonal. Without ``repetition`` every point
  is linked to its four temporal neighbours (i-2, i-1, i+1, i+2); with
  ``repetition`` it is linked to i-1 and i+1 only, and a "repetition" matrix R
  built from DTW-similar 21-point fragments is added.

  Parameters
  ----------
  data : DataFrame holding a single series; its first ``nbcol`` columns are the
      features, the remaining columns are carried over unchanged.
  nbcol : number of leading feature columns.
  dim : number of embedding dimensions to keep.
  repetition : whether to add the repetition neighbourhood.

  Returns
  -------
  A new DataFrame (the input is left untouched) in which feature columns
  ``dim .. nbcol-1`` are removed and the first ``dim`` columns hold the
  embedding. If the series has ``dim`` rows or fewer, an empty frame with that
  same reduced column layout is returned, so that concatenating the per-series
  results does not bring the removed columns back.
  """
  size = data.shape[0]
  dropped = data.columns[dim:nbcol]  # feature columns that do not survive

  # A series with too few points cannot be embedded: drop it.
  if dim >= size:
    return data.drop(columns=dropped).iloc[0:0]

  end = size - 1 # Last index of A
  features = data.iloc[:, :nbcol]
  points = features.to_numpy(dtype=float)

  R = np.zeros((size, size)) # Repetition neighbourhood matrix

  # Adjacent temporal neighbourhood, filled one diagonal at a time. For series
  # of six points or more this gives the same links as the former per-row loop;
  # for shorter series it also sets the links that loop left at zero.
  max_lag = 1 if repetition else 2
  A = np.eye(size)
  for lag in range(1, max_lag + 1):
    if size <= lag:
      break
    weights = np.exp(-np.linalg.norm(points[lag:] - points[:-lag], axis=1))
    rows = np.arange(size - lag)
    A[rows, rows + lag] = weights
    A[rows + lag, rows] = weights

  if repetition:
    endRepetition = end - 10 # Last point that can be the centre of a fragment
    fragmentNumbers = endRepetition - 9 # Number of fragments
    if fragmentNumbers > 0: # Series shorter than one fragment have no repetitions
      M = np.zeros((fragmentNumbers, fragmentNumbers)) # DTW distances between fragments
      M_mod = np.zeros((fragmentNumbers, fragmentNumbers)) # M after windowing

      # Fragment k holds the 21 points centred on point k+10.
      fragments = [features.iloc[c-10:c+11] for c in range(10, endRepetition + 1)]

      # Computing the M matrix
      for i in range(0, fragmentNumbers):
        for j in range(0, i):
          M[i,j] = dtw(fragments[i], fragments[j])
          M[j,i] = M[i,j]

      # Performing temporal windowing of M: average along the diagonal.
      # NOTE(review): terms with i-b == 0 or j-b == 0 are skipped and the sum is
      # always divided by 20 - the original author flagged this as an open
      # question; confirm the intended handling of the border.
      for i in range(0, fragmentNumbers):
        for j in range(i+1, fragmentNumbers):
          for b in range(0, 20):
            if i-b > 0 and j-b > 0:
              M_mod[i,j] = M_mod[i,j] + M[i-b,j-b]
          M_mod[i,j] = M_mod[i,j] / 20
          M_mod[j,i] = M_mod[i,j]

      # Searching for similar fragments and building the matrix R
      b = 0.75
      # NOTE(review): the per-row statistics broadcast along the column axis, so
      # entry (i, j) is compared with the threshold of row j - confirm intent.
      Bool = (M_mod < M_mod.mean(axis=1) - b * M_mod.std(axis=1))
      for i in range(0, fragmentNumbers):
        for j in range(i+1, fragmentNumbers):
          pointI = i+10 # Center of the fragment
          pointJ = j+10
          if Bool[i,j]:
            if A[pointI,pointJ] != 0 and A[pointI,pointJ] != 1:
              R[pointI,pointJ] = A[pointI,pointJ]
            elif A[pointI,pointJ] == 0:
              R[pointI,pointJ] = math.exp(
                  -np.linalg.norm(points[pointI] - points[pointJ]))
            R[pointJ,pointI] = R[pointI,pointJ]

  # Build the result on a new frame: functions passed to GroupBy.apply should
  # not mutate the group they receive.
  reduced = data.drop(columns=dropped)
  # n_components is passed by keyword, which works on every scikit-learn version.
  reduced.iloc[:, :dim] = spectral_embedding(A + R, n_components=dim)
  return reduced



def dimensionReduction(data, nbcol, dim=3):
  """Run ``reduce`` on every series of ``data`` (grouped by the "Series" column).

  ``nbcol`` and ``dim`` are forwarded to ``reduce`` as positional arguments; the
  combined result of ``GroupBy.apply`` is returned.
  """
  per_series = data.groupby("Series")
  return per_series.apply(reduce, nbcol, dim)

# Extra names bound to the current frames (plain references, not copies).
train2, test2 = train, test

# Number of embedding dimensions kept for every series.
dim = 2

# Reduce the training set and record how long it takes.
start = timer()
train_r = dimensionReduction(train, nb_col, dim)
end = timer()
time1 = end - start
print(train_r)

In [None]:
# Reduce the test set with the same settings and record how long it takes.
start = timer()
test_r = dimensionReduction(test, nb_col, dim)
end = timer()
time2 = end - start

# Rebind the working names to the frames referenced before the reduction.
train, test = train2, test2
print(test_r)

## Derived features (derivative, cumulative sum) and M-histogram ensemble

In [None]:
start = timer()


# Number of embedded feature columns (everything except label and series id).
nb_col = len(train_r.drop(['classe','Series'],axis=1).columns)


def add_derived_views(frame, n_features):
    """Append the per-series derivative and cumulative sum of every feature.

    The result has ``3 * n_features`` feature columns labelled 0 .. 3*n-1
    (raw values, then first differences, then cumulative sums, each computed
    inside a series), followed by 'classe' and 'Series'. Rows containing a
    missing value - in particular the first row of each series, whose
    difference is undefined - are removed.
    """
    feature_labels = list(frame.columns[:n_features])
    by_series = frame.groupby('Series')[feature_labels]
    views = pd.concat(
        [frame.iloc[:, 0:n_features], by_series.diff(), by_series.cumsum()],
        axis=1)
    # Consecutive integer labels; later code addresses these columns by position.
    views.columns = np.arange(3 * n_features)
    return pd.concat([views, frame[['classe','Series']]], axis=1).dropna()


# Train and test go through the same function so that both frames end up with
# the same column layout.
train_r = add_derived_views(train_r, nb_col)
test_r = add_derived_views(test_r, nb_col)


####### M-histograms #########

# Parameters
# Unordered pairs of dimensions x 3 views (raw, derivative, cumulative sum)
# x 2 histogram types (one 2-D histogram, or two 1-D histograms).
max_comb = comb(nb_col,2)*3*2


print("Nombre de M-histogrammes : ",max_comb)

if nb_col < 2:
    # No pair of distinct dimensions can be drawn, so no histogram can be built.
    raise ValueError("at least two feature columns are required")


def series_hist1d(frame, col, bins):
    """One density-normalised 1-D histogram per series (column at position ``col``)."""
    rows = frame.groupby('Series').apply(lambda x: np.histogram(
        x.iloc[:, col].values, bins=bins, density=True)[0].flatten().tolist())
    return pd.DataFrame(rows.values.tolist())


def series_hist2d(frame, col_a, col_b, bins):
    """One flattened density-normalised 2-D histogram per series."""
    rows = frame.groupby('Series').apply(lambda x: np.histogram2d(
        x.iloc[:, col_a].values, x.iloc[:, col_b].values,
        bins=bins, density=True)[0].flatten().tolist())
    return pd.DataFrame(rows.values.tolist())


def series_labels(frame):
    """Label of every series (taken from its first row), in groupby order."""
    return frame.groupby('Series')['classe'].apply(lambda x: x.iloc[0]).reset_index(drop=True)


# The labels do not change between views, so they are extracted once.
train_classe = series_labels(train_r)
test_classe = series_labels(test_r)

# Best validation score so far and the test score of the view that reached it.
# Starting from -inf guarantees that eval_fi is set by the first view.
eval_int = -np.inf
eval_fi = None

# Learning of the views
for app in range(0,4):
    print("*****************************")
    print("Apprentissage: ",app)
    print("*****************************")
    train_nngrams = list() # M-histograms of the training set, one frame per learner
    test_nngrams = list()  # M-histograms of the test set, same order
    learners = list()
    combin = list()        # Combinations [dim1, dim2, histogram type] already drawn

    # Draw combinations at random until max_comb distinct ones have been used.
    while len(combin) < max_comb:
        # Choosing : dimensions || raw, derivative or cumsum || bigram or 1-grams
        dim1rand = np.random.randint(0,nb_col)
        dim2rand = np.random.randint(0,nb_col)
        while dim2rand == dim1rand:
            dim2rand = np.random.randint(0,nb_col)

        # The three views are stored side by side, nb_col columns each.
        transform = np.random.randint(0,3)
        dim1 = dim1rand+transform*nb_col
        dim2 = dim2rand+transform*nb_col

        nngrams = np.random.randint(0,2)

        # Skip a combination already used (in either order of the dimensions).
        if [dim1,dim2,nngrams] in combin or [dim2,dim1,nngrams] in combin:
            continue
        combin.append([dim1,dim2,nngrams])

        if nngrams == 0:
            # One 2-D histogram, bin counts tuned on the training set.
            ub1,ub2 = bigramsTransform(train_r.copy(),dim1,dim2)
            train_nngrams.append(series_hist2d(train_r, dim1, dim2, [ub1,ub2]))
            test_nngrams.append(series_hist2d(test_r, dim1, dim2, [ub1,ub2]))
            learners.append(('clf',KNeighborsClassifier(1)))
        else:
            # Two 1-D histograms, each with its own tuned bin count.
            ub1h,ub2h = onegramsTransform(train_r.copy(),dim1,dim2)
            for col, bins in ((dim1, ub1h), (dim2, ub2h)):
                train_nngrams.append(series_hist1d(train_r, col, bins))
            for col, bins in ((dim1, ub1h), (dim2, ub2h)):
                test_nngrams.append(series_hist1d(test_r, col, bins))
            learners.append(('clf',KNeighborsClassifier(1)))
            learners.append(('clf',KNeighborsClassifier(1)))

    # Validation score: stratified 80/20 split of the training series.
    se = np.arange(len(train_nngrams[0]))
    train_index, test_index, y_train, y_test = train_test_split(
        se, train_classe, stratify=train_classe, test_size=0.2)

    # For every learner, the histograms of the series on each side of the split.
    x_train = [view.iloc[train_index,:].reset_index(drop=True) for view in train_nngrams]
    x_test = [view.iloc[test_index,:].reset_index(drop=True) for view in train_nngrams]

    fitted_estimators, label_encoder = el.fit_multiple_estimators(learners, x_train, y_train.reset_index(drop=True))
    y_pred = el.predict_from_multiple_estimator(fitted_estimators, label_encoder, x_test)
    score_val = np.round(accuracy_score(y_test.reset_index(drop=True), y_pred),4)
    print("SCORE Training VUE N°",app," : ",score_val )

    # Test score: fit on the whole training set, predict the test set.
    fitted_estimators, label_encoder = el.fit_multiple_estimators(learners, train_nngrams, train_classe)
    y_pred = el.predict_from_multiple_estimator(fitted_estimators, label_encoder, test_nngrams)

    score_test = np.round(accuracy_score(test_classe, y_pred),4)
    print("SCORE TEST: ", score_test)

    # Keep the test score of the view with the best validation score.
    if eval_int < score_val :
        eval_int=score_val
        eval_fi=score_test

print("SCORE FINAL: ", eval_fi)

end = timer()
time3 = end - start
timeF = time1 + time2 + time3
print("TOTAL TIME :", timeF)