# KNN-2: for partial curve classification 

The idea is to classify a single partial curve using KNN (k = 1): the cycle number of the partial curve is taken from the closest full-cycle curve. 
The distance used to find the closest full curve is computed with ``fastdtw`` (which determines the distance between two time series).

**What happens in practice is that we get the same cycle for whatever partial curve we test. We think this is because, within the same MATLAB file, all the partial curves are very close together, so there is not much difference between them.**

In [1]:
import pandas as pd
import numpy as np
import import_data
import sort_data
import matplotlib.pyplot as plt

%matplotlib inline

  from ._conv import register_converters as _register_converters


In [18]:
# Load both data sets through sort_data.charge_discharge; each call returns a
# pair that is unpacked here (presumably charge and discharge curves, judging
# by the c/d suffixes -- TODO confirm against sort_data).
PL03c,PL03d = sort_data.charge_discharge('converted_PL03.mat')
# The PL11 discharge data is what the cells below pass to KNN/distance_to_full
# as the dictionary of full reference curves.
full_curvesc,full_curvesd = sort_data.charge_discharge('converted_PL11.mat')

In [3]:
# Voltage column of entry 444 of the PL03 discharge data; used below as the
# test curve handed to KNN.
a = PL03d[444]['voltage']


In [7]:
import pandas as pd
from scipy.spatial.distance import euclidean
from fastdtw import fastdtw

def curve_distance(a, b):
    '''Return the fastdtw distance between two time series.

    Both inputs are handed unchanged to ``fastdtw``, with SciPy's
    ``euclidean`` as the point-to-point metric. Only the accumulated
    distance is returned; the warping path is discarded.
    '''
    return fastdtw(a, b, dist=euclidean)[0]

def distance_to_full(curve, dic, min_points=100):
    '''Rank a set of full discharge curves by their distance to one test curve.

    Parameters
    ----------
    curve : pandas Series or DataFrame
        The testing (partial) curve; its values are flattened to a 1-D array.
    dic : dict
        Maps a cycle identifier to a DataFrame that has a ``'voltage'`` column.
        Keys must be mutually orderable (e.g. all numbers) because they are
        visited in ascending order.
    min_points : int, optional
        A full curve is compared only if it has more than this many voltage
        samples. The default of 100 filters out degenerate cycles (the
        original note mentions cycle 102, which has a single value).

    Returns
    -------
    pandas.DataFrame
        Columns ``'Cycle'`` and ``'Distance'``, sorted by ascending distance.
        Empty if no full curve passes the ``min_points`` filter.
    '''
    # Convert the test curve to a flat NumPy array once, outside the loop.
    partial = curve.values.flatten()

    rows = []
    # Visit every key that actually exists. Counting 1..len(dic) would skip
    # cycles numbered above len(dic) whenever the cycle numbers have gaps.
    for cycle in sorted(dic):
        full = dic[cycle][['voltage']].values.flatten()
        if len(full) <= min_points:
            continue  # too few samples to be a usable full curve
        rows.append([cycle, curve_distance(partial, full)])

    # One row per compared cycle, closest curve first.
    ranking = pd.DataFrame(rows, columns=['Cycle', 'Distance'])
    return ranking.sort_values(by=['Distance'])

def Predict_Cycle(df):
    '''Return the cycle of the full curve closest to the testing curve.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with a ``'Cycle'`` and a ``'Distance'`` column, one row per
        full curve (the format produced by ``distance_to_full``).

    Returns
    -------
    The ``'Cycle'`` value of the row with the smallest ``'Distance'``.

    Raises
    ------
    IndexError
        If ``df`` has no rows, i.e. there is no full curve to choose from.
    '''
    if len(df) == 0:
        raise IndexError('Predict_Cycle received an empty distance table')
    # Select the minimum explicitly rather than trusting the row order, so the
    # answer is right even when the caller has not sorted the table.
    closest = df['Distance'].idxmin()
    return df.loc[closest, 'Cycle']

def KNN(curve, dic, t, full_curve_dt=10):
    '''Predict the cycle of a discharge curve from its nearest full curve (k = 1).

    Parameters
    ----------
    curve : pandas Series or DataFrame
        The testing curve.
    dic : dict
        The full curve cycles, keyed by cycle.
    t : number
        Time between consecutive measurements in the testing curve.
    full_curve_dt : number, optional
        Time between consecutive measurements in the full cycle data, in the
        same unit as ``t``. Defaults to 10, the value previously hard-coded.

    Returns
    -------
    tuple
        ``(cycle, full_curve)``: the matched cycle scaled by
        ``t / full_curve_dt``, and the entry of ``dic`` for the matched
        (unscaled) cycle.
    '''
    distances = distance_to_full(curve, dic)
    nearest = Predict_Cycle(distances)
    full_curve = dic[nearest]

    # Correct the cycle number for the different sampling intervals of the
    # testing curve and the full cycle data.
    corr_factor = t / full_curve_dt
    return nearest * corr_factor, full_curve




In [9]:
# Classify the test curve `a` against the PL11 discharge curves with t = 5;
# KNN scales the matched cycle by t/10, so cycle 808 is reported as 404.0.
KNN(a,full_curvesd,5)

(404.0,
                  time       datetime  step  cycle  current_amp   voltage  \
 1539730  1.681326e+07  736138.780949   6.0  808.0    -0.749760  4.081907   
 1539731  1.681327e+07  736138.781065   6.0  808.0    -0.749760  4.072032   
 1539732  1.681328e+07  736138.781181   6.0  808.0    -0.749941  4.064424   
 1539733  1.681329e+07  736138.781296   6.0  808.0    -0.750123  4.058272   
 1539734  1.681330e+07  736138.781412   6.0  808.0    -0.750123  4.052768   
 1539735  1.681331e+07  736138.781528   6.0  808.0    -0.749760  4.048073   
 1539736  1.681332e+07  736138.781644   6.0  808.0    -0.750123  4.043540   
 1539737  1.681333e+07  736138.781759   6.0  808.0    -0.749941  4.039655   
 1539738  1.681334e+07  736138.781875   6.0  808.0    -0.750123  4.036094   
 1539739  1.681335e+07  736138.781991   6.0  808.0    -0.750123  4.032370   
 1539740  1.681336e+07  736138.782106   6.0  808.0    -0.749941  4.028971   
 1539741  1.681337e+07  736138.782222   6.0  808.0    -0.750123  4.0

In [11]:
# Same classification for a different PL03 entry (10); the output below shows
# the same match as for entry 444.
KNN(PL03d[10]['voltage'],full_curvesd,5)

(404.0,
                  time       datetime  step  cycle  current_amp   voltage  \
 1539730  1.681326e+07  736138.780949   6.0  808.0    -0.749760  4.081907   
 1539731  1.681327e+07  736138.781065   6.0  808.0    -0.749760  4.072032   
 1539732  1.681328e+07  736138.781181   6.0  808.0    -0.749941  4.064424   
 1539733  1.681329e+07  736138.781296   6.0  808.0    -0.750123  4.058272   
 1539734  1.681330e+07  736138.781412   6.0  808.0    -0.750123  4.052768   
 1539735  1.681331e+07  736138.781528   6.0  808.0    -0.749760  4.048073   
 1539736  1.681332e+07  736138.781644   6.0  808.0    -0.750123  4.043540   
 1539737  1.681333e+07  736138.781759   6.0  808.0    -0.749941  4.039655   
 1539738  1.681334e+07  736138.781875   6.0  808.0    -0.750123  4.036094   
 1539739  1.681335e+07  736138.781991   6.0  808.0    -0.750123  4.032370   
 1539740  1.681336e+07  736138.782106   6.0  808.0    -0.749941  4.028971   
 1539741  1.681337e+07  736138.782222   6.0  808.0    -0.750123  4.0

In [8]:
# Inspect the distance table for PL03 entry 10 (sorted by ascending distance).
distance_to_full(PL03d[10]['voltage'],full_curvesd)

Unnamed: 0,Cycle,Distance
732,808,12.884456
731,807,12.944880
730,806,13.001481
729,805,13.029223
726,802,13.048145
733,809,13.113628
725,801,13.172595
724,800,13.194370
719,795,13.225240
721,797,13.264486


In [9]:
# Distance table for PL03 entry 100, for comparison with entry 10 above.
distance_to_full(PL03d[100]['voltage'],full_curvesd)

Unnamed: 0,Cycle,Distance
732,808,13.119667
731,807,13.180470
730,806,13.221817
729,805,13.254694
726,802,13.287098
733,809,13.341317
725,801,13.393999
724,800,13.429449
719,795,13.443385
718,794,13.483205
