In [1]:
import logging
logging.basicConfig(level=logging.DEBUG)
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
import utilities
import os

In [24]:
datasets_binary = ['BeetleFly','BirdChicken','Coffee','Computers','DistalPhalanxOutlineCorrect','Earthquakes','ECG200',
                   'ECGFiveDays','FordA','FordB','GunPoint','Ham','HandOutlines','Herring','ItalyPowerDemand','Lightning2',
                   'MiddlePhalanxOutlineCorrect', 'MoteStrain','PhalangesOutlinesCorrect','ProximalPhalanxOutlineCorrect',
                   'ShapeletSim','SonyAIBORobotSurface1','SonyAIBORobotSurface2','Strawberry','ToeSegmentation1','ToeSegmentation2',
                   'TwoLeadECG','Wafer','Wine','WormsTwoClass','Yoga','Chinatown','DodgerLoopGame','DodgerLoopWeekend',
                   'FreezerRegularTrain','FreezerSmallTrain','GunPointAgeSpan','GunPointMaleVersusFemale','GunPointOldVersusYoung',
                   'HouseTwenty','PowerCons','SemgHandGenderCh2']
datasets_small = ['BeetleFly', 'BirdChicken', 'Coffee', 'Computers', 'DistalPhalanxOutlineCorrect', 'Earthquakes', 'ECG200', 
                  'ECGFiveDays', 'GunPoint', 'Ham', 'Herring', 'Lightning2', 'MiddlePhalanxOutlineCorrect', 'ProximalPhalanxOutlineCorrect', 
                  'ShapeletSim', 'SonyAIBORobotSurface1', 'SonyAIBORobotSurface2', 'Strawberry', 'ToeSegmentation1', 'ToeSegmentation2', 'Wine', 
                  'WormsTwoClass', 'Chinatown', 'DodgerLoopGame', 'DodgerLoopWeekend', 'GunPointAgeSpan', 'GunPointMaleVersusFemale', 
                  'GunPointOldVersusYoung', 'HouseTwenty', 'PowerCons', 'SemgHandGenderCh2']
dataset_large=['FordA', 'FordB', 'HandOutlines', 'ItalyPowerDemand', 'MoteStrain', 'PhalangesOutlinesCorrect', 
               'TwoLeadECG', 'Wafer', 'Yoga', 'FreezerRegularTrain', 'FreezerSmallTrain']

### Implement shapelet distance

In [3]:
from scipy.spatial import distance
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.metrics import roc_auc_score
from sklearn.metrics import average_precision_score
from sklearn.metrics.pairwise import paired_distances
import numpy as np
import itertools
from itertools import permutations

In [4]:
def min_distance(S,T):
    if len(S)>len(T):
        print("Error: Shapelet length larger than Time series")
        return None
    else:
        dist_list=[]
        m=len(T)
        w=len(S)
        
        for i in range(m-w+1):
            #print(T[i:i+w].shape)
            dist=np.linalg.norm(T[i:i+w]-S)
            #print(dist)
            dist_list.append(dist)
        return min(dist_list)

In [29]:
#K(T1,T2)=K_s((s11,...,s1U), (s21,...,s2V))=min_{i,j}( min_dist(s1i,s2j) )
def pairwise_min_shapelet(T1,T2):
    #T1,T2 should be lists of subsequences
    
    u=len(T1)
    v=len(T2)
    unique_combinations = []
    '''
    dist_list=[]
    for i in U[:-1]:
        i_dist_list=[]
        for j in V[i+1:]:
            print(i,j,T1[i],T2[j])
            i_dist=np.linalg.norm(T1[i]-T2[j])
            i_dist_list.append(i_dist)
        dist_list.append(min(i_dist_list))
    return min(dist_list)
    '''
    c = list(itertools.product(T1, T2))
    c=np.array(c)
    dist_list=[]
    dist_list = [np.linalg.norm(grp[0]-grp[1]) for grp in c]
    #for grp in c:
        #i_dist=np.linalg.norm(grp[0]-grp[1])
        #i_dist=np.abs(c[0] - c[1])
        #dist_list.append(i_dist)
    return min(dist_list)
    
 
    

In [31]:
#train and test all 42 datasets
dataset_list=[]
auroc_list=[]
auprc_list=[]
parameters = {'n_neighbors':[1]}

for dataset in datasets_small:
    X_train, y_train, X_test, y_test = utilities.get_ucr_dataset('../UCRArchive_2018/',dataset)
    #clf = KNeighborsClassifier(n_neighbors=1,metric=fastdtw)
    #clf = FaissKNeighbors(n_neighbors=1,metric=fastdtw)
    clf = GridSearchCV(KNeighborsClassifier(metric=pairwise_min_shapelet), parameters, cv=2, verbose=1)
    clf.fit(X_train, y_train)
    y_pred = clf.predict_proba(X_test)
    auroc=roc_auc_score(y_test, y_pred[:, 1])
    auprc=average_precision_score(y_test, y_pred[:,1])
    print(dataset, " AUROC is: ", auroc," AUPRC is: ", auprc)
    dataset_list.append(dataset)
    auroc_list.append(auroc)
    auprc_list.append(auprc)
       
    

Fitting 2 folds for each of 1 candidates, totalling 2 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:  7.0min finished


BeetleFly  AUROC is:  0.44999999999999996  AUPRC is:  0.5277777777777778
Fitting 2 folds for each of 1 candidates, totalling 2 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:  6.7min finished


BirdChicken  AUROC is:  0.6499999999999999  AUPRC is:  0.44999999999999996
Fitting 2 folds for each of 1 candidates, totalling 2 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:  3.8min finished


Coffee  AUROC is:  0.5358974358974358  AUPRC is:  0.4835164835164835
Fitting 2 folds for each of 1 candidates, totalling 2 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


KeyboardInterrupt: 

In [40]:
np.savetxt('pairwise_min_shapelet_KNN.csv', [p for p in zip(dataset_list, auroc_list,auprc_list)], delimiter=',',fmt="%s")