In [1]:
import numpy as np
import pandas as pd
import os
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning) 
from sklearn.datasets import load_breast_cancer
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
import pickle

from stru_utils_v2 import *
from cascadeUtil import weval, wUpdate, rmFilesInFolder, predWeakClf, saveWeakClf, loadWeakClf
from cascadeUtil import loadDataset, loadTrnTestDataset, XYTrnUpdateWithTopNFeats, updateTestSetbySelPosSamples, updateTrnsetWithFPtrueSamples
from cascadeUtil import buildStrongClfDefThres, loadStrongClfDefThres,loadStrongClfAdjThres



# building a cascaded detector

# with feeding dataset

In [2]:
featList = [23,24,21,28,8,3,4,1,7,14,27,11,13,26,6,17,18,22,2,29,16,25,9,5,30,20,19,10,12,15]
print(len(featList))

30


In [9]:
datasets = ['feeding' , 'cancer']
dataset = datasets[0]
print('dataset:', dataset)

XY = loadDataset(dataset)

print(XY)


# initialization
XYPos =  XY[np.where(XY[:,-1]==1)[0],:]
XYNeg =  XY[np.where(XY[:,-1]==0)[0],:]

print('XYPos:',XYPos.shape)

XYPosTrn, XYPosTest = tt_split(XYPos, 0.3)
XYNegTrn, XYNegTest = tt_split(XYNeg, 0.3)

P = XYPosTrn
N = XYNegTrn

XYTrn = np.vstack((P,N))
print('XYTrn shape:', XYTrn.shape)
XYTest = np.vstack((XYPosTest,XYNegTest))

cntFeats = XYTrn.shape[1]-1

T = 100

f = 0.6
d = 0.95

FTar = 0.1

FList = []

F = 1 #F0
FPrev = 0
D = 1 #D0
DPrev = 0

clfThresList = []

# stage
i = 0 

if i == 0: # build the first stage
    nFeats = 0
    FPrev = F

# while F > FTar:
#     i = i + 1
#     nFeats = 0
#     FList.append(F)
#     FPrev = F    
#     DPrev = D
    
    print("Build stage 1:")


    while F > FPrev*f and nFeats < cntFeats:
        
        nFeats = nFeats + 1
        print('nfeats:', nFeats)
        
        ######################################################################################
        #       build strong classifier with only first nFeats features in train set 
        ######################################################################################
        
        mdlpath = './fd_model_stage'+str(i)+'/'
    
        XYTrnNFeat = XYTrnUpdateWithTopNFeats(XYTrn, featList, nFeats)

#       build stage with features selected
        buildStrongClfDefThres(XYTrnNFeat, T, mdlpath)
    
#       evaluate cascaded classifier on validation set to determine F and D
        yRes, clfThres = loadStrongClfDefThres(XYTest, T, mdlpath)
        
        print("clfThres: ", clfThres)
        prec_pos, D, f1_pos, TPR, F, Specificity, MCC, CKappa, w_acc, cm = calc_cm_rcall(XYTest[:,-1], yRes)
        print("\nrecall_pos: ",D)
        print("false positive rate: ", F)
        
        if D > d and F < FPrev*f:
            clfThresList.append(clfThres)
            FList.append(F)
            
        elif D < d:
            thres = int(clfThres)
            
            while thres > 2 and D < d:
                
                thres = thres - 2
                yRes = loadStrongClfAdjThres(XYTrnNFeat, T, mdlpath , thres)
                print("thres: ", thres)

                prec_pos, D, f1_pos, TPR, F, Specificity, MCC, CKappa, w_acc, cm = calc_cm_rcall(XYTrnNFeat[:,-1], yRes)

                print("recall_pos: ",D)
                print("false positive rate: ", F)
                

            if D > d:
                if F < FPrev*f:
                    clfThresList.append(thres)
                    FList.append(F)
                    print('Succeed!', '\n')
                else:
                    print('Fail: F cannot be less than f when D is greater than or equal to d', '\n')
            else:
                print('Fail, D cannot be greater than or equal to d', '\n')
            
            

        elif F > FPrev*f:
                print('Specific case pursued came out: F > f !!! need to tune F, D, f, d', '\n')
                
#       prediction for 
        
#       evalutation

#     update sample set
    XYTrn = updateTrnsetWithFPtrueSamples(XYTrn, yRes)



dataset: feeding
[[  1.09483296e+00   4.88885661e-01   6.38511072e+00 ...,   6.57927156e-01
    5.00000000e+01   1.00000000e+00]
 [  8.26554694e-01   8.26009982e-01   2.97287670e+00 ...,   9.86151786e-01
    5.20000000e+01   1.00000000e+00]
 [  6.06988504e-02   1.79986986e-01   7.04646999e-01 ...,   2.28641465e-01
    4.80000000e+01   0.00000000e+00]
 ..., 
 [ -4.84644233e-03  -4.54346028e-03   3.76703845e-02 ...,   2.15830856e-02
    5.40000000e+01   0.00000000e+00]
 [  1.17599614e+00   5.40081712e-01   4.53218151e+00 ...,   8.21070057e-01
    4.60000000e+01   1.00000000e+00]
 [  5.55490528e-02  -1.28633516e-03   1.11125689e+00 ...,   7.71751574e-01
    1.50000000e+02   0.00000000e+00]]
XYPos: (572, 68)
XYTrn shape: (342, 68)
Build stage 1:
nfeats: 1


  yPred = h.predict(X[:,feat].reshape(-1, 1))
  yPred = h.predict(X[:,feat].reshape(-1, 1))
  precision_all = sum(cm[i,i]/sum(cm[j,i] for j in range(len(set(y_test)))) for i in range(len(set(y_test))))/(len(set(y_test)))
  fscore_all = sum(2*(cm[i,i]/sum(cm[i,j] for j in range(len(set(y_test)))))*(cm[i,i]/sum(cm[j,i] for j in range(len(set(y_test)))))/(cm[i,i]/sum(cm[i,j] for j in range(len(set(y_test))))+cm[i,i]/sum(cm[j,i] for j in range(len(set(y_test))))) for i in range(len(set(y_test))))/len(set(y_test))
  mcc = cov_ytyp / np.sqrt(var_yt * var_yp)


clfThres:  5.54578370386

recall_pos:  0.720698254364
false positive rate:  0.576059850374
thres:  3
recall_pos:  1.0
false positive rate:  1.0
Fail: F cannot be less than f when D is greater than or equal to d 

nfeats: 2
clfThres:  33.393064684

recall_pos:  0.600997506234
false positive rate:  0.199501246883
thres:  31
recall_pos:  0.391812865497
false positive rate:  0.0526315789474
thres:  29
recall_pos:  0.391812865497
false positive rate:  0.0526315789474
thres:  27
recall_pos:  0.391812865497
false positive rate:  0.0526315789474
thres:  25
recall_pos:  0.391812865497
false positive rate:  0.0526315789474
thres:  23
recall_pos:  0.391812865497
false positive rate:  0.0526315789474
thres:  21
recall_pos:  0.391812865497
false positive rate:  0.0526315789474
thres:  19
recall_pos:  0.666666666667
false positive rate:  0.12865497076
thres:  17
recall_pos:  0.830409356725
false positive rate:  0.292397660819
thres:  15
recall_pos:  0.912280701754
false positive rate:  0.35672514619

In [10]:
FList

[0.40350877192982454]

In [11]:
clfThresList

[13]

In [12]:
XYTrn.shape

(240, 68)

In [13]:
i = 1

if i == 1: # build the 2nd stage
    FPrev = FList[0]
    F = FPrev
    print("Build stage 2:")

    while F > FPrev*f and nFeats < cntFeats:
        nFeats = nFeats + 1
        print('nFeats:', nFeats)
        
        ######################################################################################
        #       build strong classifier with only first nFeats features in train set 
        ######################################################################################
        
        mdlpath = './fd_model_stage'+str(i)+'/'
        
        XYTrnNFeat = XYTrnUpdateWithTopNFeats(XYTrn, featList, nFeats)# XYTrn is updated

#       build stage with features selected
        buildStrongClfDefThres(XYTrnNFeat, T, mdlpath)
    
#       evaluate cascaded classifier on validation set to determine F and D
        yRes = loadStrongClfAdjThres(XYTest, T, mdlpath, clfThresList[0])
        
        print("clfThres: ", clfThres)
        prec_pos, D, f1_pos, TPR, F, Specificity, MCC, CKappa, w_acc, cm = calc_cm_rcall(XYTest[:,-1], yRes)
        print("\nrecall_pos: ",D)
        print("false positive rate: ", F)
        
        if D > d and F < FPrev*f:
            clfThresList.append(clfThres)
            FList.append(F)
            
        elif D < d:
            thres = int(clfThres)
            
            while thres > 2 and D < d:
                
                thres = thres - 2
                yRes = loadStrongClfAdjThres(XYTrnNFeat, T, mdlpath , thres)
                print("thres: ", thres)

                prec_pos, D, f1_pos, TPR, F, Specificity, MCC, CKappa, w_acc, cm = calc_cm_rcall(XYTrnNFeat[:,-1], yRes)

                print("recall_pos: ",D)
                print("false positive rate: ", F)
                

            if D > d:
                if F < FPrev*f:
                    clfThresList.append(thres)
                    FList.append(F)
                    print('Succeed!', '\n')
                else:
                    print('Fail: F cannot be less than f when D is greater than or equal to d', '\n')
            else:
                print('Fail, D cannot be greater than or equal to d', '\n')
            
            

        elif F > FPrev*f:
                print('Specific case pursued came out: F > f !!! need to tune F, D, f, d', '\n')
                
#       prediction for 
        
#       evalutation



#     update sample set
#     XYTrn = XYTrnUpdate(XYTrn, yRes, featList, nFeats, thres)



Build stage 2:
nFeats: 3


  yPred = h.predict(X[:,feat].reshape(-1, 1))
  precision_all = sum(cm[i,i]/sum(cm[j,i] for j in range(len(set(y_test)))) for i in range(len(set(y_test))))/(len(set(y_test)))
  fscore_all = sum(2*(cm[i,i]/sum(cm[i,j] for j in range(len(set(y_test)))))*(cm[i,i]/sum(cm[j,i] for j in range(len(set(y_test)))))/(cm[i,i]/sum(cm[i,j] for j in range(len(set(y_test))))+cm[i,i]/sum(cm[j,i] for j in range(len(set(y_test))))) for i in range(len(set(y_test))))/len(set(y_test))
  mcc = cov_ytyp / np.sqrt(var_yt * var_yp)


clfThres:  33.393064684

recall_pos:  1.0
false positive rate:  1.0
Specific case pursued came out: F > f !!! need to tune F, D, f, d 

nFeats: 4
clfThres:  33.393064684

recall_pos:  0.980049875312
false positive rate:  0.613466334165
Specific case pursued came out: F > f !!! need to tune F, D, f, d 

nFeats: 5
clfThres:  33.393064684

recall_pos:  0.972568578554
false positive rate:  0.521197007481
Specific case pursued came out: F > f !!! need to tune F, D, f, d 

nFeats: 6
clfThres:  33.393064684

recall_pos:  0.972568578554
false positive rate:  0.608478802993
Specific case pursued came out: F > f !!! need to tune F, D, f, d 

nFeats: 7
clfThres:  33.393064684

recall_pos:  0.972568578554
false positive rate:  0.608478802993
Specific case pursued came out: F > f !!! need to tune F, D, f, d 

nFeats: 8
clfThres:  33.393064684

recall_pos:  0.972568578554
false positive rate:  0.608478802993
Specific case pursued came out: F > f !!! need to tune F, D, f, d 

nFeats: 9
clfThres:  33.

  prec_pos = TP/(TP + FP)


clfThres:  33.393064684

recall_pos:  0.0673316708229
false positive rate:  0.0324189526185
thres:  31
recall_pos:  0.941520467836
false positive rate:  0.507246376812
thres:  29
recall_pos:  0.941520467836
false positive rate:  0.507246376812
thres:  27
recall_pos:  0.941520467836
false positive rate:  0.507246376812
thres:  25
recall_pos:  0.941520467836
false positive rate:  0.507246376812
thres:  23
recall_pos:  0.941520467836
false positive rate:  0.507246376812
thres:  21
recall_pos:  0.941520467836
false positive rate:  0.507246376812
thres:  19
recall_pos:  0.941520467836
false positive rate:  0.507246376812
thres:  17
recall_pos:  0.941520467836
false positive rate:  0.507246376812
thres:  15
recall_pos:  0.941520467836
false positive rate:  0.507246376812
thres:  13
recall_pos:  0.941520467836
false positive rate:  0.507246376812
thres:  11
recall_pos:  0.941520467836
false positive rate:  0.507246376812
thres:  9
recall_pos:  0.941520467836
false positive rate:  0.5072463768

In [25]:
file = '/Volumes/SHIBO/BeYourself/BeYourself/CLEAN/P120/wrist_readable/Accelerometer/09-05-17_13.csv'

tmpdf =pd.read_csv(file)
tmpdf

Unnamed: 0,Time,accX,accY,accZ,Datetime
0,1504634406910,-1.157806,-12.484421,-6.967178,2017-09-05 18:00:06.910
1,1504634406957,-3.510559,-9.498947,-8.433960,2017-09-05 18:00:06.957
2,1504634407012,-4.220856,-9.648910,-7.786499,2017-09-05 18:00:07.012
3,1504634407058,-1.459625,-10.493469,-3.431381,2017-09-05 18:00:07.058
4,1504634407105,-0.395126,-11.169022,-0.991501,2017-09-05 18:00:07.105
5,1504634407160,-0.820267,-11.550827,-4.692978,2017-09-05 18:00:07.160
6,1504634407206,0.203766,-6.447311,-2.559692,2017-09-05 18:00:07.206
7,1504634407262,1.296844,-11.927887,-2.766296,2017-09-05 18:00:07.262
8,1504634407308,2.908356,-17.379410,-1.362839,2017-09-05 18:00:07.308
9,1504634407363,6.289429,-13.386093,3.241272,2017-09-05 18:00:07.363


In [26]:
aa = tmpdf['Datetime'].values

In [27]:
aa

array(['2017-09-05 18:00:06.910', '2017-09-05 18:00:06.957',
       '2017-09-05 18:00:07.012', ..., '2017-09-05 19:00:04.590',
       '2017-09-05 19:00:04.645', '2017-09-05 19:00:04.691'], dtype=object)

In [29]:
len(set(aa))

89000