#  Intro to Hardware Security and Trust Final Project

## Import Libraries

In [17]:
from sklearn.model_selection import GridSearchCV,train_test_split
from sklearn.cluster import KMeans,DBSCAN
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score,precision_score,recall_score,confusion_matrix

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import csv
import random
import os
from statistics import mean


## Dataset Splitting Function

In [18]:
xlsx_path = 'C:\\Users\patri\\git\\HSTFinalProject\\Classifier-for-Hardware-Trojan-Detection\\ROFreq\\ROFreq\\Chip'
csv_path ='C:\\Users\patri\\git\HSTFinalProject\Classifier-for-Hardware-Trojan-Detection\ROFreq_C\Chip'

for i in np.arange(1,34):
    data_xls = pd.read_excel('{}{}.xlsx'.format(xlsx_path,i), dtype=str, index_col=None)
    data_xls.to_csv('{}{}.csv'.format(csv_path,i), encoding='utf-8', index=False)

In [19]:
def csv_read(chip_num,type,csv_path):
    data= []
    data_list= []
    label_list = np.zeros(25)
    label_list[0] = 1
    label_list[-1] = 1
    with open('{}{}.csv'.format(csv_path,chip_num)) as csvfile:
        csv_reader = csv.reader(csvfile)
   
        for row in csv_reader:
            # rounding
            row = [round(float(num[0:12]),3) for num in row]     
            data_list.append(row)   

        if type == 'TI':
            data.append(data_list[0])
            data.append(data_list[-1])
            label = [1,1]
        elif type == 'TF':
            data = data_list[1:-1]
            label = label_list[1:-1]
        else:
            data = data_list
            label = label_list 
                  
        
    return data,label

In [20]:
def train_test_split(num_sel,case_sel,csv_path):
    chips_num = np.arange(1,34)
    indices = range(33)
    # randomly select #num chips
    sel = random.sample(indices,num_sel)
    # split tain and test data
    train_chips = chips_num[sel]
    test_chips = np.delete(chips_num,sel)
    print('Training chips num: ',train_chips)
    temp = []

    for i in range(int(num_sel/2)):
        temp.extend(['TF','TI'])
 
    type_sel_1 = random.sample(temp,num_sel)
    type_sel_3 = random.choices(['TI','TF'],k=num_sel)
    
    if case_sel == 1:
        print('Type selection:', type_sel_1)
        print('TI:',type_sel_1.count('TI'))
        print('TF:',type_sel_1.count('TF'))

    else:
        print('Type selection:', type_sel_3)
        print('TI:',type_sel_3.count('TI'))
        print('TF:',type_sel_3.count('TF'))

    cnt = 0
    train_data = []
    train_label = []

    for chip_num in train_chips:
        if case_sel == 1:
            data,label = csv_read(chip_num,type_sel_1[cnt],csv_path)
        elif case_sel ==3:
            data,label = csv_read(chip_num,type_sel_3[cnt],csv_path)
        train_data.extend(data)
        train_label.extend(label)
        cnt += 1

    test_data = []
    test_label = []
    for chip_num in test_chips:
        data,label = csv_read(chip_num,'ALL',csv_path)
        test_data.extend(data)
        test_label.extend(label)
    
    return train_data, train_label, test_data, test_label

##  CASE 1

### Case 1 Datasets

In [21]:
# arg1 => number of training samples
# arg2 => number of case (enter 1 or 3 only)
train24_data, train24_label, test24_data, test24_label = train_test_split(24,1,csv_path)
train12_data, train12_label, test12_data, test12_label = train_test_split(12,1,csv_path)
train6_data, train6_label, test6_data, test6_label = train_test_split(6,1,csv_path)

Training chips num:  [17 19 29  2 22 14 31  9  8 16 28  4 10 23 24 32 30 18 15 13  5  1 12 26]
Type selection: ['TF', 'TI', 'TF', 'TF', 'TI', 'TI', 'TI', 'TF', 'TF', 'TF', 'TI', 'TF', 'TI', 'TI', 'TF', 'TI', 'TI', 'TI', 'TI', 'TF', 'TF', 'TF', 'TI', 'TF']
TI: 12
TF: 12
Training chips num:  [26 20 25 29 21 30 24 17 31  8 10 32]
Type selection: ['TF', 'TI', 'TI', 'TF', 'TF', 'TI', 'TI', 'TF', 'TI', 'TI', 'TF', 'TF']
TI: 6
TF: 6
Training chips num:  [ 2 15 14  4 19 26]
Type selection: ['TI', 'TF', 'TI', 'TF', 'TI', 'TF']
TI: 3
TF: 3


In [22]:
pd.DataFrame(train24_data)

Unnamed: 0,0,1,2,3,4,5,6,7
0,184.4,194.8,214.4,224.8,215.6,220.4,212.0,242.4
1,178.0,188.4,207.6,217.6,208.8,213.2,205.2,234.4
2,174.4,184.4,203.2,213.2,204.0,208.4,200.8,229.2
3,170.8,181.2,199.2,208.8,200.4,204.4,196.8,224.4
4,167.2,177.2,194.8,204.0,196.0,200.0,192.8,220.0
...,...,...,...,...,...,...,...,...
295,157.6,168.8,184.0,194.0,185.6,188.0,182.0,208.0
296,151.2,162.0,176.4,186.0,178.0,180.4,174.4,199.6
297,145.2,156.0,170.0,179.2,171.6,173.6,167.6,192.0
298,139.6,149.6,163.2,172.0,164.4,166.8,161.6,184.8


In [23]:
# 24 Training - Create Dataframe objects and compute correlations
train24_data_df = pd.DataFrame(train24_data)
train24_label_s = pd.Series(train24_label)
train24_data_df.corrwith(train24_label_s).sort_values(ascending=False)

1    0.289779
0    0.285115
7    0.284896
3    0.282454
4    0.280606
6    0.276865
5    0.276520
2    0.273438
dtype: float64

In [8]:
# 12 Training - Create Dataframe objects and compute correlations
train12_data_df = pd.DataFrame(train12_data)
train12_label_s = pd.Series(train12_label)
train12_data_df.corrwith(train12_label_s).sort_values(ascending=False)

1    0.494850
4    0.490975
0    0.483849
3    0.479704
7    0.478776
5    0.468136
2    0.467527
6    0.461397
dtype: float64

In [9]:
# 6 Training - Create Dataframe objects and compute correlations
train6_data_df = pd.DataFrame(train6_data)
train6_label_s = pd.Series(train6_label)
train6_data_df.corrwith(train6_label_s).sort_values(ascending=False)

6    0.646390
5    0.643603
2    0.643085
3    0.641045
0    0.631108
4    0.626977
7    0.626884
1    0.619144
dtype: float64

Correlation between features seems to be about equal for predictions with training and test splits

### Tuning KNeighbors Classifier

In [24]:
# Creating KNeighborsClassifier Pipeline
KneighC = Pipeline([('scaler',StandardScaler()),('knc', KNeighborsClassifier())])

# Finding the best parameters for KNeighborsClassifier

## Create parameter grid for gridsearch algorithm
knc_param_Grid=  { 'knc__n_neighbors': np.arange(2,10,1),
                  'knc__leaf_size': np.arange(2,30,2),
                 }

## Create Grid search
knc_grid_search = GridSearchCV(KneighC,
                               param_grid=knc_param_Grid,
                               scoring='accuracy',
                               refit=True,
                               cv=5,
                               verbose=1)

ii=0
### Fit to training data
KNC_trainscores=[]
KNC_testscores=[]

tpr_train_scores = []
fpr_train_scores = []
tpr_scores = []
fpr_scores = []

while ii < 20:

    # Splits for each dataset
    train24_data, train24_label, test24_data, test24_label = train_test_split(24,1,csv_path)
    train12_data, train12_label, test12_data, test12_label = train_test_split(12,1,csv_path)
    train6_data, train6_label, test6_data, test6_label = train_test_split(6,1,csv_path)
    
    train24_data = pd.DataFrame(train24_data)
    test24_data = pd.DataFrame(test24_data)
    
    train12_data = pd.DataFrame(train12_data)
    test12_data = pd.DataFrame(test12_data)
    
    train6_data = pd.DataFrame(train6_data)
    test6_data = pd.DataFrame(test6_data)
    

    # Create lists of training sets to parse
    training_sets = [train24_data, train12_data, train6_data]
    training_labels = [train24_label, train12_label, train6_label]
    
    # Create lists of test sets to parse
    test_sets = [test24_data, test12_data, test6_data]
    test_labels =[test24_label, test12_label, test6_label]
    
    Sets = ["-----24 Sample Training Set-----","-----12 Sample Training Set-----", "-----6 Sample Training Set-----"]
    t_sets = ["-----24 Sample Test Set-----","-----12 Sample Test Set-----", "-----6 Sample Test Set-----"]
    
    #Iterater
    ii = ii + 1
    
    # Loop responsible for parsing training sets and training labels
    for (a,i,j,m,n) in zip(Sets, training_sets,training_labels, test_sets, test_labels):
        print("\n",a,"\n")
        knc_grid_search.fit(i,j)
        ### Print Best parameters
        print("\n",knc_grid_search.best_params_)
        ## Saving the tuned model
        model = knc_grid_search.best_estimator_
        print("Best Accuracy: ",knc_grid_search.best_score_)
        KNC_trainscores+= [knc_grid_search.best_score_]
        y_pred = model.predict(m)
        KNC_testscores += [accuracy_score(n,y_pred)]
        
        # Calculate Average TPR and Average FPR for training set
        tn, fp, fn, tp = confusion_matrix(j, model.predict(i)).ravel()
        tpr_train = tp/(tp+fn)
        fpr_train = fp/(fp+tn)

        # Create list of TPR and FPR for train
        tpr_train_scores += [tpr_train]
        fpr_train_scores += [fpr_train]

        # Calculate Average TPR and Average FPR for test set
        tn, fp, fn, tp = confusion_matrix(n, y_pred).ravel()
        tpr_test = tp/(tp+fn)
        fpr_test = fp/(fp+tn)
        
        # Create list of TPR and FPR
        tpr_scores += [tpr_test]
        fpr_scores += [fpr_test]

Training chips num:  [24 33  3  6 32 28  1 13 21 25 29  9 19 31 22  8 12 16 27 20 17  5  7 14]
Type selection: ['TI', 'TF', 'TI', 'TF', 'TI', 'TI', 'TF', 'TF', 'TI', 'TF', 'TF', 'TF', 'TF', 'TI', 'TI', 'TI', 'TI', 'TF', 'TI', 'TF', 'TF', 'TF', 'TI', 'TI']
TI: 12
TF: 12
Training chips num:  [27  4 13 21 20  3 18  5 31  8 32 14]
Type selection: ['TF', 'TI', 'TI', 'TI', 'TF', 'TF', 'TI', 'TI', 'TF', 'TI', 'TF', 'TF']
TI: 6
TF: 6
Training chips num:  [17 26 31  8  1 19]
Type selection: ['TF', 'TI', 'TI', 'TF', 'TF', 'TI']
TI: 3
TF: 3

 -----24 Sample Training Set----- 

Fitting 5 folds for each of 112 candidates, totalling 560 fits

 {'knc__leaf_size': 2, 'knc__n_neighbors': 8}
Best Accuracy:  0.9266666666666665

 -----12 Sample Training Set----- 

Fitting 5 folds for each of 112 candidates, totalling 560 fits

 {'knc__leaf_size': 2, 'knc__n_neighbors': 3}
Best Accuracy:  0.9266666666666665

 -----6 Sample Training Set----- 

Fitting 5 folds for each of 112 candidates, totalling 560 fits



### Results for KneighborsClassifier

In [27]:
# ----- Accuracy scores for test and training sets -----
# Slicing training scores into individual lists
KNC_24_train_scores = KNC_trainscores[0::3]
KNC_12_train_scores= KNC_trainscores[1::3]
KNC_6_train_scores = KNC_trainscores[2::3]

# Slicing test scores into individual lists
KNC_24_test_scores = KNC_testscores[0::3]
KNC_12_test_scores = KNC_testscores[1::3]
KNC_6_test_scores = KNC_testscores[2::3]

print("---------- Kneighbors Accuracy Scores----------")

# Accuracy scores for test and training sets
train24_avg = mean(KNC_24_train_scores)
test24_avg = mean(KNC_24_test_scores)
print("TRAIN24 Average Accuracy: ", train24_avg)
print("Test24 Average Accruacy: " , test24_avg)

# Accuracy scores for test and training sets
train12_avg = mean(KNC_12_train_scores)
test12_avg = mean(KNC_12_test_scores)
print("Train12 Average Accuracy: ", train12_avg)
print("Test12 Average Accruacy: " , test12_avg)

# Accuracy Scores for test and training sets
train6_avg = mean(KNC_6_train_scores)
test6_avg = mean(KNC_6_test_scores)
print("TRAIN6 Average Accuracy: ", train6_avg)
print("Test6 Average Accruacy: " , test6_avg)

# ----- Average TPR and Average FPR -----
# TRAINING SET
KNC_24train_TPR = tpr_train_scores[0::3]
KNC_12train_TPR = tpr_train_scores[1::3]
KNC_6train_TPR = tpr_train_scores[2::3]
KNC_24train_FPR = fpr_train_scores[0::3]
KNC_12train_FPR = fpr_train_scores[1::3]
KNC_6train_FPR = fpr_train_scores[2::3]


# TEST SET
KNC_24test_TPR = tpr_scores[0::3]
KNC_12test_TPR = tpr_scores[1::3]
KNC_6test_TPR = tpr_scores[2::3]
KNC_24test_FPR = fpr_scores[0::3]
KNC_12test_FPR = fpr_scores[1::3]
KNC_6test_FPR = fpr_scores[2::3]

print("\n","---------- KNC Average TPR----------")
TPR_train24 = mean(KNC_24train_TPR)
TPR_test24 = mean(KNC_24test_TPR)
print("TRAIN24 Average TPR: ", TPR_train24)
print("Test24 Average TPR: " , TPR_test24)
TPR_train12 = mean(KNC_12train_TPR)
TPR_test12 = mean(KNC_12test_TPR)
print("Train12 Average TPR: ", TPR_train12)
print("Test12 Average TPR: " , TPR_test12)
TPR_train6 = mean(KNC_6train_TPR)
TPR_test6 = mean(KNC_6test_TPR)
print("TRAIN6 Average TPR: ", TPR_train6)
print("Test6 Average TPR: " , TPR_test6)

print("\n","---------- KNC Average FPR----------")
FPR_train24 = mean(KNC_24train_FPR)
FPR_test24 = mean(KNC_24test_FPR)
print("TRAIN24 Average FPR: ", FPR_train24)
print("Test24 Average FPR: " , FPR_test24)
FPR_train12 = mean(KNC_12train_FPR)
FPR_test12 = mean(KNC_12test_FPR)
print("Train12 Average FPR: ", FPR_train12)
print("Test12 Average FPR: " , FPR_test12)
FPR_train6 = mean(KNC_6train_FPR)
FPR_test6 = mean(KNC_6test_FPR)
print("TRAIN6 Average FPR: ", FPR_train6)
print("Test6 Average FPR: " , FPR_test6)


---------- Kneighbors Accuracy Scores----------
TRAIN24 Average Accuracy:  0.937
Test24 Average Accruacy:  0.9271111111111111
Train12 Average Accuracy:  0.9513333333333334
Test12 Average Accruacy:  0.9191428571428572
TRAIN6 Average Accuracy:  0.9733333333333334
Test6 Average Accruacy:  0.9087407407407407

 ---------- KNC Average TPR----------
TRAIN24 Average TPR:  0.775
Test24 Average TPR:  0.5583333333333333
Train12 Average TPR:  0.7333333333333333
Test12 Average TPR:  0.5142857142857142
TRAIN6 Average TPR:  0.8416666666666667
Test6 Average TPR:  0.7101851851851851

 ---------- KNC Average FPR----------
TRAIN24 Average FPR:  0.0125
Test24 Average FPR:  0.04082125603864734
Train12 Average FPR:  0.004347826086956522
Test12 Average FPR:  0.04565217391304348
TRAIN6 Average FPR:  0.0014492753623188406
Test6 Average FPR:  0.07399355877616746


### Tuning Logistic Regression

In [37]:
# Create Logisitic Regression Model
Logreg = Pipeline([('scaler',StandardScaler()),('logreg',LogisticRegression(random_state=42))])
Logreg

# Finding the best parameters for Logistic Regression

## Create Parameter Grid
logreg_param_grid = {'logreg__C': [0.001,0.01,0.1,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0],
                     'logreg__tol': [0.0001, 0.001, 0.01]
                    }

## Create Log Reg Gridsearch
logreg_gridsearch = GridSearchCV(Logreg,
                                 param_grid=logreg_param_grid,
                                 scoring='accuracy',
                                 refit=True,
                                 cv=5,
                                 verbose=1)

### Fit to training data
logreg_trainscores=[]
logreg_testscores=[]

# tpr and fpr scores
tpr_scores = []
fpr_scores = []

tpr_train_scores = []
fpr_train_scores = []
tpr_test_scores = []
fpr_test_scores = []

ii=0
while ii < 20:

    # Splits for each dataset
    train24_data, train24_label, test24_data, test24_label = train_test_split(24,1,csv_path)
    train12_data, train12_label, test12_data, test12_label = train_test_split(12,1,csv_path)
    train6_data, train6_label, test6_data, test6_label = train_test_split(6,1,csv_path)
    
    # Create pandas dataframes and series
    train24_data = pd.DataFrame(train24_data)
    test24_data = pd.DataFrame(test24_data)
    
    train12_data = pd.DataFrame(train12_data)
    test12_data = pd.DataFrame(test12_data)
    
    train6_data = pd.DataFrame(train6_data)
    test6_data = pd.DataFrame(test6_data)
    
    # Create lists of training sets to parse
    training_sets = [train24_data, train12_data, train6_data]
    training_labels = [train24_label, train12_label, train6_label]
    
    # Create lists of test sets to parse
    test_sets = [test24_data, test12_data, test6_data]
    test_labels =[test24_label, test12_label, test6_label]
    
    Sets = ["-----24 Sample Training Set-----","-----12 Sample Training Set-----", "-----6 Sample Training Set-----"]
    t_sets = ["-----24 Sample Test Set-----","-----12 Sample Test Set-----", "-----6 Sample Test Set-----"]
    
    #Iterater
    ii = ii + 1
    
  

    # Loop responsible for parsing training sets and training labels
    for (a,i,j,m,n) in zip(Sets, training_sets,training_labels, test_sets, test_labels):
        print("\n",a,"\n")
        logreg_gridsearch.fit(i,j)
        ### Print Best parameters
        print("\n",logreg_gridsearch.best_params_)
        ## Saving the tuned model
        model = logreg_gridsearch.best_estimator_
        print("Best Accuracy: ",logreg_gridsearch.best_score_)
       
        logreg_trainscores+= [logreg_gridsearch.best_score_]
       
        y_pred = model.predict(m)
        logreg_testscores += [accuracy_score(n,y_pred)]

        # Calculate TPR and FPR on training set
        tp, fn, fp, tn = confusion_matrix(j,model.predict(i)).ravel()
        fpr = fp/(fp+tn)
        tpr = tp/(tp+fn)
        
        tpr_train_scores += [tpr]
        fpr_train_scores += [fpr]
        
        # Calculate FPR and TPR on test set
        tp, fn, fp, tn = confusion_matrix(n,y_pred).ravel()
        fpr_test = fp/(fp+tn)
        tpr_test = tp/(tp+fn)

        tpr_test_scores += [tpr_test]
        fpr_test_scores += [fpr_test]

Training chips num:  [27 33  8 25 23  2 13 12  3 20  7 31  6 10 26 22 14 32 24 28  5 19 17 11]
Type selection: ['TI', 'TI', 'TF', 'TF', 'TF', 'TF', 'TI', 'TF', 'TI', 'TI', 'TF', 'TI', 'TI', 'TF', 'TF', 'TI', 'TI', 'TF', 'TI', 'TF', 'TF', 'TI', 'TI', 'TF']
TI: 12
TF: 12
Training chips num:  [20  9 25 28 13 24 18 33 26  8  3 19]
Type selection: ['TF', 'TI', 'TF', 'TI', 'TF', 'TF', 'TI', 'TF', 'TI', 'TI', 'TI', 'TF']
TI: 6
TF: 6
Training chips num:  [ 5 20 29  3  6 18]
Type selection: ['TI', 'TF', 'TI', 'TI', 'TF', 'TF']
TI: 3
TF: 3

 -----24 Sample Training Set----- 

Fitting 5 folds for each of 39 candidates, totalling 195 fits

 {'logreg__C': 0.01, 'logreg__tol': 0.0001}
Best Accuracy:  0.9333333333333332

 -----12 Sample Training Set----- 

Fitting 5 folds for each of 39 candidates, totalling 195 fits

 {'logreg__C': 0.001, 'logreg__tol': 0.0001}
Best Accuracy:  0.9199999999999999

 -----6 Sample Training Set----- 

Fitting 5 folds for each of 39 candidates, totalling 195 fits

 {'log

### Log Reg Results

In [47]:
# Slicing training scores into individual lists
logreg_24_train_scores = logreg_trainscores[0::3]
logreg_12_train_scores= logreg_trainscores[1::3]
logreg_6_train_scores = logreg_trainscores[2::3]

# Slicing test scores into individual lists
logreg_24_test_scores = logreg_testscores[0::3]
logreg_12_test_scores = logreg_testscores[1::3]
logreg_6_test_scores = logreg_testscores[2::3]

print("---------- Logistic Regression Accuracy Scores----------")

# Accuracy scores for test and training sets
train24_avg = mean(logreg_24_train_scores)
test24_avg = mean(logreg_24_test_scores)
print("TRAIN24 Average Accuracy: ", train24_avg)
print("Test24 Average Accruacy: " , test24_avg)

# Accuracy scores for test and training sets
train12_avg = mean(logreg_12_train_scores)
test12_avg = mean(logreg_12_test_scores)
print("Train12 Average Accuracy: ", train12_avg)
print("Test12 Average Accruacy: " , test12_avg)

# Accuracy Scores for test and training sets
train6_avg = mean(logreg_6_train_scores)
test6_avg = mean(logreg_6_test_scores)
print("TRAIN6 Average Accuracy: ", train6_avg)
print("Test6 Average Accruacy: " , test6_avg)

# Slicing fpr and tpr scores for each set
logreg_24_fpr_train = fpr_train_scores[0::3]
logreg_24_tpr_train = tpr_train_scores[0::3]
logreg_12_fpr_train = fpr_train_scores[1::3]
logreg_12_tpr_train = tpr_train_scores[1::3]
logreg_6_fpr_train = fpr_train_scores[2::3]
logreg_6_tpr_train = tpr_train_scores[2::3]

logreg_24_fpr_test = fpr_test_scores[0::3]
logreg_24_tpr_test = tpr_test_scores[0::3]
logreg_12_fpr_test = fpr_test_scores[1::3]
logreg_12_tpr_test = tpr_test_scores[1::3]
logreg_6_fpr_test = fpr_test_scores[2::3]
logreg_6_tpr_test = tpr_test_scores[2::3]


# Printing Mean score for each set

print("---------- Logistic Regression Mean FPR and TPR Scores----------")

# Mean FPR and TPR scores for training sets
print("Mean FPR for 24 Sample Training Set: ", mean(logreg_24_fpr_train))
print("Mean TPR for 24 Sample Training Set: ", mean(logreg_24_tpr_train))
print("Mean FPR for 12 Sample Test Set: ", mean(logreg_12_fpr_test))
print("Mean TPR for 12 Sample Test Set: ", mean(logreg_12_tpr_test))

print("Mean FPR for 12 Sample Training Set: ", mean(logreg_12_fpr_train))
print("Mean TPR for 12 Sample Training Set: ", mean(logreg_12_tpr_train))
print("Mean FPR for 12 Sample Test Set: ", mean(logreg_12_fpr_test))
print("Mean TPR for 12 Sample Test Set: ", mean(logreg_12_tpr_test))

print("Mean FPR for 6 Sample Training Set: ", mean(logreg_6_fpr_train))
print("Mean TPR for 6 Sample Training Set: ", mean(logreg_6_tpr_train))
print("Mean FPR for 6 Sample Test Set: ", mean(logreg_6_fpr_test))
print("Mean TPR for 6 Sample Test Set: ", mean(logreg_6_tpr_test))




---------- Logistic Regression Accuracy Scores----------
TRAIN24 Average Accuracy:  0.9249999999999999
Test24 Average Accruacy:  0.9222222222222223
Train12 Average Accuracy:  0.9299999999999999
Test12 Average Accruacy:  0.9133333333333333
TRAIN6 Average Accuracy:  0.9466666666666669
Test6 Average Accruacy:  0.914074074074074
---------- Logistic Regression Mean FPR and TPR Scores----------
Mean FPR for 24 Sample Training Set:  0.7952862986686516
Mean TPR for 24 Sample Training Set:  0.9733918367438188
Mean FPR for 12 Sample Test Set:  0.7714285714285715
Mean TPR for 12 Sample Test Set:  0.9766045548654244
Mean FPR for 12 Sample Training Set:  0.8193452380952381
Mean TPR for 12 Sample Training Set:  0.9819073498964803
Mean FPR for 12 Sample Test Set:  0.7714285714285715
Mean TPR for 12 Sample Test Set:  0.9766045548654244
Mean FPR for 6 Sample Training Set:  nan
Mean TPR for 6 Sample Training Set:  0.9835507246376811
Mean FPR for 6 Sample Test Set:  0.7981481481481482
Mean TPR for 6 Samp

## CASE 3

### Case 3 Datasets

In [43]:
# Kmeans pipeline
Kmeans = Pipeline([('scaler',StandardScaler()),('kmeans',KMeans(random_state=42))])
Kmeans

### KMeans

In [46]:
# Create KMeans model
kmeans = Pipeline([('scaler', StandardScaler()),('kmeans',KMeans())])


# Finding the best parameters for Logistic Regression

## Create parameter grid for kmeans gridsearch
kmeans_param_grid = {'kmeans__n_clusters': [2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20]}

## Create Kmeans gridsearch
kmeans_gridsearch = GridSearchCV(kmeans,kmeans_param_grid,cv=5,return_train_score=True)

### kmeans accuracy score lists
kmeans_trainscores=[]
kmeans_testscores=[]

# tpr and fpr scores
tpr_scores = []
fpr_scores = []

tpr_train_scores = []
fpr_train_scores = []
tpr_test_scores = []
fpr_test_scores = []

ii=0
while ii < 20:

    # Splits for each dataset
    train24_data, train24_label, test24_data, test24_label = train_test_split(24,3,csv_path)
    train12_data, train12_label, test12_data, test12_label = train_test_split(12,3,csv_path)
    train6_data, train6_label, test6_data, test6_label = train_test_split(6,3,csv_path)
    
    # Create pandas dataframes and series
    train24_data = pd.DataFrame(train24_data)
    test24_data = pd.DataFrame(test24_data)
    
    train12_data = pd.DataFrame(train12_data)
    test12_data = pd.DataFrame(test12_data)
    
    train6_data = pd.DataFrame(train6_data)
    test6_data = pd.DataFrame(test6_data)
    
    # Create lists of training sets to parse
    training_sets = [train24_data, train12_data, train6_data]
    training_labels = [train24_label, train12_label, train6_label]
    
    # Create lists of test sets to parse
    test_sets = [test24_data, test12_data, test6_data]
    test_labels =[test24_label, test12_label, test6_label]
    
    Sets = ["-----24 Sample Training Set-----","-----12 Sample Training Set-----", "-----6 Sample Training Set-----"]
    t_sets = ["-----24 Sample Test Set-----","-----12 Sample Test Set-----", "-----6 Sample Test Set-----"]
    
    #Iterater
    ii = ii + 1

    # Loop responsible for parsing training sets and training labels
    for (a,i,j,m,n) in zip(Sets, training_sets,training_labels, test_sets, test_labels):
        print("\n",a,"\n")
        # fit the gridsearch
        kmeans_gridsearch.fit(i,j)

        ### Print Best parameters
        print("Best Parameters: ",kmeans_gridsearch.best_params_)      

        # Create model with best parameters
        model = logreg_gridsearch.best_estimator_

        # Accuracy metric for KMeans
        print("Accuracy Score: ",kmeans_gridsearch.best_score_)
       
        # Append best accuracy to list
        kmeans_trainscores += [kmeans_gridsearch.best_score_]
       
        # Predict on test set
        y_pred = model.predict(m)

        # Calculate accuracy score and append to kmeans_testscores
        kmeans_testscores += [accuracy_score(n,y_pred)]
        
        # Calculate TPR and FPR on training set
        tp, fn, fp, tn = confusion_matrix(j,model.predict(i)).ravel()
        fpr = fp/(fp+tn)
        tpr = tp/(tp+fn)
        
        tpr_train_scores += [tpr]
        fpr_train_scores += [fpr]
        
        # Calculate FPR and TPR on test set
        tp, fn, fp, tn = confusion_matrix(n,y_pred).ravel()
        fpr_test = fp/(fp+tn)
        tpr_test = tp/(tp+fn)

        tpr_test_scores += [tpr_test]
        fpr_test_scores += [fpr_test]

Training chips num:  [ 9 21  1  6 14 16 15 33 19 27 18 32 24 26 28  3 29 23 22  5 17 25 30 11]
Type selection: ['TF', 'TF', 'TI', 'TF', 'TI', 'TI', 'TI', 'TI', 'TF', 'TF', 'TF', 'TI', 'TF', 'TF', 'TI', 'TI', 'TF', 'TI', 'TF', 'TI', 'TF', 'TI', 'TI', 'TF']
TI: 12
TF: 12
Training chips num:  [ 9 11 28 29 32 26 17 33  1 19 10 31]
Type selection: ['TI', 'TF', 'TF', 'TI', 'TI', 'TF', 'TF', 'TI', 'TI', 'TF', 'TI', 'TI']
TI: 7
TF: 5
Training chips num:  [25 15  2 21 17 12]
Type selection: ['TI', 'TF', 'TF', 'TI', 'TF', 'TF']
TI: 2
TF: 4

 -----24 Sample Training Set----- 





Best Parameters:  {'kmeans__n_clusters': 20}
Accuracy Score:  -3.365567723232734

 -----12 Sample Training Set----- 





Best Parameters:  {'kmeans__n_clusters': 20}
Accuracy Score:  -5.7693372858191845

 -----6 Sample Training Set----- 





Best Parameters:  {'kmeans__n_clusters': 19}
Accuracy Score:  -1.7319681141278351
Training chips num:  [ 7  2 19 31  5 33  1 26 20 11  3 28 32 12 13 14 30 27 25  8 21 18 22  4]
Type selection: ['TF', 'TI', 'TF', 'TI', 'TF', 'TI', 'TF', 'TI', 'TI', 'TF', 'TI', 'TF', 'TF', 'TI', 'TF', 'TF', 'TF', 'TI', 'TF', 'TF', 'TF', 'TF', 'TF', 'TI']
TI: 9
TF: 15
Training chips num:  [18 21  6 25 29 12 31 11  9 19  3 26]
Type selection: ['TF', 'TF', 'TI', 'TI', 'TI', 'TI', 'TI', 'TF', 'TF', 'TF', 'TI', 'TF']
TI: 6
TF: 6
Training chips num:  [31 12 30 13 19 24]
Type selection: ['TF', 'TF', 'TI', 'TI', 'TI', 'TI']
TI: 4
TF: 2

 -----24 Sample Training Set----- 

Best Parameters:  {'kmeans__n_clusters': 20}
Accuracy Score:  -9.351398689153111

 -----12 Sample Training Set----- 





Best Parameters:  {'kmeans__n_clusters': 20}
Accuracy Score:  -9.858927449232528

 -----6 Sample Training Set----- 





Best Parameters:  {'kmeans__n_clusters': 20}
Accuracy Score:  -34.44804198077337
Training chips num:  [19  3 22  6 33  7 14  9  5 27 32  8 11 17 20 25 30  2 31 26 15 21 13  4]
Type selection: ['TI', 'TI', 'TF', 'TI', 'TI', 'TI', 'TF', 'TI', 'TI', 'TI', 'TI', 'TI', 'TF', 'TI', 'TI', 'TF', 'TI', 'TI', 'TI', 'TF', 'TI', 'TI', 'TF', 'TF']
TI: 17
TF: 7
Training chips num:  [10 16 26  8  9  1 12 30  5 24 23 14]
Type selection: ['TI', 'TF', 'TF', 'TF', 'TI', 'TI', 'TI', 'TI', 'TF', 'TF', 'TF', 'TI']
TI: 6
TF: 6
Training chips num:  [ 3  6 32 31  9 28]
Type selection: ['TI', 'TF', 'TF', 'TI', 'TI', 'TI']
TI: 4
TF: 2

 -----24 Sample Training Set----- 





Best Parameters:  {'kmeans__n_clusters': 20}
Accuracy Score:  -4.795384986646134

 -----12 Sample Training Set----- 





Best Parameters:  {'kmeans__n_clusters': 20}
Accuracy Score:  -2.1398817605937612

 -----6 Sample Training Set----- 





Best Parameters:  {'kmeans__n_clusters': 20}
Accuracy Score:  -1.8008051281269104
Training chips num:  [ 7 16 29 27  6  8 30 21 23  3 14 18 12 22 31  2  9 19 28 33 24 15 26 11]
Type selection: ['TF', 'TF', 'TF', 'TF', 'TF', 'TI', 'TF', 'TF', 'TI', 'TF', 'TF', 'TI', 'TI', 'TI', 'TI', 'TI', 'TI', 'TF', 'TI', 'TI', 'TF', 'TF', 'TI', 'TI']
TI: 12
TF: 12
Training chips num:  [ 5  9 25 22  3 11 15  2 27 29 33 13]
Type selection: ['TF', 'TI', 'TI', 'TF', 'TF', 'TI', 'TF', 'TI', 'TI', 'TF', 'TF', 'TF']
TI: 5
TF: 7
Training chips num:  [15 17  3 12 19  9]
Type selection: ['TF', 'TF', 'TF', 'TF', 'TF', 'TF']
TI: 0
TF: 6

 -----24 Sample Training Set----- 





Best Parameters:  {'kmeans__n_clusters': 20}
Accuracy Score:  -3.953951908835832

 -----12 Sample Training Set----- 





Best Parameters:  {'kmeans__n_clusters': 20}
Accuracy Score:  -5.459557711093823

 -----6 Sample Training Set----- 





Best Parameters:  {'kmeans__n_clusters': 20}
Accuracy Score:  -2.5292860487764246
Training chips num:  [33 31  9  1 18 26  6  7 25 20 28 32 22 24 17 19 15 11 23 27 21 16  3  4]
Type selection: ['TF', 'TF', 'TF', 'TF', 'TF', 'TI', 'TI', 'TF', 'TI', 'TI', 'TI', 'TI', 'TF', 'TI', 'TF', 'TI', 'TI', 'TF', 'TF', 'TI', 'TI', 'TI', 'TF', 'TF']
TI: 12
TF: 12
Training chips num:  [27 10  8  7  2 25 28 26 13 12 15  6]
Type selection: ['TI', 'TF', 'TF', 'TI', 'TF', 'TF', 'TI', 'TI', 'TI', 'TF', 'TI', 'TF']
TI: 6
TF: 6
Training chips num:  [19  8  6 22 12 14]
Type selection: ['TI', 'TI', 'TF', 'TF', 'TF', 'TI']
TI: 3
TF: 3

 -----24 Sample Training Set----- 



  fpr = fp/(fp+tn)


Best Parameters:  {'kmeans__n_clusters': 20}
Accuracy Score:  -4.193290832288253

 -----12 Sample Training Set----- 





Best Parameters:  {'kmeans__n_clusters': 20}
Accuracy Score:  -5.626423485951511

 -----6 Sample Training Set----- 





Best Parameters:  {'kmeans__n_clusters': 20}
Accuracy Score:  -6.926069118212557
Training chips num:  [33 19  2  9 30 10 15  7  1 18 17 31  4 26  5 25  6 29 27 23 11 28 24 21]
Type selection: ['TF', 'TI', 'TF', 'TI', 'TI', 'TI', 'TI', 'TF', 'TI', 'TF', 'TI', 'TI', 'TI', 'TF', 'TI', 'TF', 'TF', 'TI', 'TI', 'TI', 'TF', 'TF', 'TI', 'TI']
TI: 15
TF: 9
Training chips num:  [16 31  2 14  8 22 24  6 29 27 28 32]
Type selection: ['TI', 'TF', 'TF', 'TF', 'TI', 'TF', 'TF', 'TI', 'TF', 'TI', 'TI', 'TF']
TI: 5
TF: 7
Training chips num:  [25  3  1 26 32 23]
Type selection: ['TF', 'TI', 'TF', 'TF', 'TI', 'TI']
TI: 3
TF: 3

 -----24 Sample Training Set----- 





Best Parameters:  {'kmeans__n_clusters': 19}
Accuracy Score:  -3.718727958558494

 -----12 Sample Training Set----- 





Best Parameters:  {'kmeans__n_clusters': 19}
Accuracy Score:  -2.8086438098982667

 -----6 Sample Training Set----- 





Best Parameters:  {'kmeans__n_clusters': 20}
Accuracy Score:  -1.2362197720882926
Training chips num:  [12  2 21 11 25 14 20  4 17 29  3 22 10  8 27 31 15 19  9 28 33 24 13 26]
Type selection: ['TI', 'TI', 'TI', 'TF', 'TI', 'TI', 'TF', 'TI', 'TF', 'TI', 'TF', 'TF', 'TI', 'TF', 'TF', 'TI', 'TF', 'TI', 'TI', 'TF', 'TF', 'TI', 'TF', 'TF']
TI: 12
TF: 12
Training chips num:  [31 11  8 20 30 23 19  6 26  1 29 27]
Type selection: ['TI', 'TI', 'TI', 'TF', 'TF', 'TI', 'TF', 'TF', 'TF', 'TI', 'TF', 'TF']
TI: 5
TF: 7
Training chips num:  [30 27 31 15 25  6]
Type selection: ['TF', 'TF', 'TI', 'TI', 'TI', 'TI']
TI: 4
TF: 2

 -----24 Sample Training Set----- 





Best Parameters:  {'kmeans__n_clusters': 20}
Accuracy Score:  -6.326382515561758

 -----12 Sample Training Set----- 





Best Parameters:  {'kmeans__n_clusters': 20}
Accuracy Score:  -2.3653437240288033

 -----6 Sample Training Set----- 





Best Parameters:  {'kmeans__n_clusters': 19}
Accuracy Score:  -1.9987218968447973
Training chips num:  [31  2 14 23  8 10  5  7 28 11 15 18 29 12 21 20 33 26 17 16  1 27 13  3]
Type selection: ['TI', 'TF', 'TF', 'TI', 'TF', 'TI', 'TI', 'TF', 'TF', 'TF', 'TF', 'TF', 'TF', 'TI', 'TI', 'TI', 'TI', 'TI', 'TI', 'TI', 'TF', 'TF', 'TF', 'TF']
TI: 11
TF: 13
Training chips num:  [32 22  1  8 18 20  4 28 12 24  5 14]
Type selection: ['TF', 'TF', 'TF', 'TF', 'TI', 'TF', 'TF', 'TI', 'TI', 'TF', 'TF', 'TI']
TI: 4
TF: 8
Training chips num:  [21 13 14 23 31 26]
Type selection: ['TF', 'TF', 'TI', 'TI', 'TF', 'TF']
TI: 2
TF: 4

 -----24 Sample Training Set----- 





Best Parameters:  {'kmeans__n_clusters': 20}
Accuracy Score:  -9.344346843268571

 -----12 Sample Training Set----- 





Best Parameters:  {'kmeans__n_clusters': 20}
Accuracy Score:  -3.3917923349738515

 -----6 Sample Training Set----- 





Best Parameters:  {'kmeans__n_clusters': 20}
Accuracy Score:  -5.451294136126494
Training chips num:  [17  4 13 21  3 12 30 22 29  1 33  6 32 23 18 20 15 16 26 19  8  5 10  2]
Type selection: ['TI', 'TF', 'TF', 'TF', 'TI', 'TF', 'TI', 'TI', 'TF', 'TF', 'TF', 'TI', 'TI', 'TF', 'TF', 'TF', 'TF', 'TF', 'TI', 'TF', 'TI', 'TF', 'TI', 'TI']
TI: 10
TF: 14
Training chips num:  [15 26 14  1  3  7  6  2 11 17 25 18]
Type selection: ['TI', 'TF', 'TF', 'TF', 'TI', 'TI', 'TI', 'TI', 'TF', 'TF', 'TF', 'TF']
TI: 5
TF: 7
Training chips num:  [14 26 27  8  9 16]
Type selection: ['TF', 'TF', 'TI', 'TF', 'TF', 'TF']
TI: 1
TF: 5

 -----24 Sample Training Set----- 

Best Parameters:  {'kmeans__n_clusters': 19}
Accuracy Score:  -7.311576196347339

 -----12 Sample Training Set----- 





Best Parameters:  {'kmeans__n_clusters': 20}
Accuracy Score:  -4.261678029570677

 -----6 Sample Training Set----- 





Best Parameters:  {'kmeans__n_clusters': 20}
Accuracy Score:  -4.032617060526912
Training chips num:  [20 24 21 26 32 28 15 16 13  4 17 23 22 12 30  1 31  7  3 18 10  6  2 27]
Type selection: ['TI', 'TI', 'TF', 'TF', 'TF', 'TI', 'TF', 'TI', 'TI', 'TF', 'TF', 'TF', 'TF', 'TF', 'TF', 'TI', 'TF', 'TI', 'TF', 'TF', 'TF', 'TF', 'TF', 'TF']
TI: 7
TF: 17
Training chips num:  [ 6  3 13 32 17 33 27 25 23 22  2 21]
Type selection: ['TI', 'TI', 'TF', 'TI', 'TI', 'TI', 'TI', 'TI', 'TF', 'TI', 'TF', 'TF']
TI: 8
TF: 4
Training chips num:  [14 15 19  9 25 22]
Type selection: ['TI', 'TI', 'TI', 'TF', 'TF', 'TI']
TI: 4
TF: 2

 -----24 Sample Training Set----- 

Best Parameters:  {'kmeans__n_clusters': 20}
Accuracy Score:  -16.674817636281144

 -----12 Sample Training Set----- 





Best Parameters:  {'kmeans__n_clusters': 19}
Accuracy Score:  -4.416636232568598

 -----6 Sample Training Set----- 





Best Parameters:  {'kmeans__n_clusters': 20}
Accuracy Score:  -7.488960858718559
Training chips num:  [ 8 24  7 32 17 31 11 10 16  9 30 26 14 15  1 22 27 20  6 29 18  4 28 13]
Type selection: ['TF', 'TI', 'TI', 'TF', 'TI', 'TF', 'TI', 'TI', 'TF', 'TF', 'TF', 'TI', 'TF', 'TI', 'TI', 'TF', 'TF', 'TF', 'TF', 'TI', 'TF', 'TI', 'TF', 'TI']
TI: 11
TF: 13
Training chips num:  [17 10  5 26 16 19 15 11 27 20 25  3]
Type selection: ['TI', 'TF', 'TF', 'TF', 'TF', 'TF', 'TI', 'TI', 'TF', 'TI', 'TI', 'TF']
TI: 5
TF: 7
Training chips num:  [ 2 18 16 24 30 20]
Type selection: ['TF', 'TF', 'TI', 'TI', 'TI', 'TI']
TI: 4
TF: 2

 -----24 Sample Training Set----- 





Best Parameters:  {'kmeans__n_clusters': 20}
Accuracy Score:  -7.984586744613081

 -----12 Sample Training Set----- 





Best Parameters:  {'kmeans__n_clusters': 20}
Accuracy Score:  -2.9942935063571143

 -----6 Sample Training Set----- 





Best Parameters:  {'kmeans__n_clusters': 19}
Accuracy Score:  -5.12356433253157
Training chips num:  [ 7 19 31  6  4 17 32 22  8 21 26 27 29  5 28 25 12  3 24  2 11 30 10 18]
Type selection: ['TI', 'TI', 'TI', 'TF', 'TF', 'TI', 'TI', 'TF', 'TI', 'TI', 'TI', 'TF', 'TI', 'TF', 'TI', 'TF', 'TF', 'TI', 'TF', 'TF', 'TF', 'TF', 'TF', 'TF']
TI: 11
TF: 13
Training chips num:  [10  1 22 27  4 25  5  9 18 21 11 29]
Type selection: ['TF', 'TF', 'TI', 'TI', 'TI', 'TF', 'TI', 'TI', 'TF', 'TI', 'TI', 'TF']
TI: 7
TF: 5
Training chips num:  [14 28 27 29  6  1]
Type selection: ['TI', 'TI', 'TF', 'TI', 'TI', 'TI']
TI: 5
TF: 1

 -----24 Sample Training Set----- 





Best Parameters:  {'kmeans__n_clusters': 18}
Accuracy Score:  -7.4815095483842

 -----12 Sample Training Set----- 





Best Parameters:  {'kmeans__n_clusters': 20}
Accuracy Score:  -1.6253242123369405

 -----6 Sample Training Set----- 





Best Parameters:  {'kmeans__n_clusters': 20}
Accuracy Score:  -7.740317278541582
Training chips num:  [ 3 11 16  8 32 29 27 20 15  4 18 13 33 19 30 23 12  9  2  7 22 24 10  6]
Type selection: ['TI', 'TF', 'TI', 'TF', 'TI', 'TF', 'TF', 'TI', 'TF', 'TF', 'TF', 'TF', 'TF', 'TI', 'TF', 'TF', 'TF', 'TI', 'TF', 'TF', 'TI', 'TI', 'TF', 'TF']
TI: 8
TF: 16
Training chips num:  [12 10 15 33 24 29 31  1 22 20 16 14]
Type selection: ['TI', 'TF', 'TF', 'TI', 'TF', 'TI', 'TI', 'TI', 'TI', 'TF', 'TI', 'TI']
TI: 8
TF: 4
Training chips num:  [24  2 28  6  4 29]
Type selection: ['TI', 'TI', 'TI', 'TF', 'TI', 'TF']
TI: 4
TF: 2

 -----24 Sample Training Set----- 

Best Parameters:  {'kmeans__n_clusters': 20}
Accuracy Score:  -10.406639983547356

 -----12 Sample Training Set----- 





Best Parameters:  {'kmeans__n_clusters': 20}
Accuracy Score:  -3.1790632440591233

 -----6 Sample Training Set----- 





Best Parameters:  {'kmeans__n_clusters': 19}
Accuracy Score:  -3.175129779760642
Training chips num:  [ 4 13 26 10  6  7 18 16 24 28 33 31 27 14 15 17 25  9 30 22 21 23  1 20]
Type selection: ['TF', 'TF', 'TF', 'TI', 'TI', 'TF', 'TF', 'TF', 'TI', 'TF', 'TI', 'TF', 'TI', 'TI', 'TI', 'TI', 'TI', 'TI', 'TI', 'TF', 'TF', 'TF', 'TI', 'TI']
TI: 13
TF: 11
Training chips num:  [ 3 20  9 24 26 12 14 33  1 19 30  2]
Type selection: ['TI', 'TF', 'TI', 'TI', 'TI', 'TI', 'TI', 'TF', 'TF', 'TF', 'TF', 'TI']
TI: 7
TF: 5
Training chips num:  [ 9 10  2 11 24 13]
Type selection: ['TI', 'TF', 'TF', 'TF', 'TF', 'TI']
TI: 2
TF: 4

 -----24 Sample Training Set----- 





Best Parameters:  {'kmeans__n_clusters': 20}
Accuracy Score:  -7.536282652217402

 -----12 Sample Training Set----- 





Best Parameters:  {'kmeans__n_clusters': 20}
Accuracy Score:  -1.4174127597793231

 -----6 Sample Training Set----- 





Best Parameters:  {'kmeans__n_clusters': 20}
Accuracy Score:  -16.138081338809158
Training chips num:  [10 18 19 31 16 12 17 15 27 22 32  9 26 11  5 33  6  3 28 20 24 29  7  8]
Type selection: ['TF', 'TF', 'TF', 'TI', 'TI', 'TF', 'TI', 'TF', 'TF', 'TI', 'TI', 'TI', 'TF', 'TI', 'TI', 'TF', 'TI', 'TF', 'TF', 'TI', 'TF', 'TI', 'TI', 'TF']
TI: 12
TF: 12
Training chips num:  [18  3  6 27 14  7 24  8 21  4 31 26]
Type selection: ['TF', 'TI', 'TF', 'TF', 'TF', 'TI', 'TI', 'TF', 'TF', 'TI', 'TF', 'TI']
TI: 5
TF: 7
Training chips num:  [ 8  4  7  6 17 32]
Type selection: ['TF', 'TF', 'TI', 'TF', 'TI', 'TF']
TI: 2
TF: 4

 -----24 Sample Training Set----- 





Best Parameters:  {'kmeans__n_clusters': 20}
Accuracy Score:  -6.013048587636529

 -----12 Sample Training Set----- 





Best Parameters:  {'kmeans__n_clusters': 18}
Accuracy Score:  -2.7107783808981565

 -----6 Sample Training Set----- 





Best Parameters:  {'kmeans__n_clusters': 19}
Accuracy Score:  -2.3202915916809124
Training chips num:  [24 31 30 13  4 15 23 12  5  1 11 27 29 21 25  9  8 28 33 16  6 19 26  2]
Type selection: ['TF', 'TF', 'TI', 'TF', 'TI', 'TI', 'TI', 'TI', 'TI', 'TF', 'TI', 'TF', 'TF', 'TF', 'TI', 'TF', 'TI', 'TF', 'TF', 'TI', 'TF', 'TI', 'TF', 'TI']
TI: 12
TF: 12
Training chips num:  [ 8 18 28 11 21 16 26 29 14 17  2  1]
Type selection: ['TF', 'TF', 'TF', 'TF', 'TI', 'TF', 'TF', 'TF', 'TI', 'TI', 'TI', 'TI']
TI: 5
TF: 7
Training chips num:  [15 17 27 16 30 23]
Type selection: ['TF', 'TI', 'TI', 'TF', 'TF', 'TF']
TI: 2
TF: 4

 -----24 Sample Training Set----- 





Best Parameters:  {'kmeans__n_clusters': 20}
Accuracy Score:  -6.466649984514167

 -----12 Sample Training Set----- 





Best Parameters:  {'kmeans__n_clusters': 20}
Accuracy Score:  -3.165782464286643

 -----6 Sample Training Set----- 





Best Parameters:  {'kmeans__n_clusters': 20}
Accuracy Score:  -2.8749564527599025
Training chips num:  [ 8 17 25  7 14  1 32 21 13 20 33 26 11 22 29 27 28 24 12  5 31 23 10 16]
Type selection: ['TI', 'TI', 'TI', 'TI', 'TI', 'TI', 'TI', 'TI', 'TI', 'TI', 'TI', 'TF', 'TF', 'TF', 'TI', 'TF', 'TI', 'TI', 'TI', 'TF', 'TF', 'TI', 'TF', 'TF']
TI: 16
TF: 8
Training chips num:  [ 7 18 31  1 26 15 27 12  3 29  5 23]
Type selection: ['TF', 'TF', 'TI', 'TF', 'TI', 'TF', 'TI', 'TI', 'TI', 'TF', 'TI', 'TI']
TI: 7
TF: 5
Training chips num:  [ 7 15 23  2 17 19]
Type selection: ['TI', 'TF', 'TF', 'TF', 'TI', 'TF']
TI: 2
TF: 4

 -----24 Sample Training Set----- 





Best Parameters:  {'kmeans__n_clusters': 20}
Accuracy Score:  -4.854941067830518

 -----12 Sample Training Set----- 





Best Parameters:  {'kmeans__n_clusters': 20}
Accuracy Score:  -1.6408768141879384

 -----6 Sample Training Set----- 





Best Parameters:  {'kmeans__n_clusters': 19}
Accuracy Score:  -5.153202073658823
Training chips num:  [ 8 31 15 14 12  7 29 30 10 26 32 16 24  6 11 33  3  4  5 25 21 27 13  9]
Type selection: ['TF', 'TF', 'TI', 'TI', 'TI', 'TI', 'TI', 'TI', 'TI', 'TI', 'TI', 'TF', 'TF', 'TF', 'TI', 'TI', 'TI', 'TI', 'TI', 'TI', 'TF', 'TI', 'TF', 'TF']
TI: 16
TF: 8
Training chips num:  [ 7 26  2 16 19  5 14 12 33 11 24 28]
Type selection: ['TI', 'TF', 'TF', 'TF', 'TI', 'TF', 'TF', 'TF', 'TF', 'TI', 'TF', 'TF']
TI: 3
TF: 9
Training chips num:  [23 27 24 22 10 13]
Type selection: ['TF', 'TI', 'TI', 'TF', 'TF', 'TI']
TI: 3
TF: 3

 -----24 Sample Training Set----- 





Best Parameters:  {'kmeans__n_clusters': 19}
Accuracy Score:  -4.548828132144659

 -----12 Sample Training Set----- 





Best Parameters:  {'kmeans__n_clusters': 20}
Accuracy Score:  -2.8682487712589526

 -----6 Sample Training Set----- 





Best Parameters:  {'kmeans__n_clusters': 20}
Accuracy Score:  -9.015517244164746
Training chips num:  [ 8  5 13  2 15 31 25 20 32 10 18 27 28 29  7 33 24 30  9  3 11 12 22 14]
Type selection: ['TI', 'TI', 'TF', 'TF', 'TF', 'TF', 'TI', 'TI', 'TF', 'TI', 'TF', 'TF', 'TI', 'TI', 'TF', 'TI', 'TF', 'TF', 'TF', 'TF', 'TI', 'TF', 'TF', 'TF']
TI: 9
TF: 15
Training chips num:  [10 12  5 33 22 19 26 25 16 24 30  3]
Type selection: ['TF', 'TF', 'TF', 'TF', 'TI', 'TF', 'TF', 'TF', 'TF', 'TI', 'TI', 'TF']
TI: 3
TF: 9
Training chips num:  [ 2 13 16 24  5  8]
Type selection: ['TI', 'TF', 'TF', 'TF', 'TF', 'TF']
TI: 1
TF: 5

 -----24 Sample Training Set----- 

Best Parameters:  {'kmeans__n_clusters': 20}
Accuracy Score:  -9.114389519937545

 -----12 Sample Training Set----- 





Best Parameters:  {'kmeans__n_clusters': 20}
Accuracy Score:  -2.7287367436834877

 -----6 Sample Training Set----- 





Best Parameters:  {'kmeans__n_clusters': 19}
Accuracy Score:  -7.966660385996766
Training chips num:  [13 24 25 19 23 31 20  5 33  6 16  4 12 15 27  7 32 17 11 10  1  3 28 29]
Type selection: ['TF', 'TF', 'TI', 'TF', 'TF', 'TF', 'TI', 'TF', 'TI', 'TI', 'TI', 'TI', 'TI', 'TI', 'TF', 'TF', 'TF', 'TI', 'TI', 'TF', 'TF', 'TF', 'TI', 'TF']
TI: 11
TF: 13
Training chips num:  [22  5 14 31 11 24  8 29 21  1 16  3]
Type selection: ['TF', 'TI', 'TF', 'TI', 'TI', 'TF', 'TF', 'TI', 'TF', 'TI', 'TF', 'TF']
TI: 5
TF: 7
Training chips num:  [ 2 29  5 30 15  1]
Type selection: ['TI', 'TI', 'TI', 'TF', 'TI', 'TF']
TI: 4
TF: 2

 -----24 Sample Training Set----- 





Best Parameters:  {'kmeans__n_clusters': 20}
Accuracy Score:  -7.093851399647666

 -----12 Sample Training Set----- 





Best Parameters:  {'kmeans__n_clusters': 20}
Accuracy Score:  -5.016771661756767

 -----6 Sample Training Set----- 





Best Parameters:  {'kmeans__n_clusters': 20}
Accuracy Score:  -2.1591866976931753


In [48]:
# Slicing training scores into individual lists
kmeans_24_train_scores = kmeans_trainscores[0::3]
kmeans_12_train_scores = kmeans_trainscores[1::3]
kmeans_6_train_scores = kmeans_trainscores[2::3]

# Slicing test scores into individual lists
kmeans_24_test_scores = kmeans_testscores[0::3]
kmeans_12_test_scores = kmeans_testscores[1::3]
kmeans_6_test_scores = kmeans_testscores[2::3]


print("---------- KMEANS Accuracy Scores----------")

# Accuracy scores for test and training sets
train24_avg = mean(kmeans_24_train_scores)
test24_avg = mean(kmeans_24_test_scores)
print("Train24 Average Accuracy: ", train24_avg)
print("Test24 Average Accruacy: " , test24_avg)

# Accuracy scores for test and training sets
train12_avg = mean(kmeans_12_train_scores)
test12_avg = mean(kmeans_12_test_scores)
print("Train12 Average Accuracy: ", train12_avg)
print("Test12 Average Accruacy: " , test12_avg)

# Accuracy Scores for test and training sets
train6_avg = mean(kmeans_6_train_scores)
test6_avg = mean(kmeans_6_test_scores)
print("Train6 Average Accuracy: ", train6_avg)
print("Test6 Average Accruacy: " , test6_avg)

# Slicing fpr and tpr scores for each set
kmeans_24_fpr = fpr_test_scores[0::3]
kmeans_24_tpr = tpr_test_scores[0::3]
kmeans_12_tpr = tpr_test_scores[1::3]
kmeans_12_fpr = fpr_test_scores[1::3]
kmeans_6_tpr = tpr_test_scores[2::3]
kmeans_6_fpr = fpr_test_scores[2::3]


# Printing Mean score for each set

print("---------- KMEANS Mean FPR and TPR Scores----------")

# Mean FPR and TPR scores for kmeans training set
kmeans_24_fpr_train = mean(fpr_train_scores[0::3])
kmeans_24_tpr_train = mean(tpr_train_scores[0::3])
kmeans_12_fpr_train = mean(fpr_train_scores[1::3])
kmeans_12_tpr_train = mean(tpr_train_scores[1::3])
kmeans_6_fpr_train = mean(fpr_train_scores[2::3])
kmeans_6_tpr_train = mean(tpr_train_scores[2::3])

# Mean FPR and TPR scores for kmeans test set
kmeans_24_fpr_test = mean(fpr_test_scores[0::3])
kmeans_24_tpr_test = mean(tpr_test_scores[0::3])
kmeans_12_fpr_test = mean(fpr_test_scores[1::3])
kmeans_12_tpr_test = mean(tpr_test_scores[1::3])
kmeans_6_fpr_test = mean(fpr_test_scores[2::3])
kmeans_6_tpr_test = mean(tpr_test_scores[2::3])

# Printing Mean FPR and TPR scores for each set
print("Kmeans 24 Sample Training Set FPR: ", kmeans_24_fpr_train)
print("Kmeans 24 Sample Training Set TPR: ", kmeans_24_tpr_train)
print("Kmeans 24 Sample Test Set FPR: ", kmeans_24_fpr_test)
print("Kmeans 24 Sample Test Set TPR: ", kmeans_24_tpr_test)
print("Kmeans 12 Sample Training Set FPR: ", kmeans_12_fpr_train)
print("Kmeans 12 Sample Training Set TPR: ", kmeans_12_tpr_train)
print("Kmeans 12 Sample Test Set FPR: ", kmeans_12_fpr_test)
print("Kmeans 12 Sample Test Set TPR: ", kmeans_12_tpr_test)
print("Kmeans 6 Sample Training Set FPR: ", kmeans_6_fpr_train)
print("Kmeans 6 Sample Training Set TPR: ", kmeans_6_tpr_train)
print("Kmeans 6 Sample Test Set FPR: ", kmeans_6_fpr_test)
print("Kmeans 6 Sample Test Set TPR: ", kmeans_6_tpr_test)




---------- KMEANS Accuracy Scores----------
Train24 Average Accuracy:  -7.026808645532324
Test24 Average Accruacy:  0.916
Train12 Average Accuracy:  -3.6722755191167726
Test12 Average Accruacy:  0.9167619047619048
Train6 Average Accuracy:  -6.465544564495971
Test6 Average Accruacy:  0.9191111111111111
---------- KMEANS Mean FPR and TPR Scores----------
Kmeans 24 Sample Training Set FPR:  0.7952862986686516
Kmeans 24 Sample Training Set TPR:  0.9733918367438188
Kmeans 24 Sample Test Set FPR:  0.8166666666666667
Kmeans 24 Sample Test Set TPR:  0.9797101449275363
Kmeans 12 Sample Training Set FPR:  0.8193452380952381
Kmeans 12 Sample Training Set TPR:  0.9819073498964803
Kmeans 12 Sample Test Set FPR:  0.7714285714285715
Kmeans 12 Sample Test Set TPR:  0.9766045548654244
Kmeans 6 Sample Training Set FPR:  nan
Kmeans 6 Sample Training Set TPR:  0.9835507246376811
Kmeans 6 Sample Test Set FPR:  0.7981481481481482
Kmeans 6 Sample Test Set TPR:  0.9814814814814815


### DBSCAN

In [None]:
# Create DBSCAN Model
dbscan = Pipeline([('scaler',StandardScaler()),('dbscan', DBSCAN())])
dbscan