#  Intro to Hardware Security and Trust Final Project

## Import Libraries

In [1]:
from sklearn.model_selection import GridSearchCV,train_test_split
from sklearn.cluster import KMeans,DBSCAN
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score,precision_score,recall_score,confusion_matrix
from sklearn.metrics import roc_curve

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import csv
import random
import os
from statistics import mean


## Dataset Splitting Function

In [2]:
xlsx_path = 'C:\\Users\patri\\git\\HSTFinalProject\\Classifier-for-Hardware-Trojan-Detection\\ROFreq\\ROFreq\\Chip'
csv_path ='C:\\Users\patri\\git\HSTFinalProject\Classifier-for-Hardware-Trojan-Detection\ROFreq_C\Chip'

for i in np.arange(1,34):
    data_xls = pd.read_excel('{}{}.xlsx'.format(xlsx_path,i), dtype=str, index_col=None)
    data_xls.to_csv('{}{}.csv'.format(csv_path,i), encoding='utf-8', index=False)

In [3]:
def csv_read(chip_num,type,csv_path):
    data= []
    data_list= []
    label_list = np.zeros(25)
    label_list[0] = 1
    label_list[-1] = 1
    with open('{}{}.csv'.format(csv_path,chip_num)) as csvfile:
        csv_reader = csv.reader(csvfile)
   
        for row in csv_reader:
            # rounding
            row = [round(float(num[0:12]),3) for num in row]     
            data_list.append(row)   

        if type == 'TI':
            data.append(data_list[0])
            data.append(data_list[-1])
            label = [1,1]
        elif type == 'TF':
            data = data_list[1:-1]
            label = label_list[1:-1]
        else:
            data = data_list
            label = label_list 
                  
        
    return data,label

In [4]:
def train_test_split(num_sel,case_sel,csv_path):
    chips_num = np.arange(1,34)
    indices = range(33)
    # randomly select #num chips
    sel = random.sample(indices,num_sel)
    # split tain and test data
    train_chips = chips_num[sel]
    test_chips = np.delete(chips_num,sel)
    print('Training chips num: ',train_chips)
    temp = []

    for i in range(int(num_sel/2)):
        temp.extend(['TF','TI'])
 
    type_sel_1 = random.sample(temp,num_sel)
    type_sel_3 = random.choices(['TI','TF'],k=num_sel)
    
    if case_sel == 1:
        print('Type selection:', type_sel_1)
        print('TI:',type_sel_1.count('TI'))
        print('TF:',type_sel_1.count('TF'))

    else:
        print('Type selection:', type_sel_3)
        print('TI:',type_sel_3.count('TI'))
        print('TF:',type_sel_3.count('TF'))

    cnt = 0
    train_data = []
    train_label = []

    for chip_num in train_chips:
        if case_sel == 1:
            data,label = csv_read(chip_num,type_sel_1[cnt],csv_path)
        elif case_sel ==3:
            data,label = csv_read(chip_num,type_sel_3[cnt],csv_path)
        train_data.extend(data)
        train_label.extend(label)
        cnt += 1

    test_data = []
    test_label = []
    for chip_num in test_chips:
        data,label = csv_read(chip_num,'ALL',csv_path)
        test_data.extend(data)
        test_label.extend(label)
    
    return train_data, train_label, test_data, test_label

##  CASE 1

### Case 1 Datasets

In [5]:
# arg1 => number of training samples
# arg2 => number of case (enter 1 or 3 only)
train24_data, train24_label, test24_data, test24_label = train_test_split(24,1,csv_path)
train12_data, train12_label, test12_data, test12_label = train_test_split(12,1,csv_path)
train6_data, train6_label, test6_data, test6_label = train_test_split(6,1,csv_path)

Training chips num:  [ 5  9 26  3 10 23  1 20 31  6 13 19 12 33 27 25 11 17  8  2 18  4 14 28]
Type selection: ['TF', 'TF', 'TI', 'TI', 'TI', 'TI', 'TI', 'TI', 'TF', 'TF', 'TF', 'TF', 'TI', 'TI', 'TF', 'TI', 'TI', 'TF', 'TF', 'TI', 'TF', 'TI', 'TF', 'TF']
TI: 12
TF: 12
Training chips num:  [ 3 11  8 18 30 20 33 22  1  2 14 13]
Type selection: ['TF', 'TF', 'TI', 'TF', 'TI', 'TI', 'TF', 'TF', 'TI', 'TI', 'TI', 'TF']
TI: 6
TF: 6
Training chips num:  [ 4  9 20  8 24 19]
Type selection: ['TI', 'TF', 'TF', 'TI', 'TI', 'TF']
TI: 3
TF: 3


In [6]:
pd.DataFrame(train24_data)

Unnamed: 0,0,1,2,3,4,5,6,7
0,180.0,191.2,210.0,222.0,211.2,217.6,208.0,238.0
1,174.4,185.2,203.6,214.8,204.8,210.8,201.6,230.4
2,170.4,181.2,198.8,210.0,200.4,206.0,197.2,225.2
3,167.2,177.6,195.2,205.6,196.8,202.4,193.6,221.2
4,163.6,174.0,190.8,201.6,192.4,198.0,189.2,216.4
...,...,...,...,...,...,...,...,...
295,154.0,164.8,181.2,190.4,182.0,185.2,178.4,204.4
296,147.2,158.0,173.2,182.4,174.8,177.6,170.8,196.0
297,141.6,152.0,166.8,176.0,168.0,171.2,164.4,189.2
298,136.4,146.0,160.8,169.2,161.6,164.8,158.4,182.0


In [7]:
# 24 Training - Create Dataframe objects and compute correlations
train24_data_df = pd.DataFrame(train24_data)
train24_label_s = pd.Series(train24_label)
train24_data_df.corrwith(train24_label_s).sort_values(ascending=False)

1    0.353408
3    0.350473
0    0.345148
4    0.341753
2    0.340716
7    0.340712
5    0.339354
6    0.337812
dtype: float64

In [8]:
# 12 Training - Create Dataframe objects and compute correlations
train12_data_df = pd.DataFrame(train12_data)
train12_label_s = pd.Series(train12_label)
train12_data_df.corrwith(train12_label_s).sort_values(ascending=False)

0    0.386344
6    0.375283
2    0.374979
3    0.368546
1    0.364131
5    0.362116
7    0.346136
4    0.339579
dtype: float64

In [9]:
# 6 Training - Create Dataframe objects and compute correlations
train6_data_df = pd.DataFrame(train6_data)
train6_label_s = pd.Series(train6_label)
train6_data_df.corrwith(train6_label_s).sort_values(ascending=False)

5    0.632901
6    0.624794
2    0.621514
7    0.619775
3    0.619472
4    0.612446
1    0.608329
0    0.607035
dtype: float64

Correlation between features seems to be about equal for predictions with training and test splits

### Tuning KNeighbors Classifier

In [10]:
# Creating KNeighborsClassifier Pipeline
KneighC = Pipeline([('scaler',StandardScaler()),('knc', KNeighborsClassifier())])

# Finding the best parameters for KNeighborsClassifier

## Create parameter grid for gridsearch algorithm
knc_param_Grid=  { 'knc__n_neighbors': np.arange(2,10,1),
                  'knc__leaf_size': np.arange(2,30,2),
                 }

## Create Grid search
knc_grid_search = GridSearchCV(KneighC,
                               param_grid=knc_param_Grid,
                               scoring='accuracy',
                               refit=True,
                               cv=5,
                               verbose=1)

ii=0
### Fit to training data
KNC_trainscores=[]
KNC_testscores=[]

tpr_train_scores = []
fpr_train_scores = []
tpr_scores = []
fpr_scores = []

while ii < 20:

    # Splits for each dataset
    train24_data, train24_label, test24_data, test24_label = train_test_split(24,1,csv_path)
    train12_data, train12_label, test12_data, test12_label = train_test_split(12,1,csv_path)
    train6_data, train6_label, test6_data, test6_label = train_test_split(6,1,csv_path)
    
    train24_data = pd.DataFrame(train24_data)
    test24_data = pd.DataFrame(test24_data)
    
    train12_data = pd.DataFrame(train12_data)
    test12_data = pd.DataFrame(test12_data)
    
    train6_data = pd.DataFrame(train6_data)
    test6_data = pd.DataFrame(test6_data)
    

    # Create lists of training sets to parse
    training_sets = [train24_data, train12_data, train6_data]
    training_labels = [train24_label, train12_label, train6_label]
    
    # Create lists of test sets to parse
    test_sets = [test24_data, test12_data, test6_data]
    test_labels =[test24_label, test12_label, test6_label]
    
    Sets = ["-----24 Sample Training Set-----","-----12 Sample Training Set-----", "-----6 Sample Training Set-----"]
    t_sets = ["-----24 Sample Test Set-----","-----12 Sample Test Set-----", "-----6 Sample Test Set-----"]
    
    #Iterater
    ii = ii + 1
    
    # Loop responsible for parsing training sets and training labels
    for (a,i,j,m,n) in zip(Sets, training_sets,training_labels, test_sets, test_labels):
        print("\n",a,"\n")
        knc_grid_search.fit(i,j)
        ### Print Best parameters
        print("\n",knc_grid_search.best_params_)
        ## Saving the tuned model
        model = knc_grid_search.best_estimator_
        print("Best Accuracy: ",knc_grid_search.best_score_)
        KNC_trainscores+= [knc_grid_search.best_score_]
        y_pred = model.predict(m)
        KNC_testscores += [accuracy_score(n,y_pred)]
        
        # Calculate Average TPR and Average FPR for training set
        tn, fp, fn, tp = confusion_matrix(j, model.predict(i)).ravel()
        tpr_train = tp/(tp+fn)
        fpr_train = fp/(fp+tn)

        # Create list of TPR and FPR for train
        tpr_train_scores += [tpr_train]
        fpr_train_scores += [fpr_train]

        # Calculate Average TPR and Average FPR for test set
        tn, fp, fn, tp = confusion_matrix(n, y_pred).ravel()
        tpr_test = tp/(tp+fn)
        fpr_test = fp/(fp+tn)
        
        # Create list of TPR and FPR
        tpr_scores += [tpr_test]
        fpr_scores += [fpr_test]

Training chips num:  [13  5 30 29 31  7 21 23  6 18  9  3 15  2 28 11 19 10  8 22 17 33 14  4]
Type selection: ['TF', 'TI', 'TF', 'TF', 'TI', 'TI', 'TI', 'TF', 'TF', 'TI', 'TI', 'TF', 'TF', 'TI', 'TI', 'TI', 'TF', 'TF', 'TF', 'TI', 'TF', 'TI', 'TI', 'TF']
TI: 12
TF: 12
Training chips num:  [25  7  2 17 13 20  8  3  9 26 33 21]
Type selection: ['TI', 'TF', 'TF', 'TI', 'TF', 'TF', 'TF', 'TI', 'TI', 'TI', 'TF', 'TI']
TI: 6
TF: 6
Training chips num:  [ 3 32  7 31  5 26]
Type selection: ['TI', 'TI', 'TF', 'TF', 'TI', 'TF']
TI: 3
TF: 3

 -----24 Sample Training Set----- 

Fitting 5 folds for each of 112 candidates, totalling 560 fits

 {'knc__leaf_size': 2, 'knc__n_neighbors': 8}
Best Accuracy:  0.9400000000000001

 -----12 Sample Training Set----- 

Fitting 5 folds for each of 112 candidates, totalling 560 fits

 {'knc__leaf_size': 2, 'knc__n_neighbors': 4}
Best Accuracy:  0.9400000000000001

 -----6 Sample Training Set----- 

Fitting 5 folds for each of 112 candidates, totalling 560 fits



### Results for KneighborsClassifier

In [11]:
# ----- Accuracy scores for test and training sets -----
# Slicing training scores into individual lists
KNC_24_train_scores = KNC_trainscores[0::3]
KNC_12_train_scores= KNC_trainscores[1::3]
KNC_6_train_scores = KNC_trainscores[2::3]

# Slicing test scores into individual lists
KNC_24_test_scores = KNC_testscores[0::3]
KNC_12_test_scores = KNC_testscores[1::3]
KNC_6_test_scores = KNC_testscores[2::3]

print("---------- Kneighbors Accuracy Scores----------")

# Accuracy scores for test and training sets
train24_avg = mean(KNC_24_train_scores)
test24_avg = mean(KNC_24_test_scores)
print("TRAIN24 Average Accuracy: ", train24_avg)
print("Test24 Average Accruacy: " , test24_avg)

# Accuracy scores for test and training sets
train12_avg = mean(KNC_12_train_scores)
test12_avg = mean(KNC_12_test_scores)
print("Train12 Average Accuracy: ", train12_avg)
print("Test12 Average Accruacy: " , test12_avg)

# Accuracy Scores for test and training sets
train6_avg = mean(KNC_6_train_scores)
test6_avg = mean(KNC_6_test_scores)
print("TRAIN6 Average Accuracy: ", train6_avg)
print("Test6 Average Accruacy: " , test6_avg)

# ----- Average TPR and Average FPR -----
# TRAINING SET
KNC_24train_TPR = tpr_train_scores[0::3]
KNC_12train_TPR = tpr_train_scores[1::3]
KNC_6train_TPR = tpr_train_scores[2::3]
KNC_24train_FPR = fpr_train_scores[0::3]
KNC_12train_FPR = fpr_train_scores[1::3]
KNC_6train_FPR = fpr_train_scores[2::3]


# TEST SET
KNC_24test_TPR = tpr_scores[0::3]
KNC_12test_TPR = tpr_scores[1::3]
KNC_6test_TPR = tpr_scores[2::3]
KNC_24test_FPR = fpr_scores[0::3]
KNC_12test_FPR = fpr_scores[1::3]
KNC_6test_FPR = fpr_scores[2::3]

print("\n","---------- KNC Average TPR----------")
TPR_train24 = mean(KNC_24train_TPR)
TPR_test24 = mean(KNC_24test_TPR)
print("TRAIN24 Average TPR: ", TPR_train24)
print("Test24 Average TPR: " , TPR_test24)
TPR_train12 = mean(KNC_12train_TPR)
TPR_test12 = mean(KNC_12test_TPR)
print("Train12 Average TPR: ", TPR_train12)
print("Test12 Average TPR: " , TPR_test12)
TPR_train6 = mean(KNC_6train_TPR)
TPR_test6 = mean(KNC_6test_TPR)
print("TRAIN6 Average TPR: ", TPR_train6)
print("Test6 Average TPR: " , TPR_test6)

print("\n","---------- KNC Average FPR----------")
FPR_train24 = mean(KNC_24train_FPR)
FPR_test24 = mean(KNC_24test_FPR)
print("TRAIN24 Average FPR: ", FPR_train24)
print("Test24 Average FPR: " , FPR_test24)
FPR_train12 = mean(KNC_12train_FPR)
FPR_test12 = mean(KNC_12test_FPR)
print("Train12 Average FPR: ", FPR_train12)
print("Test12 Average FPR: " , FPR_test12)
FPR_train6 = mean(KNC_6train_FPR)
FPR_test6 = mean(KNC_6test_FPR)
print("TRAIN6 Average FPR: ", FPR_train6)
print("Test6 Average FPR: " , FPR_test6)


---------- Kneighbors Accuracy Scores----------
TRAIN24 Average Accuracy:  0.937
Test24 Average Accruacy:  0.9331111111111111
Train12 Average Accuracy:  0.9373333333333334
Test12 Average Accruacy:  0.917047619047619
TRAIN6 Average Accuracy:  0.9666666666666667
Test6 Average Accruacy:  0.9082222222222223

 ---------- KNC Average TPR----------
TRAIN24 Average TPR:  0.7958333333333333
Test24 Average TPR:  0.48333333333333334
Train12 Average TPR:  0.7708333333333334
Test12 Average TPR:  0.5535714285714286
TRAIN6 Average TPR:  0.9333333333333333
Test6 Average TPR:  0.7277777777777777

 ---------- KNC Average FPR----------
TRAIN24 Average FPR:  0.01213768115942029
Test24 Average FPR:  0.027777777777777776
Train12 Average FPR:  0.009782608695652175
Test12 Average FPR:  0.05134575569358178
TRAIN6 Average FPR:  0.005072463768115942
Test6 Average FPR:  0.07608695652173914


### Tuning Logistic Regression

In [12]:
# Create Logisitic Regression Model
Logreg = Pipeline([('scaler',StandardScaler()),('logreg',LogisticRegression(random_state=42))])
Logreg

# Finding the best parameters for Logistic Regression

## Create Parameter Grid
logreg_param_grid = {'logreg__C': [0.001,0.01,0.1,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0],
                     'logreg__tol': [0.0001, 0.001, 0.01]
                    }

## Create Log Reg Gridsearch
logreg_gridsearch = GridSearchCV(Logreg,
                                 param_grid=logreg_param_grid,
                                 scoring='accuracy',
                                 refit=True,
                                 cv=5,
                                 verbose=1)

### Fit to training data
logreg_trainscores=[]
logreg_testscores=[]

# tpr and fpr scores
tpr_scores = []
fpr_scores = []

tpr_train_scores = []
fpr_train_scores = []
tpr_test_scores = []
fpr_test_scores = []

ii=0
while ii < 20:

    # Splits for each dataset
    train24_data, train24_label, test24_data, test24_label = train_test_split(24,1,csv_path)
    train12_data, train12_label, test12_data, test12_label = train_test_split(12,1,csv_path)
    train6_data, train6_label, test6_data, test6_label = train_test_split(6,1,csv_path)
    
    # Create pandas dataframes and series
    train24_data = pd.DataFrame(train24_data)
    test24_data = pd.DataFrame(test24_data)
    
    train12_data = pd.DataFrame(train12_data)
    test12_data = pd.DataFrame(test12_data)
    
    train6_data = pd.DataFrame(train6_data)
    test6_data = pd.DataFrame(test6_data)
    
    # Create lists of training sets to parse
    training_sets = [train24_data, train12_data, train6_data]
    training_labels = [train24_label, train12_label, train6_label]
    
    # Create lists of test sets to parse
    test_sets = [test24_data, test12_data, test6_data]
    test_labels =[test24_label, test12_label, test6_label]
    
    Sets = ["-----24 Sample Training Set-----","-----12 Sample Training Set-----", "-----6 Sample Training Set-----"]
    t_sets = ["-----24 Sample Test Set-----","-----12 Sample Test Set-----", "-----6 Sample Test Set-----"]
    
    #Iterater
    ii = ii + 1
    
  

    # Loop responsible for parsing training sets and training labels
    for (a,i,j,m,n) in zip(Sets, training_sets,training_labels, test_sets, test_labels):
        print("\n",a,"\n")
        logreg_gridsearch.fit(i,j)
        ### Print Best parameters
        print("\n",logreg_gridsearch.best_params_)
        ## Saving the tuned model
        model = logreg_gridsearch.best_estimator_
        print("Best Accuracy: ",logreg_gridsearch.best_score_)
       
        logreg_trainscores+= [logreg_gridsearch.best_score_]
       
        y_pred = model.predict(m)
        logreg_testscores += [accuracy_score(n,y_pred)]

        # Calculate TPR and FPR on training set
        tp, fn, fp, tn = confusion_matrix(j,model.predict(i)).ravel()
        fpr = fp/(fp+tn)
        tpr = tp/(tp+fn)
        
        tpr_train_scores += [tpr]
        fpr_train_scores += [fpr]
        
        # Calculate FPR and TPR on test set
        tp, fn, fp, tn = confusion_matrix(n,y_pred).ravel()
        fpr_test = fp/(fp+tn)
        tpr_test = tp/(tp+fn)

        tpr_test_scores += [tpr_test]
        fpr_test_scores += [fpr_test]

Training chips num:  [20 11 13  5 12 28 19 27 21 26 31  3  4  6 29 24 22 23 14 30 33  2 15 16]
Type selection: ['TF', 'TF', 'TI', 'TI', 'TF', 'TF', 'TI', 'TI', 'TF', 'TF', 'TI', 'TF', 'TI', 'TF', 'TI', 'TI', 'TI', 'TF', 'TI', 'TF', 'TF', 'TF', 'TI', 'TI']
TI: 12
TF: 12
Training chips num:  [32  3 28 21 10  9 19 23 29  1  5 27]
Type selection: ['TF', 'TI', 'TI', 'TI', 'TF', 'TI', 'TF', 'TI', 'TF', 'TF', 'TI', 'TF']
TI: 6
TF: 6
Training chips num:  [ 3  2 26 29 11  1]
Type selection: ['TF', 'TI', 'TI', 'TI', 'TF', 'TF']
TI: 3
TF: 3

 -----24 Sample Training Set----- 

Fitting 5 folds for each of 39 candidates, totalling 195 fits

 {'logreg__C': 0.9, 'logreg__tol': 0.0001}
Best Accuracy:  0.9933333333333334

 -----12 Sample Training Set----- 

Fitting 5 folds for each of 39 candidates, totalling 195 fits

 {'logreg__C': 0.001, 'logreg__tol': 0.0001}
Best Accuracy:  0.9199999999999999

 -----6 Sample Training Set----- 

Fitting 5 folds for each of 39 candidates, totalling 195 fits

 {'logr

### Log Reg Results

In [13]:
# Slicing training scores into individual lists
logreg_24_train_scores = logreg_trainscores[0::3]
logreg_12_train_scores= logreg_trainscores[1::3]
logreg_6_train_scores = logreg_trainscores[2::3]

# Slicing test scores into individual lists
logreg_24_test_scores = logreg_testscores[0::3]
logreg_12_test_scores = logreg_testscores[1::3]
logreg_6_test_scores = logreg_testscores[2::3]

print("---------- Logistic Regression Accuracy Scores----------")

# Accuracy scores for test and training sets
train24_avg = mean(logreg_24_train_scores)
test24_avg = mean(logreg_24_test_scores)
print("TRAIN24 Average Accuracy: ", train24_avg)
print("Test24 Average Accruacy: " , test24_avg)

# Accuracy scores for test and training sets
train12_avg = mean(logreg_12_train_scores)
test12_avg = mean(logreg_12_test_scores)
print("Train12 Average Accuracy: ", train12_avg)
print("Test12 Average Accruacy: " , test12_avg)

# Accuracy Scores for test and training sets
train6_avg = mean(logreg_6_train_scores)
test6_avg = mean(logreg_6_test_scores)
print("TRAIN6 Average Accuracy: ", train6_avg)
print("Test6 Average Accruacy: " , test6_avg)

# Slicing fpr and tpr scores for each set
logreg_24_fpr_train = fpr_train_scores[0::3]
logreg_24_tpr_train = tpr_train_scores[0::3]
logreg_12_fpr_train = fpr_train_scores[1::3]
logreg_12_tpr_train = tpr_train_scores[1::3]
logreg_6_fpr_train = fpr_train_scores[2::3]
logreg_6_tpr_train = tpr_train_scores[2::3]

logreg_24_fpr_test = fpr_test_scores[0::3]
logreg_24_tpr_test = tpr_test_scores[0::3]
logreg_12_fpr_test = fpr_test_scores[1::3]
logreg_12_tpr_test = tpr_test_scores[1::3]
logreg_6_fpr_test = fpr_test_scores[2::3]
logreg_6_tpr_test = tpr_test_scores[2::3]


# Printing Mean score for each set

print("---------- Logistic Regression Mean FPR and TPR Scores----------")

# Mean FPR and TPR scores for training sets
print("Mean FPR for 24 Sample Training Set: ", mean(logreg_24_fpr_train))
print("Mean TPR for 24 Sample Training Set: ", mean(logreg_24_tpr_train))
print("Mean FPR for 24 Sample Test Set: ", mean(logreg_24_fpr_test))
print("Mean TPR for 24 Sample Test Set: ", mean(logreg_24_tpr_test))

print("Mean FPR for 12 Sample Training Set: ", mean(logreg_12_fpr_train))
print("Mean TPR for 12 Sample Training Set: ", mean(logreg_12_tpr_train))
print("Mean FPR for 12 Sample Test Set: ", mean(logreg_12_fpr_test))
print("Mean TPR for 12 Sample Test Set: ", mean(logreg_12_tpr_test))

print("Mean FPR for 6 Sample Training Set: ", mean(logreg_6_fpr_train))
print("Mean TPR for 6 Sample Training Set: ", mean(logreg_6_tpr_train))
print("Mean FPR for 6 Sample Test Set: ", mean(logreg_6_fpr_test))
print("Mean TPR for 6 Sample Test Set: ", mean(logreg_6_tpr_test))




---------- Logistic Regression Accuracy Scores----------
TRAIN24 Average Accuracy:  0.9316666666666666
Test24 Average Accruacy:  0.9128888888888889
Train12 Average Accuracy:  0.935
Test12 Average Accruacy:  0.9159047619047619
TRAIN6 Average Accuracy:  0.9546666666666667
Test6 Average Accruacy:  0.9139259259259259
---------- Logistic Regression Mean FPR and TPR Scores----------
Mean FPR for 24 Sample Training Set:  0.80625
Mean TPR for 24 Sample Training Set:  0.9963768115942029
Mean FPR for 24 Sample Test Set:  0.8388888888888889
Mean TPR for 24 Sample Test Set:  0.9782608695652174
Mean FPR for 12 Sample Training Set:  0.7916666666666666
Mean TPR for 12 Sample Training Set:  0.9992753623188406
Mean FPR for 12 Sample Test Set:  0.8476190476190476
Mean TPR for 12 Sample Test Set:  0.9822981366459628
Mean FPR for 6 Sample Training Set:  0.5333333333333333
Mean TPR for 6 Sample Training Set:  0.9992753623188406
Mean FPR for 6 Sample Test Set:  0.6888888888888889
Mean TPR for 6 Sample Test 

## CASE 3

### KMeans

In [14]:
# function to derive TPR and FPR
def tpr_fpr(clusters,labels):
    # Create confusion matrix
    cm = confusion_matrix(clusters,labels)
     # Derive TPR and FPR from confusion matrix
    tpr = cm[1,1]/(cm[1,0]+cm[1,1])
    fpr = cm[0,1]/(cm[0,0]+cm[0,1])
    return tpr,fpr

In [15]:
# Create KMeans model

# Kmeans pipeline with scaler and pca



kmeans = Pipeline([('scaler',StandardScaler()),('kmeans',KMeans(random_state=42))])

### kmeans accuracy score lists
kmeans_trainscores=[]
kmeans_testscores=[]

# tpr and fpr scores
tpr_scores = []
fpr_scores = []

tpr_train_scores = []
fpr_train_scores = []
tpr_test_scores = []
fpr_test_scores = []

ii=0
while ii < 20:

    # Splits for each dataset
    train24_data, train24_label, test24_data, test24_label = train_test_split(24,3,csv_path)
    train12_data, train12_label, test12_data, test12_label = train_test_split(12,3,csv_path)
    train6_data, train6_label, test6_data, test6_label = train_test_split(6,3,csv_path)
    
    # Create pandas dataframes and series
    train24_data = pd.DataFrame(train24_data)
    test24_data = pd.DataFrame(test24_data)
    
    train12_data = pd.DataFrame(train12_data)
    test12_data = pd.DataFrame(test12_data)
    
    train6_data = pd.DataFrame(train6_data)
    test6_data = pd.DataFrame(test6_data)
    
    # Create lists of training sets to parse
    training_sets = [train24_data, train12_data, train6_data]
    training_labels = [train24_label, train12_label, train6_label]
    
    # Create lists of test sets to parse
    test_sets = [test24_data, test12_data, test6_data]
    test_labels =[test24_label, test12_label, test6_label]
    
    Sets = ["-----24 Sample Training Set-----","-----12 Sample Training Set-----", "-----6 Sample Training Set-----"]
    t_sets = ["-----24 Sample Test Set-----","-----12 Sample Test Set-----", "-----6 Sample Test Set-----"]
    
    #Iterater
    ii = ii + 1

    # Loop responsible for parsing training sets and training labels
    for (a,i,j,m,n) in zip(Sets, training_sets,training_labels, test_sets, test_labels):
        print("\n",a,"\n")
        j = [int(i) for i in j]
        # Fit the model
        kmeans.fit(i)

        # Get clusters from training and test set
        train_clusters = kmeans.predict(i)
        test_clusters = kmeans.predict(m)

        # Compute accuracy score between training cluster labels and training labels
        train_score = accuracy_score(train_clusters,j)
        
        # Compute accuracy score between test cluster labels and test labels
        test_score = accuracy_score(test_clusters,n)

        # Appeniding to kmeans_train_scores list
        kmeans_trainscores.append(train_score)

        # Appending to kmeans_test_scores list
        kmeans_testscores.append(test_score)

        # derive TPR and TPR from training clusters and training labels
        tpr_train, fpr_train = tpr_fpr(train_clusters,j)
        tpr_train_scores.append(tpr_train)
        fpr_train_scores.append(fpr_train)

        # derive TPR and TPR from test clusters and test labels
        tpr_test, fpr_test = tpr_fpr(test_clusters,n)
        tpr_test_scores.append(tpr_test)
        fpr_test_scores.append(fpr_test)
        
        

Training chips num:  [27 25  4  1 16  3 24  2  9 12 19 13 11 20 17 15  8 31 23 21 18 28  7 29]
Type selection: ['TF', 'TI', 'TF', 'TF', 'TI', 'TF', 'TI', 'TI', 'TF', 'TI', 'TF', 'TI', 'TF', 'TF', 'TF', 'TI', 'TF', 'TF', 'TF', 'TI', 'TI', 'TI', 'TI', 'TF']
TI: 11
TF: 13
Training chips num:  [30 14  4 31 29 24 27 19  1 25  3 15]
Type selection: ['TF', 'TF', 'TF', 'TI', 'TI', 'TI', 'TI', 'TF', 'TF', 'TI', 'TI', 'TF']
TI: 6
TF: 6
Training chips num:  [25 31 20 10  9  7]
Type selection: ['TF', 'TI', 'TI', 'TI', 'TI', 'TF']
TI: 4
TF: 2

 -----24 Sample Training Set----- 


 -----12 Sample Training Set----- 


 -----6 Sample Training Set----- 

Training chips num:  [21 32 33 11 28  3 16  4 29  8 12 20 10  6 19 22  1 18 15 31 30 24  5  7]
Type selection: ['TF', 'TI', 'TI', 'TI', 'TF', 'TF', 'TI', 'TI', 'TI', 'TI', 'TF', 'TF', 'TI', 'TF', 'TI', 'TI', 'TF', 'TF', 'TI', 'TI', 'TI', 'TF', 'TF', 'TF']
TI: 13
TF: 11
Training chips num:  [ 3 30 29  7 28 10 12 16  4 23 20 33]
Type selection: ['TF', 'T

In [16]:
# Slicing training scores into individual lists
kmeans_24_train_scores = kmeans_trainscores[0::3]
kmeans_12_train_scores = kmeans_trainscores[1::3]
kmeans_6_train_scores = kmeans_trainscores[2::3]

# Slicing test scores into individual lists
kmeans_24_test_scores = kmeans_testscores[0::3]
kmeans_12_test_scores = kmeans_testscores[1::3]
kmeans_6_test_scores = kmeans_testscores[2::3]


print("---------- KMEANS Accuracy Scores----------")

# Accuracy scores for test and training sets
train24_avg = mean(kmeans_24_train_scores)
test24_avg = mean(kmeans_24_test_scores)
print("Train24 Average Accuracy: ", train24_avg)
print("Test24 Average Accruacy: " , test24_avg)

# Accuracy scores for test and training sets
train12_avg = mean(kmeans_12_train_scores)
test12_avg = mean(kmeans_12_test_scores)
print("Train12 Average Accuracy: ", train12_avg)
print("Test12 Average Accruacy: " , test12_avg)

# Accuracy Scores for test and training sets
train6_avg = mean(kmeans_6_train_scores)
test6_avg = mean(kmeans_6_test_scores)
print("Train6 Average Accuracy: ", train6_avg)
print("Test6 Average Accruacy: " , test6_avg)

# Slicing fpr and tpr scores for each set
kmeans_24_fpr = fpr_test_scores[0::3]
kmeans_24_tpr = tpr_test_scores[0::3]
kmeans_12_tpr = tpr_test_scores[1::3]
kmeans_12_fpr = fpr_test_scores[1::3]
kmeans_6_tpr = tpr_test_scores[2::3]
kmeans_6_fpr = fpr_test_scores[2::3]


# Printing Mean score for each set

print("---------- KMEANS Mean FPR and TPR Scores----------")

# Mean FPR and TPR scores for kmeans training set
kmeans_24_fpr_train = mean(fpr_train_scores[0::3])
kmeans_24_tpr_train = mean(tpr_train_scores[0::3])
kmeans_12_fpr_train = mean(fpr_train_scores[1::3])
kmeans_12_tpr_train = mean(tpr_train_scores[1::3])
kmeans_6_fpr_train = mean(fpr_train_scores[2::3])
kmeans_6_tpr_train = mean(tpr_train_scores[2::3])

# Mean FPR and TPR scores for kmeans test set
# Find the average between each roc curve in fpr_test_scores[0::3]
kmeans_24_fpr_test = mean(fpr_test_scores[0::3])
kmeans_24_tpr_test = mean(tpr_test_scores[0::3])
kmeans_12_fpr_test = mean(fpr_test_scores[1::3])
kmeans_12_tpr_test = mean(tpr_test_scores[1::3])
kmeans_6_fpr_test = mean(fpr_test_scores[2::3])
kmeans_6_tpr_test = mean(tpr_test_scores[2::3])

# Printing Mean FPR and TPR scores for each set
print("Kmeans 24 Sample Training Set FPR: ", kmeans_24_fpr_train)
print("Kmeans 24 Sample Training Set TPR: ", kmeans_24_tpr_train)
print("Kmeans 24 Sample Test Set FPR: ", kmeans_24_fpr_test)
print("Kmeans 24 Sample Test Set TPR: ", kmeans_24_tpr_test)
print("Kmeans 12 Sample Training Set FPR: ", kmeans_12_fpr_train)
print("Kmeans 12 Sample Training Set TPR: ", kmeans_12_tpr_train)
print("Kmeans 12 Sample Test Set FPR: ", kmeans_12_fpr_test)
print("Kmeans 12 Sample Test Set TPR: ", kmeans_12_tpr_test)
print("Kmeans 6 Sample Training Set FPR: ", kmeans_6_fpr_train)
print("Kmeans 6 Sample Training Set TPR: ", kmeans_6_tpr_train)
print("Kmeans 6 Sample Test Set FPR: ", kmeans_6_fpr_test)
print("Kmeans 6 Sample Test Set TPR: ", kmeans_6_tpr_test)

---------- KMEANS Accuracy Scores----------
Train24 Average Accuracy:  0.15515610032870417
Test24 Average Accruacy:  0.15355555555555556
Train12 Average Accuracy:  0.15256653600013065
Test12 Average Accruacy:  0.1417142857142857
Train6 Average Accuracy:  0.1296628787878788
Test6 Average Accruacy:  0.12814814814814815
---------- KMEANS Mean FPR and TPR Scores----------
Kmeans 24 Sample Training Set FPR:  0.08357890193482226
Kmeans 24 Sample Training Set TPR:  0.067448501953052
Kmeans 24 Sample Test Set FPR:  0.1285144927536232
Kmeans 24 Sample Test Set TPR:  0.06514188940675666
Kmeans 12 Sample Training Set FPR:  0.0791592394533571
Kmeans 12 Sample Training Set TPR:  0.09239130434782608
Kmeans 12 Sample Test Set FPR:  0.09028777566200641
Kmeans 12 Sample Test Set TPR:  0.07460426002057094
Kmeans 6 Sample Training Set FPR:  0.20984848484848484
Kmeans 6 Sample Training Set TPR:  0.1297222222222222
Kmeans 6 Sample Test Set FPR:  0.13761192340563672
Kmeans 6 Sample Test Set TPR:  0.11935865

### DBSCAN

In [17]:
# Create DBSCAN Model
dbscan = Pipeline([('scaler',StandardScaler()),('dbscan', DBSCAN())])
dbscan

In [18]:
# Create DBSCAN


#DBSCAN pipeline
dbscan = Pipeline([('scaler',StandardScaler()),('dbscan', DBSCAN())])


### dbscan accuracy score lists
dbscan_trainscores = []
dbscan_testscores = []

# tpr and fpr scores
tpr_scores = []
fpr_scores = []

tpr_train_scores = []
fpr_train_scores = []
tpr_test_scores = []
fpr_test_scores = []

ii=0
while ii < 20:


	# Splits for each dataset
	train24_data, train24_label, test24_data, test24_label = train_test_split(24,3,csv_path)
	train12_data, train12_label, test12_data, test12_label = train_test_split(12,3,csv_path)
	train6_data, train6_label, test6_data, test6_label = train_test_split(6,3,csv_path)

	# Create pandas dataframes and series
	train24_data = pd.DataFrame(train24_data)
	test24_data = pd.DataFrame(test24_data)
	train12_data = pd.DataFrame(train12_data)
	test12_data = pd.DataFrame(test12_data)
	train6_data = pd.DataFrame(train6_data)
	test6_data = pd.DataFrame(test6_data)

	# Create lists of training sets to parse
	training_sets = [train24_data, train12_data, train6_data]
	training_labels = [train24_label, train12_label, train6_label]

	# Create lists of test sets to parse
	test_sets = [test24_data, test12_data, test6_data]
	test_labels =[test24_label, test12_label, test6_label]
	Sets = ["-----24 Sample Training Set-----","-----12 Sample Training Set-----", "-----6 Sample Training Set-----"]
	t_sets = ["-----24 Sample Test Set-----","-----12 Sample Test Set-----", "-----6 Sample Test Set-----"]

	#Iterater
	ii = ii + 1

	# Loop responsible for parsing training sets and training labels
	for (a,i,j,m,n) in zip(Sets, training_sets,training_labels, test_sets, test_labels): 
		print("\n",a,"\n")
		j = [int(i) for i in j]
		# Fit the model
		dbscan.fit(i)

		# Get clusters from training and test set
		train_clusters = dbscan.named_steps['dbscan'].labels_

		# Compute accuracy score between training cluster labels and training labels
		train_score = accuracy_score(train_clusters,j)

		# Appeniding to kmeans_train_scores list
		dbscan_trainscores.append(train_score)

		# Fit model to test set
		dbscan.fit(m)

		# Compute test clusters
		test_clusters = dbscan.named_steps['dbscan'].labels_

		# Compute accuracy score between test cluster labels and test labels
		test_score = accuracy_score(test_clusters,n)

		# Appending to kmeans_test_scores list
		dbscan_testscores.append(test_score)

		# derive TPR and TPR from training clusters and training labels
		tpr_train, fpr_train = tpr_fpr(train_clusters,j)
		tpr_train_scores.append(tpr_train)
		fpr_train_scores.append(fpr_train)

		# derive TPR and TPR from test clusters and test labels
		tpr_test, fpr_test = tpr_fpr(test_clusters,n)
		tpr_test_scores.append(tpr_test)
		fpr_test_scores.append(fpr_test)

Training chips num:  [13 10 15 21 14  6 23 26 24 11  9 32 18  8 19 12 31 25 28  7 30  4  2 33]
Type selection: ['TI', 'TF', 'TF', 'TF', 'TF', 'TI', 'TI', 'TI', 'TF', 'TI', 'TF', 'TI', 'TI', 'TI', 'TI', 'TF', 'TI', 'TF', 'TI', 'TI', 'TI', 'TI', 'TF', 'TF']
TI: 14
TF: 10
Training chips num:  [25  5  7 20 23 30 15  4  6 32 27 18]
Type selection: ['TF', 'TI', 'TF', 'TI', 'TF', 'TF', 'TI', 'TI', 'TF', 'TF', 'TI', 'TI']
TI: 6
TF: 6
Training chips num:  [15  5 13 32 17 19]
Type selection: ['TF', 'TI', 'TI', 'TI', 'TF', 'TF']
TI: 3
TF: 3

 -----24 Sample Training Set----- 


 -----12 Sample Training Set----- 


 -----6 Sample Training Set----- 

Training chips num:  [27  3 30  4 33 21 23  9 10 25 16 19  1  6 15 24 29 28 18 22  2 26  5  8]
Type selection: ['TI', 'TF', 'TI', 'TF', 'TF', 'TI', 'TF', 'TF', 'TF', 'TI', 'TF', 'TI', 'TI', 'TI', 'TF', 'TF', 'TF', 'TF', 'TI', 'TF', 'TI', 'TF', 'TI', 'TF']
TI: 10
TF: 14
Training chips num:  [ 6 18 27 25 15  7  2  4 31 22 19 11]
Type selection: ['TF', 'T

  fpr = cm[0,1]/(cm[0,0]+cm[0,1])



 -----12 Sample Training Set----- 


 -----6 Sample Training Set----- 

Training chips num:  [31 10 18 30 33  8 28 13 24  5 15 11  2 23 17 29  1 21  9 26  3  6 14 32]
Type selection: ['TI', 'TF', 'TI', 'TF', 'TF', 'TF', 'TI', 'TF', 'TF', 'TI', 'TF', 'TF', 'TI', 'TF', 'TF', 'TF', 'TF', 'TF', 'TF', 'TF', 'TI', 'TF', 'TF', 'TF']
TI: 6
TF: 18
Training chips num:  [13 32 30 28 21 26 27  1 14  4 10  3]
Type selection: ['TF', 'TI', 'TI', 'TI', 'TI', 'TI', 'TI', 'TI', 'TF', 'TI', 'TF', 'TI']
TI: 9
TF: 3
Training chips num:  [19 31 13 30  5  6]
Type selection: ['TI', 'TI', 'TF', 'TF', 'TF', 'TI']
TI: 3
TF: 3

 -----24 Sample Training Set----- 


 -----12 Sample Training Set----- 


 -----6 Sample Training Set----- 

Training chips num:  [23  4  9 12  2  3 11 16 29 33 26 27 32  1 13 24 25 17 18  6 10  7 19 20]
Type selection: ['TF', 'TI', 'TF', 'TF', 'TF', 'TI', 'TF', 'TF', 'TF', 'TF', 'TF', 'TI', 'TI', 'TF', 'TF', 'TF', 'TI', 'TI', 'TF', 'TF', 'TI', 'TF', 'TF', 'TI']
TI: 8
TF: 16
Training chip

  fpr = cm[0,1]/(cm[0,0]+cm[0,1])
  fpr = cm[0,1]/(cm[0,0]+cm[0,1])
  fpr = cm[0,1]/(cm[0,0]+cm[0,1])
  tpr = cm[1,1]/(cm[1,0]+cm[1,1])



 -----12 Sample Training Set----- 


 -----6 Sample Training Set----- 

Training chips num:  [12 30 33 32 29 31 25  3  6 26 20  9 15 21 22 19 17 11  5 14 24 23 16  4]
Type selection: ['TI', 'TF', 'TF', 'TF', 'TF', 'TF', 'TF', 'TF', 'TI', 'TI', 'TI', 'TF', 'TI', 'TI', 'TI', 'TF', 'TF', 'TI', 'TF', 'TI', 'TF', 'TI', 'TI', 'TI']
TI: 12
TF: 12
Training chips num:  [33  8 12  2 29 21 20  7 30 19 22  4]
Type selection: ['TF', 'TI', 'TI', 'TF', 'TI', 'TI', 'TI', 'TI', 'TI', 'TI', 'TI', 'TI']
TI: 10
TF: 2
Training chips num:  [15 10  4 12  5 31]
Type selection: ['TI', 'TF', 'TF', 'TI', 'TI', 'TF']
TI: 3
TF: 3

 -----24 Sample Training Set----- 


 -----12 Sample Training Set----- 


 -----6 Sample Training Set----- 

Training chips num:  [22 30 14 12 19  8 24 33  1 11  9 15  3 10 25  2 29 31  6 27 16  7 21 32]
Type selection: ['TF', 'TF', 'TI', 'TI', 'TF', 'TF', 'TF', 'TF', 'TI', 'TF', 'TF', 'TI', 'TF', 'TF', 'TI', 'TF', 'TI', 'TI', 'TF', 'TF', 'TI', 'TF', 'TI', 'TI']
TI: 10
TF: 14
Training c

  fpr = cm[0,1]/(cm[0,0]+cm[0,1])
  fpr = cm[0,1]/(cm[0,0]+cm[0,1])



 -----24 Sample Training Set----- 


 -----12 Sample Training Set----- 


 -----6 Sample Training Set----- 

Training chips num:  [33 27 25  7 18 14 12 26 20 11 10 22  2  1 28 19 29  4 17  5 31 21 23  9]
Type selection: ['TI', 'TI', 'TI', 'TF', 'TI', 'TF', 'TI', 'TI', 'TF', 'TF', 'TF', 'TI', 'TF', 'TF', 'TI', 'TI', 'TI', 'TF', 'TF', 'TI', 'TI', 'TI', 'TI', 'TI']
TI: 15
TF: 9
Training chips num:  [ 6 18 33 24 30 31 23  7  5 32 11  9]
Type selection: ['TI', 'TF', 'TF', 'TI', 'TI', 'TF', 'TF', 'TF', 'TF', 'TI', 'TF', 'TF']
TI: 4
TF: 8
Training chips num:  [14  1 26 21 12 20]
Type selection: ['TI', 'TI', 'TF', 'TI', 'TF', 'TI']
TI: 4
TF: 2

 -----24 Sample Training Set----- 


 -----12 Sample Training Set----- 


 -----6 Sample Training Set----- 

Training chips num:  [16  6 33 17 27 11 23  8 29 10 24 31  4  9 32 21 18 12 13  5 14  7  2 25]
Type selection: ['TF', 'TF', 'TI', 'TF', 'TF', 'TI', 'TI', 'TI', 'TF', 'TF', 'TI', 'TF', 'TI', 'TF', 'TF', 'TF', 'TF', 'TF', 'TF', 'TF', 'TF', 'TF', '

  fpr = cm[0,1]/(cm[0,0]+cm[0,1])
  tpr = cm[1,1]/(cm[1,0]+cm[1,1])
  tpr = cm[1,1]/(cm[1,0]+cm[1,1])
  fpr = cm[0,1]/(cm[0,0]+cm[0,1])
  fpr = cm[0,1]/(cm[0,0]+cm[0,1])



 -----24 Sample Training Set----- 


 -----12 Sample Training Set----- 


 -----6 Sample Training Set----- 

Training chips num:  [22  1 12 30 33  2 32 16  6 27 15  8 29 11 28 31  9 13 20 17  3 25  5 19]
Type selection: ['TF', 'TF', 'TF', 'TF', 'TI', 'TF', 'TF', 'TI', 'TI', 'TI', 'TI', 'TF', 'TF', 'TF', 'TF', 'TI', 'TI', 'TF', 'TF', 'TI', 'TF', 'TI', 'TI', 'TI']
TI: 11
TF: 13
Training chips num:  [29 20 27 15 22 32 10 16 24  4  1 25]
Type selection: ['TI', 'TI', 'TI', 'TF', 'TF', 'TI', 'TI', 'TI', 'TF', 'TF', 'TF', 'TF']
TI: 6
TF: 6
Training chips num:  [ 8  2  9 33 23 30]
Type selection: ['TF', 'TI', 'TF', 'TF', 'TF', 'TI']
TI: 2
TF: 4

 -----24 Sample Training Set----- 


 -----12 Sample Training Set----- 


 -----6 Sample Training Set----- 



  tpr = cm[1,1]/(cm[1,0]+cm[1,1])


In [19]:
# Slicing training scores into individual lists
dbscan_24_train_scores = dbscan_trainscores[0::3]
dbscan_12_train_scores = dbscan_trainscores[1::3]
dbscan_6_train_scores = dbscan_trainscores[2::3]

# Slicing test scores into individual lists
dbscan_24_test_scores = dbscan_testscores[0::3]
dbscan_12_test_scores = dbscan_testscores[1::3]
dbscan_6_test_scores = dbscan_testscores[2::3]


print("---------- DBSCAN Accuracy Scores----------")

# Accuracy scores for test and training sets
train24_avg = mean(dbscan_24_train_scores)
test24_avg = mean(dbscan_24_test_scores)
print("Train24 Average Accuracy: ", train24_avg)
print("Test24 Average Accruacy: " , test24_avg)

# Accuracy scores for test and training sets
train12_avg = mean(dbscan_12_train_scores)
test12_avg = mean(dbscan_12_test_scores)
print("Train12 Average Accuracy: ", train12_avg)
print("Test12 Average Accruacy: " , test12_avg)

# Accuracy Scores for test and training sets
train6_avg = mean(dbscan_6_train_scores)
test6_avg = mean(dbscan_6_test_scores)
print("Train6 Average Accuracy: ", train6_avg)
print("Test6 Average Accruacy: " , test6_avg)

# Slicing fpr and tpr scores for each set
dbscan_24_fpr = fpr_test_scores[0::3]
dbscan_24_tpr = tpr_test_scores[0::3]
dbscan_12_tpr = tpr_test_scores[1::3]
dbscan_12_fpr = fpr_test_scores[1::3]
dbscan_6_tpr = tpr_test_scores[2::3]
dbscan_6_fpr = fpr_test_scores[2::3]


# Printing Mean score for each set

print("---------- DBSCAN Mean FPR and TPR Scores----------")
# Mean FPR and TPR scores for DBSCAN training set
dbscan_24_fpr_train = mean(fpr_train_scores[0::3])
dbscan_24_tpr_train = mean(tpr_train_scores[0::3])
dbscan_12_fpr_train = mean(fpr_train_scores[1::3])
dbscan_12_tpr_train = mean(tpr_train_scores[1::3])
dbscan_6_fpr_train = mean(fpr_train_scores[2::3])
dbscan_6_tpr_train = mean(tpr_train_scores[2::3])

# print the mean scores
print("Mean FPR for 24 Sample Training Set: ", dbscan_24_fpr_train)
print("Mean TPR for 24 Sample Training Set: ", dbscan_24_tpr_train)
print("Mean FPR for 12 Sample Training Set: ", dbscan_12_fpr_train)
print("Mean TPR for 12 Sample Training Set: ", dbscan_12_tpr_train)
print("Mean FPR for 6 Sample Training Set: ", dbscan_6_fpr_train)
print("Mean TPR for 6 Sample Training Set: ", dbscan_6_tpr_train)




---------- DBSCAN Accuracy Scores----------
Train24 Average Accuracy:  0.9064263219370314
Test24 Average Accruacy:  0.8822222222222222
Train12 Average Accuracy:  0.7444471560188267
Test12 Average Accruacy:  0.9154285714285715
Train6 Average Accuracy:  0.5252402600418905
Test6 Average Accruacy:  0.9159259259259259
---------- DBSCAN Mean FPR and TPR Scores----------
Mean FPR for 24 Sample Training Set:  0.8614761199936184
Mean TPR for 24 Sample Training Set:  nan
Mean FPR for 12 Sample Training Set:  nan
Mean TPR for 12 Sample Training Set:  0.95
Mean FPR for 6 Sample Training Set:  1.0
Mean TPR for 6 Sample Training Set:  nan
