#  Intro to Hardware Security and Trust Final Project

## Import Libraries

In [1]:
from sklearn.model_selection import GridSearchCV,train_test_split
from sklearn.cluster import KMeans,DBSCAN
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import csv
import random
import os
from statistics import mean


## Dataset Splitting Function

In [2]:
xlsx_path = 'C:\\Users\patri\\git\\HSTFinalProject\\Classifier-for-Hardware-Trojan-Detection\\ROFreq\\ROFreq\\Chip'
csv_path ='C:\\Users\patri\\git\HSTFinalProject\Classifier-for-Hardware-Trojan-Detection\ROFreq_C\Chip'

for i in np.arange(1,34):
    data_xls = pd.read_excel('{}{}.xlsx'.format(xlsx_path,i), dtype=str, index_col=None)
    data_xls.to_csv('{}{}.csv'.format(csv_path,i), encoding='utf-8', index=False)

In [3]:
def csv_read(chip_num,type,csv_path):
    data= []
    data_list= []
    label_list = np.zeros(25)
    label_list[0] = 1
    label_list[-1] = 1
    with open('{}{}.csv'.format(csv_path,chip_num)) as csvfile:
        csv_reader = csv.reader(csvfile)
   
        for row in csv_reader:
            # rounding
            row = [round(float(num[0:12]),3) for num in row]     
            data_list.append(row)   

        if type == 'TI':
            data.append(data_list[0])
            data.append(data_list[-1])
            label = [1,1]
        elif type == 'TF':
            data = data_list[1:-1]
            label = label_list[1:-1]
        else:
            data = data_list
            label = label_list 
                  
        
    return data,label

In [4]:
def train_test_split(num_sel,case_sel,csv_path):
    chips_num = np.arange(1,34)
    indices = range(33)
    # randomly select #num chips
    sel = random.sample(indices,num_sel)
    # split tain and test data
    train_chips = chips_num[sel]
    test_chips = np.delete(chips_num,sel)
    print('Training chips num: ',train_chips)
    temp = []

    for i in range(int(num_sel/2)):
        temp.extend(['TF','TI'])
 
    type_sel_1 = random.sample(temp,num_sel)
    type_sel_3 = random.choices(['TI','TF'],k=num_sel)
    
    if case_sel == 1:
        print('Type selection:', type_sel_1)
        print('TI:',type_sel_1.count('TI'))
        print('TF:',type_sel_1.count('TF'))

    else:
        print('Type selection:', type_sel_3)
        print('TI:',type_sel_3.count('TI'))
        print('TF:',type_sel_3.count('TF'))

    cnt = 0
    train_data = []
    train_label = []

    for chip_num in train_chips:
        if case_sel == 1:
            data,label = csv_read(chip_num,type_sel_1[cnt],csv_path)
        elif case_sel ==3:
            data,label = csv_read(chip_num,type_sel_3[cnt],csv_path)
        train_data.extend(data)
        train_label.extend(label)
        cnt += 1

    test_data = []
    test_label = []
    for chip_num in test_chips:
        data,label = csv_read(chip_num,'ALL',csv_path)
        test_data.extend(data)
        test_label.extend(label)
    
    return train_data, train_label, test_data, test_label

##  CASE 1

### Case 1 Datasets

In [5]:
# arg1 => number of training samples
# arg2 => number of case (enter 1 or 3 only)
train24_data, train24_label, test24_data, test24_label = train_test_split(24,1,csv_path)
train12_data, train12_label, test12_data, test12_label = train_test_split(12,1,csv_path)
train6_data, train6_label, test6_data, test6_label = train_test_split(6,1,csv_path)

Training chips num:  [12 20  2  7 31 26 18  5 19 11 30 24 16 17  4 33  8 21 29 23 27 28  9 10]
Type selection: ['TI', 'TI', 'TI', 'TF', 'TI', 'TI', 'TF', 'TI', 'TI', 'TF', 'TF', 'TF', 'TF', 'TI', 'TF', 'TF', 'TI', 'TF', 'TF', 'TI', 'TI', 'TF', 'TF', 'TI']
TI: 12
TF: 12
Training chips num:  [11 10 32  2 30 33 28 21  8  1  4 27]
Type selection: ['TI', 'TI', 'TF', 'TF', 'TF', 'TI', 'TI', 'TI', 'TF', 'TI', 'TF', 'TF']
TI: 6
TF: 6
Training chips num:  [15 21  1 14  7 28]
Type selection: ['TI', 'TI', 'TF', 'TF', 'TI', 'TF']
TI: 3
TF: 3


In [6]:
pd.DataFrame(train24_data)

Unnamed: 0,0,1,2,3,4,5,6,7
0,184.0,196.0,217.2,227.6,217.2,224.8,215.2,246.8
1,184.0,196.0,217.2,227.6,217.2,224.8,215.2,246.8
2,184.8,198.0,216.4,228.0,218.0,222.0,214.4,244.4
3,185.2,198.0,216.4,228.0,218.0,222.0,214.4,244.8
4,184.8,198.0,216.4,228.0,218.0,222.0,214.4,244.4
...,...,...,...,...,...,...,...,...
295,148.4,157.6,174.0,182.0,174.8,177.6,170.8,196.0
296,142.8,151.6,167.2,175.6,168.0,170.8,164.0,188.4
297,142.0,151.2,166.8,175.2,167.6,170.4,164.0,188.4
298,187.2,200.4,218.0,230.8,221.2,224.0,214.0,247.6


In [7]:
# 24 Training - Create Dataframe objects and compute correlations
train24_data_df = pd.DataFrame(train24_data)
train24_label_s = pd.Series(train24_label)
train24_data_df.corrwith(train24_label_s).sort_values(ascending=False)

0    0.463858
1    0.460422
7    0.459063
2    0.458386
6    0.457046
5    0.454014
3    0.453864
4    0.453436
dtype: float64

In [8]:
# 12 Training - Create Dataframe objects and compute correlations
train12_data_df = pd.DataFrame(train12_data)
train12_label_s = pd.Series(train12_label)
train12_data_df.corrwith(train12_label_s).sort_values(ascending=False)

1    0.340003
7    0.336068
0    0.334142
3    0.328676
4    0.326465
2    0.322266
5    0.321756
6    0.316479
dtype: float64

In [9]:
# 6 Training - Create Dataframe objects and compute correlations
train6_data_df = pd.DataFrame(train6_data)
train6_label_s = pd.Series(train6_label)
train6_data_df.corrwith(train6_label_s).sort_values(ascending=False)

4    0.557416
7    0.556167
3    0.543263
5    0.541591
0    0.538942
6    0.536256
1    0.533801
2    0.533271
dtype: float64

Correlation between features seems to be about equal for predictions with training and test splits

### Tuning KNeighbors Classifier

In [10]:
# Creating KNeighborsClassifier Pipeline
KneighC = Pipeline([('scaler',StandardScaler()),('knc', KNeighborsClassifier())])

# Finding the best parameters for KNeighborsClassifier

## Create parameter grid for gridsearch algorithm
knc_param_Grid=  { 'knc__n_neighbors': np.arange(2,10,1),
                  'knc__leaf_size': np.arange(2,30,2),
                 }

## Create Grid search
knc_grid_search = GridSearchCV(KneighC,
                               param_grid=knc_param_Grid,
                               scoring='accuracy',
                               refit=True,
                               cv=5,
                               verbose=1)

ii=0
### Fit to training data
KNC_trainscores=[]
KNC_testscores=[]
while ii < 20:

    # Splits for each dataset
    train24_data, train24_label, test24_data, test24_label = train_test_split(24,1,csv_path)
    train12_data, train12_label, test12_data, test12_label = train_test_split(12,1,csv_path)
    train6_data, train6_label, test6_data, test6_label = train_test_split(6,1,csv_path)
    
    train24_data = pd.DataFrame(train24_data)
    test24_data = pd.DataFrame(test24_data)
    
    train12_data = pd.DataFrame(train12_data)
    test12_data = pd.DataFrame(test12_data)
    
    train6_data = pd.DataFrame(train6_data)
    test6_data = pd.DataFrame(test6_data)
    

    # Create lists of training sets to parse
    training_sets = [train24_data, train12_data, train6_data]
    training_labels = [train24_label, train12_label, train6_label]
    
    # Create lists of test sets to parse
    test_sets = [test24_data, test12_data, test6_data]
    test_labels =[test24_label, test12_label, test6_label]
    
    Sets = ["-----24 Sample Training Set-----","-----12 Sample Training Set-----", "-----6 Sample Training Set-----"]
    t_sets = ["-----24 Sample Test Set-----","-----12 Sample Test Set-----", "-----6 Sample Test Set-----"]
    
    #Iterater
    ii = ii + 1
    
    # Loop responsible for parsing training sets and training labels
    for (a,i,j,m,n) in zip(Sets, training_sets,training_labels, test_sets, test_labels):
        print("\n",a,"\n")
        knc_grid_search.fit(i,j)
        ### Print Best parameters
        print("\n",knc_grid_search.best_params_)
        ## Saving the tuned model
        model = knc_grid_search.best_estimator_
        print("Best Accuracy: ",knc_grid_search.best_score_)
        KNC_trainscores+= [knc_grid_search.best_score_]
        y_pred = model.predict(m)
        KNC_testscores += [accuracy_score(n,y_pred)]

Training chips num:  [ 7 11  3 17 27  1  4  2  9  6 26 18 22 16 23 13 24 20 19 31 10 33 25 28]
Type selection: ['TF', 'TI', 'TF', 'TI', 'TF', 'TI', 'TI', 'TF', 'TF', 'TF', 'TI', 'TI', 'TF', 'TF', 'TI', 'TI', 'TF', 'TF', 'TI', 'TI', 'TI', 'TF', 'TF', 'TI']
TI: 12
TF: 12
Training chips num:  [11 13 19 24 26 20 29 15  9 31 28  7]
Type selection: ['TF', 'TI', 'TF', 'TI', 'TI', 'TI', 'TF', 'TF', 'TF', 'TF', 'TI', 'TI']
TI: 6
TF: 6
Training chips num:  [30 31 25 18  2 27]
Type selection: ['TF', 'TI', 'TF', 'TF', 'TI', 'TI']
TI: 3
TF: 3

 -----24 Sample Training Set----- 

Fitting 5 folds for each of 112 candidates, totalling 560 fits

 {'knc__leaf_size': 2, 'knc__n_neighbors': 9}
Best Accuracy:  0.96

 -----12 Sample Training Set----- 

Fitting 5 folds for each of 112 candidates, totalling 560 fits

 {'knc__leaf_size': 2, 'knc__n_neighbors': 4}
Best Accuracy:  0.8933333333333333

 -----6 Sample Training Set----- 

Fitting 5 folds for each of 112 candidates, totalling 560 fits

 {'knc__leaf_s

### Results for KneighborsClassifier

In [11]:
# Slicing training scores into individual lists
KNC_24_train_scores = KNC_trainscores[0::3]
KNC_12_train_scores= KNC_trainscores[1::3]
KNC_6_train_scores = KNC_trainscores[2::3]

# Slicing test scores into individual lists
KNC_24_test_scores = KNC_testscores[0::3]
KNC_12_test_scores = KNC_testscores[1::3]
KNC_6_test_scores = KNC_testscores[2::3]

print("---------- Kneighbors Accuracy Scores----------")

# Accuracy scores for test and training sets
train24_avg = mean(KNC_24_train_scores)
test24_avg = mean(KNC_24_test_scores)
print("TRAIN24 Average Accuracy: ", train24_avg)
print("Test24 Average Accruacy: " , test24_avg)

# Accuracy scores for test and training sets
train12_avg = mean(KNC_12_train_scores)
test12_avg = mean(KNC_12_test_scores)
print("Train12 Average Accuracy: ", train12_avg)
print("Test12 Average Accruacy: " , test12_avg)

# Accuracy Scores for test and training sets
train6_avg = mean(KNC_6_train_scores)
test6_avg = mean(KNC_6_test_scores)
print("TRAIN6 Average Accuracy: ", train6_avg)
print("Test6 Average Accruacy: " , test6_avg)

---------- Kneighbors Accuracy Scores----------
TRAIN24 Average Accuracy:  0.9443333333333334
Test24 Average Accruacy:  0.9213333333333333
Train12 Average Accuracy:  0.9390000000000001
Test12 Average Accruacy:  0.9253333333333333
TRAIN6 Average Accuracy:  0.9533333333333334
Test6 Average Accruacy:  0.9101481481481482


### Tuning Logistic Regression

In [12]:
# Create Logisitic Regression Model
Logreg = Pipeline([('scaler',StandardScaler()),('logreg',LogisticRegression(random_state=42))])
Logreg

# Finding the best parameters for Logistic Regression

## Create Parameter Grid
logreg_param_grid = {'logreg__C': [0.001,0.01,0.1,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0],
                     'logreg__tol': [0.0001, 0.001, 0.01]
                    }

## Create Log Reg Gridsearch
logreg_gridsearch = GridSearchCV(Logreg,
                                 param_grid=logreg_param_grid,
                                 scoring='accuracy',
                                 refit=True,
                                 cv=5,
                                 verbose=1)


ii=0
### Fit to training data
logreg_trainscores=[]
logreg_testscores=[]
while ii < 20:

    # Splits for each dataset
    train24_data, train24_label, test24_data, test24_label = train_test_split(24,1,csv_path)
    train12_data, train12_label, test12_data, test12_label = train_test_split(12,1,csv_path)
    train6_data, train6_label, test6_data, test6_label = train_test_split(6,1,csv_path)
    
    # Create pandas dataframes and series
    train24_data = pd.DataFrame(train24_data)
    test24_data = pd.DataFrame(test24_data)
    
    train12_data = pd.DataFrame(train12_data)
    test12_data = pd.DataFrame(test12_data)
    
    train6_data = pd.DataFrame(train6_data)
    test6_data = pd.DataFrame(test6_data)
    
    # Create lists of training sets to parse
    training_sets = [train24_data, train12_data, train6_data]
    training_labels = [train24_label, train12_label, train6_label]
    
    # Create lists of test sets to parse
    test_sets = [test24_data, test12_data, test6_data]
    test_labels =[test24_label, test12_label, test6_label]
    
    Sets = ["-----24 Sample Training Set-----","-----12 Sample Training Set-----", "-----6 Sample Training Set-----"]
    t_sets = ["-----24 Sample Test Set-----","-----12 Sample Test Set-----", "-----6 Sample Test Set-----"]
    
    #Iterater
    ii = ii + 1
    
    # Loop responsible for parsing training sets and training labels
    for (a,i,j,m,n) in zip(Sets, training_sets,training_labels, test_sets, test_labels):
        print("\n",a,"\n")
        logreg_gridsearch.fit(i,j)
        ### Print Best parameters
        print("\n",logreg_gridsearch.best_params_)
        ## Saving the tuned model
        model = logreg_gridsearch.best_estimator_
        print("Best Accuracy: ",logreg_gridsearch.best_score_)
        logreg_trainscores+= [logreg_gridsearch.best_score_]
        y_pred = model.predict(m)
        logreg_testscores += [accuracy_score(n,y_pred)]

Training chips num:  [28 32  9 22 10 33 23 19  3 20  1 15 29 30 16 21 25 26 11  4 27 31 12 14]
Type selection: ['TF', 'TF', 'TF', 'TF', 'TI', 'TI', 'TF', 'TI', 'TF', 'TI', 'TF', 'TI', 'TI', 'TF', 'TI', 'TI', 'TF', 'TF', 'TI', 'TI', 'TI', 'TF', 'TF', 'TI']
TI: 12
TF: 12
Training chips num:  [13 28 23 27  9  7 26 12 31  3 29 25]
Type selection: ['TI', 'TI', 'TF', 'TF', 'TI', 'TI', 'TF', 'TF', 'TI', 'TF', 'TF', 'TI']
TI: 6
TF: 6
Training chips num:  [14  1 16 32 20 31]
Type selection: ['TF', 'TI', 'TI', 'TF', 'TF', 'TI']
TI: 3
TF: 3

 -----24 Sample Training Set----- 

Fitting 5 folds for each of 39 candidates, totalling 195 fits

 {'logreg__C': 0.3, 'logreg__tol': 0.0001}
Best Accuracy:  0.9966666666666667

 -----12 Sample Training Set----- 

Fitting 5 folds for each of 39 candidates, totalling 195 fits

 {'logreg__C': 0.001, 'logreg__tol': 0.0001}
Best Accuracy:  0.9200000000000002

 -----6 Sample Training Set----- 

Fitting 5 folds for each of 39 candidates, totalling 195 fits

 {'logr

### Log Reg Results

In [14]:
# Slicing training scores into individual lists
logreg_24_train_scores = logreg_trainscores[0::3]
logreg_12_train_scores= logreg_trainscores[1::3]
logreg_6_train_scores = logreg_trainscores[2::3]

# Slicing test scores into individual lists
logreg_24_test_scores = logreg_testscores[0::3]
logreg_12_test_scores = logreg_testscores[1::3]
logreg_6_test_scores = logreg_testscores[2::3]

print("---------- Logistic Regression Accuracy Scores----------")

# Accuracy scores for test and training sets
train24_avg = mean(logreg_24_train_scores)
test24_avg = mean(logreg_24_test_scores)
print("TRAIN24 Average Accuracy: ", train24_avg)
print("Test24 Average Accruacy: " , test24_avg)

# Accuracy scores for test and training sets
train12_avg = mean(logreg_12_train_scores)
test12_avg = mean(logreg_12_test_scores)
print("Train12 Average Accuracy: ", train12_avg)
print("Test12 Average Accruacy: " , test12_avg)

# Accuracy Scores for test and training sets
train6_avg = mean(logreg_6_train_scores)
test6_avg = mean(logreg_6_test_scores)
print("TRAIN6 Average Accuracy: ", train6_avg)
print("Test6 Average Accruacy: " , test6_avg)

---------- Logistic Regression Accuracy Scores----------
TRAIN24 Average Accuracy:  0.9273333333333332
Test24 Average Accruacy:  0.91
Train12 Average Accuracy:  0.933
Test12 Average Accruacy:  0.92
TRAIN6 Average Accuracy:  0.9560000000000001
Test6 Average Accruacy:  0.9130370370370371


## CASE 3

### Case 3 Datasets

In [15]:
type(train24_data)

pandas.core.frame.DataFrame

In [16]:
plt.scatter(train24_data[:,0],train24_data[:,1])

InvalidIndexError: (slice(None, None, None), 0)

### KMeans

In [None]:
# Create Kmeans Model
Kmeans = Pipeline([('scaler', StandardScaler()),('kmeans',KMeans())])
Kmeans

### DBSCAN

In [None]:
# Create DBSCAN Model
dbscan = Pipeline([('scaler',StandardScaler()),('dbscan', DBSCAN())])
dbscan