In [1]:
from numpy import asarray, load, nanargmin, absolute
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import Normalizer
from sklearn.svm import SVC

In [2]:
# load dataset
# Index of the single test sample that is excluded from evaluation.
# NOTE(review): the notebook does not record why this sample is dropped --
# confirm the reason and document it here.
DROPPED_TEST_INDEX = 2911

# Read the four arrays inside a context manager so the .npz archive handle is
# closed as soon as the arrays have been pulled into memory.
with load('ubipr_vgg_flattened_embeddings.npz') as data:
    trainX, trainy, testX, testy = data['arr_0'], data['arr_1'], data['arr_2'], data['arr_3']

# Fail loudly (as `del some_list[idx]` would) if the index does not exist.
if DROPPED_TEST_INDEX >= len(testy):
    raise IndexError(
        'test sample %d cannot be dropped: only %d test samples loaded'
        % (DROPPED_TEST_INDEX, len(testy))
    )

# Drop the sample from features and labels with one shared boolean mask
# instead of converting both arrays to Python lists and back.
keep_mask = asarray(range(len(testy))) != DROPPED_TEST_INDEX
testX = testX[keep_mask]
testy = testy[keep_mask]
print(trainy.shape)
print(testy.shape)

print(trainy[0])

(6967,)
(3231,)
1_L


In [9]:
# Show the first 100 feature values of the first training embedding.
first_embedding = trainX[0]
print(first_embedding[:100])

[ 1.6719474   0.          0.77077085  0.          6.1977935   0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          5.128816    0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          6.6873446   0.          0.
 19.556269    0.          0.          0.          0.          0.
  0.          0.          0.12780595  0.          0.          0.
  0.          0.          0.          0.          0.          0.
  9.543406    3.690608    0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.71279526  0.          0.          0.
  0.         27.484472    0.          0.          0.          0.
  0.          0.          0.          2.4446998   0.          0.
  0.          0.          0.          0.          0.          0.
  0.          1.497754   

In [3]:
# normalize input vectors
# Each embedding row is rescaled to unit L2 norm. Normalizer works row by row
# and learns nothing from the data, so transform() is used without fit().
in_encoder = Normalizer(norm='l2')
trainX = in_encoder.transform(trainX)
testX = in_encoder.transform(testX)
# label encode targets
# The encoder is fitted on the training labels only and then applied to both
# splits, so train and test share one string -> integer mapping.
out_encoder = LabelEncoder()
out_encoder.fit(trainy)
print(testy)
trainy = out_encoder.transform(trainy)
print(trainy)
testy = out_encoder.transform(testy)
print(testy)

# fit model
# probability=True makes the SVM estimate class probabilities with an internal
# randomised cross-validation; pinning random_state keeps those estimates
# (and anything derived from predict_proba) reproducible between runs.
model = SVC(kernel='linear', probability=True, verbose=True, random_state=42)

['1_L' '1_L' '1_L' ... '99_R' '99_R' '99_R']
[110 110 110 ... 503 503 503]
[110 110 110 ... 503 503 503]


In [None]:
# Report the number of training samples, then train the SVM on them.
# The fit call stays last so the notebook displays the fitted estimator.
n_train_samples = len(trainX)
print(n_train_samples)
model.fit(X=trainX, y=trainy)

6967
[LibSVM]

In [None]:
import pickle
print("Saving")
filename = 'ubipr_4dvgg16_svm_trained_model.sav'
# Serialise the trained SVM. The context manager guarantees the file is
# flushed and closed even if pickling fails part-way through.
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)

In [None]:
# Predict identities for both splits with the trained SVM.
yhat_train, yhat_test = model.predict(trainX), model.predict(testX)
# Fraction of exactly matching labels on each split.
score_train = accuracy_score(y_true=trainy, y_pred=yhat_train)
score_test = accuracy_score(y_true=testy, y_pred=yhat_test)
# Report both accuracies as percentages.
accuracy_percentages = (score_train * 100, score_test * 100)
print('Accuracy: train=%.3f, test=%.3f' % accuracy_percentages)

In [None]:
from sklearn.metrics import roc_curve, auc
# roc_curve needs binary ground truth and *continuous* scores. Feeding it the
# hard predicted class indices of a multi-class problem yields a meaningless
# curve, so build a micro-averaged one-vs-rest ROC instead: every
# (sample, class) pair is one binary trial, positive when the class is the
# sample's true identity and scored with the SVM's predicted probability.
test_scores = model.predict_proba(testX)  # columns follow model.classes_
is_true_class = asarray(testy)[:, None] == model.classes_[None, :]
fpr, tpr, threshold = roc_curve(is_true_class.ravel().astype(int), test_scores.ravel())
fnr = 1 - tpr
# Equal error rate: the false-positive rate at the operating point where the
# false-positive and false-negative rates are closest to each other.
print(fpr[nanargmin(absolute((fnr - fpr)))])

In [None]:
from tensorflow.keras.models import Sequential,Model
from tensorflow.keras.layers import ZeroPadding2D,Convolution2D,MaxPooling2D
from tensorflow.keras.layers import Dense,Dropout,Softmax,Flatten,Activation,BatchNormalization
from tensorflow.keras.preprocessing.image import load_img,img_to_array
from tensorflow.keras.applications.imagenet_utils import preprocess_input
import tensorflow as tf

In [None]:
# Size the softmax layer from the fitted label encoder instead of a hard-coded
# class count, so the network always matches the number of encoded identities.
num_classes = len(out_encoder.classes_)

# Small fully connected classifier on top of the normalised embeddings:
# Dense(100) -> BN -> tanh -> Dropout -> Dense(10) -> BN -> tanh -> Dropout
# -> Dense(num_classes) -> softmax.
classifier_model = Sequential()
classifier_model.add(Dense(units=100, input_dim=trainX.shape[1], kernel_initializer='glorot_uniform'))
classifier_model.add(BatchNormalization())
classifier_model.add(Activation('tanh'))
classifier_model.add(Dropout(0.3))
classifier_model.add(Dense(units=10, kernel_initializer='glorot_uniform'))
classifier_model.add(BatchNormalization())
classifier_model.add(Activation('tanh'))
classifier_model.add(Dropout(0.2))
classifier_model.add(Dense(units=num_classes, kernel_initializer='he_uniform'))
classifier_model.add(Activation('softmax'))
# Labels are integer class ids (not one-hot), hence the sparse loss.
classifier_model.compile(
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    optimizer='nadam',
    metrics=['accuracy'],
)

In [None]:
# Train the dense classifier for 300 epochs; the test split is passed as the
# validation data, so validation metrics are reported on it after every epoch.
classifier_model.fit(
    x=trainX,
    y=trainy,
    epochs=300,
    validation_data=(testX, testy),
)

In [None]:
# Number of distinct encoded identities present in the training labels.
distinct_train_labels = set(trainy)
len(distinct_train_labels)

In [None]:
from sklearn.ensemble import RandomForestClassifier
# A random forest is stochastic (bootstrap samples and random feature subsets);
# a fixed random_state makes the trained model and its scores reproducible.
rf = RandomForestClassifier(n_estimators=25, random_state=42)
rf.fit(trainX, trainy)

In [None]:
import pickle
print("Saving")
filename = 'ubipr_vgg16_rf_trained_model.sav'
# Serialise the trained random forest. The context manager guarantees the
# file is flushed and closed even if pickling fails part-way through.
with open(filename, 'wb') as model_file:
    pickle.dump(rf, model_file)

In [None]:
# Predict identities for both splits with the trained random forest.
yhat_train, yhat_test = rf.predict(trainX), rf.predict(testX)
# Fraction of exactly matching labels on each split.
score_train = accuracy_score(y_true=trainy, y_pred=yhat_train)
score_test = accuracy_score(y_true=testy, y_pred=yhat_test)
# Report both accuracies as percentages.
accuracy_percentages = (score_train * 100, score_test * 100)
print('Accuracy: train=%.3f, test=%.3f' % accuracy_percentages)

In [None]:
from sklearn.metrics import roc_curve, auc
# roc_curve needs binary ground truth and *continuous* scores. Feeding it the
# hard predicted class indices of a multi-class problem yields a meaningless
# curve, so build a micro-averaged one-vs-rest ROC instead: every
# (sample, class) pair is one binary trial, positive when the class is the
# sample's true identity and scored with the forest's predicted probability.
test_scores = rf.predict_proba(testX)  # columns follow rf.classes_
is_true_class = asarray(testy)[:, None] == rf.classes_[None, :]
fpr, tpr, threshold = roc_curve(is_true_class.ravel().astype(int), test_scores.ravel())
fnr = 1 - tpr
# Prints 1 - EER, where EER is the false-positive rate at the operating point
# where the false-positive and false-negative rates are closest to each other.
print(1-fpr[nanargmin(absolute((fnr - fpr)))])

In [None]:
import numpy as np
def cosine_similarity(x, y):
    """Return the cosine of the angle between vectors ``x`` and ``y``.

    Computed as the dot product of the two vectors divided by the product of
    their Euclidean norms. No special handling is applied to zero-length
    vectors.
    """
    dot_xy = np.dot(x, y)
    norm_x = np.sqrt(np.dot(x, x))
    norm_y = np.sqrt(np.dot(y, y))
    return dot_xy / (norm_x * norm_y)

In [None]:
from sklearn.neighbors import KNeighborsClassifier
# The neighbour search treats `metric` as a distance: smaller means closer.
# A similarity function (larger = closer) in that slot would make the
# classifier vote with the *least* similar samples, so the built-in cosine
# distance (1 - cosine similarity) is used instead.
knn = KNeighborsClassifier(n_neighbors=25, metric='cosine')
knn.fit(trainX, trainy)

In [None]:
# Predict identities for both splits with the fitted k-NN classifier.
yhat_train, yhat_test = knn.predict(trainX), knn.predict(testX)
# Fraction of exactly matching labels on each split.
score_train = accuracy_score(y_true=trainy, y_pred=yhat_train)
score_test = accuracy_score(y_true=testy, y_pred=yhat_test)
# Report both accuracies as percentages.
accuracy_percentages = (score_train * 100, score_test * 100)
print('Accuracy: train=%.3f, test=%.3f' % accuracy_percentages)

In [None]:
# Display the first feature value of the first training embedding.
trainX[0, 0]