# 1. Import modules and load dataset

In [None]:
#import
import scipy.io
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
import random

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import normalize

import matplotlib.pyplot as plt

import os
# Restrict this process to one GPU: enumerate devices by PCI bus id and expose
# only device "6" to CUDA.
# NOTE(review): these variables are set after `import torch`; presumably this
# relies on CUDA not having been initialised yet — confirm.
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]="6"
# Report how many CUDA devices torch can see.
print (torch.cuda.device_count())

In [None]:
#load features
# `features` is indexed as a 4-D array further down (features[mask,:,:,:]);
# `target_classes` holds one numeric target per sample (thresholded at 20/50/70 below).
# NOTE(review): presumably face crops and ages from the wiki_crop set — confirm.
features = np.load("/l/ear/electrode/users/zz47/wiki_crop/features.npy")
target_classes = np.load("/l/ear/electrode/users/zz47/wiki_crop/target_classes.npy")

In [None]:
#use a model to extract features from inputs in mini-batches (default batch size 100)
# Per-channel mean subtracted from image batches; shaped (1, C, 1, 1) so it
# broadcasts over (N, C, H, W) tensors.
mean = torch.tensor([129.186279296875, 104.76238250732422, 93.59396362304688]).view(1, -1, 1, 1)
def extract_features(images, model, batch_size=100, device=None):
    """Run ``model`` over ``images`` in mini-batches and collect the outputs.

    Args:
        images: NumPy array. A 4-D array is treated as channel-last images
            (N, H, W, C): each batch is transposed to (N, C, H, W), has
            ``mean`` subtracted and is then divided by 255. Any other array
            is sliced along its first axis and fed to the model unchanged.
        model: ``nn.Module`` producing a 2-D (batch, dim) output. It is
            switched to eval mode.
        batch_size: number of samples per forward pass.
        device: device the batches are moved to. Defaults to the device of
            the model's first parameter ('cuda' if it has no parameters).

    Returns:
        float64 NumPy array of shape (N, dim), where dim is the width of the
        model output. For empty input the width falls back to 2622 for 4-D
        input and 64 otherwise.

    Raises:
        ValueError: if ``batch_size`` is smaller than 1.
    """
    # NOTE(review): Vgg_face.meta lists std [1, 1, 1]; the additional division
    # by 255 below is kept unchanged — confirm it matches the pretrained weights.
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")
    model.eval()
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cuda')

    N = np.shape(images)[0]
    is_image_batch = len(images.shape) == 4
    new_features = None
    for i in range(0, N, batch_size):
        chunk = images[i:i+batch_size]
        if is_image_batch:
            # torch.tensor copies, so the in-place ops never modify `images`
            temp_feature = torch.tensor(np.transpose(chunk, [0,3,1,2]), dtype=torch.float32)
            temp_feature.sub_(mean)
            temp_feature /= 255.0
        else:
            temp_feature = torch.tensor(chunk, dtype=torch.float32)

        with torch.no_grad():
            batch_out = model(temp_feature.to(device)).cpu().numpy()
        if new_features is None:
            # allocate once the output width is known from the first batch
            new_features = np.zeros([N, batch_out.shape[1]])
        new_features[i:i+len(chunk),:] = batch_out

    if new_features is None:
        # empty input: no batch was run, keep the legacy output widths
        new_features = np.zeros([N, 2622 if is_image_batch else 64])
    return new_features

In [None]:
#Nearest Neighbor
def l1_1NN(matrix, vector):
    """Return the index of the row of ``matrix`` nearest to ``vector`` in L1 distance.

    ``vector`` is broadcast against every row; ties resolve to the first
    minimum, as with ``np.argmin``.
    """
    l1_distances = np.abs(matrix - vector).sum(axis=1)
    return np.argmin(l1_distances)

# 2. Preparing dataset

In [None]:
# Target standardiser. It is fitted on ALL targets (training and held-out
# ranges alike), treated as a single feature column via reshape(-1, 1).
scaler = StandardScaler()
scaler.fit(target_classes.reshape(-1, 1))

In [None]:
#novel setting
from sklearn.model_selection import KFold

# Three held-out target ranges: <20, [50, 70) and >=70.
test_mask1 = target_classes < 20
test_mask2 = (target_classes >= 50) & (target_classes < 70)
test_mask3 = target_classes >= 70
X_test1, y_test1 = features[test_mask1,:,:,:], target_classes[test_mask1]
X_test2, y_test2 = features[test_mask2,:,:,:], target_classes[test_mask2]
X_test3, y_test3 = features[test_mask3,:,:,:], target_classes[test_mask3]

# Everything in [20, 50) is available for training/validation.
train_mask = ((target_classes < 50) & (target_classes >= 20))
X_train, y_train = features[train_mask,:,:,:], target_classes[train_mask]

# 10-fold split with a fixed seed; fold 8 supplies the validation set.
kf = KFold(n_splits=10, shuffle=True, random_state=100)
kf.get_n_splits(X_train)
for i, (train_index, val_index) in enumerate(kf.split(X_train)):
    print (i)
    #fold 8
    if i != 8:
        continue
    # take the validation rows before X_train/y_train are rebound to the training rows
    X_val, y_val = X_train[val_index], y_train[val_index]
    X_train, y_train = X_train[train_index], y_train[train_index]


In [None]:
#check number of samples
# Shapes are printed in the order: train, val, test1, test2, test3.
for _split in (X_train, X_val, X_test1, X_test2, X_test3):
    print (_split.shape)

In [None]:
# Standardise the targets of every split with the fitted scaler; each result
# is flattened back to a 1-D vector.
(scaled_y_train, scaled_y_val,
 scaled_y_test1, scaled_y_test2, scaled_y_test3) = (
    scaler.transform(_y.reshape(-1, 1))[:,0]
    for _y in (y_train, y_val, y_test1, y_test2, y_test3)
)

In [None]:
def inverse_transform(y, scaler):
    """Map standardised values back to the original target scale.

    Equivalent to ``scaler.inverse_transform`` but written as plain
    arithmetic on the fitted ``var_`` and ``mean_``.
    """
    std = np.sqrt(scaler.var_)
    rescaled = y * std
    return rescaled + scaler.mean_

In [None]:
#load base_model
class Vgg_face(nn.Module):
    """VGG-style network: five pooled stages of 3x3 convolutions (13 conv
    layers in total) followed by three fully connected layers with 2622 outputs.

    Layer attribute names and their creation order are kept as-is because
    they define the keys of the model's state dict.
    """

    def __init__(self):
        super(Vgg_face, self).__init__()
        self.meta = {'mean': [129.186279296875, 104.76238250732422, 93.59396362304688],
                     'std': [1, 1, 1],
                     'imageSize': [224, 224, 3]}

        def conv(n_in, n_out):
            # 3x3 convolution, stride 1, padding 1
            return nn.Conv2d(n_in, n_out, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))

        def relu():
            return nn.ReLU(inplace=True)

        def pool():
            # 2x2 max pooling, stride 2
            return nn.MaxPool2d(kernel_size=[2, 2], stride=[2, 2], padding=0, dilation=1, ceil_mode=False)

        # stage 1
        self.conv1_1 = conv(3, 64)
        self.relu1_1 = relu()
        self.conv1_2 = conv(64, 64)
        self.relu1_2 = relu()
        self.pool1 = pool()
        # stage 2
        self.conv2_1 = conv(64, 128)
        self.relu2_1 = relu()
        self.conv2_2 = conv(128, 128)
        self.relu2_2 = relu()
        self.pool2 = pool()
        # stage 3
        self.conv3_1 = conv(128, 256)
        self.relu3_1 = relu()
        self.conv3_2 = conv(256, 256)
        self.relu3_2 = relu()
        self.conv3_3 = conv(256, 256)
        self.relu3_3 = relu()
        self.pool3 = pool()
        # stage 4
        self.conv4_1 = conv(256, 512)
        self.relu4_1 = relu()
        self.conv4_2 = conv(512, 512)
        self.relu4_2 = relu()
        self.conv4_3 = conv(512, 512)
        self.relu4_3 = relu()
        self.pool4 = pool()
        # stage 5
        self.conv5_1 = conv(512, 512)
        self.relu5_1 = relu()
        self.conv5_2 = conv(512, 512)
        self.relu5_2 = relu()
        self.conv5_3 = conv(512, 512)
        self.relu5_3 = relu()
        self.pool5 = pool()
        # classifier head
        self.fc6 = nn.Linear(in_features=25088, out_features=4096, bias=True)
        self.relu6 = relu()
        self.dropout6 = nn.Dropout(p=0.5)
        self.fc7 = nn.Linear(in_features=4096, out_features=4096, bias=True)
        self.relu7 = relu()
        self.dropout7 = nn.Dropout(p=0.5)
        self.fc8 = nn.Linear(in_features=4096, out_features=2622, bias=True)

    def forward(self, x0):
        """Apply all layers in order and return the output of ``fc8``."""
        out = self.relu1_1(self.conv1_1(x0))
        out = self.relu1_2(self.conv1_2(out))
        out = self.pool1(out)

        out = self.relu2_1(self.conv2_1(out))
        out = self.relu2_2(self.conv2_2(out))
        out = self.pool2(out)

        out = self.relu3_1(self.conv3_1(out))
        out = self.relu3_2(self.conv3_2(out))
        out = self.relu3_3(self.conv3_3(out))
        out = self.pool3(out)

        out = self.relu4_1(self.conv4_1(out))
        out = self.relu4_2(self.conv4_2(out))
        out = self.relu4_3(self.conv4_3(out))
        out = self.pool4(out)

        out = self.relu5_1(self.conv5_1(out))
        out = self.relu5_2(self.conv5_2(out))
        out = self.relu5_3(self.conv5_3(out))
        out = self.pool5(out)

        # flatten everything except the batch axis before the linear layers
        out = out.contiguous().view(out.size(0), -1)
        out = self.dropout6(self.relu6(self.fc6(out)))
        out = self.dropout7(self.relu7(self.fc7(out)))
        return self.fc8(out)

def vgg_face(weights_path=None, **kwargs):
    """
    Build a ``Vgg_face`` model, optionally initialised from saved weights.

    Args:
        weights_path (str): If set, a state dict is read from this path and
            loaded into the model.
        **kwargs: accepted for call compatibility; currently ignored.

    Returns:
        Vgg_face: the model, left on the CPU; callers move it to their device.
    """
    model = Vgg_face()
    if weights_path:
        # The freshly built model lives on the CPU, so map the checkpoint
        # there as well. This avoids failing when the file was saved from a
        # GPU that is not visible to this process.
        state_dict = torch.load(weights_path, map_location='cpu')
        model.load_state_dict(state_dict)
    return model

#base_model = nn.DataParallel(vgg_face("/l/ear/electrode/users/zz47/vgg_face_dag.pth")).to('cuda')
# Build the network from the saved weights and move it to the GPU.
base_model = vgg_face("/l/ear/electrode/users/zz47/vgg_face_dag.pth").to('cuda')
# Switch to eval mode so the dropout layers (dropout6/dropout7) are inactive.
base_model.eval()

In [None]:
#use base_model to extract feature from image with batchsize=100
# Splits are processed in the order train, val, test1, test2, test3.
(X_train_features, X_val_features,
 X_test1_features, X_test2_features, X_test3_features) = (
    extract_features(_images, base_model)
    for _images in (X_train, X_val, X_test1, X_test2, X_test3)
)

for _feats in (X_train_features, X_val_features,
               X_test1_features, X_test2_features, X_test3_features):
    print (np.shape(_feats))

In [None]:
#constant baseline (labelled "random guess" in the original notebook):
#predict the training-set mean target for every test sample and print the MAE
for _y_test in (y_test1, y_test2, y_test3):
    print (np.mean(np.abs(_y_test-np.mean(y_train))))

# 3. Baseline model

In [None]:
#Regular dataset
class FeatureDataset(Dataset):
    """Dataset of (feature row, target) pairs, each cast to float32 on access."""

    def __init__(self, x, y, transform=None):
        # `transform` is stored but not applied anywhere in this class
        self.x, self.y, self.transform = x, y, transform

    def __len__(self):
        # one item per row of the feature matrix
        return len(self.x)

    def __getitem__(self, idx):
        row = self.x[idx,:]
        label = self.y[idx]
        return row.astype('float32'), label.astype('float32')

# One dataset per split: extracted features paired with standardised targets.
train_dataset = FeatureDataset(X_train_features, scaled_y_train)
val_dataset = FeatureDataset(X_val_features, scaled_y_val)
test1_dataset = FeatureDataset(X_test1_features, scaled_y_test1)
test2_dataset = FeatureDataset(X_test2_features, scaled_y_test2)
test3_dataset = FeatureDataset(X_test3_features, scaled_y_test3)

# Every loader uses the same batching settings.
_loader_opts = dict(batch_size=50, shuffle=True, num_workers=8)
train_dataloader = DataLoader(train_dataset, **_loader_opts)
val_dataloader = DataLoader(val_dataset, **_loader_opts)
test1_dataloader = DataLoader(test1_dataset, **_loader_opts)
test2_dataloader = DataLoader(test2_dataset, **_loader_opts)
test3_dataloader = DataLoader(test3_dataset, **_loader_opts)

In [None]:
#baseline model
class Base_regression(nn.Module):
    """MLP regressor 2622 -> 512 -> 16 -> 1; each hidden layer is followed by
    ReLU and then dropout (p=0.5)."""

    def __init__(self):
        super().__init__()
        drop_p = 0.5
        self.fc1 = nn.Linear(2622, 512)
        self.dropout1 = nn.Dropout(p=drop_p)
        self.relu1 = nn.ReLU(inplace=True)
        self.fc2 = nn.Linear(512, 16)
        self.dropout2 = nn.Dropout(p=drop_p)
        self.relu2 = nn.ReLU(inplace=True)
        self.fc3 = nn.Linear(16, 1)

    def forward(self, x):
        hidden = self.dropout1(self.relu1(self.fc1(x)))
        hidden = self.dropout2(self.relu2(self.fc2(hidden)))
        return self.fc3(hidden)

# Baseline regressor on the GPU, trained with an L1 loss (on standardised
# targets) using Adam at lr=1e-4.
base_regression = Base_regression().to('cuda')
criterion = nn.L1Loss()
optimizer = optim.Adam(base_regression.parameters(), lr=1e-4)

In [None]:
def eval(dataloader, model):
    """Return the mean absolute error of ``model`` over ``dataloader`` in
    original target units.

    Predictions and targets are both mapped back through
    ``inverse_transform`` with the module-level ``scaler`` before the L1 loss
    is taken. Batch losses are weighted by batch size, so the result is the
    per-sample MAE over the whole loader.

    NOTE: the name shadows the builtin ``eval``; it is kept because the
    training and test cells call the function by this name.
    """
    model.eval()
    criterion = nn.L1Loss()
    total_loss = 0.0
    count = 0
    with torch.no_grad():
        for x, y in dataloader:
            # squeeze(-1) removes only the trailing output axis, so a final
            # batch holding a single sample keeps the same shape as its target
            y_predict = model(x.to('cuda')).squeeze(-1).to('cpu')
            y_predict = inverse_transform(y_predict, scaler)
            y = inverse_transform(y, scaler)
            batch_n = y.shape[0]
            total_loss += criterion(y_predict, y).item()*batch_n
            count += batch_n
    return (total_loss/count)

In [None]:
# Train the baseline regressor; after every epoch the checkpoint is
# overwritten whenever the validation MAE improves.
epochs = 50
best_val_loss = 1e8
for epoch in range(epochs):
    base_regression.train()
    print ("Training epoch: {}".format(epoch))
    # summed batch losses of this epoch (not reported by this cell)
    running_loss = 0
    for x, y in train_dataloader:
        optimizer.zero_grad()
        X_feature = x.to('cuda')
        # squeeze(-1) removes only the trailing output axis, so a final batch
        # holding a single sample keeps the same shape as its target
        y_predict = base_regression(X_feature).squeeze(-1).to('cpu')
        loss = criterion(y_predict, y)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    val_loss = eval(val_dataloader, base_regression)
    if val_loss < best_val_loss:
        print ("New best validation loss: {}, saving model.".format(val_loss))
        best_val_loss = val_loss
        torch.save(base_regression.state_dict(), "/l/ear/electrode/users/zz47/adaptation_experiments_final/base_regression_novel8.pth")

In [None]:
# Restore the saved checkpoint, then print the MAE on test1, test2 and test3.
_baseline_ckpt = torch.load("/l/ear/electrode/users/zz47/adaptation_experiments_final/base_regression_novel8.pth")
base_regression.load_state_dict(_baseline_ckpt)
for _loader in (test1_dataloader, test2_dataloader, test3_dataloader):
    print (eval(_loader, base_regression))

# 4. l1 retrieval+adaptation

In [None]:
#Finding nearest neighbor using l1 distance
# Every feature row is rescaled to unit L1 norm before the search.
X_train_features_norm = normalize(X_train_features, axis=1, norm='l1')
X_val_features_norm = normalize(X_val_features, axis=1, norm='l1')
X_val_refidx = []
for i, _query in enumerate(X_val_features_norm):
    # progress marker every 100 queries
    if i % 100 == 0:
        print (i)
    X_val_refidx.append(l1_1NN(X_train_features_norm, _query))
print ("done")

np.save("/l/ear/electrode/users/zz47/adaptation_experiments_final/X_val_refidx_l1_novel8.npy", X_val_refidx)

In [None]:
#Finding nearest neighbor using l1 distance
# Every feature row is rescaled to unit L1 norm before the search.
X_train_features_norm = normalize(X_train_features, axis=1, norm='l1')
X_test1_features_norm = normalize(X_test1_features, axis=1, norm='l1')
X_test1_refidx = []
for i, _query in enumerate(X_test1_features_norm):
    # progress marker every 100 queries
    if i % 100 == 0:
        print (i)
    X_test1_refidx.append(l1_1NN(X_train_features_norm, _query))
print ("done")

np.save("/l/ear/electrode/users/zz47/adaptation_experiments_final/X_test1_refidx_l1_novel8.npy", X_test1_refidx)

In [None]:
#Finding nearest neighbor using l1 distance
# Every feature row is rescaled to unit L1 norm before the search.
X_train_features_norm = normalize(X_train_features, axis=1, norm='l1')
X_test2_features_norm = normalize(X_test2_features, axis=1, norm='l1')
X_test2_refidx = []
for i, _query in enumerate(X_test2_features_norm):
    # progress marker every 100 queries
    if i % 100 == 0:
        print (i)
    X_test2_refidx.append(l1_1NN(X_train_features_norm, _query))
print ("done")

np.save("/l/ear/electrode/users/zz47/adaptation_experiments_final/X_test2_refidx_l1_novel8.npy", X_test2_refidx)

In [None]:
#Finding nearest neighbor using l1 distance
# Every feature row is rescaled to unit L1 norm before the search.
X_train_features_norm = normalize(X_train_features, axis=1, norm='l1')
X_test3_features_norm = normalize(X_test3_features, axis=1, norm='l1')
X_test3_refidx = []
for i, _query in enumerate(X_test3_features_norm):
    # progress marker every 100 queries
    if i % 100 == 0:
        print (i)
    X_test3_refidx.append(l1_1NN(X_train_features_norm, _query))
print ("done")

np.save("/l/ear/electrode/users/zz47/adaptation_experiments_final/X_test3_refidx_l1_novel8.npy", X_test3_refidx)

In [None]:
#find reference for each val sample
# Look up the retrieved training neighbour of every validation sample, then
# build the concatenated [query | reference] features and the target difference.
X_val_ref, y_val_ref = (X_train_features[X_val_refidx],
                        scaled_y_train[X_val_refidx])
y_val_diff = scaled_y_val-y_val_ref
X_val_diff = np.concatenate([X_val_features, X_val_ref], axis=1)

In [None]:
#find reference for each test sample
# Look up the retrieved training neighbour of every test1 sample, then build
# the concatenated [query | reference] features and the target difference.
X_test1_ref, y_test1_ref = (X_train_features[X_test1_refidx],
                            scaled_y_train[X_test1_refidx])
y_test1_diff = scaled_y_test1-y_test1_ref
X_test1_diff = np.concatenate([X_test1_features, X_test1_ref], axis=1)

In [None]:
#find reference for each test sample
# Look up the retrieved training neighbour of every test2 sample, then build
# the concatenated [query | reference] features and the target difference.
X_test2_ref, y_test2_ref = (X_train_features[X_test2_refidx],
                            scaled_y_train[X_test2_refidx])
y_test2_diff = scaled_y_test2-y_test2_ref
X_test2_diff = np.concatenate([X_test2_features, X_test2_ref], axis=1)

In [None]:
#find reference for each test sample
# Look up the retrieved training neighbour of every test3 sample, then build
# the concatenated [query | reference] features and the target difference.
X_test3_ref, y_test3_ref = (X_train_features[X_test3_refidx],
                            scaled_y_train[X_test3_refidx])
y_test3_diff = scaled_y_test3-y_test3_ref
X_test3_diff = np.concatenate([X_test3_features, X_test3_ref], axis=1)

In [None]:
#this is retrieval loss
# MAE in original units when each test target is predicted by the target of
# its retrieved neighbour; printed for test1, test2 and test3 in turn.
for _scaled_y, _ref_y in ((scaled_y_test1, y_test1_ref),
                          (scaled_y_test2, y_test2_ref),
                          (scaled_y_test3, y_test3_ref)):
    retrieval_l1_loss = np.mean(abs(inverse_transform(_scaled_y, scaler)-inverse_transform(_ref_y, scaler)))
    print (retrieval_l1_loss)

In [None]:
#training dataset that pairs every sample with a randomly drawn reference sample
class TrainFeatureDataset(Dataset):
    """Dataset of (query, random reference) pairs for the adaptation model.

    Each item is ``(diff_feature, diff_target_class)``:
      * ``diff_feature``: float32 tensor holding the query row followed by
        the reference row;
      * ``diff_target_class``: float32 value ``y[idx] - y[refidx]``.

    The reference is drawn uniformly from all samples other than ``idx``
    each time an item is accessed.
    """

    def __init__(self, x, y, transform=None):
        self.x = x
        self.y = y
        # stored but not applied anywhere in this class
        self.transform = transform

    def __len__(self):
        return np.shape(self.x)[0]

    def __getitem__(self, idx):
        featuresN = np.shape(self.x)[0]
        if featuresN < 2:
            raise IndexError("need at least two samples to draw a reference")
        if idx < 0:
            # normalise negative indices so the "never itself" rule still holds
            idx += featuresN
        # Uniform draw over the N-1 indices other than idx without building
        # the candidate list: sample from [0, N-1) and step over idx.
        refidx = random.randrange(featuresN - 1)
        if refidx >= idx:
            refidx += 1
        feature = self.x[idx,:].astype('float32')
        reffeature = self.x[refidx,:].astype('float32')
        target_class = self.y[idx].astype('float32')
        reftarget_class = self.y[refidx].astype('float32')
        diff_feature = torch.tensor(np.concatenate((feature, reffeature), axis=0))
        diff_target_class = target_class-reftarget_class
        return diff_feature, diff_target_class

# Training pairs are built from the extracted training features and the
# standardised training targets.
diff_train_dataset = TrainFeatureDataset(X_train_features, scaled_y_train)

# Shuffled mini-batches of 50 pairs, loaded by 8 worker processes.
diff_train_dataloader = DataLoader(diff_train_dataset, batch_size=50,
                        shuffle=True, num_workers=8)

In [None]:
class ValFeatureDataset(Dataset):
    """Dataset of (query, fixed reference) pairs.

    Item ``idx`` is a float32 tensor holding row ``idx`` of ``x`` followed by
    row ``idx`` of ``ref_x``, together with the float32 target difference
    ``y[idx] - ref_y[idx]``.
    """

    def __init__(self, x, y, ref_x, ref_y, transform=None):
        self.x, self.y = x, y
        self.ref_x, self.ref_y = ref_x, ref_y
        # stored but not applied anywhere in this class
        self.transform = transform

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        pair = np.concatenate((self.x[idx,:], self.ref_x[idx,:]), axis=0)
        target_gap = self.y[idx]-self.ref_y[idx]
        return torch.tensor(pair.astype('float32')), target_gap.astype('float32')

# Pair every validation/test sample with its retrieved reference; all loaders
# use the same batching settings.
_diff_loader_opts = dict(batch_size=50, shuffle=True, num_workers=8)

diff_val_dataset = ValFeatureDataset(X_val_features, scaled_y_val, X_val_ref, y_val_ref)
diff_val_dataloader = DataLoader(diff_val_dataset, **_diff_loader_opts)

diff_test1_dataset = ValFeatureDataset(X_test1_features, scaled_y_test1, X_test1_ref, y_test1_ref)
diff_test1_dataloader = DataLoader(diff_test1_dataset, **_diff_loader_opts)

diff_test2_dataset = ValFeatureDataset(X_test2_features, scaled_y_test2, X_test2_ref, y_test2_ref)
diff_test2_dataloader = DataLoader(diff_test2_dataset, **_diff_loader_opts)

diff_test3_dataset = ValFeatureDataset(X_test3_features, scaled_y_test3, X_test3_ref, y_test3_ref)
diff_test3_dataloader = DataLoader(diff_test3_dataset, **_diff_loader_opts)

In [None]:
class Adaptation_model(nn.Module):
    """MLP 5244 -> 512 -> 16 -> 1 over a concatenated pair of 2622-d feature
    vectors; each hidden layer is followed by ReLU and then dropout (p=0.5)."""

    def __init__(self):
        super().__init__()
        drop_p = 0.5
        self.fc1 = nn.Linear(5244, 512)
        self.dropout1 = nn.Dropout(p=drop_p)
        self.relu1 = nn.ReLU(inplace=True)
        self.fc2 = nn.Linear(512, 16)
        self.dropout2 = nn.Dropout(p=drop_p)
        self.relu2 = nn.ReLU(inplace=True)
        self.fc3 = nn.Linear(16, 1)

    def forward(self, x):
        hidden = self.dropout1(self.relu1(self.fc1(x)))
        hidden = self.dropout2(self.relu2(self.fc2(hidden)))
        return self.fc3(hidden)

# Adaptation model on the GPU, trained with an L1 loss using Adam at lr=1e-4.
# `criterion` and `optimizer` rebind the names used by the baseline cells.
adaptation_model = Adaptation_model().to('cuda')
criterion = nn.L1Loss()
optimizer = optim.Adam(adaptation_model.parameters(), lr=1e-4)

In [None]:
def eval(dataloader, model):
    """Return the mean absolute error of predicted target differences in
    original target units.

    The loader yields standardised target differences. A difference cancels
    the scaler mean, so multiplying the L1 loss by the standard deviation of
    the module-level ``scaler`` converts it back to original units. Batch
    losses are weighted by batch size, giving the per-sample MAE.

    NOTE: the name shadows the builtin ``eval``; it is kept because the
    training and test cells call the function by this name.
    """
    model.eval()
    criterion = nn.L1Loss()
    # the scaler was fitted on a single column, so var_ holds one value
    target_std = float(np.sqrt(scaler.var_)[0])
    total_loss = 0.0
    count = 0
    with torch.no_grad():
        for x, y in dataloader:
            # squeeze(-1) removes only the trailing output axis, so a final
            # batch holding a single sample keeps the same shape as its target
            y_predict = model(x.to('cuda')).squeeze(-1).to('cpu')
            batch_n = y.shape[0]
            total_loss += criterion(y_predict, y).item()*target_std*batch_n
            count += batch_n
    return (total_loss/(count))

In [None]:
#adaptation model
# Train on randomly paired training samples; after every epoch the checkpoint
# is overwritten whenever the validation MAE (retrieved references) improves.
epochs = 50
best_val_loss = 1e8
for epoch in range(epochs):
    adaptation_model.train()
    print ("Training epoch: {}".format(epoch))
    # summed batch losses of this epoch (not reported by this cell)
    running_loss = 0
    for x, y in diff_train_dataloader:
        optimizer.zero_grad()
        X_feature = x.to('cuda')
        # squeeze(-1) removes only the trailing output axis, so a final batch
        # holding a single sample keeps the same shape as its target
        y_predict = adaptation_model(X_feature).squeeze(-1).to('cpu')
        loss = criterion(y_predict, y)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    val_loss = eval(diff_val_dataloader, adaptation_model)
    if val_loss < best_val_loss:
        print ("New best validation loss: {}, saving model.".format(val_loss))
        best_val_loss = val_loss
        torch.save(adaptation_model.state_dict(), "/l/ear/electrode/users/zz47/adaptation_experiments_final/adaptation_model_novel8.pth")
    

In [None]:
# Restore the saved checkpoint, then print the MAE on test1, test2 and test3.
_adaptation_ckpt = torch.load("/l/ear/electrode/users/zz47/adaptation_experiments_final/adaptation_model_novel8.pth")
adaptation_model.load_state_dict(_adaptation_ckpt)
for _loader in (diff_test1_dataloader, diff_test2_dataloader, diff_test3_dataloader):
    print (eval(_loader, adaptation_model))

# 5. Learned Distance (Siamese)

In [None]:
class Siamese_dist(nn.Module):
    """Embedding network 2622 -> 512 -> 64 with a ReLU after the first layer only."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(in_features=2622, out_features=512, bias=True)
        self.relu1 = nn.ReLU(inplace=True)
        self.fc2 = nn.Linear(in_features=512, out_features=64, bias=True)

    def forward(self, x):
        hidden = self.relu1(self.fc1(x))
        return self.fc2(hidden)

# Embedding network on the GPU with an Adam optimiser at lr=1e-4.
siamese_dist = Siamese_dist().to('cuda')
# Not used by the triplet training loop below, which builds its own TripletMarginLoss.
criterion = nn.L1Loss()
optimizer = optim.Adam(siamese_dist.parameters(), lr=1e-4)

In [None]:
class TrainFeatureDataset(Dataset):
    """Triplet dataset yielding (anchor, positive, negative) float32 feature rows.

    * positive: a random sample whose target equals the anchor's target
      (the anchor itself can be drawn);
    * negative: a random sample whose target differs from the anchor's by at
      least 10.
    """

    def __init__(self, x, y, transform=None):
        self.x, self.y = x, y
        # stored but not applied anywhere in this class
        self.transform = transform

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        anchor_target = self.y[idx].astype('float32')
        anchor = self.x[idx,:].astype('float32')

        # positive first, then negative: the order of the random draws is fixed
        same_target = np.where(self.y == anchor_target)[0]
        posidx = random.choice(same_target)
        positive = self.x[posidx,:].astype('float32')

        lower, upper = anchor_target-10, anchor_target+10
        far_target = np.where((self.y <= lower) | (self.y >= upper))[0]
        negidx = random.choice(far_target)
        negative = self.x[negidx,:].astype('float32')

        return anchor, positive, negative

# Triplets are sampled using the unscaled targets (y_train), so the +/-10
# margin in the dataset's negative mask is expressed in original target units.
siamese_train_dataset = TrainFeatureDataset(X_train_features, y_train)

# Shuffled mini-batches of 50 triplets, loaded by 8 worker processes.
siamese_train_dataloader = DataLoader(siamese_train_dataset, batch_size=50,
                        shuffle=True, num_workers=8)

In [None]:
def eval_siamese(y_val, X_val_features):
    """Evaluate nearest-neighbour retrieval in the learned embedding space.

    Training and query features are embedded with the module-level
    ``siamese_dist`` model. Every query is matched to its L1-nearest training
    embedding and the neighbour's target (from ``y_train``) is used as the
    prediction.

    Args:
        y_val: targets of the query samples, in the same units as ``y_train``.
        X_val_features: base-model features of the query samples.

    Returns:
        tuple: ``(mae, refidx)`` where ``mae`` is the mean absolute error of
        the neighbour targets and ``refidx`` lists, per query, the index of
        its nearest training sample.
    """
    # eval() must be called on the model instance; calling it on the
    # Siamese_dist class raises TypeError (missing `self`)
    siamese_dist.eval()
    X_train_features_64 = extract_features(X_train_features, siamese_dist)
    X_val_features_64 = extract_features(X_val_features, siamese_dist)
    X_val_refidx_siamese = [
        l1_1NN(X_train_features_64, query) for query in X_val_features_64
    ]

    y_val_ref_siamese = y_train[X_val_refidx_siamese]
    mae = np.mean(np.abs(y_val-y_val_ref_siamese))

    return (mae, X_val_refidx_siamese)

In [None]:
# Train the embedding network with a triplet margin loss (L1 distance,
# margin 1); after every epoch the checkpoint is overwritten whenever the
# validation retrieval MAE improves.
epochs = 50
triplet_loss = nn.TripletMarginLoss(margin=1, p=1.0)
best_mae = 1e8
for epoch in range(epochs):
    siamese_dist.train()
    print ("Training epoch: {}".format(epoch))
    # summed batch losses of this epoch (not reported by this cell)
    running_loss = 0
    for x, posx, negx in siamese_train_dataloader:
        optimizer.zero_grad()
        x = x.to('cuda')
        posx = posx.to('cuda')
        negx = negx.to('cuda')
        # Embeddings are kept as (batch, 64); squeezing them would drop the
        # batch axis whenever the final batch holds a single triplet.
        y = siamese_dist(x).to('cpu')
        posy = siamese_dist(posx).to('cpu')
        negy = siamese_dist(negx).to('cpu')
        loss = triplet_loss(y, posy, negy)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    mae, _ = eval_siamese(y_val, X_val_features)
    if mae < best_mae:
        print ("new best val mae: {}".format(mae))
        best_mae = mae
        torch.save(siamese_dist.state_dict(), "/l/ear/electrode/users/zz47/adaptation_experiments_final/siamese_dist_novel8.pth")

In [None]:
# Restore the saved embedding network, then retrieve neighbours for every split.
siamese_dist.load_state_dict(torch.load("/l/ear/electrode/users/zz47/adaptation_experiments_final/siamese_dist_novel8.pth"))
# Validation: only the neighbour indices are kept (the MAE is discarded).
_, X_val_refidx_siamese = eval_siamese(y_val, X_val_features)
# Test sets: print the retrieval MAE and keep the neighbour indices for the adaptation step.
mae, X_test1_refidx_siamese = eval_siamese(y_test1, X_test1_features)
print (mae)
mae, X_test2_refidx_siamese = eval_siamese(y_test2, X_test2_features)
print (mae)
mae, X_test3_refidx_siamese = eval_siamese(y_test3, X_test3_features)
print (mae)

# 6. Learned Distance (Siamese) + Adaptation

In [None]:
#find reference for each val sample
# Same construction as for L1 retrieval, using the neighbours found in the
# learned embedding space. Rebinds X_val_ref / y_val_ref.
X_val_ref, y_val_ref = (X_train_features[X_val_refidx_siamese],
                        scaled_y_train[X_val_refidx_siamese])
y_val_diff = scaled_y_val-y_val_ref
X_val_diff = np.concatenate([X_val_features, X_val_ref], axis=1)

In [None]:
#find reference for each test sample
# Neighbours come from the learned embedding space. Rebinds X_test1_ref / y_test1_ref.
X_test1_ref, y_test1_ref = (X_train_features[X_test1_refidx_siamese],
                            scaled_y_train[X_test1_refidx_siamese])
y_test1_diff = scaled_y_test1-y_test1_ref
X_test1_diff = np.concatenate([X_test1_features, X_test1_ref], axis=1)

In [None]:
# Reference for each test2 sample, using neighbours from the learned
# embedding space. Rebinds X_test2_ref / y_test2_ref.
X_test2_ref, y_test2_ref = (X_train_features[X_test2_refidx_siamese],
                            scaled_y_train[X_test2_refidx_siamese])
y_test2_diff = scaled_y_test2-y_test2_ref
X_test2_diff = np.concatenate([X_test2_features, X_test2_ref], axis=1)

In [None]:
# Reference for each test3 sample, using neighbours from the learned
# embedding space. Rebinds X_test3_ref / y_test3_ref.
X_test3_ref, y_test3_ref = (X_train_features[X_test3_refidx_siamese],
                            scaled_y_train[X_test3_refidx_siamese])
y_test3_diff = scaled_y_test3-y_test3_ref
X_test3_diff = np.concatenate([X_test3_features, X_test3_ref], axis=1)

In [None]:
#this is retrieval loss
# MAE in original units when each test1 target is predicted by the target of
# its retrieved neighbour. Only test1 is reported in this cell.
retrieval_l1_loss = np.mean(abs(inverse_transform(scaled_y_test1, scaler)-inverse_transform(y_test1_ref, scaler)))
print (retrieval_l1_loss)

In [None]:
#training dataset that pairs every sample with a randomly drawn reference sample
class TrainFeatureDataset(Dataset):
    """Dataset of (query, random reference) pairs for the adaptation model.

    Each item is ``(diff_feature, diff_target_class)``:
      * ``diff_feature``: float32 tensor holding the query row followed by
        the reference row;
      * ``diff_target_class``: float32 value ``y[idx] - y[refidx]``.

    The reference is drawn uniformly from all samples other than ``idx``
    each time an item is accessed.
    """

    def __init__(self, x, y, transform=None):
        self.x = x
        self.y = y
        # stored but not applied anywhere in this class
        self.transform = transform

    def __len__(self):
        return np.shape(self.x)[0]

    def __getitem__(self, idx):
        featuresN = np.shape(self.x)[0]
        if featuresN < 2:
            raise IndexError("need at least two samples to draw a reference")
        if idx < 0:
            # normalise negative indices so the "never itself" rule still holds
            idx += featuresN
        # Uniform draw over the N-1 indices other than idx without building
        # the candidate list: sample from [0, N-1) and step over idx.
        refidx = random.randrange(featuresN - 1)
        if refidx >= idx:
            refidx += 1
        feature = self.x[idx,:].astype('float32')
        reffeature = self.x[refidx,:].astype('float32')
        target_class = self.y[idx].astype('float32')
        reftarget_class = self.y[refidx].astype('float32')
        diff_feature = torch.tensor(np.concatenate((feature, reffeature), axis=0))
        diff_target_class = target_class-reftarget_class
        return diff_feature, diff_target_class

# Training pairs are built from the extracted training features and the
# standardised training targets.
diff_train_dataset = TrainFeatureDataset(X_train_features, scaled_y_train)

# Shuffled mini-batches of 50 pairs, loaded by 8 worker processes.
diff_train_dataloader = DataLoader(diff_train_dataset, batch_size=50,
                        shuffle=True, num_workers=8)

In [None]:
class ValFeatureDataset(Dataset):
    """Dataset of (query, fixed reference) pairs.

    Item ``idx`` is a float32 tensor holding row ``idx`` of ``x`` followed by
    row ``idx`` of ``ref_x``, together with the float32 target difference
    ``y[idx] - ref_y[idx]``.
    """

    def __init__(self, x, y, ref_x, ref_y, transform=None):
        self.x, self.y = x, y
        self.ref_x, self.ref_y = ref_x, ref_y
        # stored but not applied anywhere in this class
        self.transform = transform

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        pair = np.concatenate((self.x[idx,:], self.ref_x[idx,:]), axis=0)
        target_gap = self.y[idx]-self.ref_y[idx]
        return torch.tensor(pair.astype('float32')), target_gap.astype('float32')

# Pair every validation/test sample with its reference; all loaders use the
# same batching settings.
_diff_loader_opts = dict(batch_size=50, shuffle=True, num_workers=8)

diff_val_dataset = ValFeatureDataset(X_val_features, scaled_y_val, X_val_ref, y_val_ref)
diff_val_dataloader = DataLoader(diff_val_dataset, **_diff_loader_opts)

diff_test1_dataset = ValFeatureDataset(X_test1_features, scaled_y_test1, X_test1_ref, y_test1_ref)
diff_test1_dataloader = DataLoader(diff_test1_dataset, **_diff_loader_opts)

diff_test2_dataset = ValFeatureDataset(X_test2_features, scaled_y_test2, X_test2_ref, y_test2_ref)
diff_test2_dataloader = DataLoader(diff_test2_dataset, **_diff_loader_opts)

diff_test3_dataset = ValFeatureDataset(X_test3_features, scaled_y_test3, X_test3_ref, y_test3_ref)
diff_test3_dataloader = DataLoader(diff_test3_dataset, **_diff_loader_opts)

In [None]:
class Adaptation_model(nn.Module):
    """MLP 5244 -> 512 -> 16 -> 1 over a concatenated pair of 2622-d feature
    vectors; each hidden layer is followed by ReLU and then dropout (p=0.5)."""

    def __init__(self):
        super().__init__()
        drop_p = 0.5
        self.fc1 = nn.Linear(5244, 512)
        self.dropout1 = nn.Dropout(p=drop_p)
        self.relu1 = nn.ReLU(inplace=True)
        self.fc2 = nn.Linear(512, 16)
        self.dropout2 = nn.Dropout(p=drop_p)
        self.relu2 = nn.ReLU(inplace=True)
        self.fc3 = nn.Linear(16, 1)

    def forward(self, x):
        hidden = self.dropout1(self.relu1(self.fc1(x)))
        hidden = self.dropout2(self.relu2(self.fc2(hidden)))
        return self.fc3(hidden)

# Fresh adaptation model on the GPU; its weights are replaced further down by
# the checkpoint loaded from adaptation_model_novel8.pth.
adaptation_model = Adaptation_model().to('cuda')
criterion = nn.L1Loss()
optimizer = optim.Adam(adaptation_model.parameters(), lr=1e-4)

In [None]:
def eval(dataloader, model):
    """Return the mean absolute error of predicted target differences in
    original target units.

    The loader yields standardised target differences. A difference cancels
    the scaler mean, so multiplying the L1 loss by the standard deviation of
    the module-level ``scaler`` converts it back to original units. Batch
    losses are weighted by batch size, giving the per-sample MAE.

    NOTE: the name shadows the builtin ``eval``; it is kept because the test
    cell calls the function by this name.
    """
    model.eval()
    criterion = nn.L1Loss()
    # the scaler was fitted on a single column, so var_ holds one value
    target_std = float(np.sqrt(scaler.var_)[0])
    total_loss = 0.0
    count = 0
    with torch.no_grad():
        for x, y in dataloader:
            # squeeze(-1) removes only the trailing output axis, so a final
            # batch holding a single sample keeps the same shape as its target
            y_predict = model(x.to('cuda')).squeeze(-1).to('cpu')
            batch_n = y.shape[0]
            total_loss += criterion(y_predict, y).item()*target_std*batch_n
            count += batch_n
    return (total_loss/(count))

In [None]:
# Load the saved adaptation checkpoint, then print the MAE on test1, test2
# and test3 using the references retrieved in the learned embedding space.
_adaptation_ckpt = torch.load("/l/ear/electrode/users/zz47/adaptation_experiments_final/adaptation_model_novel8.pth")
adaptation_model.load_state_dict(_adaptation_ckpt)
for _loader in (diff_test1_dataloader, diff_test2_dataloader, diff_test3_dataloader):
    print (eval(_loader, adaptation_model))