In [None]:
import os
import sys
import time
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
%load_ext autoreload

from sklearn.metrics import mean_squared_error, r2_score

import torch
import torch.nn as nn
import sagemaker
from sagemaker.pytorch import PyTorch
from sagemaker.tuner import ContinuousParameter, HyperparameterTuner
from sagemaker.amazon.amazon_estimator import get_image_uri

# Make the sibling py-conjugated source directory importable; the guard keeps
# sys.path free of duplicates when this cell is re-run.
module_path = os.path.abspath('../py-conjugated/')
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

# Project-local modules, resolved through the path entry added above.
import morphology_networks as net
import model_training as train
import model_testing as test
import physically_informed_loss_functions as pilf
import network_utils as nuts

# Fix PyTorch's global RNG seed so runs are repeatable.
torch.manual_seed(28)

In [None]:
# S3 bucket name and the key prefixes handed to the dataset constructors.
data_bucket = 'sagemaker-us-east-2-362637960691'
train_data_path = 'py-conjugated/m2py_labels/OPV_labels/train_set/'
test_data_path = 'py-conjugated/m2py_labels/OPV_labels/test_set/'

# Full s3:// URI for model state files.
# NOTE(review): this prefix is spelled 'py_conjugated' (underscore) while the
# data prefixes above use 'py-conjugated' (hyphen) -- confirm this is intended.
model_states_path = f's3://{data_bucket}/py_conjugated/model_states/OPV/OPV_encoder_1/'

In [None]:
%autoreload

# Build one dataset per split from the bucket name and that split's key prefix.
train_dataset = nuts.OPV_ImDataset(data_bucket, train_data_path)
test_dataset = nuts.OPV_ImDataset(data_bucket, test_data_path)

# Wrap the datasets in batch iterators (default sampling, i.e. no shuffling).
train_dataloader = torch.utils.data.DataLoader(dataset = train_dataset, batch_size = 26)
test_dataloader = torch.utils.data.DataLoader(dataset = test_dataset, batch_size = 10)

# Number of training batches per epoch.
print(len(train_dataloader))

In [None]:
# The previous version read `x_train_tensor` and `y_test`, neither of which is
# defined anywhere in this notebook, so the cell failed with a NameError on a
# fresh kernel. Derive the dimensions from one training batch instead.
# NOTE(review): assumes each batch is an (images, labels) pair with channels on
# axis 1 of `images` and one column per predicted feature in `labels` -- confirm.
sample_images, sample_labels = next(iter(train_dataloader))

in_dims = int(sample_images.size(1)) #number of x channels
out_dims = sample_labels.shape[1] #number of predicted features

In [None]:
def fit(model, criterion, lr, epochs = 30, train_loader = None, test_loader = None):
    """
    Train `model` with Adam and evaluate it on the test data after every epoch.

    Each epoch calls `train.train_OPV_m2py_model` followed by
    `test.eval_OPV_m2py_model`. Entries 0-3 of every sequence those helpers
    return are recorded as the pce, voc, jsc and ff values respectively, and
    the sum of the whole sequence is recorded as that epoch's total.

    Parameters
    ----------
    model : object exposing `.parameters()`; passed unchanged to the helpers.
    criterion : loss object, passed unchanged to the helpers.
    lr : learning rate given to `torch.optim.Adam`.
    epochs : number of train/evaluate rounds (must be >= 1).
    train_loader : data passed as `training_data_set`; defaults to the
        notebook-level `train_dataloader`.
    test_loader : data passed as `test_data_set`; defaults to the
        notebook-level `test_dataloader`.

    Returns
    -------
    dict with `lr`, the 0-based epoch indices `best_loss_epoch`,
    `best_acc_epoch` and `best_r2_epoch`, the per-target and total test
    histories (`*_loss`/`test_losses`, `*_acc`/`test_accs`,
    `*_r2`/`test_r2s`), and the training histories (`train_*_loss`,
    `train_losses`). Every history list has one entry per epoch.

    Raises
    ------
    ValueError if `epochs` is smaller than 1 (there would be no best epoch).
    """
    if epochs < 1:
        raise ValueError(f'epochs must be at least 1, got {epochs}')

    # Fall back to the notebook globals so existing fit(model, criterion, lr)
    # calls keep working unchanged.
    if train_loader is None:
        train_loader = train_dataloader
    if test_loader is None:
        test_loader = test_dataloader

    optimizer = torch.optim.Adam(model.parameters(), lr = lr)

    targets = ('pce', 'voc', 'jsc', 'ff')
    train_hist = {name: [] for name in targets}
    loss_hist = {name: [] for name in targets}
    acc_hist = {name: [] for name in targets}
    r2_hist = {name: [] for name in targets}
    train_totals, loss_totals, acc_totals, r2_totals = [], [], [], []

    def _record(per_target, totals, values):
        # Store the four per-target values and the epoch total of one result.
        for idx, name in enumerate(targets):
            per_target[name].append(values[idx])
        totals.append(sum(values))

    for epoch in range(epochs):
        train_losses = train.train_OPV_m2py_model(model = model,
                                                  training_data_set = train_loader,
                                                  criterion = criterion,
                                                  optimizer = optimizer)
        _record(train_hist, train_totals, train_losses)

        test_losses, test_accs, test_r2s = test.eval_OPV_m2py_model(model = model,
                                                                    test_data_set = test_loader,
                                                                    criterion = criterion)
        _record(loss_hist, loss_totals, test_losses)
        _record(acc_hist, acc_totals, test_accs)
        _record(r2_hist, r2_totals, test_r2s)

        print('Finished epoch ', epoch)

    fit_results = {
        'lr': lr,
        'best_loss_epoch': loss_totals.index(min(loss_totals)),
        # The "acc" metric is treated as lower-is-better, as in the original.
        # NOTE(review): confirm test_accs is an error-type metric (e.g. MAPE).
        'best_acc_epoch': acc_totals.index(min(acc_totals)),
        'best_r2_epoch': r2_totals.index(max(r2_totals)),
    }

    # Key order mirrors the layout of previously saved result files.
    for suffix, per_target, totals_key, totals in (
        ('loss', loss_hist, 'test_losses', loss_totals),
        ('acc', acc_hist, 'test_accs', acc_totals),
        ('r2', r2_hist, 'test_r2s', r2_totals),
    ):
        for name in targets:
            fit_results[f'{name}_{suffix}'] = per_target[name]
        fit_results[totals_key] = totals

    for name in targets:
        fit_results[f'train_{name}_loss'] = train_hist[name]
    # Total training loss per epoch: previously computed but never returned.
    fit_results['train_losses'] = train_totals

    return fit_results

In [None]:
%autoreload

# Loss object used for every run of the learning-rate sweep.
criterion = nn.MSELoss()

# 30 evenly spaced candidate learning rates, both end points included.
lrs = np.linspace(start = 0.01, stop = 0.055, num = 30)

In [None]:
%autoreload

# Results of the learning-rate sweep, keyed by the position of the rate in lrs.
lr_opt = {}

for i, lr in enumerate(lrs):
    # Banner separating the per-epoch output of consecutive runs.
    print(f'  optimization loop {i}', '-----------------------------', sep = '\n')

    # A new network per candidate rate, so no run continues from another.
    model = net.OPV_m2py_NN(8)
    lr_opt[i] = fit(model, criterion, lr, epochs = 15)

lr_opt

In [None]:
# Persist the sweep results as JSON; the integer run keys become strings.
with open('../ipynb/Networks/json/20200723_OPVNN4_hpo_results-r2.json', 'w') as fp:
    fp.write(json.dumps(lr_opt))

------------------------------

## After hyperparameter optimization

The best conditions found during hyperparameter optimization are used to train a final, best-performing model, which is then used to produce the final results and figures. This is done in the following cells.

------------------------------

In [None]:
criterion = nn.MSELoss()

# best_lr / best_epochs used to exist only as commented-out literals, so the
# training cell below failed with a NameError on a fresh kernel; the recorded
# lr (~0.606) was also outside the range swept in this notebook (0.01-0.055).
# Select them from the sweep instead: take the run whose lowest total test
# loss is smallest, and train for as many epochs as that run needed to reach it.
# NOTE(review): selection is by test loss; switch to 'test_r2s' /
# 'best_r2_epoch' if R^2 is the intended criterion.
best_run = min(lr_opt.values(), key = lambda run: min(run['test_losses']))
best_lr = best_run['lr']
best_epochs = best_run['best_loss_epoch'] + 1  # stored epoch index is 0-based

best_model = net.OPV_m2py_NN(8)
best_model.apply(nuts.init_weights)

In [None]:
# Train the final model with the selected learning rate and epoch count.
best_results = fit(model = best_model,
                   criterion = criterion,
                   lr = best_lr,
                   epochs = best_epochs)

# Save the training history next to the sweep results.
with open('../ipynb/Networks/json/OPVNN4_best_results.json', 'w') as fp:
    fp.write(json.dumps(best_results))

In [None]:
# Read the saved history of the final model back from disk.
with open('../ipynb/Networks/json/OPVNN4_best_results.json') as json_file:
    results_json = json.loads(json_file.read())

In [None]:
# Plot the history dict returned by fit() for the final model.
# NOTE(review): this uses the in-memory `best_results`, not the `results_json`
# reloaded from disk in the previous cell -- confirm which one is intended.
nuts.plot_fit_results(best_results)

In [None]:
%autoreload

best_model.eval()

# Collect predictions and labels for EVERY test batch. The previous loop
# overwrote its variables on each iteration, so only the last batch was scored.
pred_batches = {'pce': [], 'voc': [], 'jsc': [], 'ff': []}
label_batches = []

with torch.no_grad():
    for images, batch_labels in test_dataloader:

        pce_out, voc_out, jsc_out, ff_out, im_enc = best_model(images)

        pred_batches['pce'].append(pce_out)
        pred_batches['voc'].append(voc_out)
        pred_batches['jsc'].append(jsc_out)
        pred_batches['ff'].append(ff_out)
        label_batches.append(batch_labels)

# Stitch the batches together along the sample axis.
# NOTE(review): assumes axis 0 of every model output is the batch axis -- confirm.
labels = torch.cat(label_batches, dim = 0)
pce_pred = torch.cat(pred_batches['pce'], dim = 0)
voc_pred = torch.cat(pred_batches['voc'], dim = 0)
jsc_pred = torch.cat(pred_batches['jsc'], dim = 0)
ff_pred = torch.cat(pred_batches['ff'], dim = 0)

mape = pilf.reg_MAPE()


def report_metrics(pred, true):
    """Print and return (mse, r2, mape) for one predicted quantity."""
    y_pred = pred.data.numpy()
    y_true = true.data.numpy()

    # scikit-learn metrics take (y_true, y_pred). R^2 is not symmetric, so the
    # previous (pred, true) order reported the wrong value.
    mse = mean_squared_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    # Argument order kept from the original call; reg_MAPE's signature is not
    # visible in this notebook -- TODO confirm it expects (prediction, target).
    mape_value = mape.forward(y_pred, y_true)

    print(f'mse = {mse}, mape = {mape_value}, r2 = {r2}')
    return mse, r2, mape_value


# Label columns 0-3 hold the pce, voc, jsc and ff targets respectively.
pce_mse, pce_r2, pce_mape = report_metrics(pce_pred, labels[:,0])

voc_mse, voc_r2, voc_mape = report_metrics(voc_pred, labels[:,1])

jsc_mse, jsc_r2, jsc_mape = report_metrics(jsc_pred, labels[:,2])

ff_mse, ff_r2, ff_mape = report_metrics(ff_pred, labels[:,3])

In [None]:
# Parity plot call: one (label column, prediction) pair per target, in the
# order pce, voc, jsc, ff. `labels` and the *_pred tensors are the values left
# behind by the evaluation cell.
nuts.plot_OPV_parity(labels[:,0], pce_pred, labels[:,1], voc_pred,
                     labels[:,2], jsc_pred, labels[:,3], ff_pred)