In [1]:
import torch
import numpy as np

import random
import os
import pickle
import matplotlib.pyplot as plt
import copy


def seed_torch(RANDOM_SEED=123):
    """Seed every random number generator this notebook touches.

    Seeds Python's ``random`` module, NumPy's legacy global RNG and torch
    (CPU, current CUDA device, all CUDA devices), exports ``PYTHONHASHSEED``
    and switches cuDNN to its non-benchmarking, deterministic mode.

    Args:
        RANDOM_SEED: integer seed applied to all generators (default 123).
    """
    # cuDNN: no autotuner, deterministic algorithms only.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # Exported as a string because environment values must be text.
    os.environ['PYTHONHASHSEED'] = str(RANDOM_SEED)

    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(RANDOM_SEED)
# Seed with the default value so a fresh kernel starts from a known RNG state.
seed_torch()

# Use the GPU when torch reports CUDA as available, otherwise stay on the CPU.
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)

from utils import generate_series, temporal_split
from model import HOIST_without_claim
from utils import mse,mae,r2,ccc


In [2]:
def _load_pickle(path):
    """Return the object stored in the pickle file at ``path``.

    The file is opened in a ``with`` block so the handle is closed as soon as
    the object has been read.

    NOTE(security): ``pickle.load`` can execute arbitrary code contained in the
    file; only use this on data files from a trusted source.
    """
    with open(path, 'rb') as fh:
        return pickle.load(fh)


mob_mat = _load_pickle('./data/mob_mat.pkl')
distance_mat = _load_pickle('./data/distance_mat.pkl')
covid_tensor = _load_pickle('./data/covid_tensor.pkl')
hospitalizations = _load_pickle('./data/hospitalizations.pkl')
hos_tensor = _load_pickle('./data/hos_tensor.pkl')
county_tensor = _load_pickle('./data/county_tensor.pkl')
feat_name = _load_pickle('./data/feat_name.pkl')
# `np.str` was only an alias of the builtin `str`; it was deprecated in
# NumPy 1.20 and removed in 1.24, so the builtin is used directly.
date_range = np.array(_load_pickle('./data/date_range.pkl'), dtype=str)

# Temporal split

In [3]:
# Give the COVID series an explicit axis at position 2 and stack it with the
# hospital features along that axis.
covid_tensor = np.expand_dims(covid_tensor, axis=2)
X = np.concatenate([covid_tensor, hos_tensor], axis=2)
y = hospitalizations
X, y = generate_series(X, y, window_size=35, pred_size=28)

# Window the dates with the same sizes; axes are added at positions 0 and 2
# first (for a 1-D date_range that yields shape (1, n_dates, 1)).
date_idx = np.expand_dims(np.expand_dims(date_range, axis=0), axis=2)
date_idx, _ = generate_series(date_idx, y, window_size=35, pred_size=28, date=True)

# Keep only rows whose target has a strictly positive mean over axis 1.
range_idx = y.mean(1) > 0
county_tensor, y, X = county_tensor[range_idx], y[range_idx], X[range_idx]
print(len(y))
# Restrict both pairwise matrices to the kept rows and the matching columns.
mob_mat, distance_mat = [mat[range_idx, :][:, range_idx] for mat in (mob_mat, distance_mat)]

# Targets are modelled as log(y + 1).
y = np.log(y+1)
(
    train_x, val_x, test_x,
    train_y, val_y, test_y,
    train_idx, val_idx, test_idx,
    static, mats, normalize_dict, shuffle_idx,
) = temporal_split(X, y, county_tensor, [mob_mat, distance_mat], 0.2, 0.2, norm='min-max', norm_mat=True)

# `mats` comes back in the order the matrices were passed in: mobility, distance.
norm_mob, norm_dist = mats[0], mats[1]


2299


In [5]:
# Per-run test metrics. The plain lists are computed on the de-normalised
# predictions; the *_exp lists are computed after applying np.exp to them.
mae_ = []
mae_exp = []
mse_ = []
mse_exp = []
r2_ = []
r2_exp = []
ccc_ = []
ccc_exp = []

# Hyper-parameters are the same for every run, so they are set once here.
runs = 1
epoch = 300
batch_size = 128


def _make_batch(x_split, y_split, start, stop):
    """Build the model inputs for rows ``start:stop`` of one data split.

    Args:
        x_split: input array of the split (train/val/test), sliced on axis 0.
        y_split: target array of the split, sliced on axis 0.
        start: first row index of the batch.
        stop: one past the last row index (may exceed the split length;
            slicing clips it).

    Returns:
        ``(batch_x, batch_y, cur_static)`` where ``batch_y`` has a trailing
        singleton axis and ``cur_static`` is the list
        ``[static[:, :4], static[:, 4:9], static[:, 9:14], pairwise]`` with
        ``pairwise`` stacking the mobility and distance sub-matrices of the
        batch rows on a new last axis.
    """
    batch_x = torch.tensor(x_split[start:stop]).float().to(device)
    batch_y = torch.tensor(y_split[start:stop]).float().to(device).unsqueeze(-1)
    batch_static = torch.tensor(static[start:stop]).float().to(device)
    # Rows and columns are cut with the same range, giving the square block
    # of pairwise values among the batch rows.
    batch_mob = torch.tensor(norm_mob[start:stop, :][:, start:stop]).float().to(device)
    batch_dist = torch.tensor(norm_dist[start:stop, :][:, start:stop]).float().to(device)
    batch_mat = torch.cat([batch_mob.unsqueeze(-1), batch_dist.unsqueeze(-1)], dim=2)
    cur_static = [batch_static[:, :4], batch_static[:, 4:9], batch_static[:, 9:14], batch_mat]
    return batch_x, batch_y, cur_static


def _denormalize(values):
    """Map normalised targets back with ``values * normalize_dict['y'][1] + normalize_dict['y'][0]``."""
    return (values * normalize_dict['y'][1]) + normalize_dict['y'][0]


# torch.save does not create missing directories.
os.makedirs('./model', exist_ok=True)

for k in range(runs):
    seed_torch(k)
    model = HOIST_without_claim(5, [4,5,5], 128, device).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)
    loss_fn = torch.nn.MSELoss(reduction='none')

    ckpt_path = './model/hoist_%d.pth'%k
    min_loss = float('inf')
    min_epoch = 0

    for i in range(epoch):
        epoch_loss = []
        val_loss = []

        # ---- training pass ----
        model.train()
        # Iterate over the *training* split. Stepping by batch_size never
        # yields an empty trailing batch, unlike `len // batch_size + 1`.
        for start in range(0, len(train_x), batch_size):
            batch_x, batch_y, cur_static = _make_batch(train_x, train_y, start, start + batch_size)

            optimizer.zero_grad()
            output, aux = model(batch_x, cur_static)

            n_rows, n_steps = batch_y.shape[0], batch_y.shape[1]
            dist = aux[0]
            weights = aux[1]
            # Auxiliary term: a weighted-input estimate scaled by the detached
            # prediction, plus a pairwise interaction term weighted by `dist`.
            y_p = (weights * batch_x).sum(-1).reshape(n_rows, n_steps, 1) * output.detach()
            y_pi = y_p.reshape(n_rows, 1, n_steps)
            y_pj = y_p.reshape(1, n_rows, n_steps)
            y_k = ((y_pi * y_pj) * dist.reshape(n_rows, n_rows, 1)).sum(1).reshape(n_rows, n_steps, 1)
            ising_loss = loss_fn(y_p + y_k, batch_y).mean(1).mean()

            loss = loss_fn(output, batch_y).mean(1).mean() + ising_loss
            loss.backward()
            optimizer.step()
            epoch_loss.append(loss.item())

        # ---- validation pass ----
        model.eval()
        y_pred = []
        y_true = []
        with torch.no_grad():
            # Iterate over the *validation* split.
            for start in range(0, len(val_x), batch_size):
                batch_x, batch_y, cur_static = _make_batch(val_x, val_y, start, start + batch_size)

                output, aux = model(batch_x, cur_static)
                loss = loss_fn(output, batch_y).mean(1).mean()
                # squeeze(-1) drops only the trailing axis, so a batch with a
                # single row keeps its batch axis and list() still yields
                # one entry per row.
                y_pred += list(output.squeeze(-1).cpu().detach().numpy())
                y_true += list(batch_y.squeeze(-1).cpu().detach().numpy())
                val_loss.append(loss.item())
        y_pred = np.array(y_pred)
        y_true = np.array(y_true)
        norm_pred = _denormalize(y_pred)
        norm_true = _denormalize(y_true)

        cur_mse = mse(norm_true, norm_pred)
        cur_mae = mae(norm_true, norm_pred)
        if i % 100 == 0:
            print('Epoch: %d, Train Loss: %.4f, Val Loss: %.4f, MSE: %.2f, MAE: %.2f'%(i, np.mean(epoch_loss), np.mean(val_loss), cur_mse, cur_mae))
        # Checkpoint whenever the validation MAE improves.
        if cur_mae < min_loss:
            min_loss = cur_mae
            min_epoch = i
            torch.save(model.state_dict(), ckpt_path)

    # ---- test pass with the best checkpoint of this run ----
    y_pred = []
    y_true = []
    weight_score = []
    # map_location lets a checkpoint written on GPU be restored on CPU.
    model.load_state_dict(torch.load(ckpt_path, map_location=device))
    model.eval()

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        for start in range(0, len(test_x), batch_size):
            batch_x, batch_y, cur_static = _make_batch(test_x, test_y, start, start + batch_size)
            output, aux = model(batch_x, cur_static)

            y_pred += list(output.squeeze(-1).cpu().detach().numpy())
            y_true += list(batch_y.squeeze(-1).cpu().detach().numpy())
            # NOTE(review): bare squeeze() kept from the original; it would also
            # drop the batch axis for a single-row batch — confirm the shape of aux[1].
            weight_score += list(aux[1].squeeze().cpu().detach().numpy())
    y_pred = np.array(y_pred)
    y_true = np.array(y_true)
    weight_score = np.array(weight_score)

    norm_pred = _denormalize(y_pred)
    norm_true = _denormalize(y_true)

    # Compute each metric once and reuse it for both the log line and the lists.
    test_mse = mse(norm_true, norm_pred)
    test_mae = mae(norm_true, norm_pred)
    test_r2 = r2(norm_true, norm_pred)
    test_ccc = ccc(norm_true, norm_pred)
    print('Best Epoch: %d, Test MSE: %.2f, MAE: %.2f, R2: %.2f, CCC: %.2f'%(min_epoch, test_mse, test_mae, test_r2, test_ccc))

    # NOTE(review): np.exp is applied to both arrays before the *_exp metrics;
    # if the targets were built as log(y + 1) the exact inverse is exp(.) - 1,
    # which shifts both arrays by the same constant — confirm the metric
    # helpers are shift-invariant if exact counts matter.
    exp_true = np.exp(norm_true)
    exp_pred = np.exp(norm_pred)

    mae_.append(test_mae)
    mae_exp.append(mae(exp_true, exp_pred))
    mse_.append(test_mse)
    mse_exp.append(mse(exp_true, exp_pred))
    r2_.append(test_r2)
    r2_exp.append(r2(exp_true, exp_pred))
    ccc_.append(test_ccc)
    ccc_exp.append(ccc(exp_true, exp_pred))

Epoch: 0, Train Loss: 1.9760, Val Loss: 0.8686, MSE: 5.91, MAE: 2.06
Epoch: 100, Train Loss: 1.0583, Val Loss: 0.2447, MSE: 1.67, MAE: 0.99
Epoch: 200, Train Loss: 1.0449, Val Loss: 0.2491, MSE: 1.70, MAE: 1.00
Best Epoch: 118, Test MSE: 1.84, MAE: 1.06, R2: 0.74, CCC: 0.87


In [7]:
# Report each *_exp metric averaged over all runs.
summary_rows = (
    ('MSE: %.2f', mse_exp),
    ('MAE: %.2f', mae_exp),
    ('R2: %.2f', r2_exp),
    ('CCC: %.2f', ccc_exp),
)
for row_format, run_values in summary_rows:
    print(row_format % np.mean(run_values))

MSE: 5420623.50
MAE: 555.40
R2: 0.69
CCC: 0.79
