In [74]:
import geopandas as gpd

import numpy as np
import pandas as pd
from pandas import IndexSlice as idx
import tensorflow as tf
import sys
import os
import glob

from functools import partial



code_dir = '/cluster/home/kheuto01/code/opioid-overdose-models/perturbations/'
sys.path.append(code_dir)
code_dir = '/cluster/home/kheuto01/code/opioid-overdose-models/diff_bpr'
sys.path.append(code_dir)
from top_k import top_k_idx
from make_datasets import make_data
from bpr_model import PerturbedBPRModel


code_dir = '/cluster/home/kheuto01/code/opioid-overdose-models/'
sys.path.append(code_dir)
from zinf_gp.metrics import normcdf, fixed_top_X



from perturbations import perturbed
from bpr import bpr_variable_k_no_ties



In [2]:
data_path='/cluster/tufts/hugheslab/datasets/NSF_OD/results_20220606_update/clean_annual_tract/'

In [4]:
epochs = 5000
seed = 360
time_window = 10
first_train_eval_year = 2013
last_train_eval_year = 2017
batch_dim_size = last_train_eval_year - first_train_eval_year + 1
validation_year = 2018
first_test_year = 2019
last_test_year = 2020

tf.random.set_seed(seed)


timestep_col = 'timestep'
geography_col = 'geoid'
outcome_col = 'deaths'

x_idx_cols = [geography_col, 'lat', 'lon', timestep_col,
              'theme_1_pc', 'theme_2_pc', 'theme_3_pc', 'theme_4_pc',
              'svi_pctile', 'year',
              'neighbor_t', 'self_t-1']
y_idx_cols = [geography_col, timestep_col, outcome_col]
"""features_only = ['lat', 'lon', timestep_col,
                 'theme_1_pc', 'theme_2_pc', 'theme_3_pc', 'theme_4_pc',
                 'svi_pctile',
                 'neighbor_t', 'self_t-1']"""
features_only = ['deaths']

data_gdf = gpd.read_file(data_path)

multiindexed_gdf = data_gdf.set_index(['geoid', 'year'])
num_geoids = len(data_gdf['geoid'].unique())

train_shape = (num_geoids, time_window, len(features_only))

train_x_BSF_flat, train_y_BS = make_data(multiindexed_gdf, first_train_eval_year, last_train_eval_year,
                                         time_window, features_only, train_shape)

valid_x_BSF_flat, valid_y_BS = make_data(multiindexed_gdf, validation_year, validation_year,
                                         time_window, features_only, train_shape)

test_x_BSF_flat, test_y_BS = make_data(multiindexed_gdf, first_test_year, last_test_year,
                                       time_window, features_only, train_shape)

top_100_idx_func = partial(top_k_idx, k=100)

2023-05-26 13:12:24.722414: E tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:266] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
2023-05-26 13:12:24.722454: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:168] retrieving CUDA diagnostic information for host: p1cmp078.pax.tufts.edu
2023-05-26 13:12:24.722463: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:175] hostname: p1cmp078.pax.tufts.edu
2023-05-26 13:12:24.722573: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:199] libcuda reported version is: 515.65.1
2023-05-26 13:12:24.722600: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:203] kernel reported version is: 515.65.1
2023-05-26 13:12:24.722605: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:309] kernel version seems to match DSO: 515.65.1


In [5]:
best_noise = 0.3
best_samples = 50
best_lr = 0.005

In [13]:
checkpoints = glob.glob(f'/cluster/tufts/hugheslab/kheuto01/diffbpr/simple/n{best_noise}_samp{best_samples}_lr{best_lr}/model*.hdf5')

In [23]:
iterations = [int(model_path.split('_')[-1].split('.hdf5')[0]) for model_path in checkpoints]
best_model_idx = np.argmax(iterations)
best_model_path = checkpoints[best_model_idx]

In [32]:
top_100_idx_func = partial(top_k_idx, k=100)

perturbed_top_100 = perturbed(top_100_idx_func,
                              num_samples=best_samples,
                              sigma=best_noise,
                              noise='normal',
                              batched=True)

model = PerturbedBPRModel(perturbed_top_100)

optimizer = tf.keras.optimizers.Adam(learning_rate=best_lr)

# Compile the model
def weird_loss(a, b):
    return -a / b

model.compile(optimizer=optimizer, loss=weird_loss)

In [34]:
model(train_x_BSF_flat) # call to initialize model
model.load_weights(best_model_path)

In [36]:
model.evaluate(test_x_BSF_flat, test_y_BS)



-0.5855193138122559

In [103]:
preds = model(test_x_BSF_flat)
pred_val, pred_100_idx = tf.math.top_k(preds, k=model.k)

In [39]:
pred_100_idx

<tf.Tensor: shape=(2, 100), dtype=int32, numpy=
array([[ 374,  383,  386, 1516,  733, 1015,  267, 1185, 1074, 1091,  244,
        1295, 1132, 1517,  428,  397,  571, 1489,  115,  347,  195, 1183,
        1017, 1523,  211,  148,  167,  200, 1259,  197,  156,  610, 1346,
         611,  727, 1092, 1050,  262,  346,  268, 1176,  631,  568, 1052,
        1019, 1398,  338,  166,  198,  168, 1022, 1038, 1400,  264,  161,
        1271, 1051,  721,  158, 1010,  570, 1532,  149, 1337, 1149, 1499,
        1053,  478,  576, 1334,   73,  159, 1524, 1407,  127,   24,  153,
        1397,  175, 1594,  585,   46,  117, 1502, 1250, 1011,   62,  920,
        1118,  574,  495, 1078, 1540, 1465, 1391,  461,  349, 1129,  256,
         846],
       [1517, 1516,  374,  383,  397, 1185, 1017,  211,  167,  115,  347,
         268, 1132, 1524, 1176, 1131,  186, 1051, 1295,  262, 1334,   58,
        1532, 1465,  610, 1514, 1015, 1129,  267,  332,  152,  195, 1050,
         153,  375,  158,  570, 1052, 1092,  244,

In [41]:
tf.reduce_sum(tf.gather(test_y_BS, pred_100_idx, batch_dims=-1), axis=-1)

<tf.Tensor: shape=(2,), dtype=float32, numpy=array([277., 289.], dtype=float32)>

In [86]:
bpr_variable_k_no_ties(test_y_BS, preds,k=100)

<tf.Tensor: shape=(2,), dtype=float32, numpy=array([0.5918803, 0.5791583], dtype=float32)>

In [61]:
fixed_top_X(pd.Series(test_y_BS.numpy()[0]), pd.Series( preds.numpy()[0]) ,X=100)

(191.0, 0.5918803, 191.0, 0.5918803)

In [102]:
last_val, last_idx = tf.math.top_k(tf.squeeze(test_x_BSF_flat[:,:,-1]),k=100)

In [113]:
bpr_variable_k_no_ties(test_y_BS, tf.squeeze(test_x_BSF_flat[:,:,-1]), k=100)

<tf.Tensor: shape=(2,), dtype=float32, numpy=array([0.54273504, 0.5190381 ], dtype=float32)>

In [65]:
test_x_BSF_flat, test_y_BS = make_data(multiindexed_gdf, first_test_year, last_test_year,
                                       time_window, features_only+['year'], train_shape)

KeyError: "['year'] not in index"

In [64]:
first_test_year

2019

In [72]:
test_x_BSF_flat[0][:,0]==test_y_B

<tf.Tensor: shape=(1620,), dtype=float32, numpy=array([0., 0., 0., ..., 0., 0., 0.], dtype=float32)>

In [70]:
test_y_BS

<tf.Tensor: shape=(2, 1620), dtype=float32, numpy=
array([[1., 1., 0., ..., 1., 0., 0.],
       [1., 1., 0., ..., 0., 0., 0.]], dtype=float32)>

In [77]:
multiindexed_gdf.loc[idx[:, last_train_eval_year - time_window:last_train_eval_year - 1], features_only]

Unnamed: 0_level_0,Unnamed: 1_level_0,deaths
geoid,year,Unnamed: 2_level_1
25001010100,2007,0.0
25001010100,2008,0.0
25001010100,2009,0.0
25001010100,2010,0.0
25001010100,2011,0.0
...,...,...
25027761402,2012,0.0
25027761402,2013,0.0
25027761402,2014,0.0
25027761402,2015,0.0


In [78]:
last_train_eval_year

2017

In [79]:
multiindexed_gdf.loc[idx[:, last_train_eval_year], 'deaths']

geoid        year
25001010100  2017    0.0
25001010206  2017    2.0
25001010208  2017    0.0
25001010304  2017    0.0
25001010306  2017    1.0
                    ... 
25027761100  2017    1.0
25027761200  2017    1.0
25027761300  2017    0.0
25027761401  2017    0.0
25027761402  2017    0.0
Name: deaths, Length: 1620, dtype: float64

In [92]:
last_idx

<tf.Tensor: shape=(2, 100), dtype=int32, numpy=
array([[ 117, 1515,  268,  721, 1017, 1516, 1517,  118,  149,  374,  384,
         431,  455,  493,  495,  610,  920, 1050, 1257,   34,   62,  156,
         159,  168,  189,  198,  200,  242,  264,  321,  332,  347,  354,
         421,  427,  441,  461,  472,  480,  585,  893, 1141, 1168, 1185,
        1294, 1346, 1407, 1523, 1524, 1532, 1538,   17,   57,   60,  120,
         121,  122,  148,  169,  179,  195,  236,  257,  265,  267,  330,
         346,  395,  409,  422,  428,  439,  440,  452,  453,  456,  473,
         499,  523,  568,  571,  575,  634,  655,  945,  976, 1041, 1052,
        1076, 1085, 1118, 1132, 1188, 1246, 1250, 1272, 1278, 1295, 1385,
        1442],
       [ 570,  138,  198,  211,  347, 1185,   57,   58,  159,  259,  260,
         262,  267,  268,  344,  456,  472, 1019, 1085, 1119, 1517,   96,
         152,  158,  167,  221,  282,  349,  458,  512,  571, 1176, 1313,
        1407, 1514, 1532, 1535, 1550, 1595, 1597,

In [101]:
len(set(pred_100_idx[0].numpy()).intersection(set(last_idx[0].numpy())))

40

In [104]:
last_val

<tf.Tensor: shape=(2, 100), dtype=float32, numpy=
array([[8., 8., 7., 7., 7., 7., 7., 6., 6., 6., 6., 6., 6., 6., 6., 6.,
        6., 6., 6., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5.,
        5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5.,
        5., 5., 5., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4.,
        4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4.,
        4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4.,
        4., 4., 4., 4.],
       [8., 7., 7., 7., 7., 7., 6., 6., 6., 6., 6., 6., 6., 6., 6., 6.,
        6., 6., 6., 6., 6., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5.,
        5., 5., 5., 5., 5., 5., 5., 5., 4., 4., 4., 4., 4., 4., 4., 4.,
        4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4.,
        4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4.,
        4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4.,
        4., 4., 4., 4.]], dtype=float32)>

In [105]:
pred_val

<tf.Tensor: shape=(2, 100), dtype=float32, numpy=
array([[7.535028 , 7.2158713, 7.0981717, 6.6005   , 6.352484 , 5.8758893,
        5.824493 , 5.724207 , 5.6872063, 5.6585503, 5.603502 , 5.4050655,
        5.3186483, 5.2930846, 5.195948 , 5.172959 , 5.17269  , 5.05545  ,
        5.016334 , 5.014669 , 4.947457 , 4.918059 , 4.897112 , 4.785999 ,
        4.7273645, 4.7098765, 4.6921487, 4.689855 , 4.614169 , 4.588552 ,
        4.50291  , 4.47773  , 4.464743 , 4.437156 , 4.4345345, 4.4105053,
        4.356401 , 4.352587 , 4.3238335, 4.2702084, 4.2522626, 4.2371716,
        4.2060046, 4.1967535, 4.119102 , 4.11631  , 4.112356 , 4.1115093,
        4.0968194, 4.083629 , 4.074222 , 4.060568 , 4.041288 , 4.002971 ,
        4.0023813, 3.9946508, 3.973554 , 3.9399052, 3.93287  , 3.895623 ,
        3.8891158, 3.8731375, 3.8373806, 3.8296852, 3.8243341, 3.8162963,
        3.806931 , 3.7939856, 3.7855794, 3.7779508, 3.760709 , 3.756505 ,
        3.7521188, 3.7330031, 3.730347 , 3.7207575, 3.713199 ,

In [106]:
preds

<tf.Tensor: shape=(2, 1620), dtype=float32, numpy=
array([[0.7200178 , 1.1308959 , 0.440957  , ..., 0.09769697, 0.09769697,
        0.41200024],
       [0.34038925, 1.1140208 , 0.8928082 , ..., 0.45742333, 0.41841114,
        0.23202991]], dtype=float32)>

In [108]:
tf.reduce_min(preds)

<tf.Tensor: shape=(), dtype=float32, numpy=-0.15043697>