In [1]:
import torch
import torch.nn as nn
import pickle as pk
import numpy as np
from model import LocalPredictor
import random

In [2]:
import os

In [3]:
data = dict({})
for m in range(4, 13):
    for d in range(1, 32):
        filename = '../data/foursquare_interp_128/2012-{:02d}-{:02d}.pk'.format(m, d)
        if not os.path.isfile(filename):
            continue
        print(filename)
        with open(filename, 'rb') as f:
            data[(m, d)] = pk.load(f)

../data/foursquare_interp_128/2012-04-04.pk
../data/foursquare_interp_128/2012-04-05.pk
../data/foursquare_interp_128/2012-04-08.pk
../data/foursquare_interp_128/2012-04-09.pk
../data/foursquare_interp_128/2012-04-10.pk
../data/foursquare_interp_128/2012-04-11.pk
../data/foursquare_interp_128/2012-04-12.pk
../data/foursquare_interp_128/2012-04-13.pk
../data/foursquare_interp_128/2012-04-14.pk
../data/foursquare_interp_128/2012-04-15.pk
../data/foursquare_interp_128/2012-04-16.pk
../data/foursquare_interp_128/2012-04-17.pk
../data/foursquare_interp_128/2012-04-18.pk
../data/foursquare_interp_128/2012-04-19.pk
../data/foursquare_interp_128/2012-04-20.pk
../data/foursquare_interp_128/2012-04-21.pk
../data/foursquare_interp_128/2012-04-22.pk
../data/foursquare_interp_128/2012-04-23.pk
../data/foursquare_interp_128/2012-04-24.pk
../data/foursquare_interp_128/2012-04-25.pk
../data/foursquare_interp_128/2012-04-26.pk
../data/foursquare_interp_128/2012-04-27.pk
../data/foursquare_interp_128/20

In [4]:
uid_sets = dict({})
for d in data:
    uid_sets[d] = set(list(data[d].keys()))

In [5]:
num_locs = 128
loc_embedding_dim = 64
T = 96
num_time = T
time_embedding_dim = 32
hidden_dim = 128
latent_dim = 128
n_layers = 2

In [6]:
local_predictor = LocalPredictor(num_locs, loc_embedding_dim, num_time, time_embedding_dim, hidden_dim, latent_dim, n_layers).cuda(0)

In [7]:
optimizer = torch.optim.RMSprop(local_predictor.parameters(), lr=1e-3)
optimizer_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.8)

In [8]:
dT = 4

In [9]:
training_loss = dict({})
validation_loss = dict({})

In [13]:
batch_size = 32

for epoch in range(1, 11):
    
    doc_date_set = set(random.sample(list(data.keys()), 100))
    qry_date_set = set(list(data.keys())) - doc_date_set

    uid_doc_set = set([])
    for d in doc_date_set:
        uid_doc_set = uid_doc_set | uid_sets[d]

    data_doc = dict({})
    data_qry = dict({})

    for d in doc_date_set:
        for uid in data[d]:
            if uid not in data_doc:
                data_doc[uid] = [data[d][uid]]
            else:
                data_doc[uid].append(data[d][uid])

    for d in qry_date_set:
        for uid in data[d]:
            if uid not in data_qry:
                data_qry[uid] = [data[d][uid]]
            else:
                data_qry[uid].append(data[d][uid])

    for uid in data_doc:
        data_doc[uid] = torch.LongTensor(data_doc[uid]).cuda(0)

    for uid in data_qry:
        data_qry[uid] = torch.LongTensor(data_qry[uid]).cuda(0)
    
    optimizer.zero_grad()
    #optimizer_scheduler.step()
    
    avg_loss = 0.0
    cnt = 0
    
    user_list_train = list(data_qry.keys())
    random.shuffle(user_list_train)
    
    for uid in user_list_train:
        t = np.random.randint(T - 2 * dT + 1)
        x_loc_qry = data_qry[uid][:, t: t + dT]
        x_t_qry = torch.zeros_like(x_loc_qry) + t
        y = data_qry[uid][:, t + 2 * dT - 1]
        
        if uid not in data_doc:
            loss = local_predictor(x_loc_qry, x_t_qry, None, None, y)
        else:
            x_loc_doc = data_doc[uid][:, t: t + 2 * dT]
            x_t_doc = torch.zeros_like(x_loc_doc) + t
            loss = local_predictor(x_loc_qry, x_t_qry, x_loc_doc, x_t_doc, y)
        
        loss.backward()
        cnt += data_qry[uid].shape[0]
        avg_loss += loss.item()

        if cnt % batch_size == 0:
            optimizer.step()
            optimizer.zero_grad()

            print('Epoch {:02d}, avg_loss = {:.4f}'.format(epoch, avg_loss / cnt), end='\r')
    training_loss[epoch] = avg_loss / cnt
    print('')
    
    # testing
        
    doc_date_set = set(random.sample(list(data.keys()), 100))
    qry_date_set = set(list(data.keys())) - doc_date_set

    uid_doc_set = set([])
    for d in doc_date_set:
        uid_doc_set = uid_doc_set | uid_sets[d]

    data_doc = dict({})
    data_qry = dict({})

    for d in doc_date_set:
        for uid in data[d]:
            if uid not in data_doc:
                data_doc[uid] = [data[d][uid]]
            else:
                data_doc[uid].append(data[d][uid])

    for d in qry_date_set:
        for uid in data[d]:
            if uid not in data_qry:
                data_qry[uid] = [data[d][uid]]
            else:
                data_qry[uid].append(data[d][uid])

    for uid in data_doc:
        data_doc[uid] = torch.LongTensor(data_doc[uid]).cuda(0)

    for uid in data_qry:
        data_qry[uid] = torch.LongTensor(data_qry[uid]).cuda(0)
    
    avg_loss = 0.0
    cnt = 0
    
    for uid in user_list_train:
        t = np.random.randint(T - 2 * dT + 1)
        x_loc_qry = data_qry[uid][:, t: t + dT]
        x_t_qry = torch.zeros_like(x_loc_qry) + t
        y = data_qry[uid][:, t + 2 * dT - 1]
        
        if uid not in data_doc:
            loss = local_predictor(x_loc_qry, x_t_qry, None, None, y)
        else:
            x_loc_doc = data_doc[uid][:, t: t + 2 * dT]
            x_t_doc = torch.zeros_like(x_loc_doc) + t
            loss = local_predictor(x_loc_qry, x_t_qry, x_loc_doc, x_t_doc, y)
        
        cnt += data_qry[uid].shape[0]
        avg_loss += loss.item()

        print('Epoch {:02d}, val_avg_loss = {:.4f}'.format(epoch, avg_loss / cnt), end='\r')
    validation_loss[epoch] = avg_loss / cnt
    print('')

Epoch 01, avg_loss = 0.2977
Epoch 01, val_avg_loss = 0.2829
Epoch 02, avg_loss = 0.2935
Epoch 02, val_avg_loss = 0.2879
Epoch 03, avg_loss = 0.2946
Epoch 03, val_avg_loss = 0.2673

KeyError: 1229

In [None]:
torch.save(local_predictor, './results_tokyo/local_predictor_open_128.pytorch')