In [54]:
import torch
import pyro
import pyro.distributions as dist
import pyro.optim as optim
from pyro.infer import SVI, Trace_ELBO, JitTrace_ELBO
import pandas as pd
from torch.distributions import constraints
from pyro.infer.mcmc.api import MCMC
from pyro.infer.mcmc import NUTS

In [29]:
dict_rest = pd.read_pickle("./data/crawled_data.pickle")

In [30]:
# organize data 
ids = [d["id"] for d in dict_rest]
names = [d["name"] for d in dict_rest]
lats = [d["coordinates"]["lat"] for d in dict_rest]
lngs = [d["coordinates"]["lng"] for d in dict_rest]
ratings = [ d["rating"] for d in dict_rest]
rating_ns = [d["rating_n"] for d in dict_rest]
populartimes = [ d["populartimes"] for d in dict_rest]
poptime_sat = [ d[5]['data'] for d in populartimes]
poptime_sat_12 = [d[12] for d in poptime_sat]

In [61]:
selected_data = {'ids':ids,'names':names,'lats':lats,'lngs':lngs,\
                 'ratings':ratings,'rating_ns':rating_ns,'poptime_sat_12':poptime_sat_12}
df = pd.DataFrame(selected_data)
df_model = df[['ratings','rating_ns','poptime_sat_12']]
train_data = torch.tensor(df_model.values, dtype = torch.float)
rating, rating_ns, popularity = train_data[:, 0], torch.log(train_data[:,1]), train_data[:,2]

In [62]:
# #Tested whether SVI runs correctly, and it did.
# rating = torch.rand(30)
# test_dist = dist.Poisson(torch.exp(rating.sum() + 1))
# popularity = torch.tensor([test_dist.sample() for i in range(len(rating))], dtype = torch.float)

In [66]:
def model(rating, rating_ns, popularity):
    beta_0 = pyro.sample("beta_0", dist.Normal(0, 1))
    beta_1 = pyro.sample("beta_1", dist.Normal(0, 1))
    beta_2 = pyro.sample("beta_2", dist.Normal(0, 1))
    lambda_ = torch.exp(beta_0 + beta_1 * rating + beta_2*rating_ns)
#     lambda_ = torch.exp(beta_0 + beta_1 * rating)
    
    with pyro.plate("data", len(popularity)):
        y = pyro.sample("obs", dist.Poisson(lambda_), obs=popularity)
    
    return y

In [67]:
def guide(rating, rating_ns, popularity):
    weights_loc = pyro.param('weights_loc', torch.randn(3))
    weights_scale = pyro.param('weights_scale', torch.ones(3), constraint=constraints.positive)        
#     weights_loc = pyro.param('weights_loc', torch.randn(2))
#     weights_scale = pyro.param('weights_scale', torch.ones(2), constraint=constraints.positive)        

    
    beta_0 = pyro.sample("beta_0", dist.Normal(weights_loc[0], weights_scale[0]))
    beta_1 = pyro.sample("beta_1", dist.Normal(weights_loc[1], weights_scale[1]))
    beta_2 = pyro.sample("beta_2", dist.Normal(weights_loc[2], weights_scale[2]))
    labmda_ = torch.exp(beta_0 + beta_1 * rating + beta_2 * rating_ns)
#     labmda_ = torch.exp(beta_0 + beta_1 * rating)



In [None]:
svi = SVI(model, 
          guide, 
          optim.Adam({"lr": .005}), 
          loss=JitTrace_ELBO(), 
          num_samples=1000)

pyro.clear_param_store()
epoch = 100000
for i in range(epoch):
    ELBO = svi.step(rating, rating_ns, popularity)
    if i % 500 == 0:
        print(ELBO)
posterior = svi.run(rating, rating_ns, popularity)


  


11600.6875
266362.65625
143109.65625
130535.921875
150796.84375
937149.4375
85137.96875
225538.578125
170855.921875
189423.609375
154006.796875
226505.0625
244432.765625
82647.4609375
241266.328125
112808.46875
228354.515625
193419.546875
176098.875
202751.796875
48424.328125
240005.875
240750.171875
86653.09375
89775.4140625
131370.296875
52324.42578125
131071.0703125
318461.65625
32957.71484375
203703.640625
210036.609375
52487.140625
201916.03125
185246.109375
193069.453125
102849.5703125
119132.9296875
243919.484375
54721.11328125
26633.458984375
112422.5390625
84720.9765625
130535.4765625
110389.9375
46824.12109375
20870.96484375
13940.2470703125
10529.8974609375
42454.1640625
11350.5224609375
26071.26953125
14021.494140625
33590.59375
10046.9365234375
9953.16796875
12649.7587890625
16504.884765625
10421.9482421875
10278.607421875
9555.7626953125
9852.947265625
9653.05078125
9546.3134765625
9919.765625
9492.0
9451.55078125
9947.048828125
9438.8388671875
9455.88671875
9595.48730468

In [None]:
sites = ["beta_0", "beta_1", "beta_2", "beta_3", "sigma"]

svi_samples = {site: EmpiricalMarginal(svi_diagnorm_posterior, sites=site)
                     .enumerate_support().detach().cpu().numpy()
               for site in sites}

for site, values in summary(svi_samples).items():
    print("Site: {}".format(site))
    print(values, "\n")

In [59]:
nuts_kernel = NUTS(model)

mcmc = MCMC(nuts_kernel, num_samples=1000, warmup_steps=200)
mcmc.run(ratings, rating_ns, popularity)

hmc_samples = {k: v.detach().cpu().numpy() for k, v in mcmc.get_samples().items()}

Warmup:   0%|          | 0/300 [00:00<?, ?it/s]

TypeError: mul(): argument 'other' (position 1) must be Tensor, not list