In [22]:
import pandas as pd
import numpy as np
from keras.utils import to_categorical
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import torch.optim as optim
import torch.nn.functional as F
from tensorflow.keras.datasets import imdb
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing import sequence
seed = 123

# Import and pre-processing

In [23]:
NUM_WORDS = 5000
max_review_length = 100
INDEX_FROM = 3

# --- Import the IMDB data and only consider the ``top_words``` most used words
np.load.__defaults__=(None, True, True, 'ASCII')
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=NUM_WORDS, index_from=INDEX_FROM)
np.load.__defaults__=(None, False, True, 'ASCII')

word_to_id = imdb.get_word_index()
word_to_id = {k:(v+INDEX_FROM) for k,v in word_to_id.items()}
word_to_id["<PAD>"] = 0
word_to_id["<START>"] = 1
word_to_id["<UNK>"] = 2

id_to_word = {value:key for key,value in word_to_id.items()}
print(' '.join(id_to_word[id] for id in X_train[1000] ))

<START> although i had seen <UNK> in a theater way back in <UNK> i couldn't remember anything of the plot except for vague images of kurt thomas running and fighting against a backdrop of stone walls and disappointment regarding the ending br br after reading some of the other reviews i picked up a copy of the newly released dvd to once again enter the world of <UNK> br br it turns out this is one of those films produced during the <UNK> that would go directly to video today the film stars <UNK> <UNK> kurt thomas as jonathan <UNK> <UNK> out of the blue to <UNK> the nation of <UNK> to enter and hopefully win the game a <UNK> <UNK> <UNK> by the khan who <UNK> his people by yelling what sounds like <UNK> power the goal of the mission involves the star wars defense system jonathan is trained in the martial arts by princess <UNK> who never speaks or leaves the house once trained tries to blend in with the <UNK> by wearing a bright red <UNK> with <UNK> of blue and white needless to say <UNK>

In [24]:
# --- truncate and pad input sequences
X_train = X_train[:5]
X_test = X_test[:600]

X_train = sequence.pad_sequences(X_train, maxlen=max_review_length, padding='post', truncating='post', value=0)
X_test = sequence.pad_sequences(X_test, maxlen=max_review_length, padding='post', truncating='post', value=0)

print("len(X_train[0]):", len(X_train[0]))
print("len(X_train[1]):", len(X_train[1]))
print("X_train[0]:", X_train[0])

len(X_train[0]): 100
len(X_train[1]): 100
X_train[0]: [   1   14   22   16   43  530  973 1622 1385   65  458 4468   66 3941
    4  173   36  256    5   25  100   43  838  112   50  670    2    9
   35  480  284    5  150    4  172  112  167    2  336  385   39    4
  172 4536 1111   17  546   38   13  447    4  192   50   16    6  147
 2025   19   14   22    4 1920 4613  469    4   22   71   87   12   16
   43  530   38   76   15   13 1247    4   22   17  515   17   12   16
  626   18    2    5   62  386   12    8  316    8  106    5    4 2223
    2   16]


# LSTM

In [25]:
class Dataset(torch.utils.data.Dataset):
    def __init__(self, topics, model_length):
        self.topics=topics
        self.model_length=model_length

    def __len__(self):
        return len(self.topics)-self.model_length

    def __getitem__(self, index):
        input_sequence=torch.tensor(self.topics[index:index+self.model_length, :])
        target_sequence=torch.tensor(self.topics[index+1:index+self.model_length+1, :])

        return input_sequence, target_sequence

In [26]:
class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, model_length, batch_size):
        super(LSTM, self).__init__()
        self.input_size = input_size #input size (NUM_TOPICS)
        self.hidden_size = hidden_size #number of hidden neurons
        self.output_size = output_size #output size (NUM_TOPICS)
        self.model_length=model_length
        self.batch_size = batch_size

        self.lstm = nn.LSTM(self.input_size, self.hidden_size, batch_first=True)
        self.fc = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, x, prev_state):
        output, state = self.lstm(x, prev_state)
        output=self.fc(output)
        probabilities = F.softmax(output[:, -1, :], dim=1)
        return probabilities, state

    def init_state(self):
        return (torch.zeros(1, self.batch_size, self.hidden_size), #(NUM_LAYERS, BATCH SIZE, NUM_NEURONES)
                torch.zeros(1, self.batch_size, self.hidden_size))

    def train_model(self, dataset, optimizer, criterion):
        state_h, state_c = self.init_state()
        self.train()
        for t, (x, y) in enumerate(dataset):
            optimizer.zero_grad()
            softmax , (state_h, state_c) = self(x, (state_h, state_c)) #softmax= p(z_{t+1}|z_1:t)
            loss = criterion(softmax, y[:, -1, :])
            state_h = state_h.detach()
            state_c = state_c.detach()

            loss.backward()
            optimizer.step()
        print({'loss': loss.item() })

    def predict_next_probability(self, input_sequence):
        state_h, state_c = self.init_state()
        with torch.no_grad():
            for t in range(len(input_sequence)):
                input_t = input_sequence[t].unsqueeze(0).unsqueeze(0)
                _, (state_h, state_c) = self(input_t, (state_h, state_c))

        input_t = input_sequence[-1].unsqueeze(0).unsqueeze(0)
        probabilities, _ = self(input_t, (state_h, state_c))

        return probabilities

    def sample_next_z(self, input_sequence):
        proba = self.predict_next_probability(input_sequence)
        return torch.multinomial(proba, 1).item()+1

In [27]:
HIDDEN_SIZE=64
SEQUENCE_LENGTH = 100
MODEL_LENGTH = 10
NUM_TOPICS = 50
NUM_WORDS = 5000

lstm_model=LSTM(input_size=NUM_TOPICS, hidden_size=HIDDEN_SIZE, output_size=NUM_TOPICS, model_length=MODEL_LENGTH, batch_size=1)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(lstm_model.parameters(), lr=0.001)

In [28]:
z_star_train = torch.randint(1, NUM_TOPICS + 1, size=(X_train.shape[0], SEQUENCE_LENGTH))
z_star_one_hot_train = to_categorical(z_star_train-1, num_classes=NUM_TOPICS)

for i in range(z_star_train.shape[0]):
    print(i)
    dataset_i=Dataset(topics=z_star_one_hot_train[i], model_length=MODEL_LENGTH)
    dataloader_i = DataLoader(dataset_i, batch_size=1)
    lstm_model.train_model(dataloader_i, optimizer, criterion)

0
{'loss': 3.914231300354004}
1
{'loss': 3.9097096920013428}
2
{'loss': 3.9114458560943604}
3
{'loss': 3.914520263671875}
4
{'loss': 3.911060094833374}


## SSM

In [29]:
class SSMPytorch:
    def __init__(self, num_words, num_topics, T):
        self.num_words = num_words
        self.num_topics = num_topics
        self.T = T
        self.phi = torch.randn(T, num_words, num_topics) * 0.01
        self.phi = torch.exp(self.phi - torch.max(self.phi, dim=1, keepdim=True).values)
        self.phi = self.phi / torch.sum(self.phi, dim=1, keepdim=True)

    def compute_MLE_SSM(self, ech_x, ech_z):
        def compute_MLE_SSM_time_t(t, ech_x, ech_z, num_words, num_topics):
            ech_x_t = ech_x[:, t]
            ech_z_t = ech_z[:, t]
            n_samples = ech_x_t.size(0)
            proba_matrix = torch.zeros((num_words, num_topics), dtype=torch.float)
            for i in range(n_samples):
                index_x = ech_x_t[i] - 1
                index_z = ech_z_t[i] - 1
                proba_matrix[index_x, index_z] += 1.0
            row_sums = proba_matrix.sum(dim=0, keepdim=True)
            proba_matrix_normalized = proba_matrix / (row_sums + 1e-6)
            self.phi[t] = proba_matrix_normalized

        for t in range(self.T):
            compute_MLE_SSM_time_t(t, ech_x, ech_z, self.num_words, self.num_topics)

    def predict_proba(self, t, z_t):
        return self.phi[t - 1][:, z_t - 1]

    def sample_xt(self, t, z_t):
        proba = self.predict_proba(t, z_t)
        sampled_xt = torch.multinomial(proba, 1).item()
        return sampled_xt


In [30]:
ssm_model = SSMPytorch(num_words=NUM_WORDS, num_topics=NUM_TOPICS, T=SEQUENCE_LENGTH)
ssm_model.phi[10].sum(dim = 0)

tensor([1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
        1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
        1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
        1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
        1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
        1.0000, 1.0000, 1.0000, 1.0000, 1.0000])

In [31]:
X_train_torch = torch.tensor(X_train, dtype = torch.int64)
ssm_model.compute_MLE_SSM(X_train_torch, z_star_train)
ssm_model.phi[10].sum(dim = 0)

tensor([0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.0000,
        0.0000, 1.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 1.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        1.0000, 0.0000, 0.0000, 0.0000, 0.0000])

## Particle Gibbs

In [32]:
def compute_alpha_unnormalized(t, z_1_t_minus_1, num_topics, num_voc, lstm, ssm):
    z_1_t_minus_1 = z_1_t_minus_1 - 1
    z_one_hot = F.one_hot(z_1_t_minus_1, num_classes=num_topics).float()
    softmax = lstm.predict_next_probability(z_one_hot).detach()
    phi_t = ssm.phi[t - 1]
    alpha = torch.tensor([torch.matmul(softmax, phi_t[j, :]) for j in range(num_voc)])
    return alpha

def compute_alpha_normalized(t, z_1_t_minus_1, num_topics, num_voc, lstm, ssm):
    num = compute_alpha_unnormalized(t, z_1_t_minus_1, num_topics, num_voc, lstm, ssm)
    denom = torch.sum(num) + 1e-6
    return num / denom

def compute_gamma_unnormalized(t, xt, z_1_t_minus_1, num_topics, lstm, ssm):
    z_1_t_minus_1 = z_1_t_minus_1 - 1
    z_one_hot = F.one_hot(z_1_t_minus_1, num_classes=num_topics).float()
    softmax = lstm.predict_next_probability(z_one_hot).detach()
    phi_t = ssm.phi[t - 1]
    phi_xt = phi_t[xt - 1, :]
    return torch.mul(softmax, phi_xt)

def compute_gamma_normalized(t, xt, z_1_t_minus_1, num_topics, lstm, ssm):
    num = compute_gamma_unnormalized(t, xt, z_1_t_minus_1, num_topics, lstm, ssm)
    denom = torch.sum(num) + 1e-6
    return num / denom

In [33]:
def particle_gibbs(x, previous_z_1_T_star, P, num_topics, num_words, T, lstm_model, ssm_model):
    # Init
    Z_matrix = torch.zeros((P, T + 1), dtype=torch.long)
    alpha_matrix = torch.zeros((P, T + 1), dtype=torch.float)
    ancestor_matrix = torch.ones((P, T + 1), dtype=torch.long)

    # t=0
    z_0 = torch.randint(1, num_topics + 1, (P,))
    alpha_0 = torch.full((P,), 1 / P)
    Z_matrix[:, 0] = z_0
    alpha_matrix[:, 0] = alpha_0

    for t in range(1, T + 1):
        a_t_minus_1 = torch.tensor(1)
        z_1_t = previous_z_1_T_star[:t]
        ancestor_matrix[0, t - 1] = a_t_minus_1
        Z_matrix[0, 1:t + 1] = z_1_t

        for p in range(2, P + 1):
            alpha_t_minus_1_p = alpha_matrix[:, t - 1]
            try:
                a_t_minus_1_p = torch.multinomial(alpha_t_minus_1_p, 1).item()+1
            except:
                a_t_minus_1_p = torch.multinomial(alpha_t_minus_1_p / alpha_t_minus_1_p.sum(), 1).item()+1

            ancestor_matrix[p - 1, t - 1] = a_t_minus_1_p

            if t == 1:
                z_1_t_minus_1_a_t_minus_1_p = Z_matrix[int(a_t_minus_1_p) - 1, 0]
                z_1_t_minus_1_a_t_minus_1_p = torch.tensor([z_1_t_minus_1_a_t_minus_1_p])
                gamma_t_p = compute_gamma_normalized(t=t,
                                                 xt=x[t - 1],
                                                 z_1_t_minus_1=z_1_t_minus_1_a_t_minus_1_p,
                                                 num_topics=num_topics,
                                                 lstm=lstm_model,
                                                 ssm=ssm_model)

                try:
                    z_t_p = torch.multinomial(gamma_t_p, 1).item()+1
                except:
                    z_t_p = torch.multinomial(gamma_t_p / gamma_t_p.sum(), 1).item()+1
                z_1_t_p = z_t_p
            else:
                z_1_t_minus_1_a_t_minus_1_p = Z_matrix[int(a_t_minus_1_p) - 1, 1:t]
                gamma_t_p = compute_gamma_normalized(t=t,
                                                    xt=x[t - 1],
                                                    z_1_t_minus_1=z_1_t_minus_1_a_t_minus_1_p,
                                                    num_topics=num_topics,
                                                    lstm=lstm_model,
                                                    ssm=ssm_model)

                try:
                    z_t_p = torch.multinomial(gamma_t_p, 1).item()+1
                except:
                    z_t_p = torch.multinomial(gamma_t_p / gamma_t_p.sum(), 1).item()+1

                z_1_t_p = torch.cat([z_1_t_minus_1_a_t_minus_1_p, torch.tensor([z_t_p])])

            Z_matrix[p - 1, 1:t + 1] = z_1_t_p

        for p in range(1, P + 1):
            a_t_minus_1_p = ancestor_matrix[p - 1, t - 1]
            if t == 1:
                z_1_t_minus_1_a_t_minus_1_p = Z_matrix[int(a_t_minus_1_p) - 1, 0]
                z_1_t_minus_1_a_t_minus_1_p = torch.tensor([z_1_t_minus_1_a_t_minus_1_p])
            else:
                z_1_t_minus_1_a_t_minus_1_p = Z_matrix[int(a_t_minus_1_p) - 1, 1:(t - 1) + 1]

            alpha_t_p = compute_alpha_normalized(t=t,
                                                 z_1_t_minus_1=z_1_t_minus_1_a_t_minus_1_p,
                                                 num_topics=num_topics,
                                                 num_voc=num_words,
                                                 lstm=lstm_model,
                                                 ssm=ssm_model)
            alpha_t_p = alpha_t_p[x[t - 1] - 1]
            alpha_matrix[p - 1, t] = alpha_t_p

        alpha_matrix[:, t] = alpha_matrix[:, t] / (alpha_matrix[:, t].sum() + 1e-6)

    alpha_T = alpha_matrix[:, -1]
    alpha_T = alpha_T / (alpha_T.sum() + 1e-6)

    try:
        r = torch.multinomial(alpha_T, 1).item()+1
    except:
        r = torch.multinomial(alpha_T / alpha_T.sum(), 1).item()+1

    a_T_r = ancestor_matrix[int(r) - 1, -1]
    z_1_T = Z_matrix[int(a_T_r) - 1, 1:]

    return z_1_T


## Train

In [34]:
HIDDEN_SIZE=64
SEQUENCE_LENGTH = 100
MODEL_LENGTH = 10
NUM_TOPICS = 50
NUM_WORDS = 5000
N_ITER_PARTICULES = 1
N_EPOCHS = 2
NUM_PARTICULES = 10

lstm_model=LSTM(input_size=NUM_TOPICS, hidden_size=HIDDEN_SIZE, output_size=NUM_TOPICS, model_length=MODEL_LENGTH, batch_size=1)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(lstm_model.parameters(), lr=0.001)
ssm_model = SSMPytorch(num_words=NUM_WORDS, num_topics=NUM_TOPICS, T=SEQUENCE_LENGTH)

In [35]:
%%time
from torch.multiprocessing import Pool

def parallel_particle_gibbs(args):
    return particle_gibbs(*args)

args_list = [(torch.tensor(x, dtype=torch.int64), torch.randint(1, NUM_TOPICS + 1, (SEQUENCE_LENGTH,)),  NUM_PARTICULES, NUM_TOPICS, NUM_WORDS, SEQUENCE_LENGTH, lstm_model, ssm_model) for x in X_train]

num_processes = 6
with Pool(num_processes) as pool:
    z_star_train = pool.map(parallel_particle_gibbs, args_list)

CPU times: user 1.3 s, sys: 391 ms, total: 1.69 s
Wall time: 5min 50s


In [36]:
print(ok)

NameError: ignored

In [None]:
z_star_train

In [None]:
# %%time
# from concurrent.futures import ThreadPoolExecutor
# from functools import partial


# partial_particle_gibbs = partial(particle_gibbs, P=NUM_PARTICULES, num_topics=NUM_TOPICS, num_words=NUM_WORDS, T=SEQUENCE_LENGTH, lstm_model=lstm_model, ssm_model=ssm_model, previous_z_1_T_star=torch.randint(1, NUM_TOPICS + 1, (SEQUENCE_LENGTH,)))
# num_threads = 4

# X_train_torch = torch.tensor(X_train, dtype=torch.int64)
# X_train_list = X_train_torch.tolist()

# with ThreadPoolExecutor(max_workers=num_threads) as executor:
#     z_star_train = list(executor.map(partial_particle_gibbs, X_train_list))

In [None]:
%%time
for epoch in range(N_EPOCHS):
    print("epoch: {}".format(epoch+1))
    z_star_train = []
    print("Particle Gibbs")
    for i in range(X_train.shape[0]):
        for iter in range(N_ITER_PARTICULES):
            if iter == 0:
                previous_z_1_T_star = torch.randint(1, NUM_TOPICS + 1, (SEQUENCE_LENGTH,))
        x_tensor = torch.tensor(X_train[i], dtype=torch.int64)
        z_star = particle_gibbs(NUM_PARTICULES, NUM_TOPICS, NUM_WORDS, SEQUENCE_LENGTH, lstm_model, ssm_model, x_tensor, previous_z_1_T_star)

        if i == 0:
            z_star_train = z_star
        else:
            z_star_train = torch.vstack((z_star_train, z_star))
        previous_z_1_T_star = z_star
    print("LSTM")
    for i in range(z_star_train.shape[0]):
        dataset_i=Dataset(topics=z_star_one_hot_train[i], model_length=MODEL_LENGTH)
        dataloader_i = DataLoader(dataset_i, batch_size=1)
        lstm_model.train_model(dataloader_i, optimizer, criterion)
    print("SSM")
    X_train_torch = torch.tensor(X_train, dtype=torch.int64)
    ssm_model.compute_MLE_SSM(X_train_torch, z_star_train)