In [10]:
import torch
from torch import nn
import pdb
import logging


# Allow up to 180 characters per line when tensors are printed.
torch.set_printoptions(linewidth=180)

# Every log record is prefixed with its timestamp; INFO and above are shown.
FORMAT = '%(asctime)s %(message)s'
logging.basicConfig(format=FORMAT, level=logging.INFO)

# Inclusive bounds on how many conditioning events are kept per generated
# training statement (used when sampling the number of kept conditions).
MAX_CONDITIONS=2
MIN_CONDITIONS=0

# Seed torch's global random number generator.
torch.manual_seed(7)



<torch._C.Generator at 0x1a807c57a50>

In [11]:
## A single sample consists of 6 events that may occur with different probabilities
def generate_samples(size):
    """Draw `size` random samples of six boolean events.

    Columns 0-2 are independent base events with probabilities 1.0, 0.2 and
    0.1. Column 3 (D) occurs with probability 0.7 when event 1 occurred and
    0.2 otherwise; column 4 (E) with probability 0.5 when event 2 occurred
    and 0.1 otherwise; column 5 (F) with probability 0.5 * D + 0.5 * E.

    Returns:
        A boolean tensor of shape (size, 6), one row per sample.
    """
    base_probs = torch.tensor([1.0, 0.2, 0.1]).unsqueeze(1)
    base = torch.rand((3, size)) < base_probs

    # The draws below are made in a fixed order (D, then E, then F) so that
    # results are reproducible for a given seed.
    prob_d = base[1] * 0.5 + 0.2
    event_d = torch.rand(size) < prob_d

    prob_e = base[2] * 0.4 + 0.1
    event_e = torch.rand(size) < prob_e

    prob_f = event_d * 0.5 + event_e * 0.5
    event_f = torch.rand(size) < prob_f

    # Stack events as rows (6, size), then flip to one sample per row.
    by_event = torch.cat((base, torch.stack((event_d, event_e, event_f))))
    return by_event.transpose(0, 1)


def conditional_prob_matrix(samples):
    """Estimate pairwise conditional probabilities from boolean samples.

    Args:
        samples: boolean tensor with one row per sample and one column per
            event (columns are used as boolean row masks).

    Returns:
        A square tensor whose entry [i, j] is the fraction of the rows in
        which event i occurred where event j occurred as well. A row is NaN
        when event i never occurs in `samples`.
    """
    rows = []
    for given in range(samples.shape[1]):
        # Keep only the samples in which the conditioning event occurred.
        selected = samples[samples[:, given]]
        rows.append(selected.sum(dim=0) / selected.shape[0])
    return torch.stack(rows)

# Draw one million samples and display the empirical matrix in which
# entry [i, j] estimates P(event j | event i).
samples = generate_samples(1000000)
conditional_prob_matrix(samples)



tensor([[1.0000, 0.2001, 0.1001, 0.2993, 0.1404, 0.2202],
        [1.0000, 1.0000, 0.1002, 0.6998, 0.1407, 0.4196],
        [1.0000, 0.2002, 1.0000, 0.2988, 0.5023, 0.4012],
        [1.0000, 0.4678, 0.0999, 1.0000, 0.1407, 0.5716],
        [1.0000, 0.2005, 0.3582, 0.2999, 1.0000, 0.6496],
        [1.0000, 0.3813, 0.1824, 0.7770, 0.4142, 1.0000]])

In [12]:
## For learning we actually use tensors of the form [5,2,1,7,7,7] ~ P(5 | 2,1)
## We need to create a loader that returns sample tensors of that form.
## P(5 | 2) does not assume that 3 didn't occur.
## In order to learn these semantics, when generating the samples we must randomly
## drop events that occurred.

# NOTE(review): these repeat the values already assigned in the first cell;
# keep the two places in sync (or keep only one).
# Inclusive bounds for the number of conditions kept per statement.
MAX_CONDITIONS=2
MIN_CONDITIONS=0


def shuffle_columns(mat, mask=0):
    """Randomly permute the entries inside every row of a 2-D tensor.

    Each row gets its own independent permutation, obtained by sorting a
    row of random keys drawn uniformly from [0, 1).

    Args:
        mat: 2-D tensor to shuffle.
        mask: value added to the random keys before sorting. With a boolean
            tensor shaped like `mat`, masked entries get keys >= 1 and so
            end up after all unmasked entries of their row.

    Returns:
        A tensor with the same shape as `mat`, rows permuted independently.
    """
    sort_keys = torch.rand(mat.shape) + mask
    _, order = torch.sort(sort_keys)
    # order[r, k] is the source column for output position k of row r.
    return torch.gather(mat, 1, order)


def create_conditions_target_i(data):
    """Build one randomized (statement, target) training set from samples.

    For every sample a query event is drawn uniformly from all `ncovs`
    events; the target is 1.0 if that event occurred in the sample and 0.0
    otherwise. The conditions are a random subset of the events that did
    occur in the sample, obtained by shuffling them and keeping only the
    first `nselect` slots, where `nselect` is drawn per sample from
    [MIN_CONDITIONS, MAX_CONDITIONS].

    Two filler tokens appear in the condition slots:
      * `ncovs`     - a kept slot for which no occurred event was left
      * `ncovs + 1` - a slot that was not kept (dropped)

    Args:
        data: boolean tensor of shape (nsamp, ncovs), one row per sample.

    Returns:
        A pair (statement, target): `statement` is an integer tensor of shape
        (nsamp, 1 + ncovs) holding [event, cond_1, ..., cond_ncovs] per row,
        and `target` is a float tensor of shape (nsamp,).
    """
    nsamp, ncovs = data.shape
    event_ids = torch.arange(ncovs)
    na_vec = torch.full_like(event_ids, ncovs)
    drop_vec = torch.full_like(event_ids, ncovs + 1)

    # Draw the query event from the actual number of events in `data`
    # rather than from a fixed count of six.
    event = torch.randint(0, ncovs, (nsamp,))
    # Did the queried event occur in its sample?
    target = data.gather(1, event.unsqueeze(1)).squeeze(1)

    # Replace each occurred event by its index and everything else by the
    # "not available" token, then shuffle so that the occurred events come
    # first in random order (the mask pushes the filler to the end).
    id_mat = torch.where(data, event_ids, na_vec)
    id_mat = shuffle_columns(id_mat, mask=(id_mat == ncovs))

    # Keep only the first `nselect` condition slots of each row.
    nselect = torch.randint(MIN_CONDITIONS, MAX_CONDITIONS + 1, (nsamp, 1))
    keep = event_ids < nselect
    conditions = torch.where(keep, id_mat, drop_vec)

    statement = torch.cat((event.unsqueeze(1), conditions), dim=1)
    return statement, target.float()

def create_conditions_target(data, repeat=5):
    """Generate `repeat` independent randomized training sets and stack them.

    Each pass calls create_conditions_target_i on the same `data`, so every
    sample contributes `repeat` statements with freshly drawn events and
    conditions.

    Returns:
        A pair (statements, targets) concatenated along the first dimension.
    """
    passes = [create_conditions_target_i(data) for _ in range(repeat)]
    statements, targets = zip(*passes)
    all_statements = torch.concat(statements, dim=0)
    all_targets = torch.concat(targets, dim=0)
    return all_statements, all_targets
    

def create_loader(statement, target, **kwargs):
    """Wrap paired statement/target tensors in a DataLoader.

    Args:
        statement: tensor of inputs, indexed along the first dimension.
        target: tensor of targets with the same first dimension.
        **kwargs: forwarded unchanged to torch.utils.data.DataLoader
            (for example batch_size or shuffle).

    Returns:
        A DataLoader yielding (statement_batch, target_batch) pairs.
    """
    data_utils = torch.utils.data
    paired = data_utils.TensorDataset(statement, target)
    return data_utils.DataLoader(paired, **kwargs)

# Build the training statements/targets from the generated samples (default
# of 5 randomized passes) and serve them in batches of 1024.
statement, target = create_conditions_target(samples)
loader = create_loader(statement, target, batch_size=1024)


In [13]:
## This is what the data we use for learning looks like
## [2, 0, 7,  ..., 7, 7, 7] ~ P(2 | 0)

# Fetch the first batch for inspection. The built-in next() is used because
# current DataLoader iterators do not provide the old `.next()` method.
batch = next(iter(loader))
print(batch)
print(batch[0].shape)

[tensor([[5, 0, 6,  ..., 7, 7, 7],
        [4, 0, 4,  ..., 7, 7, 7],
        [1, 0, 1,  ..., 7, 7, 7],
        ...,
        [1, 0, 5,  ..., 7, 7, 7],
        [3, 7, 7,  ..., 7, 7, 7],
        [0, 3, 0,  ..., 7, 7, 7]]), tensor([0., 1., 1.,  ..., 0., 1., 1.])]
torch.Size([1024, 7])


In [14]:
##  Model

class Residual(nn.Module):
    """Residual block computing ReLU(Linear(x)) + x.

    The linear layer maps `dim` features to `dim` features so that its
    activated output can be added back onto the block's input.
    """

    def __init__(self, dim):
        super().__init__()
        self.ff = nn.Sequential(nn.Linear(dim, dim), nn.ReLU())

    def forward(self, input):
        transformed = self.ff(input)
        # Skip connection: add the untouched input back on.
        return transformed + input

class CondProbNetwork(nn.Module):
    """Feed-forward network estimating P(event | conditions).

    An input row has the form [event, cond_1, cond_2, ...]. The event and
    the first `MAX_CONDITIONS` conditions are embedded with separate
    embedding tables, concatenated, and passed through a linear layer, a
    stack of residual blocks and a final sigmoid, giving one value in
    (0, 1) per row.

    Args:
        nevents: number of real events; two more embedding rows are
            reserved for the filler tokens `nevents` and `nevents + 1`.
        embedding_dim: size of each event/condition embedding.
        inter_dim: width of the hidden layers.
        nlayers: number of residual blocks.
    """

    def __init__(self, nevents=6, embedding_dim=6, inter_dim=60, nlayers=8):
        super().__init__()
        self.nconds = MAX_CONDITIONS
        self.nevents = nevents
        # Token used to fill condition slots that the caller did not supply.
        self.none = nevents + 1
        self.event_embedding = nn.Embedding(nevents + 2, embedding_dim)
        self.condition_embedding = nn.Embedding(nevents + 2, embedding_dim)
        self.ff = nn.Sequential(
            nn.Linear(embedding_dim * (1 + self.nconds), inter_dim),
            nn.ReLU(),
            *[Residual(inter_dim) for _ in range(nlayers)],
            nn.Linear(inter_dim, 1),
            nn.Sigmoid(),
        )

    def forward(self, input):
        """Return the estimated probability for every statement row.

        Args:
            input: 2-D tensor of token ids, one statement per row. Rows with
                fewer than `nconds + 1` columns are right-padded with the
                `none` token; columns beyond `nconds + 1` are ignored.

        Returns:
            Tensor of shape (rows, 1).
        """
        width = self.nconds + 1
        if input.shape[1] < width:
            # Build the padding directly in the input's dtype and on its
            # device, so the ids are never routed through float arithmetic.
            pad = input.new_full((input.shape[0], width - input.shape[1]), self.none)
            input = torch.cat((input, pad), dim=1)

        event = self.event_embedding(input[:, 0:1].int())
        cond = self.condition_embedding(input[:, 1:width].int())
        encoded_input = torch.cat((event.flatten(1), cond.flatten(1)), dim=1)
        return self.ff(encoded_input)



In [17]:
## training

def train_model(model, loss_fun, optimizer, loader, num_epochs=10, device=None, track=None):
    """Run a plain gradient-descent training loop.

    Args:
        model: module called on each batch of inputs.
        loss_fun: callable taking (flattened model output, target).
        optimizer: optimizer that updates the model's parameters.
        loader: iterable yielding (data, target) pairs.
        num_epochs: maximum number of passes over `loader`.
        device: when not None, each batch is moved to this device first.
        track: optional callback invoked after every epoch as
            track(epoch_index, model, device); a falsy return value stops
            training early.

    Returns:
        None. The model is updated in place.
    """
    move_batches = device is not None
    for epoch in range(num_epochs):
        logging.info(f"{epoch}")
        batches = iter(loader)
        for batch_inputs, batch_targets in batches:
            if move_batches:
                batch_inputs = batch_inputs.to(device)
                batch_targets = batch_targets.to(device)
            optimizer.zero_grad()
            predictions = model(batch_inputs)
            loss = loss_fun(predictions.flatten(), batch_targets)
            loss.backward()
            optimizer.step()
        if track is None:
            continue
        # The callback decides whether training should go on.
        if not track(epoch, model, device):
            return

def train_ext(model, loader, device_name='cuda:0', batch_size=16, num_epochs=4, lr=0.005, track=None):
    """Train `model` on `loader` with Adam and binary cross-entropy.

    Args:
        model: module to train; it is moved to `device_name` first.
        loader: iterable yielding (data, target) batches.
        device_name: device string passed to torch.device.
        batch_size: accepted for compatibility but not used here; the batch
            size is whatever `loader` yields.
        num_epochs: number of passes over `loader`.
        lr: learning rate for the Adam optimizer.
        track: optional per-epoch callback forwarded to train_model; a
            falsy return value stops training early.

    Returns:
        The trained model (still on the training device).
    """
    logging.info("start training")
    device = torch.device(device_name)
    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    train_model(
        model=model,
        loss_fun=nn.BCELoss(),
        optimizer=optimizer,
        loader=loader,
        device=device,
        num_epochs=num_epochs,
        # Forward the caller's callback instead of discarding it.
        track=track,
    )
    return model

# Train a network with default hyper-parameters using train_ext's defaults
# (device 'cuda:0', 4 epochs), then move the trained model back to the CPU.
model = CondProbNetwork()
cpu = torch.device("cpu")
model = train_ext(model, loader).to(cpu)

2022-09-20 15:13:07,434 start training
2022-09-20 15:13:07,524 0
2022-09-20 15:13:51,219 1
2022-09-20 15:14:34,807 2
2022-09-20 15:15:18,788 3


In [24]:

print("Model conditional probability matrix")
# Row c, column e holds the model output for the statement [e, c]; the model
# pads the missing condition slot itself.
print(torch.tensor([model(torch.tensor([[e,c]])) for c in range(6) for e in range(6)]).reshape(6,6))

print("Estimate ground truth by sampling")
# Empirical matrix computed from the raw samples, in the same row/column layout.
print(conditional_prob_matrix(samples))


def sample_prob(e,c):
    """Average target over training rows whose first three columns are [e, c, 7].

    Reads the module-level `statement` and `target` tensors. The result is
    NaN when no row matches.
    """
    pattern = torch.tensor([e, c, 7])
    # A row matches only if all three leading columns agree with the pattern.
    matches = (statement[:, 0:3] == pattern).all(dim=1)
    selected = target[matches]
    return selected.sum() / selected.shape[0]

print("Estimate with the sampling used for learning")
# Row c, column e: mean training target over statements starting with [e, c, 7].
print(torch.tensor([sample_prob(e,c) for c in range(6) for e in range(6)]).reshape(6,6))

Model conditional probability matrix
tensor([[1.0000, 0.0947, 0.0476, 0.1487, 0.0667, 0.0915],
        [1.0000, 1.0000, 0.0671, 0.5405, 0.0949, 0.3005],
        [1.0000, 0.1408, 1.0000, 0.2057, 0.3381, 0.2274],
        [1.0000, 0.3712, 0.0642, 1.0000, 0.0959, 0.4613],
        [1.0000, 0.1548, 0.2989, 0.2241, 1.0000, 0.5088],
        [1.0000, 0.2996, 0.1262, 0.7303, 0.3490, 1.0000]])
Estimate ground truth by sampling
tensor([[1.0000, 0.2001, 0.1001, 0.2993, 0.1404, 0.2202],
        [1.0000, 1.0000, 0.1002, 0.6998, 0.1407, 0.4196],
        [1.0000, 0.2002, 1.0000, 0.2988, 0.5023, 0.4012],
        [1.0000, 0.4678, 0.0999, 1.0000, 0.1407, 0.5716],
        [1.0000, 0.2005, 0.3582, 0.2999, 1.0000, 0.6496],
        [1.0000, 0.3813, 0.1824, 0.7770, 0.4142, 1.0000]])
Estimate with the sampling used for learning
tensor([[1.0000, 0.0944, 0.0490, 0.1441, 0.0652, 0.0878],
        [1.0000, 1.0000, 0.0650, 0.5812, 0.0929, 0.2977],
        [1.0000, 0.1384, 1.0000, 0.1957, 0.3889, 0.2623],
        [1.0

In [31]:
print("A few example of conditioning on two events")
# The two statements list the same pair of conditions (4 and 5) in swapped
# order for event 3. The model reads only the first MAX_CONDITIONS condition
# columns, so the trailing 7 does not influence the output.
print(model(torch.tensor([[3,4,5,7]])))
print(model(torch.tensor([[3,5,4,7]])))


A few example of conditioning on two events
tensor([[0.2981]], grad_fn=<SigmoidBackward0>)
tensor([[0.2842]], grad_fn=<SigmoidBackward0>)


In [33]:
print("The estimate value (by sampling)")
# Select the training rows whose first four columns are exactly [3, 4, 5, 7]
# and take the mean of their targets.
mask = (torch.sum(statement[:,0:4]==torch.tensor([3,4,5,7]), dim=1) == 4)
target[mask].sum()/target[mask].shape[0]

The estimate value (by sampling)


tensor(0.2713)