In [2]:
import torch

In [3]:
# Number of learnable entries; the training cells prepend one extra entry pinned to 1.0.
k = 20

# Inexact Marginals
This is the TerpreT problem with inexact marginals.

In [8]:
# Independent-marginal relaxation: each pairwise EQUI factor is scored from the
# two neighbouring marginals alone, and all factors are multiplied together.
mu = torch.randn(k, requires_grad=True)
optimizer = torch.optim.SGD([mu], lr=1e-1)
for t in range(10000):
    # Entry 0 is pinned to 1.0; the remaining k entries are sigmoid(mu).
    x = torch.cat([torch.tensor([1.0]), mu.sigmoid()])
    # z is x rotated right by one, so z[i] is the cyclic predecessor of x[i].
    z = torch.cat([x[-1:], x[:-1]])
    both_on = x * z
    both_off = (1 - x) * (1 - z)
    equi = both_on + both_off
    all_equi = equi.prod()
    # Minimise the negative log-probability that every factor holds.
    loss = -all_equi.log()
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
loss, torch.round(x * 100)

(tensor(2.7908, grad_fn=<NegBackward>),
 tensor([100., 100.,  50.,   0.,   0.,   0.,   0.,   0.,   0.,  50., 100., 100.,
         100., 100., 100., 100., 100., 100., 100., 100., 100.],
        grad_fn=<RoundBackward>))

# Inexact Max-Marginals

This is the same problem, using max-marginals instead of marginals.

In [10]:
mu = torch.randn(k, requires_grad=True)
optimizer = torch.optim.SGD([mu], lr=1e-1)
for t in range(100000):
    # Entry 0 is pinned to 1.0; the remaining k entries are sigmoid(mu).
    x = torch.cat([torch.tensor([1.0]), mu.sigmoid()])
    # z is x rotated right by one, so z[i] is the cyclic predecessor of x[i].
    z = torch.cat([x[-1:], x[:-1]])
    agreement = x * z + (1 - x) * (1 - z)
    # NOTE(review): a single-argument max reduces over ALL entries, so `equi`
    # is already a scalar and the loss only depends on the largest factor.
    # If an element-wise max over the two agreeing assignments followed by a
    # product was intended, this needs changing -- confirm with the author.
    equi = agreement.max()
    all_equi = equi.max()
    loss = -all_equi.log()
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
loss, torch.round(x * 100)

(tensor(0.0002, grad_fn=<NegBackward>),
 tensor([100.,  36.,  28.,  28.,   0.,   0.,  45.,  14.,  34.,  51.,  62.,  43.,
          84.,  28.,  45.,  56.,  77.,  70.,  90.,  22.,  63.],
        grad_fn=<RoundBackward>))

This is learning with an exact forward marginal (the probability that all XOR/EQUI nodes are active).

In [157]:
# Exact objective: under independent marginals, every node agrees only if all
# entries are on or all entries are off.
mu = torch.randn(k, requires_grad=True)
optimizer = torch.optim.SGD([mu], lr=1e-2)
for t in range(10000):
    # Entry 0 is pinned to 1.0; the remaining k entries are sigmoid(mu).
    x = torch.cat([torch.tensor([1.0]), mu.sigmoid()])
    all_on = x.prod()
    all_off = (1.0 - x).prod()
    all_equi = all_on + all_off
    loss = -all_equi.log()
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
loss, torch.round(x * 100)

(tensor(0.2052, grad_fn=<NegBackward>),
 tensor([100.,  99.,  99.,  99.,  99.,  99.,  99.,  99.,  99.,  99.,  99.,  99.,
          99.,  99.,  99.,  99.,  99.,  99.,  99.,  99.,  99.],
        grad_fn=<RoundBackward>))

In [None]:
# Draw fresh random logits (k of them, gradient-enabled) for the cells that follow.
mu = torch.randn(k,  requires_grad=True)

In [21]:
# x_0: marginals with entry 0 pinned to 1.0; x_1 and x_2 are x_0 rotated
# cyclically to the right by one and two positions respectively.
x_0 = torch.cat([torch.tensor([1.0]), mu.sigmoid()])
x_1 = torch.cat([x_0[-1:], x_0[:-1]])
x_2 = torch.cat([x_1[-1:], x_1[:-1]])
x_0, x_1

(tensor([1.0000, 0.3506, 0.5822, 0.5305, 0.3434, 0.5266, 0.4675, 0.2158, 0.4149,
         0.6560, 0.6453, 0.5317, 0.1204, 0.3375, 0.3203, 0.8821, 0.4624, 0.2785,
         0.7742, 0.3175, 0.3915, 0.2815, 0.2538, 0.8902, 0.2406, 0.2683, 0.4788,
         0.6625, 0.8804, 0.1115, 0.9080], grad_fn=<CatBackward>),
 tensor([0.9080, 1.0000, 0.3506, 0.5822, 0.5305, 0.3434, 0.5266, 0.4675, 0.2158,
         0.4149, 0.6560, 0.6453, 0.5317, 0.1204, 0.3375, 0.3203, 0.8821, 0.4624,
         0.2785, 0.7742, 0.3175, 0.3915, 0.2815, 0.2538, 0.8902, 0.2406, 0.2683,
         0.4788, 0.6625, 0.8804, 0.1115], grad_fn=<CatBackward>))

In [22]:
# Probability (under independence) that three consecutive entries agree:
# either all three are on or all three are off.
all_three_on = x_0 * x_1 * x_2
all_three_off = (1 - x_0) * (1 - x_1) * (1 - x_2)
equi_true_true = all_three_on + all_three_off
# Display every second window, leaving out the final entry.
equi_true_true[:-1:2]

tensor([0.1013, 0.2041, 0.2349, 0.2501, 0.2862, 0.2470, 0.1874, 0.4091, 0.1737,
        0.1873, 0.1900, 0.3542, 0.1166, 0.3205, 0.3003],
       grad_fn=<SliceBackward>)

In [24]:
# Collapse all window probabilities into a single scalar by multiplying them.
all_equi = equi_true_true.prod()
all_equi

tensor(5.8105e-22, grad_fn=<ProdBackward1>)

The code below aggregates several XOR variables to provide a joint distribution over these for the aggregation layer. In the limit (of aggregating over the complete set of variables), this should result in the optimal solution, because the forward marginal would then be correct.

In [156]:
k = 20
mu = torch.randn(k, requires_grad=True)
optimizer = torch.optim.SGD([mu], lr=1e-1)


def _shift_right(v):
    """Return ``v`` rotated cyclically by one position (last entry moves to the front)."""
    return torch.cat([v[-1:], v[:-1]])


for t in range(10000):
    # Entry 0 is pinned to 1.0; x_1..x_3 are successive right rotations of x_0.
    x_0 = torch.cat([torch.tensor([1.0]), mu.sigmoid()])
    x_1 = _shift_right(x_0)
    x_2 = _shift_right(x_1)
    x_3 = _shift_right(x_2)
    # Each position scores a window of four consecutive entries: all on or all off.
    all_on = x_0 * x_1 * x_2 * x_3
    all_off = (1 - x_0) * (1 - x_1) * (1 - x_2) * (1 - x_3)
    equi_true_true = all_on + all_off
    # Rotate the window list by t mod 3 so the strided selection below starts
    # at a different position on successive steps.
    offset = t % 3
    equi_true_true = torch.cat([equi_true_true[offset:], equi_true_true[:offset]])
    # Keep every third window (excluding the final entry) and multiply them.
    all_equi = equi_true_true[:-1:3].prod()
    loss = -all_equi.log()
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
loss, torch.round(x_0 * 100)

(tensor(0.0199, grad_fn=<NegBackward>),
 tensor([100., 100., 100., 100., 100., 100., 100., 100., 100., 100., 100., 100.,
         100., 100., 100., 100., 100., 100., 100., 100., 100.],
        grad_fn=<RoundBackward>))

In [705]:
# Sample a random evaluation point: entry 0 pinned to 1.0, the rest sigmoid(mu).
mu = torch.randn(k, requires_grad=True)
x = torch.cat([torch.tensor([1.0]), mu.sigmoid()])
def compute_all_equi(x):
    """Return the exact probability that all entries of ``x`` agree.

    Treating each ``x[i]`` as an independent probability of being "on", the
    result is ``prod(x) + prod(1 - x)``: either every entry is on, or every
    entry is off.

    Args:
        x: 1-D tensor (or sequence) of probabilities.

    Returns:
        The sum of the two products (a 0-dim tensor when ``x`` is a tensor).
    """
    all_equi_true = 1.0
    all_equi_false = 1.0
    # Iterate over x itself rather than the notebook-global `k`, so the result
    # cannot silently depend on a stale value of `k` left in the kernel.
    for i in range(len(x)):
        all_equi_true = all_equi_true * x[i]
        all_equi_false = all_equi_false * (1 - x[i])
    return all_equi_false + all_equi_true

def compute_all_equi_uncorrected(x):
    """Return the product of pairwise agreement probabilities around the cycle.

    For every position ``i`` the factor is the probability that ``x[i]`` and
    its cyclic predecessor agree, computed from the two marginals as if they
    were independent: ``x[i] * x[prev] + (1 - x[i]) * (1 - x[prev])``.

    Args:
        x: 1-D tensor (or sequence) of probabilities.

    Returns:
        The product of all pairwise factors (a 0-dim tensor when ``x`` is a
        tensor).
    """
    n = len(x)
    all_equi_dumb = 1.0
    for i in range(n):
        # Explicit parentheses: `i-1 % k` parses as `i - (1 % k)` and only
        # wrapped around through negative indexing at i == 0.
        prev = (i - 1) % n
        all_equi_dumb = all_equi_dumb * ((x[i] * x[prev]) + (1 - x[i]) * (1 - x[prev]))
    return all_equi_dumb

def compute_all_equi_corrected(x):
    """Return the pairwise agreement product with an added disagreement term.

    At every position the running value is first multiplied by the pairwise
    agreement probability and then increased by itself times the pairwise
    disagreement probability.

    NOTE(review): agreement + disagreement == 1, so each step effectively
    multiplies by ``equi * (2 - equi)``; confirm this is the intended
    correction.

    Args:
        x: 1-D tensor (or sequence) of probabilities.

    Returns:
        The accumulated value (a 0-dim tensor when ``x`` is a tensor). The
        result is differentiable: ``.backward()`` can be called on it.
    """
    n = len(x)
    all_equi_dumb = 1.0
    for i in range(n):
        # Explicit parentheses: `i-1 % k` parses as `i - (1 % k)` and only
        # wrapped around through negative indexing at i == 0.
        prev = (i - 1) % n
        agree = (x[i] * x[prev]) + (1 - x[i]) * (1 - x[prev])
        disagree = (x[i] * (1 - x[prev])) + (1 - x[i]) * x[prev]

        all_equi_dumb = all_equi_dumb * agree
        # Out-of-place add: an in-place `+=` would overwrite a tensor that
        # autograd saved for the multiplication on the right-hand side and
        # make a later backward() raise a RuntimeError.
        all_equi_dumb = all_equi_dumb + all_equi_dumb * disagree
    return all_equi_dumb

# Evaluate all three estimates on the same x: pairwise product, exact all-agree probability, "corrected" product.
compute_all_equi_uncorrected(x), compute_all_equi(x), compute_all_equi_corrected(x)

(tensor(7.5020e-10, grad_fn=<ThMulBackward>),
 tensor(4.2494e-11, grad_fn=<ThAddBackward>),
 tensor(0.0002, grad_fn=<ThAddBackward>))

In [701]:
mu = torch.randn(k, requires_grad=True)
optimizer = torch.optim.SGD([mu], lr=1e-1)
for t in range(400):
    # Entry 0 is pinned to 1.0; the remaining k entries are sigmoid(mu).
    x = torch.cat([torch.tensor([1.0]), mu.sigmoid()])
    # Objective: the pairwise (uncorrected) product; compute_all_equi(x) is the
    # exact alternative.
    all_equi = compute_all_equi_uncorrected(x)
    loss = -all_equi.log()
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

loss, torch.round(x * 100)

(tensor(6.2719, grad_fn=<NegBackward>),
 tensor([100.,  99.,  99.,  98.,  50.,   3.,   1.,   1.,   1.,   1.,   2.,   2.,
           2.,   1.,   2.,  49.,  97.,  99.,  99.,  99.,  99.,  98.,  51.,   3.,
           1.,   1.,   1.,   1.,   2.,  49.,  97.], grad_fn=<RoundBackward>))

TypeError: 'Tensor' object is not callable

In [78]:
# Hand-check on the first three entries: the joint "all on" term x0*x1*x2
# versus the product of the two pairwise "on" terms, which contains x1 twice.
x0, x1, x2 = x[0], x[1], x[2]
eq_12_true = x0 * x1
eq_12_false = (1 - x0) * (1 - x1)
eq_12 = eq_12_true + eq_12_false
eq_23_true = x1 * x2
eq_23_false = (1 - x1) * (1 - x2)
eq_123_true = eq_12_true * x2
eq_123_false = eq_12_false * (1 - x2)
eq_123_true, eq_12_true * eq_23_true

(tensor(0.1128), tensor(0.0531))