In [1]:
import pandas as pd
import numpy as np
from scipy.stats import ortho_group
np.random.seed(seed=0)
x = ortho_group.rvs(6)

In [52]:
std_list = [1.5, 2.0, 1.5, 2.5, 1.7, 2.2]
fdf = pd.DataFrame()
for i in range(6):
    fdf[i] = np.random.normal(loc=0.0, scale=std_list[i], size=200000)

In [53]:
x[0]

array([ 0.47669462,  0.15964814, -0.47766656,  0.55284255, -0.13930486,
        0.44053222])

In [54]:
x[1]

array([ 0.10813329, -0.8838343 ,  0.06718921,  0.39309219,  0.13141423,
       -0.1756091 ])

In [55]:
fdf["group"] = 0.9 / (1.0 + np.exp(-np.matmul(fdf.values, x[0].T) + 5.0)) + 0.1 / (1.0 + np.exp(-np.matmul(fdf.values, x[1].T) + 6.0))

In [56]:
fdf["prob"] = np.random.uniform(low=0.0, high=1.0, size=200000)

In [57]:
fdf["label"] = fdf["group"] > fdf["prob"]

In [58]:
fdf["label"] = fdf["label"] * 1

In [59]:
fdf.label.value_counts()

0    193874
1      6126
Name: label, dtype: int64

In [60]:
cdf = fdf.copy()

In [61]:
Y = cdf.label.values
X = cdf.drop(["prob", "label", "group"], axis=1).values

In [62]:
X

array([[-3.2644018 ,  4.00259172,  1.99437553,  4.61904693, -0.53068814,
         3.80792758],
       [-1.09016515,  2.8152774 , -1.21269169, -1.9584467 ,  0.70623256,
        -5.45471066],
       [-0.30506562, -1.90167043,  2.04191504,  1.60608851, -0.04201549,
         0.45122124],
       ...,
       [-0.11720379, -3.43795188, -1.89778315, -2.08164134, -1.4620388 ,
        -0.640514  ],
       [-0.80369369,  0.68150212, -0.10568412,  0.30215617, -0.23740226,
         1.55757821],
       [ 3.48005895,  1.83640184,  0.49217133, -0.84419194,  0.8227444 ,
         0.31970732]])

In [262]:
import torch
import torch.nn.functional as F
import torch.utils.data as Data

torch.manual_seed(0)

<torch._C.Generator at 0x1a14424d30>

In [263]:
class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.input = torch.nn.Linear(6, 2)
        torch.nn.init.orthogonal_(self.input.weight)
        self.output = torch.nn.Linear(2, 1, bias=False)
        self.output.weight.data = torch.tensor([[0.9, 0.1]])
        # self.input.weight.requires_grad = False
        self.output.weight.requires_grad = False

    def forward(self, x):
        x = torch.sigmoid(self.input(x))
        x = self.output(x)
        return x

In [264]:
# class Net_sub(torch.nn.Module):
#     def __init__(self):
#         super(Net_sub, self).__init__()
#         self.input = torch.nn.Linear(6, 1)

#     def forward(self, x):
#         x = torch.sigmoid(self.input(x))
#         return x

In [265]:
# class Net_comb(torch.nn.Module):
#     def __init__(self, Net_sub):
#         super(Net_comb, self).__init__()
#         self.net1 = Net_sub()
#         self.net2 = Net_sub()
#         self.weight = torch.nn.Parameter(torch.tensor(0.9))
#         self.weight.requires_grad = False

#     def forward(self, x):
#         x = torch.cat((self.net1(x), self.net1(x)), 1)
#         return self.weight * x[:, 0] + (1.0 - self.weight) * x[:, 1]

In [266]:
X_torch = torch.tensor(X)
Y_torch = torch.tensor(Y)

In [267]:
torch_data_set = Data.TensorDataset(X_torch, Y_torch)
loader = Data.DataLoader(
    dataset=torch_data_set,
    batch_size=1000,
    shuffle=True
)

In [268]:
net = Net()
lr = 0.01
optimizer = torch.optim.Adam([net.input.bias], lr=lr)

In [269]:
list(net.parameters())

[Parameter containing:
 tensor([[-0.4004,  0.5431, -0.1336,  0.5454,  0.2126,  0.4293],
         [-0.2372, -0.6757, -0.4316, -0.0695,  0.1102,  0.5329]],
        requires_grad=True), Parameter containing:
 tensor([-0.3900, -0.2704], requires_grad=True), Parameter containing:
 tensor([[0.9000, 0.1000]])]

In [271]:
print(net)

Net(
  (input): Linear(in_features=6, out_features=2, bias=True)
  (output): Linear(in_features=2, out_features=1, bias=False)
)


In [272]:
Y_torch.unsqueeze(1)

tensor([[0],
        [0],
        [0],
        ...,
        [0],
        [0],
        [0]])

In [276]:
for epoch in range(100):
    for step, (X_train, Y_train) in enumerate(loader):
        optimizer.zero_grad()
        X_train = X_train.requires_grad_(True)

        logistic_regression_result = net(X_train.float())
        loss = F.binary_cross_entropy(logistic_regression_result, Y_train.unsqueeze(1).float())
        
        # print('Epoch: ', epoch, '| Step: ', step, '| loss_1: ', loss_1)
        

        loss.backward()
        weight_partial = net.input.weight.grad
        
        with torch.no_grad():
            matA = torch.mm(torch.transpose(weight_partial, 0, 1), net.input.weight) - torch.mm(torch.transpose(net.input.weight, 0, 1), weight_partial)
            update_matrix = torch.mm(torch.inverse(torch.eye(6) + matA * lr / 2.0), (torch.eye(6) - matA * lr / 2.0))
            updated_weight = torch.mm(update_matrix, torch.transpose(net.input.weight, 0, 1))
        optimizer.step()
        net.input.weight.data = torch.transpose(updated_weight, 0, 1)
        
        
    with torch.no_grad():
        loss_all = F.binary_cross_entropy(net(X_torch.float()), Y_torch.unsqueeze(1).float())
        print('Epoch: ', epoch,'| loss: ', loss_all.data.numpy())
        print(net.input.weight)
        print("******************************************")

Epoch:  0 | loss:  0.10423806
Parameter containing:
tensor([[ 0.4322, -0.3311, -0.4010,  0.5134, -0.3519,  0.3948],
        [ 0.0766,  0.7694, -0.0760,  0.4419,  0.3762,  0.2454]],
       requires_grad=True)
******************************************
Epoch:  1 | loss:  0.105281085
Parameter containing:
tensor([[ 0.4412, -0.3239, -0.1394,  0.6809, -0.2330,  0.4045],
        [ 0.0647,  0.7895, -0.5094,  0.1588,  0.1901,  0.2287]],
       requires_grad=True)
******************************************
Epoch:  2 | loss:  0.10541792
Parameter containing:
tensor([[ 0.4625, -0.0574,  0.0867,  0.7777, -0.0264,  0.4126],
        [ 0.0351,  0.3755, -0.8884, -0.0145, -0.1461,  0.2180]],
       requires_grad=True)
******************************************
Epoch:  3 | loss:  0.10476339
Parameter containing:
tensor([[ 0.4653,  0.3676,  0.0334,  0.6655,  0.1403,  0.4305],
        [ 0.0350, -0.3061, -0.8178,  0.1510, -0.4212,  0.1913]],
       requires_grad=True)
**************************************

KeyboardInterrupt: 

In [275]:
for param in net.parameters():
    print(param)

Parameter containing:
tensor([[ 0.4296, -0.1699, -0.6077,  0.3673, -0.3829,  0.3693],
        [ 0.0829,  0.4723,  0.2530,  0.6746,  0.4088,  0.2904]],
       requires_grad=True)
Parameter containing:
tensor([-4.9933, -4.0897], requires_grad=True)
Parameter containing:
tensor([[0.9000, 0.1000]])


In [244]:
print(x[0])
print(x[1])

[ 0.47669462  0.15964814 -0.47766656  0.55284255 -0.13930486  0.44053222]
[ 0.10813329 -0.8838343   0.06718921  0.39309219  0.13141423 -0.1756091 ]


In [224]:
(a[0] * a[1]).sum()

0.00014147999999994387

In [228]:
for param_group in optimizer.param_groups:
    print(param_group)

{'params': [Parameter containing:
tensor([-5.6412, -3.5738], requires_grad=True)], 'lr': 0.01, 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}
