In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader
import numpy as np
from torch.utils.tensorboard import SummaryWriter
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts

In [2]:
# Mini-batch size handed to both DataLoaders.
batch_size = 128
# Scale applied to a perturbation before it is added to base_matrix.
epsilon = 1e-2
# 1 x 2 x 2 base matrix and its inverse, written out by hand
# (det = 2, so the inverse is [[3, -2], [-2, 2]] / 2).
base_matrix = [[[2, 2], [2, 3]]]
inv_base_matrix = [[[1.5, -1.0], [-1.0, 1.0]]]

In [3]:


class BaseModel(nn.Module):
    """Two-layer fully connected network: 4 inputs -> 8 ReLU units -> 4 outputs."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(4, 8)
        self.fc3 = nn.Linear(8, 4)

    def forward(self, x):
        """Flatten ``x`` into rows of 4 features and apply fc1 -> ReLU -> fc3.

        Args:
            x: tensor whose total number of elements is a multiple of 4.

        Returns:
            Tensor of shape ``(x.numel() // 4, 4)``.
        """
        # reshape (rather than view) also accepts non-contiguous inputs such
        # as transposed or sliced tensors; for contiguous inputs it is the
        # same zero-copy view as before.
        x = x.reshape(-1, 4)
        x = F.relu(self.fc1(x))
        return self.fc3(x)
    
# Build the network in double precision; the later cells feed it float64 arrays.
model = BaseModel().to(torch.double)
# Prefer the GPU, but fall back to the CPU so the notebook still runs on a
# machine without CUDA.
model = model.to('cuda' if torch.cuda.is_available() else 'cpu')
model.train()
model

BaseModel(
  (fc1): Linear(in_features=4, out_features=8, bias=True)
  (fc3): Linear(in_features=8, out_features=4, bias=True)
)

In [4]:
class CustomDataset(Dataset):
    """Dataset backed by one ``.npy`` array.

    Item ``idx`` is the pair ``(array[idx, :, :, 0], array[idx, :, :, 1])``.
    """

    def __init__(self, root_dir):
        # root_dir is handed straight to np.load, so it names the array file.
        self.dataset = np.load(root_dir)
        print('number of data points', self.dataset.shape[0])

    def __len__(self):
        n_points = self.dataset.shape[0]
        return n_points

    def __getitem__(self, idx):
        # Tensor indices are converted to plain Python ints / lists first.
        index = idx.tolist() if torch.is_tensor(idx) else idx
        return self.dataset[index, :, :, 0], self.dataset[index, :, :, 1]

In [5]:
# Wrap the two .npy files as datasets.
train_set = CustomDataset('train_set.npy')
val_set = CustomDataset('val_set.npy')

# Only the training loader shuffles; both use the shared batch_size.
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_set, batch_size=batch_size)

number of data points 1000000
number of data points 10000


In [6]:
# Restore the trained weights. map_location remaps the stored tensors onto
# whichever device the model currently lives on, so a checkpoint saved from a
# GPU can still be opened on a CPU-only machine.
# NOTE: torch.load unpickles the file -- only open checkpoints you trust.
model.load_state_dict(
    torch.load('small_1.pth', map_location=next(model.parameters()).device))
list(model.parameters())

[Parameter containing:
 tensor([[ 2.9422e-17, -1.0965e-16, -1.0821e-16, -2.9642e-16],
         [ 9.5628e-02,  2.9369e-01,  5.3154e-02, -4.4647e-01],
         [ 2.5575e-18, -5.1115e-18,  3.6952e-16,  5.6182e-16],
         [ 1.5360e+00, -1.2078e+00, -7.5969e-01,  6.3958e-01],
         [-3.6153e-01, -4.7281e-02,  4.6443e-01, -3.2432e-01],
         [ 3.5525e-01, -1.8960e-01, -6.1029e-01,  1.3732e-01],
         [-1.2802e+00,  1.0202e+00,  6.6826e-01, -5.6571e-01],
         [-6.5787e-01,  4.8745e-01,  2.9334e-01, -2.2843e-01]], device='cuda:0',
        dtype=torch.float64, requires_grad=True),
 Parameter containing:
 tensor([-6.6458e-15,  7.9882e-01, -7.6495e-15, -2.4201e-01,  1.0407e+00,
          1.2244e+00,  1.9913e-01,  9.9282e-02], device='cuda:0',
        dtype=torch.float64, requires_grad=True),
 Parameter containing:
 tensor([[ 1.6707e-17, -7.5081e-02,  6.4202e-17, -1.1650e+00,  5.8385e-01,
          -5.0147e-01,  1.1485e+00,  6.6387e-01],
         [ 5.9550e-19, -8.2578e-01, -8.3030e

In [7]:
# NumPy arrays of both layers' weights and biases, used by the LP cells below.
W1, b1 = (p.data.to('cpu').numpy() for p in (model.fc1.weight, model.fc1.bias))
W2, b2 = (p.data.to('cpu').numpy() for p in (model.fc3.weight, model.fc3.bias))

### a_11 — worst-case error search for output index 0

In [71]:
from scipy.optimize import linprog
# LP constraint row -> hidden-unit index in fc1. Units 0 and 2 are left out;
# in the parameter dump above their fc1 rows and biases are ~1e-15 or smaller
# (presumably dead units -- confirm before reusing with another checkpoint).
dic={0:1,1:3,2:4,3:5,4:6,5:7}
def a11_linear_program(sgn_arr):
    """Largest scaled gap between network output 0 and its linear reference
    term on one ReLU activation region.

    ``sgn_arr[k] == 1`` requires hidden unit ``dic[k]`` to be active
    (``W1[i] @ x + b1[i] >= 0``); any other value requires it to be inactive
    (``W1[i] @ x + b1[i] <= 0``).  On such a region output 0 is affine in
    ``x``, so the largest ``|output - linear term|`` over the box
    ``[-1, 1]^4`` is found with two linear programs, one per sign of the gap.

    Reads the module-level ``W1``, ``b1``, ``W2``, ``b2``, ``dic`` and
    ``epsilon``.

    Args:
        sgn_arr: sequence of 6 activation signs, one per entry of ``dic``.

    Returns:
        ``(value, x, label)``: ``value`` is ``epsilon`` times the largest gap,
        ``x`` the point where it is attained, and ``label`` is True when at
        least one LP was solved.  When neither was (e.g. the sign pattern
        describes an empty region) the result is ``(-10e4, 0, False)``.
    """
    c = np.zeros(4, dtype=np.float64)
    A = np.zeros((6, 4), dtype=np.float64)
    b = np.zeros(6, dtype=np.float64)
    bias = b2[0]
    for idx in range(6):
        i = dic[idx]
        if sgn_arr[idx] == 1:
            # Active unit: -(W1[i] @ x) <= b1[i]; it also feeds output 0.
            A[idx, :] = -W1[i, :]
            b[idx] = b1[i]
            c += W2[0, i] * W1[i, :]
            bias = bias + W2[0, i] * b1[i]
        else:
            # Inactive unit: W1[i] @ x <= -b1[i]; it contributes nothing.
            A[idx, :] = W1[i, :]
            b[idx] = -b1[i]
    # Subtract the coefficients of the linear reference term for output 0.
    c = c - [-2.25, 1.5, 1.5, -1]

    bounds = [(-1, 1)] * 4
    ans_value, ans_x, label = -10e4, 0, False
    # With gap(x) = c @ x + bias:
    #   max(-gap) = -min( c @ x) - bias
    #   max( gap) = -min(-c @ x) + bias
    for objective, offset in ((c, -bias), (-c, bias)):
        res = linprog(objective, A_ub=A, b_ub=b, bounds=bounds)
        if not res.success:
            # An unsolved LP carries no optimum (recent SciPy reports
            # res.fun as None), so res.fun must not be used here.
            continue
        value = (-res.fun + offset) * epsilon
        if not label or value > ans_value:
            ans_value, ans_x, label = value, res.x, True

    return ans_value, ans_x, label


# Not read by anything in the visible cells; the search object below keeps
# its own max_error.
max_error=0

class solution:
    """Depth-first enumeration of every +1/-1 sign pattern, keeping the
    largest value returned by ``a11_linear_program``.

    Attributes:
        max_error: largest accepted value so far (starts at 0).
        max_x: point returned alongside ``max_error``; None until some
            pattern beats the starting value.
    """

    def __init__(self):
        self.max_error = 0
        # Defined up front so reading it after a search that found nothing
        # gives None instead of raising AttributeError.
        self.max_x = None

    def dfs(self, sgn_arr, i):
        """Fill ``sgn_arr[i:]`` with every +1/-1 combination and evaluate each.

        ``sgn_arr`` is modified in place; every completed pattern is printed
        together with its value and solved-flag.
        """
        if i == len(sgn_arr):
            temp_value, temp_x, label = a11_linear_program(sgn_arr)
            if label and temp_value > self.max_error:
                self.max_error = temp_value
                self.max_x = temp_x
            print(sgn_arr, temp_value, label)
            return
        for sign in (1, -1):
            sgn_arr[i] = sign
            self.dfs(sgn_arr, i + 1)

# Try every sign pattern of the 6 constrained hidden units and report the
# largest value together with the point that attains it.
solve = solution()
solve.dfs([0] * 6, 0)
print(solve.max_error, solve.max_x)

[1, 1, 1, 1, 1, 1] 0.00024328450716730622 True
[1, 1, 1, 1, 1, -1] 0.0004695042566230939 True
[1, 1, 1, 1, -1, 1] 0.0005464282662661109 True
[1, 1, 1, 1, -1, -1] 0.0015488413969484433 True
[1, 1, 1, -1, 1, 1] -100000.0 False
[1, 1, 1, -1, 1, -1] -100000.0 False
[1, 1, 1, -1, -1, 1] -100000.0 False
[1, 1, 1, -1, -1, -1] -100000.0 False
[1, 1, -1, 1, 1, 1] -100000.0 False
[1, 1, -1, 1, 1, -1] -100000.0 False
[1, 1, -1, 1, -1, 1] -100000.0 False
[1, 1, -1, 1, -1, -1] 0.0019682584666252125 True
[1, 1, -1, -1, 1, 1] -100000.0 False
[1, 1, -1, -1, 1, -1] -100000.0 False
[1, 1, -1, -1, -1, 1] -100000.0 False
[1, 1, -1, -1, -1, -1] -100000.0 False
[1, -1, 1, 1, 1, 1] 0.0014151653206610994 True
[1, -1, 1, 1, 1, -1] 0.00046950425660662986 True
[1, -1, 1, 1, -1, 1] 0.0005464282649560165 True
[1, -1, 1, 1, -1, -1] 0.0002747146324063798 True
[1, -1, 1, -1, 1, 1] 0.0014151652977950025 True
[1, -1, 1, -1, 1, -1] -100000.0 False
[1, -1, 1, -1, -1, 1] -100000.0 False
[1, -1, 1, -1, -1, -1] -100000.0 Fa

In [72]:
# Perturbation to check; it appears to be the maximiser reported by the a_11
# search (the error printed at the end matches that search's largest value).
temp = np.array([[[ 0.99999991, -0.99999906],[ -0.99999992,  0.99999988]]],dtype=np.float64)
cur_matrix = base_matrix + epsilon*temp
temp_inv = np.linalg.inv(cur_matrix)
print(temp_inv)

# Exact change of the inverse, rescaled by 1/epsilon.
target = temp_inv - inv_base_matrix
target = target/epsilon
print('target', target)

# Linear reference term for output 0 (same coefficients the LP subtracts).
linear = -2.25*temp[0,0,0]+1.5*temp[0,0,1]+1.5*temp[0,1,0]-1*temp[0,1,1]
print('linear', linear)

temp = torch.tensor(temp)
# Follow the model's device instead of assuming CUDA is available.
temp = temp.to(next(model.parameters()).device)
output = model(temp)
output = output.detach().cpu().numpy()
print('output', output)
print((output[0,0]-linear)*epsilon)

# Hidden-layer pre-activations, to read off which ReLU units are active.
x = temp.view(-1, 4)
x = model.fc1(x)
print(x)

[[[ 1.4401914  -0.95215313]
  [-0.95215312  0.9617225 ]]]
target [[[-5.98085955  4.78468739]
  [ 4.7846878  -3.82775007]]]
linear -6.2499981475
output [[-6.0531723   4.88084851  4.82497874 -3.87770002]]
0.001968258489723054
tensor([[-6.6949e-15,  1.0114e-01, -7.4495e-15,  3.9011e+00, -6.2257e-02,
          2.5168e+00, -3.3353e+00, -1.5678e+00]], device='cuda:0',
       dtype=torch.float64, grad_fn=<AddmmBackward>)


### a_ij — the same worst-case search for the output selected by `a_idx`

In [15]:
from scipy.optimize import linprog
# LP constraint row -> hidden-unit index in fc1 (units 0 and 2 are left out).
dic={0:1,1:3,2:4,3:5,4:6,5:7}
# Output index used by a11_linear_program below: it selects the W2 row, the b2
# entry and the linear-term coefficients.
a_idx = 3

def a11_linear_program(sgn_arr):
    """Largest scaled gap between network output ``a_idx`` and its linear
    reference term on one ReLU activation region.

    ``sgn_arr[k] == 1`` requires hidden unit ``dic[k]`` to be active
    (``W1[i] @ x + b1[i] >= 0``); any other value requires it to be inactive
    (``W1[i] @ x + b1[i] <= 0``).  On such a region the output is affine in
    ``x``, so the largest ``|output - linear term|`` over the box
    ``[-1, 1]^4`` is found with two linear programs, one per sign of the gap.

    Reads the module-level ``a_idx``, ``W1``, ``b1``, ``W2``, ``b2``, ``dic``
    and ``epsilon``.

    Args:
        sgn_arr: sequence of 6 activation signs, one per entry of ``dic``.

    Returns:
        ``(value, x, label)``: ``value`` is ``epsilon`` times the largest gap,
        ``x`` the point where it is attained, and ``label`` is True when at
        least one LP was solved.  When neither was (e.g. the sign pattern
        describes an empty region) the result is ``(-10e4, 0, False)``.

    Raises:
        ValueError: if ``a_idx`` has no linear-term coefficients (not 0-3).
    """
    # Linear-term coefficients per output index.
    linear_terms = {
        0: [-2.25, 1.5, 1.5, -1],
        1: [1.5, -1.5, -1, 1],
        2: [1.5, -1, -1.5, 1],
        3: [-1, 1, 1, -1],
    }

    c = np.zeros(4, dtype=np.float64)
    A = np.zeros((6, 4), dtype=np.float64)
    b = np.zeros(6, dtype=np.float64)
    bias = b2[a_idx]
    for idx in range(6):
        i = dic[idx]
        if sgn_arr[idx] == 1:
            # Active unit: -(W1[i] @ x) <= b1[i]; it also feeds the output.
            A[idx, :] = -W1[i, :]
            b[idx] = b1[i]
            c += W2[a_idx, i] * W1[i, :]
            bias = bias + W2[a_idx, i] * b1[i]
        else:
            # Inactive unit: W1[i] @ x <= -b1[i]; it contributes nothing.
            A[idx, :] = W1[i, :]
            b[idx] = -b1[i]
    if a_idx not in linear_terms:
        # Previously an unsupported index silently skipped the subtraction
        # and produced a meaningless bound.
        raise ValueError('a_idx must be one of 0, 1, 2, 3; got %r' % (a_idx,))
    c = c - linear_terms[a_idx]

    bounds = [(-1, 1)] * 4
    ans_value, ans_x, label = -10e4, 0, False
    # With gap(x) = c @ x + bias:
    #   max(-gap) = -min( c @ x) - bias
    #   max( gap) = -min(-c @ x) + bias
    for objective, offset in ((c, -bias), (-c, bias)):
        res = linprog(objective, A_ub=A, b_ub=b, bounds=bounds)
        if not res.success:
            # An unsolved LP carries no optimum (recent SciPy reports
            # res.fun as None), so res.fun must not be used here.
            continue
        value = (-res.fun + offset) * epsilon
        if not label or value > ans_value:
            ans_value, ans_x, label = value, res.x, True

    return ans_value, ans_x, label


# Not read by anything in the visible cells; the search object below keeps
# its own max_error.
max_error=0

class solution:
    """Depth-first enumeration of every +1/-1 sign pattern, keeping the
    largest value returned by ``a11_linear_program``.

    Attributes:
        max_error: largest accepted value so far (starts at 0).
        max_x: point returned alongside ``max_error``; None until some
            pattern beats the starting value.
    """

    def __init__(self):
        self.max_error = 0
        # Defined up front so reading it after a search that found nothing
        # gives None instead of raising AttributeError.
        self.max_x = None

    def dfs(self, sgn_arr, i):
        """Fill ``sgn_arr[i:]`` with every +1/-1 combination and evaluate each.

        ``sgn_arr`` is modified in place; every completed pattern is printed
        together with its value and solved-flag.
        """
        if i == len(sgn_arr):
            temp_value, temp_x, label = a11_linear_program(sgn_arr)
            if label and temp_value > self.max_error:
                self.max_error = temp_value
                self.max_x = temp_x
            print(sgn_arr, temp_value, label)
            return
        for sign in (1, -1):
            sgn_arr[i] = sign
            self.dfs(sgn_arr, i + 1)

# Try every sign pattern of the 6 constrained hidden units and report the
# largest value together with the point that attains it.
solve = solution()
solve.dfs([0] * 6, 0)
print(solve.max_error, solve.max_x)

[1, 1, 1, 1, 1, 1] 0.00011773973873376226 True
[1, 1, 1, 1, 1, -1] 0.00021506600837474133 True
[1, 1, 1, 1, -1, 1] 0.00017554214078433568 True
[1, 1, 1, 1, -1, -1] 0.000956103424870623 True
[1, 1, 1, -1, 1, 1] -100000.0 False
[1, 1, 1, -1, 1, -1] -100000.0 False
[1, 1, 1, -1, -1, 1] -100000.0 False
[1, 1, 1, -1, -1, -1] -100000.0 False
[1, 1, -1, 1, 1, 1] -100000.0 False
[1, 1, -1, 1, 1, -1] -100000.0 False
[1, 1, -1, 1, -1, 1] -100000.0 False
[1, 1, -1, 1, -1, -1] 0.0012229880623877021 True
[1, 1, -1, -1, 1, 1] -100000.0 False
[1, 1, -1, -1, 1, -1] -100000.0 False
[1, 1, -1, -1, -1, 1] -100000.0 False
[1, 1, -1, -1, -1, -1] -100000.0 False
[1, -1, 1, 1, 1, 1] 0.0008593442505967541 True
[1, -1, 1, 1, 1, -1] 0.00013244772785622896 True
[1, -1, 1, 1, -1, 1] 0.00021282787675164144 True
[1, -1, 1, 1, -1, -1] 0.00013244771797948863 True
[1, -1, 1, -1, 1, 1] 0.0008593442320096667 True
[1, -1, 1, -1, 1, -1] -100000.0 False
[1, -1, 1, -1, -1, 1] -100000.0 False
[1, -1, 1, -1, -1, -1] -100000.0

In [17]:
# Perturbation to check: a corner of the [-1, 1]^4 box.
temp = np.array([[[ -1, -1],[-1,  1]]],dtype=np.float64)
cur_matrix = base_matrix + epsilon*temp
temp_inv = np.linalg.inv(cur_matrix)
print(temp_inv)

# Exact change of the inverse, rescaled by 1/epsilon.
target = temp_inv - inv_base_matrix
target = target/epsilon
print('target', target)

# Linear reference term for output 3 (same coefficients the LP subtracts
# when a_idx == 3).
linear = -temp[0,0,0]+temp[0,0,1]+temp[0,1,0]-temp[0,1,1]
print('linear', linear)

temp = torch.tensor(temp)
# Follow the model's device instead of assuming CUDA is available.
temp = temp.to(next(model.parameters()).device)
output = model(temp)
output = output.detach().cpu().numpy()
print('output', output)
print((output[0,3]-linear)*epsilon)

# Hidden-layer pre-activations, to read off which ReLU units are active.
x = temp.view(-1, 4)
x = model.fc1(x)
print(x)

[[[ 1.48290472 -0.98039216]
  [-0.98039216  0.98039216]]]
target [[[-1.70952803  1.96078431]
  [ 1.96078431 -1.96078431]]]
linear -2.0
output [[-1.72436913  1.88671679  1.92069509 -1.87436932]]
0.0012563067637435443
tensor([[-6.7537e-15, -9.0120e-02, -7.4546e-15,  8.2905e-01,  6.6079e-01,
          1.8064e+00, -7.7490e-01, -2.5207e-01]], device='cuda:0',
       dtype=torch.float64, grad_fn=<AddmmBackward>)
