In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader
import numpy as np
from torch.utils.tensorboard import SummaryWriter
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts

In [2]:
# --- Run configuration -------------------------------------------------------
batch_size = 128  # passed to both the training and validation DataLoader
epoch = 20        # upper bound of the training loop's range()
# tensorboard_writer = SummaryWriter()

# Arguments handed to CosineAnnealingWarmRestarts.
T_0 = 3
T_mult = 2
eta_min = 1e-6

# Only referenced by the commented-out target rescaling in later cells.
epsilon = 0.01
# Constant 1x2x2 offset subtracted from targets and predictions during evaluation.
inv_base_matrix = [[[1.5, -1], [-1, 1]]]

In [3]:


class BaseModel(nn.Module):
    """Three-layer fully connected network: 4 -> 32 -> 32 -> 4.

    ReLU is applied after the first two linear layers; the last layer is
    linear (no activation).
    """

    def __init__(self):
        super().__init__()
        # Attribute names are part of the state_dict keys; keep them stable so
        # previously saved checkpoints still load.
        self.fc1 = nn.Linear(4, 32)
        self.fc2 = nn.Linear(32, 32)
        self.fc3 = nn.Linear(32, 4)

    def forward(self, x):
        """Flatten ``x`` to rows of 4 features and run it through the network.

        Args:
            x: tensor whose total number of elements is a multiple of 4.

        Returns:
            Tensor of shape ``(x.numel() // 4, 4)``.
        """
        # reshape() behaves like view() for contiguous inputs but also accepts
        # non-contiguous tensors (e.g. transposed batches), where view() raises.
        x = x.reshape(-1, 4)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)
    
# Build the network, cast its parameters to float64, and move it to the GPU.
model = BaseModel().to(torch.double).to('cuda')
model.train()  # switch the module to training mode
model  # last expression of the cell: shows the module summary

BaseModel(
  (fc1): Linear(in_features=4, out_features=32, bias=True)
  (fc2): Linear(in_features=32, out_features=32, bias=True)
  (fc3): Linear(in_features=32, out_features=4, bias=True)
)

In [4]:
class CustomDataset(Dataset):
    """Dataset backed by one array loaded with ``np.load``.

    Each item is a pair of slices ``[idx, :, :, 0]`` and ``[idx, :, :, 1]``
    of the loaded array; the training loop consumes them as (input, target).
    """

    def __init__(self, root_dir):
        """Load the array stored at ``root_dir`` and report how many rows it has."""
        self.dataset = np.load(root_dir)
        print('number of data points', self.dataset.shape[0])

    def __len__(self):
        """Number of entries along the first axis of the loaded array."""
        return len(self.dataset)

    def __getitem__(self, idx):
        """Return the channel-0 and channel-1 slices for ``idx``."""
        # Tensor indices are converted to plain Python ints/lists before indexing numpy.
        index = idx.tolist() if torch.is_tensor(idx) else idx
        inputs = self.dataset[index, :, :, 0]
        targets = self.dataset[index, :, :, 1]
        return inputs, targets

In [5]:
# Load both splits from disk, then wrap each in a batched loader.
train_set = CustomDataset('train_set.npy')
val_set = CustomDataset('val_set.npy')

# Only the training loader shuffles; the validation loader uses DataLoader's defaults.
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_set, batch_size=batch_size)

number of data points 1000000
number of data points 10000


In [6]:
# Adam over all model parameters; the learning rate is driven by a cosine
# schedule with warm restarts configured from T_0, T_mult and eta_min.
optimizer = optim.Adam(model.parameters(), lr=5e-5, weight_decay=1e-7)
scheduler = CosineAnnealingWarmRestarts(
    optimizer,
    T_0=T_0,
    T_mult=T_mult,
    eta_min=eta_min,
)

In [7]:

# Train `model` on `train_loader` with an MSE objective.
# The learning-rate scheduler is stepped once per epoch, after all batches.
model.train()  # keeps this cell re-runnable after the evaluation cell calls eval()
i = 1  # step counter running across epochs; used only to throttle logging

# A dedicated loop variable is used on purpose: `for epoch in range(epoch)` would
# overwrite the configured epoch count, so re-running the cell would train for
# fewer epochs than configured.
for epoch_idx in range(epoch):
    for data, target in train_loader:
        # Variable wrappers are no-ops in current PyTorch; move tensors directly.
        data = data.to('cuda')
        target = target.to('cuda')

        optimizer.zero_grad()
        output = model(data)
        # The model returns rows of 4 values; flatten the target the same way.
        loss = F.mse_loss(output, target.view(-1, 4))
        loss.backward()
        optimizer.step()

        if i % 100 == 0:
            # loss.item() forces a device sync, so it is only evaluated when logging.
            # get_last_lr() is the supported way to read the rate set by the scheduler;
            # get_lr() is not meant to be called outside scheduler.step().
            print('\rTrain Step: %d, Loss: %.4f, lr: %.8f'
                  % (i, loss.item(), scheduler.get_last_lr()[0]), end="")
        i += 1
    scheduler.step()
    print('\n')



Train Step: 7800, Loss: 0.0001, lr: 0.00005000

Train Step: 15600, Loss: 0.0000, lr: 0.00003775

Train Step: 23400, Loss: 0.0000, lr: 0.00001325

Train Step: 31200, Loss: 0.0000, lr: 0.00005000

Train Step: 39000, Loss: 0.0000, lr: 0.00004672

Train Step: 46800, Loss: 0.0000, lr: 0.00003775

Train Step: 54600, Loss: 0.0000, lr: 0.00002550

Train Step: 62500, Loss: 0.0000, lr: 0.00001325

Train Step: 70300, Loss: 0.0000, lr: 0.00000428

Train Step: 78100, Loss: 0.0000, lr: 0.00005000

Train Step: 85900, Loss: 0.0000, lr: 0.00004917

Train Step: 93700, Loss: 0.0000, lr: 0.00004672

Train Step: 101500, Loss: 0.0000, lr: 0.00004282

Train Step: 109300, Loss: 0.0000, lr: 0.00003775

Train Step: 117100, Loss: 0.0000, lr: 0.00003184

Train Step: 125000, Loss: 0.0000, lr: 0.00002550

Train Step: 132800, Loss: 0.0000, lr: 0.00001916

Train Step: 140600, Loss: 0.0000, lr: 0.00001325

Train Step: 148400, Loss: 0.0000, lr: 0.00000818

Train Step: 156200, Loss: 0.0000, lr: 0.00000428



In [8]:
# Persist the model's state_dict (parameters and buffers) rather than the module object.
torch.save(obj=model.state_dict(), f='layer3_3.pth')

In [9]:
from tqdm import tqdm

# Evaluate on the validation loader: mean absolute error per output element.
model.eval()

# Loop-invariant offset, built once instead of twice per batch. It is subtracted
# from both targets and predictions, mirroring the original evaluation.
base_offset = torch.tensor(inv_base_matrix)

total_error = 0.0  # running sum of absolute errors (Python float)
total_number = 0   # number of scalar outputs compared so far

with torch.no_grad():  # inference only: do not record an autograd graph
    for data, target in tqdm(val_loader):
        target = target - base_offset
        # Run the model on the GPU, then compare on the CPU where the targets live.
        output = model(data.to('cuda')).to('cpu') - base_offset.view(-1, 4)
        total_error += torch.sum(torch.abs(output - target.view(-1, 4))).item()
        total_number += output.numel()

# Guard against an empty loader instead of failing on the division.
print(total_error / total_number if total_number else float('nan'))

100%|██████████| 79/79 [00:00<00:00, 530.80it/s]

7.463188810197244e-06





####

model 1 test error: 4.8965896880933165e-06
model 2 test error: 9.76252439105381e-06
model 3 test error: 7.463188810197244e-06

In [10]:
# Test errors recorded above for models 1-3.
temp = [
    4.8965896880933165e-06,
    9.76252439105381e-06,
    7.463188810197244e-06,
]
# Mean and population variance (ddof=0, numpy's default) of the three errors.
print(np.asarray(temp).mean())
print(np.asarray(temp).var())

7.3741009631147895e-06
3.95018841116143e-12
