In [17]:
import torch
import numpy as np
import time

# class LogMessanger(object):
#     """Computes and stores the average and current value"""
#     def __init__(self):
#         self.value = 0
#         self.avg = 0
#         self.sum = 0
#         self.count = 0

#     def update(self, avg_value_for_n, n=1):
#         self.value = avg_value_for_n
#         self.sum += avg_value_for_n * n
#         self.count += n
#         self.avg = self.sum / self.count

class CustomLogger(object):
    """Collects a loss value and an elapsed time for every call to ``update``.

    Attributes are plain Python lists so they can be inspected or plotted
    directly after training:
      * ``losses``    -- one float per ``update`` call.
      * ``deltatime`` -- the ``deltat`` value passed to each ``update`` call.
    """

    def __init__(self):
        self.losses = []
        self.deltatime = []

    def update(self, model, loss_fn, X, y, deltat=0):
        """Evaluate ``loss_fn(model(X), y)`` and record it together with ``deltat``.

        Args:
            model: callable producing predictions from ``X``.
            loss_fn: callable ``loss_fn(prediction, target)`` whose result
                exposes ``.item()``.
            X: model input.
            y: target passed to ``loss_fn``.
            deltat: elapsed time to store alongside the loss (default 0).
        """
        # This forward pass is only for bookkeeping; disabling autograd avoids
        # building a graph that would never be back-propagated.
        with torch.no_grad():
            yhat = model(X)
            loss = loss_fn(yhat, y)
        self.losses.append(loss.item())
        self.deltatime.append(deltat)

    def output(self):
        """Print and return ``'loss <mean of recorded losses>'``.

        With no recorded losses the mean is reported as ``nan`` (without the
        NumPy empty-slice warning that ``np.mean([])`` would raise).
        """
        mean_loss = np.mean(self.losses) if self.losses else float('nan')
        res = f'loss {mean_loss}'
        print(res)
        return res


class MyLogger(CustomLogger):
    """CustomLogger that additionally records the model's linear parameters.

    After every ``update`` the pair ``[bias, weight]`` read from the model's
    ``state_dict`` (keys ``'linear.bias'`` and ``'linear.weight'``) is appended
    to ``self.p``. Both entries are read with ``.item()``, so each must hold
    exactly one element.
    """

    def __init__(self):
        super().__init__()
        self.p = []

    def update(self, model, loss_fn, X, y, deltat=0):
        """Record loss and timing via the parent, then snapshot ``[bias, weight]``."""
        super().update(model, loss_fn, X, y, deltat)
        params = model.state_dict()
        self.p.append([params['linear.bias'].item(), params['linear.weight'].item()])

    def output(self):
        """Print and return the parent's loss summary string."""
        # Propagate the summary instead of silently returning None.
        return super().output()


class ModelTemplete():
    """Small training harness bundling a model, a loss, an optimizer and an optional logger.

    On construction the model is moved to ``'cuda'`` when
    ``torch.cuda.is_available()`` is true, otherwise to ``'cpu'``.

    Args:
        model: torch module to train.
        loss_fn: callable ``loss_fn(prediction, target)`` returning a tensor
            that supports ``.backward()``.
        optimizer: optimizer exposing ``step()`` and ``zero_grad()``.
        logger: optional object exposing
            ``update(model, loss_fn, X, y, deltat=...)``; when set it is
            called once per mini-batch.
    """

    def __init__(self, model, loss_fn, optimizer, logger=None):
        self.model = model
        self.loss_fn = loss_fn
        self.optimizer = optimizer
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.model.to(self.device)
        self.train_loader = None
        self.val_loader = None
        self.logger = logger

    def set_loaders(self, train_loader, val_loader=None):
        """Store the training loader and (optionally) a validation loader."""
        self.train_loader = train_loader
        self.val_loader = val_loader

    def set_logger(self, msg):
        """Attach (or replace) the per-batch logger."""
        self.logger = msg

    def _train_minibatch_once(self):
        """Run one optimisation pass over ``self.train_loader``.

        Each batch is moved to ``self.device``, a forward/backward/step cycle
        is executed, and the logger (if any) is notified with the wall-clock
        duration of that cycle.

        Returns:
            The loss tensor of the last mini-batch, or ``None`` when the
            loader yielded no batches.
        """
        self.model.train()
        loss = None  # stays None if the loader is empty
        for X_batch, y_batch in self.train_loader:
            # perf_counter is monotonic, so the interval cannot go negative
            # if the system clock is adjusted mid-step.
            start_time = time.perf_counter()
            X_batch = X_batch.to(self.device)
            y_batch = y_batch.to(self.device)
            yhat = self.model(X_batch)
            loss = self.loss_fn(yhat, y_batch)
            loss.backward()
            self.optimizer.step()
            self.optimizer.zero_grad()
            elapsed = time.perf_counter() - start_time
            # The logger is optional (constructor default is None).
            if self.logger is not None:
                self.logger.update(self.model, self.loss_fn, X_batch, y_batch, deltat=elapsed)
        return loss

    def train(self, train_loader, epoch_num=10):
        """Train for ``epoch_num`` epochs and return the parameter trajectory.

        Args:
            train_loader: iterable of ``(X_batch, y_batch)`` tensor pairs.
            epoch_num: number of passes over ``train_loader`` (default 10).

        Returns:
            ``numpy.ndarray`` with one ``[bias, weight]`` row per epoch, read
            from the model's ``state_dict`` keys ``'linear.bias'`` and
            ``'linear.weight'`` (each must hold exactly one element).
        """
        self.model.train()
        self.set_loaders(train_loader)
        # Must be created here: the name is assigned in this function, so it
        # is local and would otherwise be unbound on the first append.
        plist = []
        for _ in range(epoch_num):
            self._train_minibatch_once()
            params = self.model.state_dict()
            plist.append([params['linear.bias'].item(), params['linear.weight'].item()])
        return np.array(plist)


def train_one_epoch(model, loss_fn, optimizer, dataloader):
    """Run a single optimisation pass over ``dataloader``.

    Every batch is reshaped to a single column before being fed to the model
    and the loss. The per-batch loss is weighted by the batch size so the
    returned figure is the sample-weighted mean over ``dataloader.dataset``.

    Returns:
        Sum of ``loss * batch_size`` divided by ``len(dataloader.dataset)``.
    """
    model.train()
    weighted_total = 0
    for features, targets in dataloader:
        batch_size = targets.size(0)
        predictions = model(features.reshape(-1, 1))
        batch_loss = loss_fn(predictions, targets.reshape(-1, 1))
        batch_loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        weighted_total += batch_loss.item() * batch_size
    return weighted_total / len(dataloader.dataset)

In [2]:
import numpy as np

# Synthetic 1-D linear-regression data set.
# Seed NumPy's global RNG so the draws below are reproducible; the same seed
# is reused for the train/test split.
RANDOMSEED = 42
np.random.seed(RANDOMSEED)
# 100 feature values drawn uniformly from [0, 1).
X = np.random.rand(100)
# Targets follow y = 2.3 + 1.2 * x plus Gaussian noise scaled by 0.1.
# NOTE: X must be drawn before the noise -- swapping the two calls changes
# both arrays because they share the seeded global RNG.
y = 2.3 + 1.2 * X + np.random.randn(100) * 0.1

from sklearn.model_selection import train_test_split

# Hold out 15% of the samples for testing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.15,
                                                    random_state=RANDOMSEED)

In [6]:
from torch.nn.modules import Linear
from torch.nn import MSELoss
import torch.nn as nn
from torch.optim import SGD

class BetterLR(nn.Module):
    """Single-feature linear regression with a fixed starting point.

    The wrapped ``Linear(1, 1)`` layer has its randomly initialised parameters
    replaced by float64 values: bias ``1.0`` and weight ``1.5``.
    """

    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)

        layer = Linear(in_features=1, out_features=1)
        # Overwrite the default initialisation with known double-precision values.
        layer.bias = nn.Parameter(torch.tensor([1.0], dtype=torch.float64))
        layer.weight = nn.Parameter(torch.tensor([[1.5]], dtype=torch.float64))
        self.linear = layer

    def forward(self, x):
        """Apply the linear layer to ``x``."""
        prediction = self.linear(x)
        return prediction

In [4]:
from torch.utils.data import Dataset, TensorDataset

class MyData(Dataset):
    """Dataset pairing features and targets as float64 column tensors.

    Both inputs are converted with ``torch.tensor`` and reshaped to ``(-1, 1)``,
    so indexing with an integer yields a pair of one-element tensors.
    """

    def __init__(self, x, y):
        features = torch.tensor(x, dtype=torch.float64)
        targets = torch.tensor(y, dtype=torch.float64)
        self.x = features.reshape(-1, 1)
        self.y = targets.reshape(-1, 1)

    def __getitem__(self, index):
        """Return the ``(feature, target)`` tuple stored at ``index``."""
        feature = self.x[index]
        target = self.y[index]
        return (feature, target)

    def __len__(self):
        """Number of samples, taken from the target tensor."""
        return self.y.shape[0]
    
from torch.utils.data import DataLoader 

# Baseline: a hand-written mini-batch SGD loop, recording [bias, weight]
# after every epoch so it can be compared with the ModelTemplete harness.
train_data = MyData(X_train, y_train)

lr = 0.2
epoch_num = 10

device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = BetterLR().to(device)
optimizer = SGD(model.parameters(), lr=lr)
# Build the loss module once instead of once per mini-batch.
loss_fn = MSELoss(reduction='mean')

# shuffle=False keeps the batch order fixed from epoch to epoch.
train_loader = DataLoader(dataset=train_data, batch_size=32, shuffle=False)

plist = []
for epoch in range(epoch_num):
    for X_batch, y_batch in train_loader:
        # The model lives on `device`; the batches must be moved there too,
        # otherwise the forward pass fails with a device mismatch on CUDA.
        X_batch = X_batch.to(device)
        y_batch = y_batch.to(device)
        yhat = model(X_batch)
        loss = loss_fn(yhat, y_batch)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
    p = model.state_dict()
    plist.append([p['linear.bias'].item(), p['linear.weight'].item()])
plist


[[1.8277028504755573, 1.8368193572906044],
 [1.9607104449826838, 1.8293981130981023],
 [2.001626059409397, 1.7815077539441087],
 [2.0286935704191, 1.7321715193480243],
 [2.0522055690695757, 1.686138097785071],
 [2.0736381185747943, 1.6437403254745735],
 [2.0933134526016604, 1.6047617600958677],
 [2.111393711486754, 1.5689357968513453],
 [2.1280105514943686, 1.5360086358936902],
 [2.143282725696795, 1.505745878510758]]

In [19]:

# Repeat the experiment through the ModelTemplete harness, starting from a
# fresh model/optimizer pair; `msg` collects per-batch losses and parameters.
lr = 0.2
msg = MyLogger()
model = BetterLR().to(device)
optimizer = SGD(model.parameters(), lr=lr)

mm = ModelTemplete(
    model=model,
    loss_fn=MSELoss(reduction='mean'),
    optimizer=optimizer,
    logger=msg,
)

plist = mm.train(train_loader=train_loader)
plist

array([[1.82770285, 1.83681936],
       [1.96071044, 1.82939811],
       [2.00162606, 1.78150775],
       [2.02869357, 1.73217152],
       [2.05220557, 1.6861381 ],
       [2.07363812, 1.64374033],
       [2.09331345, 1.60476176],
       [2.11139371, 1.5689358 ],
       [2.12801055, 1.53600864],
       [2.14328273, 1.50574588]])

In [12]:
msg

<__main__.MyLogger at 0x1758c1c1d00>

In [20]:
msg.p


[[1.4592897136116971, 1.7077316274780026],
 [1.7044490155760934, 1.7947310020972977],
 [1.8277028504755573, 1.8368193572906044],
 [1.8909203007898212, 1.8402628949865438],
 [1.9407059634895623, 1.8393113070683942],
 [1.9607104449826838, 1.8293981130981023],
 [1.9721568268405247, 1.808159321590578],
 [1.9949517748218504, 1.7966599019140075],
 [2.001626059409397, 1.7815077539441087],
 [2.0059469002246373, 1.7587124028857222],
 [2.023703089590172, 1.7466251734420466],
 [2.0286935704191, 1.7321715193480243],
 [2.031707101072717, 1.7106830383163631],
 [2.047393452747219, 1.6992354308697957],
 [2.0522055690695757, 1.686138097785071],
 [2.054696698804196, 1.6662054120350336],
 [2.0688125374860906, 1.6554985363101928],
 [2.0736381185747943, 1.6437403254745735],
 [2.07573710879244, 1.6252834946918484],
 [2.0884525122952433, 1.6152772174641685],
 [2.0933134526016604, 1.6047617600958677],
 [2.0950634349581234, 1.5876673135589225],
 [2.106497295311565, 1.5783075093083911],
 [2.111393711486754, 1.5