In [1]:
import os

# Expose only GPU index 1 to this process. The assignment lives in the first
# cell so that it runs ahead of the cell that imports torch.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

In [2]:
import torch
import augdataset
import loss, train_utils, models
import utils

In [3]:
import logging

# Append training progress to train_status.log, one timestamped line per record.
# The timestamp carries the full date, not just the time of day: the file is
# opened in append mode (so it accumulates several runs) and a single run of
# this notebook lasts longer than 24 hours, which makes a bare HH:MM:SS stamp
# ambiguous.
logging.basicConfig(
    filename='train_status.log',
    filemode='a',
    format='%(asctime)s %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
    level=logging.INFO,
)

In [4]:
# Build the training dataloader. The same image glob is listed 100 times in
# `dirs`.
# NOTE(review): presumably the repetition makes one pass over the loader visit
# every image 100 times with fresh augmentations -- confirm in augdataset.
image_glob = '../digitization/data/fin_docs/pdf_images/*.*'
train_dataloader = augdataset.get_train_dl(dirs=[image_glob] * 100)

Number of Images:  139400
Compose(
    Resize(size=(150, 150), interpolation=bilinear, max_size=None, antialias=None)
    RandomResizedCrop(size=(150, 150), scale=(0.2, 1.0), ratio=(0.75, 1.3333), interpolation=bilinear)
    RandomHorizontalFlip(p=0.5)
    RandomVerticalFlip(p=0.5)
    RandomApply(
    p=0.8
    ColorJitter(brightness=[0.6, 1.4], contrast=[0.6, 1.4], saturation=[0.6, 1.4], hue=[-0.1, 0.1])
)
    RandomGrayscale(p=0.2)
    ToTensor()
    Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.201])
)


In [5]:
# Instantiate the network with an output feature size of 128 and move it to
# the GPU in the same statement.
model = models.Model(features_dim=128).cuda()

# To resume training from a saved checkpoint, use this line instead:
# model = torch.load('models/modelv1-Copy1.0.pt')



In [6]:
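# Debugging aid (disabled): print whether each model parameter is trainable.
# To unfreeze parameters instead, assign `param.requires_grad = True` in the loop.
# for param in model.parameters():
#     print(param.requires_grad)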

In [7]:
# Run settings: `v` is the model version tag embedded in checkpoint filenames,
# `n_epochs` is the number of training epochs.
v = 2
n_epochs = 100

# Contrastive objective from the SimCLR paper: https://arxiv.org/pdf/2002.05709.pdf
criterion = loss.SimCLRLoss(temperature=0.1)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Disabled alternative: fixed schedule that multiplies the learning rate by 0.1
# every 200 scheduler steps.
# scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 200, gamma=0.1, last_epoch=-1,)

# Plateau-based schedule from the project's utils module: the learning rate is
# meant to be scaled by 0.1 when there has been no improvement for 100 steps.
# NOTE(review): `filename` is presumably where the helper stores the best
# weights it backtracks to -- confirm in utils.
# NOTE(review): patience (100) equals n_epochs (100), so if patience is counted
# in epochs the reduction can never trigger within this run -- confirm intent.
scheduler = utils.ReduceLROnPlateauWithBacktrack(
    optimizer,
    model,
    filename=f'models/modelv{v}.pt',
    factor=0.1,
    verbose=False,
    patience=100,
    warmup_steps=0,
    eps=1e-8,
)

In [None]:
%%time
import time
import tqdm

start = time.time()
# train model
for epoch in tqdm.tqdm(range(n_epochs)):
    
    total_loss = train_utils.train(train_dataloader, model, criterion, optimizer)
    
    print(f"Epoch {epoch} total_loss {total_loss} lr : {optimizer.param_groups[0]['lr']}", )
    
    logging.info(f"modelv{v} Epoch {epoch} total_loss {total_loss} lr {optimizer.param_groups[0]['lr']}")
    scheduler.step([-total_loss])
    torch.save([model, optimizer], f'models/modelv{v}_curr.pt')
end = time.time()

  0%|          | 0/100 [00:00<?, ?it/s]

Epoch 0 total_loss 1788.3089561462402 lr : 0.001


  1%|          | 1/100 [18:28<30:28:44, 1108.33s/it]

Epoch 1 total_loss 1545.2999470233917 lr : 0.001


  2%|▏         | 2/100 [38:30<31:40:39, 1163.66s/it]

Epoch 2 total_loss 1369.3499193191528 lr : 0.001


  3%|▎         | 3/100 [58:31<31:48:28, 1180.50s/it]

minibatch: 95 running_loss: 4.8350081443786625

In [None]:
# Report how long the training cell ran, converted from seconds to milliseconds.
elapsed_ms = (end - start) * 1000
print(elapsed_ms, "ms")
