In [1]:
from model_runner import ModelRunner
from DBPN import DBPN
from SRCNN_different_specs import RunSRCNN, SRCNN
from custom_image_dataset import CustomImageDataset
from torch.utils.data import DataLoader
from torchvision import transforms
from PIL import Image
import torch
from torch import nn
from image_helper import ImageHelper

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Dataset location settings, interpolated into the image-directory paths
# handed to the dataset objects.
DATASET_ROOT = "./datasets/"   # base folder (note the trailing slash)
DATASET_NAME = "Set14"         # prefix of the "<name>_train" / "<name>_validation" folders

In [3]:
%%time
transform_x = transforms.Compose([
    transforms.Resize((8, 8), interpolation=Image.BICUBIC),
])

transform_y = transforms.Compose([
    transforms.CenterCrop((32, 32))
])

# Set up the data loaders
train_data_set = CustomImageDataset(
    img_dir=f"{DATASET_ROOT}/{DATASET_NAME}_train", transform=transform_x, target_transform=transform_y)
validation_data_set = CustomImageDataset(
    img_dir=f"{DATASET_ROOT}/{DATASET_NAME}_validation", transform=transform_x, target_transform=transform_y)

# Report split sizes
print('Training set has {} instances'.format(len(train_data_set)))
print('Validation set has {} instances'.format(len(validation_data_set)))

100%|██████████| 14/14 [00:00<00:00, 67.53it/s]
100%|██████████| 14/14 [00:00<00:00, 280.11it/s]

Training set has 1085 instances
Validation set has 280 instances
CPU times: user 154 ms, sys: 102 ms, total: 256 ms
Wall time: 342 ms





In [4]:
%%time

# https://towardsdatascience.com/7-tips-for-squeezing-maximum-performance-from-pytorch-ca4a40951259
pin_memory = False
batch_size = round(1*(2**13))

train_dataloader = DataLoader(train_data_set, batch_size=batch_size, shuffle=True, pin_memory=pin_memory) # TODO: aumentar batch_size p/ 2^12
validation_dataloader = DataLoader(validation_data_set, batch_size=batch_size, pin_memory=pin_memory)

CPU times: user 10.3 ms, sys: 11.9 ms, total: 22.2 ms
Wall time: 20.9 ms


In [5]:
# Reference recipe: the learning rate is initialized to 1e-4 for all layers and
# decreased by a factor of 10 every 5 x 10^5 iterations, for 10^6 iterations in
# total. NOTE: the scheduler configured below does not implement that step
# decay; it uses a linear decay instead.
lr = 1e-4
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Network under training (moved to the selected device) and the helper object
# that drives the training loop.
model = DBPN().to(device)
model_runner = ModelRunner()

# Reference recipe: Adam with momentum 0.9 and weight decay 1e-4. Only the
# weight decay is passed explicitly; the 0.9 momentum matches Adam's default
# first beta coefficient.
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=lr,
    weight_decay=1e-4,
)

# Scale the learning rate linearly from 1.0x down to 0.01x of `lr` over the
# first 60 scheduler steps.
scheduler = torch.optim.lr_scheduler.LinearLR(
    optimizer, start_factor=1.0, end_factor=0.01, total_iters=60
)

In [11]:
%%time
model_runner.train(
    model=model,
    train_dataloader=train_dataloader,
    validation_dataloader=validation_dataloader,
    optimizer=optimizer,
    epochs=7,
    scheduler=scheduler,
    loss_fn=nn.MSELoss(),
)


epoch 0
-------------------------------
Training on 163 samples...
Batch size: 8192
Number of batches: 1


  0%|          | 0/1 [00:00<?, ?it/s]


X: torch.Size([163, 3, 8, 8])
y: torch.Size([163, 3, 32, 32])
pred: torch.Size([163, 3, 32, 32])



100%|██████████| 1/1 [00:21<00:00, 21.59s/it]


Training Error: 
 Avg loss: 0.240193 



100%|██████████| 1/1 [00:02<00:00,  2.73s/it]


Validation Error: 
 Avg loss: 0.232363 

Learning rate (antes): 0.0001
Learning rate (depois): 9.835e-05

epoch 1
-------------------------------
Training on 163 samples...
Batch size: 8192
Number of batches: 1


  0%|          | 0/1 [00:00<?, ?it/s]


X: torch.Size([163, 3, 8, 8])
y: torch.Size([163, 3, 32, 32])
pred: torch.Size([163, 3, 32, 32])



100%|██████████| 1/1 [00:18<00:00, 18.92s/it]


Training Error: 
 Avg loss: 0.234729 



100%|██████████| 1/1 [00:02<00:00,  2.60s/it]


Validation Error: 
 Avg loss: 0.226965 

Learning rate (antes): 9.835e-05
Learning rate (depois): 9.67e-05

epoch 2
-------------------------------
Training on 163 samples...
Batch size: 8192
Number of batches: 1


  0%|          | 0/1 [00:00<?, ?it/s]


X: torch.Size([163, 3, 8, 8])
y: torch.Size([163, 3, 32, 32])
pred: torch.Size([163, 3, 32, 32])



100%|██████████| 1/1 [00:20<00:00, 20.47s/it]


Training Error: 
 Avg loss: 0.229269 



100%|██████████| 1/1 [00:02<00:00,  2.62s/it]


Validation Error: 
 Avg loss: 0.221248 

Learning rate (antes): 9.67e-05
Learning rate (depois): 9.505e-05

epoch 3
-------------------------------
Training on 163 samples...
Batch size: 8192
Number of batches: 1


  0%|          | 0/1 [00:00<?, ?it/s]


X: torch.Size([163, 3, 8, 8])
y: torch.Size([163, 3, 32, 32])
pred: torch.Size([163, 3, 32, 32])



100%|██████████| 1/1 [00:16<00:00, 16.73s/it]


Training Error: 
 Avg loss: 0.223482 



100%|██████████| 1/1 [00:02<00:00,  2.55s/it]


Validation Error: 
 Avg loss: 0.214974 

Learning rate (antes): 9.505e-05
Learning rate (depois): 9.340000000000001e-05

epoch 4
-------------------------------
Training on 163 samples...
Batch size: 8192
Number of batches: 1


  0%|          | 0/1 [00:00<?, ?it/s]


X: torch.Size([163, 3, 8, 8])
y: torch.Size([163, 3, 32, 32])
pred: torch.Size([163, 3, 32, 32])



100%|██████████| 1/1 [00:20<00:00, 20.81s/it]


Training Error: 
 Avg loss: 0.217126 



100%|██████████| 1/1 [00:02<00:00,  2.79s/it]


Validation Error: 
 Avg loss: 0.207894 

Learning rate (antes): 9.340000000000001e-05
Learning rate (depois): 9.175000000000001e-05

epoch 5
-------------------------------
Training on 163 samples...
Batch size: 8192
Number of batches: 1


  0%|          | 0/1 [00:00<?, ?it/s]


X: torch.Size([163, 3, 8, 8])
y: torch.Size([163, 3, 32, 32])
pred: torch.Size([163, 3, 32, 32])



100%|██████████| 1/1 [00:19<00:00, 19.54s/it]


Training Error: 
 Avg loss: 0.209948 



100%|██████████| 1/1 [00:03<00:00,  3.45s/it]


Validation Error: 
 Avg loss: 0.199772 

Learning rate (antes): 9.175000000000001e-05
Learning rate (depois): 9.010000000000001e-05

epoch 6
-------------------------------
Training on 163 samples...
Batch size: 8192
Number of batches: 1


  0%|          | 0/1 [00:00<?, ?it/s]


X: torch.Size([163, 3, 8, 8])
y: torch.Size([163, 3, 32, 32])
pred: torch.Size([163, 3, 32, 32])



100%|██████████| 1/1 [00:23<00:00, 23.60s/it]


Training Error: 
 Avg loss: 0.201708 



100%|██████████| 1/1 [00:02<00:00,  2.71s/it]

Validation Error: 
 Avg loss: 0.190361 

Learning rate (antes): 9.010000000000001e-05
Learning rate (depois): 8.845000000000001e-05
CPU times: user 16min 54s, sys: 1min 54s, total: 18min 49s
Wall time: 2min 41s





In [12]:
# Scratch check: unsqueeze(0) prepends a size-1 leading axis to a (2, 2, 3) tensor.
a = torch.randn(2, 2, 3).unsqueeze(0)
a.shape

torch.Size([1, 2, 2, 3])

In [6]:
# Shape smoke test: push one random 3-channel 8x8 tensor through the model and
# print the output shape (a 256x256 input was tried previously).
# The tensor is created on `device` so it lives where the model's weights do;
# a CPU tensor would fail once the model has been moved to CUDA. The name
# `sample_input` is used instead of `input` to avoid shadowing the builtin.
sample_input = torch.randn([1, 3, 8, 8], device=device)
# No gradients are needed for a shape check, so skip building the autograd graph.
with torch.no_grad():
    output = model(sample_input)
print(output.shape)

torch.Size([1, 3, 32, 32])


In [7]:
# Apply the model to a single image patch and display the result.
# The previous `abc = DBPN()` line was removed: it built a second, untrained
# network that was never used.
# NOTE(review): the saved output of this cell is a RuntimeError (channel
# mismatch inside the model). The cause is not visible from this notebook --
# check the shape of the tensor ImageHelper feeds to the model.
# NOTE(review): the path points at Set5 while DATASET_NAME is "Set14" --
# confirm this is intentional.
image_helper = ImageHelper()
image_helper.apply_model_to_image_and_show(
    model=model,
    image="datasets/Set5_train/patches/baby_patch/patch_0.png",
    downsample_factor=4
)

RuntimeError: Given groups=1, weight of size [64, 128, 1, 1], expected input[1, 64, 264, 132] to have 128 channels, but got 64 channels instead