In [1]:
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torch.autograd import Variable
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import DataParallel

import time
import os
import numpy as np
import json
import cv2
from PIL import Image, ImageOps
import random
from tqdm import tqdm
import operator
import itertools
from scipy.io import  loadmat
import logging
from scipy import signal

from utils import data_transforms
from utils import get_paste_kernel, kernel_map
from utils_logging import setup_logger

from models.gazenet import GazeNet
from dataloading import GazeDataset
from models.__init__ import save_checkpoint, resume_checkpoint
from training.train_gazenet import train, test
from training.train_gazenet import StagedOptimizer

In [2]:
# Logger configuration, gathered in one place so every option is visible at a
# glance. The resulting `logger` object is what the notebook later passes to
# train() and test().
# NOTE(review): presumably setup_logger writes to ./logs/train.log and, with
# verbose=True, also echoes to the console — confirm in utils_logging.
logger_options = dict(
    name='first_logger',
    log_dir='./logs/',
    log_file='train.log',
    log_format='%(asctime)s %(levelname)s %(message)s',
    verbose=True,
)
logger = setup_logger(**logger_options)

In [3]:
#logger.info("Log test if working again")

In [4]:
# main: build the train/test datasets, their loaders, and the multi-GPU model.

batch_size = 32

# Single source of truth for the dataset location (it used to be repeated in
# four string literals). Defaults to the original path; set the
# GAZEFOLLOW_ROOT environment variable to point at a different copy of the
# data without editing this cell. Joining with '' guarantees the trailing
# separator that the original literal carried.
DATA_ROOT = os.path.join(
    os.environ.get('GAZEFOLLOW_ROOT')
    or '/home/eee198/Documents/datasets/GazeFollowData/',
    '')

# Training split: shuffled, full batch size.
train_set = GazeDataset(root_dir=DATA_ROOT,
                        mat_file=os.path.join(DATA_ROOT, 'train_annotations.mat'),
                        training='train')
train_data_loader = DataLoader(train_set, batch_size=batch_size,
                               shuffle=True, num_workers=16)

# Test split: fixed order, half the training batch size.
test_set = GazeDataset(root_dir=DATA_ROOT,
                       mat_file=os.path.join(DATA_ROOT, 'test_annotations.mat'),
                       training='test')
test_data_loader = DataLoader(test_set, batch_size=batch_size//2,
                              shuffle=False, num_workers=8)

# Wrap the model in DataParallel, then move it to the GPU(s).
net = GazeNet()
net = DataParallel(net)
# Kept as the final bare expression so the notebook displays the module repr.
net.cuda()

DataParallel(
  (module): GazeNet(
    (face_net): ResNet(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace)

In [5]:
# --- Optimisation settings ---------------------------------------------------
start_epoch = 0          # overwritten below when resuming from a checkpoint
method = 'Adam'          # NOTE(review): not read by any visible cell — confirm before removing
learning_rate = 0.0001
max_epoch = 25

# Optimizer handed to resume_checkpoint below when resuming.
staged_opt = StagedOptimizer(net, learning_rate)
optimizer = staged_opt.get_optimizer()

# --- Optional resume ---------------------------------------------------------
# The checkpoint file name and the epoch to restart from are both derived from
# `resume_epoch`, so they cannot drift apart (they used to be two independent
# literals).
resume_training = True
resume_epoch = 25
resume_path = './saved_models/temp/model_epoch{}.pth.tar'.format(resume_epoch)
if resume_training:
    net, optimizer = resume_checkpoint(net, optimizer, resume_path)
    # Evaluate the restored weights once before doing anything else.
    test(net, test_data_loader, logger)
    # NOTE: when resume_epoch == max_epoch, a later
    # `range(start_epoch, max_epoch)` training loop runs zero iterations;
    # raise max_epoch to continue training.
    start_epoch = resume_epoch

  0%|          | 0/299 [00:00<?, ?it/s]

=> loading checkpoint './saved_models/temp/model_epoch25.pth.tar'
=>Optimizer has different parameter groups. Usually this will occur for staged optimizers (GazeNet, GazeMask)
=> loaded checkpoint './saved_models/temp/model_epoch25.pth.tar' (epoch 26)


  map(lambda x: Variable(x.cuda(), volatile=True), [image, face_image, gaze_field, eye_position, gt_position, gt_heatmap])
loss: 0.06168, 0.11927, 0.18096
  0%|          | 1/299 [00:01<07:16,  1.46s/it]loss: 0.05558, 0.11232, 0.16790
  1%|          | 2/299 [00:01<05:17,  1.07s/it]loss: 0.06302, 0.11029, 0.17331
  1%|          | 3/299 [00:01<03:56,  1.25it/s]loss: 0.06081, 0.08591, 0.14672
  1%|▏         | 4/299 [00:01<02:55,  1.69it/s]loss: 0.05679, 0.03788, 0.09466
  2%|▏         | 5/299 [00:02<02:13,  2.20it/s]loss: 0.05022, 0.08001, 0.13022
loss: 0.06150, 0.17127, 0.23277
  2%|▏         | 7/299 [00:02<01:41,  2.89it/s]loss: 0.05873, 0.06466, 0.12339
  3%|▎         | 8/299 [00:02<01:22,  3.51it/s]loss: 0.05862, 0.08342, 0.14204
  3%|▎         | 9/299 [00:02<01:08,  4.20it/s]loss: 0.05867, 0.03987, 0.09854
  3%|▎         | 10/299 [00:02<00:59,  4.82it/s]loss: 0.05301, 0.19217, 0.24518
  4%|▎         | 11/299 [00:02<00:50,  5.69it/s]loss: 0.05280, 0.05494, 0.10774
  4%|▍         | 12/2

loss: 0.05783, 0.05266, 0.11049
 37%|███▋      | 112/299 [00:14<00:18, 10.25it/s]loss: 0.06575, 0.09278, 0.15853
loss: 0.05508, 0.10581, 0.16089
 38%|███▊      | 114/299 [00:14<00:19,  9.73it/s]loss: 0.06422, 0.15661, 0.22083
loss: 0.05597, 0.06377, 0.11974
 39%|███▉      | 116/299 [00:14<00:19,  9.57it/s]loss: 0.05769, 0.17179, 0.22949
 39%|███▉      | 117/299 [00:14<00:19,  9.19it/s]loss: 0.06515, 0.15367, 0.21883
 39%|███▉      | 118/299 [00:15<00:20,  8.95it/s]loss: 0.07159, 0.08580, 0.15738
 40%|███▉      | 119/299 [00:15<00:21,  8.54it/s]loss: 0.06403, 0.07940, 0.14343
loss: 0.04949, 0.09425, 0.14375
 40%|████      | 121/299 [00:15<00:19,  9.24it/s]loss: 0.06140, 0.06566, 0.12707
 41%|████      | 122/299 [00:15<00:19,  8.92it/s]loss: 0.05325, 0.08707, 0.14032
 41%|████      | 123/299 [00:15<00:19,  8.80it/s]loss: 0.05950, 0.14623, 0.20573
 41%|████▏     | 124/299 [00:15<00:20,  8.53it/s]loss: 0.06310, 0.09676, 0.15986
 42%|████▏     | 125/299 [00:15<00:19,  8.79it/s]loss: 0.06562

 76%|███████▋  | 228/299 [00:27<00:08,  8.20it/s]loss: 0.06244, 0.12918, 0.19162
loss: 0.05495, 0.03696, 0.09191
 77%|███████▋  | 230/299 [00:27<00:08,  8.35it/s]loss: 0.05678, 0.02971, 0.08649
 77%|███████▋  | 231/299 [00:28<00:07,  8.64it/s]loss: 0.05846, 0.20658, 0.26504
 78%|███████▊  | 232/299 [00:28<00:08,  8.09it/s]loss: 0.05543, 0.09233, 0.14775
 78%|███████▊  | 233/299 [00:28<00:07,  8.45it/s]loss: 0.06065, 0.03415, 0.09480
 78%|███████▊  | 234/299 [00:28<00:08,  8.01it/s]loss: 0.05639, 0.09595, 0.15234
 79%|███████▊  | 235/299 [00:28<00:07,  8.49it/s]loss: 0.05240, 0.06232, 0.11472
 79%|███████▉  | 236/299 [00:28<00:07,  8.73it/s]loss: 0.05126, 0.01507, 0.06633
loss: 0.06385, 0.04377, 0.10762
 80%|███████▉  | 238/299 [00:28<00:06,  8.75it/s]loss: 0.06061, 0.15759, 0.21820
loss: 0.05772, 0.11474, 0.17246
 80%|████████  | 240/299 [00:29<00:06,  8.95it/s]loss: 0.05551, 0.14812, 0.20362
 81%|████████  | 241/299 [00:29<00:06,  9.10it/s]loss: 0.06351, 0.09309, 0.15660
loss: 0.05209

In [8]:
# One-off manual checkpoint: rebuild the staged optimizer, ask it for the
# optimizer of epoch index 24, and hand net/optimizer to save_checkpoint under
# the label 24 + 1 = 25 (the same epoch+1 labelling the training loop uses).
save_path = './saved_models/temp/'
last_epoch = 24
staged_opt = StagedOptimizer(net, learning_rate)
optimizer = staged_opt.update(last_epoch)
save_checkpoint(net, optimizer, last_epoch + 1, save_path)

In [6]:
# Main training loop: for each epoch pick the stage's optimizer, train,
# optionally checkpoint, then evaluate on the test loader.
#
# NOTE(review): `optimizer` is rebound by staged_opt.update() on every epoch,
# replacing whatever object resume_checkpoint returned earlier — confirm that
# StagedOptimizer does not need the restored optimizer state.
staged_opt = StagedOptimizer(net, learning_rate)

# Loop-invariant settings, hoisted out of the loop body.
save_path = './saved_models/temp/'
# Checkpoints are written only for epoch indices strictly greater than this.
save_after_epoch = max_epoch - 5

for epoch in range(start_epoch, max_epoch):

    # Update optimizer for the current training stage
    optimizer = staged_opt.update(epoch)

    # Train model for one epoch
    train(net, train_data_loader, optimizer, epoch, logger)

    # Save model and optimizer (labelled epoch+1) near the end of training
    if epoch > save_after_epoch:
        save_checkpoint(net, optimizer, epoch+1, save_path)

    # Evaluate model
    test(net, test_data_loader, logger)

  1%|          | 32/3924 [00:14<29:58,  2.16it/s] 


KeyboardInterrupt: 