## Check pytorch pipeline

In [1]:
# !cd ../../../ramp-workflow/ && python setup.py install

In [2]:
# https://ipython.org/ipython-doc/3/config/extensions/autoreload.html
%load_ext autoreload
%autoreload 2

In [3]:
import os, sys

In [4]:
sys.path.append("../submissions")
sys.path.append("..")

In [5]:
from rampwf.workflows.simplified_image_classifier import ImageLoader
from problem import get_cv, get_train_data, get_test_data

In [6]:
from pytorch_DenseNet161_finetunning_imbalanced2.image_classifier import ImageClassifier, DenseNet161PollenatingInsects

HAS_GPU: False


In [7]:
folder_X_array, y_array = get_train_data(path="..")
cv = list(get_cv(folder_X_array, y_array))

for fold_i, (train_is, valid_is) in enumerate(cv):

    folder, X_array = folder_X_array
    if train_is is None:
        train_is = slice(None, None, None)

    img_loader = ImageLoader(X_array[train_is], y_array[train_is], folder=folder, n_classes=403)

    
# img_loader.setup_backend('opencv')

(folder, X_array), y_array = get_test_data(path="..")
img_loader_test = ImageLoader(X_array, y_array, folder=folder, n_classes=403)


In [None]:
ic = ImageClassifier()

In [1]:
!nvidia-smi

Thu Nov  2 23:49:54 2017       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 375.66                 Driver Version: 375.66                    |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  GeForce GTX 108...  Off  | 0000:02:00.0     Off |                  N/A |
| 55%   85C    P2   282W / 250W |  10747MiB / 11170MiB |    100%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID  Type  Process name                               Usage    

Results:
global import : 3 loops, best of 2: 10.6 s per loop


local import : 

## Check dataflow

In [18]:
import numpy as np
import matplotlib.pylab as plt
%matplotlib inline

In [19]:
def normalize(in_img, q_min=0.5, q_max=99.5, return_mins_maxs=False):
    """
    Normalize image in [0.0, 1.0]
    mins is array of minima
    maxs is array of differences between maxima and minima
    """
    init_shape = in_img.shape
    if len(init_shape) == 2:
        in_img = np.expand_dims(in_img, axis=2)
    w, h, d = in_img.shape
    img = in_img.copy()
    img = np.reshape(img, [w * h, d]).astype(np.float64)
    mins = np.percentile(img, q_min, axis=0)
    maxs = np.percentile(img, q_max, axis=0) - mins
    maxs[(maxs < 0.0001) & (maxs > -0.0001)] = 0.0001
    img = (img - mins[None, :]) / maxs[None, :]
    img = img.clip(0.0, 1.0)
    img = np.reshape(img, [w, h, d])
    if init_shape != img.shape:
        img = img.reshape(init_shape)
    if return_mins_maxs:
        return img, mins, maxs
    return img


def scale_percentile(matrix, q_min=0.5, q_max=99.5):
    is_gray = False
    if len(matrix.shape) == 2:
        is_gray = True
        matrix = matrix.reshape(matrix.shape + (1,))
    matrix = (255 * normalize(matrix, q_min, q_max)).astype(np.uint8)
    if is_gray:
        matrix = matrix.reshape(matrix.shape[:2])
    return matrix

In [20]:
train_batches_ds, val_batches_ds = ic._get_trainval_datasets(img_loader, n_splits=7, batch_size=16, num_workers=2)



In [21]:
from tqdm import tqdm 

In [None]:
max_count = len(img_loader) / 16

with tqdm(total=max_count) as pbar:
    for i, (batch_x, batch_y) in enumerate(train_batches_ds):
        pbar.update(1)
        sys.stdout.flush()
        max_count -= 1 
        if max_count == 0:
            break
        

  0%|          | 0/2917 [00:00<?, ?it/s]

In [None]:
# max_count = 5
# n = 10

# with tqdm(total=max_count) as pbar:
#     for i, (batch_x, batch_y) in enumerate(train_batches_ds):
#         plt.figure(figsize=(16, 4))
#         plt.suptitle("Batch %i" % i)
#         for j in range(len(batch_x)):    
#             if j > 0 and j % n == 0:
#                 plt.figure(figsize=(16, 4))

#             x = batch_x[j, ...].cpu()
#             y = batch_y[j, ...]
#             plt.subplot(1, n, (j % n) + 1)
#             plt.imshow(scale_percentile(x.numpy().transpose([1, 2, 0])))
#             plt.title("Class %i" % y)
#             plt.axis('off')

#         pbar.update(1)
#         sys.stdout.flush()

#         max_count -= 1 
#         if max_count == 0:
#             break
        

In [None]:
max_count = 5
n = 10

for i, (batch_x, batch_y) in enumerate(val_batches_ds):
    
    plt.figure(figsize=(16, 4))
    plt.suptitle("Batch %i" % i)
    for j in range(len(batch_x)):    
        if j > 0 and j % n == 0:
            plt.figure(figsize=(16, 4))
        
        x = batch_x[j, ...].cpu()
        y = batch_y[j, ...]
        plt.subplot(1, n, (j % n) + 1)
        plt.imshow(scale_percentile(x.numpy().transpose([1, 2, 0])))
        plt.title("Class %i" % y)
    
    max_count -= 1 
    if max_count == 0:
        break

## Check training phase

In [9]:
ic.batch_size = 4
ic.n_epochs = 2
ic.n_workers = 2
ic.n_splits = 7

In [10]:
train_batches_ds, val_batches_ds = ic._get_trainval_datasets(img_loader, n_splits=7, batch_size=16, num_workers=2)



In [11]:
from torch import nn
from torch.optim import Adam

criterion = nn.CrossEntropyLoss().cuda()
optimizer = Adam(ic.net.parameters(), lr=0.001)

ic._train_one_epoch(ic.net, train_batches_ds, criterion, optimizer, 0, 1)

Epoch: 1/1: 100%|██████████| 2493/2493 [17:09<00:00,  2.43it/s, Loss 4.5364 | Prec@1 12.011 | Prec@5 31.348]


In [15]:
import torch

HAS_GPU: True


In [19]:
x = torch.randn(3,256, 256)
y = torch.randn(3,257, 256)
z = torch.cat([x,y], 0)

RuntimeError: inconsistent tensor sizes at /pytorch/torch/lib/TH/generic/THTensorMath.c:2709

In [16]:
x = torch.autograd.Variable(x)
y = torch.autograd.Variable(y)
z = torch.cat([x,y], 0)

NameError: name 'x' is not defined

In [9]:
ic.batch_size = 12
ic.n_epochs = 2
ic.n_workers = 2
ic.n_splits = 7

In [10]:
ic.fit(img_loader)

Epoch: 1/2:  34%|###4      | 1134/3324 [06:02<11:40,  3.13it/s, Loss 2.6616 | Prec@1 45.294 | Prec@5 67.812]Process Process-1:
Process Process-2:
Traceback (most recent call last):
  File "/usr/lib/python2.7/multiprocessing/process.py", line 258, in _bootstrap
Traceback (most recent call last):
    self.run()
  File "/usr/lib/python2.7/multiprocessing/process.py", line 114, in run
  File "/usr/lib/python2.7/multiprocessing/process.py", line 258, in _bootstrap
  File "/usr/lib/python2.7/multiprocessing/process.py", line 114, in run
    self.run()
    self._target(*self._args, **self._kwargs)
    self._target(*self._args, **self._kwargs)
  File "/usr/local/lib/python2.7/dist-packages/torch/utils/data/dataloader.py", line 34, in _worker_loop
  File "/usr/local/lib/python2.7/dist-packages/torch/utils/data/dataloader.py", line 34, in _worker_loop
    r = index_queue.get()
    r = index_queue.get()
  File "/usr/lib/python2.7/multiprocessing/queues.py", line 378, in get
  File "/usr/lib/pytho

KeyboardInterrupt: 

In [1]:
a = 1

In [8]:
import torch
from torch.autograd import Variable
from torch.nn import AdaptiveAvgPool2d, Linear, Sequential, Module
from torchvision.models import densenet161, DenseNet


class Flatten(Module):
    def forward(self, x):
        return x.view(x.size(0), -1)
    
# net = DenseNet161PollenatingInsects()

In [9]:
densenet = densenet161(pretrained=True)

features = densenet.features

classifier = Sequential(
    AdaptiveAvgPool2d(1),
    Flatten(),
    Linear(densenet.classifier.in_features, 403)
)


In [49]:
x = torch.randn(4, 3, 299, 299)

x = Variable(x, requires_grad=True)

x1 = features(x)
y = classifier(x1)

In [50]:
y.size()

torch.Size([4, 403])

In [32]:
x1.size()

torch.Size([4, 2208, 9, 9])

In [45]:
x2 = AdaptiveAvgPool2d(1)(x1)
x2 = x2.view(x2.size(0), -1)

In [40]:
x2.size()

torch.Size([4, 2208])

In [41]:
densenet.classifier.in_features

2208

In [43]:
x3 = Linear(densenet.classifier.in_features, 403)(x2)

In [44]:
x3.size()

torch.Size([4, 403])

Check test ds

Check DataLoader

In [1]:
import torch
import torch.utils.data

class DS(object):
    def __getitem__(self, idx):
        return torch.rand(3, 640, 640).cuda()
    def __len__(self):
        return 2000

ds = DS()
it = torch.utils.data.DataLoader(ds, batch_size=500, num_workers=1)

for i, data in enumerate(it):
    print(i)

0
1
2
3


In [3]:
import os, sys
sys.path.append("../submissions")
sys.path.append("..")

from pytorch_DenseNet161_finetunning_imbalanced.image_classifier import ResizedDataset, TransformedDataset, OnGPUDataLoader


HAS_GPU: True


In [1]:
import torch
import torch.utils.data

class DS(object):
    def __getitem__(self, idx):
        return torch.rand(3, 640, 640)
    def __len__(self):
        return 2000

ds = DS()
it = torch.utils.data.DataLoader(ds, batch_size=500, num_workers=1)

for i, data in enumerate(it):
    print(i)

0
1
2
3
