In [1]:
from __future__ import print_function
from __future__ import division

import numpy as np
import argparse
import random
import shutil
import time
import warnings


import torch
import torch.nn as nn

import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.distributed as dist
import torch.optim as optim

import torch.multiprocessing as mp
import torch.utils.data
import torch.utils.data.distributed

import torchvision
from torchvision import datasets, models, transforms
import time
import os
import sys
import copy

In [2]:
#os.environ["WORLD_SIZE"]?

In [3]:
model_names = sorted(name for name in models.__dict__
    if name.islower() and not name.startswith("__")
    and callable(models.__dict__[name]))

parser = argparse.ArgumentParser(description='PyTorch ImageNet Training')
parser.add_argument('data', metavar='DIR',
                    help='path to dataset')
parser.add_argument('-a', '--arch', metavar='ARCH', default='resnet18',
                    choices=model_names,
                    help='model architecture: ' +
                        ' | '.join(model_names) +
                        ' (default: resnet18)')
parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
                    help='number of data loading workers (default: 4)')
parser.add_argument('--epochs', default=90, type=int, metavar='N',
                    help='number of total epochs to run')
parser.add_argument('--start-epoch', default=0, type=int, metavar='N',
                    help='manual epoch number (useful on restarts)')
parser.add_argument('-b', '--batch-size', default=256, type=int,
                    metavar='N',
                    help='mini-batch size (default: 256), this is the total '
                         'batch size of all GPUs on the current node when '
                         'using Data Parallel or Distributed Data Parallel')
parser.add_argument('--lr', '--learning-rate', default=0.1, type=float,
                    metavar='LR', help='initial learning rate', dest='lr')
parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                    help='momentum')
parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
                    metavar='W', help='weight decay (default: 1e-4)',
                    dest='weight_decay')
parser.add_argument('-p', '--print-freq', default=10, type=int,
                    metavar='N', help='print frequency (default: 10)')
parser.add_argument('--resume', default='', type=str, metavar='PATH',
                    help='path to latest checkpoint (default: none)')
parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true',
                    help='evaluate model on validation set')
parser.add_argument('--pretrained', dest='pretrained', action='store_true',
                    help='use pre-trained model')
parser.add_argument('--world-size', default=-1, type=int,
                    help='number of nodes for distributed training')
parser.add_argument('--rank', default=-1, type=int,
                    help='node rank for distributed training')
parser.add_argument('--dist-url', default='tcp://224.66.41.62:23456', type=str,
                    help='url used to set up distributed training')
parser.add_argument('--dist-backend', default='nccl', type=str,
                    help='distributed backend')
parser.add_argument('--seed', default=None, type=int,
                    help='seed for initializing training. ')
parser.add_argument('--gpu', default=None, type=int,
                    help='GPU id to use.')
parser.add_argument('--multiprocessing-distributed', action='store_true',
                    help='Use multi-processing distributed training to launch '
                         'N processes per node, which has N GPUs. This is the '
                         'fastest way to use PyTorch for either single node or '
                         'multi node data parallel training')

_StoreTrueAction(option_strings=['--multiprocessing-distributed'], dest='multiprocessing_distributed', nargs=0, const=True, default=False, type=None, choices=None, help='Use multi-processing distributed training to launch N processes per node, which has N GPUs. This is the fastest way to use PyTorch for either single node or multi node data parallel training', metavar=None)

# One node, multi-gpu

In [4]:
parser.parse_args("-a resnet50 --lr 0.01 --dist-url 'tcp://127.0.0.1:FREEPORT' --dist-backend 'nccl' --multiprocessing-distributed --world-size 1 --rank 0 /scratch/by783/DL_Final/ssl_data_96".split())

Namespace(arch='resnet50', batch_size=256, data='/scratch/by783/DL_Final/ssl_data_96', dist_backend="'nccl'", dist_url="'tcp://127.0.0.1:FREEPORT'", epochs=90, evaluate=False, gpu=None, lr=0.01, momentum=0.9, multiprocessing_distributed=True, pretrained=False, print_freq=10, rank=0, resume='', seed=None, start_epoch=0, weight_decay=0.0001, workers=4, world_size=1)

# Multiple node, multi-gpu

In [5]:
parser.parse_args("-a resnet50 --lr 0.01 --dist-url 'tcp://IP_OF_NODE0:FREEPORT' --dist-backend 'nccl' --multiprocessing-distributed --world-size 2 --rank 0 /scratch/by783/DL_Final/ssl_data_96".split())

Namespace(arch='resnet50', batch_size=256, data='/scratch/by783/DL_Final/ssl_data_96', dist_backend="'nccl'", dist_url="'tcp://IP_OF_NODE0:FREEPORT'", epochs=90, evaluate=False, gpu=None, lr=0.01, momentum=0.9, multiprocessing_distributed=True, pretrained=False, print_freq=10, rank=0, resume='', seed=None, start_epoch=0, weight_decay=0.0001, workers=4, world_size=2)

In [6]:
parser.parse_args("-a resnet50 --lr 0.01 --dist-url 'tcp://IP_OF_NODE0:FREEPORT' --dist-backend 'nccl' --multiprocessing-distributed --world-size 2 --rank 1 /scratch/by783/DL_Final/ssl_data_96".split())

Namespace(arch='resnet50', batch_size=256, data='/scratch/by783/DL_Final/ssl_data_96', dist_backend="'nccl'", dist_url="'tcp://IP_OF_NODE0:FREEPORT'", epochs=90, evaluate=False, gpu=None, lr=0.01, momentum=0.9, multiprocessing_distributed=True, pretrained=False, print_freq=10, rank=1, resume='', seed=None, start_epoch=0, weight_decay=0.0001, workers=4, world_size=2)

In [7]:
ngpus_per_node = torch.cuda.device_count()

In [8]:
args_world_size = ngpus_per_node*1

In [9]:
# import torchvision.transforms as transforms
# import torchvision.datasets as datasets
# import torchvision.models as models

In [10]:
save_path='/scratch/by783/DL_Final_models/'+'try'#+'190424_vgg_ae'#args.save
model_name = 'vgg'#args.model
num_epochs = 2 #args.epochs
feature_extract = True # str2bool(args.pretrained)

###################### fixed_params ###################################

num_classes = 1000
loader_image_path='/scratch/by783/DL_Final/ssl_data_96'
loader_batch_size=256

In [11]:
def image_loader(path, batch_size):
    transform = transforms.Compose(
        [
            #transforms.Resize(input_size),
            #transforms.CenterCrop(input_size),
            # use model fitted with the image size, so no need to resize
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
            # https://pytorch.org/docs/stable/torchvision/transforms.html
            # [mean],[std] for different channels
        ]
    )
    sup_train_data = datasets.ImageFolder('{}/{}/train'.format(path, 'supervised'), transform=transform)
    sup_val_data = datasets.ImageFolder('{}/{}/val'.format(path, 'supervised'), transform=transform)
    unsup_data = datasets.ImageFolder('{}/{}/'.format(path, 'unsupervised'), transform=transform)
    # source code: https://github.com/pytorch/vision/blob/master/torchvision/datasets/folder.py
    # Main idea:
    data_loader_sup_train = torch.utils.data.DataLoader(
        sup_train_data,
        batch_size=batch_size,
        shuffle=True,
        num_workers=0
    )
    data_loader_sup_val = torch.utils.data.DataLoader(
        sup_val_data,
        batch_size=batch_size,
        shuffle=True,
        num_workers=0
    )
    data_loader_unsup = torch.utils.data.DataLoader(
        unsup_data,
        batch_size=batch_size,
        shuffle=True,
        num_workers=0
    )

    print('sup_train_data.class_to_idx==sup_val_data.class_to_idx: ',
          sup_train_data.class_to_idx == sup_val_data.class_to_idx)

    return data_loader_sup_train, data_loader_sup_val, data_loader_unsup, sup_train_data.class_to_idx

In [12]:
# https://stackoverflow.com/questions/37837682/python-class-input-argument/37837766
# https://github.com/awentzonline/pytorch-cns/blob/master/examples/vggmse.py

class Model_Based_Autoencoder(torch.nn.Module):
    def __init__(self,model_name, pretrained):
        super(Model_Based_Autoencoder, self).__init__()
        if model_name!='vgg':
            sys.stdout.write('Dear, we only support vgg now...')
        
        self.encoder = models.vgg11_bn(pretrained=pretrained).features
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(512,512,kernel_size=(2, 2), stride=(2, 2), padding=(0, 0)),#de-conv8
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(512,512,kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),#de-conv7
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(512,512,kernel_size=(2, 2), stride=(2, 2), padding=(0, 0)),#de-conv6
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(512,256,kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),#de-conv5
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(256,256,kernel_size=(2, 2), stride=(2, 2), padding=(0, 0)),#de-conv4
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(256,128,kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),#de-conv3
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(128,64,kernel_size=(2, 2), stride=(2, 2), padding=(0, 0)),#de-conv2
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(64,3,kernel_size=(2, 2), stride=(2, 2), padding=(0, 0)),#de-conv1
            nn.BatchNorm2d(3),
            nn.Tanh()
        )
    
    def forward(self,x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x
            
    

In [13]:
def set_parameter_requires_grad(model, feature_extracting):
    if feature_extracting:
        for param in model.parameters():
            param.requires_grad = False




def initialize_model(model_name, feature_extract, use_pretrained=True):
    # Initialize these variables which will be set in this if statement. Each of these
    #   variables is model specific.
    model_ft = None
    input_size = 0



    if model_name != "vgg":
        sys.stdout.write('We only have vgg now!!!')
    else:
        """ VGG11_bn
        """
        model_ft = Model_Based_Autoencoder('vgg', pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft.encoder, feature_extract)

        input_size = 96

    return model_ft, input_size

In [14]:
def train_model(model, dataloaders, criterion, optimizer, num_epochs=25, is_inception=False):
    #unsupervised learning, we do not need train and vals
    since = time.time()
    loss_history=[]
    
    best_model_wts = copy.deepcopy(model.state_dict())
    best_loss = float('inf')
    
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)
        
        ################# train the model on unsupervised data ############
        model.train()
        
        running_loss = 0.0
        
        for inputs, _ in dataloaders['unlabeled']:
            inputs = inputs.to(device)
            optimizer.zero_grad()
            
            outputs = model(inputs)
            loss = criterion(outputs, inputs)
            
            running_loss += loss.item() * inputs.size(0)
            
            loss.backward()
            optimizer.step()
            
        epoch_loss = running_loss / len(dataloaders['unlabeled'].dataset)
        sys.stdout.write('Training time: {:.0f}s'.format( time.time() - since ))
        sys.stdout.write('Training loss: {:.4f}'.format(epoch_loss))
        ################# evaluate the model performance on labeled data ############
        
        model.eval()
        
        eval_loss=0.0
        for inputs, _ in dataloaders['labeled']:
            inputs = inputs.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, inputs)
            eval_loss += loss.item() * inputs.size(0)
            
        epoch_eval_loss = running_loss / len(dataloaders['labeled'].dataset)
        sys.stdout.write('Evaluation time: {:.0f}s'.format( time.time() - since ))        
        sys.stdout.write(' Eval loss: {:.4f}'.format( epoch_eval_loss))
        
        #################
        
        loss_history.append( ( epoch_loss,epoch_eval_loss ) )
        
        
        if epoch_eval_loss < best_loss:
            best_loss = epoch_eval_loss
            best_model_wts = copy.deepcopy(model.state_dict())
            with open(save_path, 'wb') as f:
                torch.save(model, f)
            
        with open(save_path+'_val_acc', 'w') as f:
            for item in loss_history:
                f.write("unlabeled: %s, labeled: %s \n,  " % (item[0],item[1]) )
    
    
    
    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    
    return model, loss_history

In [15]:
torch.cuda.device_count()

2

In [16]:
use_pretrained=True

model_ft, input_size = initialize_model(model_name, num_classes, feature_extract, use_pretrained=True)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

model_ft.encoder[0].weight.requires_grad

model_ft.decoder[0].weight.requires_grad

if torch.cuda.device_count() > 1:
    model_ft = nn.DataParallel(model_ft)

model_ft = model_ft.to(device)

In [17]:
criterion = nn.MSELoss()

learning_rate=0.001
optimizer_ft = torch.optim.Adam(model_ft.parameters(), lr=learning_rate, weight_decay=1e-5)


In [18]:
####### load data, input_size is used ####

sys.stdout.write('Begin to load data...')

dataloaders={}

dataloaders['unlabeled'], dataloaders['labeled'], data_loader_unsup, class_to_idx_dict = image_loader(loader_image_path,loader_batch_size)


Begin to load data...sup_train_data.class_to_idx==sup_val_data.class_to_idx:  True


In [None]:
params_to_update = model_ft.parameters()
print("Params to learn:")
if feature_extract:
    params_to_update = []
    for name,param in model_ft.named_parameters():
        if param.requires_grad == True:
            params_to_update.append(param)
            sys.stdout.write("\t{}".format(name))
else:
    for name,param in model_ft.named_parameters():
        if param.requires_grad == True:
            sys.stdout.write("\t{}".format(name))

Params to learn:
	module.decoder.0.weight	module.decoder.0.bias	module.decoder.1.weight	module.decoder.1.bias	module.decoder.3.weight	module.decoder.3.bias	module.decoder.4.weight	module.decoder.4.bias	module.decoder.6.weight	module.decoder.6.bias	module.decoder.7.weight	module.decoder.7.bias	module.decoder.9.weight	module.decoder.9.bias	module.decoder.10.weight	module.decoder.10.bias	module.decoder.12.weight	module.decoder.12.bias	module.decoder.13.weight	module.decoder.13.bias	module.decoder.15.weight	module.decoder.15.bias	module.decoder.16.weight	module.decoder.16.bias	module.decoder.18.weight	module.decoder.18.bias	module.decoder.19.weight	module.decoder.19.bias	module.decoder.21.weight	module.decoder.21.bias	module.decoder.22.weight	module.decoder.22.bias

In [None]:
model_ft, hist = train_model(model_ft, dataloaders, criterion, optimizer_ft, num_epochs=num_epochs, is_inception=(model_name=="inception"))

Epoch 0/1
----------
Training time: 1563sTraining loss: 1.3153

In [None]:
print(adsfa)

In [None]:
for epoch in range(num_epochs):
    for data in dataloader:
        img, _ = data
        img = Variable(img).cuda()
        # ===================forward=====================
        output = model(img)
        loss = criterion(output, img)
        # ===================backward====================
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # ===================log========================
    print('epoch [{}/{}], loss:{:.4f}'
          .format(epoch+1, num_epochs, loss.data[0]))
    if epoch % 10 == 0:
        pic = to_img(output.cpu().data)
        save_image(pic, './dc_img/image_{}.png'.format(epoch))

torch.save(model.state_dict(), './conv_autoencoder.pth')