In [1]:
import os
import random

import numpy as np
import pandas as pd
import scipy as sc
from scipy.io import wavfile
from scipy import signal
import cv2

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset
from torchvision.transforms import Compose

import tensorboardX
from tqdm import tqdm

import matplotlib.pyplot as plt

In [2]:
## TRY TO ADD DROPOUT

class FaceSubnet(nn.Module):

    def __init__(self, seed=13):
        super(FaceSubnet, self).__init__()
#         torch.manual_seed(seed)
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=96, kernel_size=7, stride=2, padding=3)
        self.bn1 = nn.BatchNorm2d(num_features=96)
        self.conv2 = nn.Conv2d(in_channels=96, out_channels=256, kernel_size=5, stride=2, padding=2)
        self.bn2 = nn.BatchNorm2d(num_features=256)
        self.conv3 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(num_features=256)
        self.conv4 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1)
        self.bn4 = nn.BatchNorm2d(num_features=256)
        self.conv5 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1)
        self.bn5 = nn.BatchNorm2d(num_features=256)
        
        self.fc6 = nn.Linear(in_features=256 * 7 * 7, out_features=4096)
        self.fc7 = nn.Linear(in_features=4096, out_features=1024)
        self.fc8 = nn.Linear(in_features=1024, out_features=256)
        
        self.mpool = nn.MaxPool2d(kernel_size=2)
        self.relu = nn.ReLU()
        
    def forward(self, x):
        B, C, H, W = x.size()
        
        x = self.relu(self.bn1(self.conv1(x)))
        x = self.mpool(x)
        x = self.relu(self.bn2(self.conv2(x)))
        x = self.mpool(x)
        x = self.relu(self.bn3(self.conv3(x)))
        x = self.relu(self.bn4(self.conv4(x)))
        x = self.relu(self.bn5(self.conv5(x)))
        x = self.mpool(x)
        
        x = x.view(x.size(0), -1)
        x = self.relu(self.fc6(x))
        x = self.relu(self.fc7(x))
        x = self.fc8(x)
        
        return F.normalize(x)

## TRY TO REMOVE DROPOUT

class VoiceSubnet(nn.Module):

    def __init__(self, seed=13):
        super(VoiceSubnet, self).__init__()
#         torch.manual_seed(seed)
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=96, kernel_size=7, stride=2, padding=1)
        self.bn1 = nn.BatchNorm2d(num_features=96)
        self.conv2 = nn.Conv2d(in_channels=96, out_channels=256, kernel_size=5, stride=2, padding=1)
        self.bn2 = nn.BatchNorm2d(num_features=256)
        self.conv3 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(num_features=256)
        self.conv4 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1)
        self.bn4 = nn.BatchNorm2d(num_features=256)
        self.conv5 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1)
        self.bn5 = nn.BatchNorm2d(num_features=256)
        
        self.bn6 = nn.BatchNorm2d(num_features=4096)
        
        self.fc7 = nn.Linear(in_features=4096, out_features=1024)
        self.fc8 = nn.Linear(in_features=1024, out_features=256)
        
        self.relu = nn.ReLU()
        self.mpool1 = nn.MaxPool2d(kernel_size=3, stride=2)
        self.mpool2 = nn.MaxPool2d(kernel_size=3, stride=2)
        self.mpool5 = nn.MaxPool2d(kernel_size=(5, 3), stride=(3, 2))
        
        # Conv2d with weights of size (H, 1) is identical to FC with H weights
        self.fc6 = nn.Conv2d(in_channels=256, out_channels=4096, kernel_size=(9, 1))
        
    def forward(self, x):
        B, C, H, W = x.size()

        x = self.relu(self.bn1(self.conv1(x)))
        x = self.mpool1(x)
        x = self.relu(self.bn2(self.conv2(x)))
        x = self.mpool2(x)
        x = self.relu(self.bn3(self.conv3(x)))
        x = self.relu(self.bn4(self.conv4(x)))
        x = self.relu(self.bn5(self.conv5(x)))
        x = self.mpool5(x)

        B, C, H, W = x.size()
        self.apool6 = nn.AvgPool2d(kernel_size=(1, W))

        x = self.relu(self.bn6(self.fc6(x)))
        x = self.apool6(x)

        x = x.view(x.size(0), -1)
        x = self.relu(self.fc7(x))
        x = self.fc8(x)
        
        return F.normalize(x)
    
class CurriculumMining(nn.Module):

    def __init__(self):
        super(CurriculumMining, self).__init__()
        
    def forward(self, positive_pairs, tau):
        faces, voices = positive_pairs
        B, D = faces.size()
        # calc dist 
        # (X - Y) ^ 2 = X^2 + Y^2 - 2XY
        x = (faces**2).sum(dim=1).view(-1, 1) + (voices**2).sum(dim=1) - 2*faces.matmul(voices.t())
        dists = x.sqrt()
        
        sorted_dist, sorted_idx = torch.sort(dists, dim=1, descending=True)
        Dnj = sorted_dist - dists.diag().view(-1, 1)
        idx_threshold = round(tau * (B-1))
        
        # tricky part
        mask = torch.ones_like(sorted_dist)
        mask[:, idx_threshold+1:] = 0
        mask[Dnj <= 0] = 0
        idx_of_sorted_idx = ((mask).sum(dim=1) - 1).abs().long()
        neg_samples_idx = torch.gather(sorted_idx, dim=1, index=idx_of_sorted_idx.view(B, 1))
        neg_samples_idx = neg_samples_idx.view(B)
        negative_voices = voices[neg_samples_idx]
        
        return (faces, negative_voices)

class ContrastiveLoss(nn.Module):

    def __init__(self):
        super(ContrastiveLoss, self).__init__()
        
    def forward(self, positive_pairs, negative_pairs, margin):
        ## POSITIVE PART
        faces, voices = positive_pairs
#         dists_pos = ((faces - voices) ** 2).sum(dim=1).sqrt()
#         pos_part = dists_pos ** 2
        pos_part = ((faces - voices) ** 2).sum(dim=1)
    
        ## NEGATIVE PART
        faces, voices = negative_pairs
        dists_neg = ((faces - voices) ** 2).sum(dim=1).sqrt()
        neg_part = (margin - dists_neg).clamp(0) ** 2
        
        loss4pair = torch.cat([pos_part, neg_part])
        
        ## CALCULATE LOSS
        B, D = faces.size()
        batch_loss = loss4pair.sum() / (2 * B)
    
        return batch_loss

class LearnablePinsNet(nn.Module):

    def __init__(self):
        super(LearnablePinsNet, self).__init__()
        self.face_subnet = FaceSubnet()
        self.voice_subnet = VoiceSubnet()
        self.curr_mining = CurriculumMining()
        
    def forward(self, frames, log_specs, tau=None):
        emb_f = self.face_subnet(frames)
        emb_v = self.voice_subnet(log_specs)
        
        if self.training:
            positive_pairs = (emb_f, emb_v)
            negative_pairs = self.curr_mining(positive_pairs, tau)

            return positive_pairs, negative_pairs
        
        else:
            return emb_f, emb_v

In [3]:
class Normalize(object):
    """Normalizes both face (mean) and voice spectogram (mean-varience)"""
    
    def __call__(self, sample):
        frame, log_spectogram = sample

        ## FACE (H, W, C)
        # mean normalization for every image (not batch)
        mu = frame.mean(axis=(0, 1))
        frame = frame - mu
        
        ## VOICE (Freq, Time)
        # mean-variance normalization for every spectogram (not batch)
        mu = log_spectogram.mean(axis=1).reshape(512, 1)
        sigma = log_spectogram.std(axis=1).reshape(512, 1)
        log_spectogram = (log_spectogram - mu) / sigma

        return frame, log_spectogram

class RandomHorizontalFlip(object):
    '''Horizontally flip the given Image ndarray randomly with a given probability.'''
    
    def __init__(self, p=0.5):
        self.p = p
    
    def __call__(self, sample):
        frame, log_spectogram = sample

        if random.random() < self.p:
            return cv2.flip(frame, 1), log_spectogram
        
        return frame, log_spectogram

class ColorJittering(object):
    '''Given Image ndarray performs brightness and 
    saturation jittering. It is not mentioned in the paper but I guess 
    the authors used MatConvNet but do not mention any specific augmentation
    parameters. So, I made my wind guess regarding the parameters and implemented
    augmentation in the following fashion as in there:
    http://www.vlfeat.org/matconvnet/mfiles/vl_imreadjpeg/
    and the Section 3.5 of the manual
    http://www.vlfeat.org/matconvnet/matconvnet-manual.pdf'''
    
    def __init__(self, brightness=[255/25.5, 255/25.5, 255/25.5], saturation=0.5):
        # brightness
        self.B = np.array(brightness, dtype=np.float32)
        # saturation
        self.S = saturation
    
    def __call__(self, sample):
        frame, log_spectogram = sample
        
        # brightness
        # gives an error w/o float32 -- normal() returns float64
        w = np.float32(np.random.normal(size=3))
        b = self.B * w
        frame = np.clip(frame + b, 0, 255)
        
        # saturation
        sigma = np.random.uniform(1-self.S, 1+self.S)
        frame = sigma * frame + (1-sigma) / 3 * frame.sum(axis=2, keepdims=True)
        frame = np.clip(frame, 0, 255)
        
        return frame, log_spectogram
    
class ToTensor(object):
    """Convert ndarrays in sample to Tensors."""
    
    def __call__(self, sample):
        frame, log_spectogram = sample
        F, T = log_spectogram.shape

        # swap color axis because
        # numpy image: H x W x C
        # torch image: C x H x W
        frame = frame.transpose((2, 0, 1))
        
        # now log_specs are of size (Freq, Time) 2D but has to be 3D
        log_spectogram = log_spectogram.reshape(1, F, T)

        return torch.from_numpy(frame), torch.from_numpy(log_spectogram)

In [4]:
class AudioFrameDatasetTest(Dataset):
    '''Test only'''

    def __init__(self, path_to_data, path_to_pairs, transform=None): 
        self.path_to_data = path_to_data
        self.transform = transform
        self.dataset = pd.read_table(path_to_pairs, sep=',')[['label', 'voice-path', 'face-path']]
        self.dataset.replace({'_000': '/0'}, inplace=True, regex=True)

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        label, voice_path, face_path = self.dataset.iloc[idx]
        
        ### VISUAL INPUT
        full_face_path = os.path.join(self.path_to_data, 'video', face_path)
        frame = cv2.cvtColor(cv2.imread(full_face_path), cv2.COLOR_BGR2RGB)
        frame = cv2.resize(frame, (224, 224), interpolation=cv2.INTER_CUBIC)
        
        ### AUDIO INPUT
        # not 1024 as reported in the referenced paper [35] 
        # because it gives 513xN but not 512xN
        FFT_len = 1022
        window = 'hamming'
        full_voice_path = os.path.join(self.path_to_data, 'audio', voice_path)

        sample_rate, samples = wavfile.read(full_voice_path)
        window_width = int(sample_rate * 0.025)
        overlap = int(sample_rate * (0.025 - 0.010))
        audio_segment = samples
        _, _, spectrogram = signal.spectrogram(audio_segment, sample_rate, 
                                               window=window, nfft=FFT_len, 
                                               nperseg=window_width, noverlap=overlap, 
                                               mode='magnitude')
#         log_spectogram = np.log(spectrogram)
        log_spectogram = spectrogram.copy()
        assert sample_rate == 16000
        
        if self.transform:
            frame = frame.astype(np.float32)
            log_spectogram = log_spectogram.astype(np.float32)
            frame, log_spectogram = self.transform((frame, log_spectogram))

        return (label, frame, log_spectogram)

In [5]:
LOG_PATH = '/home/vladimir/storage8tb/logs/LearnablePINs4/'
DATA_PATH = '/home/vladimir/storage8tb/data/voxceleb1/'
FACE_SUBNET_SNAPSHOT_PATH = os.path.join(LOG_PATH, 'face_subnet_snapshot.txt')
VOICE_SUBNET_SNAPSHOT_PATH = os.path.join(LOG_PATH, 'voice_subnet_snapshot.txt')
DEVICES = [0, 2]
# since audio tracks may have different lengths the computation cannot be 
# made in batches larger than 1 per device
B = len(DEVICES)
B = 1
# https://discuss.pytorch.org/t/guidelines-for-assigning-num-workers-to-dataloader/813/5
NUM_WORKERS = 4 * len(DEVICES)
MARGIN = 0.6

TEST_PATH = os.path.join(DATA_PATH, 'testpairs/testpairs')
TEST_RANDOM_SH = os.path.join(TEST_PATH, 'veriflist_test_random_seenheard.txt')
TEST_RANDOM_UU = os.path.join(TEST_PATH, 'veriflist_test_random_unseenunheard.txt')

In [6]:
test_transform = Compose([
    Normalize(),
    ToTensor(),
])

test = AudioFrameDatasetTest(DATA_PATH, TEST_RANDOM_SH, transform=test_transform)
testloader = torch.utils.data.DataLoader(test, batch_size=B, num_workers=NUM_WORKERS)

In [7]:
net = LearnablePinsNet()
net.face_subnet.load_state_dict(torch.load(FACE_SUBNET_SNAPSHOT_PATH))
net.voice_subnet.load_state_dict(torch.load(VOICE_SUBNET_SNAPSHOT_PATH))

In [8]:
device = "cuda"
torch.cuda.set_device(DEVICES[0])
net.to(device);
# net = nn.DataParallel(net, DEVICES)

In [9]:
net.eval()

results = pd.DataFrame(columns=['predict', 'label'])

for i, (labels, frames, log_specs) in tqdm(enumerate(testloader)):
    frames, log_specs = frames.cuda(async=True), log_specs.cuda(async=True)
    
    emb_f, emb_v = net(frames, log_specs)
    
    dists = torch.pairwise_distance(emb_f, emb_v)
    
    predict = dists < MARGIN
    
    batch_preds_labels = pd.DataFrame([
        pd.Series(predict, name='predict'), 
        pd.Series(labels, name='label')
    ]).transpose()
    
    results = results.append(batch_preds_labels, ignore_index=True)

5it [00:00, 32.75it/s]

tensor([0.3202], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3194], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3223], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3209], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.0305], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.0359], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.0296], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.0295], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3176], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3168], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3199], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3158], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3215], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3151], device='cuda:0', grad_fn=<NormBackward1>)


23it [00:00, 47.88it/s]

tensor([0.3216], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3211], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.0342], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.0445], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.0357], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.0400], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3178], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3243], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3170], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3171], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3107], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3212], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3113], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3148], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.0286], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.0272], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.0269], device='cuda:0', grad_fn=<NormBackward1

42it [00:00, 62.82it/s]

tensor([0.0277], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3098], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3195], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3101], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3161], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3114], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3174], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3118], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3123], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3210], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3214], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3247], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3210], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.0440], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.0473], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.0432], device='cuda:0', grad_fn=<NormBackward1>)


59it [00:00, 70.31it/s]

tensor([0.0450], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3229], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3160], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3205], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3151], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.1585], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.1573], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.1586], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.1599], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.0263], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.0389], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.0328], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.0293], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3180], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3152], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3195], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3109], device='cuda:0', grad_fn=<NormBackward1

81it [00:00, 84.54it/s]

tensor([0.2275], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.0861], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.0818], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.0833], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.0824], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.0299], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.0366], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.0326], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.0361], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.1143], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.1088], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.1121], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.1121], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3187], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3114], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3191], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3167], device='cuda:0', grad_fn=<NormBackward1

102it [00:01, 91.72it/s]

tensor([0.3170], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3113], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3165], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3112], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3187], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3152], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3188], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3129], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3189], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3227], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3181], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3157], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3198], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3171], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3202], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3106], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3236], device='cuda:0', grad_fn=<NormBackward1

122it [00:01, 94.41it/s]

tensor([0.3229], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3174], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3246], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3186], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3215], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3155], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3198], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3114], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3184], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3119], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3207], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3151], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.0291], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.0296], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.0289], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.0295], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3172], device='cuda:0', grad_fn=<NormBackward1

145it [00:01, 101.13it/s]

tensor([0.3185], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3115], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3130], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3247], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3265], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3177], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3220], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3144], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3229], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3137], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3173], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3153], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3141], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3127], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3161], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3146], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3172], device='cuda:0', grad_fn=<NormBackward1

167it [00:01, 103.88it/s]

tensor([0.3147], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3180], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3185], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3210], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3111], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3146], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3139], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3198], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3200], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3147], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3213], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3224], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3240], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3174], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3211], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3187], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3199], device='cuda:0', grad_fn=<NormBackward1

189it [00:02, 98.43it/s] 

tensor([0.3214], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3237], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3192], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3184], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3137], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3190], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3175], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3281], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3232], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3270], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3274], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3212], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3163], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3196], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3122], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3209], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3149], device='cuda:0', grad_fn=<NormBackward1

209it [00:02, 89.33it/s]

tensor([0.3122], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3201], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3164], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3197], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3162], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3206], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3139], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3155], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3141], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3185], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3105], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3240], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3180], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3238], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3193], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3207], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3242], device='cuda:0', grad_fn=<NormBackward1

230it [00:02, 94.43it/s]

tensor([0.3214], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3197], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3053], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3000], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3092], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3016], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3213], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3196], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3182], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3168], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3176], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3118], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3164], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3116], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3176], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3158], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3185], device='cuda:0', grad_fn=<NormBackward1

252it [00:02, 99.64it/s]

tensor([0.3152], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3104], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3177], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3196], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3164], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3146], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3189], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3171], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3183], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3129], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3182], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3134], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3218], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3173], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3183], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3108], device='cuda:0', grad_fn=<NormBackward1>)
tensor([0.3138], device='cuda:0', grad_fn=<NormBackward1

Process Process-7:
Process Process-1:
Process Process-5:
Process Process-6:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/usr/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/lib/python3.5/multiprocessing/process

KeyboardInterrupt: 