In [0]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
from skimage import io,transform
import torch
from tqdm import tqdm
from torch import nn
import torch.nn.functional as F
import os
import time
from torch.optim import lr_scheduler
from torch.utils.data import Dataset,DataLoader
from torchvision import transforms, utils,models
import copy
from skimage import io, transform

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [0]:
device

device(type='cuda', index=0)

In [0]:
!wget http://memorability.csail.mit.edu/lamem.tar.gz

--2019-12-08 11:28:41--  http://memorability.csail.mit.edu/lamem.tar.gz
Resolving memorability.csail.mit.edu (memorability.csail.mit.edu)... 128.30.195.49
Connecting to memorability.csail.mit.edu (memorability.csail.mit.edu)|128.30.195.49|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2708368436 (2.5G) [application/x-gzip]
Saving to: ‘lamem.tar.gz’


2019-12-08 11:32:07 (12.6 MB/s) - ‘lamem.tar.gz’ saved [2708368436/2708368436]



In [0]:
# Mount Google Drive (Colab-only module). The train/validation CSV files are read from
# "/content/drive/My Drive/..." in a later cell, so this must run first.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
# Unpack the LaMem archive. The datasets below read images from /content/lamem/images,
# so this presumably runs with /content as the working directory — TODO confirm.
!tar -xf /content/lamem.tar.gz

In [0]:
# Annotation tables for the two splits. AsetheticsDataset reads column 0 as the image
# file name (joined onto its root_dir) and column 1 as the memorability score.
dataset_train = pd.read_csv("/content/drive/My Drive/image memorability/dataset/train_dataset.csv")
dataset_validation = pd.read_csv("/content/drive/My Drive/image memorability/dataset/validation_dataset.csv")

In [0]:

class AsetheticsDataset(Dataset):
    """Image / memorability-score dataset backed by a pandas DataFrame.

    Every row of the frame is one sample: column 0 holds the image file name
    (relative to ``root_dir``) and column 1 holds its memorability score.
    """

    def __init__(self, dataframe, root_dir, transform=None):
        """
        Args:
            dataframe (pandas.DataFrame): Annotation table, one row per image.
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform applied to the
                sample dict before it is returned.
        """
        self.data = dataframe
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        """Number of rows in the annotation table."""
        return len(self.data)

    def __getitem__(self, idx):
        """Read the image of row ``idx`` and return it together with its score.

        Returns:
            dict with keys 'image' and 'memorability_score', passed through
            ``self.transform`` when one was given.
        """
        row = idx.tolist() if torch.is_tensor(idx) else idx

        image_path = os.path.join(self.root_dir, self.data.iloc[row, 0])
        sample = {
            'image': io.imread(image_path),
            'memorability_score': self.data.iloc[row, 1],
        }

        if not self.transform:
            return sample
        return self.transform(sample)

class Rescale(object):
    """Resize the image in a sample to a fixed spatial size with 3 channels.

    Args:
        output_size (tuple or int): Desired output size. If int, the image is
            resized to a square ``output_size x output_size`` (the aspect ratio
            is NOT preserved). If tuple, it is read as ``(height, width)``.
    """

    def __init__(self, output_size):
        assert isinstance(output_size, (int, tuple))
        self.output_size = output_size

    def _output_shape(self):
        """Return the (rows, cols, channels) shape handed to ``transform.resize``."""
        if isinstance(self.output_size, int):
            return (self.output_size, self.output_size, 3)
        # A tuple used to be accepted by __init__ but then broke the resize call;
        # interpret it as (height, width).
        new_h, new_w = self.output_size
        return (int(new_h), int(new_w), 3)

    def __call__(self, sample):
        """Resize ``sample['image']``; the memorability score is passed through."""
        image, mem_val = sample['image'], sample["memorability_score"]
        img = transform.resize(image, self._output_shape())
        return {'image': img, 'memorability_score': mem_val}
class ToTensor(object):
    """Turn the ndarray image of a sample into a channel-first torch Tensor."""

    def __call__(self, sample):
        """Return a new sample dict whose image is a C x H x W tensor."""
        picture = sample['image']
        score = sample['memorability_score']

        # numpy stores images as H x W x C, torch expects C x H x W
        channel_first = picture.transpose((2, 0, 1))

        return dict(image=torch.from_numpy(channel_first), memorability_score=score)
class Normalize(object):
    """Shift and scale the image of a sample: ``(image - mean) / std``."""

    def __init__(self, mean, std):
        self.mean, self.std = mean, std

    def __call__(self, sample):
        """Return a new sample dict with the normalized image; the score is untouched."""
        centered = sample["image"] - self.mean
        score = sample["memorability_score"]
        return {"image": centered / self.std, "memorability_score": score}

# Both splits read from the same image folder and share one preprocessing pipeline:
# resize to IMAGE_SIZE x IMAGE_SIZE x 3, convert to a C x H x W tensor, then (x - 0.5) / 0.5.
IMAGE_ROOT = "/content/lamem/images"
IMAGE_SIZE = 224
BATCH_SIZE = 32

preprocess = transforms.Compose([Rescale(IMAGE_SIZE), ToTensor(), Normalize(0.5, 0.5)])

transformed_dataset_train = AsetheticsDataset(dataset_train, root_dir=IMAGE_ROOT, transform=preprocess)
transformed_dataset_val = AsetheticsDataset(dataset_validation, root_dir=IMAGE_ROOT, transform=preprocess)

# Only the training split is shuffled; a fixed order keeps validation passes reproducible.
train_dataloader = DataLoader(transformed_dataset_train, batch_size=BATCH_SIZE, shuffle=True)
validation_dataloader = DataLoader(transformed_dataset_val, batch_size=BATCH_SIZE, shuffle=False)

# Looked up by phase name inside train_model.
dataloaders = {
    "train": train_dataloader,
    "val": validation_dataloader
}
dataset_sizes = {
    "train": len(dataset_train),
    "val": len(dataset_validation)
}

In [0]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=5):
    """Train ``model`` and keep the weights with the lowest validation loss.

    Every epoch runs a 'train' phase followed by a 'val' phase over the
    module-level ``dataloaders`` dict, moving each batch to the module-level
    ``device``. A batch is a dict with an "image" tensor and a
    "memorability_score" tensor.

    Args:
        model: network to optimise; it is updated in place.
        criterion: loss called as ``criterion(outputs, labels)``. Its value is
            treated as a per-batch mean when the epoch loss is aggregated.
        optimizer: optimizer that steps the model parameters.
        scheduler: learning-rate scheduler, stepped once after each training phase.
        num_epochs (int): number of epochs to run.

    Returns:
        tuple: ``(model, original_model, running_loss_history,
        val_running_loss_history)``. ``model`` carries the best validation
        weights, ``original_model`` is a deep copy holding the weights from the
        end of the last epoch, and the two lists hold the per-epoch mean
        training / validation loss.
    """
    since = time.time()
    running_loss_history = []
    val_running_loss_history = []
    best_model_wts = copy.deepcopy(model.state_dict())
    low_loss = np.inf

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            is_training = phase == 'train'
            if is_training:
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            samples_seen = 0

            progress = tqdm(dataloaders[phase], desc=phase)
            for batched_data in progress:
                inputs = batched_data["image"].to(device)
                labels = batched_data["memorability_score"].view(-1, 1).to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward; track history only in the training phase
                with torch.set_grad_enabled(is_training):
                    outputs = model(inputs)
                    # Cast the targets to the prediction dtype so the loss works
                    # whatever precision the model runs in.
                    loss = criterion(outputs, labels.to(outputs.dtype))
                    if is_training:
                        loss.backward()
                        optimizer.step()

                # statistics: show the batch loss on the progress bar instead of
                # printing one line per batch
                batch_loss = loss.item()
                progress.set_postfix(loss=batch_loss)
                running_loss += batch_loss * inputs.size(0)
                samples_seen += inputs.size(0)

            if is_training:
                scheduler.step()

            # Average over the samples actually iterated (guarding an empty loader)
            epoch_loss = running_loss / max(samples_seen, 1)
            if is_training:
                running_loss_history.append(epoch_loss)
            else:
                val_running_loss_history.append(epoch_loss)

            print('{} Loss: {:.4f}'.format(
                phase, epoch_loss))

            # remember the weights of the best validation epoch so far
            if phase == 'val' and epoch_loss < low_loss:
                print("saving best model......")
                low_loss = epoch_loss
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))

    # keep a copy of the last-epoch model, then load the best weights into `model`
    original_model = copy.deepcopy(model)
    model.load_state_dict(best_model_wts)
    return model, original_model, running_loss_history, val_running_loss_history

In [0]:
# identity block
class IdentityBlock(nn.Module):
    """Bottleneck residual block whose shortcut is the unmodified input.

    Main path: 1x1 conv -> BN -> ReLU -> f x f conv ("same" padding) -> BN -> ReLU
    -> 1x1 conv -> BN. The block input is then added and a final ReLU applied,
    so ``filters[2]`` has to equal ``in_channels``.

    Args:
        f (int): side length of the square kernel used by the middle convolution.
        in_channels (int): number of channels of the input to this block.
        filters (sequence of 3 ints): output channels of the three convolutions.
    """

    def __init__(self, f, in_channels, filters):
        super(IdentityBlock, self).__init__()
        self.in_channels = in_channels
        self.kernel_size = f
        self.F1, self.F2, self.F3 = filters[0], filters[1], filters[2]

        window = (self.kernel_size, self.kernel_size)

        # 1x1 reduction
        self.conv1 = self.conv_layer(in_channels=self.in_channels, out_channels=self.F1,
                                     kernel_size=(1, 1), padding=0)
        self.bn1 = nn.BatchNorm2d(self.F1)
        # f x f convolution that keeps the spatial size
        self.conv2 = self.conv_layer(in_channels=self.F1, out_channels=self.F2,
                                     kernel_size=window, padding="same")
        self.bn2 = nn.BatchNorm2d(self.F2)
        # 1x1 expansion
        self.conv3 = self.conv_layer(in_channels=self.F2, out_channels=self.F3,
                                     kernel_size=(1, 1), padding=0)
        self.bn3 = nn.BatchNorm2d(self.F3)

    def conv_layer(self, in_channels, out_channels, kernel_size, padding=0, stride=1):
        """Build a ``nn.Conv2d``.

        Args:
            in_channels: number of input channels.
            out_channels: number of feature maps produced.
            kernel_size: tuple, filter size.
            padding: an int, or "same" to pad by ``(kernel_size[0] - 1) / 2``
                rounded down [default: 0].
            stride: stride length [default: 1].

        Returns:
            the convolution layer.
        """
        pad = int((kernel_size[0] - 1) / 2) if padding == "same" else padding
        return nn.Conv2d(in_channels, out_channels, kernel_size, stride=stride, padding=pad)

    def forward(self, x):
        """Apply the three conv/BN stages, add the input back and apply ReLU."""
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        return F.relu(out + x)
        

In [0]:
#convolutional block
class ConvolutionalBlock(nn.Module):
    '''
    Bottleneck residual block with a projection shortcut.

    Main path: strided 1x1 conv -> BN -> ReLU -> f x f conv ("same" padding) -> BN
    -> ReLU -> 1x1 conv -> BN. The shortcut is a strided 1x1 conv followed by BN, so
    it matches the main path in channels (filters[2]) and spatial size before the
    two are added and passed through a final ReLU.

    Args:
    f           : the size of filter that will be used in the intermediate layers of this convolutional block
    in_channels : the number of input channels from the input tensor
    filters     : list of integers (len=3), count of feature maps to be produced
    stride      : the length of stride used by the first conv and by the shortcut conv
    '''
    def __init__(self,f,in_channels,filters,stride=2):
        super(ConvolutionalBlock,self).__init__()
        self.in_channels = in_channels
        self.kernel_size = f
        self.F1 = filters[0]
        self.F2 = filters[1]
        self.F3 = filters[2]
        self.stride = stride
        #first component
        self.conv1 = self.conv_layer(self.in_channels,self.F1,(1,1),padding=0,stride=self.stride)
        self.bn1 = nn.BatchNorm2d(self.F1)

        #second component
        self.conv2 = self.conv_layer(self.F1,self.F2,(f,f),padding="same")
        self.bn2 = nn.BatchNorm2d(self.F2)

        #third component
        self.conv3 = self.conv_layer(self.F2,self.F3,(1,1))
        self.bn3 = nn.BatchNorm2d(self.F3)

        #shortcut component
        self.sconv = self.conv_layer(self.in_channels,self.F3,(1,1),padding=0,stride=self.stride)
        self.sbn = nn.BatchNorm2d(self.F3)

    def forward(self,x):
        '''Run the main path and the projected shortcut, add them and apply ReLU.'''
        x_shortcut = x

        x = self.conv1(x)
        x = self.bn1(x)
        x = F.relu(x)

        x = self.conv2(x)
        x = self.bn2(x)
        x = F.relu(x)

        x = self.conv3(x)
        x = self.bn3(x)

        # project the input so it can be added to the main path
        x_shortcut = self.sconv(x_shortcut)
        x_shortcut = self.sbn(x_shortcut)

        x = x+x_shortcut
        x = F.relu(x)
        return x

    def conv_layer(self,in_channels,out_channels,kernel_size,padding=0,stride=1):
        '''
        Args:
           in_channels  : the number of input channel from the input tensor
           out_channels : the number of output channels of the feature map
           kernel_size  : filter size (tuple)
           padding      : an int, or "same" to pad by (kernel_size[0]-1)//2 [default : 0]
           stride       : the stride length [default : 1]

        Output:
            Convolutional layer
        '''
        if padding=="same":
            padding = (kernel_size[0]-1)//2
        # Keyword arguments on purpose: the 6th positional parameter of nn.Conv2d is
        # `dilation`, and the stride used to be passed there by mistake.
        return nn.Conv2d(in_channels,out_channels,kernel_size,stride=stride,padding=padding)
    
        

In [0]:
class ResNet50(nn.Module):
    """Reduced ResNet-style regressor that outputs one value per image.

    Despite the name this is not the full 50-layer network: after the stem it
    has three stages (2, 3 and 4), each made of one convolutional block and one
    identity block, followed by global average pooling and a linear layer
    mapping 1024 features to a single output.

    Args:
        in_channels (int): number of channels of the input images.
        identity_block: class called as ``identity_block(f, in_channels, filters)``.
        convolutional_block: class called as
            ``convolutional_block(f, in_channels, filters[, stride])``.
    """

    def __init__(self,in_channels,identity_block,convolutional_block):
        super(ResNet50,self).__init__()

        self.in_channels = in_channels
        self.convolutional_block = convolutional_block
        self.identity_block = identity_block

        #stage1 (stem)
        self.stage1_conv = self.conv_layer(self.in_channels,64,(7,7),padding=0,stride=2)
        self.stage1_bn = nn.BatchNorm2d(64)
        self.stage1_maxpool = nn.MaxPool2d(kernel_size=(3,3),stride=(2,2))

        #stage2 (stride 1: the stem has already reduced the resolution)
        self.stage2_convblock = self.convolutional_block(3,64,[64,64,256],1)
        self.stage2_identity_block1 = self.identity_block(3,256,[64,64,256])

        #stage3
        self.stage3_convblock = self.convolutional_block(3,256,[128,128,512])
        self.stage3_identity_block1 = self.identity_block(3,512,[128,128,512])

        #stage4
        self.stage4_convblock = self.convolutional_block(3,512,[128,128,1024])
        self.stage4_identity_block1 = self.identity_block(3,1024,[128,128,1024])

        #pooling + regression head
        self.gap = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(1024,1)

    def forward(self,x):
        """Return a tensor of shape (batch, 1) for a batch of images."""
        #stage1
        x = self.stage1_conv(x)
        x = self.stage1_bn(x)
        x = F.relu(x)
        x = self.stage1_maxpool(x)

        #stage2
        x = self.stage2_convblock(x)
        x = self.stage2_identity_block1(x)

        #stage3
        x = self.stage3_convblock(x)
        x = self.stage3_identity_block1(x)

        #stage4
        x = self.stage4_convblock(x)
        x = self.stage4_identity_block1(x)

        #pooling: (N, 1024, 1, 1) -> (N, 1024). Only the two trailing axes are
        #squeezed, so a batch of one keeps its batch dimension.
        x = self.gap(x)
        x = x.squeeze(-1).squeeze(-1)

        #fully connected
        return self.fc(x)

    def conv_layer(self,in_channels,out_channels,kernel_size,padding=0,stride=1):
        """Build a ``nn.Conv2d``; ``padding="same"`` pads by ``(kernel_size[0]-1)/2`` rounded down."""
        if padding=="same":
            padding = int((kernel_size[0]-1)/2)
        return nn.Conv2d(in_channels,out_channels,kernel_size,stride,padding)

In [0]:
class VRNet(nn.Module):
    """Residual trunk with multi-scale side branches, regressing one value per image.

    The trunk is a stem (7x7 conv, BN, ReLU, max-pool) followed by stages 2 and 3,
    each one convolutional block plus two identity blocks. After stages 1, 2 and 3
    the features go through a side branch (3x3 conv with stride 2 -> adaptive
    average pool to 64x64 -> 1x1 conv with stride 2). The three branch outputs
    (32 + 32 + 64 channels) are concatenated along the channel axis and reduced by
    a 1x1 conv, a 3x3 conv, global average pooling and a linear layer to one output.

    Args:
        in_channels (int): number of channels of the input images.
        identity_block: class called as ``identity_block(f, in_channels, filters)``.
        convolutional_block: class called as
            ``convolutional_block(f, in_channels, filters[, stride])``.
    """

    def __init__(self, in_channels, identity_block, convolutional_block):
        super(VRNet, self).__init__()

        self.in_channels = in_channels
        self.convolutional_block = convolutional_block
        self.identity_block = identity_block

        # ---- stage 1: stem + side branch ----
        self.stage1_conv = self.conv_layer(self.in_channels, 64, (7, 7), padding=0, stride=2)
        self.stage1_bn = nn.BatchNorm2d(64)
        self.stage1_maxpool = nn.MaxPool2d(kernel_size=(3, 3), stride=(2, 2))

        self.stage1_interconv3x3 = self.conv_layer(64, 64, (3, 3), padding=0, stride=2)
        self.stage1_interpool = nn.AdaptiveAvgPool2d(64)
        self.stage1_intercon1x1 = self.conv_layer(64, 32, (1, 1), padding=0, stride=2)

        # ---- stage 2: residual blocks + side branch ----
        self.stage2_convblock = self.convolutional_block(3, 64, [64, 64, 256], 1)
        self.stage2_identity_block1 = self.identity_block(3, 256, [64, 64, 256])
        self.stage2_identity_block2 = self.identity_block(3, 256, [64, 64, 256])

        self.stage2_interconv3x3 = self.conv_layer(256, 64, (3, 3), padding=0, stride=2)
        self.stage2_interpool = nn.AdaptiveAvgPool2d(64)
        self.stage2_intercon1x1 = self.conv_layer(64, 32, (1, 1), padding=0, stride=2)

        # ---- stage 3: residual blocks + side branch ----
        self.stage3_convblock = self.convolutional_block(3, 256, [128, 128, 512])
        self.stage3_identity_block1 = self.identity_block(3, 512, [128, 128, 512])
        self.stage3_identity_block2 = self.identity_block(3, 512, [128, 128, 512])

        self.stage3_interconv3x3 = self.conv_layer(512, 128, (3, 3), padding=0, stride=2)
        self.stage3_interpool = nn.AdaptiveAvgPool2d(64)
        self.stage3_intercon1x1 = self.conv_layer(128, 64, (1, 1), padding=0, stride=2)

        # ---- fusion head over the concatenated branches ----
        self.final_conv1x1 = self.conv_layer((32 + 32 + 64), 64, (1, 1), padding=0, stride=2)
        self.final_conv3x3 = self.conv_layer(64, 128, (3, 3), padding=0, stride=2)
        self.final_pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(128, 1)

    def forward(self, x):
        """Return a tensor of shape (batch, 1) for a batch of images."""
        # stage 1
        trunk = self.stage1_maxpool(F.relu(self.stage1_bn(self.stage1_conv(x))))
        branch1 = self.stage1_intercon1x1(
            self.stage1_interpool(self.stage1_interconv3x3(trunk)))

        # stage 2
        trunk = self.stage2_convblock(trunk)
        trunk = self.stage2_identity_block2(self.stage2_identity_block1(trunk))
        branch2 = self.stage2_intercon1x1(
            self.stage2_interpool(self.stage2_interconv3x3(trunk)))

        # stage 3 (the trunk ends here, so its branch is the last use of the features)
        trunk = self.stage3_convblock(trunk)
        trunk = self.stage3_identity_block2(self.stage3_identity_block1(trunk))
        branch3 = self.stage3_intercon1x1(
            self.stage3_interpool(self.stage3_interconv3x3(trunk)))

        # concatenate all feature maps along the channel axis
        fused = torch.cat((branch1, branch2, branch3), 1)

        # final stage
        fused = self.final_pool(self.final_conv3x3(self.final_conv1x1(fused)))
        fused = fused.squeeze()
        if fused.dim() == 1:
            # a batch of one lost its batch axis in squeeze(); restore it
            fused = fused.unsqueeze(0)

        return self.fc(fused)

    def conv_layer(self, in_channels, out_channels, kernel_size, padding=0, stride=1):
        """Build a ``nn.Conv2d``; ``padding="same"`` pads by ``(kernel_size[0]-1)/2`` rounded down."""
        pad = int((kernel_size[0] - 1) / 2) if padding == "same" else padding
        return nn.Conv2d(in_channels, out_channels, kernel_size, stride, pad)

In [0]:
# NOTE: despite the variable name this is a VRNet instance. It is cast to float64
# with .double(); train_model converts the labels with .double() as well.
resnet =VRNet(3,IdentityBlock,ConvolutionalBlock).to(device).double()

In [0]:
# NOTE(review): this 22x22 batch is not passed to the model in any visible cell;
# `x` is reassigned to a 224x224 batch further down before the forward pass.
x = torch.rand([32,3,22,22]).to(device).double()

In [0]:

# Regression setup: mean-squared-error loss, Adam on the parameters of `resnet`,
# and a step scheduler that multiplies the learning rate by 0.1 every 7 scheduler steps.
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(params=resnet.parameters(), lr=1e-4)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer=optimizer, step_size=7, gamma=0.1)

In [0]:
# Train the network created above. `resnet` must be the model passed in: it is the one
# whose parameters `optimizer` was built from, and `best_model` does not exist yet on a
# fresh kernel.
best_model,model,running_loss_history,val_running_loss_history = train_model(resnet,criterion,optimizer,exp_lr_scheduler,1)
# torch.save(best_model.state_dict(),"/content/drive/My Drive/image memorability/saved models/Vnet_weights(2).pt")


VRNet(
  (stage1_conv): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2))
  (stage1_bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (stage1_maxpool): MaxPool2d(kernel_size=(3, 3), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
  (stage1_interconv3x3): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2))
  (stage1_interpool): AdaptiveAvgPool2d(output_size=64)
  (stage1_intercon1x1): Conv2d(64, 32, kernel_size=(1, 1), stride=(2, 2))
  (stage2_convblock): ConvolutionalBlock(
    (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1))
    (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)


In [0]:
# Random float64 batch of 128 RGB images at 224x224, used to smoke-test the forward pass.
x = torch.rand(128, 3, 224, 224).to(device).double()

In [0]:
# Forward-pass smoke test. no_grad avoids building an autograd graph for the whole
# 128-image batch; the trailing expression displays the output shape.
with torch.no_grad():
    a = resnet(x)
a.shape

torch.Size([128, 1])


In [0]:
# LSTM with CNN