In [None]:
import torch
import torch.nn as nn
import numpy as np
import torch.nn.functional as F
import os
import random
import PIL.Image as Image
import numpy as np
import cv2
import time
import math
import sys
from torch.autograd import Variable
from functools import partial
from __future__ import print_function
from torch.utils.data import DataLoader
from logging import Logger

In [None]:
def conv_S(in_planes,out_planes,stride=1,padding=1):
    """Build the spatial convolution of a P3D block: a bias-free 1x3x3 ``nn.Conv3d``.

    Args:
        in_planes: number of input channels.
        out_planes: number of output channels.
        stride: stride forwarded to ``nn.Conv3d`` (int or 3-tuple).
        padding: padding forwarded to ``nn.Conv3d`` (int or 3-tuple).

    Returns:
        The constructed ``nn.Conv3d`` module.
    """
    # The stride used to be hard-coded to 1, silently ignoring the argument;
    # it is now forwarded (the default of 1 keeps existing calls unchanged).
    return nn.Conv3d(in_planes,out_planes,kernel_size=(1,3,3),stride=stride,
                     padding=padding,bias=False)

def conv_T(in_planes,out_planes,stride=1,padding=1):
    """Build the temporal convolution of a P3D block: a bias-free 3x1x1 ``nn.Conv3d``.

    Args:
        in_planes: number of input channels.
        out_planes: number of output channels.
        stride: stride forwarded to ``nn.Conv3d`` (int or 3-tuple).
        padding: padding forwarded to ``nn.Conv3d`` (int or 3-tuple).

    Returns:
        The constructed ``nn.Conv3d`` module.
    """
    # The stride used to be hard-coded to 1, silently ignoring the argument;
    # it is now forwarded (the default of 1 keeps existing calls unchanged).
    return nn.Conv3d(in_planes,out_planes,kernel_size=(3,1,1),stride=stride,
                     padding=padding,bias=False)

In [None]:
def downsample_basic_block(x, planes, stride):
    """Parameter-free shortcut: subsample the input, then zero-pad its channels.

    Args:
        x: 5-D tensor; dimension 1 is treated as the channel dimension.
        planes: channel count of the result; must not be smaller than the
            channel count of ``x``.
        stride: stride of the kernel-size-1 average pooling used to subsample.

    Returns:
        The pooled input concatenated along dimension 1 with all-zero
        channels so that the result has ``planes`` channels.
    """
    out = F.avg_pool3d(x, kernel_size=1, stride=stride)
    # new_zeros creates the padding with the dtype and device of `out`, which
    # replaces both the float32-only torch.Tensor(...) and the manual CUDA check.
    zero_pads = out.new_zeros(out.size(0), planes - out.size(1),
                              out.size(2), out.size(3), out.size(4))
    # Concatenate the tensor itself (not `.data`) so gradients still flow
    # through the shortcut branch.
    return torch.cat([out, zero_pads], dim=1)

In [None]:
class Bottleneck(nn.Module):
    """Residual bottleneck block that is pseudo-3D for early blocks and 2-D afterwards.

    A block whose index ``n_s`` is below ``depth_3d`` is built from 3-D
    layers: a 1x1x1 reduction, a spatial conv (``conv_S``) and a temporal conv
    (``conv_T``) combined according to the variant chosen from ``ST_struc``,
    and a 1x1x1 expansion. A block at or beyond ``depth_3d`` is built from the
    2-D equivalents with a single 3x3 convolution in the middle.

    Args:
        inplanes: number of input channels.
        planes: bottleneck width; the block outputs ``planes * expansion`` channels.
        stride: stored as ``self.stride``. It is used as the stride of the first
            convolution only for 3-D blocks with ``n_s != 0`` and no
            ``downsample``; the other cases override it (see ``__init__``).
        downsample: optional callable applied to the block input to produce
            the residual; ``None`` means the input itself is the residual.
        n_s: index of this block within the network.
        depth_3d: number of leading blocks that use 3-D layers.
        ST_struc: sequence of variant names; this block uses
            ``ST_struc[n_s % len(ST_struc)]``.

    Raises:
        ValueError: if a 3-D block selects a variant other than 'A', 'B' or 'C'.
    """
    expansion=4
    def __init__(self,inplanes,planes,stride=1,downsample=None,n_s=0,depth_3d=47,ST_struc=('A','B','C')):
        super(Bottleneck,self).__init__()
        # NOTE: sub-modules are assigned in a fixed order (downsample, conv1,
        # bn1, conv2/3 or conv_normal, conv4, relu). Code that walks
        # model.modules() in order depends on it, so keep it stable.
        self.downsample=downsample
        self.depth_3d=depth_3d
        self.ST_struc=ST_struc
        self.len_ST=len(self.ST_struc)
        stride_p=stride
        if self.downsample is not None:
            # A block with a projection shortcut halves height/width only.
            stride_p=(1,2,2)
        if n_s<self.depth_3d:
            if n_s==0:
                # The very first block never strides.
                stride_p=1
            self.conv1=nn.Conv3d(inplanes,planes,kernel_size=1,bias=False,stride=stride_p)
            self.bn1=nn.BatchNorm3d(planes)
        else:
            # 2-D part: only its first block (n_s == depth_3d) strides.
            if n_s==self.depth_3d:
                stride_p=2
            else:
                stride_p=1
            self.conv1=nn.Conv2d(inplanes,planes,kernel_size=1,bias=False,stride=stride_p)
            self.bn1=nn.BatchNorm2d(planes)
        self.id=n_s
        self.ST=list(self.ST_struc)[self.id % self.len_ST]
        if self.id<self.depth_3d:
            # Fail at construction time: forward() used to skip the
            # spatial/temporal convolutions silently for an unknown variant.
            if self.ST not in ('A','B','C'):
                raise ValueError("unknown ST structure {!r}; expected 'A', 'B' or 'C'".format(self.ST))
            self.conv2=conv_S(planes,planes,stride=1,padding=(0,1,1))
            self.bn2=nn.BatchNorm3d(planes)
            self.conv3=conv_T(planes,planes,stride=1,padding=(1,0,0))
            self.bn3=nn.BatchNorm3d(planes)
        else:
            self.conv_normal=nn.Conv2d(planes,planes,kernel_size=3,stride=1,padding=1,bias=False)
            self.bn_normal=nn.BatchNorm2d(planes)

        # Use the class attribute instead of a literal 4 so the output width
        # always agrees with `expansion`.
        if n_s<self.depth_3d:
            self.conv4=nn.Conv3d(planes,planes*self.expansion,kernel_size=1,bias=False)
            self.bn4=nn.BatchNorm3d(planes*self.expansion)
        else:
            self.conv4=nn.Conv2d(planes,planes*self.expansion,kernel_size=1,bias=False)
            self.bn4=nn.BatchNorm2d(planes*self.expansion)
        self.relu=nn.ReLU(inplace=True)
        self.stride=stride

    def ST_A(self,x):
        """Variant A: spatial conv followed by temporal conv (serial)."""
        x=self.conv2(x)
        x=self.bn2(x)
        x=self.relu(x)
        x=self.conv3(x)
        x=self.bn3(x)
        x=self.relu(x)
        return x
    def ST_B(self,x):
        """Variant B: spatial and temporal convs both applied to the input, results summed."""
        tmp_x=self.conv2(x)
        tmp_x=self.bn2(tmp_x)
        tmp_x=self.relu(tmp_x)
        x=self.conv3(x)
        x=self.bn3(x)
        x=self.relu(x)
        return x+tmp_x
    def ST_C(self,x):
        """Variant C: spatial conv, plus the temporal conv of its output added back to it."""
        x=self.conv2(x)
        x=self.bn2(x)
        x=self.relu(x)
        tmp_x=self.conv3(x)
        tmp_x=self.bn3(tmp_x)
        tmp_x=self.relu(tmp_x)
        return x+tmp_x
    def forward(self,x):
        """Run the block and add the residual, which is ``downsample(x)`` when a shortcut is set, else ``x``."""
        residual=x
        out=self.conv1(x)
        out=self.bn1(out)
        out=self.relu(out)

        if self.id<self.depth_3d:
            if self.ST=='A':
                out=self.ST_A(out)
            elif self.ST=='B':
                out=self.ST_B(out)
            elif self.ST=='C':
                out=self.ST_C(out)
        else:
            out=self.conv_normal(out)
            out=self.bn_normal(out)
            out=self.relu(out)
        out=self.conv4(out)
        out=self.bn4(out)
        if self.downsample is not None:
            residual=self.downsample(x)
        out+=residual
        out=self.relu(out)

        return out

In [None]:
class P3D(nn.Module):
    """Pseudo-3D residual network: a 3-D stem, three 3-D stages and one 2-D stage.

    Args:
        block: block class used for every stage. It must expose an
            ``expansion`` attribute and accept
            ``(inplanes, planes, stride, downsample, n_s=, depth_3d=, ST_struc=)``.
        layers: sequence of four ints, the number of blocks per stage.
        modality: ``'RGB'`` selects 3 input channels, anything else selects 2.
        shortcut_type: ``'A'`` uses ``downsample_basic_block``; any other value
            uses a 1x1 convolution plus batch norm as the shortcut.
        num_classes: output size of the final linear layer.
        dropout: dropout probability applied before the final linear layer.
        ST_struc: sequence of block variants forwarded to every block.
    """

    def __init__(self, block, layers, modality='RGB',
        shortcut_type='B', num_classes=3,dropout=0.5,ST_struc=('A','B','C')):
        super(P3D, self).__init__()
        print('num_classes=',num_classes)
        print('dropout=',dropout)
        self.inplanes = 64
        self.input_channel = 3 if modality=='RGB' else 2  # 2 is for flow
        self.ST_struc=ST_struc

        self.conv1_custom = nn.Conv3d(self.input_channel, 64, kernel_size=(1,7,7), stride=(1,2,2),
                                padding=(0,3,3), bias=False)

        self.depth_3d=sum(layers[:3])# C3D layers are only (res2,res3,res4),  res5 is C2D

        self.bn1 = nn.BatchNorm3d(64) # bn1 follows conv1
        self.cnt=0                    # running block index, advanced by _make_layer
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool3d(kernel_size=(2, 3, 3), stride=2, padding=(0,1,1))       # pooling layer for conv1.
        self.maxpool_2 = nn.MaxPool3d(kernel_size=(2,1,1),padding=0,stride=(2,1,1))   # pooling layer for res2, 3, 4.

        self.layer1 = self._make_layer(block, 64, layers[0], shortcut_type)
        self.layer2 = self._make_layer(block, 128, layers[1], shortcut_type, stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], shortcut_type, stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], shortcut_type, stride=2)

        self.avgpool = nn.AvgPool2d(kernel_size=(5, 5), stride=1)                              # pooling layer for res5.
        self.dropout=nn.Dropout(p=dropout)
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        # NOTE(review): only Conv3d/BatchNorm3d are re-initialised here (Conv2d
        # and BatchNorm2d keep PyTorch's defaults) and `n` uses just the first
        # two kernel dimensions. Left unchanged so initial weights stay the same.
        for m in self.modules():
            if isinstance(m, nn.Conv3d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm3d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

        # Input description read by the properties below. These assignments
        # were commented out, which made scale_size / temporal_length /
        # crop_size fail on every access.
        self.input_size=(self.input_channel,16,160,160)       # input of the network
        self.input_mean = [0.485, 0.456, 0.406] if modality=='RGB' else [0.5]
        self.input_std = [0.229, 0.224, 0.225] if modality=='RGB' else [np.mean([0.229, 0.224, 0.225])]


    @property
    def scale_size(self):
        """``input_size[2]`` rescaled by 256/160."""
        return self.input_size[2] * 256 // 160   # assume that raw images are resized (340,256).

    @property
    def temporal_length(self):
        """``input_size[1]``, the number of frames per clip."""
        return self.input_size[1]

    @property
    def crop_size(self):
        """``input_size[2]``, the crop size of the input."""
        return self.input_size[2]

    def _make_layer(self, block, planes, blocks, shortcut_type, stride=1):
        """Build one stage of ``blocks`` blocks and advance ``self.cnt`` / ``self.inplanes``.

        The first block receives ``stride`` and, when the stride is not 1 or
        the channel count changes, a shortcut (``downsample``). The remaining
        blocks use the block's default stride and no shortcut.
        """
        downsample = None
        stride_p=stride #especially for downsample branch.
        is_3d=self.cnt<self.depth_3d

        if is_3d:
            # 3-D shortcuts never stride over dimension 2; the very first
            # stage does not stride at all.
            if self.cnt==0:
                stride_p=1
            else:
                stride_p=(1,2,2)

        if stride != 1 or self.inplanes != planes * block.expansion:
            if shortcut_type == 'A':
                # NOTE(review): this passes the plain `stride`, not `stride_p`,
                # so a type-'A' shortcut in a 3-D stage strides every pooled
                # dimension - confirm this is intended.
                downsample = partial(downsample_basic_block,
                                     planes=planes * block.expansion,
                                     stride=stride)
            elif is_3d:
                downsample = nn.Sequential(
                    nn.Conv3d(self.inplanes, planes * block.expansion,
                              kernel_size=1, stride=stride_p, bias=False),
                    nn.BatchNorm3d(planes * block.expansion)
                )
            else:
                downsample = nn.Sequential(
                    nn.Conv2d(self.inplanes, planes * block.expansion,
                              kernel_size=1, stride=2, bias=False),
                    nn.BatchNorm2d(planes * block.expansion)
                )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample,n_s=self.cnt,depth_3d=self.depth_3d,ST_struc=self.ST_struc))
        self.cnt+=1

        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes,n_s=self.cnt,depth_3d=self.depth_3d,ST_struc=self.ST_struc))
            self.cnt+=1

        return nn.Sequential(*layers)

    def forward(self, x):
        """Compute class scores for a 5-D batch of clips.

        The 2-D stage treats every remaining position of dimension 2 as a
        separate sample, so the result has one row per (clip, position) pair.
        """
        x = self.conv1_custom(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.maxpool_2(self.layer1(x))  # Part Res2
        x = self.maxpool_2(self.layer2(x))  # Part Res3
        x = self.maxpool_2(self.layer3(x))  # Part Res4

        sizes=x.size()
        # Fold dimension 2 into the batch dimension for the 2-D stage. It has
        # to be moved next to the batch dimension first: a bare view() on the
        # original layout mixes channels with dimension 2 whenever its length
        # is greater than 1. For length 1 the result is unchanged.
        x = x.transpose(1,2).contiguous().view(-1,sizes[1],sizes[3],sizes[4])  #  Part Res5

        x = self.layer4(x)

        x = self.avgpool(x)

        x = x.view(-1,self.fc.in_features)

        x = self.fc(self.dropout(x))

        return x


In [None]:
def get_optim_policies(model=None,modality='RGB',enable_pbn=True):
    """Split a model's parameters into groups with learning-rate / decay multipliers.

    Args:
        model: the ``nn.Module`` whose parameters are grouped.
        modality: ``'Flow'`` raises the multipliers of the first convolution.
        enable_pbn: when true, only the first ``BatchNorm3d`` contributes its
            parameters to the BN group; every ``BatchNorm2d`` always does.

    Returns:
        A list of seven dicts with the keys ``params``, ``lr_mult``,
        ``decay_mult`` and ``name``. The first 70% of the conv/linear layers
        after the first convolution (in ``model.modules()`` order) form the
        "slow" groups and the remainder the "normal" groups.

    Raises:
        ValueError: if ``model`` is ``None``, or if a leaf module with
            parameters is not a conv, linear or batch-norm layer.
    """
    if model is None:
        # Previously logged through an undefined `log` object (NameError) and
        # then called exit(); raise a regular exception instead.
        raise ValueError('no model')

    first_conv_weight=[]
    first_conv_bias=[]
    # (weight, bias-or-None) for every conv/linear layer after the first conv.
    # Keeping the pair together lets each bias follow its own weight into the
    # slow or normal group below.
    later_layers=[]
    bn=[]
    conv_cnt=0
    bn_cnt=0
    for m in model.modules():
        if isinstance(m,(torch.nn.Conv3d,torch.nn.Conv2d)):
            ps=list(m.parameters())
            bias=ps[1] if len(ps)==2 else None
            conv_cnt+=1
            if conv_cnt==1:
                first_conv_weight.append(ps[0])
                if bias is not None:
                    first_conv_bias.append(bias)
            else:
                later_layers.append((ps[0],bias))
        elif isinstance(m,torch.nn.Linear):
            ps=list(m.parameters())
            later_layers.append((ps[0],ps[1] if len(ps)==2 else None))
        elif isinstance(m,torch.nn.BatchNorm3d):
            bn_cnt+=1
            if not enable_pbn or bn_cnt==1:
                bn.extend(list(m.parameters()))
        elif isinstance(m,torch.nn.BatchNorm2d):
            bn.extend(list(m.parameters()))
        elif len(m._modules)==0:
            if len(list(m.parameters()))>0:
                raise ValueError("{}. Need to give it a learning policy".format(type(m)))

    slow_rate=0.7
    n_fore=int(len(later_layers)*slow_rate)
    slow_layers=later_layers[:n_fore]
    normal_layers=later_layers[n_fore:]
    slow_feat=[weight for weight,_ in slow_layers]
    slow_bias=[bias for _,bias in slow_layers if bias is not None]
    normal_feat=[weight for weight,_ in normal_layers]
    normal_bias=[bias for _,bias in normal_layers if bias is not None]
    return [
        {'params': first_conv_weight, 'lr_mult': 5 if modality == 'Flow' else 1, 'decay_mult': 1,
         'name': "first_conv_weight"},
        {'params': first_conv_bias, 'lr_mult': 10 if modality == 'Flow' else 2, 'decay_mult': 0,
         'name': "first_conv_bias"},
        {'params': slow_feat, 'lr_mult': 1, 'decay_mult': 1,
         'name': "slow_feat"},
        {'params': slow_bias, 'lr_mult': 2, 'decay_mult': 0,
         'name': "slow_bias"},
        {'params': normal_feat, 'lr_mult': 1 , 'decay_mult': 1,
         'name': "normal_feat"},
        {'params': normal_bias, 'lr_mult': 2, 'decay_mult':0,
         'name': "normal_bias"},
        {'params': bn, 'lr_mult': 1, 'decay_mult': 0,
         'name': "BN scale/shift"},
    ]

In [None]:
def P3D199(pretrained=False,modality='RGB',**kwargs):
    """construct a P3D199 model based on a ResNet-152-3D model.

    Args:
        pretrained: when true, load weights from a checkpoint file in the
            working directory (see ``checkpoint_files`` below).
        modality: ``'RGB'`` or ``'Flow'``; forwarded to ``P3D`` and used to
            pick the checkpoint file.
        **kwargs: forwarded to ``P3D``.

    Returns:
        The constructed ``P3D`` model.

    Raises:
        ValueError: if ``pretrained`` is set and ``modality`` has no checkpoint.
    """
    checkpoint_files={'RGB':'p3d_rgb_199.checkpoint.pth.tar',
                      'Flow':'p3d_flow_199.checkpoint.pth.tar'}
    if pretrained and modality not in checkpoint_files:
        # Previously this fell through and failed later on an unbound
        # `pretrained_file` variable.
        raise ValueError('no pretrained weights for modality {!r}'.format(modality))

    model = P3D(Bottleneck, [3, 8, 36, 3], modality=modality,**kwargs)

    if pretrained:
        # map_location='cpu' lets the checkpoint load on machines without the
        # device it was saved from; load_state_dict copies the values into the
        # model's own parameters.
        weights=torch.load(checkpoint_files[modality],map_location='cpu')['state_dict']
        model.load_state_dict(weights)
    return model

In [None]:
# Build the randomly initialised 3-class network and move it to the GPU.
model = P3D199(pretrained=False, num_classes=3, dropout=0.5).cuda()

In [None]:
#utils function for preparing our datasets.
def get_frames_data(filename,num_frames_per_clip=16):
    """Load a random run of consecutive frames from a directory of images.

    Args:
        filename: path of the directory that holds the frame images.
        num_frames_per_clip: number of consecutive frames (in sorted file-name
            order) to load.

    Returns:
        ``(frames, start_index)``: a list of ``num_frames_per_clip`` numpy
        arrays and the index of the first one in the sorted file list.
        ``([], 0)`` when the directory cannot be walked or holds fewer files
        than requested.
    """
    s_index=0
    # Look at the top-level directory only. Iterating the whole os.walk()
    # also visited sub-directories while still building paths from the root,
    # which appended extra frames or discarded frames that were already loaded.
    top_level=next(os.walk(filename),None)
    if top_level is None:
        # os.walk yields nothing for a missing path or a non-directory.
        return [],s_index
    filenames=sorted(top_level[2])
    if(len(filenames)<num_frames_per_clip):
        print("Get invaild data!")
        return [],s_index

    s_index=random.randint(0,len(filenames)-num_frames_per_clip)
    ret_arr=[]
    for frame_name in filenames[s_index:s_index+num_frames_per_clip]:
        image_name=str(filename)+'/'+str(frame_name)
        # The context manager closes the image file once it has been decoded.
        with Image.open(image_name) as img:
            ret_arr.append(np.array(img))
    return ret_arr,s_index

In [None]:
# Loss function and optimiser used by the training cell.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=1.5e-7,
    betas=(0.9, 0.999),
    eps=1e-08,
    weight_decay=0.0005,
)

# NOTE(review): `Logger` is imported from the standard `logging` module, so
# './logs' becomes the logger's name rather than an output directory.
logger = Logger('./logs')

In [None]:
def get_ordered_data(filename,index,num_frames_per_clip):
    """Load and preprocess the clip listed on line ``index`` of a list file.

    Each line of ``filename`` is split on whitespace; the first field is the
    frame directory and the second the label. Every frame is resized so that
    its shorter side is 160 pixels and then centre-cropped to 160x160.

    Args:
        filename: path of the list file.
        index: zero-based line number to read.
        num_frames_per_clip: number of frames requested from ``get_frames_data``.

    Returns:
        ``(frames, label, dirname)``: a float32 array of the cropped frames
        (empty when ``get_frames_data`` returns no frames), the label as an
        int64 numpy scalar array, and the frame directory.
    """
    crop_size=160
    # Close the list file as soon as it has been read.
    with open(filename,'r') as list_file:
        lines=list(list_file)
    line=lines[index].strip('\n').split()
    dirname=line[0]
    label=line[1]

    tmp_data,_=get_frames_data(dirname,num_frames_per_clip)
    img_datas=[]
    for frame in tmp_data:
        img=Image.fromarray(frame.astype(np.uint8))
        # cv2.resize takes the target size as (width, height), both ints.
        if(img.width>img.height):
            scale=float(crop_size)/float(img.height)
            target_size=(int(img.width*scale+1),crop_size)
        else:
            scale=float(crop_size)/float(img.width)
            # The height used to be passed as a float here, which cv2.resize
            # rejects; it is now truncated like the width above.
            target_size=(crop_size,int(img.height*scale+1))
        img=np.array(cv2.resize(np.array(img),target_size)).astype(np.float32)
        # Centre crop: axis 0 of the array is the height, axis 1 the width.
        crop_x=(img.shape[0]-crop_size)//2
        crop_y=(img.shape[1]-crop_size)//2
        img=img[crop_x:crop_x+crop_size,crop_y:crop_y+crop_size,:]
        img_datas.append(img)

    return np.array(img_datas).astype(np.float32),np.array(label).astype(np.int64),dirname

In [None]:
#Dataset wrapper needed to feed our own data through PyTorch's DataLoader (it inherits from torch.utils.data.Dataset).
class Xdata(torch.utils.data.Dataset):
    """Dataset of video clips described by a list file.

    Each line of the list file is split on whitespace; the first field is a
    frame directory and the second the label.

    Args:
        filename: path of the list file.
        start_pos: accepted for compatibility; not used.
        num_frames_per_clip: number of frames requested per clip.
        crop_size: side length of the square centre crop.
    """
    def __init__(self,filename,start_pos=-1,num_frames_per_clip=16,crop_size=160):
        # Read every line up front and close the file straight away.
        with open(filename,'r') as list_file:
            lines=list(list_file)
        self.len=len(lines)
        self.datalist=lines
        self.num_frames_per_clip=num_frames_per_clip
        self.crop_size=crop_size

    def __getitem__(self,index):
        """Return ``(frames, label, dirname)`` for the clip on line ``index``.

        ``frames`` is a float32 array of resized, centre-cropped frames; it is
        empty when ``get_frames_data`` returns no frames. ``label`` is an
        int64 numpy scalar array.
        """
        crop_size=self.crop_size

        line=self.datalist[index].strip('\n').split()
        dirname=line[0]
        tmp_label=line[1]
        tmp_data,_=get_frames_data(dirname,self.num_frames_per_clip)
        img_datas=[]
        # NOTE(review): an invalid clip yields an empty array here, whose
        # shape differs from valid clips - confirm how batches containing it
        # should be handled.
        for frame in tmp_data:
            img=Image.fromarray(frame.astype(np.uint8))
            # cv2.resize takes the target size as (width, height), both ints.
            if(img.width>img.height):
                scale=float(crop_size)/float(img.height)
                target_size=(int(img.width*scale+1),crop_size)
            else:
                scale=float(crop_size)/float(img.width)
                target_size=(crop_size,int(img.height*scale+1))
            img=np.array(cv2.resize(np.array(img),target_size)).astype(np.float32)
            # Centre crop: axis 0 of the array is the height, axis 1 the width.
            crop_x=(img.shape[0]-crop_size)//2
            crop_y=(img.shape[1]-crop_size)//2
            img=img[crop_x:crop_x+crop_size,crop_y:crop_y+crop_size,:]
            img_datas.append(img)
        np_arr_data=np.array(img_datas).astype(np.float32)
        np_arr_label=np.array(tmp_label).astype(np.int64)
        return np_arr_data,np_arr_label,dirname
    def __len__(self):
        """Number of lines in the list file."""
        return self.len

In [None]:
#Cell 13: datasets, loaders and iterators used by the training cell.
batch_size=7
train_data=Xdata('train.list',num_frames_per_clip=16,crop_size=160)
test_data=Xdata('res.list',num_frames_per_clip=16,crop_size=160)
# Both loaders shuffle and drop the last incomplete batch; they can be iterated.
train_loader=DataLoader(dataset=train_data,batch_size=batch_size,shuffle=True,drop_last=True)
test_loader=DataLoader(dataset=test_data,batch_size=batch_size,shuffle=True,drop_last=True)

batch_iterator,test_iterator=iter(train_loader),iter(test_loader)

In [None]:
#Cell 14 :
# Restore the weights stored in 'checkpoint_ucf_iter_9600.pth' (read relative
# to the working directory), then put the network into training mode. The
# training cell below starts counting at iteration 9601.
model.load_state_dict(torch.load('checkpoint_ucf_iter_9600.pth'))
model.train()

In [None]:
#Cell 15 :the cell for training the model:
torch.cuda.empty_cache()
# NOTE(review): absolute, machine-specific output directory - consider making
# it configurable.
checkpoint_dir='/home/gez/p3d/'
# Start from the optimiser's real learning rate so that the value printed
# below is meaningful before the first decay (it used to print 0).
lr=optimizer.param_groups[0]['lr']
for iteration in range(9601,13000):
    #adjust learning rate: multiply it by 0.2 every 2400 iterations
    if(iteration % 2400==0):
        lr=optimizer.param_groups[0]['lr']*0.2
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
    #A DataLoader iterator stops once every batch has been served, so a new
    #one is created when the current pass over the data is finished.
    try:
        images,labels,_=next(batch_iterator)
    except StopIteration:
        batch_iterator = iter(train_loader)
        images,labels,_=next(batch_iterator)
        print("dataloader reloaded.")

    #shape from(batch_size,clip_size,height,width,channel) -> (batch_size,channel,clip_size,height,width)
    images=images.permute(0,4,1,2,3).cuda()
    labels=labels.cuda()
    t0 = time.time()
    predict=model(images)
    #calculate the accuracy of this batch with torch ops only; mixing numpy
    #functions with tensors depended on how numpy wraps its result.
    acc=(predict.detach().argmax(dim=1)==labels).float().mean().item()

    optimizer.zero_grad()
    loss= criterion(predict, labels)

    loss.backward()
    optimizer.step()
    t1 = time.time()
    if iteration % 10 == 0:
        print('timer: %.4f sec.' % (t1 - t0))
        print('iter ' + repr(iteration) + ' || Loss: %.4f ||' % (loss.item()), end=' ')
        print(' -> accuracy: %.2f.' % (acc))
    if iteration % 800 ==0:
        torch.save(model.state_dict(),checkpoint_dir + 'x_checkpoint_ucf_iter_{}'.format(iteration) + '.pth')
        print('current lr={}'.format(lr))
torch.save(model.state_dict(),checkpoint_dir + 'x_checkpoint_ucf_done_finetune' + '.pth')

In [None]:
#The cells below evaluate the model: run Cells 1-12 first, then the cells below.

In [None]:
# Datasets, loaders and iterators for evaluation: same data as for training,
# but read in file order (no shuffling).
batch_size=7
train_data=Xdata('train.list',num_frames_per_clip=16,crop_size=160)
test_data=Xdata('res.list',num_frames_per_clip=16,crop_size=160)
# The last incomplete batch of each list is dropped; the loaders can be iterated.
train_loader=DataLoader(dataset=train_data,batch_size=batch_size,shuffle=False,drop_last=True)
test_loader=DataLoader(dataset=test_data,batch_size=batch_size,shuffle=False,drop_last=True)

batch_iterator,test_iterator=iter(train_loader),iter(test_loader)

In [None]:
# Put the network into evaluation mode before running the validation cells.
model.eval()

In [None]:
# Load the fine-tuned weights for evaluation.
# NOTE(review): the training cell saves this file under '/home/gez/p3d/',
# while it is read here relative to the working directory - confirm both
# refer to the same location.
model.load_state_dict(torch.load('x_checkpoint_ucf_done_finetune.pth'))

In [None]:
#The cell for validating the model on the training list ('train.list'):
acc=0
c=0
torch.cuda.empty_cache()

# Count the listed clips; the file is closed as soon as it has been read.
with open('train.list','r') as list_file:
    l=len(list_file.readlines())

# Floor division keeps the batch count an integer on Python 2 and 3.
num_batches=l//batch_size
print(num_batches)

# No gradients are needed for evaluation.
with torch.no_grad():
    for i in range(num_batches):
        try:
            images,labels,k=next(batch_iterator)
        except StopIteration:
            break

        images=images.permute(0,4,1,2,3).cuda()
        predict=model(images)

        # Compare on the CPU, where the labels from the loader are used as-is.
        q=predict.argmax(dim=1).cpu()
        acc_one=(q==labels).float().mean().item()

        acc=acc+acc_one
        c=c+1
        print('acc=',acc_one)
# Mean of the per-batch accuracies; NaN when no batch could be read.
acc=acc/c if c else float('nan')
# The message used to say "test data" although this cell reads the train list.
print('accuracy on train data is %.2f .' % acc)

In [None]:
#The cell for validating the model on the test list ('res.list'):
acc=0
c=0
torch.cuda.empty_cache()
# Count the listed clips; the file is closed as soon as it has been read.
with open('res.list','r') as list_file:
    l=len(list_file.readlines())
# Floor division keeps the batch count an integer on Python 2 and 3.
num_batches=l//batch_size
print(num_batches)

# No gradients are needed for evaluation.
with torch.no_grad():
    for i in range(num_batches):
        try:
            images,labels,k=next(test_iterator)
        except StopIteration:
            print('Data has been all fetched out')
            break

        images=images.permute(0,4,1,2,3).cuda()
        predict=model(images)

        # Compare on the CPU, where the labels from the loader are used as-is.
        q=predict.argmax(dim=1).cpu()
        acc_one=(q==labels).float().mean().item()

        acc=acc+acc_one
        c=c+1
        print('acc=',acc_one)
# Mean of the per-batch accuracies; NaN when no batch could be read.
acc=acc/c if c else float('nan')
print('accuracy on test data is %.2f .' % acc)