In [1]:
from __future__ import print_function, division
import os

os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   
os.environ["CUDA_VISIBLE_DEVICES"]="0"

from itertools import chain
import torch
import torch.nn as nn
import torch.nn.functional as F
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
from IPython.core.debugger import set_trace
import itertools
import seaborn as sns
from tqdm import tqdm
import random
import cv2
from natsort import natsorted
import collections
from IPython import display
import pylab as pl
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics.regression import mean_absolute_error, mean_squared_error, r2_score, explained_variance_score
import numpy as np
from torch import nn
import torch.nn.functional as F
import torchvision.transforms.functional as TF


from skorch import NeuralNetRegressor
from skorch.helper import predefined_split
from sklearn.metrics.regression import mean_absolute_error, mean_squared_error, r2_score, explained_variance_score
from skorch import callbacks
from sklearn.model_selection import GridSearchCV

import c3d_wrapper
import tensorflow as tf

In [2]:
# Notebook-wide configuration. These names are used as default arguments of
# TF_Model / GAITDataset, so change them here rather than at the call sites.

# Default batch size handed to the C3D network and its input placeholder.
BATCH_SIZE = 1
# Pretrained C3D weights passed to c3d_wrapper.C3DNet as `pretrained_model_path`.
MODEL_PATH = '/data/GaitData/pretrained/C3D/conv3d_deepnetA_sport1m_iter_1900000_TF.model'
# .npy array that is subtracted from every resized clip before feature extraction.
MEAN_FILE = 'train01_16_128_171_mean.npy'
# Directory holding one `<vid>.npy` frame array per video.
FRAME_HOME = "/data/GaitData/CroppedFrameArrays"
# Number of consecutive frames sent through the C3D network at a time.
FRAMES_PER_CLIP = 16
# Length the per-video frame sequence is zero-padded to.
FRAME_MAXLEN=300
# Length the per-video C3D feature sequence is zero-padded to.
FEATS_MAXLEN=20
# Target column names: every column of the targets frame except the last two.
# NOTE(review): read_pickle executes arbitrary pickle payloads - only load trusted files.
target_columns = pd.read_pickle("../preprocess/data/targets_dataframe.pkl").columns.values[:-2]

In [3]:
class TF_Model:
    """C3D network (built with ``trainable=False``) plus the TF session that runs it.

    Attributes
    ----------
    tf_video_clip : float32 placeholder of shape (batch_size, None, 112, 112, 3)
    tf_output     : tensor returned by calling the C3D net on ``tf_video_clip``
    mean_val      : array loaded from ``mean_file`` with its first axis moved last
                    (presumably the per-frame mean image - TODO confirm layout)
    sess          : ``tf.Session`` configured to grow GPU memory on demand
    """

    def __init__(self, batch_size=BATCH_SIZE, model_path=MODEL_PATH, mean_file=MEAN_FILE):
        # --- graph definition ---
        c3d = c3d_wrapper.C3DNet(pretrained_model_path=model_path,
                                 trainable=False,
                                 batch_size=batch_size)

        clip_shape = [batch_size, None, 112, 112, 3]  # (batch,num_frames,112,112,3)
        self.tf_video_clip = tf.placeholder(tf.float32, clip_shape, name='tf_video_clip')
        self.tf_output = c3d(inputs=self.tf_video_clip)

        # Axis 0 of the stored array goes last so it can be subtracted from
        # frames-first clips.
        self.mean_val = np.transpose(np.load(mean_file), (1, 2, 3, 0))

        # --- session ---
        session_config = tf.ConfigProto()
        session_config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=session_config)
        self.sess.run(tf.global_variables_initializer())

    def run(self, X):
        """Evaluate the network on one clip ``X``; it is wrapped in a list to add the batch axis."""
        feed = {self.tf_video_clip: [X]}
        return self.sess.run(self.tf_output, feed_dict=feed)

In [4]:
# Build the C3D feature extractor once with the default settings; this single
# instance is passed as `feature_extraction_model` to each GAITDataset.
model = TF_Model()

initialize with pretrained weight file...
Done!


In [None]:
def pid2vid(pid):
    num, test_id, trial_id = pid.split('_')
    return '_'.join([num, 'test', test_id, 'trial', trial_id])
    

def vid2pid(vid):
    """Keep underscore-separated fields 0, 2 and 4 of ``vid`` and re-join them with '_'.

    Raises IndexError when ``vid`` has fewer than five fields.
    """
    fields = vid.split('_')
    return '_'.join(fields[i] for i in (0, 2, 4))

In [None]:
class GAITDataset(Dataset):
    """Per-video dataset returning ``(feature sequence, grayscale frame volume)``.

    For the video at position ``idx`` the frame array ``<frame_home>/<vid>.npy``
    is loaded, pushed clip-by-clip through ``feature_extraction_model`` and
    also converted to 64x64 grayscale frames scaled to [-1, 1]. Both sequences
    are zero-padded (or truncated) to a fixed length.

    Parameters
    ----------
    X : DataFrame-like input table; only stored on the instance.
    y : DataFrame whose index holds ids accepted by ``pid2vid``.
    scaler : fitted object with ``transform`` or a falsy value. When given,
        a *copy* of ``y`` is scaled, so the caller's frame is never modified.
    feature_extraction_model : object exposing ``mean_val`` and ``run(clip)``.
    frame_home : directory containing the ``.npy`` frame arrays.
    frames_per_clip : number of frames fed to the extractor at a time.
    frame_maxlen : fixed length of the returned frame sequence.
    feats_maxlen : fixed length of the returned feature sequence.
    """

    def __init__(self,
                 X, y, scaler, feature_extraction_model,
                 frame_home=FRAME_HOME, frames_per_clip=FRAMES_PER_CLIP,
                 frame_maxlen=FRAME_MAXLEN, feats_maxlen=FEATS_MAXLEN):

        self.frame_home = frame_home

        self.X = X
        # Scale a private copy: assigning through .loc on the caller's object
        # would silently rescale it (and rescale again on every re-run).
        self.y = y.copy() if scaler else y
        self.vids = [pid2vid(pid) for pid in self.y.index]

        self.feature_extraction_model = feature_extraction_model  # tf model

        self.frames_per_clip = frames_per_clip
        self.frame_maxlen = frame_maxlen

        # frame_maxlen=300 -> maxlen/frames_per_clip = 18.75 => set feats_maxlen as 20!
        self.feats_maxlen = feats_maxlen

        if scaler:
            self.y.loc[:, :] = scaler.transform(self.y)

    def _preprocess_clip(self, clip):
        """Resize each frame to 171x128, subtract the mean and crop to 112x112."""
        resized = np.array([cv2.resize(img, (171, 128)) for img in clip])
        # The last clip of a video may be shorter than the stored mean.
        resized = resized - self.feature_extraction_model.mean_val[:len(resized)]
        return resized[:, 8:120, 30:142, :]

    def extract_features(self, stacked_arr):
        """Run every ``frames_per_clip``-sized slice through the extractor.

        Returns the per-clip outputs concatenated along axis 1, then truncated
        or zero-padded to ``feats_maxlen`` along that axis.

        Raises
        ------
        ValueError
            If ``stacked_arr`` contains no frames.
        """
        if len(stacked_arr) == 0:
            raise ValueError("cannot extract features from an empty frame array")

        per_clip = []
        for start in range(0, len(stacked_arr), self.frames_per_clip):
            clip = self._preprocess_clip(stacked_arr[start:start + self.frames_per_clip])
            # (D, H, W, C) -> (C, D, H, W)
            per_clip.append(self.feature_extraction_model.run(clip)[0].transpose(3, 0, 1, 2))

        # Truncate first so the pad width below can never become negative.
        res = np.concatenate(per_clip, axis=1)[:, :self.feats_maxlen]

        # zero padding for feature sequence
        res = np.pad(res, ((0, 0), (0, self.feats_maxlen - res.shape[1]), (0, 0), (0, 0)), 'constant')

        return res

    def __len__(self):
        return len(self.vids)

    def __getitem__(self, idx):
        """Return ``(feats, frames)`` as float32 tensors for video ``idx``.

        ``frames`` has the channel axis first and ``frame_maxlen`` entries on
        axis 1; videos longer than ``frame_maxlen`` are truncated.
        """
        vid = self.vids[idx]

        stacked_arr = np.load(os.path.join(self.frame_home, vid) + '.npy')

        feats = self.extract_features(stacked_arr)

        frames = []
        # Slice so that over-long videos are cut instead of breaking np.pad.
        for cropped in stacked_arr[:self.frame_maxlen]:
            pic = cv2.resize(cv2.cvtColor(cropped, cv2.COLOR_BGR2GRAY), (64, 64))[:, :, None]

            pic = TF.to_tensor(pic)  # scale to [0.0, 1.0]
            pic = TF.normalize(pic, (0.5,), (0.5,)).permute(1, 2, 0).numpy()  # scale to [-1.0, 1.0]
            frames.append(pic)

        frames = np.stack(frames)

        # zero padding, then move the channel axis to the front
        frames = np.pad(frames, ((0, self.frame_maxlen - len(frames)), (0, 0), (0, 0), (0, 0)),
                        'constant', constant_values=0).transpose(3, 0, 1, 2)

        return torch.tensor(feats, dtype=torch.float32), torch.tensor(frames, dtype=torch.float32)

In [None]:
class Decoder(nn.Module):
    """Stack of four stride-2 ``ConvTranspose3d`` stages ending in ``Tanh``.

    Parameters
    ----------
    num_filters : sequence of five ints
        Channel counts: input channels followed by the output channels of each
        of the four stages. The default is an immutable tuple so it cannot be
        shared and mutated between instances.

    With the default filters and an input of size (B, 512, 20, 4, 4) the
    stages produce the shapes noted next to each stage below.
    """

    def __init__(self, num_filters=(512, 256, 128, 64, 1)):

        super(Decoder, self).__init__()

        # (kernel_size, padding) of the three ConvTranspose3d+BatchNorm+ReLU stages.
        stage_specs = [
            ((3, 4, 4), 1),          # -> b, 256, 39, 8, 8
            ((3, 4, 4), (2, 1, 1)),  # -> b, 128, 75, 16, 16
            (4, 1),                  # -> b, 64, 150, 32, 32
        ]

        layers = []
        for i, (kernel, pad) in enumerate(stage_specs):
            layers += [
                nn.ConvTranspose3d(num_filters[i], num_filters[i + 1],
                                   kernel_size=kernel, stride=2, padding=pad),
                nn.BatchNorm3d(num_filters[i + 1]),
                nn.ReLU(True),
            ]

        # -> b, 1, 300, 64, 64 ; Tanh keeps the output in [-1, 1]
        layers += [
            nn.ConvTranspose3d(num_filters[3], num_filters[4], kernel_size=4, stride=2, padding=1),
            nn.Tanh(),
        ]

        # Layer order (and therefore the state_dict keys decode.0 ... decode.10)
        # is the same as when the layers are listed one by one.
        self.decode = nn.Sequential(*layers)

    def forward(self, x):
        '''
            x : size = (B, C, D, H, W)
        '''
        return self.decode(x)
    
    
class Reconstructor(nn.Module):
    """Module that owns a default-configured ``Decoder`` under the name ``decoder``."""

    def __init__(self):
        super(Reconstructor, self).__init__()
        self.decoder = Decoder()

    def forward(self, encoded):
        """Apply the decoder to ``encoded`` and return its output unchanged."""
        return self.decoder(encoded)

In [None]:
from sklearn.model_selection import train_test_split

def filter_input_df_with_vids(df, vids):
    """Return the rows of ``df`` whose ``'vids'`` value is contained in ``vids``."""
    keep = df['vids'].isin(vids)
    return df.loc[keep]

def filter_target_df_with_vids(df, vids):
    """Select, by label and in the order of ``vids``, the rows of ``df`` indexed by ``vid2pid(vid)``."""
    wanted_ids = list(map(vid2pid, vids))
    return df.loc[wanted_ids]

def split_dataset_with_vids(input_df, target_df, vids, test_size=0.3, random_state=42):
    """Split ``vids`` into train/test and slice both frames accordingly.

    Returns ``(train_X, train_y, train_vids, test_X, test_y, test_vids)``.
    """
    train_vids, test_vids = train_test_split(vids, test_size=test_size, random_state=random_state)

    def select(subset):
        # input rows first, then the matching target rows
        return filter_input_df_with_vids(input_df, subset), filter_target_df_with_vids(target_df, subset)

    train_X, train_y = select(train_vids)
    test_X, test_y = select(test_vids)

    return train_X, train_y, train_vids, test_X, test_y, test_vids

In [None]:
from skorch.callbacks import Callback
from torchvision.utils import save_image

def to_img(x):
    """Map an array from [-1, 1] to uint8 [0, 255]; out-of-range values are clipped first."""
    unit = np.clip(0.5 * (x + 1), 0.0, 1.0)
    return (255 * unit).astype(np.uint8)

def to_tensor_img(x):
    """Rescale a tensor from [-1, 1] to [0, 1] (clamped) and view it as (N, 1, 64, 64)."""
    n_images = x.size(0)
    rescaled = (0.5 * (x + 1)).clamp(0, 1)
    return rescaled.view(n_images, 1, 64, 64)

class SaveResults(Callback):
    """After every epoch, save one random target/prediction pair per split as PNG.

    Files are written to ``<path>/train`` and ``<path>/valid`` as
    ``target.png`` and ``pred.png``; each epoch overwrites the previous files.
    """

    def __init__(self, path):
        self.path = path

    def on_epoch_end(self, net, **kwargs):
        for split in ('train', 'valid'):
            data = kwargs['dataset_' + split]
            sample_ix = np.random.randint(len(data))
            X, y = data[sample_ix]

            out_dir = os.path.join(self.path, split)
            if not os.path.exists(out_dir):
                os.makedirs(out_dir)

            # Move the leading (channel) axis last for both target and
            # prediction: (C, maxlen, h, w) -> (maxlen, h, w, C).
            target = y.numpy().transpose(1, 2, 3, 0)
            predicted = net.predict(X[None, :])[0].transpose(1, 2, 3, 0)

            for label, volume in (('target', target), ('pred', predicted)):
                grid = to_tensor_img(torch.from_numpy(volume))
                save_image(grid, os.path.join(out_dir, label + '.png'))

In [None]:
# dataset path
input_file = "../preprocess/data/person_detection_and_tracking_results_drop.pkl"
target_file = "../preprocess/data/targets_dataframe.pkl"

input_df = pd.read_pickle(input_file)
target_df = pd.read_pickle(target_file)[target_columns]

# Sort the unique video ids: the iteration order of a set of strings changes
# between interpreter runs (hash randomisation), so without sorting the
# "fixed" random_state below would still give a different train/test split
# after every kernel restart.
possible_vids = sorted(set(input_df.vids))
train_X, train_y, train_vids, test_X, test_y, test_vids = split_dataset_with_vids(input_df, target_df, possible_vids, test_size=0.3, random_state=42)

# # target scaler
# scaler = StandardScaler()
# train_y.loc[:,:] = scaler.fit_transform(train_y.values)

# No target scaling: GAITDataset skips its scaling step for a falsy scaler.
scaler = None

# held-out test set for final evaluation
test_dataset = GAITDataset(test_X, test_y, scaler, feature_extraction_model=model)
# NOTE(review): each item runs the TF session inside __getitem__; confirm this
# is safe with num_workers=16 worker processes before iterating this loader.
test_batcher = DataLoader(test_dataset,batch_size=10, shuffle=False, num_workers=16)

from sklearn.model_selection import KFold
kf = KFold(n_splits=5)

# ndarray so the integer index arrays produced by kf.split can index it directly
train_vids = np.array(train_vids)

from torch.nn.modules.loss import _Loss

class MyCriterion(_Loss):
    """Mean squared error computed over non-padding frames only.

    A frame (index along axis 2) of the target counts as padding when every
    value in it is exactly zero. For each sample the per-frame MSE is averaged
    over its valid frames; the result is the mean over the batch.

    The mask is derived from the shape of ``y`` itself, so the loss works for
    any sequence length and channel count. A sample without any valid frame
    contributes 0 instead of NaN.
    """

    def __init__(self):
        super(MyCriterion, self).__init__()

    def forward(self, x, y):
        """
        x, y : tensors of size (B, C, D, H, W) - prediction and target.
        Returns a scalar tensor.
        """
        # (B, D): 1.0 where the target frame holds at least one non-zero value
        valid_mask = ((y != 0).sum(dim=(1, 3, 4)) > 0).float()
        # (B, D): squared error averaged over channel, height and width
        frame_mse = ((x - y) ** 2).mean((1, 3, 4))
        # at least 1 so an all-padding sample gives 0/1 rather than 0/0
        n_valid = valid_mask.sum(1).clamp(min=1.0)
        return torch.mean((valid_mask * frame_mse).sum(1) / n_valid)

# cross validation loop
# NOTE(review): `scores` is created here but nothing in this cell fills it.
scores = {'MAPE': [], 'MAE': [], 'RMSE': [], 'R2': [], 'Explained variation': []}

# kf.split yields positional index arrays into train_vids for each fold.
for train, valid in kf.split(train_vids):
    # split trainset with train/valid
    train_split, valid_split = train_vids[train], train_vids[valid]
    
    # These assignments overwrite the notebook-level train_X / train_y from the
    # initial train/test split with the current fold's subset.
    train_X, train_y = filter_input_df_with_vids(input_df,train_split), filter_target_df_with_vids(target_df,train_split)
    valid_X, valid_y = filter_input_df_with_vids(input_df,valid_split), filter_target_df_with_vids(target_df,valid_split)


    # datasets for this fold
    train_dataset = GAITDataset(train_X, train_y, scaler, feature_extraction_model=model)
    valid_dataset = GAITDataset(valid_X, valid_y, scaler, feature_extraction_model=model)
    
    # Fresh network per fold; `net` is rebound on every iteration, so only the
    # last fold's model remains after the loop.
    net = NeuralNetRegressor(
        Reconstructor,
        batch_size=32,
        max_epochs=100,
        lr=1e-3,
        optimizer=torch.optim.Adam,
        optimizer__weight_decay=1e-5,
        #optimizer__momentum=0.9,
        #optimizer__nesterov=True,
        criterion=MyCriterion,
        device='cuda',
        # validate on this fold's held-out dataset
        train_split=predefined_split(valid_dataset),
        # Shuffle training data on each epoch
        iterator_train__shuffle=True,
        callbacks=[#('ealy_stop', callbacks.EarlyStopping()),
                   #('lr_scheduler', callbacks.LRScheduler(policy='WarmRestartLR', base_period=2)),
                   ('prog_bar', callbacks.ProgressBar()),
                   ('save_results', SaveResults(path='./results'))
                   ],
    
    )
    
    # The dataset yields (features, frames) pairs itself, so no separate y is passed.
    net.fit(train_dataset, y=None)

HBox(children=(IntProgress(value=0, max=63), HTML(value='')))