In [1]:
# from google.colab import drive
# drive.mount('/content/drive')
# %cd /content/drive/My\ Drive
# %tensorflow_version 1.x
# # !git clone --recurse-submodules https://github.com/xbankov/implementation-system.git
# %cd implementation-system/
# !git pull
# !sudo apt-get install libspatialindex-dev libxml2-dev libxslt-dev python-dev python3-xdg

In [2]:
%%time
# !pip install --no-deps .

CPU times: user 4 µs, sys: 0 ns, total: 4 µs
Wall time: 8.34 µs


In [3]:
from functools import partial
from collections import defaultdict
import numpy as np
import pandas as pd
import scipy
from sklearn.model_selection import KFold
from tqdm.notebook import tqdm
from itertools import product
import time

from video699.screen.semantic_segmentation.fastai_detector import *
from video699.screen.semantic_segmentation.common import *
from video699.screen.semantic_segmentation.postprocessing import *
from video699.screen.semantic_segmentation.evaluation import *

In [4]:
detector = FastAIScreenDetector()
method_params = list(detector.methods.keys())
train_params = list(detector.train_params.keys())
all_params = train_params + method_params

In [5]:
base = [True]
base_lower_bounds = [5, 7]
base_upper_bounds = [40, 70]
base_factors = [[0.1, 0.01]]

erode_dilate = [True]
erode_dilate_lower_bounds = [5, 7, 15]
erode_dilate_upper_bounds = [40, 50]
erode_dilate_factors = [[0.1, 0.01]]
erode_dilate_iterations = [40, 100]

ratio_split = [True]
ratio_split_lower_bounds = [0.7, 0.9]
ratio_split_upper_bounds = [1.1, 1.5]

methods_values = [base] + [erode_dilate] + [ratio_split] + [base_lower_bounds] + [base_upper_bounds] \
        + [base_factors] + [erode_dilate_lower_bounds] + [erode_dilate_upper_bounds] + [erode_dilate_factors] \
        + [erode_dilate_iterations] + [ratio_split_lower_bounds] + [ratio_split_upper_bounds]

In [6]:
batch_size = [8]
resize_factor = [2, 4]
frozen_epochs = [7, 11]
unfrozen_epochs = [6, 11]
frozen_lr = [1e-2, 1e-3, 1e-4]
unfrozen_lr = [slice(1e-4, 2e-4), slice(1e-3, 1e-4)]

train_params_values = [batch_size] + [resize_factor] + [frozen_epochs] + [unfrozen_epochs] + [frozen_lr] + [unfrozen_lr]

In [7]:
method_settings = list(product(*methods_values))
train_settings = list(product(*train_params_values))

In [8]:
all_lectures = [video.filename for video in ALL_VIDEOS]
all_frames = [frame for video in ALL_VIDEOS for frame in video]
all_frames_grouped_by_videos = {video.filename: [frame for frame in video] for video in ALL_VIDEOS}
actual_detector = AnnotatedSampledVideoScreenDetector()

In [9]:
DEBUG_ENV = None

In [10]:
kf = KFold(n_splits=2, shuffle=True, random_state=123)
for i, split in enumerate(kf.split(all_lectures)):
    other_lectures = [all_lectures[index] for index in split[0]]
    test_lectures = [all_lectures[index] for index in split[1]]
    
    # Model selection
    method_settings = list(product(*methods_values))
    train_settings = list(product(*train_params_values))
    df_all = pd.DataFrame(columns=all_params + ['iou', 'wrong_count', 'kfold_split'])
    df_best_models = pd.DataFrame(columns=all_params + ['iou', 'wrong_count'])
    
    for train_setting in tqdm(train_settings[:2]):
        train_params_dict = dict(zip(train_params, train_setting))
        for j, split in enumerate(kf.split(other_lectures)):
            train_lectures = [other_lectures[index] for index in split[0]]
            valid_lectures = [other_lectures[index] for index in split[1]]
            valid_frames = [frame for lecture in valid_lectures for frame in all_frames_grouped_by_videos[lecture]]

            filtered_by = lambda name: any([lecture in str(name) for lecture in train_lectures + valid_lectures])  \
                            and 'frame' in str(name)
            split_by = lambda name: any([lecture in str(name) for lecture in valid_lectures])

                    
            #TODO REMOVE, ONLY FOR DEBUG PURPOSE
            train_params_dict['unfrozen_epochs'] = 1
            train_params_dict['frozen_epochs'] = 0
            train_params_dict['resize_factor'] = 8
            
            detector = FastAIScreenDetector(train_params=train_params_dict, methods=None, filtered_by=filtered_by,
                                        valid_func=split_by, device='cpu')
        
            detector.train()
            
            actuals = [actual_detector.detect(frame) for frame in valid_frames]
            sem_preds = detector.semantic_segmentation_batch(valid_frames)
            
            # Save inter results, average them and save them into DataFrame
            
            print(f"Iterating through {len(method_settings)} methods in split {j}.")
            
            for i, method_setting in enumerate(method_settings[:5]):    
                preds = detector.post_processing_batch(sem_preds, valid_frames, dict(zip(method_params, method_setting)))
                wrong_count, ious, _ = evaluate(actuals, preds)
                
                iou_score = np.nanmean(ious)
                wrong_count = len(wrong_count)
                df_all.loc[len(df_all)] = train_setting + method_setting + (iou_score, wrong_count, j)
    
    unhashable_columns = ['frozen_lr', 'unfrozen_lr', 'base_factors', 'erode_dilate_factors']
    df_all[unhashable_columns] = df_all[unhashable_columns].astype(str)
    df_all['wrong_count'] = df_all['wrong_count'].astype(int)
    
    best_params = df_all.groupby(train_params + method_params).mean().sort_values(by=['wrong_count', 'iou']).iloc[0].name
    converted_params = []
    for i, par in enumerate(best_params):
        if isinstance(par, np.int64) or isinstance(par, np.float64):
            converted_params.append(par.item())
        else:
            converted_params.append(par)
    best_params = tuple(converted_params)
    print(df_all.groupby(train_params + method_params).mean().sort_values(by=['wrong_count', 'iou']).iloc[0])
    
    best_methods = dict(zip(method_params, best_params[-len(method_params):]))
    best_train_params_dict = dict(zip(train_params, best_params[:len(train_params)]))
    best_train_params_dict['frozen_lr'] = float(best_train_params_dict['frozen_lr'])
    best_train_params_dict['unfrozen_lr'] = eval(best_train_params_dict['unfrozen_lr'])
    
    filtered_by = lambda name: 'frame' in str(name)
    split_by = lambda name: any([lecture in str(name) for lecture in test_lectures])
    
    best_detector = FastAIScreenDetector(train_params=best_train_params_dict, methods=best_methods, filtered_by=filtered_by,
                                        valid_func=split_by, device='cpu')
    print(type(best_detector.train_params['batch_size']))
    best_detector.train()

    actuals = [actual_detector.detect(frame) for frame in valid_frames]
    preds = [best_detector.detect(frame) for frame in valid_frames]
    wrong_count, iou, _ = evaluate(actuals, preds)
    iou_score = np.nanmean(ious)
    wrong_count = len(wrong_count)
    df_best_models.loc[len(df_best_models)] = train_setting + method_setting + (iou_score, wrong_count, j)

df_best_models.to_csv('cross_validation_results.csv')
    
    

  0%|          | 0/2 [00:00<?, ?it/s]

epoch,train_loss,valid_loss,acc,dice,iou,time
0,0.504003,0.206067,0.958636,0.938684,0.894617,00:20


Iterating through 192 methods in split 0.


ValueError: not enough values to unpack (expected 3, got 2)

In [None]:
detector

In [27]:
seg = detector.semantic_segmentation(all_frames[0])

In [37]:
a, b = cv2.findContours(seg, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

In [38]:
a

[array([[[ 55,  38]],
 
        [[ 54,  39]],
 
        [[ 52,  39]],
 
        [[ 50,  41]],
 
        ...,
 
        [[149,  41]],
 
        [[147,  39]],
 
        [[145,  39]],
 
        [[144,  38]]], dtype=int32),
 array([[[ 10, 141]],
 
        [[ 11, 140]],
 
        [[ 18, 140]],
 
        [[ 19, 141]],
 
        ...,
 
        [[  9, 143]],
 
        [[  8, 143]],
 
        [[  7, 142]],
 
        [[  8, 141]]], dtype=int32),
 array([[[ 10, 122]],
 
        [[ 11, 121]],
 
        [[ 18, 121]],
 
        [[ 19, 122]],
 
        [[ 20, 122]],
 
        [[ 21, 123]],
 
        [[ 20, 124]],
 
        [[ 21, 125]],
 
        [[ 20, 126]],
 
        [[ 18, 126]],
 
        [[ 19, 127]],
 
        [[ 18, 128]],
 
        [[ 11, 128]],
 
        [[ 10, 127]],
 
        [[ 11, 126]],
 
        [[  9, 126]],
 
        [[  8, 125]],
 
        [[  9, 124]],
 
        [[  8, 123]],
 
        [[  9, 122]]], dtype=int32),
 array([[[ 10, 109]],
 
        [[ 11, 108]],
 
        [[ 18, 108]

In [39]:
b

array([[[ 4, -1,  1, -1],
        [ 2, -1, -1,  0],
        [ 3,  1, -1,  0],
        [-1,  2, -1,  0],
        [-1,  0,  5, -1],
        [-1, -1, -1,  4]]], dtype=int32)