# Clog Loss: Advance Alzheimer’s Research with Stall Catchers

https://www.drivendata.org/competitions/65/clog-loss-alzheimers-research/page/217/

In [None]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

import os

import cv2
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from IPython.display import clear_output
from IPython.lib.display import YouTubeVideo
from sklearn.metrics import matthews_corrcoef
from sklearn.model_selection import train_test_split

# Default figure size for every matplotlib figure created in this notebook.
mpl.rcParams['figure.figsize'] = (15, 10)

# NOTE(review): star import. The helpers called below (display_clip, read_clip,
# extract_masks, display_fused) are not defined in this notebook and presumably
# come from clog_utils - confirm.
from clog_utils import *

# Short alias handed to the Dataset.prefetch calls further down.
AUTOTUNE = tf.data.experimental.AUTOTUNE

# Print NumPy floats with 3 decimals and without scientific notation.
np.set_printoptions(precision=3, suppress=True)

# Show the installed TensorFlow version as the cell output.
tf.__version__

### Constants

In [None]:
# Root folder of the Clog Loss dataset. Set the CLOG_LOSS_DATASET_PATH environment
# variable to point at a local copy; the previously hard-coded location is the fallback.
DATASET_PATH = os.environ.get('CLOG_LOSS_DATASET_PATH', '/home/user/percepto/datasets/ClogLoss')

### Train Data

In [None]:
# Read the training labels CSV from the dataset root and show the table.
train_labels_csv = f'{DATASET_PATH}/train_labels.csv'
train_labels_df = pd.read_csv(train_labels_csv)
train_labels_df

In [None]:
# Keep only the label rows whose 'stalled' value is positive.
is_stalled = train_labels_df['stalled'] > 0
train_stalled_df = train_labels_df.loc[is_stalled]
train_stalled_df

In [None]:
# Read the training metadata CSV from the dataset root and show the table.
train_metadata_csv = f'{DATASET_PATH}/train_metadata.csv'
train_metadata_df = pd.read_csv(train_metadata_csv)
train_metadata_df

### Test Data

In [None]:
# Read the test metadata CSV from the dataset root and show the table.
test_metadata_csv = f'{DATASET_PATH}/test_metadata.csv'
test_metadata_df = pd.read_csv(test_metadata_csv)
test_metadata_df

#### Nano Data

In [None]:
# Count the entries listed in the dataset's nano folder (shell: ls piped into wc -l).
!ls {DATASET_PATH}/nano | wc -l

In [None]:
# Select the metadata rows flagged as belonging to the nano subset.
in_nano = train_metadata_df['nano'] == True
nano_df = train_metadata_df.loc[in_nano]
nano_df

#### Micro Data

In [None]:
# Count the entries listed in the dataset's micro folder (shell: ls piped into wc -l).
!ls {DATASET_PATH}/micro | wc -l

In [None]:
# Select the metadata rows flagged as belonging to the micro subset.
in_micro = train_metadata_df['micro'] == True
micro_df = train_metadata_df.loc[in_micro]
micro_df

#### Validate that Nano dataset is included inside Micro

In [None]:
# True when every nano filename also appears among the micro filenames.
nano_df['filename'].isin(micro_df['filename']).all()

### Paths and Statistics

In [None]:
# Pick the subset used for the rest of the notebook: its metadata frame and the
# name of the folder holding its clips. Swap in the commented line for micro.
df = nano_df
folder = 'nano'
#df, folder = micro_df, 'micro'

In [None]:
# Hold out 30% of the selected subset for validation; the fixed seed keeps the
# split identical from run to run.
train_df, val_df = train_test_split(
    df,
    test_size=0.3,
    random_state=42,
)

In [None]:
# Prefix each filename with the directory it is read from; adding a string to
# the filename array applies the prefix to every entry.
train_dir = f'{DATASET_PATH}/train/'
test_dir = f'{DATASET_PATH}/test/'
subset_dir = f'{DATASET_PATH}/{folder}/'

train_metadata_paths = train_dir + train_metadata_df['filename'].values
test_metadata_paths = test_dir + test_metadata_df['filename'].values

train_paths = subset_dir + train_df['filename'].values
val_paths = subset_dir + val_df['filename'].values

In [None]:
# Report how many clip paths each collection holds.
n_train_meta = len(train_metadata_paths)
n_test = len(test_metadata_paths)
n_train, n_val = len(train_paths), len(val_paths)

print(f'Train Metadata: {n_train_meta}')
print(f'Test: {n_test}')
print(f'Trainig: train {n_train}, validation {n_val}')

### YouTube

In [None]:
# Embed the YouTube video with this ID in the cell output.
YouTubeVideo('-2aW6m60mYg')

In [None]:
# Embed the YouTube video with this ID in the cell output.
YouTubeVideo('c6TtoQhMrbA')

In [None]:
# Embed the YouTube video with this ID in the cell output.
YouTubeVideo('_uJ_dcy-OXQ')

In [None]:
# Embed the YouTube video with this ID in the cell output.
YouTubeVideo('9RFnguYmd_8')

### Stalled and Flowing Clip Paths

In [None]:
# Clips whose crowd score is at least 0.5 are treated as stalled.
stalled_df = df[df['crowd_score'] >= 0.5]
# Build the paths from `folder` so they point at the directory of the selected
# subset, like train_paths/val_paths, instead of a hard-coded 'micro' directory.
stalled_paths = DATASET_PATH+'/'+folder+'/'+stalled_df['filename'].values
len(stalled_paths), stalled_paths[:5]

In [None]:
# Clips whose crowd score is below 0.5 are treated as flowing.
flowing_df = df[df['crowd_score'] < 0.5]
# Build the paths from `folder` so they point at the directory of the selected
# subset, like train_paths/val_paths, instead of a hard-coded 'micro' directory.
flowing_paths = DATASET_PATH+'/'+folder+'/'+flowing_df['filename'].values
len(flowing_paths), flowing_paths[:5]

### Display Flowing Clips

In [None]:
# Show the first flowing clip with the display_clip helper.
first_flowing_path = flowing_paths[0]
display_clip(first_flowing_path)

### Display Stalled Clips

In [None]:
# Show the first stalled clip with the display_clip helper.
first_stalled_path = stalled_paths[0]
display_clip(first_stalled_path)

### Display Test Clips

In [None]:
# Show one clip from the test set (entry 10 of the test paths).
sample_test_path = test_metadata_paths[10]
display_clip(sample_test_path)

### Read Clip as Tensors

In [None]:
# Load one stalled clip through read_clip and inspect the shape of the result.
sample_clip_path = stalled_paths[100]
frames = read_clip(sample_clip_path)
frames.shape

##### Show some clip frames

In [None]:
# Show three frames of the loaded clip side by side, each titled with its frame
# index. Explicit axes replace the pyplot state machine, and ending the cell on
# the loop avoids printing an AxesImage repr under the figure.
frame_indices = (0, 20, 40)
fig, axes = plt.subplots(1, len(frame_indices))
for ax, frame_idx in zip(axes, frame_indices):
    ax.imshow(frames[frame_idx])
    ax.set_title(f'Frame {frame_idx}')

##### Show a mask fused with its frame

In [None]:
# Compute masks for the loaded frames with the extract_masks helper
# (not defined in this notebook; presumably from clog_utils - confirm).
masks = extract_masks(frames)

In [None]:
# Pass frame 30 and its mask to the display_fused helper.
display_fused(frames[30], masks[30])

### Dataset

In [None]:
@tf.function
def read_images(path):
    """Load a clip and its masks from within a TensorFlow graph.

    Wraps the Python helpers ``read_clip`` and ``extract_masks`` in
    ``tf.numpy_function`` so the function can be mapped over a
    ``tf.data.Dataset`` of clip paths.

    Args:
        path: Clip path, passed through to ``read_clip``.

    Returns:
        A ``(frames, masks)`` tuple; both outputs are declared as ``tf.uint8``.
    """
    clip_frames = tf.numpy_function(func=read_clip, inp=[path], Tout=tf.uint8)
    clip_masks = tf.numpy_function(func=extract_masks, inp=[clip_frames], Tout=tf.uint8)
    return clip_frames, clip_masks

In [None]:
# Decode every clip once and cache the result on disk, one cache file per subset
# and split. The subset name is part of the file name so that switching `folder`
# (nano <-> micro) cannot silently reuse a cache written for the other subset.
train = tf.data.Dataset.from_tensor_slices(train_paths).map(read_images).cache(f'/tmp/clog_{folder}_train')
val = tf.data.Dataset.from_tensor_slices(val_paths).map(read_images).cache(f'/tmp/clog_{folder}_val')

In [None]:
%%time 
for i, (frames, masks) in enumerate(val.take(1)):
    print(i, frames.shape, masks.shape)
    display_fused(frames[30].numpy(), masks[30].numpy())

In [None]:
# Training input pipeline: endless repetition, a 100-element shuffle buffer,
# batches of one clip, and background prefetching.
train_ds = (
    train
    .repeat()
    #.map(augment, num_parallel_calls=AUTOTUNE, deterministic=False)
    .shuffle(100)
    .batch(1)
    .prefetch(AUTOTUNE)
)
train_ds

In [None]:
# Validation input pipeline: endless repetition, batches of one clip, and
# background prefetching (no shuffling).
val_ds = (
    val
    .repeat()
    #.map(augment, num_parallel_calls=AUTOTUNE, deterministic=False)
    .batch(1)
    .prefetch(AUTOTUNE)
)
val_ds

In [None]:
# Sanity-check the batched validation pipeline by printing the shapes of the
# first five batches. Distinct loop names keep this cell from rebinding the
# notebook-level `frames` and `masks` variables.
for i, (batch_frames, batch_masks) in enumerate(val_ds.take(5)):
    print(i, batch_frames.shape, batch_masks.shape)

### Model

### Train

### Evaluation

In [None]:
# Binary ground truth for the validation split: 1 where crowd_score >= 0.5, else 0.
stalled_mask = val_df['crowd_score'].ge(0.5)
y_true = stalled_mask.astype(int).values
y_true

In [None]:
# Baseline predictions for sanity-checking the metric; enable exactly one line.
#y_pred = y_true # should be 1
#y_pred = 1-y_true # should be -1
# Random guesses drawn from a seeded generator so the score is repeatable across runs.
y_pred = np.random.default_rng(42).integers(0, 2, size=y_true.shape) #  should be around 0
y_pred

In [None]:
# Matthews correlation coefficient between the ground truth and the predictions.
matthews_corrcoef(y_true, y_pred)