In [None]:
import tensorflow as tf
import datetime
from sklearn import preprocessing
from tensorflow.keras.utils import to_categorical
from IPython.core.display import Image

from VCWA import Models, AttentionModels, Common, VideoDataGenerator

## Preparation

In [None]:
# Mount the cloud-storage bucket "gfr-master-data-bucket" at /home/jupyter/bucket/ using gcsfuse.
# Uncomment the mkdir line if the mount point does not exist yet.
# !mkdir /home/jupyter/bucket
!gcsfuse --implicit-dirs gfr-master-data-bucket /home/jupyter/bucket/

In [None]:
# Batch sizes handed to the VideoDataGenerator instances created further down.
train_batch_size = 256 # use 256 only for single-frame batches
# The test generators below are created without single_frame=True; presumably they
# yield whole clips, which is why this value is much smaller — TODO confirm.
test_batch_size = 4

In [None]:
# Which of the HMDB51 split definitions to use.
split_no = 1

# Root folder that holds both the raw and the pre-processed data.
# NOTE(review): this is a hard-coded Windows drive root, while an earlier cell mounts a
# bucket under /home/jupyter/bucket/ — adjust to the machine the notebook runs on.
path = "D:/"

# Locations handed to Common.get_dataset, in the order it expects them.
hmdb51_vid_dir = path + "processed_datasets/hmdb51_vid25"
hmdb51_splits_dir = path + "datasets/hmdb51_org_splits"
hmdb51_optflow_dir = path + "processed_datasets/hmdb51_optflowl10_npz25"

hmdb51_dataset = Common.get_dataset(
    hmdb51_vid_dir,
    hmdb51_splits_dir,
    hmdb51_optflow_dir,
    split_no,
    "hmdb51",
)

# Show the resulting dataset as cell output.
hmdb51_dataset

## TODO: 2D-CNN

## TwoStream-Network (BaseResnet50v2)

### Pre-Training individual Networks

#### Video Model

In [None]:
# Settings shared by the training and the test generator of the RGB (video) stream.
video_gen_options = dict(
    target_size=(224, 224),
    preprocessing_function=tf.keras.applications.resnet_v2.preprocess_input,
    shape_format="images",
)

# Training generator: single-frame batches with augmentation
# (rotation, shear, zoom and horizontal flip).
video_train_gen = VideoDataGenerator.VideoDataGenerator(
    hmdb51_dataset,
    batch_size=train_batch_size,
    single_frame=True,
    rotation_range=20.0,
    shear_range=20.0,
    zoom_range=0.2,
    horizontal_flip=True,
    **video_gen_options
)

# Test generator: no augmentation arguments and no single_frame flag.
video_test_gen = VideoDataGenerator.VideoDataGenerator(
    hmdb51_dataset,
    batch_size=test_batch_size,
    **video_gen_options
)

In [None]:
# Option 1: continue from a previously saved model
# video_model = tf.keras.models.load_model("models/twostream_25_L10/ResNet50v2/video")

# Option 2: start from scratch (uncomment exactly one architecture)
# video_model = AttentionModels.create_ResidualAttention_ResNet50v2(input_shape=(224, 224, 3), classes=51)
# video_model = AttentionModels.create_CBAM_ResNet50v2(input_shape=(224, 224, 3), classes=51)
video_model = tf.keras.applications.ResNet50V2(
    weights=None,  # random initialisation, no pre-trained weights
    input_shape=(224, 224, 3),
    classes=51,
)

# Track plain accuracy and top-5 accuracy during training and evaluation.
video_top5_metric = tf.keras.metrics.TopKCategoricalAccuracy(5)
video_model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy", video_top5_metric],
)

In [None]:
# Each run logs into its own folder: <model name>_<timestamp of this cell's execution>.
vid_run_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
vid_log_dir = "logs/fit_twostream_25_L10/video/" + video_model.name + "_" + vid_run_stamp

vid_tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=vid_log_dir, histogram_freq=1)

In [None]:
# Train the RGB stream on single-frame batches.
# Uses the TensorBoard callback created for the video model in the preceding cell;
# it must not be replaced by `tensorboard_callback`, which only exists once the LSTM
# section further down has been executed (NameError on a fresh kernel).
video_model.fit(
    video_train_gen,
    epochs=10,
    validation_data=video_test_gen,
    validation_freq=5,  # run validation only every 5th epoch
    callbacks=[vid_tensorboard_callback],
)

In [None]:
# Evaluate the trained video model on the test generator; the returned values
# (loss followed by the compiled metrics) are shown as cell output.
video_model.evaluate(video_test_gen)

In [None]:
# Persist the video model in a folder named after the model.
# NOTE(review): the commented-out load_model call in the model-creation cell reads from
# "models/twostream_25_L10/ResNet50v2/video", which differs from this save path — confirm.
video_model.save("models/twostream_25_L10/video/" + video_model.name)

#### OptFlow Model

In [None]:
# Build a dataset view for the optical-flow stream: discard the RGB "path" column and
# expose "optflow_path" under the name "path" instead. The original hmdb51_dataset is
# left untouched because drop/rename return new frames.
optflow_dataset = (
    hmdb51_dataset
    .drop(columns=["path"])
    .rename(columns={"optflow_path": "path"})
)

# Settings shared by both optical-flow generators (no preprocessing function is applied).
optflow_gen_options = dict(
    target_size=(224, 224),
    preprocessing_function=None,
    shape_format="images",
)

# Training generator: single-frame batches with the same augmentation as the video stream.
optflow_train_gen = VideoDataGenerator.VideoDataGenerator(
    optflow_dataset,
    batch_size=train_batch_size,
    single_frame=True,
    rotation_range=20.0,
    shear_range=20.0,
    zoom_range=0.2,
    horizontal_flip=True,
    **optflow_gen_options
)

# Test generator: no augmentation arguments and no single_frame flag.
optflow_test_gen = VideoDataGenerator.VideoDataGenerator(
    optflow_dataset,
    batch_size=test_batch_size,
    **optflow_gen_options
)

In [None]:
# Load a previously saved optical-flow model instead of creating a new one
# optflow_model = tf.keras.models.load_model("models/twostream_25_L10/ResNet50v2/optflow")

# Create new (alternative architectures are kept commented out for quick switching)
#optflow_model = AttentionModels.create_ResidualAttention_ResNet50v2(input_shape=(224, 224, 20), classes=51)
#optflow_model = AttentionModels.create_CBAM_ResNet50v2(input_shape=(224, 224, 20), classes=51)
#optflow_model = AttentionModels.tiny_cnn((224, 224, 20), 51, False)

# ResNet50V2 trained from scratch (weights=None) on 20-channel input.
# Presumably the 20 channels are 10 stacked flow fields with 2 components each — TODO confirm.
optflow_model = tf.keras.applications.ResNet50V2(input_shape=(224, 224, 20), classes=51, weights=None)

# Same loss, optimizer and metrics as used for the video model.
optflow_model.compile(
    loss="categorical_crossentropy", 
    optimizer="adam", 
    metrics=["accuracy", tf.keras.metrics.TopKCategoricalAccuracy(5)]
)

In [None]:
# Each run logs into its own folder: <model name>_<timestamp of this cell's execution>.
optflow_run_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
optflow_log_dir = "logs/fit_twostream_25_L10/optflow/" + optflow_model.name + "_" + optflow_run_stamp

optflow_tensorboard_callback = tf.keras.callbacks.TensorBoard(
    log_dir=optflow_log_dir,
    histogram_freq=1,
)

In [None]:
# Train the optical-flow stream; validation runs only every 5th epoch and
# progress is written to the optical-flow TensorBoard folder.
optflow_model.fit(
    optflow_train_gen,
    validation_data=optflow_test_gen,
    validation_freq=5,
    epochs=10,
    callbacks=[optflow_tensorboard_callback],
)

In [None]:
# Persist the optical-flow model in a folder named after the model.
optflow_model.save("models/twostream_25_L10/optflow/" + optflow_model.name)

### Training combined TwoStream Model

In [None]:
# Settings shared by both two-stream generators. optflow=True is passed in addition to
# the RGB settings; a later cell unpacks each batch input as (x_video, x_optflow).
twostream_gen_options = dict(
    target_size=(224, 224),
    optflow=True,
    preprocessing_function=tf.keras.applications.resnet_v2.preprocess_input,
)

# Training generator: single-frame batches with augmentation.
twostream_train_gen = VideoDataGenerator.VideoDataGenerator(
    hmdb51_dataset,
    batch_size=train_batch_size,
    single_frame=True,
    rotation_range=20.0,
    shear_range=20.0,
    zoom_range=0.2,
    horizontal_flip=True,
    **twostream_gen_options
)

# Test generator: no augmentation arguments and no single_frame flag.
twostream_test_gen = VideoDataGenerator.VideoDataGenerator(
    hmdb51_dataset,
    batch_size=test_batch_size,
    **twostream_gen_options
)

In [None]:
# Combine the two individually trained streams into one model for 51 classes with
# "average" fusion and recreate_top=True.
# NOTE(review): the result is passed to fit() directly below without an explicit
# compile(); presumably assemble_TwoStreamModel returns a compiled model — confirm.
twostream = Models.assemble_TwoStreamModel(video_model, optflow_model, 51, fusion="average", recreate_top=True)

In [None]:
# TensorBoard logs of the combined model go to
# logs/fit_twostream_25_L10/twostream/<model name>_<timestamp>, mirroring the
# "video/" and "optflow/" sub-folders used for the individual streams.
twostream_tensorboard_callback = tf.keras.callbacks.TensorBoard(
    log_dir="logs/fit_twostream_25_L10/twostream/" + twostream.name + "_" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S"),
    histogram_freq=1)

# Train the combined model.
# NOTE(review): with epochs=1 and validation_freq=10 the validation data is never
# evaluated during this fit call — raise epochs or lower validation_freq if
# validation metrics are wanted.
twostream.fit(
    twostream_train_gen,
    epochs=1,
    validation_data=twostream_test_gen,
    validation_freq=10,
    callbacks=[twostream_tensorboard_callback]
)

In [None]:
# Evaluate the combined two-stream model (created as `twostream` above) on the test generator.
twostream.evaluate(twostream_test_gen)

In [None]:
# Persist the combined two-stream model (created as `twostream` above).
# NOTE(review): the folder is "twostream_25_L1" while the stream models and logs use
# "twostream_25_L10" — confirm which name is intended.
twostream.save("models/twostream_25_L1/ResNet50v2")

### Display attention

In [None]:
# Take the batch at index 4 from the two-stream test generator; its input part is a
# pair holding the video frames and the optical-flow data.
x, y = twostream_test_gen[4]
x_video, x_optflow = x

In [None]:
# Inspect the shape of the video part of the sampled batch.
x_video.shape

In [None]:
# Compute the attention output for the first clip of the sampled batch using the
# combined model (`twostream`), write it out as a GIF and show it inline.
attention = Models.get_twostream_attention(x_video[0], twostream)
Models.video_to_gif(attention, "./attention.gif")

Image(filename="./attention.gif")

In [None]:
# Compute the Grad-CAM output for the first clip of the sampled batch at layer
# "conv5_block3_3_conv" of the combined model (`twostream`), write it out as a GIF
# and show it inline.
gradcam_attention = Models.get_twostream_gradcam(x_video[0], twostream, "conv5_block3_3_conv")
Models.video_to_gif(gradcam_attention, "./gradcam_attention.gif")

Image(filename="./gradcam_attention.gif")

## Tests - Solostream

In [None]:
from tensorflow.keras import layers
from tensorflow.python.keras.models import Sequential, Model

def assemble_SoloStreamModel(spatial_stream_model, classes=51, fusion="average", recreate_top=False):
    """Wrap a per-frame model so that it classifies a sequence of frames.

    The given model is applied to every time step through ``TimeDistributed`` and the
    per-frame outputs are merged along the time axis (late fusion).

    Args:
        spatial_stream_model: Keras model operating on single frames; the shape of its
            first input defines the input of the assembled model.
        classes: Number of classes; only forwarded to ``Models.recreate_top_fn`` when
            ``recreate_top`` is true.
        fusion: Late-fusion strategy. Only ``"average"`` (mean over the time axis) is
            supported.
        recreate_top: If true, ``Models.recreate_top_fn(spatial_stream_model, classes)``
            is time-distributed instead of the model itself.

    Returns:
        A compiled Keras model (adam, categorical cross-entropy, accuracy and top-5
        accuracy).

    Raises:
        ValueError: If ``fusion`` names an unsupported strategy.
    """
    # Fail early and clearly; previously an unknown value left the *string* itself as
    # the model output and produced an obscure error inside Keras.
    if fusion != "average":
        raise ValueError(
            "Unsupported fusion %r; only 'average' is implemented." % (fusion,)
        )

    # The shape of the wrapped model's input is reused as the per-sample shape here.
    # Presumably its leading (batch) dimension thereby becomes the time axis — TODO confirm.
    spatial_stream_input = layers.Input(spatial_stream_model.inputs[0].shape)

    if recreate_top:
        frame_model = Models.recreate_top_fn(spatial_stream_model, classes)
    else:
        frame_model = spatial_stream_model
    spatial_stream = layers.TimeDistributed(frame_model)(spatial_stream_input)

    # late fusion: average the per-frame outputs over the time axis
    fused = tf.math.reduce_mean(spatial_stream, axis=1)

    # Use the public tf.keras.Model so the model class comes from the same Keras
    # implementation as the layers (the private tensorflow.python.keras copy can clash).
    model = tf.keras.Model(inputs=spatial_stream_input, outputs=fused)
    model.compile(
        optimizer='adam',
        loss="categorical_crossentropy",
        # "accuracy" resolves to categorical accuracy for this loss, matching the other
        # models in this notebook; metrics.Accuracy() would compare raw prediction
        # values with the one-hot labels for exact equality.
        metrics=["accuracy", tf.keras.metrics.TopKCategoricalAccuracy(5)])
    return model

In [None]:
# Build the single-stream sequence model on top of the trained video model,
# with recreate_top=True and "average" fusion.
solostream = assemble_SoloStreamModel(video_model, 51, fusion="average", recreate_top=True)

In [None]:
# Train the single-stream model for 10 epochs.
# NOTE(review): `train_gen` and `test_gen` are not defined anywhere in the visible
# notebook (only video_*/optflow_*/twostream_* generators exist) — this cell fails on a
# fresh kernel until the intended generators are created or named here.
solostream.fit(
    train_gen,
    epochs=10,
    validation_data=test_gen
)

In [None]:
# Second, identical fit call: running it continues training for another 10 epochs.
# NOTE(review): this duplicates the preceding cell and also relies on the undefined
# `train_gen` / `test_gen` — confirm whether it is intentional or a leftover.
solostream.fit(
    train_gen,
    epochs=10,
    validation_data=test_gen
)

## LSTM

### Basenetwork

In [None]:
# Base network for the LSTM model. Called without arguments, so the Keras defaults
# for ResNet50V2 apply (unlike the stream models above, which pass weights=None).
basenet = tf.keras.applications.ResNet50V2()

### LSTM

In [None]:
# Assemble the LSTM model on top of the base network for 51 classes, with recreate_top=True.
lstm = Models.assemble_lstm(basenet, classes=51, recreate_top=True)

In [None]:
# TensorBoard folder for this run: fixed prefix plus the timestamp of this cell's execution.
log_dir = "logs/fit_lstm_25_L1/resnet50v2_" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

# Train the LSTM model for 10 epochs.
# NOTE(review): `train_gen` and `test_gen` are not defined anywhere in the visible
# notebook — this call fails on a fresh kernel until the intended generators exist.
lstm.fit(
    train_gen,
    epochs=10,
    validation_data=test_gen,
    callbacks=[tensorboard_callback]
)

In [None]:
# Persist the trained LSTM model.
lstm.save("models/lstm_25_L1/ResNet50v2")

## TODO: 3D-CNN

## TODO: (2+1)D-CNN