In [1]:
from dvclive import Live
from dvclive.keras import DVCLiveCallback

2025-03-22 22:24:49.966826: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-03-22 22:24:49.993294: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1742682290.021639   20446 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1742682290.027963   20446 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1742682290.044280   20446 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

## Data Loading

In [2]:
# get root directory
from pathlib import Path
import os

ROOT_DIR = Path().resolve().parent  # Assumes notebook is in a subdirectory
ORI_DATA_PATH = os.path.join(ROOT_DIR, 'data', 'raw')

In [3]:
from datasets import load_dataset, DatasetDict, Video

ds = load_dataset("videofolder", data_dir=ORI_DATA_PATH).cast_column("video", Video(decode=False))
ds_train_devtest = ds['train'].train_test_split(test_size=0.2, seed=42)
ds_devtest = ds_train_devtest['test'].train_test_split(test_size=0.5, seed=42)

ds = DatasetDict({
    'train': ds_train_devtest['train'],
    'valid': ds_devtest['train'],
    'test': ds_devtest['test']
})

Resolving data files:   0%|          | 0/743 [00:00<?, ?it/s]

In [4]:
label_feature = ds['train'].features['label']
label_names = label_feature.names
label_dict = {i: name for i, name in enumerate(label_names)}

print(label_dict)

{0: 'apa kabar', 1: 'ayo jalan-jalan', 2: 'jaga kesehatan', 3: 'kamu mau kemana', 4: 'kamu tinggal dimana', 5: 'mau pesan apa', 6: 'nama kamu siapa', 7: 'salam kenal', 8: 'sama-sama', 9: 'sampai jumpa lagi', 10: 'saya minta maaf', 11: 'sekarang jam berapa', 12: 'selamat malam', 13: 'selamat pagi', 14: 'selamat siang', 15: 'terima kasih'}


## Preprocessing

In [12]:
PROCESSED_DATA_PATH = os.path.join(ROOT_DIR, 'data', 'interim', 'trim_padded_ds_random')
MAX_SEQ_LEN = 100

In [8]:
import tensorflow as tf
import tensorflow_io as tfio
# import decord
# from decord import VideoReader, cpu
# decord.bridge.set_bridge('tensorflow')

def trim_pad(example, max_seq_length):
    # vr = VideoReader(example["video"]["path"], ctx=cpu(0))
    frames = tf.io.read_file(example["video"]["path"])
    frames = tfio.experimental.ffmpeg.decode_video(frames)
    frames = frames[:max_seq_length]
    video_length = frames.shape[0]

    # Trim or pad frames to MAX_SEQ_LENGTH
    if video_length == max_seq_length:
        # create mask
        mask = tf.ones(max_seq_length, dtype=tf.bool)
    else:
        # create mask and pad if too short
        mask = tf.zeros(max_seq_length, dtype=tf.bool)
        mask = tf.tensor_scatter_nd_update(
            mask,
            tf.reshape(tf.range(video_length), [-1, 1]),
            tf.ones(video_length, dtype=tf.bool)
        )
        # Pad with zeros
        padding = tf.zeros((max_seq_length - video_length, *frames.shape[1:]), dtype=frames.dtype)
        frames = tf.concat([frames, padding], axis=0)

    return {
        "frames": frames,
        "mask": mask,
    }

In [9]:
ds = ds.map(
    trim_pad,
    MAX_SEQ_LEN,
    batched=False,
    remove_columns=["video"]
)

ds.save_to_disk(PROCESSED_DATA_PATH)

Map:   0%|          | 0/742 [00:00<?, ? examples/s]

2025-03-22 21:31:29.016738: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)
caused by: ['/lib/x86_64-linux-gnu/libgobject-2.0.so.0: undefined symbol: ffi_type_uint32, version LIBFFI_BASE_7.0']
caused by: ['libavformat.so.57: cannot open shared object file: No such file or directory']
caused by: ['libavcodec-ffmpeg.so.56: cannot open shared object file: No such file or directory']


NotImplementedError: ('could not find ffmpeg after search through ', ['libtensorflow_io_ffmpeg_4.2.so', 'libtensorflow_io_ffmpeg_3.4.so', 'libtensorflow_io_ffmpeg_2.8.so'])

## Building Model (Preprocessing, feature extraction, sequence recognition, and classification included)

In [5]:
IMAGE_SIZE = 299

### Preprocessing Layers

In [6]:
# trim and extend
import tensorflow as tf

class VideoTrimmerExtender(tf.keras.layers.Layer):
    def __init__(self, max_seq_length, **kwargs):
        super(VideoTrimmerExtender, self).__init__(**kwargs)
        self.max_seq_length = max_seq_length

    @tf.function
    def call(self, frames):
        video_shape = tf.shape(frames)

        if len(frames.shape) == 5:  # (batch, frames, height, width, channels)
            batch_size = video_shape[0]
            video_length = video_shape[1]
            height = video_shape[2]
            width = video_shape[3]
            channels = video_shape[4]

            # if video is longer
            if video_length >= self.max_seq_length:
                frames = frames[:, :self.max_seq_length]
                mask = tf.ones([batch_size, self.max_seq_length], dtype=tf.bool)

            else:
                mask = tf.zeros([batch_size, self.max_seq_length], dtype=tf.bool)

                # Create indices for the valid frames using meshgrid
                batch_indices = tf.tile(
                    tf.expand_dims(tf.range(batch_size), 1),
                    [1, video_length]
                )
                time_indices = tf.tile(
                    tf.expand_dims(tf.range(video_length), 0),
                    [batch_size, 1]
                )

                # Stack indices to create coordinate pairs
                indices = tf.stack([
                    tf.reshape(batch_indices, [-1]),
                    tf.reshape(time_indices, [-1])
                ], axis=1)

                # Update mask
                mask = tf.tensor_scatter_nd_update(
                    mask,
                    indices,
                    tf.ones(batch_size * video_length, dtype=tf.bool)
                )

                # Create padding with all 5 dimensions
                padding_shape = [
                    batch_size,                   # batch
                    self.max_seq_length - video_length,  # time
                    height,                       # height
                    width,                        # width
                    channels                      # channels
                ]
                padding = tf.zeros(padding_shape, dtype=frames.dtype)
                # Concatenate along time dimension (axis=1), NOT batch dimension
                frames = tf.concat([frames, padding], axis=1)
        else:
            raise ValueError("Expected input shape with 5 dimensions (batch, frames, height, width, channels)")

        return frames, mask

    def compute_output_shape(self, input_shape):
        # Return shape for frames and mask
        frames_shape = (None,) + input_shape[1:]
        mask_shape = (self.max_seq_length,)
        return [frames_shape, mask_shape]

In [7]:
# center cropping

class CenterSquareCrop(tf.keras.layers.Layer):
    def __init__(self, **kwargs):
        super(CenterSquareCrop, self).__init__(**kwargs)

    def call(self, inputs):
        if len(inputs.shape) != 4:  # (frames, height, width, channels)
            raise ValueError("Expected input shape with 4 dimensions (frames, height, width, channels)")

        # Get the dynamic shape of the input image
        shape = tf.shape(inputs)
        height = shape[1]
        width = shape[2]
        # Determine the side length of the largest possible central square
        crop_size = tf.minimum(height, width)
        # Compute offsets for centering the crop
        offset_height = (height - crop_size) // 2
        offset_width = (width - crop_size) // 2
        # Crop the central square from each image in the batch
        return tf.image.crop_to_bounding_box(inputs, offset_height, offset_width, crop_size, crop_size)

In [8]:
class CustomPreprocessing(tf.keras.layers.Layer):
    def __init__(self, image_size, **kwargs):
        super(CustomPreprocessing, self).__init__(**kwargs)
        self.image_size = image_size

        # create all layers during initialization
        self.random_brightness = tf.keras.layers.RandomBrightness(0.2)
        self.random_contrast = tf.keras.layers.RandomContrast(0.2)
        self.center_crop = CenterSquareCrop()
        self.resize = tf.keras.layers.Resizing(image_size, image_size)

    def call(self, inputs, training=None):
        # 1. Transpose inputs to (batch, height, width, channels * time)
        shape = tf.shape(inputs)
        batch = shape[0]
        time = shape[1]
        width = shape[2]
        height = shape[3]
        channels = shape[4]
        x = tf.transpose(inputs, [0, 2, 3, 4, 1])
        # combine channels and time
        x = tf.reshape(
            x,
            (batch, width, height, time * channels)
        )

        # 2. Apply Preprocessing
        if training:
            x = self.random_brightness(x)
            x = self.random_contrast(x)
        x = self.center_crop(x)
        x = self.resize(x)

        # 3. Transpose back to (batch, time, height, width, channels)
        new_shape = tf.shape(x)
        new_batch = new_shape[0]
        new_width = new_shape[1]
        new_height = new_shape[2]
        new_channels = new_shape[3]
        # Calculate time dimension for reshape
        time_dim = new_channels // channels

        x = tf.reshape(
            x,
            (new_batch, new_width, new_height, channels, time_dim)
        )
        x = tf.transpose(x, [0, 4, 1, 2, 3])
        return x

    def compute_output_shape(self, input_shape):
        batch, time, _, _, _ = input_shape
        return (batch, time, self.image_size, self.image_size, 3)

### Feature Extraction

In [9]:
class VideoFeatureExtractor(tf.keras.layers.Layer):
    def __init__(self, **kwargs):
        super(VideoFeatureExtractor, self).__init__(**kwargs)
        self.feature_extractor = tf.keras.applications.InceptionV3(
            include_top=False,
            weights='imagenet',
            pooling='avg',
            input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3),
        )

    def call(self, frames):
        # Preprocess frames
        frames = tf.keras.applications.inception_v3.preprocess_input(frames)
        # Extract features from frames
        features = self.feature_extractor(frames)
        return features

    def compute_output_shape(self, input_shape):
        return (input_shape[0], 2048)

## Building Model

In [13]:
# input layer
frames_inputs = tf.keras.layers.Input(shape=(MAX_SEQ_LEN, None, None, 3), name='frames')
mask_inputs = tf.keras.layers.Input(shape=(MAX_SEQ_LEN,), name='mask')


# preprocessing process =====
# frames, mask = VideoTrimmerExtender(max_seq_length=MAX_SEQ_LEN)(inputs)

x = CustomPreprocessing(image_size=IMAGE_SIZE, name="preprocessing")(frames_inputs)
# ============================

# feature extraction process =====
# features output shape is (time, features)
feature_extractor = VideoFeatureExtractor()
features = tf.keras.layers.TimeDistributed(feature_extractor,
                                           name="cnn_feature_extractor")(x)
# ================================

# lstm process
x = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(256, return_sequences=True))(features, mask=mask_inputs)

# classification process
x = tf.keras.layers.Dropout(0.4)(x)
x = tf.keras.layers.LSTM(128)(x)
x = tf.keras.layers.Dropout(0.3)(x)
x = tf.keras.layers.Dense(1024, activation="elu")(x)
x = tf.keras.layers.Dense(512, activation='elu')(x)
output = tf.keras.layers.Dense(len(label_names), activation="softmax")(x)

model = tf.keras.Model(inputs=[frames_inputs, mask_inputs], outputs=output)



In [14]:
model.summary()

In [15]:
model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer=tf.keras.optimizers.Adam(1e-4),
    metrics=["accuracy"]
)

## Training Model


In [16]:
BATCH_SIZE = 8
EPOCHS = 500
MODEL_SAVE_PATH = os.path.join(ROOT_DIR, 'models', 'inceptionv3-lstm.keras')

In [17]:

# ds_train_devtest = ds['train'].train_test_split(test_size=0.2, seed=42)
# ds_devtest = ds_train_devtest['test'].train_test_split(test_size=0.5, seed=42)

# ds = DatasetDict({
#     'train': ds_train_devtest['train'],
#     'valid': ds_devtest['train'],
#     'test': ds_devtest['test']
# })

In [None]:
import decord

In [23]:
vr = VideoReader(ds["train"][0]["video"]["path"])
vr[:10]

<tf.Tensor: shape=(10, 1080, 1620, 3), dtype=uint8, numpy=
array([[[[234, 227, 222],
         [234, 227, 222],
         [234, 227, 222],
         ...,
         [137, 135, 136],
         [138, 136, 137],
         [139, 137, 138]],

        [[234, 227, 222],
         [234, 227, 222],
         [234, 227, 222],
         ...,
         [137, 135, 136],
         [138, 136, 137],
         [139, 137, 138]],

        [[234, 227, 222],
         [234, 227, 222],
         [234, 227, 222],
         ...,
         [137, 135, 136],
         [138, 136, 137],
         [139, 137, 138]],

        ...,

        [[207, 200, 193],
         [207, 200, 193],
         [207, 200, 193],
         ...,
         [ 81,  43,  23],
         [ 80,  42,  20],
         [ 80,  42,  20]],

        [[208, 201, 194],
         [208, 201, 194],
         [208, 201, 194],
         ...,
         [ 78,  40,  20],
         [ 77,  39,  17],
         [ 77,  39,  17]],

        [[163, 156, 149],
         [163, 156, 149],
         [163, 

In [27]:
import decord
from decord import VideoReader

def video_generator(dataset, max_seq_length):
    """A generator that yields video frames and labels."""
    for example in dataset:
        vr = VideoReader(example["video"]["path"])
        frames = vr[:max_seq_length]
        frames = tf.convert_to_tensor(frames.asnumpy())

        video_length = frames.shape[0]

        # Trim or pad frames to MAX_SEQ_LENGTH
        if video_length == max_seq_length:
            # create mask
            mask = tf.ones(max_seq_length, dtype=tf.bool)
        else:
            # create mask and pad if too short
            mask = tf.zeros(max_seq_length, dtype=tf.bool)
            mask = tf.tensor_scatter_nd_update(
                mask,
                tf.reshape(tf.range(video_length), [-1, 1]),
                tf.ones(video_length, dtype=tf.bool)
            )
            # Pad with zeros
            padding = tf.zeros((max_seq_length - video_length, *frames.shape[1:]), dtype=frames.dtype)
            frames = tf.concat([frames, padding], axis=0)

        yield (frames, mask), example["label"]



train_ds = tf.data.Dataset.from_generator(
    lambda: video_generator(ds['train'], MAX_SEQ_LEN),
    output_signature=(
        (tf.TensorSpec(shape=(None, None, None, 3), dtype=tf.uint8), tf.TensorSpec(shape=(None,), dtype=tf.bool)),
        tf.TensorSpec(shape=(), dtype=tf.int64)
    )
).batch(BATCH_SIZE).prefetch(tf.data.experimental.AUTOTUNE)

valid_ds = tf.data.Dataset.from_generator(
    lambda: video_generator(ds['valid'], MAX_SEQ_LEN),
    output_signature=(
        (tf.TensorSpec(shape=(None, None, None, 3), dtype=tf.uint8), tf.TensorSpec(shape=(None,), dtype=tf.bool)),
        tf.TensorSpec(shape=(), dtype=tf.int64)
    )
).batch(BATCH_SIZE).prefetch(tf.data.experimental.AUTOTUNE)

test_ds = tf.data.Dataset.from_generator(
    lambda: video_generator(ds['test'], MAX_SEQ_LEN),
    output_signature=(
        (tf.TensorSpec(shape=(None, None, None, 3), dtype=tf.uint8), tf.TensorSpec(shape=(None,), dtype=tf.bool)),
        tf.TensorSpec(shape=(), dtype=tf.int64)
    )
).batch(BATCH_SIZE).prefetch(tf.data.experimental.AUTOTUNE)

In [28]:
tf.keras.backend.clear_session()

In [29]:
early_stopping = tf.keras.callbacks.EarlyStopping(monitor="val_loss",
                                                    patience=50,
                                                    restore_best_weights=True,
                                                    verbose=1)


with Live(dir=os.path.join(ROOT_DIR, 'dvclive')) as live:
    history = model.fit(
        train_ds,
        validation_data=valid_ds,
        batch_size=BATCH_SIZE,
        epochs=EPOCHS,
        callbacks=[
            early_stopping,
            DVCLiveCallback(live=live)
        ],
        verbose=2
    )

    model.save(MODEL_SAVE_PATH)
    live.log_artifact(
        str(MODEL_SAVE_PATH),
        type="model",
        name="inceptionv3-lstm",
        desc="InceptionV3 + LSTM model",
        labels=["inceptionv3", "lstm", "adam", "sparse_categorical_crossentropy", "1e-4"],
    )

    loss, accuracy = model.evaluate(test_ds, verbose=0)
    live.log_metric("test_loss", loss)
    live.log_metric("test_accuracy", accuracy)
    print(f"Test accuracy: {round(accuracy * 100, 2)}%")

Epoch 1/500


2025-03-22 22:32:12.647299: I tensorflow/core/framework/local_rendezvous.cc:426] Local rendezvous recv item cancelled. Key hash: 14117750193991197659
2025-03-22 22:32:12.647326: I tensorflow/core/framework/local_rendezvous.cc:426] Local rendezvous recv item cancelled. Key hash: 14516317140644479754
2025-03-22 22:32:12.647333: I tensorflow/core/framework/local_rendezvous.cc:426] Local rendezvous recv item cancelled. Key hash: 11309262624434904116
2025-03-22 22:32:12.647805: I tensorflow/core/framework/local_rendezvous.cc:426] Local rendezvous recv item cancelled. Key hash: 5245893010251101508
	.python-version, notebooks/0.1.1-sam-cnn-embedded-lstm-model.ipynb


InvalidArgumentError: Graph execution error:

Detected at node IteratorGetNext defined at (most recent call last):
  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/runpy.py", line 196, in _run_module_as_main

  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/runpy.py", line 86, in _run_code

  File "/teamspace/studios/this_studio/bisindo-video-recognition/.venv/lib/python3.10/site-packages/ipykernel_launcher.py", line 18, in <module>

  File "/teamspace/studios/this_studio/bisindo-video-recognition/.venv/lib/python3.10/site-packages/traitlets/config/application.py", line 1075, in launch_instance

  File "/teamspace/studios/this_studio/bisindo-video-recognition/.venv/lib/python3.10/site-packages/ipykernel/kernelapp.py", line 739, in start

  File "/teamspace/studios/this_studio/bisindo-video-recognition/.venv/lib/python3.10/site-packages/tornado/platform/asyncio.py", line 205, in start

  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/asyncio/base_events.py", line 603, in run_forever

  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/asyncio/base_events.py", line 1909, in _run_once

  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/asyncio/events.py", line 80, in _run

  File "/teamspace/studios/this_studio/bisindo-video-recognition/.venv/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 545, in dispatch_queue

  File "/teamspace/studios/this_studio/bisindo-video-recognition/.venv/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 534, in process_one

  File "/teamspace/studios/this_studio/bisindo-video-recognition/.venv/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 437, in dispatch_shell

  File "/teamspace/studios/this_studio/bisindo-video-recognition/.venv/lib/python3.10/site-packages/ipykernel/ipkernel.py", line 362, in execute_request

  File "/teamspace/studios/this_studio/bisindo-video-recognition/.venv/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 778, in execute_request

  File "/teamspace/studios/this_studio/bisindo-video-recognition/.venv/lib/python3.10/site-packages/ipykernel/ipkernel.py", line 449, in do_execute

  File "/teamspace/studios/this_studio/bisindo-video-recognition/.venv/lib/python3.10/site-packages/ipykernel/zmqshell.py", line 549, in run_cell

  File "/teamspace/studios/this_studio/bisindo-video-recognition/.venv/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3077, in run_cell

  File "/teamspace/studios/this_studio/bisindo-video-recognition/.venv/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3132, in _run_cell

  File "/teamspace/studios/this_studio/bisindo-video-recognition/.venv/lib/python3.10/site-packages/IPython/core/async_helpers.py", line 128, in _pseudo_sync_runner

  File "/teamspace/studios/this_studio/bisindo-video-recognition/.venv/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3336, in run_cell_async

  File "/teamspace/studios/this_studio/bisindo-video-recognition/.venv/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3519, in run_ast_nodes

  File "/teamspace/studios/this_studio/bisindo-video-recognition/.venv/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3579, in run_code

  File "/tmp/ipykernel_20446/3578193721.py", line 8, in <module>

  File "/teamspace/studios/this_studio/bisindo-video-recognition/.venv/lib/python3.10/site-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/teamspace/studios/this_studio/bisindo-video-recognition/.venv/lib/python3.10/site-packages/keras/src/backend/tensorflow/trainer.py", line 371, in fit

  File "/teamspace/studios/this_studio/bisindo-video-recognition/.venv/lib/python3.10/site-packages/keras/src/backend/tensorflow/trainer.py", line 219, in function

  File "/teamspace/studios/this_studio/bisindo-video-recognition/.venv/lib/python3.10/site-packages/keras/src/backend/tensorflow/trainer.py", line 132, in multi_step_on_iterator

Detected at node IteratorGetNext defined at (most recent call last):
  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/runpy.py", line 196, in _run_module_as_main

  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/runpy.py", line 86, in _run_code

  File "/teamspace/studios/this_studio/bisindo-video-recognition/.venv/lib/python3.10/site-packages/ipykernel_launcher.py", line 18, in <module>

  File "/teamspace/studios/this_studio/bisindo-video-recognition/.venv/lib/python3.10/site-packages/traitlets/config/application.py", line 1075, in launch_instance

  File "/teamspace/studios/this_studio/bisindo-video-recognition/.venv/lib/python3.10/site-packages/ipykernel/kernelapp.py", line 739, in start

  File "/teamspace/studios/this_studio/bisindo-video-recognition/.venv/lib/python3.10/site-packages/tornado/platform/asyncio.py", line 205, in start

  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/asyncio/base_events.py", line 603, in run_forever

  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/asyncio/base_events.py", line 1909, in _run_once

  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/asyncio/events.py", line 80, in _run

  File "/teamspace/studios/this_studio/bisindo-video-recognition/.venv/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 545, in dispatch_queue

  File "/teamspace/studios/this_studio/bisindo-video-recognition/.venv/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 534, in process_one

  File "/teamspace/studios/this_studio/bisindo-video-recognition/.venv/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 437, in dispatch_shell

  File "/teamspace/studios/this_studio/bisindo-video-recognition/.venv/lib/python3.10/site-packages/ipykernel/ipkernel.py", line 362, in execute_request

  File "/teamspace/studios/this_studio/bisindo-video-recognition/.venv/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 778, in execute_request

  File "/teamspace/studios/this_studio/bisindo-video-recognition/.venv/lib/python3.10/site-packages/ipykernel/ipkernel.py", line 449, in do_execute

  File "/teamspace/studios/this_studio/bisindo-video-recognition/.venv/lib/python3.10/site-packages/ipykernel/zmqshell.py", line 549, in run_cell

  File "/teamspace/studios/this_studio/bisindo-video-recognition/.venv/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3077, in run_cell

  File "/teamspace/studios/this_studio/bisindo-video-recognition/.venv/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3132, in _run_cell

  File "/teamspace/studios/this_studio/bisindo-video-recognition/.venv/lib/python3.10/site-packages/IPython/core/async_helpers.py", line 128, in _pseudo_sync_runner

  File "/teamspace/studios/this_studio/bisindo-video-recognition/.venv/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3336, in run_cell_async

  File "/teamspace/studios/this_studio/bisindo-video-recognition/.venv/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3519, in run_ast_nodes

  File "/teamspace/studios/this_studio/bisindo-video-recognition/.venv/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3579, in run_code

  File "/tmp/ipykernel_20446/3578193721.py", line 8, in <module>

  File "/teamspace/studios/this_studio/bisindo-video-recognition/.venv/lib/python3.10/site-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/teamspace/studios/this_studio/bisindo-video-recognition/.venv/lib/python3.10/site-packages/keras/src/backend/tensorflow/trainer.py", line 371, in fit

  File "/teamspace/studios/this_studio/bisindo-video-recognition/.venv/lib/python3.10/site-packages/keras/src/backend/tensorflow/trainer.py", line 219, in function

  File "/teamspace/studios/this_studio/bisindo-video-recognition/.venv/lib/python3.10/site-packages/keras/src/backend/tensorflow/trainer.py", line 132, in multi_step_on_iterator

2 root error(s) found.
  (0) INVALID_ARGUMENT:  Cannot batch tensors with different shapes in component 0. First element had shape [100,1080,1620,3] and element 6 had shape [100,720,1280,3].
	 [[{{node IteratorGetNext}}]]
	 [[StatefulPartitionedCall/Shape/_8]]
  (1) INVALID_ARGUMENT:  Cannot batch tensors with different shapes in component 0. First element had shape [100,1080,1620,3] and element 6 had shape [100,720,1280,3].
	 [[{{node IteratorGetNext}}]]
0 successful operations.
0 derived errors ignored. [Op:__inference_multi_step_on_iterator_68763]