In [1]:
# Mount Google Drive at /content/drive (Colab-only API). Later cells read the
# .npy arrays and read/write model checkpoints under
# /content/drive/MyDrive/kaggle, so this cell must run first.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [6]:
import pandas as pd, numpy as np
import matplotlib.pyplot as plt
from matplotlib.backends.backend_agg import FigureCanvasAgg
import tensorflow as tf
from tqdm.notebook import tqdm
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Flatten, Rescaling, Conv2D, MaxPooling2D, Dropout, GlobalAveragePooling2D
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.optimizers import Adam, SGD, RMSprop, Adagrad
from tensorflow.keras.losses import KLDivergence
import keras
from keras import layers

In [3]:
# Load the target array from Drive. The training loop slices it row-wise so
# that each slice lines up with the rows of the X_data_batch_filtered_* files.
# (Plain string literal: the path has no placeholders, so no f-prefix needed.)
y_data = np.load('/content/drive/MyDrive/kaggle/y_data.npy')
y_data.shape

In [8]:
def build_rn_model(version=0):
    """Return the ResNet50V2-based 6-output model.

    Args:
        version: If truthy, build and compile a new model: a frozen
            ImageNet-pretrained ResNet50V2 backbone followed by global
            average pooling and a 6-unit softmax layer. If falsy (the
            default), load a previously saved model from
            ``checkpoint_filepath`` instead.

    Returns:
        A Keras model. On the build path it is compiled with
        Adam(learning_rate=1e-5) and KL-divergence loss; on the load path it
        is whatever ``load_model`` restores from the checkpoint file.
    """
    # NOTE(review): the training cell saves to 'rn_model_checkpoint_v1.h5',
    # while this path has no '_v1' suffix -- confirm which file is intended.
    checkpoint_filepath = '/content/drive/MyDrive/kaggle/rn_model_checkpoint.h5'
    if not version:
        return load_model(checkpoint_filepath)

    # With include_top=False the ImageNet classifier head is not built, so
    # `classes` / `classifier_activation` would have no effect and are not
    # passed. `pooling` only recognises None, 'avg' and 'max'; any other
    # string (such as 'average') is silently ignored. pooling=None keeps the
    # 4-D feature map, which is pooled by the explicit layer below.
    pretrained_model = keras.applications.ResNet50V2(
        include_top=False,
        weights="imagenet",
        input_shape=(400, 256, 3),
        pooling=None,
    )

    # Freeze the whole backbone; only the Dense head is trained.
    pretrained_model.trainable = False

    # NOTE(review): the ImageNet weights were trained on inputs passed through
    # keras.applications.resnet_v2.preprocess_input (scaled to [-1, 1]).
    # Nothing in this model rescales its input -- confirm the arrays fed to
    # fit() are already preprocessed accordingly.
    model = Sequential([
        pretrained_model,
        GlobalAveragePooling2D(),
        Dense(6, activation='softmax'),
    ])

    model.compile(
        optimizer=Adam(learning_rate=0.00001),
        loss=KLDivergence(),
    )
    return model

In [9]:
# A truthy `version` makes build_rn_model construct and compile a new model
# from the ImageNet-pretrained backbone instead of loading the saved
# checkpoint. summary() prints the layer stack and parameter counts.
rn_model = build_rn_model(version=1)
rn_model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 model_1 (Functional)        (None, 13, 8, 2048)       23564800  
                                                                 
 global_average_pooling2d_1  (None, 2048)              0         
  (GlobalAveragePooling2D)                                       
                                                                 
 dense_1 (Dense)             (None, 6)                 12294     
                                                                 
Total params: 23577094 (89.94 MB)
Trainable params: 12294 (48.02 KB)
Non-trainable params: 23564800 (89.89 MB)
_________________________________________________________________


In [None]:
# Train rn_model file by file: each X_data_batch_filtered_<suffix>.npy is
# loaded from Drive, fed to fit() in sub-batches of `batch_size` rows, and the
# model is checkpointed after every file so an interrupted run loses at most
# one file's worth of training.
checkpoint_filepath_v1 = '/content/drive/MyDrive/kaggle/rn_model_checkpoint_v1.h5'

batch_size = 16
num_batches = 12          # number of X_data_batch_filtered_<suffix>.npy files
rows_per_file = 792       # file i carries the suffix i * rows_per_file ...
last_file_suffix = 8713   # ... except the final file, which carries this one

# Index into y_data of the first row of the file currently being processed.
# A running offset stays correct even when a file (e.g. the last one) has a
# different number of rows than the others.
label_offset = 0

for i in range(1, num_batches + 1):
    # The special-cased suffix applies to the last file, i.e. i == num_batches
    # (comparing the loop counter with 8713 itself can never be true).
    file_suffix = last_file_suffix if i == num_batches else i * rows_per_file
    batch_path = f'/content/drive/MyDrive/kaggle/X_data_batch_filtered_{file_suffix}.npy'

    X_batch = np.load(batch_path)
    print(f'File {i}/{num_batches} loaded')

    # Labels for this file; assumes y_data rows follow the same order as the
    # concatenation of the files -- TODO confirm against the export script.
    y_batch = y_data[label_offset:label_offset + len(X_batch)]
    if len(y_batch) != len(X_batch):
        raise ValueError(
            f'{batch_path}: {len(X_batch)} inputs but only {len(y_batch)} '
            f'labels available from y_data offset {label_offset}'
        )
    label_offset += len(X_batch)

    # Step through the file in chunks of batch_size, including the final
    # partial chunk so no rows are silently dropped.
    for start_idx in range(0, len(X_batch), batch_size):
        end_idx = start_idx + batch_size
        X_sub_batch = X_batch[start_idx:end_idx]
        y_sub_batch = y_batch[start_idx:end_idx]

        # validation_split needs at least one training row and one
        # validation row; Keras raises ValueError for a single-row input.
        if len(X_sub_batch) < 2:
            continue

        rn_model.fit(X_sub_batch, y_sub_batch, epochs=1, validation_split=0.2)

    rn_model.save(checkpoint_filepath_v1)

File 1/12 loaded
File 2/12 loaded
File 3/12 loaded
File 4/12 loaded