# SimCLR and Few-shot Learning

We train a SimCLR base model on a meta-dataset composed of DIRG+Paderborn, then transfer it to a CWRU few-shot dataset.

In [1]:
# %xmode minimal

import os
import json

# Runtime configuration through environment variables. Everything here is set
# before `keras` / `tensorflow` are imported further down in this cell.
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"  # disable GPU devices
os.environ["TFDS_DATA_DIR"] = os.path.expanduser("~/tensorflow_datasets")  # default location of tfds database

# Turn off logging for TF
import logging
# Suppress every Python `logging` call of severity WARNING or lower, process-wide.
logging.disable(logging.WARNING)
# Presumably filters INFO and WARNING messages from the TF C++ runtime — TODO confirm.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"

# NOTE(review): `os` is already imported at the top of this cell; this re-import is redundant.
import os
# Select the Keras backend; exactly one of the three lines below should be active.
# NOTE(review): a traceback captured later in this notebook mentions `jnp.ndarray`,
# so some stored outputs appear to come from a run with a different backend.
# os.environ["KERAS_BACKEND"] = "jax"
# os.environ["KERAS_BACKEND"] = "torch"
os.environ["KERAS_BACKEND"] = "tensorflow"

import keras
from keras import layers, models

import tensorflow as tf
import tensorflow_datasets as tfds
tf.get_logger().setLevel(logging.ERROR)

import librosa
import librosa.display

import numpy as np
from matplotlib import pyplot as plt

from pathlib import Path

from IPython.display import Audio

# from tensorflow.python.client import device_lib
# print(device_lib.list_local_devices())

from keras import layers, models, ops, losses, metrics

from keras.applications import resnet

# from keras.applications import vgg16

# tf.config.experimental_run_functions_eagerly(True)

# from tensorflow.python.client import device_lib
# print(device_lib.list_local_devices())


In [2]:
import dpmhm
# dpmhm.datasets.get_dataset_list()

from dpmhm.datasets import preprocessing, transformer, feature, utils, spectral_window_pipeline, spectral_pipeline
from dpmhm.models import simclr

# Scratch directory for the artefacts written by this notebook; it is created
# (together with any missing parent directories) if it does not exist yet.
workdir = Path("~/tmp/dpmhm/SimCLR-fewshot").expanduser()
workdir.mkdir(parents=True, exist_ok=True)

## Build the meta-dataset from DIRG and Paderborn

In [3]:
def _func(x, sr):
    """Feature extractor handed to the dataset pipelines in this notebook.

    Calls ``feature.spectral_features`` with the ``'spectrogram'`` method,
    ``time_window=0.025`` and ``hop_step=0.0125`` (presumably seconds — TODO
    confirm), without normalization and with dB conversion, and returns only
    the first element of its result.

    Args:
        x: signal forwarded unchanged to ``feature.spectral_features``.
        sr: second positional argument forwarded unchanged (presumably the
            sampling rate — TODO confirm).
    """
    # Options that were tried and are currently disabled: n_mfcc=256, n_fft=1024.
    outputs = feature.spectral_features(
        x, sr, 'spectrogram',
        time_window=0.025,
        hop_step=0.0125,
        normalize=False,
        to_db=True,
    )
    return outputs[0]

In [4]:
# Shape of the spectrogram patches used throughout the notebook; it is passed
# as `output_shape` to the augmentation step below.
window_shape = (64,64)


def _load_spectral_dataset(name, split, channels):
    """Run ``spectral_pipeline`` for one source and restore its cardinality.

    Args:
        name: dataset name understood by ``spectral_pipeline`` (e.g. ``'DIRG'``).
        split: split specification forwarded to ``spectral_pipeline``.
        channels: list of channel names forwarded via ``compactor_kwargs``.

    Returns:
        The result of ``utils.restore_cardinality`` applied to the ``.dataset``
        attribute of the first pipeline output, using the last pipeline output
        as the cardinality argument (presumably the element count — TODO confirm).
    """
    outputs = spectral_pipeline(
        name, _func,
        split=split, compactor_kwargs=dict(channels=channels),
        shuffle_files=True  # turn on shuffle at the file level
    )
    return utils.restore_cardinality(outputs[0].dataset, outputs[-1])


# One entry per source of the meta-dataset: key -> (dataset name, split, channels).
# For a quicker run, the Paderborn split can be reduced to
# 'healthy[:25%]+artificial[:25%]'.
_meta_sources = {
    'DIRG_A1': ('DIRG', 'variation', ['A1']),
    'DIRG_A2': ('DIRG', 'variation', ['A2']),
    'Paderborn': ('Paderborn', 'healthy+artificial', ['vibration', 'current']),
}

# Same keys, same insertion order and same pipeline arguments as the previous
# three hand-written copies of this call.
ds_all = {
    key: _load_spectral_dataset(*spec)
    for key, spec in _meta_sources.items()
}

In [5]:
def _augmented_feature_stream(source):
    """Wrap `source` in ``transformer.SpecAugment`` and keep only the 'feature' field.

    The augmentation is configured with ``output_shape=window_shape`` and
    otherwise default settings.
    """
    augmenter = transformer.SpecAugment(
        source,
        output_shape=window_shape,
        # Individual augmentations can be switched off, e.g.:
        # blur_kwargs={'prob':0},
        # fade_kwargs={'prob':0},
        # flip_kwargs={'prob':0}
    )
    return augmenter.dataset.map(lambda record: record['feature'])


ds1 = _augmented_feature_stream(ds_all['DIRG_A1'])
ds2 = _augmented_feature_stream(ds_all['DIRG_A2'])
ds3 = _augmented_feature_stream(ds_all['Paderborn'])

In [6]:
# To build the meta-dataset without augmentation, use instead:
# ds1, ds2, ds3 = ds_all['DIRG_A1'], ds_all['DIRG_A2'], ds_all['Paderborn']

# Take at most as many Paderborn elements as there are DIRG elements (A1 + A2),
# then chain the three sources into one dataset.
n_dirg = ds1.cardinality() + ds2.cardinality()
ds_dirg = ds1.concatenate(ds2)
ds_paderborn = ds3.take(n_dirg)
ds0 = ds_dirg.concatenate(ds_paderborn)
# Without the cap on Paderborn:
# ds0 = ds1.concatenate(ds2).concatenate(ds3)

print(ds0.cardinality(), ds0.element_spec)

tf.Tensor(476, shape=(), dtype=int64) TensorSpec(shape=(3, 64, 64), dtype=tf.float32, name=None)


In [7]:
# Turn the feature dataset into the paired form used for SimCLR training.
# As the element spec displayed below shows, each element is
# ((view, view), label) with two (64, 64, 3) float32 tensors and a scalar
# float32 label (`fake_label=True`). `input_shape` is returned alongside and is
# used later to build the model inputs.
ds, input_shape = utils.twins_dataset_ssl(ds0, stack=False, fake_label=True)

ds.element_spec

((TensorSpec(shape=(64, 64, 3), dtype=tf.float32, name=None),
  TensorSpec(shape=(64, 64, 3), dtype=tf.float32, name=None)),
 TensorSpec(shape=(), dtype=tf.float32, name=None))

In [8]:
# Re-attach the cardinality of `ds0` to the paired dataset (presumably it is
# no longer known after `twins_dataset_ssl` — TODO confirm).
# NOTE(review): this cell rebinds `ds` in place, so it is only meaningful right
# after the cell that creates `ds`.
ds = utils.restore_cardinality(ds, ds0.cardinality())
# Plain Python int, used below for splitting and as the shuffle buffer size.
ds_size = int(ds.cardinality())  # utils.get_dataset_size(ds)

ds_size

476

## Base SimCLR model

We train a base SimCLR model on the meta-dataset.

In [25]:
# Train / validation proportions and batch size for the base SimCLR model.
splits = {'train':0.8, 'val':0.2}
batch_size = 32

ds_split = utils.split_dataset(ds, splits, ds_size=ds_size)

# Training stream: reshuffled on every epoch, batched into full batches only,
# and prefetched.
ds_train = (
    ds_split['train']
    .shuffle(ds_size, reshuffle_each_iteration=True)
    .batch(batch_size, drop_remainder=True)
    .prefetch(tf.data.AUTOTUNE)
)
# Validation stream: full batches only, no shuffling.
ds_val = ds_split['val'].batch(batch_size, drop_remainder=True)
# ds_test = ds_split['test'].batch(1, drop_remainder=True)

ds_train.element_spec

((TensorSpec(shape=(32, 64, 64, 3), dtype=tf.float32, name=None),
  TensorSpec(shape=(32, 64, 64, 3), dtype=tf.float32, name=None)),
 TensorSpec(shape=(32,), dtype=tf.float32, name=None))

In [26]:
# Keyword arguments forwarded to the SimCLR constructor for its encoder.
# Variant with pretrained weights:
# encoder_kwargs = dict(include_top=False, weights='imagenet', pooling='max')
encoder_kwargs = {
    'include_top': False,
    'weights': None,
    'pooling': 'avg',
}

# `simclr` is the same module as `dpmhm.models.simclr` (imported at the top of
# the notebook).
model = simclr.SimCLR(input_shape, name='VGG16', tau=0.1, **encoder_kwargs)
# The encoder is trained together with the rest of the model.
model._encoder.trainable = True

# Only an optimizer is supplied here; no loss is passed to `compile`.
model.compile(optimizer=keras.optimizers.Adam())

model.summary()

In [27]:
# Train the base SimCLR model on the meta-dataset, validating on `ds_val` after
# each epoch. `hh` is the object returned by `fit`; its `history['loss']` can
# be plotted with the commented line below.
# NOTE(review): 500 epochs is a long run (the captured output shows ~99 s for
# one epoch) — consider a named constant in a config cell.
hh = model.fit(ds_train,
               validation_data=ds_val,
               epochs=500)

# plt.plot(hh.history['loss'])

[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m99s[0m 5s/step - loss: 44742.3594 - val_loss: 47288.3281


From the trained SimCLR model, we extract the feature transformation part, which consists of the base encoder followed by the first two dense layers of the projection head.

In [29]:
# Symbolic input with the same shape as the SimCLR training inputs.
x = layers.Input(input_shape)

# Truncated projection head: from the projector's input up to the output of
# `layers[3]`.
# NOTE(review): the notebook text says this keeps the first two dense layers of
# the projection head — confirm that index 3 is the intended cut point.
# Equivalent alternative:
# _proj = models.Model(inputs=model._projector.inputs, outputs=model._projector.layers[3].output)
_proj = models.Model(inputs=model._projector.layers[0].input, outputs=model._projector.layers[3].output)

# _proj.summary()  # shows a concrete value for batch

# Encoder followed by the truncated projection head.
f = _proj(model._encoder(x))

# Stand-alone feature extractor reused by the transfer-learning sections below.
model_feature = models.Model(inputs=x, outputs=f, name='SimCLR_feature')

model_feature.summary()  # shows `None` for batch

## Transfer Learning

In [31]:
# Build the CWRU transfer dataset as windowed spectrogram patches.
# The window size is taken from `window_shape` (the shape the SimCLR model was
# trained on) instead of a second hard-coded (64, 64), so the transfer data
# cannot drift away from the model's input shape. The hop equals the window
# size, as before.
ds_all['CWRU'], full_labels_dict = spectral_window_pipeline(
    'CWRU', _func,
    split='all',
    compactor_kwargs=dict(keys=['FaultLocation', 'FaultComponent', 'FaultSize']),
    window_kwargs=dict(window_size=window_shape, hop_size=window_shape)
)

# Label names are the keys of the dictionary returned by the pipeline.
labels = list(full_labels_dict.keys())
# One output more than the number of known labels (presumably an extra index
# reserved for unknown labels — TODO confirm against `get_mapping_supervised`).
n_classes = len(labels) + 1

In [None]:
# Element-wise mapping built from the list of label names.
preproc = preprocessing.get_mapping_supervised(labels)

# Apply the mapping, then restore the static shape (key=0) and the cardinality
# of the resulting dataset.
_cwru_mapped = ds_all['CWRU'].map(preproc, num_parallel_calls=tf.data.AUTOTUNE)
_cwru_shaped = utils.restore_shape(_cwru_mapped, key=0)
dw = utils.restore_cardinality(_cwru_shaped)

dw_size = int(dw.cardinality())

### Supervised fine tuning

We add a classification head to the feature transformation network and fine-tune the model on some new data.

In [40]:
# Train / validation / test proportions and batch size for supervised fine-tuning.
splits = {'train':0.7, 'val':0.2, 'test':0.1}
batch_size = 64

dw_split = utils.split_dataset(
    dw, splits,
    ds_size=dw_size,
    # labels=np.arange(n_classes)
)

# Training stream: reshuffled on every epoch, full batches only, prefetched.
dw_train = (
    dw_split['train']
    .shuffle(dw_size, reshuffle_each_iteration=True)
    .batch(batch_size, drop_remainder=True)
    .prefetch(tf.data.AUTOTUNE)
)
# The validation set must be finite: `fit` is called without
# `validation_steps`, so a `.repeat()`-ed dataset would make the validation
# pass at the end of the first epoch run forever.
dw_val = dw_split['val'].batch(batch_size, drop_remainder=True)
dw_test = dw_split['test'].batch(batch_size, drop_remainder=True)

The classification head here is a simple MLP. The weights of the feature transformation network are frozen during training.

In [41]:
# Freeze the pretrained feature extractor: only the classification head is trained.
model_feature.trainable = False

# Small MLP head producing one logit per class.
class_head = models.Sequential(name='Classification_head')
class_head.add(layers.Dense(128, activation='relu'))
class_head.add(layers.BatchNormalization())
class_head.add(layers.Dense(n_classes))  # nb labels

# Stack: input -> frozen feature extractor -> classification head.
x = layers.Input(input_shape)
_fine_features = model_feature(x)
_fine_logits = class_head(_fine_features)
model_fine = models.Model(inputs=x, outputs=_fine_logits)

# The head outputs raw logits, hence `from_logits=True`.
model_fine.compile(
    optimizer=keras.optimizers.Adam(),
    loss=losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=[metrics.SparseCategoricalAccuracy()],
)

In [42]:
# Fine-tune the classifier on the CWRU training split, validating on `dw_val`
# after each epoch.
# NOTE(review): `hh` is rebound here and no longer refers to the result of the
# base-model training.
hh = model_fine.fit(
    dw_train,
    validation_data=dw_val,
    epochs=10
)

Epoch 1/10
[1m178/178[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 302ms/step - loss: 3.1987 - sparse_categorical_accuracy: 0.0869


KeyboardInterrupt



In [43]:
# Evaluate on the held-out test split; per the output below this returns
# [loss, sparse_categorical_accuracy].
model_fine.evaluate(dw_test)

[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 305ms/step - loss: 3.0477 - sparse_categorical_accuracy: 0.0944


[3.059857130050659, 0.09749999642372131]

# EOF

### Few-shot learning

In few-shot learning, the amount of new data per category is limited. We can prepare the data for few-shot learning by splitting the data of each category separately.

However, for unknown reasons, the performance of the few-shot split seems to be very low compared to the normal split.

In [None]:
# Few-shot proportions: only 20% of the data is used for training, 70% for
# validation and 10% for testing.
splits = {'train':0.2, 'val':0.7, 'test':0.1}
batch_size = 64

# Same definition as in the transfer-learning section, repeated so that this
# section can be run on its own once `labels` exists.
n_classes = len(labels) + 1

In [None]:
# Only for demonstration, here we apply the preprocessing after the split.
# The split is therefore taken on the raw CWRU dataset, not on `dw`: `dw` has
# already been mapped through `preproc`, and the reloaded splits are mapped
# through `preproc` again further down, which would apply it twice.
# NOTE(review): assumes `split_dataset(..., labels=labels)` matches the label
# names of the raw elements — confirm against `utils.split_dataset`.
# Splits saved by an earlier run of this cell must be regenerated.
dw_split = utils.split_dataset(
    ds_all['CWRU'], splits,
    labels=labels
)

# Persist each split so that later cells can reload it without re-splitting.
for k, dv in dw_split.items():
    dv.save(str(workdir/f'fs_split_{k}'))

In [94]:
# Reload the saved few-shot splits from `workdir`, one per key of `splits`.
dw_split = {
    k: tf.data.Dataset.load(str(workdir/f'fs_split_{k}'))
    for k in splits
}

In [116]:
# Training stream: preprocess, shuffle with a buffer of 1000 elements
# (reshuffled every epoch), keep full batches only, prefetch.
dw_train = (
    dw_split['train']
    .map(preproc, num_parallel_calls=tf.data.AUTOTUNE)
    .shuffle(1000, reshuffle_each_iteration=True)
    .batch(batch_size, drop_remainder=True)
    .prefetch(tf.data.AUTOTUNE)
)
# Validation and test streams: preprocess and batch only.
dw_val = (
    dw_split['val']
    .map(preproc, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(batch_size, drop_remainder=True)
)
dw_test = (
    dw_split['test']
    .map(preproc, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(batch_size, drop_remainder=True)
)

In [118]:
# Materialize up to 10 training batches for inspection and time the operation.
%time eles = list(dw_train.take(10))

CPU times: user 172 ms, sys: 13.4 ms, total: 185 ms
Wall time: 58.7 ms


2024-06-18 23:51:57.085419: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


In [None]:
# Inspect the first component of the first materialized batch.
eles[0][0]

In [None]:
# Sanity check: run the frozen feature extractor on that batch.
model_feature(eles[0][0])

In [101]:
# Keep the pretrained feature extractor fixed; only the head below is trainable.
model_feature.trainable = False

# Same MLP head as in the supervised section, built from scratch for the
# few-shot experiment.
_fs_head_layers = [
    layers.Dense(128, activation='relu'),
    layers.BatchNormalization(),
    layers.Dense(n_classes),  # nb labels
]
class_head = models.Sequential(_fs_head_layers, name='Classification_head')

# Stack: input -> frozen feature extractor -> classification head.
x = layers.Input(input_shape)
_fs_logits = class_head(model_feature(x))
model_fs = models.Model(inputs=x, outputs=_fs_logits)

# The head outputs raw logits, hence `from_logits=True`.
model_fs.compile(
    optimizer=keras.optimizers.Adam(),
    loss=losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=[metrics.SparseCategoricalAccuracy()],
)

In [102]:
# Train the few-shot classifier on the small training split, validating on
# `dw_val` after each epoch.
# NOTE(review): the traceback stored below reports inputs of shape
# (64, 70, 70, 3) and a `jnp.ndarray`, i.e. it appears to come from a run with
# a different window size and Keras backend than the ones configured in this
# notebook — re-run from a fresh kernel before interpreting it.
hh = model_fs.fit(
    dw_train,
    validation_data=dw_val,
    epochs=10
)

RuntimeError: Unable to automatically build the model. Please build it yourself before calling fit/evaluate/predict. A model is 'built' when its variables have been created and its `self.built` attribute is True. Usually, calling the model on a batch of data is the right way to build it.
Exception encountered:
'Exception encountered when calling Conv2D.call().

[1m'NoneType' object is not callable[0m

Arguments received by Conv2D.call():
  • inputs=jnp.ndarray(shape=(64, 70, 70, 3), dtype=float32)'

In [None]:
# Evaluate the few-shot classifier on the held-out test split.
model_fs.evaluate(dw_test)

2024-06-18 23:41:21.174297: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
2024-06-18 23:41:22.954204: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
2024-06-18 23:41:24.714093: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
2024-06-18 23:41:26.471666: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
2024-06-18 23:41:28.231720: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
2024-06-18 23:41:30.062777: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
2024-06-18 23:41:31.896178: W tensorflow/core/framework/local_rendezvous.cc:404] L