In [1]:
from pathlib import Path
import numpy as np
import tensorflow as tf
from ResNet import *
from InceptionNet import *

from tensorflow.keras import layers
from tensorflow.keras import Model
# Single seed shared by every random number generator used in this notebook.
SEED = 42
np.random.seed(SEED)
tf.keras.utils.set_random_seed(SEED)

In [2]:
# Working directories, all relative to the notebook's current directory.
# The cells below read labels from `lbldir`, images from `datadir`, model
# weights from `weightsdir`, and read/write embeddings in `embdir`.
featdir, embdir, weightsdir, lbldir, datadir = (
    Path(dirname)
    for dirname in ('audio_features', 'embeddings', 'weights', 'labels', 'datadir')
)

In [3]:
# Number of target classes; used as the width of the one-hot label arrays.
NClass = 49

# Set SMALL to 1 to use the `_64` image files and a 64x64x3 input,
# leave it at 0 for the `_160` files and a 160x160x3 input.
SMALL = 0
# Shape passed to the encoder constructor as `input_shape`.
shape = (64, 64, 3) if SMALL else (160, 160, 3)

In [4]:
class dataHolder():
  """Holds the image arrays of the train, validation and test splits.

  Creating an instance immediately loads all three arrays from `datadir`.
  """

  def __init__(self):
    self.prepareData()

  def prepareData(self):
    """Load the three `.npy` arrays into `trainimg`, `valimg` and `testimg`.

    The `_64` files are read when the module-level `SMALL` flag is truthy,
    the `_160` files otherwise.
    """
    ADD = '_160'
    if SMALL:
      ADD = '_64'

    # ~ 5 gigs RAM overall
    sources = {
      'trainimg': f'train_faces_only{ADD}.npy',
      'valimg': f'val_faces_only{ADD}.npy',
      'testimg': f'test_faces_only{ADD}.npy',
    }
    # dicts preserve insertion order, so files load as train, val, test.
    for attr, filename in sources.items():
      setattr(self, attr, np.load(datadir / filename))

In [5]:
# Class labels of each split, as stored on disk.
trainlabels, vallabels, testlabels = (
    np.load(lbldir / fname)
    for fname in ('tr_lbl.npy', 'val_lbl.npy', 'tst_lbl.npy')
)

# One-hot encoded versions of the labels, with NClass columns each.
train_out, val_out, test_out = (
    tf.keras.utils.to_categorical(lbl, NClass)
    for lbl in (trainlabels, vallabels, testlabels)
)

## Load our Encoder

In [6]:
def get_encoder(Net):
  """Build the image encoder for architecture `Net`, with dropout near the top.

  Args:
    Net: model constructor; one of `InceptionResNetV2`, `ResNet50` or
      `ResNet_small`.

  Returns:
    A `Model` with the same input as the base network, where Dropout(0.7) is
    applied before the second-to-last layer and Dropout(0.5) before the last
    layer.

  Raises:
    ValueError: if `Net` is not one of the supported constructors.
  """
  # Constructor arguments common to every supported architecture.
  kwargs = dict(include_top=True,
                weights=None,
                input_tensor=None,
                input_shape=shape,
                pooling=None,
                classes=NClass)

  if Net == InceptionResNetV2:
    kwargs['classifier_activation'] = "softmax"
  elif not (Net == ResNet50 or Net == ResNet_small):
    # Fail early and clearly instead of hitting an unbound `model` below.
    raise ValueError(f"Unsupported encoder architecture: {Net!r}")

  model = Net(**kwargs)

  # add some dropout at the end:
  # (this was used in the training, and is not useful here.
  #  but we keep it for consistency)
  l2 = model.layers[-3]
  l1 = model.layers[-2]
  output = model.layers[-1]

  dropout1 = tf.keras.layers.Dropout(0.7)
  dropout2 = tf.keras.layers.Dropout(0.5)

  # Re-wire the last two layers so each one receives a dropped-out input.
  x = dropout1(l2.output)
  x = l1(x)
  x = dropout2(x)
  outputdrop = output(x)

  modeldrop = Model(inputs=model.input, outputs=outputdrop)

  return modeldrop

## Compute Embeddings

In [7]:
# True: run the encoder on all images and cache the results in `embdir`.
# False: reuse the cached `.npy` files from a previous run.
RECOMPUTE_EMBEDDINGS = True #False
if RECOMPUTE_EMBEDDINGS:
    model = get_encoder(InceptionResNetV2)

    # load the weights:
    model.load_weights(weightsdir / '28epochscrosscat_30epochstriplet_inceptionresnet_smaller_sepconv.h5')

    data = dataHolder()

    # make the embeddings (the encoder's final-layer outputs):
    embtest = model.predict(data.testimg)
    embtrain = model.predict(data.trainimg)
    embval = model.predict(data.valimg)

    # np.save does not create missing directories, so make sure the cache
    # directory exists before writing.
    embdir.mkdir(parents=True, exist_ok=True)
    np.save(embdir / 'embeddings_val_img.npy',   embval)
    np.save(embdir / 'embeddings_test_img.npy' , embtest)
    np.save(embdir / 'embeddings_train_img.npy', embtrain)

else:
    embval = np.load(embdir / 'embeddings_val_img.npy')
    embtest = np.load(embdir / 'embeddings_test_img.npy')
    embtrain = np.load(embdir / 'embeddings_train_img.npy')

2022-05-31 14:28:08.669580: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.




## Classify Images Alone

In [8]:
# Now we experiment with classifying the image embeddings alone, using a
# single dense layer followed by a softmax.

def get_classifier():
    """Build a one-layer softmax classifier over the image embeddings.

    Returns:
        A `Model` mapping an NClass-dimensional input vector to NClass softmax
        outputs through a single Dense layer.
    """
    # The embeddings are the encoder's final outputs, which have one unit per
    # class, so the input width equals NClass.
    emb_dim = NClass

    # `shape` must be a tuple: `(49)` is just the integer 49, whereas
    # `(49,)` is a one-element tuple.
    inputs = layers.Input(shape=(emb_dim,))

    logits = layers.Dense(NClass)(inputs)
    outputs = layers.Activation('softmax')(logits)

    return Model(inputs=inputs, outputs=outputs)

classifier = get_classifier()
# summary() prints the table itself and returns None; wrapping it in print()
# would emit an extra "None" line after the table.
classifier.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 49)]              0         
                                                                 
 dense (Dense)               (None, 49)                2450      
                                                                 
 activation_44 (Activation)  (None, 49)                0         
                                                                 
Total params: 2,450
Trainable params: 2,450
Non-trainable params: 0
_________________________________________________________________
None


In [9]:
# True: fit the classifier on the training embeddings.
# False: restore previously saved classifier weights instead.
trainclass = True #False
loss = tf.keras.losses.CategoricalCrossentropy()

if not trainclass:
    # NOTE(review): the training branch below never writes this file;
    # confirm it is saved elsewhere before switching `trainclass` off.
    classifier.load_weights(weightsdir / 'image_classifier.h5')

else:
    # Compile the model
    classifier.compile(
        loss=loss,
        metrics=['accuracy'],
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.1))

    # Fit on the training embeddings, validating on the validation split.
    history = classifier.fit(
        x=embtrain,
        y=train_out,
        validation_data=(embval, val_out),
        epochs=150,
        batch_size=3000,
        shuffle=True)

Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66/150
Epoch 67/150
Epoch 68/150
Epoch 69/150
Epoch 70/150
Epoch 71/150
Epoch 72/150
Epoch 73/150
Epoch 74/150
Epoch 75/150
Epoch 76/150
Epoch 77/150
Epoch 78

## Testing

In [10]:
# Run the classifier on the test embeddings, then take the highest-scoring
# class of each row as the prediction.
test_scores = classifier(embtest)
pred = np.argmax(test_scores, axis=1)
print(f"Test accuracy : {np.sum(pred==testlabels)/pred.size*100:.01f}%")

Test accuracy : 93.2%
