In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Dense,Conv2D,Dropout,MaxPool2D,Flatten
from tensorflow.keras.models import Sequential
import numpy as np
from PIL import Image
import os
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [None]:
# CNN classifier for 224x224 RGB spectrogram images: two pairs of 3x3
# convolutions, each pair followed by 2x2 max pooling, then a dense head that
# ends in an 8-way softmax.
model = Sequential([
    Conv2D(4, kernel_size=(3, 3), activation="relu", padding="valid", input_shape=(224, 224, 3)),
    Conv2D(16, kernel_size=(3, 3), activation="relu", padding="valid"),
    MaxPool2D(pool_size=(2, 2)),
    Conv2D(32, kernel_size=(3, 3), activation="relu", padding="valid"),
    Conv2D(64, kernel_size=(3, 3), activation="relu", padding="valid"),
    MaxPool2D(pool_size=(2, 2)),
    Flatten(),
    Dense(64, activation="relu"),
    Dense(16, activation="relu"),
    Dense(8, activation="softmax"),
])

# Targets are one-hot vectors, hence categorical cross-entropy.
model.compile(loss="categorical_crossentropy", optimizer="adam")

In [None]:
# Class names; a label's position in this list is its class index.
# NOTE(review): both "surprise" and "surprised" are listed -- presumably they
# mirror two differently named data folders; confirm they are meant to be
# separate classes.
labels = ["angry", "disgust", "fear", "happy", "neutral", "sad", "surprise", "surprised"]

# Build the identity matrix once (it was previously rebuilt for every label)
# and use its rows as the one-hot vectors.
values = list(np.eye(len(labels)))

# Maps a class name to its one-hot vector.
one_hot_dict = dict(zip(labels, values))

In [None]:
# Collect the path of every spectrogram stored as <data_folder_path>/<class>/<file>.
data_folder_path = "../data"
img_paths = []

# Sorted listings keep the collected order independent of the filesystem's
# directory ordering.
for type_path in sorted(os.listdir(data_folder_path)):
    type_dir = os.path.join(data_folder_path, type_path)

    # Only descend into class folders. The isdir check skips
    # data_preprocessing.ipynb and any other stray file (calling os.listdir on
    # a file raises NotADirectoryError); hidden folders such as
    # .ipynb_checkpoints are skipped so they are not mistaken for a class.
    if type_path.startswith(".") or not os.path.isdir(type_dir):
        continue

    for spectrogram_path in sorted(os.listdir(type_dir)):
        img_paths.append(os.path.join(type_dir, spectrogram_path))

img_paths = np.array(img_paths)

# Seeded in-place shuffle so the sample order is the same on every run.
np.random.default_rng(2).shuffle(img_paths)

In [None]:
def image_processor(img_path):
    """Load one spectrogram image and pair it with its one-hot class label.

    The image is converted to RGB, resized to 224x224 and scaled to [0, 1].
    The class is the name of the directory that directly contains the file.

    Args:
        img_path: Path to an image laid out as ``<root>/<category>/<file>``.

    Returns:
        Tuple ``(img, label)``: ``img`` is a float array of shape
        (224, 224, 3) and ``label`` is the ``one_hot_dict`` entry for the
        category.

    Raises:
        KeyError: If the parent directory name is not a key of
            ``one_hot_dict``.
    """
    # convert() loads the pixel data, so the file can be closed right after.
    with Image.open(img_path) as raw:
        img = raw.convert("RGB").resize((224, 224))

    img = np.array(img) / 255.0

    # Use the parent directory name instead of a fixed split index: for
    # "../data/<category>/<file>", split(os.sep)[1] is "data" on POSIX and only
    # happened to be the category on Windows, where the "../data" prefix
    # contains no backslash.
    category = os.path.basename(os.path.dirname(img_path))
    label = one_hot_dict[category]

    return (img, label)

In [None]:
# Run every collected path through image_processor, then stack the images
# into X and the one-hot labels into Y.
processed = [image_processor(path) for path in img_paths]

X = np.array([pixels for pixels, _ in processed])
Y = np.array([one_hot for _, one_hot in processed])

# Drop the intermediate list so the images are not held in memory twice.
del processed

print(X.shape, Y.shape)

In [None]:
# Hold out 20% of the samples for testing; the fixed random_state makes the
# split itself repeatable for a given X and Y.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    shuffle=True,
    random_state=2,
)

In [None]:
# Train on the training split for 10 epochs.
model.fit(x=X_train, y=Y_train, epochs=10)

In [None]:
# Accuracy on the training split: compare the arg-max class index of each
# one-hot target with the arg-max of the predicted probabilities.
y_true = Y_train.argmax(axis=1)
train_probabilities = model.predict(X_train)
y_pred = train_probabilities.argmax(axis=1)

accuracy = accuracy_score(y_true, y_pred)
print("Training Accuracy:", accuracy)

In [None]:
# Accuracy on the held-out split, computed the same way as for training.
# y_true and y_pred are left holding the test-split indices for later cells.
y_true = Y_test.argmax(axis=1)
test_probabilities = model.predict(X_test)
y_pred = test_probabilities.argmax(axis=1)

accuracy = accuracy_score(y_true, y_pred)
print("Test Accuracy:", accuracy)

In [None]:
# One line per sample, formatted as "<predicted label>:<true label>".
for predicted_idx, true_idx in zip(y_pred, y_true):
    print(labels[predicted_idx], labels[true_idx], sep=":")

# Testing with random data from the internet

In [None]:
import librosa
from matplotlib import pyplot as plt

In [None]:
def audio_to_spectrogram(img_path,save_path=".",img_name="img"):
  """Render an audio file as a mel-spectrogram PNG and return the PNG's path.

  Args:
    img_path: Path of the audio file to load (despite the name, this is the
      audio input, not an image).
    save_path: Directory to write the PNG into; created if it is missing.
    img_name: File-name stem; the file is written as ``<img_name>_img.png``.

  Returns:
    The path of the written image, ``f"{save_path}/{img_name}_img.png"``.
  """
  # Import the submodule explicitly: librosa releases without lazy submodule
  # loading do not expose `librosa.display` after a plain `import librosa`.
  import librosa.display

  audio_data, sample_rate = librosa.load(img_path)

  # Mel power spectrogram (128 bands, up to 8 kHz), converted to dB relative
  # to its own peak.
  mel_power = librosa.feature.melspectrogram(y=audio_data, sr=sample_rate, n_mels=128, fmax=8000)
  mel_db = librosa.power_to_db(mel_power, ref=np.max)

  os.makedirs(save_path, exist_ok=True)
  out_path = f"{save_path}/{img_name}_img.png"

  plt.figure(figsize=(12, 12))
  try:
    librosa.display.specshow(mel_db, sr=sample_rate, fmax=8000, cmap="magma")
    plt.savefig(out_path, bbox_inches='tight', pad_inches=0)
  finally:
    # Close the figure even if drawing or saving fails, so it does not leak.
    plt.close()
  return out_path

In [None]:
# Turn the sample audio clip into a spectrogram image in the current directory.
audio_path = "./shut up.mp3"
img_path = audio_to_spectrogram(img_path=audio_path, save_path=".", img_name="shut_up")

In [None]:
def test_img_preprocessor(img_path):
    """Load a single image for inference.

    The image is converted to RGB, resized to 224x224 and scaled to [0, 1],
    matching the preprocessing applied to the training images.

    Args:
        img_path: Path of the image file to load.

    Returns:
        Float array of shape (224, 224, 3). No batch axis is added; callers
        must reshape to (1, 224, 224, 3) before passing it to the model.
    """
    # convert() loads the pixel data, so the file can be closed right after.
    with Image.open(img_path) as raw:
        img = raw.convert("RGB").resize((224, 224))

    # The previous `img.reshape(1,224,224,3)` statement discarded its result
    # (ndarray.reshape returns a new array), so it had no effect and was
    # removed; the returned shape is unchanged.
    return np.array(img) / 255.0

In [None]:
# Preprocess the generated spectrogram and display the resulting array shape.
test_img = test_img_preprocessor(img_path=img_path)
np.shape(test_img)

In [None]:
# Add the batch axis, predict, and display the label with the highest score.
test_batch = test_img.reshape(1, 224, 224, 3)
test_scores = model.predict(test_batch)
labels[np.argmax(test_scores)]

In [None]:
# Persist the trained model to an HDF5 file.
model.save(filepath="model_9982.h5")