In [1]:
import pandas as pd
import os
import torch
import librosa
import librosa.display
import pywt
import matplotlib.pyplot as plt
import csv
from keras.optimizers import Adam
import pickle

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import normalize
import warnings
warnings.filterwarnings('ignore')

import tensorflow as tf
from keras.layers import LSTM, Dense, Dropout, Conv2D, Flatten, Reshape, MaxPooling1D
from keras.layers import Conv1D, LSTM, TimeDistributed
import numpy as np
import keras
from keras.utils import to_categorical

In [2]:

# Directory prefix that get_features() string-concatenates with each clip's
# relative path, so it must keep its trailing '/'.
FOLDER_PATH = './dataset/fr/clips/'
# Pickle file used to cache the (features, labels) pair so the extraction
# step can be skipped when the file already exists.
FEATURES_FILE = 'matthieu_features.pkl'

In [11]:
def padding(data, axis, length):
    """Zero-pad ``data`` at the end of one axis until it is ``length`` long.

    Args:
        data: NumPy array to pad.
        axis: Index of the axis to extend (negative indices are accepted).
        length: Target size of ``data`` along ``axis``.

    Returns:
        A new array with trailing zeros appended along ``axis``. When the
        axis is already ``length`` or longer, nothing is added (the data is
        never truncated).
    """
    # Number of trailing zeros required; never negative.
    missing = length - data.shape[axis]
    if missing < 0:
        missing = 0

    # One (before, after) pair per dimension; only the requested axis grows.
    widths = [(0, 0) for _ in data.shape]
    widths[axis] = (0, missing)

    return np.pad(data, widths, mode='constant', constant_values=0)


def get_features(df_in):
    """Build a fixed-length Haar wavelet feature vector for every clip in ``df_in``.

    For each row, the audio file at ``FOLDER_PATH + path`` is loaded with
    ``librosa.load``, decomposed with a single-level Haar DWT, and the
    approximation and detail coefficients are concatenated into one 1-D
    vector. That vector is then truncated or zero-padded to exactly
    ``max_len`` values so all clips share the same length.

    Args:
        df_in: DataFrame with a ``path`` column (file name relative to
            ``FOLDER_PATH``) and a ``sentence`` column (used as the label).

    Returns:
        Tuple ``(features, labels)`` of two lists with one entry per row:
        ``features`` holds 1-D arrays of length ``max_len`` and ``labels``
        holds the matching ``sentence`` values. Both are empty for an empty
        frame.
    """
    max_len = 270000  # fixed feature length shared by every clip

    features = []
    labels = []

    # Iterate over the column values positionally: looking rows up by label
    # (df_in.path[i]) fails on a filtered or shuffled frame whose index is
    # no longer 0..n-1.
    for path, label in zip(df_in['path'], df_in['sentence']):
        filename = FOLDER_PATH + path

        # load the file (the sample rate returned by librosa is not needed)
        signal, _ = librosa.load(filename)

        cA, cD = pywt.dwt(signal, 'haar')
        data = np.concatenate((cA, cD), axis=0)

        # Truncate first so the pad width below can never be negative
        # (np.pad rejects negative widths), then right-pad with zeros.
        data = data[:max_len]
        data = np.pad(data, (0, max_len - len(data)))

        features.append(data)
        labels.append(label)

    return (features, labels)


def preprocess_data(X, y):
    """Rescale the features and convert the labels to a NumPy array.

    The features are min-max scaled using the global minimum and maximum of
    ``X`` and the result is then divided by its global standard deviation.

    Args:
        X: Array-like of numeric features (e.g. a list of equal-length 1-D
            arrays or a 2-D array).
        y: Sequence of labels.

    Returns:
        Tuple ``(X, y)`` where ``X`` is the rescaled feature array and ``y``
        is ``np.array(y)``. If every value in ``X`` is identical, an
        all-zero array of the same shape is returned instead of NaN.
    """
    # Convert once up front so min/max/std do not each re-materialise a
    # temporary array from a list input.
    X = np.asarray(X)
    y = np.array(y)

    lowest = np.min(X)
    value_range = np.max(X) - lowest

    # Constant input: the min-max step would compute 0/0 and fill X with NaN
    # (silently, since the notebook suppresses warnings). Return zeros.
    if value_range == 0:
        out_dtype = X.dtype if np.issubdtype(X.dtype, np.floating) else np.float64
        return np.zeros(X.shape, dtype=out_dtype), y

    X = (X - lowest) / value_range
    # value_range != 0 means X is not constant, so its std is non-zero here.
    X = X / np.std(X)
    return X, y


def split_data(X, y):
    """Split features and labels into train, test and validation subsets.

    A first split holds out 25% of the samples as the test set, stratified
    on ``y``. A second, non-stratified split holds out 25% of the remaining
    samples as the validation set. Both splits use the same fixed random
    seed.

    Args:
        X: Feature array, one row per sample.
        y: Label array aligned with ``X``.

    Returns:
        ``(X_train, X_test, X_val, y_train, y_test, y_val)`` in that order.
    """
    seed = 123
    holdout_fraction = 0.25

    # Step 1: carve out the test set, keeping the label distribution of y.
    X_rest, X_test, y_rest, y_test = train_test_split(
        X, y,
        test_size=holdout_fraction,
        random_state=seed,
        stratify=y,
    )

    # Step 2: carve the validation set out of what is left (no stratification).
    X_train, X_val, y_train, y_val = train_test_split(
        X_rest, y_rest,
        test_size=holdout_fraction,
        random_state=seed,
    )

    return X_train, X_test, X_val, y_train, y_test, y_val


def create_model(input_shape, num_classes=14):
    """Build the (uncompiled) Conv1D + LSTM sequence classifier.

    Layer stack, in order: Conv1D(64, kernel 5, ReLU) -> MaxPooling1D(2) ->
    Dropout(0.25) -> LSTM(128, returning the full sequence) -> Dropout(0.5)
    -> Dense(256, ReLU) -> Dropout(0.5) -> Dense(num_classes, softmax).

    Because the LSTM is created with ``return_sequences=True``, the dense
    layers are applied to every time step and the model emits one softmax
    distribution per time step rather than one per input sequence.

    Args:
        input_shape: Shape of a single input sample (without the batch
            axis), forwarded to the first Conv1D layer.
        num_classes: Number of units in the softmax output layer. Defaults
            to 14, the value that was previously hard-coded.

    Returns:
        A ``keras.Sequential`` model; the caller is expected to compile it.
    """
    return keras.Sequential([
        # Convolutional front end
        Conv1D(64, kernel_size=5, activation='relu', input_shape=input_shape),
        MaxPooling1D(pool_size=2),
        Dropout(0.25),

        # Recurrent block followed by a per-time-step dense head
        LSTM(128, return_sequences=True),
        Dropout(0.5),
        Dense(256, activation='relu'),
        Dropout(0.5),

        # Output layer: one probability per class
        Dense(num_classes, activation='softmax'),
    ])

In [4]:
# Read the tab-separated metadata file and keep only the two columns used
# downstream: the clip file name and its sentence.
data = pd.read_csv('./dataset/fr/test.tsv', sep='\t').loc[:, ['path', 'sentence']]


In [5]:

# Feature extraction is expensive, so its result is cached on disk as a
# pickled (X, y) tuple and reused whenever the cache file is present.
# NOTE: pickle.load executes arbitrary code; only load a cache file that
# this notebook produced itself.
if not os.path.exists(FEATURES_FILE):
    # No cache yet: compute the features and persist them for the next run.
    X, y = get_features(data)
    with open(FEATURES_FILE, 'wb') as cache_file:
        pickle.dump((X, y), cache_file)
else:
    with open(FEATURES_FILE, 'rb') as cache_file:
        X, y = pickle.load(cache_file)


In [6]:
# Normalise the features and turn the labels into an array.
# Caution: X and y are overwritten in place, so running this cell twice
# rescales data that has already been rescaled.
X, y = preprocess_data(X, y)

# Sanity check: array shapes followed by the sample counts, one per line.
print(X.shape, y.shape, len(X), len(y), sep='\n')

(5685, 270000)
(5685,)
5685
5685


In [16]:
# Train / evaluate cell: split the data, reshape it for the sequence model,
# one-hot encode the labels, then fit, evaluate and save the model.
# NOTE(review): the recorded run of this notebook ended with a MemoryError
# while allocating a (1066, 270000) float32 array; which statement raised it
# is not shown in the output.
X_train, X_test, X_val, y_train, y_test, y_val = split_data(X, y)

# Each model input sample has shape (time steps, 270000 features).
input_shape = (None, 270000)
# Add a leading axis of size 1: the whole train (resp. validation) split
# becomes a single batch item whose axis 1 enumerates the clips.
X_train = X_train.reshape((1, X_train.shape[0], X_train.shape[1]))
X_val = X_val.reshape((1, X_val.shape[0], X_val.shape[1]))
y_train = y_train.reshape((1, y_train.shape[0]))
y_val = y_val.reshape((1, y_val.shape[0]))
# NOTE(review): X_test and y_test are NOT reshaped like the train/validation
# arrays, so they keep one dimension fewer — confirm that evaluate/predict
# below accept them.

num_classes = 14  # Replace with the actual number of classes
# NOTE(review): to_categorical presumably needs integer class ids, while the
# labels come from the `sentence` column — confirm how they are encoded.
y_train = to_categorical(y_train, num_classes)
y_val = to_categorical(y_val, num_classes)
y_test = to_categorical(y_test, num_classes)

max_seq_length = X_train.shape[1]  # Maximum sequence length
# NOTE(review): y_train's axis 1 already has X_train.shape[1] entries after
# the reshape above, so this slice looks like a no-op; y_val is also cut to
# the *training* length rather than to X_val's own length — confirm intent.
y_train = y_train[:, :max_seq_length, :]
y_val = y_val[:, :max_seq_length, :]

print(X_train.shape)
print(y_train.shape)

model = create_model(input_shape)
model.summary()

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
# The leading (batch) axis of X_train has size 1, so batch_size=254 cannot
# produce more than one batch here.
history = model.fit(X_train, y_train, epochs=20, batch_size=254, validation_data=(X_val, y_val))

TrainLoss, Trainacc = model.evaluate(X_train, y_train)
TestLoss, Testacc = model.evaluate(X_test, y_test)
y_pred = model.predict(X_test)

# NOTE(review): y_test is one-hot encoded at this point, whereas the
# predictions are reduced to class indices with argmax — the two arguments
# probably need the same encoding; verify against tf.math.confusion_matrix.
print('Confusion_matrix: ', tf.math.confusion_matrix(y_test, np.argmax(y_pred, axis=1)))

model.save("model.h5")

MemoryError: Unable to allocate 1.07 GiB for an array with shape (1066, 270000) and data type float32