<a href="https://colab.research.google.com/github/yassine-cherni/NLP/blob/main/YC.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
!pip install librosa
!pip install librosa resampy

Collecting resampy
  Downloading resampy-0.4.3-py3-none-any.whl (3.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m11.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: resampy
Successfully installed resampy-0.4.3


In [13]:
import os
import numpy as np
import pandas as pd
import librosa
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
from tensorflow.keras import layers, models, callbacks

# Function to extract averaged MFCC, chroma and mel-spectrogram features from audio file
def extract_features(file_path, mfcc=True, chroma=True, mel=True):
    """Build one feature vector for an audio file.

    Every enabled feature matrix is transposed and averaged over its first
    axis, and the resulting vectors are concatenated in the order
    MFCC, chroma, mel.

    Args:
        file_path: Path of the audio file, handed to ``librosa.load``.
        mfcc: Include the averaged MFCCs (``n_mfcc=40``).
        chroma: Include the averaged chroma STFT.
        mel: Include the averaged mel spectrogram.

    Returns:
        ``numpy.ndarray`` holding the concatenated averaged features.

    Raises:
        ValueError: If all three feature flags are false.
    """
    if not (mfcc or chroma or mel):
        # Fail before decoding the file: np.concatenate on an empty list would
        # otherwise raise an opaque "need at least one array" ValueError.
        raise ValueError("At least one of mfcc, chroma or mel must be enabled")

    # sr=None keeps the file's native sampling rate instead of resampling.
    audio, sample_rate = librosa.load(file_path, sr=None)

    # Separate local names so the boolean flags are never overwritten by arrays.
    parts = []
    if mfcc:
        mfcc_mean = np.mean(librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40).T, axis=0)
        parts.append(mfcc_mean)
    if chroma:
        chroma_mean = np.mean(librosa.feature.chroma_stft(y=audio, sr=sample_rate).T, axis=0)
        parts.append(chroma_mean)
    if mel:
        mel_mean = np.mean(librosa.feature.melspectrogram(y=audio, sr=sample_rate).T, axis=0)
        parts.append(mel_mean)
    return np.concatenate(parts)

# Load data and extract features
def load_data(data_dir):
    """Collect features and labels from a directory of per-class folders.

    Each sub-directory of ``data_dir`` is treated as one class: every regular
    file inside it is passed to ``extract_features`` and labelled with the
    folder name. Hidden entries (names starting with ".") are skipped, as are
    top-level entries that are not directories and folder entries that are not
    regular files.

    Args:
        data_dir: Root directory containing one sub-directory per label.

    Returns:
        Tuple ``(features, labels)`` of ``numpy.ndarray``: the stacked feature
        vectors and the matching folder-name labels, in sorted folder/file order.
    """
    features = []
    labels = []
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order, and therefore any seeded split made from it, stable between runs.
    for folder in sorted(os.listdir(data_dir)):
        folder_path = os.path.join(data_dir, folder)
        # Stray files next to the class folders would make the inner listdir fail.
        if folder.startswith(".") or not os.path.isdir(folder_path):
            continue
        for file in sorted(os.listdir(folder_path)):
            file_path = os.path.join(folder_path, file)
            # Skip hidden files and nested folders, which are not audio samples.
            if file.startswith(".") or not os.path.isfile(file_path):
                continue
            features.append(extract_features(file_path))
            labels.append(folder)
    return np.array(features), np.array(labels)

In [14]:
# Load data and labels
# load_data treats every sub-folder of data_dir as one class and uses the
# folder name as the label of the files inside it.
# NOTE(review): the path is under /content/drive, which presumably requires
# Google Drive to be mounted first; no mount cell is visible here. Confirm.
data_dir = "/content/drive/MyDrive/DATA/AUDIO"  # Update with your data directory path
features, labels = load_data(data_dir)

In [18]:
# Encode labels
# Turn the folder-name labels into integer class ids. The fitted
# label_encoder is kept so predicted ids can be mapped back to names later.
label_encoder = LabelEncoder()
labels_encoded = label_encoder.fit_transform(labels)

In [19]:
# Train-validation-test split
# Step 1: hold out 10% of all samples as the test set.
X_trainval, X_test, y_trainval, y_test = train_test_split(
    features,
    labels_encoded,
    test_size=0.1,
    random_state=42,
)
# Step 2: carve 10% of the remaining samples off as the validation set.
X_train, X_val, y_train, y_val = train_test_split(
    X_trainval,
    y_trainval,
    test_size=0.1,
    random_state=42,
)

In [21]:
# Build the RNN-LSTM model
# Seed the Python, NumPy and TensorFlow RNGs before any weights are created so
# initialisation and batch shuffling repeat between runs (as far as the
# backend allows).
tf.keras.utils.set_random_seed(42)

num_features = X_train.shape[1]
num_classes = len(np.unique(labels_encoded))

model = models.Sequential([
    layers.Input(shape=(num_features,)),
    # Present each flat feature vector as a sequence with a single timestep,
    # which is the 3-D input layout the LSTM layers require.
    layers.Reshape((1, -1)),
    layers.LSTM(128, return_sequences=True),
    layers.LSTM(64),
    layers.Dense(64, activation='relu'),
    # One softmax output per encoded class.
    layers.Dense(num_classes, activation='softmax')
])
# Compile the model
# The sparse loss is used because the targets are the integer ids produced by
# the label encoder, not one-hot vectors.
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

In [22]:
# Train the model
# Fits for 20 epochs in batches of 32 and scores the validation split after
# each epoch; verbose=2 prints one summary line per epoch. The returned
# History object is kept in `history`.
history = model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=20, batch_size=32, verbose=2)

Epoch 1/20
203/203 - 7s - loss: 1.1073 - accuracy: 0.5470 - val_loss: 0.5323 - val_accuracy: 0.8006 - 7s/epoch - 37ms/step
Epoch 2/20
203/203 - 2s - loss: 0.3416 - accuracy: 0.8784 - val_loss: 0.2399 - val_accuracy: 0.9183 - 2s/epoch - 8ms/step
Epoch 3/20
203/203 - 2s - loss: 0.1969 - accuracy: 0.9330 - val_loss: 0.2302 - val_accuracy: 0.9224 - 2s/epoch - 11ms/step
Epoch 4/20
203/203 - 2s - loss: 0.1580 - accuracy: 0.9461 - val_loss: 0.1771 - val_accuracy: 0.9321 - 2s/epoch - 12ms/step
Epoch 5/20
203/203 - 2s - loss: 0.1339 - accuracy: 0.9529 - val_loss: 0.1455 - val_accuracy: 0.9460 - 2s/epoch - 8ms/step
Epoch 6/20
203/203 - 2s - loss: 0.1323 - accuracy: 0.9547 - val_loss: 0.1542 - val_accuracy: 0.9501 - 2s/epoch - 8ms/step
Epoch 7/20
203/203 - 2s - loss: 0.1161 - accuracy: 0.9567 - val_loss: 0.2211 - val_accuracy: 0.9238 - 2s/epoch - 8ms/step
Epoch 8/20
203/203 - 2s - loss: 0.0932 - accuracy: 0.9661 - val_loss: 0.1269 - val_accuracy: 0.9557 - 2s/epoch - 8ms/step
Epoch 9/20
203/203 - 

In [27]:
# Evaluate the model
# evaluate() returns the compiled loss followed by the compiled metrics,
# which here is the accuracy, both measured on the held-out test split.
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Loss: {loss}, Test Accuracy: {accuracy}")

# Optional: Save the model
# NOTE(review): only the network is written to disk; the fitted label_encoder
# is not persisted, so decoding predictions in a fresh kernel would require
# refitting it. Confirm whether that is intended.
model.save("speech_recognition_model.keras")

Test Loss: 0.11777452379465103, Test Accuracy: 0.9663341641426086


In [28]:
import librosa
import numpy as np
from tensorflow.keras.models import load_model

# Load the trained model
# Rebinds the global `model` to the copy restored from the saved .keras file.
model = load_model("speech_recognition_model.keras")  # Load your saved model here

# Function to extract averaged MFCC, chroma and mel-spectrogram features from audio file
def extract_features(file_path, mfcc=True, chroma=True, mel=True):
    """Build one feature vector for an audio file.

    Every enabled feature matrix is transposed and averaged over its first
    axis, and the resulting vectors are concatenated in the order
    MFCC, chroma, mel. The output must stay in sync with the feature
    extraction used to train the model that consumes it.

    Args:
        file_path: Path of the audio file, handed to ``librosa.load``.
        mfcc: Include the averaged MFCCs (``n_mfcc=40``).
        chroma: Include the averaged chroma STFT.
        mel: Include the averaged mel spectrogram.

    Returns:
        ``numpy.ndarray`` holding the concatenated averaged features.

    Raises:
        ValueError: If all three feature flags are false.
    """
    if not (mfcc or chroma or mel):
        # Fail before decoding the file: np.concatenate on an empty list would
        # otherwise raise an opaque "need at least one array" ValueError.
        raise ValueError("At least one of mfcc, chroma or mel must be enabled")

    # sr=None keeps the file's native sampling rate instead of resampling.
    audio, sample_rate = librosa.load(file_path, sr=None)

    # Separate local names so the boolean flags are never overwritten by arrays.
    parts = []
    if mfcc:
        mfcc_mean = np.mean(librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40).T, axis=0)
        parts.append(mfcc_mean)
    if chroma:
        chroma_mean = np.mean(librosa.feature.chroma_stft(y=audio, sr=sample_rate).T, axis=0)
        parts.append(chroma_mean)
    if mel:
        mel_mean = np.mean(librosa.feature.melspectrogram(y=audio, sr=sample_rate).T, axis=0)
        parts.append(mel_mean)
    return np.concatenate(parts)

# Function to predict command from audio file
def predict_command(file_path):
    """Return the index of the highest-scoring class for one audio file.

    The file is converted to a feature vector with ``extract_features``,
    wrapped into a batch of one, and scored by the global ``model``.

    Args:
        file_path: Path of the audio file to classify.

    Returns:
        The argmax over the model output, i.e. the encoded class id.
    """
    feature_vector = extract_features(file_path)
    # The model expects a leading batch axis, so prepend one.
    single_batch = np.expand_dims(feature_vector, 0)
    class_scores = model.predict(single_batch)
    return np.argmax(class_scores)

# Example usage
# NOTE(review): this sample sits inside the training data directory, so the
# model may already have seen it during training. Confirm with a held-out file.
audio_file_path = "/content/drive/MyDrive/DATA/AUDIO/ACTIVATE_LANE_ASSIST/ACTIVATE LANE ASSIST 1.wav"  # Update with path to your audio file
predicted_label = predict_command(audio_file_path)
# label_encoder is not defined in this cell; it is the encoder fitted in the
# label-encoding cell and must still be alive in the kernel to decode the id.
print("Predicted Command:", label_encoder.inverse_transform([predicted_label])[0])


Predicted Command: ACTIVATE_LANE_ASSIST
