In [1]:
import os
import numpy as np
import librosa
import soundfile as sf
from sklearn.mixture import GaussianMixture
from sklearn.metrics import classification_report
from sklearn.preprocessing import StandardScaler

# Languages to recognise. Each entry is used both as the class label and as the
# name of the sub-directory that load_dataset() reads inside a dataset split.
# An empty entry must not appear here: it has no directory of its own and no
# training samples, so model training would have nothing to fit for it.
languages = ['francais', 'italien', 'allemande', 'russe']


In [2]:
# Extraction améliorée des caractéristiques : MFCC + delta + delta-delta + suppression silence
def extract_features(file_path):
    """Turn one audio file into a per-frame, standardised MFCC feature matrix.

    The file is loaded at its native sampling rate, trimmed with
    ``librosa.effects.trim(top_db=20)``, and described by 13 MFCCs plus their
    first- and second-order deltas.

    Args:
        file_path: Path of the audio file, passed to ``librosa.load``.

    Returns:
        Array with one row per frame and 39 columns (13 MFCC + 13 delta +
        13 delta-delta), scaled column-wise by a ``StandardScaler`` fitted on
        this file only.
    """
    # sr=None keeps the file's own sampling rate instead of resampling.
    signal, sample_rate = librosa.load(file_path, sr=None)
    voiced = librosa.effects.trim(signal, top_db=20)[0]

    cepstra = librosa.feature.mfcc(y=voiced, sr=sample_rate, n_mfcc=13)
    stacked = np.vstack(
        [
            cepstra,
            librosa.feature.delta(cepstra),
            librosa.feature.delta(cepstra, order=2),
        ]
    )

    # Transpose so frames become rows, then normalise each coefficient
    # using statistics from this utterance alone.
    return StandardScaler().fit_transform(stacked.T)

In [3]:
# Chargement du dataset
def load_dataset(dataset_path):
    """Load the features of every ``.wav`` file found under each language folder.

    For each non-empty entry of the module-level ``languages`` list, the
    directory ``dataset_path/<language>`` is scanned and every file whose name
    ends in ``.wav`` is passed to ``extract_features``.

    Args:
        dataset_path: Root directory of one dataset split; it is expected to
            contain one sub-directory per language.

    Returns:
        Tuple ``(X, y)`` of two parallel lists: ``X`` holds the value returned
        by ``extract_features`` for each file, ``y`` the matching language
        label. Files are visited in sorted name order within each language.

    Raises:
        OSError: If a language directory cannot be listed (e.g. it is missing).
    """
    X, y = [], []
    for label in languages:
        if not label:
            # os.path.join(dataset_path, '') is the split root itself, so a
            # blank label would tag any stray .wav found there with ''.
            continue
        class_path = os.path.join(dataset_path, label)
        # os.listdir() returns entries in arbitrary order; sorting keeps the
        # sample order (and anything fitted on it) reproducible across runs.
        for file in sorted(os.listdir(class_path)):
            if not file.endswith(".wav"):
                continue
            try:
                features = extract_features(os.path.join(class_path, file))
            except Exception as e:
                # Best effort: report the unreadable file and carry on.
                print(f"[ERREUR] {file} ignoré ({e})")
                continue
            X.append(features)
            y.append(label)
    return X, y

In [4]:
# Entraînement des modèles GMM
def train_gmm_models(X_train, y_train, n_components=16):
    """Fit one diagonal-covariance Gaussian mixture per language.

    Args:
        X_train: List of per-file feature matrices (one row per frame).
        y_train: List of language labels, parallel to ``X_train``.
        n_components: Number of mixture components for every model.

    Returns:
        Dict mapping each language of the module-level ``languages`` list to
        its fitted ``GaussianMixture``. A language with no training sample is
        reported and left out of the dict instead of aborting the whole run.
    """
    models = {}
    for lang in languages:
        lang_features = [feats for feats, label in zip(X_train, y_train) if label == lang]
        if not lang_features:
            # np.vstack([]) would raise an opaque ValueError; skip explicitly.
            print(f"[AVERTISSEMENT] aucune donnée d'entraînement pour '{lang}' : modèle ignoré")
            continue
        # All frames of the language are pooled into a single training matrix.
        gmm = GaussianMixture(n_components=n_components, covariance_type='diag', max_iter=200, random_state=0)
        gmm.fit(np.vstack(lang_features))
        models[lang] = gmm
    return models

In [5]:
# Prédiction par log-vraisemblance maximale
def predict(models, X):
    """Label each utterance with the language whose model scores it highest.

    Args:
        models: Dict mapping a language name to an object exposing
            ``score(features)`` (for a fitted ``GaussianMixture`` this is the
            log-likelihood of the frames under that model).
        X: Iterable of per-utterance feature matrices.

    Returns:
        List with one predicted language per element of ``X``, in order. On a
        tie the language that comes first in ``models`` wins.
    """
    def best_language(utterance):
        # max() walks the dict in insertion order and keeps the first maximum.
        return max(models, key=lambda lang: models[lang].score(utterance))

    return [best_language(utterance) for utterance in X]

In [6]:
def save_gmm_models(models, save_dir="models_gmm"):
    """Write the fitted parameters of every language model to ``.npy`` files.

    For each language four arrays are stored in ``save_dir`` as
    ``<lang>_means.npy``, ``<lang>_covariances.npy``, ``<lang>_weights.npy``
    and ``<lang>_precisions_cholesky.npy``.

    Args:
        models: Dict mapping a language name to a fitted mixture model.
        save_dir: Output directory; created if it does not exist yet.
    """
    os.makedirs(save_dir, exist_ok=True)
    # Each stored array is the model attribute of the same name plus a trailing "_".
    param_names = ("means", "covariances", "weights", "precisions_cholesky")
    for lang, gmm in models.items():
        for param in param_names:
            target = os.path.join(save_dir, f"{lang}_{param}.npy")
            np.save(target, getattr(gmm, f"{param}_"))
        print(f"✅ Modèle {lang} sauvegardé dans {save_dir}/")

In [7]:
# Load the three dataset splits; each call returns a (features, labels) pair.
print("Chargement des données...")
(X_train, y_train), (X_val, y_val), (X_test, y_test) = (
    load_dataset("dataset_cv - Copie/train"),
    load_dataset("dataset_cv - Copie/validation"),
    load_dataset("dataset_cv - Copie/test"),
)

Chargement des données...


In [70]:
# Sweep over the number of mixture components per language model. Every pass
# retrains all GMMs from scratch on the training split and prints a
# classification report for the validation split and for the test split.
# NOTE(review): the test split is scored for every candidate size, so picking
# n_components from these printouts also tunes on test data — consider
# selecting on the validation report only and scoring the test split once.
for n_components in [8, 16, 32, 64, 128, 256, 512]:
    print(f"\n🔁 Entraînement des GMMs avec {n_components} composantes...")
    # Rebinds the notebook-level `models`; only the last size survives the loop.
    models = train_gmm_models(X_train, y_train, n_components)

    print("📊 Évaluation sur la validation :")
    y_val_pred = predict(models, X_val)
    print(classification_report(y_val, y_val_pred, digits=3))

    print("📊 Évaluation sur le test :")
    y_test_pred = predict(models, X_test)
    print(classification_report(y_test, y_test_pred, digits=3))


🔁 Entraînement des GMMs avec 8 composantes...
📊 Évaluation sur la validation :
              precision    recall  f1-score   support

   allemande      0.625     0.721     0.670       104
    francais      0.808     0.567     0.667       104
     italien      0.684     0.769     0.724       104
       russe      0.689     0.702     0.695       104

    accuracy                          0.690       416
   macro avg      0.701     0.690     0.689       416
weighted avg      0.701     0.690     0.689       416

📊 Évaluation sur le test :
              precision    recall  f1-score   support

   allemande      0.639     0.745     0.688       204
    francais      0.819     0.578     0.678       204
     italien      0.656     0.711     0.682       204
       russe      0.681     0.711     0.695       204

    accuracy                          0.686       816
   macro avg      0.699     0.686     0.686       816
weighted avg      0.699     0.686     0.686       816


🔁 Entraînement des GMM

In [71]:
# Requires X_train and y_train to be defined already (data-loading cell).
# Retrains the per-language GMMs with the chosen mixture size, then writes
# their parameters to disk with save_gmm_models().
models = train_gmm_models(X_train, y_train, n_components=512)  # or 32, 64, etc.
save_gmm_models(models)

✅ Modèle francais sauvegardé dans models_gmm/
✅ Modèle italien sauvegardé dans models_gmm/
✅ Modèle allemande sauvegardé dans models_gmm/
✅ Modèle russe sauvegardé dans models_gmm/


In [3]:
!pip install -U openai-whisper

Collecting openai-whisper
  Downloading openai_whisper-20250625.tar.gz (803 kB)
     ---------------------------------------- 0.0/803.2 kB ? eta -:--:--
     ------------- -------------------------- 262.1/803.2 kB ? eta -:--:--
     ------------------------ ------------- 524.3/803.2 kB 2.8 MB/s eta 0:00:01
     -------------------------------------- 803.2/803.2 kB 1.9 MB/s eta 0:00:00
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'done'
Collecting tiktoken (from openai-whisper)
  Downloading tiktoken-0.11.0-cp312-cp312-win_amd64.whl.metadata (6.9 kB)
Downloading tiktoken-0.11.0-cp312-cp312-win_amd64.whl (884 kB)
   ---------------------------------------- 0.0/884.3 kB ? eta -:--:--
   ----------------------- -----