In [1]:
pip install librosa

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.3.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
import os
import librosa
import numpy as np
import soundfile as sf

def preprocess_audio(input_folder, output_folder, sample_rate=16000):
    """Load, silence-trim and peak-normalize every WAV file under a folder.

    The tree below ``input_folder`` is walked recursively. Each matching file
    is loaded with ``librosa.load``, trimmed with ``librosa.effects.trim``,
    scaled so its largest absolute sample is 1.0, and written with
    ``soundfile.write`` to the same relative location below ``output_folder``.
    A failure on one file is reported with ``print`` and does not stop the run.

    Args:
        input_folder: Root directory searched for ``.wav`` files. The
            extension is matched case-insensitively.
        output_folder: Root directory for the processed copies; created if it
            does not exist. If it lives inside ``input_folder`` it is skipped
            during the walk so earlier results are not processed again.
        sample_rate: Value passed as ``sr`` to ``librosa.load``; the rate
            returned by the loader is used when writing. Defaults to 16000.

    Returns:
        None. Results are written to disk and progress is printed.
    """
    os.makedirs(output_folder, exist_ok=True)
    output_root = os.path.normcase(os.path.abspath(output_folder))

    for root, dirs, files in os.walk(input_folder):
        # Prune in place (os.walk honours this in top-down mode) so an output
        # folder nested inside the input folder is never walked.
        dirs[:] = [
            d for d in dirs
            if os.path.normcase(os.path.abspath(os.path.join(root, d))) != output_root
        ]

        for file_name in files:
            # Case-insensitive so files named e.g. "CLIP.WAV" are not skipped.
            if not file_name.lower().endswith('.wav'):
                continue

            file_path = os.path.join(root, file_name)
            print(f"Processing: {file_path}")

            try:
                # Load Audio
                audio, sr = librosa.load(file_path, sr=sample_rate)

                # Trim Silence
                audio_trimmed, _ = librosa.effects.trim(audio)

                # Nothing left to normalize or save; report it explicitly
                # instead of letting np.max fail on a zero-size array.
                if audio_trimmed.size == 0:
                    print(f"Error processing {file_path}: audio is empty after trimming")
                    continue

                # Normalize Audio (peak normalization; all-zero audio is left as is)
                max_amplitude = np.max(np.abs(audio_trimmed))
                if max_amplitude > 0:
                    audio_normalized = audio_trimmed / max_amplitude
                else:
                    audio_normalized = audio_trimmed

                # Save Processed Audio, mirroring the input directory layout
                relative_path = os.path.relpath(file_path, input_folder)
                output_path = os.path.join(output_folder, relative_path)
                os.makedirs(os.path.dirname(output_path), exist_ok=True)
                sf.write(output_path, audio_normalized, sr)
                print(f"Saved: {output_path}")

            except Exception as e:
                # Best effort: one unreadable file must not abort the batch.
                print(f"Error processing {file_path}: {e}")

# Example Usage
# The project root defaults to the original author's machine; set the
# RP_PROJECT_DIR environment variable to run this notebook elsewhere.
project_dir = os.environ.get('RP_PROJECT_DIR', r'C:\Users\LENOVO\Desktop\RP')
input_folder = os.path.join(project_dir, 'data')
output_folder = os.path.join(project_dir, 'processed_data')
preprocess_audio(input_folder, output_folder)


Processing: C:\Users\LENOVO\Desktop\RP\data\English\A01_M\en_A01_M_01.wav
Saved: C:\Users\LENOVO\Desktop\RP\processed_data\English\A01_M\en_A01_M_01.wav
Processing: C:\Users\LENOVO\Desktop\RP\data\English\A01_M\en_A01_M_02.wav
Saved: C:\Users\LENOVO\Desktop\RP\processed_data\English\A01_M\en_A01_M_02.wav
Processing: C:\Users\LENOVO\Desktop\RP\data\English\A01_M\en_A01_M_03.wav
Saved: C:\Users\LENOVO\Desktop\RP\processed_data\English\A01_M\en_A01_M_03.wav
Processing: C:\Users\LENOVO\Desktop\RP\data\English\A01_M\en_A01_M_04.wav
Saved: C:\Users\LENOVO\Desktop\RP\processed_data\English\A01_M\en_A01_M_04.wav
Processing: C:\Users\LENOVO\Desktop\RP\data\English\A01_M\en_A01_M_05.wav
Saved: C:\Users\LENOVO\Desktop\RP\processed_data\English\A01_M\en_A01_M_05.wav
Processing: C:\Users\LENOVO\Desktop\RP\data\English\A02_M\en_A02_M_01.wav
Saved: C:\Users\LENOVO\Desktop\RP\processed_data\English\A02_M\en_A02_M_01.wav
Processing: C:\Users\LENOVO\Desktop\RP\data\English\A02_M\en_A02_M_02.wav
Saved: C

In [3]:
pip install torch torchaudio transformers

Defaulting to user installation because normal site-packages is not writeable
Collecting torch
  Downloading torch-2.6.0-cp313-cp313-win_amd64.whl.metadata (28 kB)
Collecting torchaudio
  Downloading torchaudio-2.6.0-cp313-cp313-win_amd64.whl.metadata (6.7 kB)
Collecting transformers
  Downloading transformers-4.50.2-py3-none-any.whl.metadata (39 kB)
Collecting filelock (from torch)
  Downloading filelock-3.18.0-py3-none-any.whl.metadata (2.9 kB)
Collecting networkx (from torch)
  Downloading networkx-3.4.2-py3-none-any.whl.metadata (6.3 kB)
Collecting fsspec (from torch)
  Downloading fsspec-2025.3.0-py3-none-any.whl.metadata (11 kB)
Collecting sympy==1.13.1 (from torch)
  Downloading sympy-1.13.1-py3-none-any.whl.metadata (12 kB)
Collecting mpmath<1.4,>=1.1.0 (from sympy==1.13.1->torch)
  Downloading mpmath-1.3.0-py3-none-any.whl.metadata (8.6 kB)
Collecting huggingface-hub<1.0,>=0.26.0 (from transformers)
  Downloading huggingface_hub-0.29.3-py3-none-any.whl.metadata (13 kB)
Collect


[notice] A new release of pip is available: 24.3.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [4]:
import torch
from transformers import Wav2Vec2Processor, Wav2Vec2Model

# Load the pretrained Wav2Vec2 processor and model from the same checkpoint.
# Both objects are used as module-level globals by extract_features below.
checkpoint_name = "facebook/wav2vec2-base-960h"
processor = Wav2Vec2Processor.from_pretrained(checkpoint_name)
model = Wav2Vec2Model.from_pretrained(checkpoint_name)

def extract_features(audio_path):
    """Compute a time-averaged Wav2Vec2 embedding for one audio file.

    The file is loaded with ``librosa.load`` at 16000 Hz, passed through the
    module-level ``processor`` and ``model``, and the model's
    ``last_hidden_state`` is averaged over dimension 1.

    Args:
        audio_path: Path of the audio file to embed.

    Returns:
        The pooled embedding as a NumPy array, or ``None`` if any step raised
        (the error is printed rather than propagated).
    """
    try:
        # Read the waveform; the returned sampling rate is not needed here.
        waveform, _ = librosa.load(audio_path, sr=16000)
        model_inputs = processor(
            waveform,
            sampling_rate=16000,
            return_tensors="pt",
            padding=True,
        )

        # Inference only, so gradient tracking is switched off.
        with torch.no_grad():
            hidden_states = model(**model_inputs).last_hidden_state
            pooled = hidden_states.mean(dim=1)
            embeddings = pooled.cpu().numpy()

        print(f"Extracted features from {audio_path}")
        return embeddings
    except Exception as e:
        # Best effort: report the failure and let the caller skip this file.
        print(f"Error processing {audio_path}: {e}")
        return None

def process_folder(input_folder, output_folder):
    """Extract features for every WAV file under a folder and save them as .npy.

    The tree below ``input_folder`` is walked recursively. Each matching file
    is passed to ``extract_features``; when that returns an array it is saved
    with ``np.save`` to the same relative location below ``output_folder``,
    with the file extension replaced by ``.npy``. Files for which
    ``extract_features`` returns ``None`` are skipped.

    Args:
        input_folder: Root directory searched for ``.wav`` files. The
            extension is matched case-insensitively.
        output_folder: Root directory for the ``.npy`` files; created if it
            does not exist.

    Returns:
        None. Results are written to disk and progress is printed.
    """
    os.makedirs(output_folder, exist_ok=True)

    for root, _, files in os.walk(input_folder):
        for file_name in files:
            # Case-insensitive so files named e.g. "CLIP.WAV" are not skipped.
            if not file_name.lower().endswith('.wav'):
                continue

            file_path = os.path.join(root, file_name)
            embeddings = extract_features(file_path)
            if embeddings is None:
                continue

            # Swap only the final extension. A plain str.replace('.wav', ...)
            # would also rewrite '.wav' inside directory or file stems.
            relative_path = os.path.relpath(file_path, input_folder)
            relative_stem, _ext = os.path.splitext(relative_path)
            output_path = os.path.join(output_folder, relative_stem + '.npy')

            # Save as numpy array
            os.makedirs(os.path.dirname(output_path), exist_ok=True)
            np.save(output_path, embeddings)
            print(f"Saved features to {output_path}")

# Paths
# The project root defaults to the original author's machine; set the
# RP_PROJECT_DIR environment variable to run this notebook elsewhere.
project_dir = os.environ.get("RP_PROJECT_DIR", r"C:\Users\LENOVO\Desktop\RP")
input_folder = os.path.join(project_dir, "processed_data")
output_folder = os.path.join(project_dir, "features")
process_folder(input_folder, output_folder)

  from .autonotebook import tqdm as notebook_tqdm
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Some weights of Wav2Vec2Model were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Extracted features from C:\Users\LENOVO\Desktop\RP\processed_data\English\A01_M\en_A01_M_01.wav
Saved features to C:\Users\LENOVO\Desktop\RP\features\English\A01_M\en_A01_M_01.npy
Extracted features from C:\Users\LENOVO\Desktop\RP\processed_data\English\A01_M\en_A01_M_02.wav
Saved features to C:\Users\LENOVO\Desktop\RP\features\English\A01_M\en_A01_M_02.npy
Extracted features from C:\Users\LENOVO\Desktop\RP\processed_data\English\A01_M\en_A01_M_03.wav
Saved features to C:\Users\LENOVO\Desktop\RP\features\English\A01_M\en_A01_M_03.npy
Extracted features from C:\Users\LENOVO\Desktop\RP\processed_data\English\A01_M\en_A01_M_04.wav
Saved features to C:\Users\LENOVO\Desktop\RP\features\English\A01_M\en_A01_M_04.npy
Extracted features from C:\Users\LENOVO\Desktop\RP\processed_data\English\A01_M\en_A01_M_05.wav
Saved features to C:\Users\LENOVO\Desktop\RP\features\English\A01_M\en_A01_M_05.npy
Extracted features from C:\Users\LENOVO\Desktop\RP\processed_data\English\A02_M\en_A02_M_01.wav
Save