In [None]:
# Import necessary libraries
import librosa
import numpy as np
import os
import pandas as pd
import concurrent.futures
import soundfile as sf

In [None]:
# Convert MP3 files to WAV format
# Source (MP3) and destination (WAV) folders
mp3_dir = r'C:/Users/yingx/data/MEMD_audio'
wav_dir = r'C:/Users/yingx/data/WaveFiles'

# The destination folder must exist before anything is written into it
os.makedirs(wav_dir, exist_ok=True)

# Walk the numeric ids 2..2058 (inclusive); ids without an MP3 are skipped
for track_id in range(2, 2059):
    mp3_path = os.path.join(mp3_dir, f'{track_id}.mp3')
    wav_path = os.path.join(wav_dir, f'{track_id}.wav')

    # Guard clause: nothing to convert when the source file is absent
    if not os.path.exists(mp3_path):
        print(f'{track_id}.mp3 not found, skipping')
        continue

    try:
        # sr=None keeps the file's native sample rate (no resampling);
        # librosa.load is called with its default channel handling
        samples, sample_rate = librosa.load(mp3_path, sr=None)

        # Write the decoded samples out as a WAV file via soundfile
        sf.write(wav_path, samples, sample_rate)

        print(f'Successfully converted {track_id}.mp3 to {track_id}.wav')
    except Exception as err:
        # Best effort: report the failure and carry on with the next id
        print(f'Error processing {track_id}.mp3: {err}')

print('Conversion complete!')

In [None]:
# Create features.csv from audio files in a specified directory
def extract_features(wav_file):
    """Compute one record of summary audio features for a single audio file.

    Parameters
    ----------
    wav_file : str
        Path of the audio file. It is loaded with ``librosa.load(..., sr=None)``,
        i.e. at the file's own sample rate.

    Returns
    -------
    dict
        Keys: ``filename`` (base name of ``wav_file``), ``duration``,
        ``tempo`` (always a plain ``float``), ``spectral_centroid``,
        ``zero_crossing_rate``, ``chroma_feature`` (list), ``mfccs`` (list),
        ``rms`` and ``harmonicity``.

    Raises
    ------
    Exception
        Nothing is caught here; any error raised by librosa while loading or
        analysing the file propagates to the caller.
    """
    # Load the audio file at its native sample rate
    y, sr = librosa.load(wav_file, sr=None)

    # Calculate features
    duration = librosa.get_duration(y=y, sr=sr)

    # Depending on the librosa version, beat_track hands back the tempo either
    # as a scalar or as a one-element array. Normalise it to a plain float so
    # the CSV column holds numbers rather than strings such as "[120.0]".
    tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
    tempo = float(np.atleast_1d(tempo).ravel()[0])

    # Frame-wise features are collapsed to a single mean value per file
    spectral_centroid = np.mean(librosa.feature.spectral_centroid(y=y, sr=sr))
    zero_crossing_rate = np.mean(librosa.feature.zero_crossing_rate(y=y))
    rms = np.mean(librosa.feature.rms(y=y))

    # Matrix-valued features are averaged along axis 1, leaving one value per row
    chroma_feature = np.mean(librosa.feature.chroma_stft(y=y, sr=sr), axis=1)
    mfccs = np.mean(librosa.feature.mfcc(y=y, sr=sr), axis=1)

    # NOTE(review): this is the mean of whatever librosa.effects.harmonic
    # returns for the signal - confirm that this is the intended
    # "harmonicity" measure.
    harmonicity = np.mean(librosa.effects.harmonic(y=y))

    return {
        'filename': os.path.basename(wav_file),
        'duration': duration,
        'tempo': tempo,
        'spectral_centroid': spectral_centroid,
        'zero_crossing_rate': zero_crossing_rate,
        'chroma_feature': chroma_feature.tolist(),
        'mfccs': mfccs.tolist(),
        'rms': rms,
        'harmonicity': harmonicity
    }

def save_features_to_csv(features, csv_file):
    """Write feature records to a CSV file, creating its parent folder if needed.

    Parameters
    ----------
    features : anything accepted by ``pd.DataFrame``
        The records to write (e.g. a list of per-file feature dicts).
    csv_file : str
        Destination path. A bare file name with no directory part is allowed
        and is written relative to the current working directory.

    Returns
    -------
    None
        The file at ``csv_file`` is overwritten; the index is not written.
    """
    # Create a DataFrame from the features
    df = pd.DataFrame(features)

    # Ensure the directory exists. os.path.dirname returns '' for a bare file
    # name, and os.makedirs('') raises FileNotFoundError, so only create the
    # folder when there actually is one in the path.
    parent_dir = os.path.dirname(csv_file)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # Save to CSV
    df.to_csv(csv_file, index=False)

def process_file(wav_file):
    """Extract features for ``wav_file`` if it exists on disk.

    Prints a "Processed" line and returns the feature record on success.
    Prints a "File not found" line and returns ``None`` when the path does
    not exist. Errors raised during extraction are not caught here.
    """
    # Guard clause: bail out early when there is nothing to read
    if not os.path.exists(wav_file):
        print(f"File not found: {wav_file}")
        return None

    extracted = extract_features(wav_file)
    print(f"Processed: {wav_file}")
    return extracted

def safe_extract(wav_file):
    """Best-effort wrapper around ``extract_features`` that never raises.

    Returns the feature record for ``wav_file``, or ``None`` when the file is
    missing or when anything goes wrong while checking or processing it. In
    both failure cases a one-line message is printed.
    """
    try:
        # The existence check stays inside the try so that a failure in the
        # check itself is reported the same way as an extraction failure
        if not os.path.exists(wav_file):
            print(f"File not found: {wav_file}")
            return None
        return extract_features(wav_file)
    except Exception as err:
        print(f"Error processing {wav_file}: {err}")
        return None

if __name__ == "__main__":
    # Batch driver: extract features for a numbered range of WAV files and
    # append them to one CSV, a batch at a time, so partial progress is kept.
    wav_dir = r'C:\Users\yingx\data\WaveFiles'
    output_csv = r'C:\Users\yingx\data\features.csv'

    # Start from 703.wav since previous files are already processed.
    # The range is named once here so the progress message below cannot drift
    # out of sync with it.
    first_index = 703
    last_index = 2058
    wav_files = [os.path.join(wav_dir, f'{i}.wav') for i in range(first_index, last_index + 1)]

    # Only create the output folder when the path actually has one
    # (os.makedirs('') raises FileNotFoundError).
    output_dir = os.path.dirname(output_csv)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    batch_size = 100
    # Treat an existing but empty file as "no header yet"; otherwise the rows
    # would be appended to a CSV that never gets a header line.
    header_written = os.path.exists(output_csv) and os.path.getsize(output_csv) > 0

    # One thread pool is reused for all batches instead of being rebuilt each time
    with concurrent.futures.ThreadPoolExecutor() as executor:
        for batch_start in range(0, len(wav_files), batch_size):
            batch_files = wav_files[batch_start:batch_start + batch_size]
            results = list(executor.map(safe_extract, batch_files))

            # safe_extract returns None for missing or failed files; drop those
            features_batch = [res for res in results if res is not None]
            if features_batch:
                df = pd.DataFrame(features_batch)
                df.to_csv(output_csv, mode='a', header=not header_written, index=False)
                header_written = True

            batch_first = first_index + batch_start
            batch_last = batch_first + len(batch_files) - 1
            print(f"Processed batch {batch_first} to {batch_last}")

    print(f"Feature extraction complete. Features saved to: {output_csv}")

File not found: C:\Users\yingx\OneDrive\Documents\GitHub\Capstone\data\WaveFiles\705.wav
File not found: C:\Users\yingx\OneDrive\Documents\GitHub\Capstone\data\WaveFiles\716.wav
File not found: C:\Users\yingx\OneDrive\Documents\GitHub\Capstone\data\WaveFiles\720.wav
File not found: C:\Users\yingx\OneDrive\Documents\GitHub\Capstone\data\WaveFiles\751.wav
File not found: C:\Users\yingx\OneDrive\Documents\GitHub\Capstone\data\WaveFiles\752.wav
File not found: C:\Users\yingx\OneDrive\Documents\GitHub\Capstone\data\WaveFiles\753.wav
File not found: C:\Users\yingx\OneDrive\Documents\GitHub\Capstone\data\WaveFiles\754.wav
File not found: C:\Users\yingx\OneDrive\Documents\GitHub\Capstone\data\WaveFiles\755.wav
File not found: C:\Users\yingx\OneDrive\Documents\GitHub\Capstone\data\WaveFiles\760.wav
File not found: C:\Users\yingx\OneDrive\Documents\GitHub\Capstone\data\WaveFiles\761.wav
File not found: C:\Users\yingx\OneDrive\Documents\GitHub\Capstone\data\WaveFiles\762.wav
File not found: C:\Us