# GENDER PREDICTION FROM AUDIO FILES

Import the necessary libraries

In [1]:
import os
import tkinter as tk
from tkinter import filedialog, messagebox
from pydub import AudioSegment
import speech_recognition as sr
import librosa
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import pickle
import sounddevice as sd
from scipy.io.wavfile import write



Feature Extraction

In [2]:
def extract_features(file_path):
    """Return a fixed-length MFCC feature vector for one audio file.

    Args:
        file_path: Path of the audio file to load with librosa.

    Returns:
        A 1-D NumPy array with 13 values: each MFCC coefficient averaged
        over all frames of the recording.
    """
    # sr=None asks librosa to keep the file's own sample rate instead of
    # resampling to its default.
    signal, sample_rate = librosa.load(file_path, sr=None)
    coefficients = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=13)
    # Transpose to (frames, coefficients) and average across the frames.
    return np.mean(coefficients.T, axis=0)

Train the model on the dataset

In [3]:
def load_and_train_model(dataset_path):
    """Train a linear SVM gender classifier from a directory of recordings.

    The dataset directory must contain two sub-directories, ``MALE`` and
    ``FEMALE``. Every regular, non-hidden file inside them is turned into a
    feature vector with ``extract_features`` and labelled 1 (male) or
    0 (female). 20% of the samples are held out to print an accuracy figure,
    and the fitted model is pickled to ``gender_prediction_model.pkl`` in the
    current working directory.

    Args:
        dataset_path: Directory holding the ``MALE`` and ``FEMALE`` folders.

    Returns:
        The fitted ``sklearn.svm.SVC`` model.

    Raises:
        ValueError: If no audio files were found under ``dataset_path``.
    """
    features = []
    labels = []

    for gender in ["MALE", "FEMALE"]:
        gender_dir = os.path.join(dataset_path, gender)
        label = 1 if gender == "MALE" else 0
        # os.listdir returns entries in arbitrary order; sorting makes the
        # sample order (and therefore the random_state=42 split) reproducible.
        for file_name in sorted(os.listdir(gender_dir)):
            file_path = os.path.join(gender_dir, file_name)
            # Skip sub-directories and hidden files (e.g. .DS_Store), which
            # cannot be decoded as audio and would abort the whole training.
            if file_name.startswith(".") or not os.path.isfile(file_path):
                continue
            features.append(extract_features(file_path))
            labels.append(label)

    if not features:
        raise ValueError(
            f"No audio files found under {dataset_path!r}; expected files in "
            "its 'MALE' and 'FEMALE' sub-directories."
        )

    X = np.array(features)
    y = np.array(labels)

    # Split the dataset into training and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Train a classifier
    model = SVC(kernel='linear', probability=True)
    model.fit(X_train, y_train)

    # Evaluate the model on the held-out samples
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Model Accuracy: {accuracy * 100:.2f}%")

    # Save the model
    with open('gender_prediction_model.pkl', 'wb') as f:
        pickle.dump(model, f)

    return model


Process the audio file and make predictions

In [11]:
def process_file(file_path, model):
    """Validate a WAV recording and show the predicted speaker gender.

    The file is transcribed with Google Speech Recognition and then checked
    in this order; the first failing check shows an error dialog and stops:

    1. the transcript must not contain the word "hi" (whole word, any case);
    2. the recording must be at least 30 seconds long;
    3. the transcript must not be empty.

    If the audio cannot be understood, an error dialog is shown and the
    transcript is treated as empty, so the remaining checks still run. If the
    recognition service cannot be reached, an error dialog is shown and the
    function returns immediately.

    When all checks pass, features from ``extract_features`` are passed to
    ``model.predict``; a prediction of 1 is reported as "Male", anything else
    as "Female".

    Args:
        file_path: Path to the WAV file to analyse.
        model: Fitted classifier exposing ``predict``.

    Returns:
        None. Every outcome is reported through a tkinter message box.
    """
    import re  # local import: only needed for the whole-word check below

    recognizer = sr.Recognizer()

    # Read the whole file into memory; the file does not need to stay open
    # while the (network) recognition request is running.
    with sr.AudioFile(file_path) as source:
        audio = recognizer.record(source)

    # Convert audio to text
    try:
        audio_text = recognizer.recognize_google(audio)
        print(f"Recognized Text: {audio_text}")
    except sr.UnknownValueError:
        audio_text = ""
        messagebox.showerror("Error", "Google Speech Recognition could not understand the audio.")
    except sr.RequestError as e:
        messagebox.showerror("Error", f"Could not request results from Google Speech Recognition service; {e}")
        return

    # Check if the word "HI" is in the audio. A plain substring test would
    # also reject ordinary words such as "this", "which" or "think", so match
    # on word boundaries instead.
    if re.search(r"\bHI\b", audio_text, flags=re.IGNORECASE):
        messagebox.showerror("Error", "The audio contains the word 'HI'. Please upload or record a voice note without 'HI'.")
        return

    # Check duration (pydub reports the segment length in milliseconds)
    audio_segment = AudioSegment.from_wav(file_path)
    duration_seconds = len(audio_segment) / 1000
    if duration_seconds < 30:
        messagebox.showerror("Error", "The audio is less than 30 seconds. Please upload or record a longer voice note.")
        return

    # Check if the audio is blank
    if not audio_text.strip():
        messagebox.showerror("Error", "The audio is blank. Please speak and try again.")
        return

    # Extract features and predict gender; the model expects a 2-D array with
    # one row per sample.
    features = extract_features(file_path).reshape(1, -1)
    gender_prediction = model.predict(features)[0]

    gender = "Male" if gender_prediction == 1 else "Female"
    messagebox.showinfo("Gender Prediction", f"The predicted gender is: {gender}")


Function to upload an audio file

In [5]:
def upload_file():
    """Ask the user for a WAV file and run gender prediction on it.

    Does nothing when the file dialog is cancelled.
    """
    selected_path = filedialog.askopenfilename(filetypes=[("WAV Files", "*.wav")])
    if not selected_path:
        # The dialog returns an empty value when the user cancels.
        return
    process_file(selected_path, gender_model)


Function to record an audio file

In [6]:
def record_audio(duration=30, fs=44100):
    """Record from the microphone, save a WAV file and run gender prediction.

    The recording is peak-normalised, written to ``recorded_audio.wav`` in
    the current working directory and handed to ``process_file``. Any error
    raised along the way is shown in an error dialog.

    Args:
        duration: Recording length in seconds. Defaults to 30 because
            ``process_file`` rejects recordings shorter than 30 seconds, so a
            shorter recording could never produce a prediction.
        fs: Sample rate in Hz.

    Returns:
        None.
    """
    messagebox.showinfo("Recording", "Recording will start. Please speak into the microphone.")

    try:
        # Record the audio
        recording = sd.rec(int(duration * fs), samplerate=fs, channels=1, dtype='int16')
        sd.wait()  # Wait until recording is finished

        # Normalize the recording. Work in floating point: taking abs() of
        # int16 data overflows for -32768, and a completely silent recording
        # has a peak of 0, which must not be used as a divisor.
        samples = recording.astype(np.float64)
        peak = np.max(np.abs(samples))
        if peak > 0:
            samples = samples / peak * 32767
        recording = samples.astype(np.int16)

        # Save the recording as a WAV file
        file_path = 'recorded_audio.wav'
        write(file_path, fs, recording)

        # Process the recorded audio
        process_file(file_path, gender_model)
    except Exception as e:
        # Top-level GUI boundary: report any failure instead of crashing.
        messagebox.showerror("Error", f"An error occurred during recording: {str(e)}")


Train the gender prediction model on the dataset (the fitted model is also saved to gender_prediction_model.pkl)

In [7]:
# Train a fresh model from the 'SPEECH DATA' directory on every run (no saved
# model is loaded here). upload_file and record_audio read this module-level
# name when they call process_file.
gender_model = load_and_train_model('SPEECH DATA')

Model Accuracy: 100.00%


Setting up the GUI

In [10]:
# Main application window.
root = tk.Tk()
root.title("Gender Prediction from Audio")

# Padded container that holds the two action buttons.
frame = tk.Frame(root, padx=10, pady=10)
frame.pack(pady=20)

# One button per input method: pick an existing WAV file or record a new one.
upload_button = tk.Button(
    frame,
    text="Upload Audio File",
    command=upload_file,
    width=30,
)
record_button = tk.Button(
    frame,
    text="Record Audio",
    command=record_audio,
    width=30,
)

# Stack the buttons vertically in a single column.
upload_button.grid(row=0, column=0, pady=10)
record_button.grid(row=1, column=0, pady=10)

# Hand control to Tk; this call blocks until the window is closed.
root.mainloop()