In [2]:
import os
import numpy as np
import librosa
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score

# Function to extract features from an audio file
def extract_features(file_path):
    audio, sample_rate = librosa.load(file_path, sr=None)
    mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
    mfccs = np.mean(mfccs.T, axis=0)
    return mfccs

# Directories containing the audio samples
positive_dir = '/content/drive/MyDrive/segmented/pos'
negative_dir = '/content/drive/MyDrive/segmented/neg'

# Extract features and labels
X, y = [], []

# Positive samples
for file_name in os.listdir(positive_dir):
    if file_name.endswith('.mp3'):
        file_path = os.path.join(positive_dir, file_name)
        features = extract_features(file_path)
        X.append(features)
        y.append(1)  # Positive label

# Negative samples
for file_name in os.listdir(negative_dir):
    if file_name.endswith('.mp3'):
        file_path = os.path.join(negative_dir, file_name)
        features = extract_features(file_path)
        X.append(features)
        y.append(0)  # Negative label

# Convert to numpy arrays
X = np.array(X)
y = np.array(y)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train an SVM model
svm_model = SVC(kernel='linear', C=1.0, random_state=42)
svm_model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = svm_model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f'Accuracy: {accuracy}')
print('Classification Report:')
print(report)


Accuracy: 0.8
Classification Report:
              precision    recall  f1-score   support

           0       0.89      0.67      0.76        12
           1       0.75      0.92      0.83        13

    accuracy                           0.80        25
   macro avg       0.82      0.79      0.79        25
weighted avg       0.82      0.80      0.80        25



In [3]:
import os
import joblib

# Ensure the directory exists
save_dir = '/content/drive/MyDrive/model_saves'
os.makedirs(save_dir, exist_ok=True)

# Save the scaler and SVM model
scaler_path = os.path.join(save_dir, 'scaler.pkl')
model_path = os.path.join(save_dir, 'svm_model.pkl')

joblib.dump(scaler, scaler_path)  # Save the scaler to a file
joblib.dump(svm_model, model_path)  # Save the SVM model to a file


['/content/drive/MyDrive/model_saves/svm_model.pkl']

In [4]:
import os
import numpy as np
import librosa
from sklearn.preprocessing import StandardScaler
import joblib

# Function to extract features from an audio file
def extract_features(file_path):
    audio, sample_rate = librosa.load(file_path, sr=None)
    mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
    mfccs = np.mean(mfccs.T, axis=0)
    return mfccs

# Path to the new audio file
new_audio_path = '/content/drive/MyDrive/segmented/pos/pos-0421-084-cough-m-50-0.mp3'

# Extract features from the new audio file
new_features = extract_features(new_audio_path)

# Load the scaler and model (assuming they were saved earlier)
scaler = joblib.load('/content/drive/MyDrive/model_saves/scaler.pkl')
svm_model = joblib.load('/content/drive/MyDrive/model_saves/svm_model.pkl')

# Preprocess the features
new_features = scaler.transform([new_features])

# Predict the label for the new audio sample
prediction = svm_model.predict(new_features)

# Output the prediction
if prediction == 1:
    print("The cough is COVID-19 positive.")
else:
    print("The cough is COVID-19 negative.")


The cough is COVID-19 positive.
