In [21]:

import torchaudio
from torch.utils.data import Dataset, DataLoader
import torch
import torch.nn as nn
import torch.nn.functional as F

import os
import pandas as pd
import torchaudio
from torch.utils.data import Dataset, DataLoader, random_split
import torch
import librosa
import librosa.display
import matplotlib.pyplot as plt


In [22]:
labels_df = pd.read_excel('./data/data/NEW_IRB300012145_Patient_ID_deidentified.xlsx')
smoking_status = labels_df.iloc[:, [0,27]]
smoking_status.iloc[:, 0] = smoking_status.iloc[:, 0].apply(lambda x: x[-3:])

filtered_df = smoking_status[~pd.isna(smoking_status.iloc[:, 1])]

label_list = list(filtered_df.itertuples(index=False, name=None))
label_list

[('020', 78.86),
 ('022', 83.72),
 ('023', 89.58),
 ('024', 96.51),
 ('025', 67.07),
 ('026', 73.86),
 ('029', 70.62),
 ('030', 89.22),
 ('031', 75.68),
 ('032', 78.21),
 ('033', 71.77),
 ('034', 60.95),
 ('035', 81.99),
 ('036', 80.05),
 ('037', 90.6),
 ('038', 82.59),
 ('039', 81.06),
 ('040', 81.5),
 ('041', 44.05),
 ('042', 77.16),
 ('043', 75.69),
 ('044', 82.62),
 ('045', 66.13),
 ('046', 76.57),
 ('047', 65.16),
 ('048', 63.78),
 ('049', 58.63),
 ('051', 68.89),
 ('052', 83.25),
 ('053', 85.91),
 ('054', 73.39),
 ('057', 55.0),
 ('058', 68.51),
 ('061', 88.75),
 ('063', 67.59),
 ('064', 61.47),
 ('065', 75.97),
 ('066', 69.39),
 ('067', 69.55),
 ('068', 83.77),
 ('069', 69.4),
 ('070', 33.99),
 ('073', 57.3),
 ('074', 79.12),
 ('075', 80.09),
 ('076', 75.58),
 ('078', 46.58),
 ('079', 76.7),
 ('080', 81.79),
 ('081', 87.53),
 ('082', 80.36),
 ('083', 91.9),
 ('084', 76.89),
 ('085', 79.3),
 ('088', 84.25),
 ('091', 63.78),
 ('093', 80.25),
 ('094', 87.95),
 ('095', 54.19),
 ('09

In [23]:
import torchaudio.transforms as T

class AmplitudeNormalization:
    def __call__(self, waveform):
        # Normalize the waveform to be within [-1, 1]
        peak = waveform.abs().max()
        if peak > 0:
            waveform = waveform / peak
        return waveform

# To use it:
# waveform, sample_rate = torchaudio.load('path/to/audio.wav')
# waveform = AmplitudeNormalization()(waveform)


class PadTrimAudio:
    def __init__(self, max_len):
        self.max_len = max_len

    def __call__(self, waveform):
        if waveform.size(1) > self.max_len:
            # Trim the waveform if longer than max_len
            waveform = waveform[:, :self.max_len]
        elif waveform.size(1) < self.max_len:
            # Pad with zeros if shorter than max_len
            padding_size = self.max_len - waveform.size(1)
            waveform = torch.nn.functional.pad(waveform, (0, padding_size), "constant", 0)

        if waveform.size(0) < 128:
            padding_size = 128 - waveform.size(0)
            waveform = torch.nn.functional.pad(waveform, (padding_size, 0), "constant", 0)
        return waveform

from sklearn.preprocessing import StandardScaler
import numpy as np

class FeatureNormalization:
    def __init__(self):
        self.scaler = StandardScaler()

    def fit(self, features):
        # Fit the scaler on the training set features
        self.scaler.fit(features)

    def transform(self, features):
        # Apply normalization to features
        return self.scaler.transform(features)

In [24]:
label_to_int = {
    'Normal': 0,
    'Restrictive vent. Defect': 1,
    'Obstructive vent. Defect': 1,
    'Obstr. Cannot R/O Restriction': 1
}


In [25]:
class SoundDataset(Dataset):
    def __init__(self, data_dir, labels_df, transform=None, max_len=10000):
        self.data_dir = data_dir
        self.labels_df = labels_df
        self.transform = transform
        self.max_len = max_len
        self.orderlist = ['RLP.wav', 'RUP.wav', 'RUA Hum.wav', 'LUA Hum.wav', 'RUA.wav', 'RMP.wav', 'LMP.wav', 'LUA.wav', 'LLP.wav', 'LUP.wav']



    def __len__(self):
        return len(self.labels_df)

    def __getitem__(self, idx):
        patient_id = self.labels_df[idx][0]
        # audio_dir = os.path.join(self.data_dir, str(patient_id), 'breath Eko')
        audio_dir = os.path.join(self.data_dir, str(patient_id), 'Voice')
        audio_file = [os.path.join(audio_dir, f) for f in os.listdir(audio_dir) if f.endswith('.wav') and '_' not in f][0]

        y, sr = librosa.load(audio_file, sr=None)


        D = librosa.stft(y, n_fft=2048, hop_length=512)  # n_fft and hop_length can be adjusted based on your needs
        S = np.abs(D) ** 2  # Convert to power spectrum
        mel_spec = librosa.feature.melspectrogram(S=S, sr=sr, n_mels=128)


        waveform = PadTrimAudio(self.max_len)(torch.tensor(mel_spec))

        label = self.labels_df[idx][1]

        return waveform, label

transform = AmplitudeNormalization()
data_dir = './data/data/Patients'
sound_dataset = SoundDataset(data_dir, label_list, transform=transform)

all_len =  len(label_list)
train_len = round(all_len * 0.9)

train_dataset, test_dataset = random_split(sound_dataset, [train_len, all_len-train_len])

traindataloader = DataLoader(train_dataset, batch_size=4, shuffle=True)
testdataloader = DataLoader(test_dataset, batch_size=4, shuffle=False)

dataloader = DataLoader(sound_dataset, batch_size=20, shuffle=True)

In [26]:
for batch in traindataloader:
    waveforms, labels = batch
    print(waveforms.shape)
    print(labels)

torch.Size([4, 128, 10000])
tensor([62.5400, 67.5900, 71.7700, 73.0800], dtype=torch.float64)
torch.Size([4, 128, 10000])
tensor([82.6200, 65.1600, 62.9000, 83.7200], dtype=torch.float64)
torch.Size([4, 128, 10000])
tensor([44.0500, 75.9200, 71.2700, 86.2900], dtype=torch.float64)
torch.Size([4, 128, 10000])
tensor([67.0700, 74.1600, 81.2300, 89.5800], dtype=torch.float64)
torch.Size([4, 128, 10000])
tensor([38.7500, 83.7700, 96.5100, 83.8600], dtype=torch.float64)
torch.Size([4, 128, 10000])
tensor([46.3500, 89.2200, 79.1600, 74.0500], dtype=torch.float64)
torch.Size([4, 128, 10000])
tensor([84.1000, 81.5000, 79.1200, 80.3600], dtype=torch.float64)
torch.Size([4, 128, 10000])
tensor([67.4700, 83.7100, 80.0500, 70.7700], dtype=torch.float64)
torch.Size([4, 128, 10000])
tensor([73.8600, 79.5100, 70.6200, 88.5300], dtype=torch.float64)
torch.Size([4, 128, 10000])
tensor([79.0500, 79.4400, 93.3300, 75.6800], dtype=torch.float64)
torch.Size([4, 128, 10000])
tensor([84.7200, 80.9800, 76.720

In [27]:
for batch in testdataloader:
    waveforms, labels = batch
    print(waveforms.shape)
    print(labels)

torch.Size([4, 128, 10000])
tensor([58.6300, 63.7800, 76.9100, 85.0100], dtype=torch.float64)
torch.Size([4, 128, 10000])
tensor([47.4700, 87.5900, 68.8900, 76.5800], dtype=torch.float64)
torch.Size([4, 128, 10000])
tensor([84.7000, 59.5900, 39.1000, 71.8600], dtype=torch.float64)
torch.Size([3, 128, 10000])
tensor([81.7800, 81.0600, 84.6600], dtype=torch.float64)


In [28]:
class AudioClassifier(nn.Module):
    def __init__(self):
        super(AudioClassifier, self).__init__()
        # Adjusted the number of input channels to 1280
        self.conv1 = nn.Conv1d(128, 32, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool1d(2, 2)
        self.conv2 = nn.Conv1d(32, 32, kernel_size=3, stride=1, padding=1)

        # Calculate the size of the output from the last conv layer to pass to the first linear layer
        initial_length = 10000  # Initial length of the signal
        final_conv_length = self._get_conv1d_output_size(initial_length, 3, 1, 1, 2)  # After first conv and pool
        final_conv_length = self._get_conv1d_output_size(final_conv_length, 3, 1, 1, 2)  # After second conv and pool

        self.fc1 = nn.Linear(32 * final_conv_length, 128)  # Linear layer for feature reduction
        self.fc2 = nn.Linear(128, 1)  # Output layer for 3 classes

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1)  # Flatten from the second dimension onward
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

    def _get_conv1d_output_size(self, L, kernel_size, stride, padding, pool):
        # Utility function to calculate the size of the output of a conv1d layer after pooling
        L = (L + 2 * padding - kernel_size) // stride + 1
        L = L // pool
        return L

class MLPClassifier(nn.Module):
    def __init__(self):
        super(MLPClassifier, self).__init__()
        # Flatten the input from 1280x1000 to 1280000
        self.fc1 = nn.Linear(1280 * 1000, 1024)  # First fully connected layer
        self.fc2 = nn.Linear(1024, 512)          # Second fully connected layer
        self.fc3 = nn.Linear(512, 3)             # Output layer for 3 classes

    def forward(self, x):
        # Flatten the input
        x = x.view(-1, 1280 * 1000)  # Ensure input tensor is reshaped to (batch_size, 1280*1000)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)  # No activation function here as it will be used with CrossEntropyLoss
        return x

In [29]:
import torch

def test_model(model, test_loader, device='cpu'):
    """
    Tests the given model on the provided test data loader.

    Parameters:
        model (torch.nn.Module): The model to test.
        test_loader (torch.utils.data.DataLoader): DataLoader for the test set.
        device (str): Device to run the model on ('cpu' or 'cuda').

    Returns:
        float: Accuracy of the model on the test set.
    """
    # Set the model to evaluation mode
    model.eval()
    model.to(device)

    correct = 0
    total = 0
    predictions = []
    true_labels = []

    with torch.no_grad():
        all_mse = 0
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            print(labels)
            print(outputs.squeeze())
            criterion = nn.MSELoss()
            all_mse += criterion(outputs.squeeze(), labels)
            print('----------------------------------------------')
        print(all_mse)


    accuracy = 0


    return accuracy




In [30]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = AudioClassifier().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.MSELoss()

def train(model, device, train_loader, optimizer, criterion, num_epochs=10):
    model.train()
    for epoch in range(num_epochs):
        total_loss = 0
        correct = 0
        total = 0

        for data, targets in train_loader:
            data = data.to(device).float()  # Convert data to float
            targets = targets.to(device).float()  # Convert targets to float

            optimizer.zero_grad()
            outputs = model(data)  # Add channel dimension
            loss = criterion(outputs.squeeze(), targets)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()


        avg_loss = total_loss / len(train_loader)
        print(f'Epoch {epoch+1}, Loss: {avg_loss:.4f}')
        accuracy = test_model(model, testdataloader, device)

train(model, device, traindataloader, optimizer, criterion)


  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 1, Loss: 10022.9601
tensor([58.6300, 63.7800, 76.9100, 85.0100], dtype=torch.float64)
tensor([ 56.9032, 218.8469,  33.0102,   7.7583])
----------------------------------------------
tensor([47.4700, 87.5900, 68.8900, 76.5800], dtype=torch.float64)
tensor([101.5140,  48.6252,  50.2133,  10.6333])
----------------------------------------------
tensor([84.7000, 59.5900, 39.1000, 71.8600], dtype=torch.float64)
tensor([64.1892, 36.2095, 29.6432, 53.2128])
----------------------------------------------
tensor([81.7800, 81.0600, 84.6600], dtype=torch.float64)
tensor([44.2152, 34.3144,  9.8557])
----------------------------------------------
tensor(13685.2457, dtype=torch.float64)
Epoch 2, Loss: 2435.0509
tensor([58.6300, 63.7800, 76.9100, 85.0100], dtype=torch.float64)
tensor([102.2732, 128.0534,  28.8913,  14.1675])
----------------------------------------------
tensor([47.4700, 87.5900, 68.8900, 76.5800], dtype=torch.float64)
tensor([96.5901, 51.7649, 61.4852, 17.5850])
--------------