In [1]:
import pytorch_lightning as pl
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset
import numpy as np
import os
from pytorch_lightning.loggers import TensorBoardLogger
import torchmetrics
import pandas as pd

# import torch.multiprocessing as mp

# mp.set_start_method('spawn')

os.environ["CUDA_VISIBLE_DEVICES"] = "0, 1, 2"
os.environ["WORLD_SIZE"] = "1"
torch.set_float32_matmul_precision("highest")

file_word = pd.read_excel("doc/speaker_wordlist.xls", sheet_name="Word_filename")

ci = {}
for i, row in file_word.iterrows():
    code = row["FILE NAME"]
    ci[code] = i

class SpeechDataset(Dataset):
    def __init__(self, folder_path, train=True):
        """
        Constructor for the speech dataset.
        :param folder_path: Path to the folder containing the MFCC files.
        :param train: Boolean indicating if this is a training dataset.
        """
        mfccs_folder = "mfccs"
        mfccs_delta_folder = "mfccs_delta"
        mfccs_delta2_folder = "mfccs_delta2"
        self.data = []
        for file_name in os.listdir("mfccs"):
            if (train and ('B1' in file_name or 'B3' in file_name)) or (not train and 'B2' in file_name):
                if "UW" not in file_name:
                    mfcc = np.load(os.path.join(mfccs_folder, file_name))
                    mfcc_delta = np.load(os.path.join(mfccs_delta_folder, file_name))
                    mfcc_delta2 = np.load(os.path.join(mfccs_delta2_folder, file_name))
                    features = np.stack([mfcc, mfcc_delta, mfcc_delta2])
                    
                    code = file_name.replace(".wav.npy", "").split("_")[2]
                    label = ci[code]
                    self.data.append((features, label))            

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        mfcc, label = self.data[idx]
        return torch.tensor(mfcc, dtype=torch.float32), torch.tensor(label, dtype=torch.long)

class SpeechRecognitionModel(pl.LightningModule):
    def __init__(self, num_classes, batch_size):
        super().__init__()
        self.conv_layer = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1),
            # nn.BatchNorm2d(32),  # Batch Normalization
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            # nn.BatchNorm2d(64),  # Batch Normalization
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # Additional Dropout can be added here if needed
        )
        self.flatten = nn.Flatten()
        self.fc_layer = nn.Sequential(
            nn.Linear(64 * 75, 1024),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(1024, num_classes)
        )
        self.accuracy = torchmetrics.Accuracy("multiclass", num_classes=num_classes)
        self.batch_size = batch_size

    def forward(self, x):
        # x = x.view(x.shape[0], 3, x.shape[1], x.shape[2])
        x = self.conv_layer(x)
        x = self.flatten(x)
        x = self.fc_layer(x)
        return x

    def training_step(self, batch, batch_idx):
        inputs, labels = batch
        outputs = self(inputs)
        loss = nn.CrossEntropyLoss()(outputs, labels)
        self.log('train_loss', loss, on_step=True, on_epoch=True, prog_bar=True, logger=True, sync_dist=True)
        return loss

    def validation_step(self, batch, batch_idx):
        inputs, labels = batch
        outputs = self(inputs)
        loss = nn.CrossEntropyLoss()(outputs, labels)
        self.accuracy(outputs, labels)
        self.log('val_loss', loss, on_step=True, on_epoch=True, prog_bar=True, logger=True, sync_dist=True)
        self.log('val_accuracy', self.accuracy, on_step=True, on_epoch=True, prog_bar=True, logger=True, sync_dist=True)

    def test_step(self, batch, batch_idx):
        inputs, labels = batch
        outputs = self(inputs)
        loss = nn.CrossEntropyLoss()(outputs, labels)
        self.accuracy(outputs, labels)
        self.log('test_loss', loss, on_step=True, on_epoch=True, prog_bar=True, logger=True, sync_dist=True)
        self.log('test_accuracy', self.accuracy, on_step=True, on_epoch=True, prog_bar=True, logger=True, sync_dist=True)

    def configure_optimizers(self):
        # optimizer = torch.optim.Adam(self.parameters(), lr=1e-3, weight_decay=1e-5)  # Adding L2 regularization
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
        return optimizer

    def train_dataloader(self):
        train_dataset = SpeechDataset('mfccs', train=True)
        return DataLoader(train_dataset, batch_size=self.batch_size, num_workers = 25, shuffle=True)

    def val_dataloader(self):
        val_dataset = SpeechDataset('mfccs', train=False)
        return DataLoader(val_dataset, batch_size=self.batch_size, num_workers = 25)

    def test_dataloader(self):
        test_dataset = SpeechDataset('mfccs', train=False)
        return DataLoader(test_dataset, batch_size=self.batch_size, num_workers = 25)

In [2]:
batch_size=1024

logger = TensorBoardLogger('tb_logs', name='speech_recognition_model')
trainer = pl.Trainer(max_epochs=50, devices=3, logger=logger)
model = SpeechRecognitionModel(num_classes=155, batch_size=batch_size)
trainer.fit(model)
test_result = trainer.test(model)

print(test_result)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/3
Initializing distributed: GLOBAL_RANK: 1, MEMBER: 2/3
Initializing distributed: GLOBAL_RANK: 2, MEMBER: 3/3
----------------------------------------------------------------------------------------------------
distributed_backend=nccl
All distributed processes registered. Starting with 3 processes
----------------------------------------------------------------------------------------------------

You are using a CUDA device ('NVIDIA RTX A6000') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES:

Sanity Checking: |                                                                                            …

/home/myid/zis35724/.conda/envs/speech/lib/python3.9/site-packages/pytorch_lightning/loops/fit_loop.py:293: The number of training batches (19) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Training: |                                                                                                   …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

`Trainer.fit` stopped: `max_epochs=50` reached.
Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/3
Initializing distributed: GLOBAL_RANK: 1, MEMBER: 2/3
Initializing distributed: GLOBAL_RANK: 2, MEMBER: 3/3
----------------------------------------------------------------------------------------------------
distributed_backend=nccl
All distributed processes registered. Starting with 3 processes
----------------------------------------------------------------------------------------------------

You are using a CUDA device ('NVIDIA RTX A6000') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0, 1, 2]
LOCAL_RANK: 2 - CUDA_VISIBLE_DEVICES: [0, 1, 2]
LOCAL_RANK: 1 - CUDA_VISIBLE_DEVICES: [0, 1, 

Testing: |                                                                                                    …

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
   test_accuracy_epoch      0.6945281028747559
     test_loss_epoch        1.6860181093215942
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
[{'test_loss_epoch': 1.6860181093215942, 'test_accuracy_epoch': 0.6945281028747559}]


In [None]:
test_result = trainer.test(model)

print(test_result)

In [17]:
# train = False
# folder_path = "mfccs"
# labels = []
# for file_name in os.listdir(folder_path):
#     if (train and ('B1' in file_name or 'B3' in file_name)) or (not train and 'B2' in file_name):
#         if "UW" not in file_name:
#             # mfcc = np.load(os.path.join(folder_path, file_name))

#             split = file_name.replace(".wav.npy", "").split("_")
#             code = split[2]
#             label = ci[code]
#             # print(code, label)
#             labels.append(label)

# print(len(labels), len(set(labels)))

27886 155


In [44]:
# conv_layer1 = nn.Sequential(
#     nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1),
#     nn.ReLU(),
#     nn.MaxPool2d(kernel_size=2, stride=2)
# )
# conv_layer2 = nn.Sequential(
#     nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
#     nn.ReLU(),
#     nn.MaxPool2d(kernel_size=2, stride=2)
# )
# flatten = nn.Flatten()
# x = torch.ones((32, 1, 100, 13))
# flatten(conv_layer2(conv_layer1(x))).shape

torch.Size([32, 4800])

In [3]:
torch.zeros(2).cuda(0)

tensor([0., 0.], device='cuda:0')