In [None]:
# Google drive setup
# Mounts Google Drive at /content/drive so later cells can read files from it.
# force_remount=True is passed so the mount is redone even when one already
# exists in this session.
from google.colab import drive
drive.mount("/content/drive", force_remount=True)

Mounted at /content/drive


In [None]:
# Extract the archive stored on the mounted Drive into the current working
# directory (-x extract, -v list every extracted file, -f archive path).
# NOTE(review): presumably this archive provides the .npy files read later — confirm.
!tar -xvf /content/drive/MyDrive/cola/BBB.tar.gz

In [None]:
!pip install pytorch_lightning
!pip install efficientnet_pytorch

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pytorch_lightning
  Downloading pytorch_lightning-1.7.7-py3-none-any.whl (708 kB)
[K     |████████████████████████████████| 708 kB 15.5 MB/s 
Collecting torchmetrics>=0.7.0
  Downloading torchmetrics-0.10.1-py3-none-any.whl (529 kB)
[K     |████████████████████████████████| 529 kB 58.1 MB/s 
Collecting pyDeprecate>=0.3.1
  Downloading pyDeprecate-0.3.2-py3-none-any.whl (10 kB)
Installing collected packages: torchmetrics, pyDeprecate, pytorch-lightning
Successfully installed pyDeprecate-0.3.2 pytorch-lightning-1.7.7 torchmetrics-0.10.1
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting efficientnet_pytorch
  Downloading efficientnet_pytorch-0.7.1.tar.gz (21 kB)
Building wheels for collected packages: efficientnet-pytorch
  Building wheel for efficientnet-pytorch (setup.py) ... [?25l[?25hdone
  Created wheel for effic

In [None]:
import pytorch_lightning as pl
import torch
from efficientnet_pytorch import EfficientNet
from torch.nn import functional as F
import random
import librosa
import numpy as np
import os
from glob import glob
import numpy as np
import pytorch_lightning as pl
import torch
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.loggers import TensorBoardLogger
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader
# from audio_encoder.audio_processing import random_crop, random_mask, random_multiply,pre_process_audio_mel_t
# from audio_encoder.encoder import Cola

In [None]:
# Detect CUDA once and derive the device handle and loader worker count from it.
cuda = torch.cuda.is_available()
device = torch.device("cuda") if cuda else torch.device("cpu")
# Four loader workers when a GPU is present, none otherwise.
num_workers = 0 if not cuda else 4

In [None]:
# 30 seconds worth of samples at 16 kHz.
input_length = 16000 * 30
# Number of mel bands requested from librosa.
n_mels = 64


def pre_process_audio_mel_t(audio, sample_rate=16000):
    """Convert a waveform into a scaled, transposed log-mel spectrogram.

    Args:
        audio: Waveform passed to ``librosa.feature.melspectrogram`` as ``y``.
        sample_rate: Sampling rate passed as ``sr``.

    Returns:
        The dB-scaled mel spectrogram (referenced to its own maximum), shifted
        and scaled as ``(db + 40) / 40`` and then transposed.
    """
    power_spec = librosa.feature.melspectrogram(
        y=audio, sr=sample_rate, n_mels=n_mels
    )
    db_spec = librosa.power_to_db(power_spec, ref=np.max)
    scaled = (db_spec + 40) / 40
    return scaled.T

def random_crop(data, crop_size=128):
    """Return a random contiguous window of ``crop_size`` rows from ``data``.

    Args:
        data: 2-D array indexed as ``data[row, col]``; the row axis is cropped.
        crop_size: Number of consecutive rows to keep.

    Returns:
        ``data[start:start + crop_size, :]`` for a randomly drawn ``start``.
        When ``data`` has fewer than ``crop_size`` rows, all rows are returned
        (the result is then shorter than ``crop_size``).
    """
    # Clamp at zero: with fewer rows than crop_size the span is negative, which
    # used to produce a negative start. numpy reads that as "count from the
    # end" and silently returned only a tail of the data.
    max_start = max(data.shape[0] - crop_size, 0)
    start = int(random.random() * max_start)
    return data[start : start + crop_size, :]

def random_mask(data, rate_start=0.1, rate_seq=0.2):
    """Return a copy of ``data`` with random runs of rows replaced by the mean.

    Args:
        data: 2-D array; masking is applied row by row along axis 0.
        rate_start: Probability that any row is masked.
        rate_seq: Probability of a second chance to mask a row when the
            previous row was masked and the first draw did not mask it.

    Returns:
        A new array; ``data`` itself is left untouched.
    """
    masked = data.copy()
    fill_value = masked.mean()
    in_run = False
    for row in range(masked.shape[0]):
        # The first draw is always consumed; the second only when the first
        # failed and the previous row was masked.
        hit = random.random() < rate_start
        if not hit:
            hit = in_run and random.random() < rate_seq
        in_run = hit
        if in_run:
            masked[row, :] = fill_value

    return masked

def random_multiply(data):
    """Return a copy of ``data`` scaled by one random gain in [0.9, 1.1)."""
    scaled = data.copy()
    gain = 0.9 + random.random() / 5.
    return scaled * gain

In [None]:
class Encoder(torch.nn.Module):
    """Feature extractor: a 1-to-3 channel convolution feeding EfficientNet-B0."""

    def __init__(self, drop_connect_rate=0.1):
        """Build the stem convolution and the EfficientNet backbone.

        Args:
            drop_connect_rate: Forwarded to ``EfficientNet.from_name``.
        """
        super().__init__()

        # Maps the single input channel to the 3 channels fed to the backbone.
        self.cnn1 = torch.nn.Conv2d(in_channels=1, out_channels=3, kernel_size=3)
        # Constructed via from_name with include_top=False.
        self.efficientnet = EfficientNet.from_name(
            "efficientnet-b0",
            include_top=False,
            drop_connect_rate=drop_connect_rate,
        )

    def forward(self, x):
        """Encode ``x`` after inserting a channel axis at position 1.

        NOTE(review): assumes ``x`` is a 3-D batch so the convolution receives
        a 4-D tensor — confirm against callers.
        """
        with_channel = x.unsqueeze(1)
        features = self.efficientnet(self.cnn1(with_channel))

        # Drop axis 3 and then axis 2 (each only removed when its size is 1).
        return features.squeeze(3).squeeze(2)


class Cola(pl.LightningModule):
    """Contrastive model trained on three views of every item in a batch.

    Each view is encoded into a 512-d embedding. A bias-free linear map of one
    view's embedding is multiplied with another view's embeddings to form a
    batch-by-batch similarity matrix, and cross-entropy is applied with the
    diagonal (same batch index) as the correct class.
    """

    def __init__(self, p=0.1, n=2):
        """
        Args:
            p: Dropout probability; also passed to the encoder as its
                drop-connect rate.
            n: Stored on the instance and in the saved hyperparameters; not
                read anywhere else in this class.
        """
        super().__init__()
        self.save_hyperparameters()

        self.p = p

        self.do = torch.nn.Dropout(p=self.p)

        self.encoder = Encoder(drop_connect_rate=p)

        # 1280 encoder features -> 512-d embedding.
        self.g = torch.nn.Linear(1280, 512)
        self.layer_norm = torch.nn.LayerNorm(normalized_shape=512)
        # Bilinear-similarity weight applied to the "query" side only.
        self.linear = torch.nn.Linear(512, 512, bias=False)
        self.n = n

    def _embed(self, x):
        """Encode one view: encoder -> linear -> layer norm -> tanh, with dropout after each stage."""
        x = self.do(self.encoder(x))
        x = self.do(self.g(x))
        return self.do(torch.tanh(self.layer_norm(x)))

    def forward(self, x):
        """Embed the three views in ``x``.

        Args:
            x: Sequence of three tensors ``(x1, x2, x3)``.

        Returns:
            ``(x1, x2, x3, x11, x22, x33)`` where the first three are the
            embeddings and the last three are ``self.linear`` applied to them.
        """
        x1, x2, x3 = x

        # Views are embedded one after another, in this order.
        x1 = self._embed(x1)
        x2 = self._embed(x2)
        x3 = self._embed(x3)

        x11 = self.linear(x1)
        x22 = self.linear(x2)
        x33 = self.linear(x3)

        return x1, x2, x3, x11, x22, x33

    def _shared_step(self, x, stage):
        """Compute and log the contrastive loss and accuracy for one batch.

        Args:
            x: Batch of three views, as accepted by ``forward``.
            stage: Prefix for the logged metric names
                (``"<stage>_loss"``, ``"<stage>_acc"``).

        Returns:
            The loss averaged over the three view pairings.
        """
        x1, x2, x3, x11, x22, x33 = self(x)

        # The positive for row i is column i of the similarity matrix.
        y = torch.arange(x1.size(0), device=x1.device)

        losses = []
        accs = []
        # Pairings: view1 -> view2, view2 -> view3, view3 -> view1.
        for projected, other in ((x11, x2), (x22, x3), (x33, x1)):
            y_hat = torch.mm(projected, other.t())
            losses.append(F.cross_entropy(y_hat, y))
            _, predicted = torch.max(y_hat, 1)
            accs.append((predicted == y).double().mean())

        loss = sum(losses) / len(losses)
        acc = sum(accs) / len(accs)

        self.log(f"{stage}_loss", loss)
        self.log(f"{stage}_acc", acc)

        return loss

    def training_step(self, x, batch_idx):
        """Log ``train_loss`` / ``train_acc`` and return the loss."""
        return self._shared_step(x, "train")

    def validation_step(self, x, batch_idx):
        """Log ``valid_loss`` / ``valid_acc``; returns nothing."""
        self._shared_step(x, "valid")

    def test_step(self, x, batch_idx):
        """Log ``test_loss`` / ``test_acc``; returns nothing."""
        self._shared_step(x, "test")

    def configure_optimizers(self):
        """Adam over all parameters with a learning rate of 1e-4."""
        return torch.optim.Adam(self.parameters(), lr=1e-4)

In [None]:
class AudioDataset(torch.utils.data.Dataset):
    """Dataset that yields three random crops of one stored ``.npy`` array.

    Each file is loaded, transposed, optionally augmented, and cropped three
    times along the first axis to ``max_len`` rows.
    """

    def __init__(self, data, max_len=100, augment=True):
        """
        Args:
            data: Indexable collection of paths to ``.npy`` files.
            max_len: Number of rows in every returned crop.
            augment: When true, apply ``random_mask`` to the whole array and
                ``random_multiply`` to each crop.
        """
        self.data = data
        self.max_len = max_len
        self.augment = augment

    def __len__(self):
        """Number of files in the dataset."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load file ``idx`` and return three float tensors ``(x1, x2, x3)``."""
        npy_path = self.data[idx]

        # NOTE(review): allow_pickle=True lets np.load unpickle arbitrary
        # objects — only safe for files from a trusted source.
        x = np.load(npy_path, allow_pickle=True)
        # The file inspected in this notebook has shape (128, 863); after the
        # transpose, rows are the axis that gets cropped.
        x = np.transpose(x)

        if self.augment:
            x = random_mask(x)

        # Too short to crop: substitute an all-zero placeholder. Its shape is
        # derived from max_len and the real feature width instead of the
        # previous hard-coded (100, 128), which gave wrong-sized crops (or
        # failed) whenever max_len != 100.
        if x.shape[0] < self.max_len:
            x = np.zeros((self.max_len, x.shape[1]))

        x1 = random_crop(x, crop_size=self.max_len)
        x2 = random_crop(x, crop_size=self.max_len)
        x3 = random_crop(x, crop_size=self.max_len)

        if self.augment:
            x1 = random_multiply(x1)
            x2 = random_multiply(x2)
            x3 = random_multiply(x3)

        x1 = torch.tensor(x1, dtype=torch.float)
        x2 = torch.tensor(x2, dtype=torch.float)
        x3 = torch.tensor(x3, dtype=torch.float)

        return x1, x2, x3


class DecayLearningRate(pl.Callback):
    """Callback that multiplies every optimizer learning rate by ``decay`` after each training epoch."""

    def __init__(self, decay=0.99):
        """
        Args:
            decay: Multiplicative factor applied to each learning rate at the
                end of every training epoch (default keeps the previous 0.99).
        """
        super().__init__()
        self.decay = decay
        # One list of learning rates per optimizer, one entry per param group.
        self.old_lrs = []

    def on_train_start(self, trainer, pl_module):
        """Record the initial learning rate of every param group."""
        # Rebuilt from scratch so that a second fit() with the same callback
        # instance does not append stale entries and misalign the indices.
        self.old_lrs = [
            [param_group["lr"] for param_group in optimizer.param_groups]
            for optimizer in trainer.optimizers
        ]

    def on_train_epoch_end(self, trainer, pl_module, outputs=None):
        """Scale all learning rates by ``self.decay``.

        ``outputs`` is optional: older Lightning releases passed it, newer
        ones call this hook with ``(trainer, pl_module)`` only, which raised a
        TypeError while the parameter was required.
        """
        for opt_idx, optimizer in enumerate(trainer.optimizers):
            old_lr_group = self.old_lrs[opt_idx]
            new_lr_group = []
            for p_idx, param_group in enumerate(optimizer.param_groups):
                new_lr = old_lr_group[p_idx] * self.decay
                new_lr_group.append(new_lr)
                param_group["lr"] = new_lr
            self.old_lrs[opt_idx] = new_lr_group

In [None]:
import numpy as np

In [None]:
# Load a single .npy file for inspection.
# NOTE(review): allow_pickle=True permits unpickling arbitrary objects; only
# use it on files from a trusted source.
t=np.load('/content/111/11/YsAJAhrh16dA_250.000_260.000.m4a.orig.spec.npy', allow_pickle=True)

In [None]:
# Show the loaded array's dimensions as the cell output.
t.shape

(128, 863)

In [None]:
from pathlib import Path

batch_size = 128
epochs = 256

# Directory containing the .npy files to train on.
mp3_path = Path('/content/111/11')

files = sorted(glob(str(mp3_path / "*.npy")))
if not files:
    # Fail with a clear message instead of an opaque split error further down.
    raise FileNotFoundError(f"No .npy files found in {mp3_path}")

# 5% held out for test, then 5% of the remainder for validation.
_train, test = train_test_split(files, test_size=0.05, random_state=1337)

train, val = train_test_split(_train, test_size=0.05, random_state=1337)

# Augmentation is applied to the training split only.
train_data = AudioDataset(train, augment=True)
test_data = AudioDataset(test, augment=False)
val_data = AudioDataset(val, augment=False)

train_loader = DataLoader(
    train_data, batch_size=batch_size, num_workers=0, shuffle=True
)
# Not shuffled: the loss uses the other items of the batch as negatives, so a
# fixed batch composition keeps validation metrics comparable across epochs.
val_loader = DataLoader(
    val_data, batch_size=batch_size, num_workers=0, shuffle=False
)
test_loader = DataLoader(
    test_data, batch_size=batch_size, shuffle=False, num_workers=0
)


model = Cola()
print(model)
logger = TensorBoardLogger(
    save_dir=".",
    name="lightning_logs",
)

# Keep the checkpoint with the best validation accuracy under models/.
# Passed through `callbacks=`; the Trainer's `checkpoint_callback=` argument
# no longer accepts a callback object in Lightning 1.7.
checkpoint_callback = ModelCheckpoint(
    monitor="valid_acc",
    mode="max",
    dirpath="models/",
)

# `gpus=1` is deprecated in Lightning 1.7 and fails on CPU-only runtimes.
use_gpu = torch.cuda.is_available()
trainer = pl.Trainer(
    max_epochs=epochs,
    accelerator="gpu" if use_gpu else "cpu",
    devices=1,
    logger=logger,
    callbacks=[checkpoint_callback],
    # DecayLearningRate() can be appended to `callbacks` to decay the lr per epoch.
)
trainer.fit(model, train_loader, val_loader)

# The `test_dataloaders=` keyword was removed from Trainer.test; using it
# raised the TypeError seen in this cell's previous output.
trainer.test(model, dataloaders=test_loader)

Cola(
  (do): Dropout(p=0.1, inplace=False)
  (encoder): Encoder(
    (cnn1): Conv2d(1, 3, kernel_size=(3, 3), stride=(1, 1))
    (efficientnet): EfficientNet(
      (_conv_stem): Conv2dStaticSamePadding(
        3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False
        (static_padding): ZeroPad2d((0, 1, 0, 1))
      )
      (_bn0): BatchNorm2d(32, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
      (_blocks): ModuleList(
        (0): MBConvBlock(
          (_depthwise_conv): Conv2dStaticSamePadding(
            32, 32, kernel_size=(3, 3), stride=[1, 1], groups=32, bias=False
            (static_padding): ZeroPad2d((1, 1, 1, 1))
          )
          (_bn1): BatchNorm2d(32, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
          (_se_reduce): Conv2dStaticSamePadding(
            32, 8, kernel_size=(1, 1), stride=(1, 1)
            (static_padding): Identity()
          )
          (_se_expand): Conv2dStaticSamePad

  f"Setting `Trainer(gpus={gpus!r})` is deprecated in v1.7 and will be removed"
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name       | Type      | Params
-----------------------------------------
0 | do         | Dropout   | 0     
1 | encoder    | Encoder   | 4.0 M 
2 | g          | Linear    | 655 K 
3 | layer_norm | LayerNorm | 1.0 K 
4 | linear     | Linear    | 262 K 
-----------------------------------------
4.9 M     Trainable params
0         Non-trainable params
4.9 M     Total params
19.706    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]



Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

  rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")


TypeError: ignored

In [None]:
class AudioClassifier(pl.LightningModule):
    """Classifier head on top of the same encoder / projection stack as ``Cola``.

    The ``encoder``, ``g`` and ``layer_norm`` attributes use the same names and
    sizes as in ``Cola``, so a ``Cola`` state dict can populate them; ``fc1``
    and ``fy`` are specific to this class.
    """

    def __init__(self, classes=8, p=0.1):
        """
        Args:
            classes: Number of output classes.
            p: Dropout probability; also passed to the encoder as its
                drop-connect rate.
        """
        super().__init__()
        self.save_hyperparameters()

        self.p = p

        self.do = torch.nn.Dropout(p=self.p)

        self.encoder = Encoder(drop_connect_rate=self.p)

        self.g = torch.nn.Linear(1280, 512)
        self.layer_norm = torch.nn.LayerNorm(normalized_shape=512)

        self.fc1 = torch.nn.Linear(512, 256)
        self.fy = torch.nn.Linear(256, classes)

    def forward(self, x):
        """Return unnormalised class scores for the batch ``x``."""
        x = self.do(self.encoder(x))

        x = self.do(self.g(x))
        x = self.do(torch.tanh(self.layer_norm(x)))

        x = F.relu(self.do(self.fc1(x)))
        y_hat = self.fy(x)

        return y_hat

    def _shared_step(self, batch, stage):
        """Compute and log cross-entropy loss and accuracy for one batch.

        Args:
            batch: Pair ``(x, y)`` of inputs and integer class targets.
            stage: Prefix for the logged metric names
                (``"<stage>_loss"``, ``"<stage>_acc"``).

        Returns:
            The cross-entropy loss.
        """
        x, y = batch

        y_hat = self(x)

        loss = F.cross_entropy(y_hat, y)

        _, predicted = torch.max(y_hat, 1)
        acc = (predicted == y).double().mean()

        self.log(f"{stage}_loss", loss)
        self.log(f"{stage}_acc", acc)

        return loss

    def training_step(self, batch, batch_idx):
        """Log ``train_loss`` / ``train_acc`` and return the loss."""
        return self._shared_step(batch, "train")

    def validation_step(self, batch, batch_idx):
        """Log ``valid_loss`` / ``valid_acc``; returns nothing."""
        self._shared_step(batch, "valid")

    def test_step(self, batch, batch_idx):
        """Log ``test_loss`` / ``test_acc``; returns nothing."""
        self._shared_step(batch, "test")

    def configure_optimizers(self):
        """Adam over all parameters with a learning rate of 1e-4."""
        return torch.optim.Adam(self.parameters(), lr=1e-4)

In [None]:
# Build the classifier with its default arguments (classes=8, p=0.1).
# This rebinds the notebook-level name `model`.
model = AudioClassifier()

In [None]:
#Template to load model
# NOTE(review): torch.load unpickles the file — only load checkpoints you trust.
# map_location="cpu" lets a checkpoint written on a GPU load on a CPU-only
# runtime; load_state_dict then copies the values into the model's parameters.
temp = torch.load('/content/epoch=80-step=12311.ckpt', map_location="cpu")
# strict=False because the checkpoint has no weights for the classifier heads
# (fc1, fy) and carries keys this model does not define.
load_result = model.load_state_dict(temp['state_dict'], strict=False)
# strict=False would also hide an encoder that failed to load, so check that
# nothing except the new heads is left uninitialised.
not_loaded = [
    key for key in load_result.missing_keys
    if not key.startswith(("fc1.", "fy."))
]
assert not not_loaded, f"checkpoint is missing weights for: {not_loaded}"
# optimizer.load_state_dict(temp['optimizer_state_dict'])
# scheduler.load_state_dict(temp['scheduler_state_dict'])
load_result

_IncompatibleKeys(missing_keys=['fc1.weight', 'fc1.bias', 'fy.weight', 'fy.bias'], unexpected_keys=['linear.weight'])

In [None]:
# Print the module tree of the current `model`.
print(model)

In [None]:
# Inspect the checkpoint's structure. Printing the whole object would dump
# every weight tensor into the cell output, so only names and shapes are shown.
# NOTE(review): torch.load unpickles the file — only load checkpoints you trust.
temp = torch.load('/content/models/epoch=80-step=12311.ckpt', map_location="cpu")
print(list(temp.keys()))
state_dict = temp['state_dict']
print(f"{len(state_dict)} entries in state_dict; first 10:")
for key in list(state_dict)[:10]:
    print(key, tuple(state_dict[key].shape))
# Weights live under the 'state_dict' entry, e.g.:
# encoder_cnn1_weight = temp['state_dict']['encoder.cnn1.weight']

In [None]:
# NOTE(review): the loaders built earlier in this notebook yield
# (x1, x2, x3) view triples, while AudioClassifier's steps unpack
# `x, y = batch`. Confirm that labelled loaders are bound to
# train_loader / val_loader / test_loader before running this cell.

# Defined here because no earlier cell creates it; keeps the checkpoint with
# the best validation accuracy in its own directory.
checkpoint_callback = ModelCheckpoint(
    monitor="valid_acc",
    mode="max",
    dirpath="models/classifier/",
)

# `gpus=` is deprecated in Lightning 1.7 and the Trainer no longer accepts a
# callback object through `checkpoint_callback=`; it goes in `callbacks`.
use_gpu = torch.cuda.is_available()
trainer = pl.Trainer(
    max_epochs=epochs,
    accelerator="gpu" if use_gpu else "cpu",
    devices=1,
    logger=logger,
    callbacks=[checkpoint_callback, DecayLearningRate()],
)
trainer.fit(model, train_loader, val_loader)
# `test_dataloaders=` was removed from Trainer.test; pass `dataloaders=`.
trainer.test(model, dataloaders=test_loader)