In [None]:
# Fetch the speechClass repository; the import cell below imports
# `speechClass.models.deepSpectrum` and `speechClass.utils` from it.
!git clone https://github.com/viksit-siddhant/speechClass

In [None]:
import os, threading
import numpy as np
import random
import torchaudio
import torch
import matplotlib.pyplot as plt
from speechClass.models.deepSpectrum import Model
from speechClass.utils import Dataset
%matplotlib inline

# Location of the .npz archive that the loading cell opens with np.load.
# NOTE(review): absolute Google Drive path — presumably requires Drive to be mounted in Colab; confirm.
path = "/content/drive/MyDrive/data.npz"

In [None]:
# Sanity check: load a single recording and print the shape of its spectrogram.
to_spectrogram = torchaudio.transforms.Spectrogram(512)
audio, _sample_rate = torchaudio.load('data/LeNormand/TD/kevin.wav')
spec = to_spectrogram(audio)
print(spec.shape)

In [None]:
# Read the feature (x) and label (y) arrays for each corpus out of the archive at `path`.
with open(path, 'rb') as f:
    data = np.load(f)
    czech_x, czech_y = data['czech_x'], data['czech_y']
    english_x, english_y = data['english_x'], data['english_y']
    lenormand_x, lenormand_y = data['lenormand_x'], data['lenormand_y']


In [None]:
# Instantiate the Model imported from speechClass.models.deepSpectrum with its default arguments.
m = Model()

In [None]:
# Convert to 3 channels: stack three copies of every array along axis 1.
# NOTE(review): re-running this cell triples axis 1 again — run it once per kernel.
lenormand_x, czech_x, english_x = (
    np.concatenate([arr] * 3, axis=1)
    for arr in (lenormand_x, czech_x, english_x)
)

# Boolean row masks built from the flattened label arrays.
lenormand_is_neg = lenormand_y.reshape(-1) == 0
czech_is_neg = czech_y.reshape(-1) == 0
czech_is_pos = czech_y.reshape(-1) == 1

# Split by label: label-0 rows of lenormand, label-0 and label-1 rows of czech.
lenormand_neg_x, lenormand_neg_y = lenormand_x[lenormand_is_neg], lenormand_y[lenormand_is_neg]
czech_neg_x, czech_neg_y = czech_x[czech_is_neg], czech_y[czech_is_neg]
czech_pos_x, czech_pos_y = czech_x[czech_is_pos], czech_y[czech_is_pos]

def inflate(x,y,target_len):
    """Oversample a paired dataset with replacement up to ``target_len`` rows.

    The original rows are always kept, in order, at the front of the result.
    If ``target_len`` exceeds ``len(x)``, the missing rows are drawn uniformly
    at random (with replacement, via ``np.random.randint``) and the same
    indices are applied to both arrays so every x row stays paired with its y.
    If ``target_len <= len(x)`` the data is returned unchanged (as new arrays).

    Args:
        x: array of examples, indexed along axis 0.
        y: array of labels with the same length as ``x``.
        target_len: minimum number of rows wanted in the result.

    Returns:
        Tuple ``(x, y)`` of arrays with ``max(len(x), target_len)`` rows each.

    Raises:
        ValueError: if ``x`` and ``y`` differ in length, or if ``x`` is empty
            while ``target_len`` is positive (there is nothing to sample from).
    """
    if len(x) != len(y):
        raise ValueError(
            f"x and y must have the same length, got {len(x)} and {len(y)}"
        )
    num_extra = max(target_len - len(x), 0)
    if num_extra and len(x) == 0:
        raise ValueError("cannot inflate an empty dataset")
    if num_extra:
        samples = np.random.randint(0, len(x), num_extra)
    else:
        # Nothing to add; an empty index keeps the concatenations below valid.
        samples = np.empty(0, dtype=np.intp)
    x = np.concatenate((x, x[samples]))
    y = np.concatenate((y, y[samples]))
    return x,y

# Balance the three subsets by oversampling: each negative subset is inflated to the
# size of the largest subset, and the positives to twice that size so that they
# match the two negative subsets combined.
num_samples = max(len(lenormand_neg_x), len(czech_neg_x), len(czech_pos_x))
lenormand_neg_x, lenormand_neg_y = inflate(lenormand_neg_x, lenormand_neg_y, num_samples)
czech_neg_x, czech_neg_y = inflate(czech_neg_x, czech_neg_y, num_samples)
czech_pos_x, czech_pos_y = inflate(czech_pos_x, czech_pos_y, 2*num_samples)

train_data = Dataset(np.concatenate((lenormand_neg_x, czech_neg_x, czech_pos_x)), np.concatenate((lenormand_neg_y, czech_neg_y, czech_pos_y)))
# shuffle=False: the features pulled through this loader are later fitted against
# train_data.y, which is in dataset order. A shuffled loader would presumably yield
# features in a different order than the labels (depends on Model.get_data — confirm).
dataloader = torch.utils.data.DataLoader(train_data, batch_size=32, shuffle=False)



In [None]:
# Pass the training DataLoader to the model's get_data and show the shape of what it returns.
feat = m.get_data(dataloader)
print(feat.shape)

In [None]:
# Fit the model on the extracted features, with labels taken from train_data.y.
# NOTE(review): assumes the rows of feat are in the same order as train_data.y — confirm against Model.get_data.
m.fit(feat, train_data.y, epochs=100)

In [None]:
from torch.nn.modules import activation
from torchvision import transforms

# Pool the three corpora and hold out 20% of the examples for evaluation.
dataset = torch.utils.data.ConcatDataset([czech,english,lenormand])
train_data, test_data = torch.utils.data.random_split(dataset, [int(len(dataset)*0.8), len(dataset)-int(len(dataset)*0.8)])
train_loader = torch.utils.data.DataLoader(train_data, batch_size=32, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=32, shuffle=True)

convmod = ConvModel([1,n_mfcc,maxlen])

opt = torch.optim.Adam(convmod.parameters(), lr=0.001)

# Augmentation applied to each training batch: random translation followed by a Gaussian blur.
transformer = transforms.Compose([
    transforms.RandomAffine(degrees=0,translate=(0.66,0.66)),
    transforms.GaussianBlur(3, sigma=(0.1, 0.6)),
    ])

# Class weights: index 0 gets czech.num_pos, index 1 gets the count of all remaining examples
# (presumably inverse-frequency weighting with czech holding the only positives — confirm).
# The weight tensor is placed with the same CUDA-else-CPU rule that train() uses, so the
# notebook also runs on a machine without a GPU instead of failing on a hard-coded "cuda".
loss = torch.nn.NLLLoss(weight=torch.tensor([czech.num_pos,len(czech)-czech.num_pos+len(english)+len(lenormand)]).to("cuda" if torch.cuda.is_available() else "cpu",dtype=torch.float))

def train(model, train_loader, test_loader, loss, opt, epochs,transformer = None):
    """Train ``model`` for ``epochs`` epochs, evaluating after each one.

    The model is moved to CUDA when available, otherwise to CPU. After every
    epoch the summed training loss is printed; if ``test_loader`` is given,
    the summed evaluation loss and the accuracy over that loader are printed
    as well. The model is left in eval mode when the function returns
    (provided ``epochs`` > 0).

    Args:
        model: ``torch.nn.Module`` mapping a float batch to per-class scores
            of shape (batch, classes) compatible with ``loss``.
        train_loader: sized iterable yielding ``(x, y)`` batches for training.
        test_loader: iterable yielding ``(x, y)`` batches for evaluation, or
            ``None`` to skip evaluation.
        loss: criterion called as ``loss(pred, y)`` with ``y`` flattened to 1-D
            ``long``. If it is an ``nn.Module`` it is moved to the same device
            as the model so that any weight tensor it holds matches.
        opt: optimizer over ``model``'s parameters.
        epochs: number of passes over ``train_loader``.
        transformer: optional callable applied to each training batch after it
            has been moved to the device; ``None`` disables augmentation.

    Returns:
        None. Progress and metrics are printed.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print("Loading model to ", device)
    model.to(device)
    if isinstance(loss, torch.nn.Module):
        # Keep criterion buffers (e.g. class weights) on the same device as the predictions.
        loss.to(device)
    for epoch in range(epochs):
        print(f"Epoch {epoch+1} of {epochs}")
        model.train()
        train_loss = 0
        train_steps = len(train_loader)
        for step, (x, y) in enumerate(train_loader, start=1):
            x = x.to(device,dtype=torch.float)
            y = torch.flatten(y.to(device,dtype=torch.long))
            # Augmentation is optional; the default transformer=None must not be called.
            if transformer is not None:
                x = transformer(x)
            opt.zero_grad()
            pred = model(x)
            l = loss(pred, y)
            l.backward()
            opt.step()
            train_loss += l.item()
            print(f"Step {step} of {train_steps}", end='\r')
        print("")
        print("Train Loss: ",train_loss)
        model.eval()
        if test_loader is None:
            continue
        test_loss = 0
        num_correct_predictions = 0
        num_seen = 0
        test_steps = len(test_loader)
        with torch.no_grad():
            for step, (x, y) in enumerate(test_loader, start=1):
                x = x.to(device,dtype=torch.float)
                y = torch.flatten(y.to(device,dtype=torch.long))
                pred = model(x)
                test_loss += loss(pred, y).item()

                pred = torch.argmax(pred, dim=1)
                num_correct_predictions += torch.sum(pred == y).item()
                num_seen += y.numel()
                print(f"Step {step} of {test_steps}", end='\r')
        print("")
        # Accuracy is measured over the examples actually evaluated, not over an
        # unrelated global dataset, so it is correct for whichever loader is passed in.
        accuracy = num_correct_predictions/num_seen if num_seen else float("nan")
        print(f"Test Loss: {test_loss}, Accuracy: {accuracy}")

In [None]:
# Call convmod.unfreeze() and then train for 10 epochs on the pooled train/test
# loaders, passing the augmentation pipeline as the transformer.
convmod.unfreeze()
train(convmod, train_loader, test_loader, loss, opt, 10, transformer)

In [None]:
import matplotlib.pyplot as plt

# Plot 10 randomly chosen examples from train_data in a 2x5 grid
# (index 0 along the first axis of each example's input).

figs, axs = plt.subplots(2,5, figsize=(15,6))
for i in range(2):
    for j in range(5):
        # random.randrange excludes the upper bound; random.randint(0, n) may return n
        # itself and index one past the end of train_data.
        axs[i,j].imshow(train_data[random.randrange(len(train_data))][0][0], cmap='viridis')

plt.show()

In [None]:
# Build the LeNormand dataset object.
lenormand = LeNormandData(16000, 32,32)

# Preview 10 randomly chosen examples from it in a 2x5 grid.
figs, axs = plt.subplots(2,5, figsize=(15,6))
for ax in axs.flat:
    sample_idx = np.random.randint(0,len(lenormand))
    ax.imshow(lenormand[sample_idx][0].squeeze())

plt.show()

In [None]:
# Fine-tune on a small random slice of the LeNormand data and evaluate on the rest.
train_fraction = 0.1
n_train_ln = int(train_fraction*len(lenormand))
ln_parts = torch.utils.data.random_split(lenormand,[n_train_ln,len(lenormand)-n_train_ln])
# Wrap both splits in shuffling loaders with the same batch size.
train_ln, test_ln = (
    torch.utils.data.DataLoader(part, batch_size=32, shuffle=True)
    for part in ln_parts
)
# NOTE(review): freeze(1) presumably freezes part of convmod before fine-tuning — confirm its argument's meaning.
convmod.freeze(1)

train(convmod, train_ln, test_ln, loss, opt, 20, transformer)

In [None]:
# Per-class accuracy of convmod over the complete LeNormand dataset.
ln_loader = torch.utils.data.DataLoader(lenormand, batch_size=32, shuffle=True)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
num_0 = num_1 = 0
num_correct_0 = num_correct_1 = 0
for x,y in ln_loader:
    x = x.to(device,dtype=torch.float)
    y = y.numpy().flatten()
    with torch.no_grad():
        scores = convmod(x)
    # argmax yields non-negative class indices, so "== 0" / "!= 0" is the same
    # split as thresholding the index at 0.5.
    pred = np.argmax(scores.cpu().numpy(),axis=1)
    is_zero, is_one = (y == 0), (y == 1)
    num_correct_0 += ((pred == 0) & is_zero).sum()
    num_correct_1 += ((pred != 0) & is_one).sum()
    print(pred)
    print(y)
    num_0 += is_zero.sum()
    num_1 += is_one.sum()

print(f"Accuracy on zero: {num_correct_0/num_0}")
print(f"Accuracy on one: {num_correct_1/num_1}")
