In [1]:
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
import torch
import torch.nn as nn
from torchvision import datasets, transforms

In [2]:
data_path = 'Data/Images' #looking in subfolder train

scream_dataset = datasets.ImageFolder(
    root=data_path,
    transform=transforms.Compose([transforms.Resize((64,862)),
                                  transforms.ToTensor()])
)
scream_dataset

Dataset ImageFolder
    Number of datapoints: 2906
    Root location: Data/Images
    StandardTransform
Transform: Compose(
               Resize(size=(64, 862), interpolation=bilinear, max_size=None, antialias=warn)
               ToTensor()
           )

In [3]:
class_map=scream_dataset.class_to_idx

print("\nClass category and index of the images: {}\n".format(class_map))


Class category and index of the images: {'not_clean': 0, 'scream_clean': 1}



In [4]:
#split data to test and train
#use 80% to train
train_size = int(0.8 * len(scream_dataset))
test_size = len(scream_dataset) - train_size
scream_train_dataset, scream_test_dataset = torch.utils.data.random_split(scream_dataset, [train_size, test_size])

print("Training size:", len(scream_train_dataset))
print("Testing size:",len(scream_test_dataset))

Training size: 2324
Testing size: 582


In [5]:
from collections import Counter

# labels in training set
train_classes = [label for _, label in scream_train_dataset]
Counter(train_classes)

Counter({0: 1802, 1: 522})

In [6]:
train_dataloader = torch.utils.data.DataLoader(
    scream_train_dataset,
    batch_size=64,
    num_workers=2,
    shuffle=True
)

test_dataloader = torch.utils.data.DataLoader(
    scream_test_dataset,
    batch_size=64,
    num_workers=2,
    shuffle=True
)

In [7]:
td = train_dataloader.dataset[0][0]
td

tensor([[[0.1529, 0.1882, 0.2196,  ..., 0.4667, 0.4471, 0.6863],
         [0.1608, 0.4118, 0.3098,  ..., 0.6549, 0.5529, 0.6157],
         [0.4863, 0.4275, 0.2196,  ..., 0.7412, 0.7176, 0.6157],
         ...,
         [0.1255, 0.2784, 0.2706,  ..., 0.2784, 0.2784, 0.2667],
         [0.1255, 0.2784, 0.2784,  ..., 0.2824, 0.2784, 0.2627],
         [0.1255, 0.2745, 0.2745,  ..., 0.2784, 0.2784, 0.2627]],

        [[0.4863, 0.7059, 0.7255,  ..., 0.8157, 0.8118, 0.8627],
         [0.6863, 0.8000, 0.7647,  ..., 0.8588, 0.8392, 0.8510],
         [0.8235, 0.8078, 0.7255,  ..., 0.8706, 0.8667, 0.8510],
         ...,
         [0.5686, 0.0784, 0.0314,  ..., 0.0863, 0.0784, 0.2235],
         [0.5686, 0.0863, 0.0588,  ..., 0.0980, 0.0863, 0.2314],
         [0.5686, 0.0431, 0.0549,  ..., 0.0824, 0.0784, 0.2275]],

        [[0.5569, 0.4784, 0.4627,  ..., 0.3216, 0.3333, 0.1804],
         [0.4980, 0.3569, 0.4118,  ..., 0.2000, 0.2667, 0.2275],
         [0.3098, 0.3451, 0.4627,  ..., 0.1490, 0.1608, 0.

In [8]:
td.shape

torch.Size([3, 64, 862])

In [9]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print('Using {} device'.format(device))

Using cuda device


In [10]:
from torchvision.models import resnet34
import torch

model = resnet34()
model.fc = nn.Linear(512,2)
model.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
model = model.to(device)

In [11]:
# cost function used to determine best parameters
cost = torch.nn.CrossEntropyLoss()

# used to create optimal parameters
learning_rate = 0.01
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Create the training function

def train(dataloader, model, loss, optimizer):
    model.train()
    size = len(dataloader.dataset)
    for batch, (X, Y) in enumerate(dataloader):

        X, Y = X.to(device), Y.to(device)
        optimizer.zero_grad()
        pred = model(X)
        loss = cost(pred, Y)
        loss.backward()
        optimizer.step()

        if batch % 10 == 0:
            loss, current = loss.item(), batch * len(X)
            print(f'loss: {loss:>7f}  [{current:>5d}/{size:>5d}]')


# Create the validation/test function

def test(dataloader, model):
    size = len(dataloader.dataset)
    model.eval()
    test_loss, correct = 0, 0

    with torch.no_grad():
        for batch, (X, Y) in enumerate(dataloader):
            X, Y = X.to(device), Y.to(device)
            pred = model(X)

            test_loss += cost(pred, Y).item()
            correct += (pred.argmax(1)==Y).type(torch.float).sum().item()

    test_loss /= size
    correct /= size

    print(f'\nTest Error:\nacc: {(100*correct):>0.1f}%, avg loss: {test_loss:>8f}\n')

In [19]:
epochs = 50

for t in range(epochs):
    print(f'Epoch {t+1}\n-------------------------------')
    train(train_dataloader, model, cost, optimizer)
    test(test_dataloader, model)
print('Done!')

Epoch 1
-------------------------------
loss: 0.194932  [    0/ 2324]
loss: 0.187081  [  640/ 2324]
loss: 0.231479  [ 1280/ 2324]
loss: 0.174144  [ 1920/ 2324]

Test Error:
acc: 89.9%, avg loss: 0.004248

Epoch 2
-------------------------------
loss: 0.182678  [    0/ 2324]
loss: 0.385900  [  640/ 2324]
loss: 0.361852  [ 1280/ 2324]
loss: 0.173719  [ 1920/ 2324]

Test Error:
acc: 89.2%, avg loss: 0.004353

Epoch 3
-------------------------------
loss: 0.295594  [    0/ 2324]
loss: 0.304038  [  640/ 2324]
loss: 0.281105  [ 1280/ 2324]
loss: 0.261402  [ 1920/ 2324]

Test Error:
acc: 89.7%, avg loss: 0.004442

Epoch 4
-------------------------------
loss: 0.368367  [    0/ 2324]
loss: 0.258722  [  640/ 2324]
loss: 0.190463  [ 1280/ 2324]
loss: 0.394416  [ 1920/ 2324]

Test Error:
acc: 89.7%, avg loss: 0.004939

Epoch 5
-------------------------------
loss: 0.240979  [    0/ 2324]
loss: 0.416361  [  640/ 2324]
loss: 0.209305  [ 1280/ 2324]
loss: 0.369274  [ 1920/ 2324]

Test Error:
acc: 89

In [20]:
import torch
from datetime import datetime

# Get the current timestamp in the desired format
timestamp = datetime.now().strftime("%Y-%m-%d--%H-%M-%S")

# Define the file name with the timestamp
file_name = f"model_{timestamp}.pt"

# Save the entire model (including architecture and weights)
torch.save(model, file_name)

# Print the saved file name
print(f"Model saved as {file_name}")

Model saved as model_2023-10-10--17-02-06.pt


In [21]:
# Load the model's state_dict
model = torch.load('model_2023-10-10--17-02-06.pt')

In [22]:
from torchsummary import summary

# Assuming 'model' is your PyTorch model
summary(model, input_size=(3, 64, 862))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1          [-1, 64, 32, 431]           9,408
       BatchNorm2d-2          [-1, 64, 32, 431]             128
              ReLU-3          [-1, 64, 32, 431]               0
         MaxPool2d-4          [-1, 64, 16, 216]               0
            Conv2d-5          [-1, 64, 16, 216]          36,864
       BatchNorm2d-6          [-1, 64, 16, 216]             128
              ReLU-7          [-1, 64, 16, 216]               0
            Conv2d-8          [-1, 64, 16, 216]          36,864
       BatchNorm2d-9          [-1, 64, 16, 216]             128
             ReLU-10          [-1, 64, 16, 216]               0
       BasicBlock-11          [-1, 64, 16, 216]               0
           Conv2d-12          [-1, 64, 16, 216]          36,864
      BatchNorm2d-13          [-1, 64, 16, 216]             128
             ReLU-14          [-1, 64, 

In [23]:
import os
import torchaudio
from torchvision import transforms
from PIL import Image
import matplotlib.pyplot as plt

# Define a function to transform audio data into images
def transform_data_to_image(audio, sample_rate, label, i):
    spectrogram_tensor = torchaudio.transforms.MelSpectrogram(sample_rate=sample_rate, n_mels=64, n_fft=1024)(audio)[0].log2()
    # Save the spectrogram as an image
    image_path = f'Data/TestImages/{label}/image{i}.png'
    plt.imsave(image_path, spectrogram_tensor.numpy(), cmap='viridis')
    return image_path

# Define the image transformation pipeline
transform = transforms.Compose([
    transforms.Resize((64, 862)),
    transforms.ToTensor(),
    transforms.Lambda(lambda x: x[:3, :, :])
])

In [29]:
import pandas as pd

# Define the folder containing WAV files
folder_path = 'Data/Screaming'  # Replace with the path to your folder
label = 'Screaming'  # Label for the images

# Create an empty list to store data
predictions_data = []

# Iterate through WAV files in the folder
for i, filename in enumerate(os.listdir(folder_path)):
    if filename.endswith('.wav'):
        # Load the audio
        audio, sample_rate = torchaudio.load(os.path.join(folder_path, filename))

        # Transform audio to an image and save it
        image_path = transform_data_to_image(audio, sample_rate, label, i)

        # Load the saved image and apply transformations
        image = Image.open(image_path)
        image = transform(image).unsqueeze(0)  # Add batch dimension

        # Make predictions using the model
        model.eval()
        with torch.no_grad():
            outputs = model(image.to(device))

        predict = outputs.argmax(dim=1).cpu().detach().numpy().ravel()[0]

        # Store the filename and prediction in the DataFrame
        predictions_data.append({'Filename': filename, 'Prediction': predict})

# Create a DataFrame from the list of data
predictions_df = pd.DataFrame(predictions_data)

# Display the DataFrame with predictions
predictions_df

  resdat -= vmin
  resdat /= (vmax - vmin)


Unnamed: 0,Filename,Prediction
0,---1_cCGK4M_out.wav,0
1,-20uudT97E0_out.wav,0
2,-2yygHLdpXc_out.wav,0
3,-3bGlOhRkAo_out.wav,1
4,-4pUrlMafww_out.wav,1
...,...,...
857,_QMEw67gWIA_out.wav,0
858,_TLzbbay6Hw_out.wav,0
859,_XPPISqmXSE_out.wav,0
860,_xRpsu02t9o_out.wav,1


In [32]:
predictions_df['Prediction'].value_counts()

Prediction
1    445
0    417
Name: count, dtype: int64

In [33]:
# Define the folder containing WAV files
folder_path = 'Data/NotScreaming'  # Replace with the path to your folder
label = 'NotScreaming'  # Label for the images
import pandas as pd

# Create an empty list to store data
predictions_data = []

# Iterate through WAV files in the folder
for i, filename in enumerate(os.listdir(folder_path)):
    if filename.endswith('.wav'):
        # Load the audio
        audio, sample_rate = torchaudio.load(os.path.join(folder_path, filename))

        # Transform audio to an image and save it
        image_path = transform_data_to_image(audio, sample_rate, label, i)

        # Load the saved image and apply transformations
        image = Image.open(image_path)
        image = transform(image).unsqueeze(0)  # Add batch dimension

        # Make predictions using the model
        model.eval()
        with torch.no_grad():
            outputs = model(image.to(device))

        predict = outputs.argmax(dim=1).cpu().detach().numpy().ravel()[0]

        # Store the filename and prediction in the DataFrame
        predictions_data.append({'Filename': filename, 'Prediction': predict})

# Create a DataFrame from the list of data
predictions_df = pd.DataFrame(predictions_data)

# Display the DataFrame with predictions
predictions_df

  resdat -= vmin
  resdat /= (vmax - vmin)


Unnamed: 0,Filename,Prediction
0,--PJHxphWEs_out.wav,0
1,-28U1_qW0sU_out.wav,0
2,-4xJv59_zcA_out.wav,0
3,-5GhUbDLYkQ_out.wav,1
4,-5Jlimvsuwo_out.wav,0
...,...,...
2094,_XusTa2prSw_out.wav,0
2095,_y07ENAx2_E_out.wav,0
2096,_yqlQimkHpQ_out.wav,0
2097,_Zsk5Fxqbkc_out.wav,0


In [34]:
predictions_df['Prediction'].value_counts()

Prediction
0    2012
1      87
Name: count, dtype: int64