In [1]:
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
import torch
import torch.nn as nn
from torchvision import datasets, transforms

In [2]:
# Root folder handed to ImageFolder; the class names come from its sub-directories
data_path = 'Data/Images'

# Every image is resized to 64x862 and converted to a tensor
image_transform = transforms.Compose([
    transforms.Resize((64, 862)),
    transforms.ToTensor(),
])

scream_dataset = datasets.ImageFolder(root=data_path, transform=image_transform)
scream_dataset

Dataset ImageFolder
    Number of datapoints: 3295
    Root location: Data/Images
    StandardTransform
Transform: Compose(
               Resize(size=(64, 862), interpolation=bilinear, max_size=None, antialias=warn)
               ToTensor()
           )

In [3]:
# Mapping from class name to the integer label the dataset assigned to it
class_map = scream_dataset.class_to_idx

message = "\nClass category and index of the images: {}\n"
print(message.format(class_map))


Class category and index of the images: {'not': 0, 'scream': 1}



In [4]:
# Split the data into train and test subsets: 80% train, the remainder test
train_size = int(0.8 * len(scream_dataset))
test_size = len(scream_dataset) - train_size

# Seed the split so the same images land in the test subset on every run.
# An unseeded split changes membership each time this cell is executed, which
# makes accuracy figures incomparable between runs and means a model reloaded
# from disk may be evaluated on images it was trained on.
split_generator = torch.Generator().manual_seed(42)
scream_train_dataset, scream_test_dataset = torch.utils.data.random_split(
    scream_dataset,
    [train_size, test_size],
    generator=split_generator,
)

print("Training size:", len(scream_train_dataset))
print("Testing size:",len(scream_test_dataset))

Training size: 2636
Testing size: 659


In [5]:
from collections import Counter

# Class-label counts for the training subset.
# The labels are looked up in the parent dataset's `targets` list through the
# subset's `indices`; iterating the subset itself would load, resize and
# convert every image just to read its label.
train_classes = [
    scream_train_dataset.dataset.targets[idx]
    for idx in scream_train_dataset.indices
]
Counter(train_classes)

Counter({0: 1746, 1: 890})

In [6]:
# Training batches are reshuffled every epoch
train_dataloader = torch.utils.data.DataLoader(
    scream_train_dataset,
    batch_size=64,
    num_workers=2,
    shuffle=True,
)

# Evaluation gains nothing from shuffling: a fixed order keeps the evaluation
# pass deterministic and does not consume random numbers between epochs.
test_dataloader = torch.utils.data.DataLoader(
    scream_test_dataset,
    batch_size=64,
    num_workers=2,
    shuffle=False,
)

In [7]:
# First (image, label) pair of the training subset; keep only the image tensor
first_sample = train_dataloader.dataset[0]
td = first_sample[0]
td

tensor([[[0.7098, 0.6549, 0.6549,  ..., 0.4667, 0.5647, 0.7294],
         [0.5059, 0.5843, 0.7725,  ..., 0.4549, 0.6863, 0.7725],
         [0.2392, 0.6235, 0.7412,  ..., 0.5843, 0.8235, 0.6471],
         ...,
         [0.2157, 0.2588, 0.2784,  ..., 0.2824, 0.2745, 0.2745],
         [0.2196, 0.2667, 0.2784,  ..., 0.2784, 0.2784, 0.2784],
         [0.2235, 0.2784, 0.2706,  ..., 0.2745, 0.2706, 0.2745]],

        [[0.8667, 0.8588, 0.8588,  ..., 0.8157, 0.8392, 0.8706],
         [0.8275, 0.8431, 0.8745,  ..., 0.8157, 0.8627, 0.8745],
         [0.7333, 0.8510, 0.8706,  ..., 0.8431, 0.8824, 0.8549],
         ...,
         [0.3490, 0.2392, 0.1490,  ..., 0.1373, 0.0549, 0.1843],
         [0.3373, 0.2157, 0.1725,  ..., 0.1451, 0.0824, 0.1725],
         [0.3333, 0.0667, 0.0314,  ..., 0.0431, 0.0314, 0.1882]],

        [[0.1686, 0.2000, 0.2000,  ..., 0.3216, 0.2627, 0.1529],
         [0.2980, 0.2471, 0.1294,  ..., 0.3294, 0.1804, 0.1294],
         [0.4549, 0.2196, 0.1490,  ..., 0.2471, 0.1059, 0.

In [8]:
# Dimensions of the sample tensor
td.size()

torch.Size([3, 64, 862])

In [9]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print('Using {} device'.format(device))

Using cuda device


In [10]:
from torchvision.models import resnet34
import torch

# ResNet-34 built without passing any weights argument
model = resnet34()

# Replace the classification head with a two-output linear layer
model.fc = nn.Linear(in_features=model.fc.in_features, out_features=2)

# Fresh first convolution taking 3 input channels
model.conv1 = nn.Conv2d(
    in_channels=3,
    out_channels=64,
    kernel_size=7,
    stride=2,
    padding=3,
    bias=False,
)

# Move all parameters to the selected device
model = model.to(device)

In [11]:
# Loss criterion: cross-entropy between the model outputs and the class labels
cost = nn.CrossEntropyLoss()

# Adam optimiser over all model parameters, with a small weight decay
learning_rate = 0.01
optimizer = torch.optim.Adam(
    params=model.parameters(),
    lr=learning_rate,
    weight_decay=1e-5,
)

# Create the training function

def train(dataloader, model, loss, optimizer):
    """Run one training epoch over ``dataloader``.

    Args:
        dataloader: Iterable yielding ``(X, Y)`` batches; ``dataloader.dataset``
            must support ``len()`` (used only for the progress printout).
        model: Module to optimise; it is switched to training mode.
        loss: Callable ``loss(pred, Y)`` returning a scalar tensor that supports
            ``backward()``. This is the criterion that is optimised.
        optimizer: Optimizer whose ``zero_grad()`` and ``step()`` are called
            once per batch.

    Each batch is moved to the module-level ``device`` before the forward pass.
    The current batch loss is printed on every 10th batch. Returns ``None``.
    """
    model.train()
    size = len(dataloader.dataset)
    for batch, (X, Y) in enumerate(dataloader):

        X, Y = X.to(device), Y.to(device)
        optimizer.zero_grad()
        pred = model(X)
        # Use the criterion the caller passed in. Previously the parameter was
        # ignored (and then overwritten) in favour of the global `cost`.
        batch_loss = loss(pred, Y)
        batch_loss.backward()
        optimizer.step()

        if batch % 10 == 0:
            current = batch * len(X)
            print(f'loss: {batch_loss.item():>7f}  [{current:>5d}/{size:>5d}]')


# Create the validation/test function

def test(dataloader, model):
    size = len(dataloader.dataset)
    model.eval()
    test_loss, correct = 0, 0

    with torch.no_grad():
        for batch, (X, Y) in enumerate(dataloader):
            X, Y = X.to(device), Y.to(device)
            pred = model(X)

            test_loss += cost(pred, Y).item()
            correct += (pred.argmax(1)==Y).type(torch.float).sum().item()

    test_loss /= size
    correct /= size

    print(f'\nTest Error:\nacc: {(100*correct):>0.1f}%, avg loss: {test_loss:>8f}\n')

In [15]:
epochs = 100

# Each epoch is one training pass followed by one evaluation pass
for t in range(epochs):
    print(f'Epoch {t+1}\n-------------------------------')
    train(dataloader=train_dataloader, model=model, loss=cost, optimizer=optimizer)
    test(dataloader=test_dataloader, model=model)
print('Done!')

Epoch 1
-------------------------------
loss: 0.040950  [    0/ 2636]
loss: 0.033444  [  640/ 2636]
loss: 0.034684  [ 1280/ 2636]
loss: 0.080261  [ 1920/ 2636]
loss: 0.084260  [ 2560/ 2636]

Test Error:
acc: 91.5%, avg loss: 0.005836

Epoch 2
-------------------------------
loss: 0.025030  [    0/ 2636]
loss: 0.011634  [  640/ 2636]
loss: 0.012492  [ 1280/ 2636]
loss: 0.116268  [ 1920/ 2636]
loss: 0.035474  [ 2560/ 2636]

Test Error:
acc: 91.7%, avg loss: 0.005059

Epoch 3
-------------------------------
loss: 0.105061  [    0/ 2636]
loss: 0.054576  [  640/ 2636]
loss: 0.078936  [ 1280/ 2636]
loss: 0.027302  [ 1920/ 2636]
loss: 0.019530  [ 2560/ 2636]

Test Error:
acc: 91.7%, avg loss: 0.005394

Epoch 4
-------------------------------
loss: 0.064643  [    0/ 2636]
loss: 0.027754  [  640/ 2636]
loss: 0.016308  [ 1280/ 2636]
loss: 0.026558  [ 1920/ 2636]
loss: 0.016095  [ 2560/ 2636]

Test Error:
acc: 91.8%, avg loss: 0.005599

Epoch 5
-------------------------------
loss: 0.019866  [   

In [17]:
import torch
from datetime import datetime

# A timestamp in the file name keeps successive saves from overwriting each other
now = datetime.now()
timestamp = now.strftime("%Y-%m-%d--%H-%M-%S")

file_name = f"Resnet34_Model_{timestamp}.pt"

# The whole model object is serialised (architecture and weights), not just a state_dict
torch.save(obj=model, f=file_name)

# Report where the model was written
print(f"Model saved as {file_name}")

Model saved as Resnet34_Model_2023-10-13--14-05-19.pt


In [14]:
# Load the entire pickled model object (architecture + weights). This is NOT a
# state_dict: the file was written with torch.save(model, ...).
#
# - map_location=device lets a checkpoint saved on a GPU load on a CPU-only machine.
# - weights_only=False is required to unpickle a full module on PyTorch releases
#   where torch.load defaults to weights_only=True.
# - SECURITY: unpickling can execute arbitrary code; only load files you created
#   or otherwise trust.
#
# NOTE(review): any optimizer created before this cell still references the
# parameters of the previous `model` object. Re-create the optimizer before
# training the loaded model, otherwise its weights will not be updated.
model = torch.load(
    'Resnet34_Model_2023-10-13--12-12-18.pt',
    map_location=device,
    weights_only=False,
)

In [None]:
from torchsummary import summary

# Print a summary of `model` for a single 3x64x862 input
summary(model, (3, 64, 862))

In [None]:
import os
import torchaudio
from torchvision import transforms
from PIL import Image
import matplotlib.pyplot as plt

# Define a function to transform audio data into images
def transform_data_to_image(audio, sample_rate, label, i):
    """Save the log2 mel spectrogram of ``audio`` as a PNG and return its path.

    Args:
        audio: Waveform tensor; only the first entry along dim 0 of the mel
            spectrogram is used (presumably the first channel, confirm against
            the caller).
        sample_rate: Sample rate passed to ``MelSpectrogram``.
        label: Sub-folder name under ``Data/TestImages`` to write into.
        i: Index used in the output file name (``image{i}.png``).

    Returns:
        The path of the written image, ``Data/TestImages/{label}/image{i}.png``.
    """
    mel_transform = torchaudio.transforms.MelSpectrogram(
        sample_rate=sample_rate, n_mels=64, n_fft=1024
    )
    mel_power = mel_transform(audio)[0]

    # log2(0) is -inf, which would leak into the colour scaling of the saved
    # image. Clamp to the smallest positive normal value of the dtype so that
    # only exact zeros (and subnormals) are affected; every other bin keeps
    # exactly the value it had before.
    floor = torch.finfo(mel_power.dtype).tiny
    spectrogram_tensor = mel_power.clamp(min=floor).log2()

    # Save the spectrogram as an image
    image_path = f'Data/TestImages/{label}/image{i}.png'
    # imsave does not create missing directories
    os.makedirs(os.path.dirname(image_path), exist_ok=True)
    plt.imsave(image_path, spectrogram_tensor.numpy(), cmap='viridis')
    return image_path

# Preprocessing applied to each saved spectrogram image before inference
transform = transforms.Compose([
    # Resize to 64x862
    transforms.Resize(size=(64, 862)),
    transforms.ToTensor(),
    # Keep at most the first three channels of the tensor
    transforms.Lambda(lambda tensor: tensor[:3]),
])

In [None]:
import pandas as pd

# Define the folder containing WAV files
folder_path = 'Data/Screaming'  # Replace with the path to your folder
label = 'Screaming'  # Label for the images

# One record (filename + predicted class index) per WAV file
predictions_data = []

# Inference only: switch to evaluation mode once, not on every iteration
model.eval()

# os.listdir returns entries in arbitrary order; sort for a reproducible table
for i, filename in enumerate(sorted(os.listdir(folder_path))):
    if not filename.endswith('.wav'):
        continue

    # Load the audio
    audio, sample_rate = torchaudio.load(os.path.join(folder_path, filename))

    # Transform audio to an image and save it
    image_path = transform_data_to_image(audio, sample_rate, label, i)

    # Load the saved image, apply transformations and add a batch dimension;
    # the context manager closes the file as soon as the tensor is built
    with Image.open(image_path) as image_file:
        image = transform(image_file).unsqueeze(0)

    # Make predictions using the model
    with torch.no_grad():
        outputs = model(image.to(device))

    # Batch of one, so argmax yields a single class index
    predict = outputs.argmax(dim=1).item()

    predictions_data.append({'Filename': filename, 'Prediction': predict})

# Explicit columns keep the frame well-formed even when no WAV file was found
predictions_df = pd.DataFrame(predictions_data, columns=['Filename', 'Prediction'])

# Display the DataFrame with predictions
predictions_df

In [None]:
# Number of files assigned to each predicted class index
prediction_counts = predictions_df['Prediction'].value_counts()
prediction_counts

In [None]:
import pandas as pd

# Define the folder containing WAV files
folder_path = 'Data/NotScreaming'  # Replace with the path to your folder
label = 'NotScreaming'  # Label for the images

# One record (filename + predicted class index) per WAV file
predictions_data = []

# Inference only: switch to evaluation mode once, not on every iteration
model.eval()

# os.listdir returns entries in arbitrary order; sort for a reproducible table
for i, filename in enumerate(sorted(os.listdir(folder_path))):
    if not filename.endswith('.wav'):
        continue

    # Load the audio
    audio, sample_rate = torchaudio.load(os.path.join(folder_path, filename))

    # Transform audio to an image and save it
    image_path = transform_data_to_image(audio, sample_rate, label, i)

    # Load the saved image, apply transformations and add a batch dimension;
    # the context manager closes the file as soon as the tensor is built
    with Image.open(image_path) as image_file:
        image = transform(image_file).unsqueeze(0)

    # Make predictions using the model
    with torch.no_grad():
        outputs = model(image.to(device))

    # Batch of one, so argmax yields a single class index
    predict = outputs.argmax(dim=1).item()

    predictions_data.append({'Filename': filename, 'Prediction': predict})

# Explicit columns keep the frame well-formed even when no WAV file was found
predictions_df = pd.DataFrame(predictions_data, columns=['Filename', 'Prediction'])

# Display the DataFrame with predictions
predictions_df

In [None]:
# Number of files assigned to each predicted class index
prediction_counts = predictions_df['Prediction'].value_counts()
prediction_counts