In [2]:
!pip install torch


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.2[0m[39;49m -> [0m[32;49m23.3.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [66]:
import os
import pandas as pd
import torch

from torch.utils.data import Dataset
from skimage import io

In [67]:
class DroneDataset(Dataset):
    """Image dataset described by a CSV annotation file.

    Column 0 of the CSV holds an image file name relative to ``root_dir``;
    column 1 holds a value that is converted to an integer class label.
    """

    def __init__(self, csv_file, root_dir, transform=None):
        """Load the annotation table and remember where the images live.

        Args:
            csv_file: Path of the CSV file read with ``pd.read_csv``.
            root_dir: Directory that the file names in column 0 are joined to.
            transform: Optional callable applied to every loaded image.
        """
        self.annotations = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        """Return the number of annotation rows."""
        return len(self.annotations)

    def __getitem__(self, index):
        """Return ``(image, label)`` for the annotation row at ``index``."""
        file_name = self.annotations.iloc[index, 0]
        raw_label = self.annotations.iloc[index, 1]

        image = io.imread(os.path.join(self.root_dir, file_name))
        y_label = torch.tensor(int(raw_label))

        # Truthiness check (not ``is not None``) is kept on purpose.
        image = self.transform(image) if self.transform else image

        return image, y_label

In [68]:
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.models as models

from torch.utils.data import DataLoader

In [70]:
# Model shape
in_channel = 3
num_classes = 5  # size of the replacement classification head

# Optimisation settings
learning_rate = 0.001
batch_size = 25
num_epochs = 20

In [71]:
# Every image is converted to a tensor by ToTensor when it is fetched.
dataset = DroneDataset(
    csv_file='scaleogram.csv',
    root_dir='drone_dataset_resize',
    transform=transforms.ToTensor(),
)

In [72]:
# Keep 6000 training samples and derive the test size from the dataset, so the
# split keeps working when rows are added to or removed from the CSV.
train_size = 6000
test_size = len(dataset) - train_size

# A seeded generator makes the train/test partition identical on every run.
train_set, test_set = torch.utils.data.random_split(
    dataset,
    [train_size, test_size],
    generator=torch.Generator().manual_seed(42),
)

In [73]:
# Training batches are reshuffled every epoch.
train_loader = DataLoader(dataset=train_set, batch_size=batch_size, shuffle=True)
# Evaluation does not benefit from shuffling; a fixed order keeps batch positions
# stable between passes (the loader-based test_single_image looks batches up by position).
test_loader = DataLoader(dataset=test_set, batch_size=batch_size, shuffle=False)

In [74]:
# Load ResNet-18 with pretrained weights, then replace its final fully connected
# layer with a fresh one that has num_classes outputs.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour of
# `weights=` — confirm against the installed version before changing.
resnet18 = models.resnet18(pretrained=True)
fc_in_features = resnet18.fc.in_features
resnet18.fc = nn.Linear(in_features=fc_in_features, out_features=num_classes)



In [75]:
# Use CUDA when it is available, otherwise fall back to the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
# Last expression of the cell: moves the model and echoes its repr as cell output.
resnet18.to(device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [76]:
# Cross-entropy loss on the raw scores; Adam updates every parameter of the network.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=resnet18.parameters(), lr=learning_rate)

In [77]:
import time
from tqdm import tqdm

# Train for num_epochs passes over train_loader, printing the mean batch loss per epoch.
start = time.time()

for epoch in tqdm(range(1, num_epochs + 1)):
    resnet18.train()
    losses = []

    for data, targets in train_loader:
        data = data.to(device=device)
        targets = targets.to(device=device)

        # Drop gradients left over from the previous batch before this one runs.
        optimizer.zero_grad()
        scores = resnet18(data)
        loss = criterion(scores, targets)
        loss.backward()
        optimizer.step()

        losses.append(loss.item())

    mean_loss = sum(losses) / len(losses)
    print(f'Cost at epoch {epoch} is {mean_loss}')

finish = time.time()
print('Finished Training in ', finish-start)

# Only the parameters (state_dict) are written, not the whole module object.
torch.save(resnet18.state_dict(), './resnet18_model.pth')

  5%|▌         | 1/20 [01:00<19:02, 60.15s/it]

Cost at epoch 1 is 0.38207545784922936


 10%|█         | 2/20 [01:57<17:35, 58.66s/it]

Cost at epoch 2 is 0.2391573897950972


 15%|█▌        | 3/20 [02:56<16:37, 58.68s/it]

Cost at epoch 3 is 0.18368383756993958


 20%|██        | 4/20 [03:57<15:56, 59.76s/it]

Cost at epoch 4 is 0.14971279375022278


 25%|██▌       | 5/20 [05:01<15:16, 61.11s/it]

Cost at epoch 5 is 0.13314407752283539


 30%|███       | 6/20 [06:00<14:07, 60.52s/it]

Cost at epoch 6 is 0.14740029296372087


 35%|███▌      | 7/20 [07:02<13:11, 60.86s/it]

Cost at epoch 7 is 0.11947934155274803


 40%|████      | 8/20 [08:02<12:06, 60.58s/it]

Cost at epoch 8 is 0.10317004184180405


 45%|████▌     | 9/20 [09:03<11:10, 60.91s/it]

Cost at epoch 9 is 0.10278553498598436


 50%|█████     | 10/20 [10:08<10:19, 61.95s/it]

Cost at epoch 10 is 0.09419090061467918


 55%|█████▌    | 11/20 [11:07<09:10, 61.15s/it]

Cost at epoch 11 is 0.074071326701475


 60%|██████    | 12/20 [12:05<08:01, 60.22s/it]

Cost at epoch 12 is 0.08337624045625489


 65%|██████▌   | 13/20 [13:09<07:08, 61.17s/it]

Cost at epoch 13 is 0.06751442998890221


 70%|███████   | 14/20 [14:11<06:09, 61.62s/it]

Cost at epoch 14 is 0.04960429434170995


 75%|███████▌  | 15/20 [15:12<05:06, 61.39s/it]

Cost at epoch 15 is 0.06147763631342969


 80%|████████  | 16/20 [16:12<04:04, 61.05s/it]

Cost at epoch 16 is 0.0495751044518632


 85%|████████▌ | 17/20 [17:17<03:06, 62.27s/it]

Cost at epoch 17 is 0.04439307091840116


 90%|█████████ | 18/20 [18:17<02:03, 61.57s/it]

Cost at epoch 18 is 0.039777379801186424


 95%|█████████▌| 19/20 [19:18<01:01, 61.29s/it]

Cost at epoch 19 is 0.02877781282595606


100%|██████████| 20/20 [20:23<00:00, 61.16s/it]

Cost at epoch 20 is 0.021796014324309
Finished Training in  1223.2814421653748





In [78]:
def check_accuracy(loader, model):
    """Print the classification accuracy of ``model`` over ``loader``.

    Args:
        loader: Iterable yielding ``(inputs, labels)`` batches of tensors.
        model: Module to evaluate. It is switched to eval mode and the batches
            are moved to the device its parameters live on.

    Returns:
        None. The result is printed as ``Accuracy: NN.NN%``; when the loader
        yields no samples a notice is printed instead of dividing by zero.
    """
    num_correct = 0
    num_samples = 0

    # Evaluate the model that was passed in, not a notebook-level global.
    model.eval()
    model_device = next(model.parameters()).device

    with torch.no_grad():
        for x, y in loader:
            x, y = x.to(device=model_device), y.to(device=model_device)

            scores = model(x)
            _, predictions = scores.max(1)
            # .item() keeps the counters plain Python ints.
            num_correct += (predictions == y).sum().item()
            num_samples += predictions.size(0)

    if num_samples == 0:
        print('Accuracy: n/a (loader yielded no samples)')
        return

    accuracy = num_correct / num_samples
    print(f'Accuracy: {accuracy * 100:.2f}%')

In [79]:
# Accuracy on the data the network was trained on.
check_accuracy(loader=train_loader, model=resnet18)

Accuracy: 98.20%


In [80]:
# Accuracy on the held-out split.
check_accuracy(loader=test_loader, model=resnet18)

Accuracy: 93.30%


In [81]:
def test_single_image(loader, model, image_index):
    model.eval()
    with torch.no_grad():
        for i, (images, labels) in enumerate(test_loader):
            if i == image_index:
                image = images[0].to(device)
                label = labels[0].item()

                scores = model(image.unsqueeze(0)) 

                _, prediction = scores.max(1)
                prediction = prediction.item()

                print(f'Ground Truth Label: {label}')
                print(f'Model Prediction: {prediction}')
                print(f'Correct Prediction: {prediction == label}')
                break

In [None]:
# Print the size and the ground-truth labels of every batch in the test loader.
for images, labels in test_loader:
    print(f'Batch Size: {images.size(0)}')
    print('Labels:', labels)

    # Only the labels are reported, so the images are not converted to numpy here.
    for position, label in enumerate(labels.tolist(), start=1):
        print(f'Image {position} - Label: {label}')

In [82]:
def enum(index):
    """Map a class index (0-4) to its display name.

    Returns None when ``index`` does not compare equal to any known class index.
    """
    class_names = ('big drone', 'bird', 'human', 'free space', 'small copter')
    # Compare with == in ascending order so any index type that supports
    # equality with an int is accepted.
    for class_id, class_name in enumerate(class_names):
        if index == class_id:
            return class_name
    return None

In [83]:
from PIL import Image
from torchvision.transforms import ToTensor
import torch.nn.functional as F


def test_single_image(model, image_path, image_size=(590, 390)):
    """Classify one image file with ``model``.

    This definition replaces the loader-based ``test_single_image`` defined in
    an earlier cell of the notebook.

    Args:
        model: Module to evaluate; switched to eval mode.
        image_path: Path of the image file to classify.
        image_size: ``(width, height)`` the image is resized to before
            inference. The default matches the size used when the training
            images were resized.

    Returns:
        Tuple ``(predicted_class, probabilities)``: the index of the highest
        score as a Python int, and the softmax of the scores as a numpy array.
    """
    # Open inside a context manager so the file handle is released right away;
    # convert() returns a loaded copy that stays valid after the file is closed.
    with Image.open(image_path) as source_image:
        rgb_image = source_image.convert('RGB')
    resized_img = rgb_image.resize(image_size, Image.LANCZOS)

    # unsqueeze(0) adds the batch dimension expected by the model.
    image_tensor = ToTensor()(resized_img).unsqueeze(0)

    model.eval()
    model_device = next(model.parameters()).device
    with torch.no_grad():
        outputs = model(image_tensor.to(model_device))
        probabilities = F.softmax(outputs, dim=1)

    _, predicted = torch.max(outputs, 1)
    predicted_class = predicted.item()

    return predicted_class, probabilities.squeeze().cpu().numpy()

In [105]:
# Classify one stored image and report the winning class with its probability.
image_file = 'image_test/small_copter388.png'
predicted_class_number, probability = test_single_image(resnet18, image_file)
predicted_class_name = enum(predicted_class_number)
winning_probability = probability[predicted_class_number]
print(f'Predicted class is {predicted_class_name}, probability = {winning_probability}')

Predicted class is small copter, probability = 1.0


In [26]:
pip install git+https://github.com/EricCreusen/scaleogram.git

Collecting git+https://github.com/EricCreusen/scaleogram.git
  Cloning https://github.com/EricCreusen/scaleogram.git to /tmp/pip-req-build-sb0f3obq
  Running command git clone --filter=blob:none --quiet https://github.com/EricCreusen/scaleogram.git /tmp/pip-req-build-sb0f3obq
  Resolved https://github.com/EricCreusen/scaleogram.git to commit 5804642af123f2f19c60dade3278f5da5fe414e4
  Preparing metadata (setup.py) ... [?25ldone

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.2[0m[39;49m -> [0m[32;49m23.3.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [108]:
import numpy as np
import scaleogram as scg
import time

from scipy.io import wavfile
from matplotlib import pyplot as plt

# Wavelet name handed to the scaleogram helpers below.
wavelet = 'cmor1-1.5'
# Passed to scg.cws as its `coikw` argument.
coikw = dict(alpha=0.5, hatch='/')

def get_signal(file_path: str):
    """Read a WAV file and return ``(samplerate, samples)`` as given by ``wavfile.read``."""
    rate_and_samples = wavfile.read(file_path)
    return rate_and_samples

def save_scaleogram(file_path, signal, time, scales, wavelet):
    """Render the scaleogram of ``signal`` and save it as an image file.

    Args:
        file_path: Destination path handed to ``plt.savefig``.
        signal: Samples to transform.
        time: Time axis for ``signal`` (same length as ``signal``).
        scales: Wavelet scales to evaluate.
        wavelet: Wavelet name used for the transform and the plot.
    """
    # Hand the wavelet to the transform itself, not only to the plot call.
    # NOTE(review): assumes CWT falls back to the library default wavelet when
    # none is given — confirm against the installed scaleogram version.
    cwt = scg.CWT(time=time, signal=signal, scales=scales, wavelet=wavelet)
    scg.cws(cwt, figsize=(6, 4), coikw=coikw, wavelet=wavelet, yaxis='frequency', spectrum='amp', title='')
    try:
        plt.tight_layout()
        plt.savefig(file_path, bbox_inches='tight')
    finally:
        # Always close the figure: this function is called once per audio
        # window, so a failed save must not leave figures piling up.
        plt.close()

def classify_sound_split(file_path, model):
    """Classify a WAV file window by window and count the votes per class.

    The signal is cut into consecutive 0.2 s windows. Each window is rendered
    to a scaleogram image ('temp.png') and classified with
    ``test_single_image``. A trailing window shorter than 0.2 s is skipped: it
    would be drawn on a 0.2 s time axis despite holding fewer samples, and a
    window with a single sample cannot be transformed at all.

    Args:
        file_path: Path of the WAV file.
        model: Model passed through to ``test_single_image``.

    Returns:
        numpy array of length 5 where entry ``k`` is the number of windows
        predicted as class ``k``. It is all zeros when the recording is
        shorter than one window.
    """
    counter = np.zeros(5)
    image_path = 'temp.png'
    scales = scg.periods2scales(np.logspace(np.log10(2), np.log10(1000)), wavelet)

    sample_rate, signal = get_signal(file_path)
    signal_length = signal.shape[0] / sample_rate
    if signal_length >= 0.2:
        step_size = 0.2
        sample_step = int(sample_rate * step_size)
        signal_shape = signal.shape[0]

        # Every processed window is full length, so one time axis serves them all.
        tm = np.linspace(0, step_size, sample_step)

        # Stop at the last start offset that still leaves a full window.
        for i in range(0, signal_shape - sample_step + 1, sample_step):
            start = time.time()
            new_signal = signal[i:i + sample_step]
            save_scaleogram(image_path, new_signal, tm, scales, wavelet)

            predicted_class_number, probability = test_single_image(model, image_path)
            print(predicted_class_number, probability)
            counter[predicted_class_number] += 1
            end = time.time()
            # Wall-clock seconds spent on this window.
            print(end - start)
    return counter

def classify_sound(file_path, model):
    """Classify a whole WAV file from a single scaleogram image.

    The full signal is rendered to 'temp.png' and that image is passed to
    ``test_single_image``.

    Returns:
        The ``(predicted_class_number, probability)`` pair produced by
        ``test_single_image``.
    """
    scales = scg.periods2scales(np.logspace(np.log10(2), np.log10(1000)), wavelet)
    image_path = 'temp.png'

    sample_rate, signal = get_signal(file_path)
    sample_count = signal.shape[0]
    duration = sample_count / sample_rate
    # Named time_axis so the `time` module is not shadowed inside this function.
    time_axis = np.linspace(0, duration, sample_count)

    save_scaleogram(image_path, signal, time_axis, scales, wavelet)

    predicted_class_number, probability = test_single_image(model, image_path)
    return predicted_class_number, probability

In [109]:
sound_path = 'test_data/sound_human_test2.wav'

# The class with the most window votes wins.
vote_counts = classify_sound_split(sound_path, resnet18)
winning_class = np.argmax(vote_counts)
print(f'Predicted class is {enum(winning_class)}')

2 [4.8134165e-08 3.7009226e-08 1.0000000e+00 6.3155231e-10 8.7476019e-11]
1.4550728797912598
2 [9.9203987e-07 1.0397373e-04 9.9989426e-01 3.0686687e-07 4.6476916e-07]
1.6795954704284668
1 [8.2204677e-04 8.0923313e-01 1.8837063e-01 2.9707656e-04 1.2770958e-03]
1.4435739517211914
2 [2.4170613e-04 2.8920551e-03 9.9683458e-01 1.5169665e-05 1.6514987e-05]
1.4458868503570557
2 [6.3490884e-09 3.5551940e-07 9.9999964e-01 3.7645798e-10 2.0293465e-11]
1.4966843128204346
Predicted class is human


In [100]:
# Classify the whole recording at once and report the winning class with its probability.
sound_file = 'sound_human2.wav'
predicted_class_number, probability = classify_sound(sound_file, resnet18)
predicted_class_name = enum(predicted_class_number)
winning_probability = probability[predicted_class_number]
print(f'Predicted class is {predicted_class_name}, probability = {winning_probability}')

Predicted class is human, probability = 0.9995480179786682


In [24]:
!pip install Pillow


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.2[0m[39;49m -> [0m[32;49m23.3.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [26]:
import os
from PIL import Image 

# Resize every file in drone_dataset to 590x390 and write the result, under the
# same file name, to drone_dataset_resize.
image_folder_path = 'drone_dataset'
output_folder_path = 'drone_dataset_resize'

# Create the destination up front; saving into a missing folder would fail.
os.makedirs(output_folder_path, exist_ok=True)

# Regular files only; sub-directories are skipped.
image_files = [f for f in os.listdir(image_folder_path) if os.path.isfile(os.path.join(image_folder_path, f))]

for image_file in image_files:
    image_path = os.path.join(image_folder_path, image_file)
    output_image_path = os.path.join(output_folder_path, image_file)

    # The context manager closes the source file once the resized copy exists,
    # so handles do not pile up across the loop.
    with Image.open(image_path) as img:
        resized_img = img.resize((590, 390), Image.LANCZOS)

    resized_img.save(output_image_path)