In [1]:
import dataloader
import numpy as np
import torch
import random

In [2]:
raw_dataset = dataloader.get_dataset(None, 8192)
print({ k: len(v) for k, v in raw_dataset.items() })

{'ignore': 8192, 'Whistling': 21, 'Horn': 748, 'Train': 895, 'Clapping': 544, 'Laughing': 216, 'Whistle': 18, 'Dog': 2582, 'Noise': 1770, 'Sneezing': 151, 'BirdChirp': 1299, 'Scream': 29, 'Aircraft': 1119, 'Siren': 1683, 'Crow': 621, 'Rooster': 402, 'Gunshot': 8, 'Wind': 530, 'Frog': 3608, 'CarDoor': 439, 'VehicleExhaust': 252, 'Engine': 4197, 'Coughing': 626, 'Cat': 241, 'Thunder': 472, 'Beeping': 91, 'Unknown': 293, 'Insects': 1986, 'Sheep': 191, 'Basketball': 495, 'Drums': 6193, 'Cow': 168, 'GunNoise': 2286, 'PowerTool': 1065, 'GlassBreak': 587, 'Bell': 678, 'Skateboarding': 76, 'Jackhammer': 2882, 'HandSaw': 332}


In [3]:
print(f'gpu enabled: {torch.cuda.is_available()}')
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

# import vae_7 as vae
# encoder = vae.Encoder(embedding_size = 4).to(device)
# encoder.load_state_dict(torch.load('vae-7-F4-tested-55/encoder-F4-E200-L110.pt')) # 85/50 ish

# import vae_8 as vae
# encoder = vae.Encoder(embedding_size = 4).to(device)
# encoder.load_state_dict(torch.load('vae-8-F4-A0.50-tested-54/encoder-F4-A0.50-E208-L56.pt')) # 80/50 ish
# encoder.load_state_dict(torch.load('vae-8-F4-A0.33-tested-44/encoder-F4-A0.33-E200-L41.pt')) # 85/60 ish
# encoder.load_state_dict(torch.load('vae-8-F4-A0.67-tested-57/encoder-F4-A0.67-E224-L71.pt')) # 85/50 ish

# import vae_8 as vae
# encoder = vae.Encoder(embedding_size = 16).to(device)
# encoder.load_state_dict(torch.load('vae-8-F16-A0.50-tested-46/encoder-F16-A0.50-E200-L57.pt')) # 85/60 ish
# encoder.load_state_dict(torch.load('vae-8-F16-A0.33-tested-42/encoder-F16-A0.33-E256-L43.pt')) # 85/50 ish
# encoder.load_state_dict(torch.load('vae-8-F16-A0.9-tested-56/encoder-F16-A0.9-E256-L89.pt')) # 85/50 ish

# import vae_9 as vae
# encoder = vae.Encoder(embedding_size = 16).to(device)
# encoder.load_state_dict(torch.load('vae-9-F16-A0.9-tested-55/encoder-F16-A0.9-E256-L83.pt')) # 85/55 ish

import vae_10 as vae
encoder = vae.Encoder(embedding_size = 16).to(device)
encoder.load_state_dict(torch.load('vae-10-F16-A0.9-tested-60-79/encoder-F16-A0.9-E256-L66.pt'))

encoder.eval()

def prep_sample(x):
    f, t, Sxx = dataloader.get_spectrogram(x)
    with torch.no_grad():
        encoder_input = torch.tensor(np.log10(np.maximum(Sxx, 1e-20)).reshape(1, *Sxx.shape)).to(device)
        mean, logstd = encoder.forward(encoder_input)
        assert mean.shape == (1, encoder.embedding_size)
        return np.array(mean.cpu()).reshape(-1), np.array(logstd.exp().cpu()).reshape(-1)

dataset = { k: [prep_sample(x) for x in v] for k, v in raw_dataset.items() }

gpu enabled: True


In [279]:
benchmark_dataset = dict(random.sample([(k,v) for k,v in dataset.items() if len(v) >= 256], 10))
print({ k: len(v) for k, v in benchmark_dataset.items() })
max_clusters = 32
def benchmark(filter_class, filter_thresh):
    print(f'bench: {filter_class.__name__}')
    print()
    print('heterogeneous class tests:')
    vals = []
    for i in range(16):
        f = filter_class(max_clusters, encoder.embedding_size, filter_thresh)
        keeps = []
        reps = 30
        for rep in range(reps):
            for label, samples in benchmark_dataset.items():
                if f.insert(*random.choice(samples)):
                    keeps.append(label)
        vals.append(100 * len(keeps) / len(benchmark_dataset) / reps)
        print(f'iter {i:>4}: kept {len(keeps)}/{len(benchmark_dataset) * reps} ({vals[-1]:.2f}%)')
    print(f'mean: {np.mean(vals):>5.2f} std: {np.std(vals):>5.2f}')

    print()
    print('homogenous class tests:')
    vals = []
    for label, samples in benchmark_dataset.items():
        f = filter_class(max_clusters, encoder.embedding_size, filter_thresh)
        keeps = []
        reps = 300
        for rep in range(reps):
            if f.insert(*random.choice(samples)):
                keeps.append(i)
        vals.append(100 * len(keeps) / reps)
        print(f'{label:>16}: kept {len(keeps):>4}/{reps} ({vals[-1]:>5.2f}%)')
    print(f'mean: {np.mean(vals):>5.2f} std: {np.std(vals):>5.2f}')

{'HandSaw': 332, 'Dog': 2582, 'Clapping': 544, 'GunNoise': 2286, 'Train': 895, 'Aircraft': 1119, 'Noise': 1770, 'Engine': 4197, 'Unknown': 293, 'Rooster': 402}


In [283]:
class ClusterFilterAug2:
    def __init__(self, max_clusters, embedding_size, thresh):
        self.means = np.zeros((max_clusters, embedding_size))
        self.weights = np.zeros((max_clusters,))
        self.max_clusters = max_clusters
        self.base_radius = thresh
    def insert(self, mean, std):
        assert mean.shape == self.means.shape[1:] and mean.shape == std.shape and self.means.shape[0] == self.max_clusters and self.weights.shape == (self.max_clusters,)
        l2_norm = np.sqrt(np.sum((self.means - mean) ** 2, axis = 1))
        close = l2_norm <= np.sqrt(self.weights) * self.base_radius
        
        if np.any(close):
            center = (np.sum((self.means[close].T * self.weights[close]).T, axis = 0) + mean) / (np.sum(self.weights[close]) + 1)
            weight = np.sum(self.weights[close]) + 1
            self.means = np.concatenate([
                self.means[~close],
                [ center ],
                np.zeros((self.means.shape[0] - (np.sum(~close) + 1), self.means.shape[1])),
            ])
            self.weights = np.concatenate([
                self.weights[~close],
                [ weight ],
                np.zeros((self.weights.shape[0] - (np.sum(~close) + 1),)),
            ])
            return False
        else:
            self.means = np.concatenate([
                self.means[1:],
                [ mean ],
            ])
            self.weights = np.concatenate([
                self.weights[1:],
                [ 1 ],
            ])
            return True

benchmark(ClusterFilterAug2, 0.55)

bench: ClusterFilterAug2

heterogeneous class tests:
iter    0: kept 297/300 (99.00%)
iter    1: kept 168/300 (56.00%)
iter    2: kept 296/300 (98.67%)
iter    3: kept 293/300 (97.67%)
iter    4: kept 294/300 (98.00%)
iter    5: kept 295/300 (98.33%)
iter    6: kept 292/300 (97.33%)
iter    7: kept 297/300 (99.00%)
iter    8: kept 282/300 (94.00%)
iter    9: kept 296/300 (98.67%)
iter   10: kept 295/300 (98.33%)
iter   11: kept 293/300 (97.67%)
iter   12: kept 238/300 (79.33%)
iter   13: kept 291/300 (97.00%)
iter   14: kept 296/300 (98.67%)
iter   15: kept 280/300 (93.33%)
mean: 93.81 std: 10.82

homogenous class tests:
         HandSaw: kept  262/300 (87.33%)
             Dog: kept  299/300 (99.67%)
        Clapping: kept  278/300 (92.67%)
        GunNoise: kept  164/300 (54.67%)
           Train: kept   83/300 (27.67%)
        Aircraft: kept   73/300 (24.33%)
           Noise: kept   36/300 (12.00%)
          Engine: kept   37/300 (12.33%)
         Unknown: kept    9/300 ( 3.00%)
  

In [284]:
class ClusterFilterAug:
    def __init__(self, max_clusters, embedding_size, thresh):
        self.means = np.zeros((max_clusters, embedding_size)) + np.inf
        self.radii = np.zeros((max_clusters,))
        self.max_clusters = max_clusters
        self.base_radius = thresh
    def insert(self, mean, std):
        assert mean.shape == self.means.shape[1:] and mean.shape == std.shape and self.means.shape[0] == self.max_clusters and self.radii.shape == (self.max_clusters,)
        l2_norm = np.sqrt(np.sum((self.means - mean) ** 2, axis = 1))
        close = l2_norm <= self.radii
        
        if np.any(close):
            center = np.mean(self.means[close], axis = 0)
            radius = np.sqrt(np.sum(self.radii[close]**2))

#             center = (np.sum(self.means[close], axis = 0) + mean) / (np.sum(close) + 1)
#             radius = np.sqrt(np.sum(self.radii[close]**2) + self.base_radius**2)
        
            self.means = np.concatenate([
                self.means[~close],
                [ center ],
                np.zeros((self.means.shape[0] - (np.sum(~close) + 1), self.means.shape[1])) + np.inf,
            ])
            self.radii = np.concatenate([
                self.radii[~close],
                [ radius ],
                np.zeros((self.radii.shape[0] - (np.sum(~close) + 1),)),
            ])
            return False
        else:
            self.means = np.concatenate([
                self.means[1:],
                [ mean ],
            ])
            self.radii = np.concatenate([
                self.radii[1:],
                [ self.base_radius ],
            ])
            return True

benchmark(ClusterFilterAug, .7)

bench: ClusterFilterAug

heterogeneous class tests:
iter    0: kept 289/300 (96.33%)
iter    1: kept 288/300 (96.00%)
iter    2: kept 289/300 (96.33%)
iter    3: kept 287/300 (95.67%)
iter    4: kept 285/300 (95.00%)
iter    5: kept 290/300 (96.67%)
iter    6: kept 287/300 (95.67%)
iter    7: kept 284/300 (94.67%)
iter    8: kept 290/300 (96.67%)
iter    9: kept 283/300 (94.33%)
iter   10: kept 285/300 (95.00%)
iter   11: kept 293/300 (97.67%)
iter   12: kept 288/300 (96.00%)
iter   13: kept 291/300 (97.00%)
iter   14: kept 291/300 (97.00%)
iter   15: kept 283/300 (94.33%)
mean: 95.90 std:  0.98

homogenous class tests:
         HandSaw: kept  279/300 (93.00%)
             Dog: kept  292/300 (97.33%)
        Clapping: kept  263/300 (87.67%)
        GunNoise: kept  243/300 (81.00%)
           Train: kept  259/300 (86.33%)
        Aircraft: kept  266/300 (88.67%)
           Noise: kept  114/300 (38.00%)
          Engine: kept  167/300 (55.67%)
         Unknown: kept   54/300 (18.00%)
   

In [213]:
class ClusterFilter:
    def __init__(self, max_clusters, embedding_size, thresh):
        self.means = np.zeros((max_clusters, embedding_size)) + np.inf
        self.pos = 0
        self.thresh = thresh
    def insert(self, mean, std):
        assert mean.shape == self.means.shape[1:] and mean.shape == std.shape
        l2_norm = np.sqrt(np.sum((self.means - mean) ** 2, axis = 1))
        keep = np.min(l2_norm) > self.thresh
        self.means[self.pos,:] = mean
        self.pos = (self.pos + 1) % self.means.shape[0]
        return keep

benchmark(ClusterFilter, 1.4)

bench: ClusterFilter

heterogeneous class tests:
iter    0: kept 202/300 (67.33%)
iter    1: kept 200/300 (66.67%)
iter    2: kept 205/300 (68.33%)
iter    3: kept 208/300 (69.33%)
iter    4: kept 192/300 (64.00%)
iter    5: kept 202/300 (67.33%)
iter    6: kept 203/300 (67.67%)
iter    7: kept 208/300 (69.33%)
iter    8: kept 215/300 (71.67%)
iter    9: kept 201/300 (67.00%)
iter   10: kept 212/300 (70.67%)
iter   11: kept 206/300 (68.67%)
iter   12: kept 204/300 (68.00%)
iter   13: kept 211/300 (70.33%)
iter   14: kept 211/300 (70.33%)
iter   15: kept 203/300 (67.67%)
mean: 68.40 std:  1.82

homogenous class tests:
        Laughing: kept  248/300 (82.67%)
         Thunder: kept  140/300 (46.67%)
         Gunshot: kept   11/300 ( 3.67%)
             Cat: kept  214/300 (71.33%)
           Siren: kept  194/300 (64.67%)
      Jackhammer: kept   66/300 (22.00%)
             Dog: kept  284/300 (94.67%)
         Insects: kept   61/300 (20.33%)
  VehicleExhaust: kept   93/300 (31.00%)
      

In [214]:
class DistributionFilter:
    def __init__(self, max_clusters, embedding_size, thresh):
        self.means = np.zeros((max_clusters, embedding_size))
        self.stds = np.zeros((max_clusters, embedding_size))
        self.pos = 0
        self.thresh = thresh
    def insert(self, mean, std):
        assert mean.shape == self.means.shape[1:] and mean.shape == std.shape
        kl_div = np.sum(-0.5 * (1 + 2 * np.log(np.maximum(self.stds / std, 1e-20)) - (std**2 + (self.means - mean)**2) / std**2), axis = 1)
        keep = np.min(kl_div) > self.thresh
        self.means[self.pos,:] = mean
        self.stds[self.pos,:] = std
        self.pos = (self.pos + 1) % self.means.shape[0]
        return keep

benchmark(DistributionFilter, 200)

bench: DistributionFilter

heterogeneous class tests:
iter    0: kept 212/300 (70.67%)
iter    1: kept 221/300 (73.67%)
iter    2: kept 216/300 (72.00%)
iter    3: kept 211/300 (70.33%)
iter    4: kept 227/300 (75.67%)
iter    5: kept 212/300 (70.67%)
iter    6: kept 226/300 (75.33%)
iter    7: kept 217/300 (72.33%)
iter    8: kept 210/300 (70.00%)
iter    9: kept 226/300 (75.33%)
iter   10: kept 224/300 (74.67%)
iter   11: kept 223/300 (74.33%)
iter   12: kept 216/300 (72.00%)
iter   13: kept 213/300 (71.00%)
iter   14: kept 220/300 (73.33%)
iter   15: kept 220/300 (73.33%)
mean: 72.79 std:  1.87

homogenous class tests:
        Laughing: kept  234/300 (78.00%)
         Thunder: kept  141/300 (47.00%)
         Gunshot: kept   13/300 ( 4.33%)
             Cat: kept  223/300 (74.33%)
           Siren: kept  205/300 (68.33%)
      Jackhammer: kept   84/300 (28.00%)
             Dog: kept  273/300 (91.00%)
         Insects: kept   83/300 (27.67%)
  VehicleExhaust: kept  102/300 (34.00%)
 