In [1]:
# Enable IPython's autoreload extension in mode 2: imported modules are reloaded
# before each cell executes, so edits to project code are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
# Switch the working directory to the parent folder; the relative 'data/...' paths
# used later in this notebook are resolved from there (presumably the project root,
# which would also be where `src` is importable from — confirm).
# NOTE: this is not idempotent — re-running the cell moves up one more level.
os.chdir('..')

from src import *

import math
import torch
import torchaudio
import librosa
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as tick

from tqdm.notebook import tqdm
from easydict import EasyDict
from IPython.display import HTML

# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [7]:
def optimize(views, events_start_time, events_end_time, e_p_s, energy, is_rear=True, window_len=0.5):
    """Grid-search the time offset whose windows capture the most energy.

    Candidate offsets run from 0.0 to 5.0 in steps of 0.1. For each candidate,
    a window of ``2 * window_len`` is centred on every selected event time
    shifted by the offset, and the ``energy`` samples inside all windows are
    summed. The offset with the largest total is returned; on ties the
    smallest offset wins.

    Args:
        views: Array of per-event view labels, compared element-wise to ``'rear'``.
        events_start_time: Array of event start times; used when ``is_rear`` is True.
        events_end_time: Array of event end times; used when ``is_rear`` is False.
        e_p_s: Number of ``energy`` samples per unit of time (times are
            multiplied by it to obtain indices into ``energy``).
        energy: 1-D sequence supporting slicing and ``.sum().item()``
            (e.g. a NumPy array or a torch tensor).
        is_rear: If True, select events whose view is ``'rear'``, anchor on
            their start times and shift windows earlier (anchor - offset).
            Otherwise select all other events, anchor on their end times and
            shift windows later (anchor + offset).
        window_len: Half-width of each window, in the same time unit as the
            event times.

    Returns:
        The best offset in the same time unit as the event times. Returns 0.0
        when no candidate captures any positive energy (for example when no
        event is selected), instead of failing on an unset result.
    """
    if is_rear:
        anchors = events_start_time[views == 'rear'] * e_p_s
    else:
        anchors = events_end_time[views != 'rear'] * e_p_s

    half_window = window_len * e_p_s
    n_samples = len(energy)

    # Start from "no shift" so the final division is always defined, even if
    # no window ever beats the initial score of zero.
    delta_best = 0.0
    sum_of_energies_best = 0

    for delta_seconds in np.arange(0, 5.1, 0.1):
        delta = delta_seconds * e_p_s

        # Rear events are shifted back in time, all others forward.
        centres = anchors - delta if is_rear else anchors + delta

        # Window bounds are clamped to the valid index range and truncated to ints.
        window_from = np.clip(centres - half_window, 0, n_samples).astype(int)
        window_till = np.clip(centres + half_window, 0, n_samples).astype(int)

        sum_of_energies = 0
        for i, j in zip(window_from, window_till):
            sum_of_energies += energy[i: j].sum().item()

        # Strict comparison: the earliest (smallest) offset wins ties.
        if sum_of_energies > sum_of_energies_best:
            sum_of_energies_best = sum_of_energies
            delta_best = delta

    return delta_best / e_p_s

In [8]:
def create_labels(file):
    """Estimate event time labels for one recording and save them to disk.

    Loads ``data/audio/{file}.MP4.wav`` and the annotation CSV for
    ``{file}.MP4``, derives a per-frame energy curve from an STFT of the
    audio, and calls ``optimize`` twice: once for events viewed from the
    'rear' (the found offset is subtracted from their start times) and once
    for all other events (the found offset is added to their end times).
    The shifted times are merged, sorted, clipped to the recording duration,
    rounded to two decimals and written to ``data/labels/{file}.MP4.txt``.
    The two offsets are printed.

    Args:
        file: Recording identifier without the ``.MP4`` suffix.
    """
    params = EasyDict()
    params.n_fft = 1024
    params.hop_length = 128
    params.sr = 44100  # assumes load_audio yields audio at this rate — TODO confirm

    signal = load_audio(f'data/audio/{file}.MP4.wav')
    # Exact duration in seconds. Truncating it to whole seconds inflates e_p_s,
    # so the time -> frame mapping drifts towards the end of the recording, and
    # a clip shorter than one second would divide by zero below.
    signal_length = len(signal) / params.sr

    csv = load_csv(f'{file}.MP4')
    views = load_views_from_csv(csv)
    events_start_time, events_end_time = load_event_time_from_csv(csv)

    # return_complex must be passed explicitly for real input on current PyTorch.
    # assumes signal is a 1-D real tensor, giving a (freq, frames) spectrum — TODO confirm
    spectrum = torch.stft(
        signal,
        n_fft=params.n_fft,
        hop_length=params.hop_length,
        return_complex=True,
    )
    # Squared real part summed over frequency bins; numerically the same as the
    # former `s[..., 0].pow(2).sum(0)` on the real-valued output layout.
    # NOTE(review): the imaginary part is ignored; full spectral power would be
    # spectrum.abs().pow(2) — confirm which one is intended.
    energy = spectrum.real.pow(2).sum(0)

    # Energy frames per second, used to convert event times to frame indices.
    e_p_s = len(energy) / signal_length

    rear_mask = views == 'rear'
    output_rear = optimize(views, events_start_time, events_end_time, e_p_s, energy, is_rear=True)
    estimated_labels_1 = events_start_time[rear_mask] - output_rear

    front_mask = views != 'rear'
    output_front = optimize(views, events_start_time, events_end_time, e_p_s, energy, is_rear=False)
    estimated_labels_2 = events_end_time[front_mask] + output_front

    print(f'{file}: {output_rear:.2f}, {output_front:.2f}')

    estimated_labels = np.sort(np.concatenate([estimated_labels_1, estimated_labels_2]))
    estimated_labels = np.clip(estimated_labels, 0, signal_length)
    estimated_labels = np.round(estimated_labels, 2)

    np.savetxt(f'data/labels/{file}.MP4.txt', estimated_labels, fmt='%s')

In [None]:
# Recording identifiers to process, without the '.MP4' suffix. create_labels()
# expands each one to 'data/audio/<id>.MP4.wav' for the audio, '<id>.MP4' for the
# CSV lookup and 'data/labels/<id>.MP4.txt' for the output.
files = [
    '20190819-Kutna Hora-L1-out-MVI_0007',
    '20190819-Kutna Hora-L3-in-MVI_0005',
    '20190819-Kutna Hora-L3-out-MVI_0008',
    '20190819-Kutna Hora-L4-in-MVI_0013',
    '20190819-Kutna Hora-L5-in-MVI_0003',
    '20190819-Kutna Hora-L6-out-MVI_0017',
    '20190819-Kutna Hora-L7-out-MVI_0032',
    '20190819-Kutna Hora-L8-in-MVI_0045',
    '20190819-Kutna Hora-L9-in-MVI_0043',
    '20190819-Kutna Hora-L10-in-MVI_0029',
    '20190819-Kutna Hora-L10-out-SDV_1888',
    '20190819-Kutna Hora-L13-in-MVI_0006',
    '20190819-Kutna Hora-L13-out-MVI_0018',
    '20190819-Kutna Hora-L14-in-SDV_0012',
    '20190819-Kutna Hora-L14-out-MVI_0005',
    '20190819-Kutna Hora-L15-out-MVI_0012',
    '20190819-Kutna Hora-L16-out-MVI_0003',
    '20190819-Kutna Hora-L18-in-MVI_0030',
    '20190819-Kutna Hora-L18-out-MVI_0045',
    '20190819-Ricany-L2-in-MVI_0006',
    '20190819-Ricany-L2-out-MVI_0005',
    '20190819-Ricany-L3-in-MVI_0006',
    '20190819-Ricany-L3-out-MVI_0014',
    '20190819-Ricany-L6-in-MVI_0008',
    '20190819-Ricany-L6-out-MVI_0011',
    '20190819-Ricany-L7-in-MVI_0008',
    '20190819-Ricany-L7-out-MVI_0013',
    '20190819-Ricany-L8-in-MVI_0009',
    '20190819-Ricany-L8-out-MVI_0013',
    '20190819-Ricany-L9-in-MVI_0008',
    '20190819-Ricany-L9-out-MVI_0011',
    '20190819-Kutna Hora-L2-in-MVI_0030',
    '20190819-Kutna Hora-L16-in-MVI_0038',
    '20190819-Ricany-L5-out-MVI_0008',
    '20190819-Kutna Hora-L17-in-MVI_0036',
    '20190819-Ricany-L5-in-MVI_0007',
    '20190819-Kutna Hora-L17-out-MVI_0040',
    '20190819-Ricany-L1-in-MVI_0006',
    '20190819-Kutna Hora-L11-in-MVI_0002',
    '20190819-Ricany-L1-out-MVI_0009',
    '20190819-Ricany-L4-in-MVI_0007',
    '20190819-Ricany-L4-out-MVI_0012',
]

In [9]:
# Generate a label file for every recording in `files`. Each call prints one line
# with the offsets found for 'rear' events and for all other events. An exception
# raised for one recording stops the loop, leaving later recordings unprocessed.
for file in files:
    create_labels(file)

20190819-Kutna Hora-L1-out-MVI_0007: 5.00, 3.00
20190819-Kutna Hora-L3-in-MVI_0005: 2.90, 1.40
20190819-Kutna Hora-L3-out-MVI_0008: 0.80, 0.10
20190819-Kutna Hora-L4-in-MVI_0013: 1.60, 0.60
20190819-Kutna Hora-L5-in-MVI_0003: 0.20, 0.40
20190819-Kutna Hora-L6-out-MVI_0017: 5.00, 3.20
20190819-Kutna Hora-L7-out-MVI_0032: 3.00, 2.20
20190819-Kutna Hora-L8-in-MVI_0045: 4.20, 2.10
20190819-Kutna Hora-L9-in-MVI_0043: 3.70, 1.80
20190819-Kutna Hora-L10-in-MVI_0029: 3.00, 1.00
20190819-Kutna Hora-L10-out-SDV_1888: 5.00, 2.50
20190819-Kutna Hora-L13-in-MVI_0006: 1.00, 0.30
20190819-Kutna Hora-L13-out-MVI_0018: 1.20, 0.40
20190819-Kutna Hora-L14-in-SDV_0012: 3.50, 1.20
20190819-Kutna Hora-L14-out-MVI_0005: 2.90, 1.10
20190819-Kutna Hora-L15-out-MVI_0012: 0.90, 0.60
20190819-Kutna Hora-L16-out-MVI_0003: 2.70, 1.20
20190819-Kutna Hora-L18-in-MVI_0030: 4.80, 2.80
20190819-Kutna Hora-L18-out-MVI_0045: 1.90, 0.80
20190819-Ricany-L2-in-MVI_0006: 4.90, 1.80
20190819-Ricany-L2-out-MVI_0005: 5.00, 2.30
