In [1]:
import IPython
from pvrecorder import PvRecorder
import wave
import struct
import time
import sounddevice as sd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
recorder = PvRecorder(device_index=0, frame_length=512)
sound = []

recorder.start()
print('recording')
t_0 = time.time()
while time.time()-t_0<4:
    frame = recorder.read()
    sound.extend(frame)
        # Do something ...
recorder.stop()
recorder.delete()
fs = 16000

In [3]:
path = 'test.wav' 
with wave.open(path, 'w') as f:
                    f.setparams((1, 2, fs, 512, "NONE", "NONE"))
                    f.writeframes(struct.pack("h" * len(sound), *sound))

In [4]:
data = np.array(sound).astype(np.int16)
for i in range(100):
    fft = np.fft.fft(data)*0.1
    data = np.fft.ifft(fft)
    data = ((2**(16-4)) * data/data.max()).astype(np.int16)
   

In [5]:
data = np.array(sound).astype(np.int16)
fft = np.fft.fft(data)
fft

array([4201059.              +0.j        ,
       2474761.95954388-2917289.45706843j,
       -403651.47203084-2523971.78928035j, ...,
       -874580.41377833 +653212.6588909j ,
       -403651.47203084+2523971.78928035j,
       2474761.95954388+2917289.45706843j])

In [6]:
plt.plot(data)

[<matplotlib.lines.Line2D at 0x10b68d970>]

In [7]:
plt.plot(fft)
  

[<matplotlib.lines.Line2D at 0x10b7697f0>]

In [8]:
roll = np.roll(fft,100)
plt.plot(roll)

[<matplotlib.lines.Line2D at 0x10b9e1e20>]

In [9]:
ifft = np.fft.ifft(roll)
plt.plot(ifft)

[<matplotlib.lines.Line2D at 0x10ba66760>]

In [10]:
fft

array([4201059.              +0.j        ,
       2474761.95954388-2917289.45706843j,
       -403651.47203084-2523971.78928035j, ...,
       -874580.41377833 +653212.6588909j ,
       -403651.47203084+2523971.78928035j,
       2474761.95954388+2917289.45706843j])

In [11]:
roll = np.roll(fft,10)
data = np.fft.ifft(roll)
data = ((2**(16-4)) * data/data.max()).astype(np.int16)

In [12]:
plt.plot(data*0.9)

[<matplotlib.lines.Line2D at 0x10ba86280>]

In [13]:
!env | grep env

In [14]:
from scipy.io.wavfile import read
import wave
import struct
import ipywidgets as widgets
import IPython
from pvrecorder import PvRecorder
import wave
import struct
import time
import sounddevice as sd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path

In [15]:
class Response:
    def __init__(self,frame):
        self.frame_length=frame
        self.path =  './test.wav'
        self.t_delta=1
        self.fs = 16000
        self.root = Path('./')
        
    def set_time(self,delta_sec:int):
        self.t_delta=delta_sec
    
    def createdirs(self):
        if not self.dir.exists():
            self.dir.mkdir(exist_ok=True)
        for sound_class in self.class_dirs:
            sound_dir = self.dir/sound_class
            sound_dir.mkdir(exist_ok=True)

            
    def get_classes(self,sound_classes:tuple[str,str]):
        self.dir = self.root/'data'  
        self.class_dirs = sound_classes
        self.createdirs()
        self.classes={sound:{'class':idx,'count':len(list((self.dir/sound).iterdir()))} for idx,sound in enumerate(sound_classes)}
        print(self.classes)
        self.set_widgets()
        
        
    def set_widgets(self):
        
        actions=[
            widgets.Button(description=f'record {name}') for name,entry in self.classes.items()]
        for act in actions:
            act.on_click(self.record)
            
            
        save = widgets.Button(description='save \U0001F4BE')
        play = widgets.Button(description='play ▶️')
        
        delete = widgets.Button(description='delete \U0000274C')
        save.on_click(self.save)
        play.on_click(self.play)
        delete.on_click(self.drop_recording)
        self.out = widgets.Output()
        actions+=[play,save,delete,self.out]
        acts = tuple(actions)
        self.vbox=widgets.VBox(children=acts)
        display(self.vbox)
        
    def record(self,button:widgets.Button):
        with self.out:
            key = button.description.split(' ')[1]
            self.state = key
            self.classes[key]['count']+=1
            self.classes[key][f'record {self.classes[key]["count"]}']=np.array([])
#             print(dir(self.out))
#             print(self.out._Output__counter)
            self.recorder = PvRecorder(device_index=0, frame_length=self.frame_length)
            self.recorder.start()
            t_0 = time.time()
            record = np.array([]).astype(np.int16)
            while time.time()-t_0<self.t_delta:
                frame = self.recorder.read()
                record = np.append(record,np.array(frame)).astype(np.int16)
            self.recorder.stop()
            self.recorder.delete()
            self.classes[key][f'record {self.classes[key]["count"]}']=record
            

    def save(self,_):
        path = self.root/'data'
        if not path.exists():
            path.mkdir(exist_ok=True)
        with self.out:
            for state in self.classes:
                class_dir = path/state
                if not class_dir.exists():
                    class_dir.mkdir(exist_ok=True)
                for key in self.classes[state]:
                    if 'record' in key:
                        rec = self.classes[state][key]
                        fid = class_dir/f'{state}_{key}.wav'
                        with wave.open(str(fid), 'w') as f:
                            f.setparams((1, 2, self.fs, 512, "NONE", "NONE"))
                            f.writeframes(struct.pack("h" * len(rec), *rec))
        
    def play(self,_):
        try:
            count = self.classes[self.state]['count']
            print(self.classes[self.state][f'record {count}'])
            for key in self.classes[self.state]:
                if 'record' in key:
                    sd.play(self.classes[self.state][key], self.fs)
                    sd.wait()
        except(KeyError,AttributeError):
            print('no recordings')

    def drop_recording(self,_):
        try:
            if self.classes[self.state]['count']!=0:
                self.classes[self.state].popitem()
                self.classes[self.state]['count']-=1
        except(KeyError,AttributeError):
            print('no recordings')

In [16]:
sample = Response(frame=512)
sample.set_time(delta_sec=1)
sample.get_classes(('yes','no','background'))

In [17]:
!tree data

In [18]:
duration = 10  # seconds
fs = 16000
myrecording = sd.rec(duration * fs, samplerate=fs, channels=2)

In [19]:
duration = 10  # seconds
fs = 16000
myrecording = sd.rec(duration * fs, samplerate=fs, channels=1)

In [20]:
myrecording

array([[-4.1938017e-04],
       [-1.0445807e-03],
       [-1.0570143e-03],
       ...,
       [ 1.4677829e-05],
       [-8.7498025e-05],
       [-1.8448371e-04]], dtype=float32)

In [21]:
duration = 10  # seconds
fs = 16000
myrecording = sd.rec(duration * fs, samplerate=fs, channels=1)

In [22]:
myrecording

array([[-0.00039643],
       [-0.00076067],
       [-0.00062098],
       ...,
       [ 0.        ],
       [ 0.        ],
       [ 0.        ]], dtype=float32)

In [23]:
duration = 100  # seconds
fs = 16000
myrecording = sd.rec(duration * fs, samplerate=fs, channels=1)

In [24]:
myrecording

array([[0.0003985 ],
       [0.00076165],
       [0.00063807],
       ...,
       [0.        ],
       [0.        ],
       [0.        ]], dtype=float32)

In [25]:
duration = 10000  # seconds
fs = 16000
myrecording = sd.rec(duration * fs, samplerate=fs, channels=1)

In [26]:
myrecording

array([[-0.00199623],
       [-0.00369275],
       [-0.0032514 ],
       ...,
       [ 0.        ],
       [ 0.        ],
       [ 0.        ]], dtype=float32)

In [27]:
duration = 10000  # seconds
fs = 16000
myrecording = sd.rec(duration * fs, samplerate=fs, channels=1)

In [28]:
myrecording

array([[-0.00087767],
       [-0.0020228 ],
       [-0.00194919],
       ...,
       [ 0.        ],
       [ 0.        ],
       [ 0.        ]], dtype=float32)

In [29]:
duration = 1000000  # seconds
fs = 16000
myrecording = sd.rec(duration * fs, samplerate=fs, channels=1)

In [30]:
myrecording

array([[-0.00022264],
       [-0.00044777],
       [-0.00046475],
       ...,
       [ 0.        ],
       [ 0.        ],
       [ 0.        ]], dtype=float32)

In [31]:
recorder = PvRecorder(device_index=0, frame_length=512)
sound = []

recorder.start()
print('recording')
t_0 = time.time()
while time.time()-t_0<4:
    frame = recorder.read()
    sound.extend(frame)
        # Do something ...
recorder.stop()
recorder.delete()
fs = 16000

In [32]:
path = 'test.wav' 
with wave.open(path, 'w') as f:
                    f.setparams((1, 2, fs, 512, "NONE", "NONE"))
                    f.writeframes(struct.pack("h" * len(sound), *sound))

In [33]:
data = np.array(sound).astype(np.int16)
for i in range(100):
    fft = np.fft.fft(data)*0.1
    data = np.fft.ifft(fft)
    data = ((2**(16-4)) * data/data.max()).astype(np.int16)
   

In [34]:
data = np.array(sound).astype(np.int16)
fft = np.fft.fft(data)
fft

array([4170520.              +0.j        ,
       2354788.77439706-2943617.63309619j,
       -433773.97847462-2509651.29182733j, ...,
       -975257.2988287  +665655.97495314j,
       -433773.97847462+2509651.29182733j,
       2354788.77439706+2943617.63309619j])

In [35]:
plt.plot(data)

[<matplotlib.lines.Line2D at 0x1282e0ca0>]

In [36]:
class Response:
    def __init__(self,frame):
        self.frame_length=frame
        self.path =  './test.wav'
        self.t_delta=1
        self.fs = 16000
        self.root = Path('./')
        
    def set_time(self,delta_sec:int):
        self.t_delta=delta_sec
    
    def createdirs(self):
        if not self.dir.exists():
            self.dir.mkdir(exist_ok=True)
        for sound_class in self.class_dirs:
            sound_dir = self.dir/sound_class
            sound_dir.mkdir(exist_ok=True)

            
    def get_classes(self,sound_classes:tuple[str,str]):
        self.dir = self.root/'data'  
        self.class_dirs = sound_classes
        self.createdirs()
        self.classes={sound:{'class':idx,'count':len(list((self.dir/sound).iterdir()))} for idx,sound in enumerate(sound_classes)}
        print(self.classes)
        self.set_widgets()
        
        
    def set_widgets(self):
        
        actions=[
            widgets.Button(description=f'record {name}') for name,entry in self.classes.items()]
        for act in actions:
            act.on_click(self.record)
            
            
        save = widgets.Button(description='save \U0001F4BE')
        play = widgets.Button(description='play ▶️')
        
        delete = widgets.Button(description='delete \U0000274C')
        save.on_click(self.save)
        play.on_click(self.play)
        delete.on_click(self.drop_recording)
        self.out = widgets.Output()
        actions+=[play,save,delete,self.out]
        acts = tuple(actions)
        self.vbox=widgets.VBox(children=acts)
        display(self.vbox)
        
    def record(self,button:widgets.Button):
        with self.out:
            key = button.description.split(' ')[1]
            self.state = key
            self.classes[key]['count']+=1
            self.classes[key][f'record {self.classes[key]["count"]}']=np.array([])
#             print(dir(self.out))
#             print(self.out._Output__counter)
            self.recorder = PvRecorder(device_index=0, frame_length=self.frame_length)
            self.recorder.start()
            t_0 = time.time()
            record = np.array([]).astype(np.int16)
            while time.time()-t_0<self.t_delta:
                frame = self.recorder.read()
                record = np.append(record,np.array(frame)).astype(np.int16)
            self.recorder.stop()
            self.recorder.delete()
            self.classes[key][f'record {self.classes[key]["count"]}']=record
            

    def save(self,_):
        path = self.root/'data'
        if not path.exists():
            path.mkdir(exist_ok=True)
        with self.out:
            for state in self.classes:
                class_dir = path/state
                if not class_dir.exists():
                    class_dir.mkdir(exist_ok=True)
                for key in self.classes[state]:
                    if 'record' in key:
                        rec = self.classes[state][key]
                        fid = class_dir/f'{state}_{key}.wav'
                        with wave.open(str(fid), 'w') as f:
                            f.setparams((1, 2, self.fs, 512, "NONE", "NONE"))
                            f.writeframes(struct.pack("h" * len(rec), *rec))
        
    def play(self,_):
        try:
            count = self.classes[self.state]['count']
            print(self.classes[self.state][f'record {count}'])
            for key in self.classes[self.state]:
                if 'record' in key:
                    sd.play(self.classes[self.state][key], self.fs)
                    sd.wait()
        except(KeyError,AttributeError):
            print('no recordings')

    def drop_recording(self,_):
        try:
            if self.classes[self.state]['count']!=0:
                self.classes[self.state].popitem()
                self.classes[self.state]['count']-=1
        except(KeyError,AttributeError):
            print('no recordings')

In [37]:
!tree data

In [38]:
import wave
import numpy as np
import matplotlib.pyplot as plt
def read_wav(fid:str):
    # Read file to get buffer                                                                                               
    ifile = wave.open(fid)
    samples = ifile.getnframes()
    audio = ifile.readframes(samples)

    # Convert buffer to float32 using NumPy                                                                                 
    audio_as_np_int16 = np.frombuffer(audio, dtype=np.int16)
    return audio_as_np_int16

In [39]:
!pwd

In [40]:
 !mkdir datasets
 !wget 'https://github.com/karoldvl/ESC-50/archive/master.zip' -P ~/pico/audio-classifier/datasets
 !unzip -q ~/datasets/master.zip -d ./datasets/

In [41]:
import pandas as pd
esc50_csv = './datasets/ESC-50-master/meta/esc50.csv'
base_data_path = './datasets/ESC-50-master/audio/'

df = pd.read_csv(esc50_csv)
df.head(100)

             filename  fold  target        category  esc10  src_file take
0    1-100032-A-0.wav     1       0             dog   True    100032    A
1   1-100038-A-14.wav     1      14  chirping_birds  False    100038    A
2   1-100210-A-36.wav     1      36  vacuum_cleaner  False    100210    A
3   1-100210-B-36.wav     1      36  vacuum_cleaner  False    100210    B
4   1-101296-A-19.wav     1      19    thunderstorm  False    101296    A
..                ...   ...     ...             ...    ...       ...  ...
95   1-20133-A-39.wav     1      39  glass_breaking  False     20133    A
96   1-202111-A-3.wav     1       3             cow  False    202111    A
97   1-20545-A-28.wav     1      28         snoring  False     20545    A
98   1-20736-A-18.wav     1      18    toilet_flush  False     20736    A
99   1-208757-A-2.wav     1       2             pig  False    208757    A

[100 rows x 7 columns]

In [42]:
import tensorflow as tf
import tensorflow_io as tfio

In [43]:
sample_rate = 16000
chans = 1
sound = read_wav('./data/yes/yes_record 3.wav')
sound = sound.astype(np.float32, order='F') / 32768.0

In [44]:
file_contents = tf.io.read_file('./data/yes/yes_record 5.wav')
wav, sample_rate = tf.audio.decode_wav(file_contents, desired_channels=chans)
squeeze_wav = tf.squeeze(wav, axis=-1)
spect = tf.signal.stft(squeeze_wav, frame_length=512, frame_step=128)

In [45]:
wav

<tf.Tensor: shape=(16384, 1), dtype=float32, numpy=
array([[ 0.        ],
       [ 0.        ],
       [ 0.        ],
       ...,
       [-0.00704956],
       [-0.00631714],
       [-0.00570679]], dtype=float32)>

In [46]:
import scipy

In [47]:
import cmsisdsp
from numpy import pi as PI

window_size = 512
step_size = 64

hanning_window_f32 = np.zeros(window_size)
for i in range(window_size):
  hanning_window_f32[i] = 0.5 * (1 - cmsisdsp.arm_cos_f32(2 * PI * i / window_size ))
hanning_window_q15 = cmsisdsp.arm_float_to_q15(hanning_window_f32)
rfftq15 = cmsisdsp.arm_rfft_instance_q15()
status = cmsisdsp.arm_rfft_init_q15(rfftq15, window_size, 0, 1)

def get_arm_spectrogram(waveform):
  num_frames = int(1 + (len(waveform) - window_size) // step_size)
  fft_size = int(window_size // 2 + 1)
  # Convert the audio to q15
  waveform_q15 = cmsisdsp.arm_float_to_q15(waveform)
  # Create empty spectrogram array
  spectrogram_q15 = np.empty((num_frames, fft_size), dtype = np.int16)
  start_index = 0
  for index in range(num_frames):
    # Take the window from the waveform.
    window = waveform_q15[start_index:start_index + window_size]
    # Apply the Hanning Window.
    window = cmsisdsp.arm_mult_q15(window, hanning_window_q15)
    # Calculate the FFT, shift by 7 according to docs
    window = cmsisdsp.arm_rfft_q15(rfftq15, window)
    # Take the absolute value of the FFT and add to the Spectrogram.
    spectrogram_q15[index] = cmsisdsp.arm_cmplx_mag_q15(window)[:fft_size]
    # Increase the start index of the window by the overlap amount.
    start_index += step_size
  # Convert to numpy output ready for keras
  return cmsisdsp.arm_q15_to_float(spectrogram_q15).reshape(num_frames,fft_size) * 512

In [48]:
spct = get_arm_spectrogram(wav)

In [49]:
spect

<tf.Tensor: shape=(125, 257), dtype=complex64, numpy=
array([[-1.1825697e+00+0.0000000e+00j,  7.9305696e-01-3.3905774e-02j,
        -2.6453486e-01-1.7655689e-01j, ...,
         8.3236247e-03-5.0675496e-03j, -7.6637864e-03+3.2727597e-03j,
         5.6102276e-03+0.0000000e+00j],
       [-1.1571851e+00+0.0000000e+00j,  5.1088202e-01+7.2440378e-02j,
         8.9413024e-02+5.3528726e-02j, ...,
        -2.6634298e-03-8.3412975e-05j,  4.6035647e-03+5.5335462e-03j,
        -3.3004284e-03+0.0000000e+00j],
       [-1.1590379e+00+0.0000000e+00j,  5.0499785e-01-6.7214325e-02j,
         3.9693721e-02-8.3785877e-02j, ...,
        -2.0413008e-03+1.2983121e-03j,  4.9695969e-03-2.0664409e-03j,
        -7.0039034e-03+0.0000000e+00j],
       ...,
       [-1.9084167e+00+0.0000000e+00j,  1.0758590e+00+9.9544629e-02j,
        -1.5023670e-01-7.5578287e-02j, ...,
         5.2796602e-03+1.4986992e-03j, -6.4264536e-03-7.1737394e-03j,
         4.9974322e-03+0.0000000e+00j],
       [-1.7120256e+00+0.0000000e+00j,

In [50]:
plt.plot(spct)

[<matplotlib.lines.Line2D at 0x2dc232760>,
 <matplotlib.lines.Line2D at 0x1185c3b80>,
 <matplotlib.lines.Line2D at 0x1282e35e0>,
 <matplotlib.lines.Line2D at 0x2daf49c40>,
 <matplotlib.lines.Line2D at 0x2dc2327c0>,
 <matplotlib.lines.Line2D at 0x2dc232970>,
 <matplotlib.lines.Line2D at 0x2dc232a60>,
 <matplotlib.lines.Line2D at 0x2dc232b50>,
 <matplotlib.lines.Line2D at 0x2dc232c40>,
 <matplotlib.lines.Line2D at 0x2dc232d30>,
 <matplotlib.lines.Line2D at 0x2dc232e20>,
 <matplotlib.lines.Line2D at 0x2dc232790>,
 <matplotlib.lines.Line2D at 0x2dc232f10>,
 <matplotlib.lines.Line2D at 0x2dc243100>,
 <matplotlib.lines.Line2D at 0x2dc2431f0>,
 <matplotlib.lines.Line2D at 0x2dc2432e0>,
 <matplotlib.lines.Line2D at 0x2dc2433d0>,
 <matplotlib.lines.Line2D at 0x2dc2434c0>,
 <matplotlib.lines.Line2D at 0x2dc2435b0>,
 <matplotlib.lines.Line2D at 0x2dc2436a0>,
 <matplotlib.lines.Line2D at 0x2dc243790>,
 <matplotlib.lines.Line2D at 0x2dc243880>,
 <matplotlib.lines.Line2D at 0x2dc243970>,
 <matplotli

In [51]:
!pip install pydub

In [52]:
fid = "./datasets/ESC-50-master/audio/1-100032-A-0.wav"
# times between which to extract the wave from
start = 0 # seconds
end = 1 # seconds

def segment(fid:str, chunk:int):
    # file to extract the snippet from
    data = [ ]
    with wave.open(fid, "rb") as infile:
        # get file data
        nchannels = infile.getnchannels()
        sampwidth = infile.getsampwidth()
        framerate = infile.getframerate()
        # set position in wave to start of segment
        for sec in range(4):
            infile.setpos(int(sec * framerate))
            data.append(infile.readframes(chunk * framerate))   
    return data, (nchannels, sampwidth , framerate)
        

In [53]:
data = segment(fid = "./datasets/ESC-50-master/audio/1-100032-A-0.wav",
       chunk=1)

in_paranet = Path('./datasets/ESC-50-master/audio/')
out_data_dir = Path('ESC-50')
out_data_dir.mkdir(exist_ok=True)
all_out = [ ]
for idx, row in df.iterrows():
    out_dir = out_data_dir/row.category
    out_dir.mkdir(exist_ok=True)
    data, vals = segment(fid = str(in_paranet/row.filename),
       chunk=1)
    chans, samp_width , rate = vals
    out_fids = [ ]
    for idx,sound in enumerate(data):
        out_fid = out_dir/f'{idx}_{row.filename}'
        out_fids.append(out_fid)
        with wave.open(str(out_fid), 'w') as outfile:
            outfile.setnchannels(chans)
            outfile.setsampwidth(samp_width)
            outfile.setframerate(rate)
            outfile.setnframes(int(len(sound) /  samp_width))
            outfile.writeframes(sound)
    all_out.append(out_fids)
        
    
    
    

In [54]:
df['all_out']=all_out 

In [55]:
fid = df[0:1].all_out.values[0][2]
file_contents = tf.io.read_file(str(fid))
wav, sample_rate = tf.audio.decode_wav(file_contents, desired_channels=chans)

In [56]:
spct = get_arm_spectrogram(wav)

In [57]:
np.std(spct)

0.6403931

In [58]:

def plot_spectrogram(spectrogram,sample_rate, vmax=None):
  transposed_spectrogram = tf.transpose(spectrogram)

  fig = plt.figure(figsize=(8,6))
  height = transposed_spectrogram.shape[0]
  X = np.arange(transposed_spectrogram.shape[1])
  Y = np.arange(height * int(sample_rate / 256), step=int(sample_rate / 256))

  im = plt.pcolormesh(X, Y, tf.transpose(spectrogram), vmax=vmax)

  fig.colorbar(im)
  plt.show()


plot_spectrogram(spct,16000)

In [59]:
df

               filename  fold  target        category  esc10  src_file take  \
0      1-100032-A-0.wav     1       0             dog   True    100032    A   
1     1-100038-A-14.wav     1      14  chirping_birds  False    100038    A   
2     1-100210-A-36.wav     1      36  vacuum_cleaner  False    100210    A   
3     1-100210-B-36.wav     1      36  vacuum_cleaner  False    100210    B   
4     1-101296-A-19.wav     1      19    thunderstorm  False    101296    A   
...                 ...   ...     ...             ...    ...       ...  ...   
1995   5-263831-B-6.wav     5       6             hen  False    263831    B   
1996  5-263902-A-36.wav     5      36  vacuum_cleaner  False    263902    A   
1997   5-51149-A-25.wav     5      25       footsteps  False     51149    A   
1998    5-61635-A-8.wav     5       8           sheep  False     61635    A   
1999     5-9032-A-0.wav     5       0             dog   True      9032    A   

                                                all

In [60]:
all_out[0]

[PosixPath('ESC-50/dog/0_1-100032-A-0.wav'),
 PosixPath('ESC-50/dog/1_1-100032-A-0.wav'),
 PosixPath('ESC-50/dog/2_1-100032-A-0.wav'),
 PosixPath('ESC-50/dog/3_1-100032-A-0.wav')]

In [61]:
all_out[...,0]

In [62]:
all_out[...,1]

In [63]:
all_out[...:,1]

In [64]:
all_out[...:1]

In [65]:
all_out[1,...]

In [66]:
all_out[1:,...]

In [67]:
all_out

[[PosixPath('ESC-50/dog/0_1-100032-A-0.wav'),
  PosixPath('ESC-50/dog/1_1-100032-A-0.wav'),
  PosixPath('ESC-50/dog/2_1-100032-A-0.wav'),
  PosixPath('ESC-50/dog/3_1-100032-A-0.wav')],
 [PosixPath('ESC-50/chirping_birds/0_1-100038-A-14.wav'),
  PosixPath('ESC-50/chirping_birds/1_1-100038-A-14.wav'),
  PosixPath('ESC-50/chirping_birds/2_1-100038-A-14.wav'),
  PosixPath('ESC-50/chirping_birds/3_1-100038-A-14.wav')],
 [PosixPath('ESC-50/vacuum_cleaner/0_1-100210-A-36.wav'),
  PosixPath('ESC-50/vacuum_cleaner/1_1-100210-A-36.wav'),
  PosixPath('ESC-50/vacuum_cleaner/2_1-100210-A-36.wav'),
  PosixPath('ESC-50/vacuum_cleaner/3_1-100210-A-36.wav')],
 [PosixPath('ESC-50/vacuum_cleaner/0_1-100210-B-36.wav'),
  PosixPath('ESC-50/vacuum_cleaner/1_1-100210-B-36.wav'),
  PosixPath('ESC-50/vacuum_cleaner/2_1-100210-B-36.wav'),
  PosixPath('ESC-50/vacuum_cleaner/3_1-100210-B-36.wav')],
 [PosixPath('ESC-50/thunderstorm/0_1-101296-A-19.wav'),
  PosixPath('ESC-50/thunderstorm/1_1-101296-A-19.wav'),
  Po

In [68]:
all_out[:]

[[PosixPath('ESC-50/dog/0_1-100032-A-0.wav'),
  PosixPath('ESC-50/dog/1_1-100032-A-0.wav'),
  PosixPath('ESC-50/dog/2_1-100032-A-0.wav'),
  PosixPath('ESC-50/dog/3_1-100032-A-0.wav')],
 [PosixPath('ESC-50/chirping_birds/0_1-100038-A-14.wav'),
  PosixPath('ESC-50/chirping_birds/1_1-100038-A-14.wav'),
  PosixPath('ESC-50/chirping_birds/2_1-100038-A-14.wav'),
  PosixPath('ESC-50/chirping_birds/3_1-100038-A-14.wav')],
 [PosixPath('ESC-50/vacuum_cleaner/0_1-100210-A-36.wav'),
  PosixPath('ESC-50/vacuum_cleaner/1_1-100210-A-36.wav'),
  PosixPath('ESC-50/vacuum_cleaner/2_1-100210-A-36.wav'),
  PosixPath('ESC-50/vacuum_cleaner/3_1-100210-A-36.wav')],
 [PosixPath('ESC-50/vacuum_cleaner/0_1-100210-B-36.wav'),
  PosixPath('ESC-50/vacuum_cleaner/1_1-100210-B-36.wav'),
  PosixPath('ESC-50/vacuum_cleaner/2_1-100210-B-36.wav'),
  PosixPath('ESC-50/vacuum_cleaner/3_1-100210-B-36.wav')],
 [PosixPath('ESC-50/thunderstorm/0_1-101296-A-19.wav'),
  PosixPath('ESC-50/thunderstorm/1_1-101296-A-19.wav'),
  Po

In [69]:
all_out[:1]

[[PosixPath('ESC-50/dog/0_1-100032-A-0.wav'),
  PosixPath('ESC-50/dog/1_1-100032-A-0.wav'),
  PosixPath('ESC-50/dog/2_1-100032-A-0.wav'),
  PosixPath('ESC-50/dog/3_1-100032-A-0.wav')]]

In [70]:
all_out[:,:1]

In [71]:
all_out[,:,:1]

In [72]:
all_out[::1]

[[PosixPath('ESC-50/dog/0_1-100032-A-0.wav'),
  PosixPath('ESC-50/dog/1_1-100032-A-0.wav'),
  PosixPath('ESC-50/dog/2_1-100032-A-0.wav'),
  PosixPath('ESC-50/dog/3_1-100032-A-0.wav')],
 [PosixPath('ESC-50/chirping_birds/0_1-100038-A-14.wav'),
  PosixPath('ESC-50/chirping_birds/1_1-100038-A-14.wav'),
  PosixPath('ESC-50/chirping_birds/2_1-100038-A-14.wav'),
  PosixPath('ESC-50/chirping_birds/3_1-100038-A-14.wav')],
 [PosixPath('ESC-50/vacuum_cleaner/0_1-100210-A-36.wav'),
  PosixPath('ESC-50/vacuum_cleaner/1_1-100210-A-36.wav'),
  PosixPath('ESC-50/vacuum_cleaner/2_1-100210-A-36.wav'),
  PosixPath('ESC-50/vacuum_cleaner/3_1-100210-A-36.wav')],
 [PosixPath('ESC-50/vacuum_cleaner/0_1-100210-B-36.wav'),
  PosixPath('ESC-50/vacuum_cleaner/1_1-100210-B-36.wav'),
  PosixPath('ESC-50/vacuum_cleaner/2_1-100210-B-36.wav'),
  PosixPath('ESC-50/vacuum_cleaner/3_1-100210-B-36.wav')],
 [PosixPath('ESC-50/thunderstorm/0_1-101296-A-19.wav'),
  PosixPath('ESC-50/thunderstorm/1_1-101296-A-19.wav'),
  Po

In [73]:
all_out[::,1]

In [74]:
all_out[::0]

In [75]:
all_out[::1]

[[PosixPath('ESC-50/dog/0_1-100032-A-0.wav'),
  PosixPath('ESC-50/dog/1_1-100032-A-0.wav'),
  PosixPath('ESC-50/dog/2_1-100032-A-0.wav'),
  PosixPath('ESC-50/dog/3_1-100032-A-0.wav')],
 [PosixPath('ESC-50/chirping_birds/0_1-100038-A-14.wav'),
  PosixPath('ESC-50/chirping_birds/1_1-100038-A-14.wav'),
  PosixPath('ESC-50/chirping_birds/2_1-100038-A-14.wav'),
  PosixPath('ESC-50/chirping_birds/3_1-100038-A-14.wav')],
 [PosixPath('ESC-50/vacuum_cleaner/0_1-100210-A-36.wav'),
  PosixPath('ESC-50/vacuum_cleaner/1_1-100210-A-36.wav'),
  PosixPath('ESC-50/vacuum_cleaner/2_1-100210-A-36.wav'),
  PosixPath('ESC-50/vacuum_cleaner/3_1-100210-A-36.wav')],
 [PosixPath('ESC-50/vacuum_cleaner/0_1-100210-B-36.wav'),
  PosixPath('ESC-50/vacuum_cleaner/1_1-100210-B-36.wav'),
  PosixPath('ESC-50/vacuum_cleaner/2_1-100210-B-36.wav'),
  PosixPath('ESC-50/vacuum_cleaner/3_1-100210-B-36.wav')],
 [PosixPath('ESC-50/thunderstorm/0_1-101296-A-19.wav'),
  PosixPath('ESC-50/thunderstorm/1_1-101296-A-19.wav'),
  Po

In [76]:
all_out.T

In [77]:
all_out.t

In [78]:
all_out

[[PosixPath('ESC-50/dog/0_1-100032-A-0.wav'),
  PosixPath('ESC-50/dog/1_1-100032-A-0.wav'),
  PosixPath('ESC-50/dog/2_1-100032-A-0.wav'),
  PosixPath('ESC-50/dog/3_1-100032-A-0.wav')],
 [PosixPath('ESC-50/chirping_birds/0_1-100038-A-14.wav'),
  PosixPath('ESC-50/chirping_birds/1_1-100038-A-14.wav'),
  PosixPath('ESC-50/chirping_birds/2_1-100038-A-14.wav'),
  PosixPath('ESC-50/chirping_birds/3_1-100038-A-14.wav')],
 [PosixPath('ESC-50/vacuum_cleaner/0_1-100210-A-36.wav'),
  PosixPath('ESC-50/vacuum_cleaner/1_1-100210-A-36.wav'),
  PosixPath('ESC-50/vacuum_cleaner/2_1-100210-A-36.wav'),
  PosixPath('ESC-50/vacuum_cleaner/3_1-100210-A-36.wav')],
 [PosixPath('ESC-50/vacuum_cleaner/0_1-100210-B-36.wav'),
  PosixPath('ESC-50/vacuum_cleaner/1_1-100210-B-36.wav'),
  PosixPath('ESC-50/vacuum_cleaner/2_1-100210-B-36.wav'),
  PosixPath('ESC-50/vacuum_cleaner/3_1-100210-B-36.wav')],
 [PosixPath('ESC-50/thunderstorm/0_1-101296-A-19.wav'),
  PosixPath('ESC-50/thunderstorm/1_1-101296-A-19.wav'),
  Po

In [79]:
np.array(all_out)

array([[PosixPath('ESC-50/dog/0_1-100032-A-0.wav'),
        PosixPath('ESC-50/dog/1_1-100032-A-0.wav'),
        PosixPath('ESC-50/dog/2_1-100032-A-0.wav'),
        PosixPath('ESC-50/dog/3_1-100032-A-0.wav')],
       [PosixPath('ESC-50/chirping_birds/0_1-100038-A-14.wav'),
        PosixPath('ESC-50/chirping_birds/1_1-100038-A-14.wav'),
        PosixPath('ESC-50/chirping_birds/2_1-100038-A-14.wav'),
        PosixPath('ESC-50/chirping_birds/3_1-100038-A-14.wav')],
       [PosixPath('ESC-50/vacuum_cleaner/0_1-100210-A-36.wav'),
        PosixPath('ESC-50/vacuum_cleaner/1_1-100210-A-36.wav'),
        PosixPath('ESC-50/vacuum_cleaner/2_1-100210-A-36.wav'),
        PosixPath('ESC-50/vacuum_cleaner/3_1-100210-A-36.wav')],
       ...,
       [PosixPath('ESC-50/footsteps/0_5-51149-A-25.wav'),
        PosixPath('ESC-50/footsteps/1_5-51149-A-25.wav'),
        PosixPath('ESC-50/footsteps/2_5-51149-A-25.wav'),
        PosixPath('ESC-50/footsteps/3_5-51149-A-25.wav')],
       [PosixPath('ESC-50/sheep/

In [80]:
np.array(all_out)[...,0]

array([PosixPath('ESC-50/dog/0_1-100032-A-0.wav'),
       PosixPath('ESC-50/chirping_birds/0_1-100038-A-14.wav'),
       PosixPath('ESC-50/vacuum_cleaner/0_1-100210-A-36.wav'), ...,
       PosixPath('ESC-50/footsteps/0_5-51149-A-25.wav'),
       PosixPath('ESC-50/sheep/0_5-61635-A-8.wav'),
       PosixPath('ESC-50/dog/0_5-9032-A-0.wav')], dtype=object)

In [81]:
sec_files = np.array(all_out)[...,0]

In [82]:
sec_files = np.array(all_out)

In [83]:
sec_files.shape

(2000, 4)

In [84]:
sec_files.shape[-1]

4

In [85]:
sec_files = np.array(all_out)
for files in range(sec_files.shape[-1]):
    df[f'{files}_{files+1}_sec']=sec_files[...,files]

In [86]:
df

               filename  fold  target        category  esc10  src_file take  \
0      1-100032-A-0.wav     1       0             dog   True    100032    A   
1     1-100038-A-14.wav     1      14  chirping_birds  False    100038    A   
2     1-100210-A-36.wav     1      36  vacuum_cleaner  False    100210    A   
3     1-100210-B-36.wav     1      36  vacuum_cleaner  False    100210    B   
4     1-101296-A-19.wav     1      19    thunderstorm  False    101296    A   
...                 ...   ...     ...             ...    ...       ...  ...   
1995   5-263831-B-6.wav     5       6             hen  False    263831    B   
1996  5-263902-A-36.wav     5      36  vacuum_cleaner  False    263902    A   
1997   5-51149-A-25.wav     5      25       footsteps  False     51149    A   
1998    5-61635-A-8.wav     5       8           sheep  False     61635    A   
1999     5-9032-A-0.wav     5       0             dog   True      9032    A   

                                                all

In [87]:
sec_files = np.array(all_out)
for files in range(sec_files.shape[-1]):
    df[f'{files}_{files+1}_sec']=sec_files[...,files].astype(str)

In [88]:
df

               filename  fold  target        category  esc10  src_file take  \
0      1-100032-A-0.wav     1       0             dog   True    100032    A   
1     1-100038-A-14.wav     1      14  chirping_birds  False    100038    A   
2     1-100210-A-36.wav     1      36  vacuum_cleaner  False    100210    A   
3     1-100210-B-36.wav     1      36  vacuum_cleaner  False    100210    B   
4     1-101296-A-19.wav     1      19    thunderstorm  False    101296    A   
...                 ...   ...     ...             ...    ...       ...  ...   
1995   5-263831-B-6.wav     5       6             hen  False    263831    B   
1996  5-263902-A-36.wav     5      36  vacuum_cleaner  False    263902    A   
1997   5-51149-A-25.wav     5      25       footsteps  False     51149    A   
1998    5-61635-A-8.wav     5       8           sheep  False     61635    A   
1999     5-9032-A-0.wav     5       0             dog   True      9032    A   

                                                all

In [89]:
sec_files[...,0]

array([PosixPath('ESC-50/dog/0_1-100032-A-0.wav'),
       PosixPath('ESC-50/chirping_birds/0_1-100038-A-14.wav'),
       PosixPath('ESC-50/vacuum_cleaner/0_1-100210-A-36.wav'), ...,
       PosixPath('ESC-50/footsteps/0_5-51149-A-25.wav'),
       PosixPath('ESC-50/sheep/0_5-61635-A-8.wav'),
       PosixPath('ESC-50/dog/0_5-9032-A-0.wav')], dtype=object)

In [90]:
sec_files[...,1]

array([PosixPath('ESC-50/dog/1_1-100032-A-0.wav'),
       PosixPath('ESC-50/chirping_birds/1_1-100038-A-14.wav'),
       PosixPath('ESC-50/vacuum_cleaner/1_1-100210-A-36.wav'), ...,
       PosixPath('ESC-50/footsteps/1_5-51149-A-25.wav'),
       PosixPath('ESC-50/sheep/1_5-61635-A-8.wav'),
       PosixPath('ESC-50/dog/1_5-9032-A-0.wav')], dtype=object)

In [91]:
sec_files[...,2]

array([PosixPath('ESC-50/dog/2_1-100032-A-0.wav'),
       PosixPath('ESC-50/chirping_birds/2_1-100038-A-14.wav'),
       PosixPath('ESC-50/vacuum_cleaner/2_1-100210-A-36.wav'), ...,
       PosixPath('ESC-50/footsteps/2_5-51149-A-25.wav'),
       PosixPath('ESC-50/sheep/2_5-61635-A-8.wav'),
       PosixPath('ESC-50/dog/2_5-9032-A-0.wav')], dtype=object)

In [92]:
df

               filename  fold  target        category  esc10  src_file take  \
0      1-100032-A-0.wav     1       0             dog   True    100032    A   
1     1-100038-A-14.wav     1      14  chirping_birds  False    100038    A   
2     1-100210-A-36.wav     1      36  vacuum_cleaner  False    100210    A   
3     1-100210-B-36.wav     1      36  vacuum_cleaner  False    100210    B   
4     1-101296-A-19.wav     1      19    thunderstorm  False    101296    A   
...                 ...   ...     ...             ...    ...       ...  ...   
1995   5-263831-B-6.wav     5       6             hen  False    263831    B   
1996  5-263902-A-36.wav     5      36  vacuum_cleaner  False    263902    A   
1997   5-51149-A-25.wav     5      25       footsteps  False     51149    A   
1998    5-61635-A-8.wav     5       8           sheep  False     61635    A   
1999     5-9032-A-0.wav     5       0             dog   True      9032    A   

                                                all

In [93]:
table = wandb.Table

In [94]:
import IPython
from pvrecorder import PvRecorder
import wave
import struct
import time
import sounddevice as sd
import numpy as np
import matplotlib.pyplot as plt
import wandb

In [95]:
!pip install wandb

In [96]:
!pip install wandb

In [97]:
sec_files = np.array(all_out).astype(str)
for files in range(sec_files.shape[-1]):
    df[f'{files}_{files+1}_sec']=sec_files[...,files].astype(str)

In [98]:
sec_files

array([['ESC-50/dog/0_1-100032-A-0.wav', 'ESC-50/dog/1_1-100032-A-0.wav',
        'ESC-50/dog/2_1-100032-A-0.wav', 'ESC-50/dog/3_1-100032-A-0.wav'],
       ['ESC-50/chirping_birds/0_1-100038-A-14.wav',
        'ESC-50/chirping_birds/1_1-100038-A-14.wav',
        'ESC-50/chirping_birds/2_1-100038-A-14.wav',
        'ESC-50/chirping_birds/3_1-100038-A-14.wav'],
       ['ESC-50/vacuum_cleaner/0_1-100210-A-36.wav',
        'ESC-50/vacuum_cleaner/1_1-100210-A-36.wav',
        'ESC-50/vacuum_cleaner/2_1-100210-A-36.wav',
        'ESC-50/vacuum_cleaner/3_1-100210-A-36.wav'],
       ...,
       ['ESC-50/footsteps/0_5-51149-A-25.wav',
        'ESC-50/footsteps/1_5-51149-A-25.wav',
        'ESC-50/footsteps/2_5-51149-A-25.wav',
        'ESC-50/footsteps/3_5-51149-A-25.wav'],
       ['ESC-50/sheep/0_5-61635-A-8.wav',
        'ESC-50/sheep/1_5-61635-A-8.wav',
        'ESC-50/sheep/2_5-61635-A-8.wav',
        'ESC-50/sheep/3_5-61635-A-8.wav'],
       ['ESC-50/dog/0_5-9032-A-0.wav', 'ESC-50/dog/1_5-

In [99]:
import pandas as pd
esc50_csv = './datasets/ESC-50-master/meta/esc50.csv'
base_data_path = './datasets/ESC-50-master/audio/'

df = pd.read_csv(esc50_csv)
df.head(100)

             filename  fold  target        category  esc10  src_file take
0    1-100032-A-0.wav     1       0             dog   True    100032    A
1   1-100038-A-14.wav     1      14  chirping_birds  False    100038    A
2   1-100210-A-36.wav     1      36  vacuum_cleaner  False    100210    A
3   1-100210-B-36.wav     1      36  vacuum_cleaner  False    100210    B
4   1-101296-A-19.wav     1      19    thunderstorm  False    101296    A
..                ...   ...     ...             ...    ...       ...  ...
95   1-20133-A-39.wav     1      39  glass_breaking  False     20133    A
96   1-202111-A-3.wav     1       3             cow  False    202111    A
97   1-20545-A-28.wav     1      28         snoring  False     20545    A
98   1-20736-A-18.wav     1      18    toilet_flush  False     20736    A
99   1-208757-A-2.wav     1       2             pig  False    208757    A

[100 rows x 7 columns]

In [100]:
import tensorflow as tf
import tensorflow_io as tfio

In [101]:
sample_rate = 16000
chans = 1
sound = read_wav('./data/yes/yes_record 3.wav')
sound = sound.astype(np.float32, order='F') / 32768.0

In [102]:
file_contents = tf.io.read_file('./data/yes/yes_record 5.wav')
wav, sample_rate = tf.audio.decode_wav(file_contents, desired_channels=chans)
squeeze_wav = tf.squeeze(wav, axis=-1)
spect = tf.signal.stft(squeeze_wav, frame_length=512, frame_step=128)

In [103]:
wav

<tf.Tensor: shape=(16384, 1), dtype=float32, numpy=
array([[ 0.        ],
       [ 0.        ],
       [ 0.        ],
       ...,
       [-0.00704956],
       [-0.00631714],
       [-0.00570679]], dtype=float32)>

In [104]:
import scipy

In [105]:
import cmsisdsp
from numpy import pi as PI

window_size = 512
step_size = 64

hanning_window_f32 = np.zeros(window_size)
for i in range(window_size):
  hanning_window_f32[i] = 0.5 * (1 - cmsisdsp.arm_cos_f32(2 * PI * i / window_size ))
hanning_window_q15 = cmsisdsp.arm_float_to_q15(hanning_window_f32)
rfftq15 = cmsisdsp.arm_rfft_instance_q15()
status = cmsisdsp.arm_rfft_init_q15(rfftq15, window_size, 0, 1)

def get_arm_spectrogram(waveform):
  num_frames = int(1 + (len(waveform) - window_size) // step_size)
  fft_size = int(window_size // 2 + 1)
  # Convert the audio to q15
  waveform_q15 = cmsisdsp.arm_float_to_q15(waveform)
  # Create empty spectrogram array
  spectrogram_q15 = np.empty((num_frames, fft_size), dtype = np.int16)
  start_index = 0
  for index in range(num_frames):
    # Take the window from the waveform.
    window = waveform_q15[start_index:start_index + window_size]
    # Apply the Hanning Window.
    window = cmsisdsp.arm_mult_q15(window, hanning_window_q15)
    # Calculate the FFT, shift by 7 according to docs
    window = cmsisdsp.arm_rfft_q15(rfftq15, window)
    # Take the absolute value of the FFT and add to the Spectrogram.
    spectrogram_q15[index] = cmsisdsp.arm_cmplx_mag_q15(window)[:fft_size]
    # Increase the start index of the window by the overlap amount.
    start_index += step_size
  # Convert to numpy output ready for keras
  return cmsisdsp.arm_q15_to_float(spectrogram_q15).reshape(num_frames,fft_size) * 512

In [106]:
spct = get_arm_spectrogram(wav)

In [107]:
spect

<tf.Tensor: shape=(125, 257), dtype=complex64, numpy=
array([[-1.1825697e+00+0.0000000e+00j,  7.9305696e-01-3.3905774e-02j,
        -2.6453486e-01-1.7655689e-01j, ...,
         8.3236247e-03-5.0675496e-03j, -7.6637864e-03+3.2727597e-03j,
         5.6102276e-03+0.0000000e+00j],
       [-1.1571851e+00+0.0000000e+00j,  5.1088202e-01+7.2440378e-02j,
         8.9413024e-02+5.3528726e-02j, ...,
        -2.6634298e-03-8.3412975e-05j,  4.6035647e-03+5.5335462e-03j,
        -3.3004284e-03+0.0000000e+00j],
       [-1.1590379e+00+0.0000000e+00j,  5.0499785e-01-6.7214325e-02j,
         3.9693721e-02-8.3785877e-02j, ...,
        -2.0413008e-03+1.2983121e-03j,  4.9695969e-03-2.0664409e-03j,
        -7.0039034e-03+0.0000000e+00j],
       ...,
       [-1.9084167e+00+0.0000000e+00j,  1.0758590e+00+9.9544629e-02j,
        -1.5023670e-01-7.5578287e-02j, ...,
         5.2796602e-03+1.4986992e-03j, -6.4264536e-03-7.1737394e-03j,
         4.9974322e-03+0.0000000e+00j],
       [-1.7120256e+00+0.0000000e+00j,

In [108]:
plt.plot(spct)

[<matplotlib.lines.Line2D at 0x2e5d59490>,
 <matplotlib.lines.Line2D at 0x2e5d595e0>,
 <matplotlib.lines.Line2D at 0x2e5d59550>,
 <matplotlib.lines.Line2D at 0x2e5d59670>,
 <matplotlib.lines.Line2D at 0x2e5d59880>,
 <matplotlib.lines.Line2D at 0x2e5d599a0>,
 <matplotlib.lines.Line2D at 0x2e5d59a00>,
 <matplotlib.lines.Line2D at 0x2e5d59b20>,
 <matplotlib.lines.Line2D at 0x2e5d59d30>,
 <matplotlib.lines.Line2D at 0x2e5d59e50>,
 <matplotlib.lines.Line2D at 0x2e5d59eb0>,
 <matplotlib.lines.Line2D at 0x2e5d595b0>,
 <matplotlib.lines.Line2D at 0x2e5d52070>,
 <matplotlib.lines.Line2D at 0x2e5d52310>,
 <matplotlib.lines.Line2D at 0x2e5d52430>,
 <matplotlib.lines.Line2D at 0x2e5d52490>,
 <matplotlib.lines.Line2D at 0x2e5d525b0>,
 <matplotlib.lines.Line2D at 0x2e5d527c0>,
 <matplotlib.lines.Line2D at 0x2e5d528e0>,
 <matplotlib.lines.Line2D at 0x2e5d52940>,
 <matplotlib.lines.Line2D at 0x2e5d52a60>,
 <matplotlib.lines.Line2D at 0x2e5d52c70>,
 <matplotlib.lines.Line2D at 0x2e5d52d90>,
 <matplotli

In [109]:
!pip install pydub

In [110]:
fid = "./datasets/ESC-50-master/audio/1-100032-A-0.wav"
# times between which to extract the wave from
start = 0 # seconds
end = 1 # seconds

def segment(fid:str, chunk:int):
    # file to extract the snippet from
    data = [ ]
    with wave.open(fid, "rb") as infile:
        # get file data
        nchannels = infile.getnchannels()
        sampwidth = infile.getsampwidth()
        framerate = infile.getframerate()
        # set position in wave to start of segment
        for sec in range(4):
            infile.setpos(int(sec * framerate))
            data.append(infile.readframes(chunk * framerate))   
    return data, (nchannels, sampwidth , framerate)
        

In [111]:
data = segment(fid = "./datasets/ESC-50-master/audio/1-100032-A-0.wav",
       chunk=1)

in_paranet = Path('./datasets/ESC-50-master/audio/')
out_data_dir = Path('ESC-50')
out_data_dir.mkdir(exist_ok=True)
all_out = [ ]
for idx, row in df.iterrows():
    out_dir = out_data_dir/row.category
    out_dir.mkdir(exist_ok=True)
    data, vals = segment(fid = str(in_paranet/row.filename),
       chunk=1)
    chans, samp_width , rate = vals
    out_fids = [ ]
    for idx,sound in enumerate(data):
        out_fid = out_dir/f'{idx}_{row.filename}'
        out_fids.append(out_fid)
        with wave.open(str(out_fid), 'w') as outfile:
            outfile.setnchannels(chans)
            outfile.setsampwidth(samp_width)
            outfile.setframerate(rate)
            outfile.setnframes(int(len(sound) /  samp_width))
            outfile.writeframes(sound)
    all_out.append(out_fids)
        
    
    
    

In [112]:
sec_files = np.array(all_out).astype(str)
for files in range(sec_files.shape[-1]):
    df[f'{files}_{files+1}_sec']=sec_files[...,files].astype(str)

In [113]:
df

               filename  fold  target        category  esc10  src_file take  \
0      1-100032-A-0.wav     1       0             dog   True    100032    A   
1     1-100038-A-14.wav     1      14  chirping_birds  False    100038    A   
2     1-100210-A-36.wav     1      36  vacuum_cleaner  False    100210    A   
3     1-100210-B-36.wav     1      36  vacuum_cleaner  False    100210    B   
4     1-101296-A-19.wav     1      19    thunderstorm  False    101296    A   
...                 ...   ...     ...             ...    ...       ...  ...   
1995   5-263831-B-6.wav     5       6             hen  False    263831    B   
1996  5-263902-A-36.wav     5      36  vacuum_cleaner  False    263902    A   
1997   5-51149-A-25.wav     5      25       footsteps  False     51149    A   
1998    5-61635-A-8.wav     5       8           sheep  False     61635    A   
1999     5-9032-A-0.wav     5       0             dog   True      9032    A   

                                        0_1_sec  \


In [114]:
df.columns

Index(['filename', 'fold', 'target', 'category', 'esc10', 'src_file', 'take',
       '0_1_sec', '1_2_sec', '2_3_sec', '3_4_sec'],
      dtype='object')

In [115]:
list(df.columns)

['filename',
 'fold',
 'target',
 'category',
 'esc10',
 'src_file',
 'take',
 '0_1_sec',
 '1_2_sec',
 '2_3_sec',
 '3_4_sec']

In [116]:
columns = list(df.columns)

In [117]:
table = wandb.Table(columns=columns)

In [118]:
import IPython
from pvrecorder import PvRecorder
import wave
import struct
import time
import sounddevice as sd
import numpy as np
import matplotlib.pyplot as plt
import wandb

In [119]:
columns = list(df.columns)

In [120]:
table = wandb.Table(columns=columns)

In [121]:
df[1:2]

            filename  fold  target        category  esc10  src_file take  \
1  1-100038-A-14.wav     1      14  chirping_birds  False    100038    A   

                                     0_1_sec  \
1  ESC-50/chirping_birds/0_1-100038-A-14.wav   

                                     1_2_sec  \
1  ESC-50/chirping_birds/1_1-100038-A-14.wav   

                                     2_3_sec  \
1  ESC-50/chirping_birds/2_1-100038-A-14.wav   

                                     3_4_sec  
1  ESC-50/chirping_birds/3_1-100038-A-14.wav  

In [122]:
df[1:2][0]

In [123]:
df.iloc(1)

<pandas.core.indexing._iLocIndexer at 0x2ea922590>

In [124]:
df(df.iloc(1))

In [125]:
df[df.iloc(1)]

In [126]:
list(df.iloc(1))

[0        1-100032-A-0.wav
 1       1-100038-A-14.wav
 2       1-100210-A-36.wav
 3       1-100210-B-36.wav
 4       1-101296-A-19.wav
               ...        
 1995     5-263831-B-6.wav
 1996    5-263902-A-36.wav
 1997     5-51149-A-25.wav
 1998      5-61635-A-8.wav
 1999       5-9032-A-0.wav
 Name: filename, Length: 2000, dtype: object,
 0       1
 1       1
 2       1
 3       1
 4       1
        ..
 1995    5
 1996    5
 1997    5
 1998    5
 1999    5
 Name: fold, Length: 2000, dtype: int64,
 0        0
 1       14
 2       36
 3       36
 4       19
         ..
 1995     6
 1996    36
 1997    25
 1998     8
 1999     0
 Name: target, Length: 2000, dtype: int64,
 0                  dog
 1       chirping_birds
 2       vacuum_cleaner
 3       vacuum_cleaner
 4         thunderstorm
              ...      
 1995               hen
 1996    vacuum_cleaner
 1997         footsteps
 1998             sheep
 1999               dog
 Name: category, Length: 2000, dtype: object,
 0        

In [127]:
df[1:2].values

array([['1-100038-A-14.wav', 1, 14, 'chirping_birds', False, 100038, 'A',
        'ESC-50/chirping_birds/0_1-100038-A-14.wav',
        'ESC-50/chirping_birds/1_1-100038-A-14.wav',
        'ESC-50/chirping_birds/2_1-100038-A-14.wav',
        'ESC-50/chirping_birds/3_1-100038-A-14.wav']], dtype=object)

In [128]:
df[1:2].values[:-1]

array([], shape=(0, 11), dtype=object)

In [129]:
df[1:2].values[0][:-1]

array(['1-100038-A-14.wav', 1, 14, 'chirping_birds', False, 100038, 'A',
       'ESC-50/chirping_birds/0_1-100038-A-14.wav',
       'ESC-50/chirping_birds/1_1-100038-A-14.wav',
       'ESC-50/chirping_birds/2_1-100038-A-14.wav'], dtype=object)

In [130]:
df[1:2].values[0]

array(['1-100038-A-14.wav', 1, 14, 'chirping_birds', False, 100038, 'A',
       'ESC-50/chirping_birds/0_1-100038-A-14.wav',
       'ESC-50/chirping_birds/1_1-100038-A-14.wav',
       'ESC-50/chirping_birds/2_1-100038-A-14.wav',
       'ESC-50/chirping_birds/3_1-100038-A-14.wav'], dtype=object)

In [131]:
df[1:2].values[0][:-1]

array(['1-100038-A-14.wav', 1, 14, 'chirping_birds', False, 100038, 'A',
       'ESC-50/chirping_birds/0_1-100038-A-14.wav',
       'ESC-50/chirping_birds/1_1-100038-A-14.wav',
       'ESC-50/chirping_birds/2_1-100038-A-14.wav'], dtype=object)

In [132]:
df[1:2].values[0][-1]

'ESC-50/chirping_birds/3_1-100038-A-14.wav'

In [133]:
wandb.Audio(df[1:2].values[0][-1])

<wandb.data_types.Audio at 0x2eb7d19a0>

In [134]:
len(columns)

11

In [135]:
df

               filename  fold  target        category  esc10  src_file take  \
0      1-100032-A-0.wav     1       0             dog   True    100032    A   
1     1-100038-A-14.wav     1      14  chirping_birds  False    100038    A   
2     1-100210-A-36.wav     1      36  vacuum_cleaner  False    100210    A   
3     1-100210-B-36.wav     1      36  vacuum_cleaner  False    100210    B   
4     1-101296-A-19.wav     1      19    thunderstorm  False    101296    A   
...                 ...   ...     ...             ...    ...       ...  ...   
1995   5-263831-B-6.wav     5       6             hen  False    263831    B   
1996  5-263902-A-36.wav     5      36  vacuum_cleaner  False    263902    A   
1997   5-51149-A-25.wav     5      25       footsteps  False     51149    A   
1998    5-61635-A-8.wav     5       8           sheep  False     61635    A   
1999     5-9032-A-0.wav     5       0             dog   True      9032    A   

                                        0_1_sec  \


In [136]:
table = wandb.Table(data=df,columns=columns)

In [137]:
df.columns

Index(['filename', 'fold', 'target', 'category', 'esc10', 'src_file', 'take',
       '0_1_sec', '1_2_sec', '2_3_sec', '3_4_sec'],
      dtype='object')

In [138]:
df.columns[-4:]

Index(['0_1_sec', '1_2_sec', '2_3_sec', '3_4_sec'], dtype='object')

In [139]:
for col in df.columns[-4:]:
    df[col]

In [140]:
for col in df.columns[-4:]:
    df[col].values
    

In [141]:
for col in df.columns[-4:]:
    sounds = [wandb.Audio(fid) for fid df[col].values]
    table.add_column(name=f'sound_{col}',data=sounds)

In [142]:
for col in df.columns[-4:]:
    sounds = [wandb.Audio(fid) for fid in df[col].values]
    table.add_column(name=f'sound_{col}',data=sounds)

In [143]:
run = wandb.init(entity='tiny-ml',project = 'wake_word')

In [144]:
run.log({'all_sound_table':table})

In [145]:
run.finish()