## Dataset downsampling and format
### To test:
1. Compare size of ogg and mp3 files
2. Check how quickly ogg and mp3 are opened (decompressed)
3. Downsampling to 32 kHz (compare size)
4. torch.load() vs librosa.load()

### Why?
1. ogg is used in all birdclef competition on kaggle
2. ogg is promising to be smaller while not loosing quality



## Sample 100 recordings

In [1]:
from birdclassification.preprocessing.filtering import initial_filter
import shutil

SOURCE = '/media/jacek/E753-A120/xeno-canto/'
SRC = '/home/jacek/Documents/mp3/'

In [3]:
df = initial_filter()
sample = df.sample(n = 100, random_state=123)
sample['filepath'] = df.apply(lambda x: f"{SOURCE}{str(x['id'])}.mp3" , axis=1)
sample

  df = pd.read_csv('../data/xeno_canto_recordings.csv')


Unnamed: 0.1,Unnamed: 0,id,gen,sp,ssp,group,en,rec,cnt,loc,...,Unnamed: 44,Unnamed: 45,Unnamed: 46,Unnamed: 47,Unnamed: 48,Unnamed: 49,Unnamed: 50,Unnamed: 51,Latin name,filepath
134932,134932,708143,Alauda,arvensis,,birds,Eurasian Skylark,Georg Amann,Austria,"Rankweil, Feldkirch, Vorarlberg",...,,,,,,,,,Alauda arvensis,/media/jacek/E753-A120/xeno-canto/708143.mp3
89233,89233,619297,Jynx,torquilla,torquilla,birds,Eurasian Wryneck,Karol Łanocha,Poland,"Gmina Kęty (near Kęty), Powiat oświęcimski, M...",...,,,,,,,,,Jynx torquilla,/media/jacek/E753-A120/xeno-canto/619297.mp3
190242,190242,391049,Sitta,europaea,,birds,Eurasian Nuthatch,Lars Edenius,Sweden,"Baggböle, Umeå, Västerbottens län",...,,,,,,,,,Sitta europaea,/media/jacek/E753-A120/xeno-canto/391049.mp3
70153,70153,701523,Nycticorax,nycticorax,,birds,Black-crowned Night Heron,Dominique Guillerme,France,"Chassey, Côte-d'Or, Bourgogne-Franche-Comté",...,,,,,,,,,Nycticorax nycticorax,/media/jacek/E753-A120/xeno-canto/701523.mp3
7051,7051,590644,Bucephala,clangula,,birds,Common Goldeneye,Simon Elliott,United Kingdom,"Highland, Scotland",...,,,,,,,,,Bucephala clangula,/media/jacek/E753-A120/xeno-canto/590644.mp3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
171045,171045,564408,Aegithalos,caudatus,,birds,Long-tailed Tit,Falco,France,"Montgardin, Hautes-Alpes, Provence-Alpes-Côte ...",...,,,,,,,,,Aegithalos caudatus,/media/jacek/E753-A120/xeno-canto/564408.mp3
107631,107631,87701,Coloeus,monedula,,birds,Western Jackdaw,Volker Arnold,Germany,"Heide-Ruesdorf, Dithmarschen, Schleswig-Holstein",...,44100,,,,,,,,Coloeus monedula,/media/jacek/E753-A120/xeno-canto/87701.mp3
242467,242467,284310,Anthus,pratensis,,birds,Meadow Pipit,Joost van Bruggen,Netherlands,"Arnhem, Arnhem, Gelderland",...,,,,,,,,,Anthus pratensis,/media/jacek/E753-A120/xeno-canto/284310.mp3
10809,10809,183095,Anas,platyrhynchos,,birds,Mallard,Albert Lastukhin,Kazakhstan,"Shieli District, Kyzylorda Province",...,,,,,,,,,Anas platyrhynchos,/media/jacek/E753-A120/xeno-canto/183095.mp3


## Copy sample of 100 files to a folder

In [None]:
for filepath in sample['filepath']:
    shutil.copy(filepath, SRC + filepath.split(sep = '/')[-1])

## Conversion

In [None]:
import os
from pydub import AudioSegment
from pydub.utils import mediainfo

SRC = '/home/jacek/Documents/format_test/source'
DST = '/home/jacek/Documents/format_test/ogg_same_bitrate_32khz'

files = os.listdir(SRC)
for file in files:
    name = file.split(".")[0]
    if name != '':
        try:
            sound = AudioSegment.from_file(f"{SRC}/{name}.mp3")
            sound = sound.set_frame_rate(32000)
            original_bitrate = mediainfo(f"{SRC}/{name}.mp3")['bit_rate']
            sound.export(f"{DST}/{name}.ogg", format="ogg", bitrate=original_bitrate)
        except Exception as error:
            print("An exception occurred:", error)
            print(name)
    

#### source directory size: 224,4MB

#### Same bitrate 
1. original dir -> mp3 with the same bitrate: 2m 9s, 191,3 MB
2. original dir -> ogg with the same bitrate: 1m 56s,  159,6MB  (9 files empty)

#### Resampled to 32khz (no lock on bitrate)
1. original dir -> mp3_32khz : 1m 44s 72,3MB (it lowers bitrate, usually 42kbps)
2. original dir -> ogg_32khz: 1m 39s  71,2 MB (lowers usually to 112 or 76kbps)

#### Same bitrate and resampled to 32khz
1. original dir -> mp3   1m 56s 191,4MB
2. original dir -> ogg   1,28s  145,4MB (12 files empty)

## Result: ogg compress files better (~85% of mp3 file in the same bitrate) 


## Compare load(), librosa vs torch

In [4]:
import librosa
import os
import torchaudio
from birdclassification.preprocessing.utils import timer

@timer
def read_librosa(dir):
    file_list = os.listdir(dir)
    for file in file_list:
        filepath = dir + file
        waveform, sample_rate = librosa.load(filepath)

@timer
def read_torch(dir):
    file_list = os.listdir(dir)
    for file in file_list:
        filepath = dir + file
        waveform, sample_rate = torchaudio.load(filepath)

In [8]:
dir = '/home/jacek/Documents/format_test/source/'
read_librosa(dir)
print("------------------------------------------")
read_torch(dir)

Note: Illegal Audio-MPEG-Header 0x494f4e00 at offset 397500.
Note: Trying to resync...
Note: Hit end of (available) data during resync.
Note: Illegal Audio-MPEG-Header 0x52450050 at offset 1271767.
Note: Trying to resync...
Note: Hit end of (available) data during resync.


Function: 'read_librosa', execution time: 13.58786392211914
----------------
Function: 'read_torch', execution time: 9.357446193695068


In [9]:
dir = '/home/jacek/Documents/format_test/mp3_32khz/'
read_librosa(dir)
print("------------------------------------------")
read_torch(dir)

Function: 'read_librosa', execution time: 10.40324091911316
------------------------------------------
Function: 'read_torch', execution time: 6.297319650650024


In [10]:
dir = '/home/jacek/Documents/format_test/mp3_same_bitrate/'
read_librosa(dir)
print("------------------------------------------")
read_torch(dir)

Function: 'read_librosa', execution time: 14.751846313476562
------------------------------------------
Function: 'read_torch', execution time: 10.022835493087769


In [11]:
dir = '/home/jacek/Documents/format_test/mp3_same_bitrate_32khz/'
read_librosa(dir)
print("------------------------------------------")
read_torch(dir)

Function: 'read_librosa', execution time: 12.236756324768066
------------------------------------------
Function: 'read_torch', execution time: 7.645618438720703


In [13]:
dir = '/home/jacek/Documents/format_test/ogg_32khz/'
read_librosa(dir)
print("------------------------------------------")
read_torch(dir)

Function: 'read_librosa', execution time: 14.018277645111084
------------------------------------------
Function: 'read_torch', execution time: 6.054214000701904


## Test load and resample - torch

In [52]:
@timer
def load_resample_convert_ogg_torch(dir, save_dir):
    file_list = os.listdir(dir)
    for file in file_list:
        filepath = dir + file
        audio, sampling_rate = torchaudio.load(filepath)
        torchaudio.save(save_dir + file, audio, sample_rate = 32000, compression = 5)
        
source= '/home/jacek/Documents/format_test/source/'
save_dir = '/home/jacek/Documents/format_test/ogg_same_bitrate_32khz_16bits_c5/'
load_resample_convert_ogg_torch(source, save_dir)

TypeError: get_save_func.<locals>.save() got an unexpected keyword argument 'compression'

### ogg 32khz 124s
### ogg 32khz 16 bits 112s

In [50]:
source= '/home/jacek/Documents/format_test/source/'
save_dir = '/home/jacek/Documents/format_test/ogg_same_bitrate_32khz_16bits/'

file_list = os.listdir(dir)
for file in file_list:
    filepath = dir + file
    print(torchaudio.info(filepath))

AudioMetaData(sample_rate=32000, num_frames=2492352, num_channels=1, bits_per_sample=0, encoding=VORBIS)
AudioMetaData(sample_rate=32000, num_frames=2268800, num_channels=2, bits_per_sample=0, encoding=VORBIS)
AudioMetaData(sample_rate=32000, num_frames=2195904, num_channels=2, bits_per_sample=0, encoding=VORBIS)
AudioMetaData(sample_rate=32000, num_frames=2849664, num_channels=2, bits_per_sample=0, encoding=VORBIS)
AudioMetaData(sample_rate=32000, num_frames=4029120, num_channels=2, bits_per_sample=0, encoding=VORBIS)
AudioMetaData(sample_rate=32000, num_frames=2019264, num_channels=2, bits_per_sample=0, encoding=VORBIS)
AudioMetaData(sample_rate=32000, num_frames=1836864, num_channels=2, bits_per_sample=0, encoding=VORBIS)
AudioMetaData(sample_rate=32000, num_frames=437056, num_channels=2, bits_per_sample=0, encoding=VORBIS)
AudioMetaData(sample_rate=32000, num_frames=7955648, num_channels=2, bits_per_sample=0, encoding=VORBIS)
AudioMetaData(sample_rate=32000, num_frames=142016, num_