In [1]:
import numpy as np
import pandas as pd
import librosa
import os
from tqdm.notebook import tqdm

Преобразовываем аудиозаписи следующим образом:

Cначала вычисляем их спектрограммы. Каждая представляет собой матицу 64х(длина записи).

Затем для каждого из 64 признаков считаем его агрегированные статистики по всей записи: среднее, дисперсию, медиану и максимальное значение.

Также, считаем длину записи как отдельный признак.

Итого получаем 257 признаков.

In [2]:
def count_aggregates(x):
    statfuns = [np.mean, np.var, np.median, np.max]
    return np.concatenate([np.asarray([x.shape[1]])]+[fun(x, axis=1) for fun in statfuns])

def make_aggregate_dataset(folder):
    data = []
    index = list(map(lambda x: x[:-4], filter(lambda x: x[-4:] == '.wav', os.listdir(folder))))
    for name in tqdm(index):
        X, sr = librosa.load(f'{folder}/{name}.wav')
        ms = librosa.feature.melspectrogram(y=X, sr=sr, n_mels=64)
        data.append(count_aggregates(ms))
    data = pd.DataFrame(np.asarray(data))
    data['name'] = index
    data = data.set_index('name')
    return data

In [3]:
train_data = make_aggregate_dataset('train')
train_data

  0%|          | 0/13936 [00:00<?, ?it/s]

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,247,248,249,250,251,252,253,254,255,256
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
c539d24c9e9217f096fafa7e965a2130,155.0,0.003569,0.009412,0.248963,1.189497,1.991287,0.783827,1.873589,5.113715,2.858875,...,0.278252,0.216057,0.124924,0.007460,2.515192e-07,6.419239e-08,5.490138e-08,4.916784e-08,4.556300e-08,4.352738e-08
5d3e96c9f3426d2644c415bbbe8bac5e,276.0,0.018508,0.354308,0.433853,0.159767,0.466421,0.572810,0.360204,0.373809,0.191825,...,0.013532,0.011367,0.007838,0.000310,4.128188e-07,5.281182e-09,4.304997e-09,3.719303e-09,3.360143e-09,3.160844e-09
45564cc479a7532f5c6f882b47a5625f,263.0,0.004545,0.016298,2.921972,2.060477,0.460327,0.672636,1.558972,1.104020,1.342315,...,0.368393,0.276750,0.080671,0.003780,6.744531e-07,2.006724e-08,1.734142e-08,1.564368e-08,1.456778e-08,1.395694e-08
10686c5d46d33fb5c67a7746c003bca5,276.0,0.003432,0.001623,0.002987,0.152193,0.362804,0.073334,0.002451,0.020032,0.037054,...,0.020139,0.051038,0.052316,0.011558,5.682839e-06,1.321561e-10,1.006587e-10,8.267059e-11,7.199730e-11,6.619347e-11
af87cc25b5e7f6d2eb02a7f0e2ae27bc,163.0,0.003128,1.226130,1.790056,0.308300,0.429498,0.399735,0.689673,1.180185,0.831539,...,0.001748,0.003324,0.003075,0.000186,1.230124e-07,3.720133e-09,3.077354e-09,2.686848e-09,2.446150e-09,2.312184e-09
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
d9bd130fb3f2194dfdcd8330fe93466a,303.0,0.000522,0.002441,0.014984,0.178721,0.269521,0.154428,0.221718,0.542099,0.303091,...,0.627987,0.227311,0.012624,0.001978,1.173059e-06,3.970491e-11,3.458816e-11,3.143040e-11,2.942500e-11,2.828351e-11
91bb2e83a8796f8edc9fdd3cb7de5900,261.0,0.058988,1.137335,0.199752,1.605771,0.280331,0.489685,0.246574,0.466328,0.066109,...,0.002452,0.004804,0.001098,0.000047,8.681123e-08,2.524847e-08,2.081440e-08,1.818730e-08,1.658141e-08,1.569022e-08
aa7db97159ad9daf2e9737ee2108f8b5,193.0,0.003513,0.007401,0.362313,1.328883,0.443982,0.295911,1.514980,1.827430,0.323049,...,0.112336,0.007006,0.002448,0.000188,4.520689e-07,1.413509e-10,1.094980e-10,9.091615e-11,7.982612e-11,7.378891e-11
1c9afe5cee82f1144f415b7b3787b6dc,336.0,0.007588,0.001390,0.175199,0.589421,0.265479,0.022933,0.221303,0.353021,0.087246,...,0.012377,0.022215,0.004733,0.000999,4.078461e-07,1.281971e-09,1.137568e-09,1.043595e-09,9.822712e-10,9.468503e-10


In [4]:
test_data = make_aggregate_dataset('test')
test_data

  0%|          | 0/3413 [00:00<?, ?it/s]

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,247,248,249,250,251,252,253,254,255,256
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
515ebe278b1248ba2607e4ea84a27b87,283.0,0.008414,0.056326,0.055790,0.139059,0.102118,0.092215,0.429970,0.274850,0.070965,...,0.040358,0.017069,0.007135,0.000414,9.706617e-08,1.911226e-08,1.667526e-08,1.515681e-08,1.419162e-08,1.364219e-08
1da5cca4bf0df6d234a00ac9cbb797d4,190.0,0.010759,0.018247,0.122245,0.626290,0.625021,0.303279,0.296530,0.246423,0.330745,...,0.012122,0.015518,0.009234,0.000475,5.419678e-08,7.805895e-09,7.084507e-09,6.590844e-09,6.259669e-09,6.065302e-09
91ee09100086a342fc9e49e1da284f06,238.0,0.005794,0.009558,0.265105,2.651803,2.522284,0.501943,0.205158,1.253264,1.521439,...,0.128384,0.073514,0.020125,0.001249,4.029231e-07,1.983162e-08,1.692761e-08,1.515190e-08,1.403972e-08,1.341266e-08
780697c1d8fb971efe14b308256a6016,295.0,0.002646,0.003515,0.041386,1.817238,2.971652,0.134421,0.021950,0.082355,0.167210,...,0.222773,0.088273,0.040419,0.004061,1.508368e-06,5.361935e-09,4.604611e-09,4.138920e-09,3.846286e-09,3.680962e-09
93ea971c23c08c29e98ccfce9b15dee3,310.0,0.051773,0.797993,0.567108,0.935295,0.497550,0.372959,0.446828,0.330738,0.695834,...,0.024364,0.028542,0.011256,0.001556,1.093401e-06,6.126168e-09,5.358080e-09,4.868147e-09,4.553436e-09,4.373406e-09
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
d33c8f04ec537f1a04829a75f3f9a34a,261.0,0.001387,0.004454,0.412675,4.074273,2.006324,1.052827,0.385075,0.890793,0.170163,...,0.053114,0.078203,0.111935,0.005705,1.435712e-06,2.031740e-11,9.978140e-12,5.021210e-12,2.446409e-12,1.166463e-12
12a243ae5afaff66a00b21a6c2b3696e,309.0,0.001633,0.005053,0.344950,1.620214,0.830081,0.375926,0.245675,1.090751,0.816431,...,0.150669,0.061406,0.015269,0.001122,7.164182e-07,4.801139e-09,4.187691e-09,3.801891e-09,3.555644e-09,3.415202e-09
8b52dcb76415f3dd331cbf7cdb056d5c,304.0,0.008507,0.296859,1.078270,0.339759,0.258624,0.750191,0.875059,0.157014,0.188050,...,0.014890,0.011933,0.027728,0.004260,1.100334e-06,1.371877e-10,1.043542e-10,8.603641e-11,7.534507e-11,6.958963e-11
f59acd9ee5152c9f209c49e2fde423ad,157.0,0.001473,0.004658,0.121798,0.554223,1.313624,0.394440,0.176449,0.350410,2.696216,...,0.060091,0.023301,0.013430,0.001384,2.795362e-07,1.430362e-10,1.196741e-10,1.067693e-10,9.912227e-11,9.493316e-11


In [5]:
train_data.to_csv('train_data.csv', header=False)
test_data.to_csv('test_data.csv', header=False)