In [1]:
# Подключение библиотек
import librosa
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import os
from PIL import Image
import pathlib
import csv

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

import keras
from keras import models
from keras import layers

import warnings
warnings.filterwarnings('ignore')

In [5]:
# Задание шапки таблицы данных для обучения
header = 'filename chroma_stft rmse spectral_centroid spectral_bandwidth rolloff zero_crossing_rate'
for i in range(1, 21):
    header += f' mfcc{i}'
header += ' label'
header = header.split()

In [7]:
# Извлечение функций
file = open('data.csv', 'w', newline='')
with file:
    writer = csv.writer(file)
    writer.writerow(header)
genres = 'angry chilled happy neutral sad'.split()
for g in genres:
    for filename in os.listdir(f'C:\\Users\\user\\Desktop\\Train\\{g}'):
        songname = f'C:\\Users\\user\\Desktop\\Train\\{g}\\{filename}'
        y, sr = librosa.load(songname, mono=True, duration=30)
        chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
        rmse=librosa.feature.rms(y=y)
        spec_cent = librosa.feature.spectral_centroid(y=y, sr=sr)
        spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)
        rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
        zcr = librosa.feature.zero_crossing_rate(y)
        mfcc = librosa.feature.mfcc(y=y, sr=sr)
        to_append = f'{filename} {np.mean(chroma_stft)} {np.mean(rmse)} {np.mean(spec_cent)} {np.mean(spec_bw)} {np.mean(rolloff)} {np.mean(zcr)}'        
        for e in mfcc:
            to_append += f' {np.mean(e)}'
        to_append += f' {g}'
        file = open('data.csv', 'a', newline='')
        with file:
            writer = csv.writer(file)
            writer.writerow(to_append.split())

In [2]:
# Преобразование полученного файла функций
data = pd.read_csv('data.csv').reset_index()
data = data.drop(['index'],axis=1)
data.head()

Unnamed: 0,filename,chroma_stft,rmse,spectral_centroid,spectral_bandwidth,rolloff,zero_crossing_rate,mfcc1,mfcc2,mfcc3,...,mfcc12,mfcc13,mfcc14,mfcc15,mfcc16,mfcc17,mfcc18,mfcc19,mfcc20,label
0,001.wav,0.44932,0.061361,3079.059976,2693.275778,6049.869999,0.229912,-343.794189,30.688906,-15.840018,...,-2.452909,-4.215964,-0.071347,-3.41641,-5.418313,-2.700627,1.247998,0.751982,4.828207,angry
1,002.wav,0.50607,0.057922,2367.790483,2384.544363,4229.604727,0.131504,-366.752838,28.327007,-18.157299,...,-5.445252,-0.062126,0.066857,-6.336366,1.053317,-7.168151,-1.407118,1.191806,2.260072,angry
2,003.wav,0.432045,0.023526,2591.701724,2619.853698,5499.470355,0.163465,-474.65863,34.974876,-7.841065,...,-2.695741,-3.05449,-1.559706,-3.396726,-1.685482,-0.118342,1.202329,1.822596,7.559172,angry
3,004.wav,0.482492,0.058055,2525.448793,2413.145172,4952.198543,0.136912,-349.897095,43.33046,-18.259544,...,0.638473,-2.003618,2.220423,-5.797532,3.32122,-0.82945,2.31482,5.49183,8.821283,angry
4,005.wav,0.388032,0.058289,3115.824434,2492.394799,5793.514316,0.182514,-388.422485,34.102715,-20.510458,...,-5.72095,-7.264971,-5.371935,-2.434712,-7.158056,-0.002204,2.57134,-0.628871,3.901053,angry


In [3]:
# Преобразование данных для обучения
scaler = StandardScaler()
X = scaler.fit_transform(data.drop(['filename', 'label'],axis=1).values)

In [4]:
X

array([[-0.40763205,  3.80280416,  0.66277582, ...,  0.64427888,
         0.90513809,  1.59496332],
       [ 0.4640073 ,  3.54989011, -1.36836336, ..., -0.22661084,
         1.04311311,  0.8381416 ],
       [-0.6729604 ,  1.02057738, -0.72895045, ...,  0.62929918,
         1.24099467,  2.39977089],
       ...,
       [ 1.00352492, -0.56213561,  0.57679699, ..., -0.54916428,
         0.05821169,  0.23895281],
       [ 1.07467689, -0.55117232,  0.79667249, ..., -1.17219894,
        -0.39330505,  0.01063524],
       [-2.3324741 , -0.50044242, -2.2444935 , ..., -0.39164112,
        -1.10656368, -0.48902932]])

In [5]:
# Составление вектора данных с ярлыками возможных категорий данных
encoder = LabelEncoder()
y = encoder.fit_transform(data['label'])

In [6]:
y

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

In [7]:
# Разделение данных на выборки для обучения
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [8]:
# Архитектура модели

model = models.Sequential()
model.add(layers.Dense(512, activation='relu', input_shape=(X_train.shape[1],)))
# model.add(layers.Dropout(0.5))
model.add(layers.Dense(256, activation='relu'))
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dense(64, activation='relu'))
# model.add(layers.Dropout(0.5))
model.add(layers.Dense(10, activation='softmax'))

model.compile(loss='sparse_categorical_crossentropy', metrics=['accuracy'], optimizer='adam')
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 512)               13824     
                                                                 
 dense_1 (Dense)             (None, 256)               131328    
                                                                 
 dense_2 (Dense)             (None, 128)               32896     
                                                                 
 dense_3 (Dense)             (None, 64)                8256      
                                                                 
 dense_4 (Dense)             (None, 10)                650       
                                                                 
Total params: 186,954
Trainable params: 186,954
Non-trainable params: 0
_________________________________________________________________


In [9]:
# Точность модели перед тренировкой
score = model.evaluate(X_train, y_train, verbose=0)
accuracy = 100*score[1]
print(accuracy)

12.66375482082367


In [10]:
history = model.fit(X_train, y_train, epochs=30, batch_size=128, validation_data=(X_test, y_test), verbose=1)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [11]:
score = model.evaluate(X_train, y_train, verbose=0)
print("Точность обучения: {0:.2%}".format(score[1]))
score = model.evaluate(X_test, y_test, verbose=0)
print("Точность тестирования: {0:.2%}".format(score[1]))

Точность обучения: 89.37%
Точность тестирования: 65.12%


In [12]:
# Создание файла с результами обучения на исходных данных

In [14]:
# Задание шапки таблицы данных для обучения
header = 'filename chroma_stft rmse spectral_centroid spectral_bandwidth rolloff zero_crossing_rate'
for i in range(1, 21):
    header += f' mfcc{i}'
header += ' label'
header = header.split()

In [15]:
# Извлечение функций
file = open('data_test_lr2.csv', 'w', newline='')
with file:
    writer = csv.writer(file)
    writer.writerow(header)
genres = 'angry chilled happy neutral sad'.split()
for g in genres:
    for filename in os.listdir(f'C:\\Users\\user\\Desktop\\Train_test\\{g}'):
        songname = f'C:\\Users\\user\\Desktop\\Train_test\\{g}\\{filename}'
        y, sr = librosa.load(songname, mono=True, duration=30)
        chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
        rmse=librosa.feature.rms(y=y)
        spec_cent = librosa.feature.spectral_centroid(y=y, sr=sr)
        spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)
        rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
        zcr = librosa.feature.zero_crossing_rate(y)
        mfcc = librosa.feature.mfcc(y=y, sr=sr)
        to_append = f'{filename} {np.mean(chroma_stft)} {np.mean(rmse)} {np.mean(spec_cent)} {np.mean(spec_bw)} {np.mean(rolloff)} {np.mean(zcr)}'        
        for e in mfcc:
            to_append += f' {np.mean(e)}'
        to_append += f' {g}'
        file = open('data_test_lr2.csv', 'a', newline='')
        with file:
            writer = csv.writer(file)
            writer.writerow(to_append.split())

In [17]:
# Преобразование полученного файла функций
data_test = pd.read_csv('data_test_lr2.csv').reset_index()
data_test = data_test.drop(['index'],axis=1)
data_test.head()

Unnamed: 0,filename,chroma_stft,rmse,spectral_centroid,spectral_bandwidth,rolloff,zero_crossing_rate,mfcc1,mfcc2,mfcc3,...,mfcc12,mfcc13,mfcc14,mfcc15,mfcc16,mfcc17,mfcc18,mfcc19,mfcc20,label
0,001.wav,0.44932,0.061361,3079.059976,2693.275778,6049.869999,0.229912,-343.794189,30.688906,-15.840018,...,-2.452909,-4.215964,-0.071347,-3.41641,-5.418313,-2.700627,1.247998,0.751982,4.828207,angry
1,002.wav,0.50607,0.057922,2367.790483,2384.544363,4229.604727,0.131504,-366.752838,28.327007,-18.157299,...,-5.445252,-0.062126,0.066857,-6.336366,1.053317,-7.168151,-1.407118,1.191806,2.260072,angry
2,003.wav,0.432045,0.023526,2591.701724,2619.853698,5499.470355,0.163465,-474.65863,34.974876,-7.841065,...,-2.695741,-3.05449,-1.559706,-3.396726,-1.685482,-0.118342,1.202329,1.822596,7.559172,angry
3,004.wav,0.482492,0.058055,2525.448793,2413.145172,4952.198543,0.136912,-349.897095,43.33046,-18.259544,...,0.638473,-2.003618,2.220423,-5.797532,3.32122,-0.82945,2.31482,5.49183,8.821283,angry
4,005.wav,0.388032,0.058289,3115.824434,2492.394799,5793.514316,0.182514,-388.422485,34.102715,-20.510458,...,-5.72095,-7.264971,-5.371935,-2.434712,-7.158056,-0.002204,2.57134,-0.628871,3.901053,angry


In [18]:
# Преобразование данных для обучения
scaler = StandardScaler()
X_test = scaler.fit_transform(data_test.drop(['filename', 'label'],axis=1).values)

In [19]:
X_test

array([[-0.40763205,  3.80280416,  0.66277582, ...,  0.64427888,
         0.90513809,  1.59496332],
       [ 0.4640073 ,  3.54989011, -1.36836336, ..., -0.22661084,
         1.04311311,  0.8381416 ],
       [-0.6729604 ,  1.02057738, -0.72895045, ...,  0.62929918,
         1.24099467,  2.39977089],
       ...,
       [ 1.00352492, -0.56213561,  0.57679699, ..., -0.54916428,
         0.05821169,  0.23895281],
       [ 1.07467689, -0.55117232,  0.79667249, ..., -1.17219894,
        -0.39330505,  0.01063524],
       [-2.3324741 , -0.50044242, -2.2444935 , ..., -0.39164112,
        -1.10656368, -0.48902932]])

In [20]:
# Предсказанные метки классов
predictions=model.predict(X_test) 
predict_classes=np.argmax(predictions,axis=1)
print(predict_classes)

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0
 0 0 0 1 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 2 0 0 0 0 0 0 2 0 0 0 0 0 0 0 4 0 0 0 0 0 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 3 3 1 1 3 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 3 1 3 1 1 3 1 1 3 1 1 1 3 3 3 1 3 1 3 1 1 1 1 1 1 3
 3 1 1 1 1 3 1 3 1 1 1 3 1 3 1 1 1 1 1 1 1 3 1 3 1 3 1 1 1 1 3 1 1 1 1 1 3
 1 1 1 1 1 3 1 1 1 3 1 1 1 3 1 3 1 1 1 1 3 3 3 1 0 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 0 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 

In [21]:
# Предсказанный скор
def predict_prob(number):
    return np.sum(number[0])

predict_score = np.array(list(map(predict_prob, predictions)))
print(predict_score) 

[9.99993801e-01 9.99510527e-01 9.99863029e-01 9.99998808e-01
 9.98272896e-01 9.64488268e-01 9.99999881e-01 9.99987602e-01
 9.99976873e-01 9.99496460e-01 9.99999881e-01 9.95965600e-01
 9.99981403e-01 9.99713600e-01 9.99988794e-01 9.99996185e-01
 9.99020219e-01 9.99998450e-01 9.99988317e-01 1.00000000e+00
 9.99789298e-01 9.97175813e-01 1.00000000e+00 9.99999881e-01
 9.99762118e-01 9.99953151e-01 9.99996662e-01 9.99997139e-01
 9.99981642e-01 9.99622464e-01 9.99967217e-01 9.99534845e-01
 9.99787867e-01 9.99990344e-01 9.99758661e-01 9.99512672e-01
 9.99995112e-01 9.99979615e-01 1.00000000e+00 9.99999523e-01
 9.97333407e-01 9.58524764e-01 9.99946952e-01 9.89480019e-01
 9.69066978e-01 9.99911666e-01 9.99995232e-01 9.99999523e-01
 9.99987006e-01 9.99974370e-01 9.99833226e-01 9.99997377e-01
 9.99889731e-01 9.99790728e-01 9.96166468e-01 9.99997258e-01
 9.99999881e-01 9.99961019e-01 9.99954462e-01 9.97690439e-01
 9.99987006e-01 9.99983549e-01 9.99964356e-01 9.99935627e-01
 9.99996662e-01 9.999620

In [22]:
# Формирование данных в необходимом формате
data_test['predict_classes']=predict_classes
data_test['predict_score']=predict_score

result=data_test[['filename','label', 'predict_score', 'predict_classes']]

In [23]:
result.head()

Unnamed: 0,filename,label,predict_score,predict_classes
0,001.wav,angry,0.999994,0
1,002.wav,angry,0.999511,0
2,003.wav,angry,0.999863,0
3,004.wav,angry,0.999999,0
4,005.wav,angry,0.998273,0


In [24]:
mapping = {0:'angry', 1:'chilled', 2:'happy', 3:'neutral', 4:'sad'}
result['predict_classes'] = result['predict_classes'].map(mapping)

In [25]:
result.head()

Unnamed: 0,filename,label,predict_score,predict_classes
0,001.wav,angry,0.999994,angry
1,002.wav,angry,0.999511,angry
2,003.wav,angry,0.999863,angry
3,004.wav,angry,0.999999,angry
4,005.wav,angry,0.998273,angry


In [26]:
result.to_csv('result_lr2.csv')