In [1]:
import numpy as np
import os
from os import listdir
import librosa as lb
import pandas as pd
from collections import Counter
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import MinMaxScaler, StandardScaler

In [2]:
if os.path.exists('data')==False:
    os.mkdir('data')

## Making a Experiment Folder

In [33]:
exp = 'exp-9'

In [34]:
if os.path.exists(os.path.join('data',exp))==False:
    os.mkdir(os.path.join('data',exp))
else:
    print('Folder is exists')

In [35]:
dataset = 'dataset/Merge-Data/'

## Extracting Coswara Data Feature

In [36]:
df_coswara = pd.read_csv('csv_files/normalized_data/Coswara_dataset.csv')
df_coswara.head()

Unnamed: 0,filename,label
0,ir3jqGSKT2ManVTAAzj6Ew7orBD2_heavy.wav,positive
1,ZzKpUtFGzsVEK7E1wsVGRftKdx93.wav,positive
2,TkOj2GRygbe2J9HxfMWk02A4e7v1.wav,positive
3,ottC8aetxhRQfKD00RCJc5LWS622_heavy.wav,positive
4,IFMid2LmeCTSQAFaL8wTSUg20No2.wav,positive


In [None]:
img_coswara = []
label_coswara = []
filename_coswara = []

to_hot_one = {'positive':0, 'negative':1}

for index,row in df_coswara.iterrows():
    name = row['filename']
    label = row['label']
    filename = os.path.join(dataset,name)
    scaler = MinMaxScaler()
    
    try:
        print(name, label)
        sound, sample_rate = lb.load(filename, sr=None)
        sound = lb.util.pad_center(sound, 30*sample_rate)
        stft = np.abs(lb.stft(sound))

        mfccs = np.mean(lb.feature.mfcc(y=sound, sr=sample_rate, n_mfcc=40),axis=1)
        mfccs_norm = scaler.fit_transform(np.reshape(mfccs,[mfccs.shape[0],1]))

        chroma = np.mean(lb.feature.chroma_stft(S=stft, sr=sample_rate),axis=1)
        chroma_norm = scaler.fit_transform(np.reshape(chroma,[chroma.shape[0],1]))

        mel = np.mean(lb.feature.melspectrogram(sound, sr=sample_rate),axis=1)
        mel_norm = scaler.fit_transform(np.reshape(mel,[mel.shape[0],1]))

        contrast = np.mean(lb.feature.spectral_contrast(S=stft, sr=sample_rate),axis=1)
        contrast_norm = scaler.fit_transform(np.reshape(contrast,[contrast.shape[0],1]))

        tonnetz = np.mean(lb.feature.tonnetz(y=lb.effects.harmonic(sound), sr=sample_rate),axis=1)
        tonnetz_norm = scaler.fit_transform(np.reshape(tonnetz,[tonnetz.shape[0],1]))

        concat = np.concatenate((mfccs,chroma,mel,contrast,tonnetz))

        img_coswara.append(concat)
        label_coswara.append(to_hot_one[label])
        filename_coswara.append(name)
    except:
        None

ir3jqGSKT2ManVTAAzj6Ew7orBD2_heavy.wav positive
ZzKpUtFGzsVEK7E1wsVGRftKdx93.wav positive
TkOj2GRygbe2J9HxfMWk02A4e7v1.wav positive
ottC8aetxhRQfKD00RCJc5LWS622_heavy.wav positive
IFMid2LmeCTSQAFaL8wTSUg20No2.wav positive
xYCukI9rgMeslwjIYWheFtyZ6zk1_heavy.wav positive
Iz9X0QRimVT2R7AGhD6jZM0ryPp1.wav positive
CGXh77ubexNuv0UUmOHVCnAgurY2.wav positive
BDygu23lyUbBq2NlqfzRxGoMm9B2_heavy.wav positive
cd57GKgW74bBSqmusnCkc8bXujs2.wav positive
2lsppspH44WH3Scc1ojisHaXJzp1_heavy.wav positive
Hge7rEPBdLcy4ckCtvUFfl4p0Ne2_heavy.wav positive
EUjlCZyR0xMbOH31Je8knI3UdKO2.wav positive
LF7wSEDpWafkz5bwtxnFuho9iWz1.wav positive
y0TkG5sfyhadW3VSct50RTl3HN02.wav positive
t5wo2qvXdBehDj8JuF9pGWbb3Rt1.wav positive
rB4ewmXSc9P5IPhLffnBasjYU1s2.wav positive
h4T9i8JwCEbfw4hzNowalt393Wp1_heavy.wav positive
rAWA4P9I7iYPCIvEheITe8FCRKP2_heavy.wav positive
BRVYzpZ9X1TGGLnjYEHIn3VEJ5j1_heavy.wav positive
lVHnQ9UNkAYmjNm7wL885LzpsOk2_heavy.wav positive
pWFMPFBys1bBerYz5Si4Gb8brGn1.wav positive
Z8ulmLPAsmhYfYuq



vXpopcJJEugOInwUtJkEnHgoVhk1.wav positive
IgwjLDmZRyTv03Mq7VdUYL6O5fV2.wav positive
Fh3hBZZRh2UXXicTNGMei8ikbSu2_heavy.wav positive
uRtWGtWi3eVHJ0JhF0ycETkX0ms2.wav positive
4qLi2eWkYGasKhmmPPiyFAGUqBr2_heavy.wav positive
xyNunpsL01N3hkRYCo7pfIkSDgf2_heavy.wav positive
x8PvE190nSZUtv8raWxfg76ek6p2_heavy.wav positive
gpreuyMvtSa8HJhGaliJ6E1Hwx53_heavy.wav positive
tdqx6pwgAaeiofZLQHT82VZcUf33.wav positive
ptuwF1pj4ibIoVuZmt4nKb5naiZ2_heavy.wav positive
0pqMnxeSybfQnGV9nHW3P69FSzE3_heavy.wav positive
W9VOaBOebvQ2uxduqV8wZF4ue572.wav positive
RtRqhfGUUffisj4mpxA9nTYe2cw2.wav positive
YE3Hs5yD9BeIzT8cctZEBKGJIXN2.wav positive




HATVkqrdXhV2eIiwy5iIP7V0bjo2_heavy.wav positive
5SHNxnq02ygR1L7LRTR3gASXpHb2_heavy.wav positive
8PvWScAB6UbnfrkQrpbqLpu7O0z1_heavy.wav positive
RUnaVFVJiLh22WD0dBl3TeJgmMI3_heavy.wav positive
H9aG9NV6Wae3ATv7xapsyAYdIJy2.wav positive
AlM8enXWaFZ96vN2KM98agcXJvx2.wav positive
euld60aqc2OtnNoKYx9P4WDzVM92.wav positive
iYM9uJMyO1UoXTo1tc7rXvsI4bm1_heavy.wav positive
3qA9Ym3caTbMsegKE0yUvLdapHi1_heavy.wav positive
W9VOaBOebvQ2uxduqV8wZF4ue572_heavy.wav positive
Tvdak0zflIVWdvWsNj7l9Ur0k3m1.wav positive
gxbBHDQWiSWdAnKyqT0BMuH9T2k1.wav positive
0Js6ZUZQ9NUnu568Fh7B6mZ1R8o1.wav positive




UhYCvhOCn2erbzJc5faN4ek3D1I2.wav positive
tDjXrO1dI4O35JJXPL3ihLvnAzl1.wav positive
6aOGyUJRIcXSEWNRCkzUodJimux1.wav positive
xgdd8l7OfrMOkvrskW5s6fxk3mg2.wav positive
Y635Ri3QFMcOKC0ckXgW2r0xJ422_heavy.wav positive
lQ1cBTWdazLLa7xWAiSCOmZ709s1_heavy.wav positive
OCFaQR2Fa4S8cFiEJE2HueZH0TH3_heavy.wav positive
iYwmYc9CdlSuzqGwIlXNWI6eFpm1.wav positive
3CwioNQVDBQ6CttLyFVRJpMpVHk2.wav positive
8fdUOI5UtkZBCVFXNT1a4hl3rX33_heavy.wav positive
MBFsLtO6sIf2SGCEfNT2K91gLcy1_heavy.wav positive
xH2X6KXzfefgZFURY27gKT30ysR2_heavy.wav positive
uAEOVPr487bFnks4dvytsxJQZP02_heavy.wav positive
AVbhXf3KiUf9ZmkjvWzDOxevAPH2.wav positive
pi4VnHGXBRXiKcAd1ukYE0d7As42_heavy.wav positive
iS29Ewab4XQHypUcZLygL7AYH6J2_heavy.wav positive
s1x2Ze9iVZNf6oFZDAZ4S9x3Cky2_heavy.wav positive
v4G9xCIR9DdWAiFRSNd4XbwPUP32.wav positive
1aPZz1jeoBcw9B046d504mn4cq82.wav positive
2vH4QMrMaMMm5oZYScY0b36Z8MC3.wav positive
GoGf76VxSgOiEBDUkcP7LONacII2_heavy.wav positive
AlM8enXWaFZ96vN2KM98agcXJvx2_heavy.wav positive
rCsX



EUjlCZyR0xMbOH31Je8knI3UdKO2_heavy.wav positive
QwS7dzEJ48f6hh4eQny6ywozazg2_heavy.wav positive
fX1IzMBBKwXISMTgnZUFQa6o5Rh2.wav positive
fWuphwJwitW9dSX0gNJBZX7YnN52_heavy.wav positive
uwgjET5xm8NHkfQt7LsUpV0xC1W2.wav positive
CGXh77ubexNuv0UUmOHVCnAgurY2_heavy.wav positive
q9AJkEfjEtQYaUFVbbFl1cN7TKH3.wav positive
Uju7jUdiN7VA8j1dI3MQoktzzIV2.wav positive
hCix5UmABWZVW40elRFgkGabPa52_heavy.wav positive
UXhG3vgoxkWtd4Meky1nm0sRgMV2_heavy.wav positive
NAL6D2MCyQbiZUGtdQixl39IH3X2.wav positive
dQEgRb1WIAOmomgTx4amrxbQ2513_heavy.wav positive
FzJhVfH5jFOJ4hf1CWLSQOo8csD3_heavy.wav positive
Iz9X0QRimVT2R7AGhD6jZM0ryPp1_heavy.wav positive
LF7wSEDpWafkz5bwtxnFuho9iWz1_heavy.wav positive
Fx2gbIoPlJXrK5OD8FMeepOGlLa2_heavy.wav positive
YpViFd6bZEaMT5i5DUd18Qh6NQI3_heavy.wav positive
9sAOoNazDOXPars5nzlLzOwuYVW2.wav positive
vKxrwOtywRNOu6bZQTR8sGj0tbv2_heavy.wav positive
y0TkG5sfyhadW3VSct50RTl3HN02_heavy.wav positive
uAEOVPr487bFnks4dvytsxJQZP02.wav positive
Kj5Pw7GpqlhOhtfYKysgdFZcijG3_heavy



CUCqq4q5SQa3fDkpraekej7fNza2.wav positive
S0ZhYGoCVfXWMHzPGZ41PvHYiw12.wav positive
rR7IyyhZigMCIvfd2q2cjaGAIXY2_heavy.wav positive
6kMlitGRc3XF2Nt13Wl3LOs4wMl1_heavy.wav positive
aDAVgxAXQOdOakIroRdHwpL867v2.wav positive
H9aG9NV6Wae3ATv7xapsyAYdIJy2_heavy.wav positive
i0EyWP227GaL7ZQRqCS0iysTxcf2_heavy.wav positive
RUZ93kkyD0hJ1v7awXutr6RgF943_heavy.wav positive
0nP3dtfFxhMihttAMq1eoho5KaU2.wav positive
E3nlERKGBPcSO2PjEDJ6GrRKiy73_heavy.wav positive
IjI3eQUHkrYdJSB4nWeXeNoryNl2.wav positive
OUFazbHbbtRTQcRP5BWBKzeZpvw1_heavy.wav positive
txGfgoOqEuPeMPnrjxaRU8i7XjU2.wav positive
G5wWAa7nvkOR9QvfsAt7tsd5qa82.wav positive
2tjg2NfgJPaYdgTZRDUBhWm8F133.wav positive
IFMid2LmeCTSQAFaL8wTSUg20No2_heavy.wav positive
C1h80BY2J5eZgP3UMb4PTJXyKWy2.wav positive
EuEoeANAhadcy5vzBikn3LxlGKh1_heavy.wav positive
6T43bddKoKfG7MwnJWvrPZSsyrc2.wav positive
BDygu23lyUbBq2NlqfzRxGoMm9B2.wav positive
lpohIzcDNEg3DR38ku43WHiCkC42_heavy.wav positive
7lfdqN3JS8ZUOw42lMsIHbyCajA3_heavy.wav positive
B3QbrMswB6



BG1rEPoPYKUAHCYVzPrcB3I6VUn2.wav positive
aHJPsm6esHeW5apkd8KAFdia9zj2.wav positive
ptuwF1pj4ibIoVuZmt4nKb5naiZ2.wav positive
EbWP0XeMpnbrTsljro0dOGKIell1_heavy.wav positive
Ued1DKV2NkUnzWjJcPMxA6Q261r1_heavy.wav positive
ZaeW7vuikQgf7984JVqXYOtUQMs2.wav positive
gDBsK8LcY0Om4DujIs4lFSsktWu1_heavy.wav positive
M9CdGPOCUFPWV7KnLgvStPgjLrF3_heavy.wav positive
GBwlY1I67YdQ47ceNFdpK87kEr33.wav positive
RmWwhKI8kjPi5qN9H740rfm0jHA2_heavy.wav positive
Fi2lLmV3LHR5brvXs0z1JSKDvnI2.wav positive
8JuoYlwoiUgaSSUUKQnB7F7fG5S2_heavy.wav positive
KFQ0ksPwkDcLUUjbVBYWGWr9ei62_heavy.wav positive
DRoZujDy6CWVFL0UefbHEgCcD2l2.wav positive
o8LJfYAChnd5Jv0WVa6Ni7jpHf92.wav positive
75OYElXmxdQTQne9breAIqnSo7Z2_heavy.wav positive
rB4ewmXSc9P5IPhLffnBasjYU1s2_heavy.wav positive
NVzhTN4CXebH2lQVFkBTyl6ue5h2_heavy.wav positive
BVcBPtxcv3cqEdIkhjM6YenlWtb2.wav positive
46bIeJk9J8XrD4ND2IPRMQzjJBt2_heavy.wav positive
HK3FfcI5SyXc7CtQMaQiMQ2ia1n2_heavy.wav positive
saaw7uHQJQesN9AmgKyXNFAaXB82.wav positive
etg5



AWMx6dOBlFPdjWa86wkLjsGZ6by2.wav positive
HK3FfcI5SyXc7CtQMaQiMQ2ia1n2.wav positive
6ZovArshO1MTe2tpxFWrHmORa2R2_heavy.wav positive
FC4RKq4W8uRrjmin8n2BXC4SrnU2.wav positive
zbIXdZwHjgb2uDtKqIgcSnr7pyD2.wav positive
x89c2X4Q5LdiRvU9XWVkU1XXLE13.wav positive
S0ZhYGoCVfXWMHzPGZ41PvHYiw12_heavy.wav positive
4bnHQCj8tnSEx37AR5HiQAH2vB32_heavy.wav positive
M43FaHpw1ScIi97eG6LBx2fBaJ92.wav positive
IimA0GnOm7ZR4nEr8k0kG4eaR7A3_heavy.wav positive
d7w3B2YcJ3TLx58ryhiASEtwaAu1.wav positive
ffrNAGEUQEfL39VAP9Eau1a3UUX2.wav positive
a96ovf71KqfbVIG8ZF5m6DaRE9Z2.wav positive
0Js6ZUZQ9NUnu568Fh7B6mZ1R8o1_heavy.wav positive




saaw7uHQJQesN9AmgKyXNFAaXB82_heavy.wav positive
8Vz9jwmadkRfjbwrHwGZ2MsUtcz1_heavy.wav positive
nSVDxo9HNmPPggf6WSuAGg9XU6p1_heavy.wav positive
Y7RB11Ha8VNbBDjGoYVuIxnpYKA2_heavy.wav positive
fX1IzMBBKwXISMTgnZUFQa6o5Rh2_heavy.wav positive
31euepHD0deCxTd2nJ1wzXCk5EF3_heavy.wav positive
YHHZYBxPHsUWYITjtMRAixs3DX42.wav positive
YE3Hs5yD9BeIzT8cctZEBKGJIXN2_heavy.wav positive




LTk52ATmpEVOJSTjuNtEZHJx6943.wav positive
JUTgBmtV3PabkG7XQANi7iHlUBy2_heavy.wav positive
C1jXleNkfzaI6sbPJL883EGGRLj2.wav positive
bx564wHvi9d0HyJTtdi0haZuU493.wav positive
Cd7fkliPM2ZUM1skytvoU2vD8jK2.wav positive
9sAOoNazDOXPars5nzlLzOwuYVW2_heavy.wav positive
2j5I1dnLtBN8hyD149k2dZ19foE3_heavy.wav positive
ebtxeRmhQhQ78ZtgSjN7g637e6v1.wav positive
OaOT17qcMJhtsIymSrSJh12d4iE2_heavy.wav positive
vXpopcJJEugOInwUtJkEnHgoVhk1_heavy.wav positive
RRbKCE4xfybmt8BOImiVxHfbFjO2.wav positive
VLrOagtGVFYxWiEnMVFn1BnSWHM2_heavy.wav positive
n4iInpDT2BR5xr3Y6yNqOxvvFUm2_heavy.wav positive
MwT3St0uIWhMDTJwcQIRmcFUwk62_heavy.wav positive
EnEYPaPTh9SeTTD55fxFRNPItqI2_heavy.wav positive
DGWrVMD5yZc5jcjMzblC4kaBLmK2_heavy.wav positive
P25gmf6a64UuD7XtIDeL9VQMzXb2.wav positive
1NQvmLMrJyTwrmbNwAm6wDT4wpz2_heavy.wav positive




1NQvmLMrJyTwrmbNwAm6wDT4wpz2.wav positive




Ny1eG1jTBwWIudUg5hGhCchHKXC2_heavy.wav positive
Y9vdHTL7XdWJVBGtktKW9gkw7yS2_heavy.wav positive
2vH4QMrMaMMm5oZYScY0b36Z8MC3_heavy.wav positive
Zmjv6hT2Q1VfJhGdGh9VkGmpHL03_heavy.wav positive
QjBZv868nydJzk0ZzwgKDHSG6Q82_heavy.wav positive
lN6Y7317RFS0apVdnffs95djXIj1.wav positive
ZIzrdWNQK1Xhn6o6jrzSVBMdphp2.wav positive
3FC5FIdqx5S5mB69VSxhduD1q0H3.wav positive
9hXEs9OejdVxG6JJGCyKQpqVvy43.wav positive
IHOunHyVaoO7AR6Z0QJgIa31BvS2.wav positive
eR2gfqeYAjdgzpwtCDXUqts9yIc2_heavy.wav positive




joORHxc0iCTwgP7Uh5SM5N7rnnf2.wav positive
E4JCh6HgOVRX04QDLVotx8SgXVp1_heavy.wav positive
46bIeJk9J8XrD4ND2IPRMQzjJBt2.wav positive
uRtWGtWi3eVHJ0JhF0ycETkX0ms2_heavy.wav positive
MF5f99sONpcYl5fYsVzh32XwSmm2.wav positive
2j5I1dnLtBN8hyD149k2dZ19foE3.wav positive
Ny1eG1jTBwWIudUg5hGhCchHKXC2.wav positive
RUZ93kkyD0hJ1v7awXutr6RgF943.wav positive
0sIeyohqXMOGTqPA7RiiuyJ3AOt2.wav positive
BRVYzpZ9X1TGGLnjYEHIn3VEJ5j1.wav positive
0KSi2atlmsXNcGMfpDNzIjJSvC23_heavy.wav positive
b7mMUQm5bObj1jwGFaNyaR07pt83_heavy.wav positive
JsTOkdqknPeCRi6i63Cfu1ciuuh2.wav positive
75OYElXmxdQTQne9breAIqnSo7Z2.wav positive
ZaeW7vuikQgf7984JVqXYOtUQMs2_heavy.wav positive
tkM8UQsV2hVGrkSwelg3AyFiXoy2.wav positive
dFtGnzYqh1NVAwQUQ3wkysqAe3n1.wav positive
lQ1cBTWdazLLa7xWAiSCOmZ709s1.wav positive
kBFDtvAVY9QYbi7YHYgd7tNpsWx1.wav positive
xce3uTKiuNSSnnAWLJMorxTjHzI2_heavy.wav positive
UZKdFMia0JWykTWi6T2PoXmS3nt2.wav positive
yL9QW3T2VueXfL3yvsmXsM2hL4v1_heavy.wav positive
08tlSbvXpAggZavIGW994adhh1m1.wav p



CkBv7ZLYRIcugw93mPtPkbWxC412.wav positive
zM5GsorprLYElGQ3uJwnU0g0Fwr1_heavy.wav positive
lpohIzcDNEg3DR38ku43WHiCkC42.wav positive
lN6Y7317RFS0apVdnffs95djXIj1_heavy.wav positive
dvRr97gwv7Y34YqaccErnWcYRY53_heavy.wav positive
h5e7Ff7hWON2RPULdDCxad7xpGv2_heavy.wav positive
imaxvQ1daYhtvhpulzgQy4t6VMl2_heavy.wav positive
qJvoAC408vP9fwBWQUAHHxKI39k1.wav positive
333NjqA1TfZJuICEdXSkPhVz0LA3.wav positive
XwPxN3JASwgk13QZUviNawM7k0v1.wav positive
Qcliznd3z1VdWmJOZh9nvlstTYv1.wav positive
wEMYsNrzUvXhlQHYnFAF6U5a0Py1_heavy.wav positive
jyxqyMzm2IXXaETlfHjV1K6ju492.wav positive
2tjg2NfgJPaYdgTZRDUBhWm8F133_heavy.wav positive
QjBZv868nydJzk0ZzwgKDHSG6Q82.wav positive
XFlqN1sDBKNXyuORhVagkJnr5V33.wav positive
cLcriOknOebRZSFfeQlcUNYzbZL2.wav positive
khBKxUEe1rQxPqpo4pGylAZjsEt1_heavy.wav positive
Cd7fkliPM2ZUM1skytvoU2vD8jK2_heavy.wav positive
YHHZYBxPHsUWYITjtMRAixs3DX42_heavy.wav positive
euld60aqc2OtnNoKYx9P4WDzVM92_heavy.wav positive
05acPS4aRGfvuOfku11Za8zve8i2.wav positive
T190nKNqKo



iOx0zPyPiMZjCWXuf3RGu8HKq8k1.wav positive
0MVnLUuWMBdzmvRDgLvi4TRl4Zf1_heavy.wav positive
ZpsUydPUXygszeJSVtTFjbsqoNQ2.wav positive
AVbhXf3KiUf9ZmkjvWzDOxevAPH2_heavy.wav positive
Ojcz6oRGwLdnOTR7ptVULuG4s8b2_heavy.wav positive
PoxsEOv7MmbHaY4qT3wNcK8GWrV2_heavy.wav positive
1e8i6Q47ewbzrTiKqIeOLEvPv2Z2_heavy.wav positive
BZHvK60v1LY9cBfLvYkBkJxdfNz2.wav positive
dvRr97gwv7Y34YqaccErnWcYRY53.wav positive
nRH1xhm8zjOVzAPYuBMgGMQtes22.wav positive
SmtHDspJwGYS9FYtjXd1IwsYRZc2.wav positive
H7CA9UCGWhXOL2ycjqtRBHNXMsk1.wav positive
kcQhvQN6eeS8DUCNmpADYBr33h93.wav positive
ZzKpUtFGzsVEK7E1wsVGRftKdx93_heavy.wav positive
poQ11Wmw16VIjjkc6dAPeAWzf962.wav positive
QBWDBFqhumZl3w2sxMa87Qm8bVt1.wav positive
Okal6T0guFZi4Slm4k4MEyuCnyv1.wav positive
E3nlERKGBPcSO2PjEDJ6GrRKiy73.wav positive
jAP3y51Zjcd3gkB80PuRAfSXTGa2_heavy.wav positive
DqztVX8gWrOi4il3xouSTV4FwyC3.wav positive
TeWit59ewCYq9lQyAu8gAdUoqQc2_heavy.wav positive
XjrvY8BduPbProBlAHGDHpD2iPa2_heavy.wav positive
OaOT17qcMJhtsIymSrSJh1

In [None]:
np.save(os.path.join('data',exp,'img_coswara.npy'),img_coswara)
np.save(os.path.join('data',exp,'label_coswara.npy'),label_coswara)
np.save(os.path.join('data',exp,'filename_coswara.npy'),filename_coswara)

## Extracting Coughvid Data Feature

In [None]:
df_coughvid = pd.read_csv('csv_files/normalized_data/Coughvid_dataset.csv')
df_coughvid.head()

In [None]:
img_coughvid = []
label_coughvid = []
filename_coughvid = []

to_hot_one = {'positive':0, 'negative':1}

for index,row in df_coughvid.iterrows():
    name = row['filename']
    label = row['label']
    filename = os.path.join(dataset,name)
    scaler = MinMaxScaler()
    
    try:
        print(name, label)
        sound, sample_rate = lb.load(filename, sr=None)
        sound = lb.util.pad_center(sound, 30*sample_rate)
        stft = np.abs(lb.stft(sound))

        mfccs = np.mean(lb.feature.mfcc(y=sound, sr=sample_rate, n_mfcc=40),axis=1)
        mfccs_norm = scaler.fit_transform(np.reshape(mfccs,[mfccs.shape[0],1]))

        chroma = np.mean(lb.feature.chroma_stft(S=stft, sr=sample_rate),axis=1)
        chroma_norm = scaler.fit_transform(np.reshape(chroma,[chroma.shape[0],1]))

        mel = np.mean(lb.feature.melspectrogram(sound, sr=sample_rate),axis=1)
        mel_norm = scaler.fit_transform(np.reshape(mel,[mel.shape[0],1]))

        contrast = np.mean(lb.feature.spectral_contrast(S=stft, sr=sample_rate),axis=1)
        contrast_norm = scaler.fit_transform(np.reshape(contrast,[contrast.shape[0],1]))

        tonnetz = np.mean(lb.feature.tonnetz(y=lb.effects.harmonic(sound), sr=sample_rate),axis=1)
        tonnetz_norm = scaler.fit_transform(np.reshape(tonnetz,[tonnetz.shape[0],1]))

        concat = np.concatenate((mfccs,chroma,mel,contrast,tonnetz))

        img_coughvid.append(concat)
        label_coughvid.append(to_hot_one[label])
        filename_coughvid.append(name)
    
    except:
        None

In [None]:
np.save(os.path.join('data',exp,'img_coughvid.npy'),img_coughvid)
np.save(os.path.join('data',exp,'label_coughvid.npy'),label_coughvid)
np.save(os.path.join('data',exp,'filename_coughvid.npy'),filename_coughvid)

## Extracting Compare Data Feature 

In [None]:
df_compare = pd.read_csv('csv_files/normalized_data/Compare_dataset.csv')
df_compare.head()

In [None]:
img_compare = []
label_compare = []
filename_compare = []

to_hot_one = {'positive':0, 'negative':1}

for index,row in df_compare.iterrows():
    name = row['filename']
    label = row['label']
    filename = os.path.join(dataset,name)
    scaler = MinMaxScaler()
    
    try:
        print(name, label)
        sound, sample_rate = lb.load(filename, sr=None)
        sound = lb.util.pad_center(sound, 30*sample_rate)
        stft = np.abs(lb.stft(sound))

        mfccs = np.mean(lb.feature.mfcc(y=sound, sr=sample_rate, n_mfcc=40),axis=1)
        mfccs_norm = scaler.fit_transform(np.reshape(mfccs,[mfccs.shape[0],1]))

        chroma = np.mean(lb.feature.chroma_stft(S=stft, sr=sample_rate),axis=1)
        chroma_norm = scaler.fit_transform(np.reshape(chroma,[chroma.shape[0],1]))

        mel = np.mean(lb.feature.melspectrogram(sound, sr=sample_rate),axis=1)
        mel_norm = scaler.fit_transform(np.reshape(mel,[mel.shape[0],1]))

        contrast = np.mean(lb.feature.spectral_contrast(S=stft, sr=sample_rate),axis=1)
        contrast_norm = scaler.fit_transform(np.reshape(contrast,[contrast.shape[0],1]))

        tonnetz = np.mean(lb.feature.tonnetz(y=lb.effects.harmonic(sound), sr=sample_rate),axis=1)
        tonnetz_norm = scaler.fit_transform(np.reshape(tonnetz,[tonnetz.shape[0],1]))

        concat = np.concatenate((mfccs,chroma,mel,contrast,tonnetz))
        
        img_compare.append(concat)
        label_compare.append(to_hot_one[label])
        filename_compare.append(name)
    except:
        print('None')

In [None]:
np.save(os.path.join('data',exp,'img_compare.npy'),img_compare)
np.save(os.path.join('data',exp,'label_compare.npy'),label_compare)
np.save(os.path.join('data',exp,'filename_compare.npy'),filename_compare)

## Load Data

In [15]:
img_coswara = np.load(os.path.join('data',exp,'img_coswara.npy'))
label_coswara = np.load(os.path.join('data',exp,'label_coswara.npy'))
filename_coswara = np.load(os.path.join('data',exp,'filename_coswara.npy'))

img_coughvid = np.load(os.path.join('data',exp,'img_coughvid.npy'))
label_coughvid = np.load(os.path.join('data',exp,'label_coughvid.npy'))
filename_coughvid= np.load(os.path.join('data',exp,'filename_coughvid.npy'))

img_compare = np.load(os.path.join('data',exp,'img_compare.npy'))
label_compare = np.load(os.path.join('data',exp,'label_compare.npy'))
filename_compare= np.load(os.path.join('data',exp,'filename_compare.npy'))

## Normalize

In [16]:
filename = np.concatenate((filename_coswara,filename_coughvid,filename_compare))
img = np.concatenate((img_coswara,img_coughvid,img_compare))
label = np.concatenate((label_coswara,label_coughvid,label_compare))

In [17]:
scaler = StandardScaler()

In [18]:
img = scaler.fit_transform(img)

## Convert to Array

In [19]:
img = np.array(img)
label = np.array(label)
filename = np.array(filename)

In [20]:
print(img.shape)

(2569, 193)


## Split data to Train, Devel, & Test data

In [21]:
df_train = pd.read_csv('csv_files/experiment_data/train.csv')
img_train = []
label_train = []

for index, row in df_train.iterrows():
    name = row['filename']
    index, = np.where(filename == name)
    num = list(index)
    if num != []:
        num = num[0]
        img_train.append(img[num])
        label_train.append(label[num])

In [22]:
img_train = np.array(img_train)
label_train = np.array(label_train)

In [23]:
df_devel = pd.read_csv('csv_files/experiment_data/devel.csv')
img_devel = []
label_devel = []

for index, row in df_devel.iterrows():
    name = row['filename']
    index, = np.where(filename == name)
    num = list(index)
    if num != []:
        num = num[0]
        img_devel.append(img[num])
        label_devel.append(label[num])

In [24]:
img_devel = np.array(img_devel)
label_devel = np.array(label_devel)

In [25]:
df_test = pd.read_csv('csv_files/experiment_data/test.csv')
img_test = []
label_test = []

for index, row in df_test.iterrows():
    name = row['filename']
    index, = np.where(filename == name)
    num = list(index)
    if num != []:
        num = num[0]
        img_test.append(img[num])
        label_test.append(label[num])

In [26]:
img_test = np.array(img_test)
label_test = np.array(label_test)

## Save data array

In [27]:
if os.path.exists(os.path.join('data',exp,'array'))==False:
    os.mkdir(os.path.join('data',exp,'array'))

In [28]:
np.save(os.path.join('data',exp,'array','img_train.npy'),img_train)
np.save(os.path.join('data',exp,'array','label_train.npy'),label_train)

np.save(os.path.join('data',exp,'array','img_devel.npy'),img_devel)
np.save(os.path.join('data',exp,'array','label_devel.npy'),label_devel)

np.save(os.path.join('data',exp,'array','img_test.npy'),img_test)
np.save(os.path.join('data',exp,'array','label_test.npy'),label_test)

In [29]:
img_train = np.load(os.path.join('data',exp,'array','img_train.npy'))
label_train = np.load(os.path.join('data',exp,'array','label_train.npy'))

img_val = np.load(os.path.join('data',exp,'array','img_devel.npy'))
label_val = np.load(os.path.join('data',exp,'array','label_devel.npy'))

img_test = np.load(os.path.join('data',exp,'array','img_test.npy'))
label_test = np.load(os.path.join('data',exp,'array','label_test.npy'))

## Augmented using SMOTE

In [30]:
oversample = SMOTE()
img_train_smote, label_train_smote = oversample.fit_resample(img_train, label_train)

In [31]:
oversample = SMOTE()
img_val_smote, label_val_smote = oversample.fit_resample(np.reshape(img_val,[img_val.shape[0],img_val.shape[1]]), label_val)

In [32]:
np.save(os.path.join('data',exp,'array','img_train_smote.npy'),img_train_smote)
np.save(os.path.join('data',exp,'array','label_train_smote.npy'),label_train_smote)

np.save(os.path.join('data',exp,'array','img_devel_smote.npy'),img_val_smote)
np.save(os.path.join('data',exp,'array','label_devel_smote.npy'),label_val_smote)