In [None]:
import glob
import os
from collections import OrderedDict

import numpy as np
import pandas as pd
from tqdm import tqdm

In [None]:
# Make sure the output folder for the generated CSV files exists.
# exist_ok=True keeps the cell safe to re-run and removes the separate
# "check, then create" step.
os.makedirs('csv_files', exist_ok=True)

## Create Coswara's CSV Files

In [None]:
# Build the Coswara shallow-cough index: one row per recording, labelled with
# the covid_status found for that participant in combined_data.csv.
names   = ['ID','STATUS','DIR','DataSet']
join_by = pd.read_csv('dataset/Coswara-Data/combined_data.csv')

shallow_pattern = 'dataset/Coswara-Data/*/*/cough-shallow.wav'
# glob returns files in arbitrary order; sorting keeps the seeded shuffle
# below reproducible across machines.
shallow_paths = sorted(glob.glob(shallow_pattern))
if not shallow_paths:
    # Fail with a clear message instead of silently writing an empty index.
    raise FileNotFoundError('No recordings matched {!r}'.format(shallow_pattern))

# The participant id is the name of the folder holding the recording.
# os.path is used (rather than splitting on '/') so the id is also found when
# glob returns OS-specific separators.
recordings = pd.DataFrame({
    'id': [os.path.basename(os.path.dirname(p)) for p in shallow_paths],
    'DIR': shallow_paths,
})

# A single inner join replaces the former one-merge-per-file loop; recordings
# whose id is absent from the metadata are dropped, as before.
CosData = (
    recordings
    .merge(join_by[['id', 'covid_status']], on='id', how='inner')
    .rename(columns={'id': 'ID', 'covid_status': 'STATUS'})
    .assign(DataSet='coswara')
)
CosData = CosData[names]

# Fixed seed so re-running the notebook writes the same row order.
CosData = CosData.sample(frac=1, random_state=42).reset_index(drop=True)
CosData.to_csv('csv_files/Coswara_dataset.csv', index=False)
CosData.head()

In [None]:
# Gather every STATUS value of CosData, then reduce the list to its distinct
# labels (first-seen order) and show them.
status = [label for label in CosData['STATUS']]
category = list(OrderedDict.fromkeys(status))
print(category)

In [None]:
# Build the Coswara heavy-cough index: one row per recording, labelled with
# the covid_status found for that participant in combined_data.csv.
names   = ['ID','STATUS','DIR','DataSet']
join_by = pd.read_csv('dataset/Coswara-Data/combined_data.csv')

heavy_pattern = 'dataset/Coswara-Data/*/*/cough-heavy.wav'
# glob returns files in arbitrary order; sorting keeps the seeded shuffle
# below reproducible across machines.
heavy_paths = sorted(glob.glob(heavy_pattern))
if not heavy_paths:
    # Fail with a clear message instead of silently writing an empty index.
    raise FileNotFoundError('No recordings matched {!r}'.format(heavy_pattern))

# The participant id is the name of the folder holding the recording.
# os.path is used (rather than splitting on '/') so the id is also found when
# glob returns OS-specific separators.
recordings = pd.DataFrame({
    'id': [os.path.basename(os.path.dirname(p)) for p in heavy_paths],
    'DIR': heavy_paths,
})

# A single inner join replaces the former one-merge-per-file loop; recordings
# whose id is absent from the metadata are dropped, as before.
HevData = (
    recordings
    .merge(join_by[['id', 'covid_status']], on='id', how='inner')
    .rename(columns={'id': 'ID', 'covid_status': 'STATUS'})
    .assign(DataSet='coswara')
)
HevData = HevData[names]

# Fixed seed so re-running the notebook writes the same row order.
HevData = HevData.sample(frac=1, random_state=42).reset_index(drop=True)
HevData.to_csv('csv_files/Coswara_heavy_dataset.csv', index=False)
HevData.head()

## Create Coughvid's CSV File

In [None]:
# Build the Coughvid index from the compiled metadata file.
coughvid  = 'dataset/Coughvid-Data/public_dataset/'

metadata = pd.read_csv(os.path.join(coughvid,'metadata_compiled.csv'),header=0)

# Keep uuid/status for rows with cough_detected >= 0.0 (a missing score fails
# the comparison and is excluded), then drop rows without a uuid or status.
labelled = (
    metadata
    .loc[metadata['cough_detected'] >= 0.0, ['uuid', 'status']]
    .dropna()
)

# The former COVID-19 / non-COVID split-and-concat only reordered the rows and
# was undone by the shuffle, so the frame is shuffled directly.
# NOTE(review): every recording is assumed to be stored as <uuid>.webm —
# confirm the dataset folder holds no other audio formats.
VidData = (
    labelled
    .assign(DIR=coughvid + labelled['uuid'] + '.webm', DataSet='coughvid')
    .rename(columns={'uuid': 'ID', 'status': 'STATUS'})
    # Fixed seed so re-running the notebook writes the same row order.
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
)
VidData.to_csv('csv_files/Coughvid_dataset.csv', index=False)
VidData.head()

In [None]:
# Gather every STATUS value of VidData, then reduce the list to its distinct
# labels (first-seen order) and show them.
status = [label for label in VidData['STATUS']]
category = list(OrderedDict.fromkeys(status))
print(category)

## Create Compare's CSV File

In [None]:
# Build the Compare index straight from its metadata file: Uid -> ID,
# label -> STATUS, and filename resolved against the wav folder.
compare = 'dataset/Compare-Data/dist/wav'
df = pd.read_csv('dataset/Compare-Data/metaData_CCS.csv')

# One constructor call replaces the former "one DataFrame per row" loop; it
# also works when the metadata file has no rows.
CompData = pd.DataFrame({
    'ID': df['Uid'],
    'STATUS': df['label'],
    'DIR': [os.path.join(compare, fn) for fn in df['filename']],
    'DataSet': 'compare',
})

# Fixed seed so re-running the notebook writes the same row order.
CompData = CompData.sample(frac=1, random_state=42).reset_index(drop=True)
CompData.to_csv('csv_files/Compare_dataset.csv', index=False)
CompData.head()
    

## Merge CSV Files

In [None]:
# Reload every per-dataset CSV from disk so the merge section can start from
# the saved files.
#
# NOTE(review): the three *_positive_* files are written by the filtering cells
# further down this notebook, and those cells read csv_files/Merge_dataset.csv,
# which the next cell builds from these very frames. On a fresh checkout the
# files cannot exist yet — confirm the intended cell order / bootstrap step.
required_csvs = [
    'csv_files/Coswara_dataset.csv',
    'csv_files/Coswara_heavy_dataset.csv',
    'csv_files/Coughvid_dataset.csv',
    'csv_files/Compare_dataset.csv',
    'csv_files/Coswara_positive_heavy_dataset.csv',
    'csv_files/Coswara_positive_shallow_dataset.csv',
    'csv_files/Coughvid_positive_dataset.csv',
]
missing_csvs = [path for path in required_csvs if not os.path.exists(path)]
if missing_csvs:
    # Fail before loading anything, naming every absent file at once.
    raise FileNotFoundError(
        'Missing input CSV file(s): {}. The *_positive_* files are produced by '
        'the filtering cells later in this notebook.'.format(', '.join(missing_csvs))
    )

CosData = pd.read_csv('csv_files/Coswara_dataset.csv')
HevData = pd.read_csv('csv_files/Coswara_heavy_dataset.csv')
VidData = pd.read_csv('csv_files/Coughvid_dataset.csv')
CompData = pd.read_csv('csv_files/Compare_dataset.csv')
PosData = pd.read_csv('csv_files/Coswara_positive_heavy_dataset.csv')
Pos_Data = pd.read_csv('csv_files/Coswara_positive_shallow_dataset.csv')
Posvid_Data = pd.read_csv('csv_files/Coughvid_positive_dataset.csv')

In [None]:
# Merge the positive Coswara/Coughvid subsets with the Compare data and
# collapse the dataset-specific diagnosis labels into a binary STATUS.
# Rows whose label is not listed in this map are dropped from the result.
status_map = {
    'healthy': 'negative',
    'negative': 'negative',
    'positive_moderate': 'positive',
    'positive_mild': 'positive',
    'positive_asymp': 'positive',
    'positive': 'positive',
    'COVID-19': 'positive',
}

MergeData = pd.concat([Pos_Data, PosData, Posvid_Data, CompData], ignore_index=True)
MergeData = (
    MergeData
    # Labels missing from status_map become NaN and are removed right after.
    .assign(STATUS=MergeData['STATUS'].map(status_map))
    .dropna(subset=['STATUS'])
    .loc[:, ['ID', 'STATUS', 'DIR', 'DataSet']]
    # One seeded shuffle replaces the two unseeded ones; the row order is now
    # the same on every run.
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
)
MergeData.to_csv('csv_files/Merge_dataset.csv', index=False)
MergeData.head()

In [None]:
# Extract the positive Coswara heavy-cough rows from the merged dataset.
# The CSV written here is one of the files loaded at the start of the
# "Merge CSV" section.
df = pd.read_csv('csv_files/Merge_dataset.csv')

# File name of each recording; astype(str) keeps a missing DIR from raising.
file_names = df['DIR'].astype(str).map(os.path.basename)
is_positive_heavy = (
    (df['DataSet'] == 'coswara')
    & (df['STATUS'] == 'positive')
    & (file_names == 'cough-heavy.wav')
)

# A boolean mask replaces the former row-by-row loop and also yields an empty,
# correctly-shaped frame when nothing matches.
PosData = df.loc[is_positive_heavy, ['ID', 'STATUS', 'DIR', 'DataSet']]
# Fixed seed so re-running the notebook writes the same row order.
PosData = PosData.sample(frac=1, random_state=42).reset_index(drop=True)
PosData.to_csv('csv_files/Coswara_positive_heavy_dataset.csv', index=False)
PosData.head()

In [None]:
# Extract the positive Coswara shallow-cough rows from the merged dataset.
# The CSV written here is one of the files loaded at the start of the
# "Merge CSV" section.
df = pd.read_csv('csv_files/Merge_dataset.csv')

# File name of each recording; astype(str) keeps a missing DIR from raising.
file_names = df['DIR'].astype(str).map(os.path.basename)
is_positive_shallow = (
    (df['DataSet'] == 'coswara')
    & (df['STATUS'] == 'positive')
    & (file_names == 'cough-shallow.wav')
)

# A boolean mask replaces the former row-by-row loop and also yields an empty,
# correctly-shaped frame when nothing matches.
Pos_Data = df.loc[is_positive_shallow, ['ID', 'STATUS', 'DIR', 'DataSet']]
# Fixed seed so re-running the notebook writes the same row order.
Pos_Data = Pos_Data.sample(frac=1, random_state=42).reset_index(drop=True)
Pos_Data.to_csv('csv_files/Coswara_positive_shallow_dataset.csv', index=False)
Pos_Data.head()

In [None]:
# Extract the positive Coughvid rows from the merged dataset.
# The CSV written here is one of the files loaded at the start of the
# "Merge CSV" section.
df = pd.read_csv('csv_files/Merge_dataset.csv')

is_positive_coughvid = (df['DataSet'] == 'coughvid') & (df['STATUS'] == 'positive')

# A boolean mask replaces the former row-by-row loop and also yields an empty,
# correctly-shaped frame when nothing matches.
Posvid_Data = df.loc[is_positive_coughvid, ['ID', 'STATUS', 'DIR', 'DataSet']]
# Fixed seed so re-running the notebook writes the same row order.
Posvid_Data = Posvid_Data.sample(frac=1, random_state=42).reset_index(drop=True)
Posvid_Data.to_csv('csv_files/Coughvid_positive_dataset.csv', index=False)
Posvid_Data.head()

In [None]:
# Sanity check: collect one entry per positive Coswara heavy-cough row of the
# merged dataset; the following cells report how many there are.
df = pd.read_csv('csv_files/Merge_dataset.csv')

# File name of each recording; astype(str) keeps a missing DIR from raising
# (the former loop called .split on every row's DIR).
file_names = df['DIR'].astype(str).map(os.path.basename)
is_positive_heavy = (
    (df['DataSet'] == 'coswara')
    & (df['STATUS'] == 'positive')
    & (file_names == 'cough-heavy.wav')
)

# Same content as before: the DataSet value of every matching row.
df_list = df.loc[is_positive_heavy, 'DataSet'].tolist()

In [None]:
# Number of entries collected in df_list by the previous cell.
len(df_list)

In [None]:
# NOTE(review): same expression as the previous cell — looks like a leftover
# duplicate; confirm whether it can be removed.
len(df_list)