# Dataset for Asquire experiments

> prepare master_asquire_corpus.csv file that contains informant of entire pnoi-corpus

In [2]:
import pandas as pd
import numpy as np
import shutil
import glob
import json
import os

mkdir = lambda p: 0 if os.path.exists(p) else (os.makedirs(p), 1)[1]

## STATE VARIABLES

In [3]:
IS_DRY_RUN = False
IS_DUMMY_PATH = False

## PATHS

In [4]:
DATA_PATH = "DATA_DUMMY" if IS_DUMMY_PATH else "report/audio_files_v5"

REPORT_FOLDER = "report"; mkdir(REPORT_FOLDER)

0

## Dataset Class

In [5]:
class DataStaticInfo:

    VER = "*"
    SEP = "_"
    META_SEP = "-"
    EXT_SEP = "."
    ANOT_LABELS = ['cc', 'ss', 'aa', 'yy', 'ee', 'ii', 'xx', 'zz', 'uu', 'oo', 'ii-n']

    fkeys = {
        "APP_CODE": "app_code", # 0
        "SID":"sub_id", # 1
        "FCLASS": "file_class", # 2
        "FCIDX": "file_xindex", # 3
        "SCORE": "score", # 4
        "FFMT": "file_format", # 5
        "FNAME": "file_name", # 6
        "FPATH": "file_path", # 7
        "FMATCH": "file_match" # 8
    }

    

In [6]:
class DataExtractFiles(DataStaticInfo):

    ALL_FILES_DF: pd.DataFrame
    

    def __init__(self, path: str, ver="*") -> None:
        self.VER = ver

        all_files = glob.glob(f"{path}/*/{ver}")

        self.ALL_FILES_DF = self.make_files_df(all_files)


    def file_dict(self, fpath: str) -> dict:
        sub_path, fname = os.path.split(fpath)
        _, sub_id = os.path.split(sub_path)

        match_key = fname.split(self.EXT_SEP)[0]

        fitems = fname.replace(self.EXT_SEP, self.SEP, 1).split(self.SEP) 

        # fitems[1] = sub_id
        fitems += [fname, fpath, match_key]
        
        fdict = dict(zip(self.fkeys.values(), fitems))
        return fdict
    
    def make_files_df(self, all_files: list) -> pd.DataFrame:
        files = [self.file_dict(fp) for fp in all_files]
        return pd.DataFrame(files)
    
    def get_file_extention(self) -> str:
        return self.ALL_FILES_DF[self.fkeys["FFMT"]].unique()
    
    def get_subject_list(self) -> list:
        return self.ALL_FILES_DF[self.fkeys["SID"]].unique()
    
asqdata_files = DataExtractFiles(DATA_PATH, ver="*")


In [7]:
asqdata_files.get_subject_list().__len__() 

KeyError: 'sub_id'

In [None]:
print(len(asqdata_files.ALL_FILES_DF), "files")
asqdata_files.ALL_FILES_DF

3135 files


Unnamed: 0,app_code,sub_id,file_class,file_xindex,score,file_format,file_name,file_path,file_match
0,webapp-asquire-mox,anish-02df6c90,cough,1,,wav,webapp-asquire-mox_anish-02df6c90_cough_1_NA.wav,report/audio_files_v5/anish-02df6c90/webapp-as...,webapp-asquire-mox_anish-02df6c90_cough_1_NA
1,webapp-asquire-mox,anish-02df6c90,breath,0,,txt,webapp-asquire-mox_anish-02df6c90_breath_0_NA.txt,report/audio_files_v5/anish-02df6c90/webapp-as...,webapp-asquire-mox_anish-02df6c90_breath_0_NA
2,webapp-asquire-mox,anish-02df6c90,breath,0,,wav,webapp-asquire-mox_anish-02df6c90_breath_0_NA.wav,report/audio_files_v5/anish-02df6c90/webapp-as...,webapp-asquire-mox_anish-02df6c90_breath_0_NA
3,webapp-asquire-mox,anish-02df6c90,zz,4,,txt,webapp-asquire-mox_anish-02df6c90_zz_4_NA.txt,report/audio_files_v5/anish-02df6c90/webapp-as...,webapp-asquire-mox_anish-02df6c90_zz_4_NA
4,webapp-asquire-mox,anish-02df6c90,cough,1,,txt,webapp-asquire-mox_anish-02df6c90_cough_1_NA.txt,report/audio_files_v5/anish-02df6c90/webapp-as...,webapp-asquire-mox_anish-02df6c90_cough_1_NA
...,...,...,...,...,...,...,...,...,...
3130,webapp-asquire-mox,shivendra-49fd0914,aa,4,2,txt,webapp-asquire-mox_shivendra-49fd0914_aa_4_2.txt,report/audio_files_v5/shivendra-49fd0914/webap...,webapp-asquire-mox_shivendra-49fd0914_aa_4_2
3131,webapp-asquire-mox,shivendra-49fd0914,aa,4,2,wav,webapp-asquire-mox_shivendra-49fd0914_aa_4_2.wav,report/audio_files_v5/shivendra-49fd0914/webap...,webapp-asquire-mox_shivendra-49fd0914_aa_4_2
3132,webapp-asquire-mox,shivendra-49fd0914,breath,2,0,txt,webapp-asquire-mox_shivendra-49fd0914_breath_2...,report/audio_files_v5/shivendra-49fd0914/webap...,webapp-asquire-mox_shivendra-49fd0914_breath_2_0
3133,webapp-asquire-mox,shivendra-49fd0914,breath,2,0,wav,webapp-asquire-mox_shivendra-49fd0914_breath_2...,report/audio_files_v5/shivendra-49fd0914/webap...,webapp-asquire-mox_shivendra-49fd0914_breath_2_0


In [None]:
asqdata_files.get_file_extention()

array(['wav', 'txt', 'json'], dtype=object)

In [None]:
class DataSourceFiles(DataStaticInfo):

    src_data_fname_regx = r"^webapp-asquire-(?!.*\.meta\.).+\.(wav|json|)$"

    ALL_FILES_DF: pd.DataFrame
    SOURCE_FILES_DF: pd.DataFrame

    def __init__(self, all_file_df: pd.DataFrame) -> None:
        self.ALL_FILES_DF = all_file_df

        self.SOURCE_FILES_DF = self.pair_src_anot_files()

    def make_source_files_df(self) -> pd.DataFrame:
        filt = self.ALL_FILES_DF.loc[:, self.fkeys["FNAME"]].str.match(self.src_data_fname_regx)

        df = self.ALL_FILES_DF[filt]
        return df
    
    def anot_path_join(self, p: pd.Series) -> str:
        fpath = os.path.join(os.path.split(p[self.fkeys['FPATH']])[0], p[f"anot--{self.fkeys['FNAME']}"])

        return fpath if os.path.exists(fpath) else False
    
    def pair_src_anot_files(self) -> pd.DataFrame:
        anot_format_mapping = { self.fkeys["FFMT"]: {'wav': 'txt', "json": "meta.json"} }

        source_files_DF = self.make_source_files_df()
    
        anot_file_format_DF = source_files_DF.replace(anot_format_mapping) # anotation file extension

        # rename file_format to anot--file-format
        anot_fmt_column = f"anot--{self.fkeys['FFMT']}"
        anot_file_format_DF.rename(columns={self.fkeys["FFMT"]: anot_fmt_column}, inplace=True)
        anot_file_format_DF = anot_file_format_DF[[anot_fmt_column, self.fkeys["FMATCH"]]]
       
        
        # mergr source and anotation files
        src_data_files_DF = pd.merge(source_files_DF, anot_file_format_DF, on=self.fkeys["FMATCH"], how="left")
        anot_fname_column = f"anot--{self.fkeys['FNAME']}"

        src_data_files_DF[anot_fname_column] = src_data_files_DF[self.fkeys["FMATCH"]] + '.' + src_data_files_DF[anot_fmt_column]


        anot_fpath_column = f"anot--{self.fkeys['FPATH']}"
        anote_filepath_df = src_data_files_DF.apply(self.anot_path_join, axis=1).to_frame(name=anot_fpath_column)

        src_data_files_DF = pd.concat([src_data_files_DF, anote_filepath_df], axis=1)

        return src_data_files_DF

asqdata_source_files = DataSourceFiles(asqdata_files.ALL_FILES_DF)
asqdata_source_files.SOURCE_FILES_DF

Unnamed: 0,app_code,sub_id,file_class,file_xindex,score,file_format,file_name,file_path,file_match,anot--file_format,anot--file_name,anot--file_path
0,webapp-asquire-mox,anish-02df6c90,cough,1,,wav,webapp-asquire-mox_anish-02df6c90_cough_1_NA.wav,report/audio_files_v5/anish-02df6c90/webapp-as...,webapp-asquire-mox_anish-02df6c90_cough_1_NA,txt,webapp-asquire-mox_anish-02df6c90_cough_1_NA.txt,report/audio_files_v5/anish-02df6c90/webapp-as...
1,webapp-asquire-mox,anish-02df6c90,breath,0,,wav,webapp-asquire-mox_anish-02df6c90_breath_0_NA.wav,report/audio_files_v5/anish-02df6c90/webapp-as...,webapp-asquire-mox_anish-02df6c90_breath_0_NA,txt,webapp-asquire-mox_anish-02df6c90_breath_0_NA.txt,report/audio_files_v5/anish-02df6c90/webapp-as...
2,webapp-asquire-mox,anish-02df6c90,ss,5,,wav,webapp-asquire-mox_anish-02df6c90_ss_5_NA.wav,report/audio_files_v5/anish-02df6c90/webapp-as...,webapp-asquire-mox_anish-02df6c90_ss_5_NA,txt,webapp-asquire-mox_anish-02df6c90_ss_5_NA.txt,report/audio_files_v5/anish-02df6c90/webapp-as...
3,webapp-asquire-mox,anish-02df6c90,yee,6,,wav,webapp-asquire-mox_anish-02df6c90_yee_6_NA.wav,report/audio_files_v5/anish-02df6c90/webapp-as...,webapp-asquire-mox_anish-02df6c90_yee_6_NA,txt,webapp-asquire-mox_anish-02df6c90_yee_6_NA.txt,report/audio_files_v5/anish-02df6c90/webapp-as...
4,webapp-asquire-mox-na,anish-02df6c90,meta-data,na,na,json,webapp-asquire-mox-na_anish-02df6c90_meta-data...,report/audio_files_v5/anish-02df6c90/webapp-as...,webapp-asquire-mox-na_anish-02df6c90_meta-data...,meta.json,webapp-asquire-mox-na_anish-02df6c90_meta-data...,False
...,...,...,...,...,...,...,...,...,...,...,...,...
1664,webapp-asquire-mox,srinivas-b89febe6,breath,5,,wav,webapp-asquire-mox_srinivas-b89febe6_breath_5_...,report/audio_files_v5/srinivas-b89febe6/webapp...,webapp-asquire-mox_srinivas-b89febe6_breath_5_NA,txt,webapp-asquire-mox_srinivas-b89febe6_breath_5_...,report/audio_files_v5/srinivas-b89febe6/webapp...
1665,webapp-asquire-mox,srinivas-b89febe6,aa,2,,wav,webapp-asquire-mox_srinivas-b89febe6_aa_2_NA.wav,report/audio_files_v5/srinivas-b89febe6/webapp...,webapp-asquire-mox_srinivas-b89febe6_aa_2_NA,txt,webapp-asquire-mox_srinivas-b89febe6_aa_2_NA.txt,report/audio_files_v5/srinivas-b89febe6/webapp...
1666,webapp-asquire-mox,shivendra-49fd0914,aa,4,2,wav,webapp-asquire-mox_shivendra-49fd0914_aa_4_2.wav,report/audio_files_v5/shivendra-49fd0914/webap...,webapp-asquire-mox_shivendra-49fd0914_aa_4_2,txt,webapp-asquire-mox_shivendra-49fd0914_aa_4_2.txt,report/audio_files_v5/shivendra-49fd0914/webap...
1667,webapp-asquire-mox,shivendra-49fd0914,breath,2,0,wav,webapp-asquire-mox_shivendra-49fd0914_breath_2...,report/audio_files_v5/shivendra-49fd0914/webap...,webapp-asquire-mox_shivendra-49fd0914_breath_2_0,txt,webapp-asquire-mox_shivendra-49fd0914_breath_2...,report/audio_files_v5/shivendra-49fd0914/webap...


In [None]:
class ProcessMETAFiles(DataStaticInfo):

    SOURCE_FILES_DF: pd.DataFrame
    SOURCE_FILES_META_DF: pd.DataFrame
    
    def __init__(self, all_src_file_df: pd.DataFrame) -> None:
        self.SOURCE_FILES_DF = all_src_file_df
        
        self.SOURCE_FILES_META_DF = self.make_meta_df()

    def read_metadata(self, row):
        file_path = row[self.fkeys["FPATH"]]

        # file_info = { "file_path": file_path }

        with open(file_path, 'r') as f:
            print(f"./{file_path}")
            data = json.load(f)

            bio_data: dict = data["bio"] #| file_info

            bio_data[self.fkeys["SID"]] = data["userId"].replace("_", "-")
            # survey_data_q = data["survey"]["allQuestions"]
            survey_data_a = data["survey"]

            
            bio_data_dict = [{ "qn": k, "Q": f"{k}?", "A": v } for k, v in sorted(bio_data.items())]
            # survey_data_dict_q = [{ "qn": str(q["qno"]), "Q": q["question"] } for q in survey_data_q.values()]
            survey_data_dict = [{ "qn": str(q["qno"]), "Q": q["question"], "A": q["answer"] } for q in survey_data_a]
            # survey_data_q_DF = pd.DataFrame(survey_data_dict_q)
            survey_data_a_DF = pd.DataFrame(bio_data_dict + survey_data_dict)

            

            # survey_data_o_DF = survey_data_q_DF.merge(survey_data_a_DF, how="outer", on=["qn", "Q"])
            survey_data_o_DF = survey_data_a_DF #survey_data_q_DF.merge(survey_data_a_DF, how="outer", on=["qn", "Q"])
            survey_data_o_DF.fillna("-", inplace=True)

            nfile_path = file_path.replace(".json", ".meta.json")
            print(f"./{nfile_path}")
            survey_data_o_DF.set_index('qn').to_json(nfile_path, orient='index')

            return (bio_data, survey_data_o_DF)
        
    def make_meta_df(self) -> pd.DataFrame:
        filt = self.SOURCE_FILES_DF[self.fkeys["FCLASS"]] == "meta-data"
        src_META_data_DF = self.SOURCE_FILES_DF[filt]

        all_META_data_DF = pd.DataFrame([self.read_metadata(row)[0] for _, row in src_META_data_DF.iterrows()])

        src_data_files_DF = self.SOURCE_FILES_DF.copy()
        src_data_files_DF = src_data_files_DF.merge(all_META_data_DF, how="left", on=[self.fkeys["SID"]])

        return src_data_files_DF


asqdata_meta_files = ProcessMETAFiles(asqdata_source_files.SOURCE_FILES_DF)
asqdata_meta_files.make_meta_df() #.columns.to_list()

./report/audio_files_v5/anish-02df6c90/webapp-asquire-mox-na_anish-02df6c90_meta-data_na_na.json
./report/audio_files_v5/anish-02df6c90/webapp-asquire-mox-na_anish-02df6c90_meta-data_na_na.meta.json
./report/audio_files_v5/ananyach-9a6a2c78/webapp-asquire-mox-na_ananyach-9a6a2c78_meta-data_na_na.json
./report/audio_files_v5/ananyach-9a6a2c78/webapp-asquire-mox-na_ananyach-9a6a2c78_meta-data_na_na.meta.json
./report/audio_files_v5/muqaddas-50022e79/webapp-asquire-mox-na_muqaddas-50022e79_meta-data_na_na.json
./report/audio_files_v5/muqaddas-50022e79/webapp-asquire-mox-na_muqaddas-50022e79_meta-data_na_na.meta.json
./report/audio_files_v5/vinay-56bb71de/webapp-asquire-mox-na_vinay-56bb71de_meta-data_na_na.json
./report/audio_files_v5/vinay-56bb71de/webapp-asquire-mox-na_vinay-56bb71de_meta-data_na_na.meta.json
./report/audio_files_v5/ramswaroop-8fd26b2a/webapp-asquire-mox-na_ramswaroop-8fd26b2a_meta-data_na_na.json
./report/audio_files_v5/ramswaroop-8fd26b2a/webapp-asquire-mox-na_ramswar

Unnamed: 0,app_code,sub_id,file_class,file_xindex,score,file_format,file_name,file_path,file_match,anot--file_format,anot--file_name,anot--file_path,age,height,gender,weight
0,webapp-asquire-mox,anish-02df6c90,cough,1,,wav,webapp-asquire-mox_anish-02df6c90_cough_1_NA.wav,report/audio_files_v5/anish-02df6c90/webapp-as...,webapp-asquire-mox_anish-02df6c90_cough_1_NA,txt,webapp-asquire-mox_anish-02df6c90_cough_1_NA.txt,report/audio_files_v5/anish-02df6c90/webapp-as...,20,160,m,45
1,webapp-asquire-mox,anish-02df6c90,breath,0,,wav,webapp-asquire-mox_anish-02df6c90_breath_0_NA.wav,report/audio_files_v5/anish-02df6c90/webapp-as...,webapp-asquire-mox_anish-02df6c90_breath_0_NA,txt,webapp-asquire-mox_anish-02df6c90_breath_0_NA.txt,report/audio_files_v5/anish-02df6c90/webapp-as...,20,160,m,45
2,webapp-asquire-mox,anish-02df6c90,ss,5,,wav,webapp-asquire-mox_anish-02df6c90_ss_5_NA.wav,report/audio_files_v5/anish-02df6c90/webapp-as...,webapp-asquire-mox_anish-02df6c90_ss_5_NA,txt,webapp-asquire-mox_anish-02df6c90_ss_5_NA.txt,report/audio_files_v5/anish-02df6c90/webapp-as...,20,160,m,45
3,webapp-asquire-mox,anish-02df6c90,yee,6,,wav,webapp-asquire-mox_anish-02df6c90_yee_6_NA.wav,report/audio_files_v5/anish-02df6c90/webapp-as...,webapp-asquire-mox_anish-02df6c90_yee_6_NA,txt,webapp-asquire-mox_anish-02df6c90_yee_6_NA.txt,report/audio_files_v5/anish-02df6c90/webapp-as...,20,160,m,45
4,webapp-asquire-mox-na,anish-02df6c90,meta-data,na,na,json,webapp-asquire-mox-na_anish-02df6c90_meta-data...,report/audio_files_v5/anish-02df6c90/webapp-as...,webapp-asquire-mox-na_anish-02df6c90_meta-data...,meta.json,webapp-asquire-mox-na_anish-02df6c90_meta-data...,False,20,160,m,45
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1664,webapp-asquire-mox,srinivas-b89febe6,breath,5,,wav,webapp-asquire-mox_srinivas-b89febe6_breath_5_...,report/audio_files_v5/srinivas-b89febe6/webapp...,webapp-asquire-mox_srinivas-b89febe6_breath_5_NA,txt,webapp-asquire-mox_srinivas-b89febe6_breath_5_...,report/audio_files_v5/srinivas-b89febe6/webapp...,60,172,m,85
1665,webapp-asquire-mox,srinivas-b89febe6,aa,2,,wav,webapp-asquire-mox_srinivas-b89febe6_aa_2_NA.wav,report/audio_files_v5/srinivas-b89febe6/webapp...,webapp-asquire-mox_srinivas-b89febe6_aa_2_NA,txt,webapp-asquire-mox_srinivas-b89febe6_aa_2_NA.txt,report/audio_files_v5/srinivas-b89febe6/webapp...,60,172,m,85
1666,webapp-asquire-mox,shivendra-49fd0914,aa,4,2,wav,webapp-asquire-mox_shivendra-49fd0914_aa_4_2.wav,report/audio_files_v5/shivendra-49fd0914/webap...,webapp-asquire-mox_shivendra-49fd0914_aa_4_2,txt,webapp-asquire-mox_shivendra-49fd0914_aa_4_2.txt,report/audio_files_v5/shivendra-49fd0914/webap...,20,170,m,78
1667,webapp-asquire-mox,shivendra-49fd0914,breath,2,0,wav,webapp-asquire-mox_shivendra-49fd0914_breath_2...,report/audio_files_v5/shivendra-49fd0914/webap...,webapp-asquire-mox_shivendra-49fd0914_breath_2_0,txt,webapp-asquire-mox_shivendra-49fd0914_breath_2...,report/audio_files_v5/shivendra-49fd0914/webap...,20,170,m,78


In [None]:
asqdata_meta_files.SOURCE_FILES_META_DF

Unnamed: 0,app_code,sub_id,file_class,file_xindex,score,file_format,file_name,file_path,file_match,anot--file_format,anot--file_name,anot--file_path,age,height,gender,weight
0,webapp-asquire-mox,anish-02df6c90,cough,1,,wav,webapp-asquire-mox_anish-02df6c90_cough_1_NA.wav,report/audio_files_v5/anish-02df6c90/webapp-as...,webapp-asquire-mox_anish-02df6c90_cough_1_NA,txt,webapp-asquire-mox_anish-02df6c90_cough_1_NA.txt,report/audio_files_v5/anish-02df6c90/webapp-as...,20,160,m,45
1,webapp-asquire-mox,anish-02df6c90,breath,0,,wav,webapp-asquire-mox_anish-02df6c90_breath_0_NA.wav,report/audio_files_v5/anish-02df6c90/webapp-as...,webapp-asquire-mox_anish-02df6c90_breath_0_NA,txt,webapp-asquire-mox_anish-02df6c90_breath_0_NA.txt,report/audio_files_v5/anish-02df6c90/webapp-as...,20,160,m,45
2,webapp-asquire-mox,anish-02df6c90,ss,5,,wav,webapp-asquire-mox_anish-02df6c90_ss_5_NA.wav,report/audio_files_v5/anish-02df6c90/webapp-as...,webapp-asquire-mox_anish-02df6c90_ss_5_NA,txt,webapp-asquire-mox_anish-02df6c90_ss_5_NA.txt,report/audio_files_v5/anish-02df6c90/webapp-as...,20,160,m,45
3,webapp-asquire-mox,anish-02df6c90,yee,6,,wav,webapp-asquire-mox_anish-02df6c90_yee_6_NA.wav,report/audio_files_v5/anish-02df6c90/webapp-as...,webapp-asquire-mox_anish-02df6c90_yee_6_NA,txt,webapp-asquire-mox_anish-02df6c90_yee_6_NA.txt,report/audio_files_v5/anish-02df6c90/webapp-as...,20,160,m,45
4,webapp-asquire-mox-na,anish-02df6c90,meta-data,na,na,json,webapp-asquire-mox-na_anish-02df6c90_meta-data...,report/audio_files_v5/anish-02df6c90/webapp-as...,webapp-asquire-mox-na_anish-02df6c90_meta-data...,meta.json,webapp-asquire-mox-na_anish-02df6c90_meta-data...,False,20,160,m,45
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1664,webapp-asquire-mox,srinivas-b89febe6,breath,5,,wav,webapp-asquire-mox_srinivas-b89febe6_breath_5_...,report/audio_files_v5/srinivas-b89febe6/webapp...,webapp-asquire-mox_srinivas-b89febe6_breath_5_NA,txt,webapp-asquire-mox_srinivas-b89febe6_breath_5_...,report/audio_files_v5/srinivas-b89febe6/webapp...,60,172,m,85
1665,webapp-asquire-mox,srinivas-b89febe6,aa,2,,wav,webapp-asquire-mox_srinivas-b89febe6_aa_2_NA.wav,report/audio_files_v5/srinivas-b89febe6/webapp...,webapp-asquire-mox_srinivas-b89febe6_aa_2_NA,txt,webapp-asquire-mox_srinivas-b89febe6_aa_2_NA.txt,report/audio_files_v5/srinivas-b89febe6/webapp...,60,172,m,85
1666,webapp-asquire-mox,shivendra-49fd0914,aa,4,2,wav,webapp-asquire-mox_shivendra-49fd0914_aa_4_2.wav,report/audio_files_v5/shivendra-49fd0914/webap...,webapp-asquire-mox_shivendra-49fd0914_aa_4_2,txt,webapp-asquire-mox_shivendra-49fd0914_aa_4_2.txt,report/audio_files_v5/shivendra-49fd0914/webap...,20,170,m,78
1667,webapp-asquire-mox,shivendra-49fd0914,breath,2,0,wav,webapp-asquire-mox_shivendra-49fd0914_breath_2...,report/audio_files_v5/shivendra-49fd0914/webap...,webapp-asquire-mox_shivendra-49fd0914_breath_2_0,txt,webapp-asquire-mox_shivendra-49fd0914_breath_2...,report/audio_files_v5/shivendra-49fd0914/webap...,20,170,m,78


In [None]:
import functools

class AnotDF(DataStaticInfo):
  

    def __init__(self, all_files_src_df: pd.DataFrame) -> None:
        # self.VER = ver

        self.ALL_FILES_DF = all_files_src_df


    def filter_file_df(self, keys: list) -> pd.DataFrame:

        df = self.ALL_FILES_DF.copy()
        filts = [df[self.fkeys['FNAME']].str.contains(k) for k in keys]
        filt = functools.reduce(lambda p, c: p & c, filts)

        return df.loc[filt]
    
    def read_aud_anot(self, finfo: pd.Series):
        df = pd.read_csv(finfo[f"anot--{self.fkeys['FPATH']}"], sep="\t", names=["start", "end", "label"])
        df["line_number"] = range(1, len(df) + 1)
        df["dur"] = df["end"] - df["start"]
        return pd.concat([finfo.to_frame().T]*len(df), ignore_index=True, axis=0).join(df)

    def get_aud_anot_df(self):

        filt = self.ALL_FILES_DF[self.fkeys['FFMT']] == "wav"
        audio_anot_file_df = self.ALL_FILES_DF.loc[filt].copy()

        # return audio_anot_file_df
        aud_anot = [
            self.read_aud_anot(af)
            for _, af in audio_anot_file_df.iterrows()
        ]
        
        audio_anot_df = pd.concat(aud_anot).copy()

        audio_anot_df.to_csv("asq_anotes_df.csv", index=False)

        return audio_anot_df


audio_df = AnotDF(asqdata_meta_files.SOURCE_FILES_META_DF)
aud_df_c = audio_df.get_aud_anot_df(); aud_df_c

Unnamed: 0,app_code,sub_id,file_class,file_xindex,score,file_format,file_name,file_path,file_match,anot--file_format,...,anot--file_path,age,height,gender,weight,start,end,label,line_number,dur
0,webapp-asquire-mox,anish-02df6c90,cough,1,,wav,webapp-asquire-mox_anish-02df6c90_cough_1_NA.wav,report/audio_files_v5/anish-02df6c90/webapp-as...,webapp-asquire-mox_anish-02df6c90_cough_1_NA,txt,...,report/audio_files_v5/anish-02df6c90/webapp-as...,20,160,m,45,1.108703,1.673514,cc,1,0.564811
1,webapp-asquire-mox,anish-02df6c90,cough,1,,wav,webapp-asquire-mox_anish-02df6c90_cough_1_NA.wav,report/audio_files_v5/anish-02df6c90/webapp-as...,webapp-asquire-mox_anish-02df6c90_cough_1_NA,txt,...,report/audio_files_v5/anish-02df6c90/webapp-as...,20,160,m,45,2.267611,2.769665,cc,2,0.502054
2,webapp-asquire-mox,anish-02df6c90,cough,1,,wav,webapp-asquire-mox_anish-02df6c90_cough_1_NA.wav,report/audio_files_v5/anish-02df6c90/webapp-as...,webapp-asquire-mox_anish-02df6c90_cough_1_NA,txt,...,report/audio_files_v5/anish-02df6c90/webapp-as...,20,160,m,45,3.321924,3.798876,cc,3,0.476952
0,webapp-asquire-mox,anish-02df6c90,breath,0,,wav,webapp-asquire-mox_anish-02df6c90_breath_0_NA.wav,report/audio_files_v5/anish-02df6c90/webapp-as...,webapp-asquire-mox_anish-02df6c90_breath_0_NA,txt,...,report/audio_files_v5/anish-02df6c90/webapp-as...,20,160,m,45,2.179427,2.884962,ii,1,0.705535
1,webapp-asquire-mox,anish-02df6c90,breath,0,,wav,webapp-asquire-mox_anish-02df6c90_breath_0_NA.wav,report/audio_files_v5/anish-02df6c90/webapp-as...,webapp-asquire-mox_anish-02df6c90_breath_0_NA,txt,...,report/audio_files_v5/anish-02df6c90/webapp-as...,20,160,m,45,2.884962,3.450357,xx,2,0.565395
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1,webapp-asquire-mox,shivendra-49fd0914,aa,4,2,wav,webapp-asquire-mox_shivendra-49fd0914_aa_4_2.wav,report/audio_files_v5/shivendra-49fd0914/webap...,webapp-asquire-mox_shivendra-49fd0914_aa_4_2,txt,...,report/audio_files_v5/shivendra-49fd0914/webap...,20,170,m,78,11.066108,15.921470,aa,2,4.855362
0,webapp-asquire-mox,shivendra-49fd0914,breath,2,0,wav,webapp-asquire-mox_shivendra-49fd0914_breath_2...,report/audio_files_v5/shivendra-49fd0914/webap...,webapp-asquire-mox_shivendra-49fd0914_breath_2_0,txt,...,report/audio_files_v5/shivendra-49fd0914/webap...,20,170,m,78,1.155114,2.026605,ii,1,0.871491
1,webapp-asquire-mox,shivendra-49fd0914,breath,2,0,wav,webapp-asquire-mox_shivendra-49fd0914_breath_2...,report/audio_files_v5/shivendra-49fd0914/webap...,webapp-asquire-mox_shivendra-49fd0914_breath_2_0,txt,...,report/audio_files_v5/shivendra-49fd0914/webap...,20,170,m,78,2.026605,3.336422,xx,2,1.309817
2,webapp-asquire-mox,shivendra-49fd0914,breath,2,0,wav,webapp-asquire-mox_shivendra-49fd0914_breath_2...,report/audio_files_v5/shivendra-49fd0914/webap...,webapp-asquire-mox_shivendra-49fd0914_breath_2_0,txt,...,report/audio_files_v5/shivendra-49fd0914/webap...,20,170,m,78,3.336422,4.481222,ii,3,1.144800


In [None]:
aud_df_c["file_class"].value_counts()

breath     1548
cough      1244
aa          523
ee          502
oo          484
uu          473
yee         454
ss          436
zz          434
sent         10
~noise~       5
Name: file_class, dtype: int64

In [None]:
aud_df_c.describe()

Unnamed: 0,start,end,line_number,dur
count,6113.0,6113.0,6113.0,6113.0
mean,14.27786,19.951941,3.957141,5.674081
std,14.425995,16.9253,3.877137,5.785718
min,0.0,0.904702,1.0,0.12733
25%,3.162811,7.924119,2.0,1.1232
50%,10.15093,14.795157,3.0,3.273275
75%,20.3333,26.964876,5.0,8.711493
max,114.899286,140.095914,34.0,38.327059


In [None]:
audio_df.ALL_FILES_DF

Unnamed: 0,app_code,sub_id,file_class,file_xindex,score,file_format,file_name,file_path,file_match,anot--file_format,anot--file_name,anot--file_path,age,height,gender,weight
0,webapp-asquire-mox,anish-02df6c90,cough,1,,wav,webapp-asquire-mox_anish-02df6c90_cough_1_NA.wav,report/audio_files_v5/anish-02df6c90/webapp-as...,webapp-asquire-mox_anish-02df6c90_cough_1_NA,txt,webapp-asquire-mox_anish-02df6c90_cough_1_NA.txt,report/audio_files_v5/anish-02df6c90/webapp-as...,20,160,m,45
1,webapp-asquire-mox,anish-02df6c90,breath,0,,wav,webapp-asquire-mox_anish-02df6c90_breath_0_NA.wav,report/audio_files_v5/anish-02df6c90/webapp-as...,webapp-asquire-mox_anish-02df6c90_breath_0_NA,txt,webapp-asquire-mox_anish-02df6c90_breath_0_NA.txt,report/audio_files_v5/anish-02df6c90/webapp-as...,20,160,m,45
2,webapp-asquire-mox,anish-02df6c90,ss,5,,wav,webapp-asquire-mox_anish-02df6c90_ss_5_NA.wav,report/audio_files_v5/anish-02df6c90/webapp-as...,webapp-asquire-mox_anish-02df6c90_ss_5_NA,txt,webapp-asquire-mox_anish-02df6c90_ss_5_NA.txt,report/audio_files_v5/anish-02df6c90/webapp-as...,20,160,m,45
3,webapp-asquire-mox,anish-02df6c90,yee,6,,wav,webapp-asquire-mox_anish-02df6c90_yee_6_NA.wav,report/audio_files_v5/anish-02df6c90/webapp-as...,webapp-asquire-mox_anish-02df6c90_yee_6_NA,txt,webapp-asquire-mox_anish-02df6c90_yee_6_NA.txt,report/audio_files_v5/anish-02df6c90/webapp-as...,20,160,m,45
4,webapp-asquire-mox-na,anish-02df6c90,meta-data,na,na,json,webapp-asquire-mox-na_anish-02df6c90_meta-data...,report/audio_files_v5/anish-02df6c90/webapp-as...,webapp-asquire-mox-na_anish-02df6c90_meta-data...,meta.json,webapp-asquire-mox-na_anish-02df6c90_meta-data...,False,20,160,m,45
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1664,webapp-asquire-mox,srinivas-b89febe6,breath,5,,wav,webapp-asquire-mox_srinivas-b89febe6_breath_5_...,report/audio_files_v5/srinivas-b89febe6/webapp...,webapp-asquire-mox_srinivas-b89febe6_breath_5_NA,txt,webapp-asquire-mox_srinivas-b89febe6_breath_5_...,report/audio_files_v5/srinivas-b89febe6/webapp...,60,172,m,85
1665,webapp-asquire-mox,srinivas-b89febe6,aa,2,,wav,webapp-asquire-mox_srinivas-b89febe6_aa_2_NA.wav,report/audio_files_v5/srinivas-b89febe6/webapp...,webapp-asquire-mox_srinivas-b89febe6_aa_2_NA,txt,webapp-asquire-mox_srinivas-b89febe6_aa_2_NA.txt,report/audio_files_v5/srinivas-b89febe6/webapp...,60,172,m,85
1666,webapp-asquire-mox,shivendra-49fd0914,aa,4,2,wav,webapp-asquire-mox_shivendra-49fd0914_aa_4_2.wav,report/audio_files_v5/shivendra-49fd0914/webap...,webapp-asquire-mox_shivendra-49fd0914_aa_4_2,txt,webapp-asquire-mox_shivendra-49fd0914_aa_4_2.txt,report/audio_files_v5/shivendra-49fd0914/webap...,20,170,m,78
1667,webapp-asquire-mox,shivendra-49fd0914,breath,2,0,wav,webapp-asquire-mox_shivendra-49fd0914_breath_2...,report/audio_files_v5/shivendra-49fd0914/webap...,webapp-asquire-mox_shivendra-49fd0914_breath_2_0,txt,webapp-asquire-mox_shivendra-49fd0914_breath_2...,report/audio_files_v5/shivendra-49fd0914/webap...,20,170,m,78


In [None]:
asqdata_meta_files.SOURCE_FILES_META_DF.columns.tolist()

['app_code',
 'sub_id',
 'file_class',
 'file_xindex',
 'score',
 'file_format',
 'file_name',
 'file_path',
 'file_match',
 'anot--file_format',
 'anot--file_name',
 'anot--file_path',
 'age',
 'height',
 'gender',
 'weight']

In [None]:
filt = asqdata_meta_files.SOURCE_FILES_META_DF["file_class"] == "undefined"
asqdata_meta_files.SOURCE_FILES_META_DF[filt]

Unnamed: 0,app_code,sub_id,file_class,file_xindex,score,file_format,file_name,file_path,file_match,anot--file_format,anot--file_name,anot--file_path,age,height,gender,weight


In [None]:
import librosa
import numpy as np
import soundfile as sf  

In [None]:
from functools import reduce

class FinalProcessDataFiles(DataStaticInfo):

    merge_cols = [
    # 'app_code',
    'sub_id',

    'age',
    'weight',
    'height',
    'gender',
    # 'score',
    ]


    drop_cols = ["file_format", "file_name", "file_class", "file_xindex", "file_match"]


    SOURCE_FILES_DF: pd.DataFrame
    FCLASS_DF_LIST: list 
    MASTER_DATA_DF: pd.DataFrame
    MASTER_DATA_EXPORT_PATH: str

    def __init__(self, all_src_file_df: pd.DataFrame, export_path: str) -> None:
        self.SOURCE_FILES_DF = all_src_file_df
        self.MASTER_DATA_EXPORT_PATH = export_path

        self.MASTER_DATA_DF = self.merge_fclass_dfs()

    AUDIO_EXPORT_FOLDER = f"{REPORT_FOLDER}/audio_files_v4"

    def merge_audio_files(self,fclass_df: pd.DataFrame):

        subjects = fclass_df[self.fkeys["SID"]].unique()

        merged_fclass_dfs = []
        for sub_id in subjects:

            # test
            # sub_id = "azra-796ced3b"
            # /test


            export_folder = f"{self.AUDIO_EXPORT_FOLDER}/{sub_id}"; mkdir(export_folder)


            filt = fclass_df[self.fkeys["SID"]] == sub_id
            sub_df = fclass_df[filt]

            sub_df = sub_df.sort_values(self.fkeys["FCIDX"])
                
            if len(sub_df) < 1:
                continue


            elif ".wav" in sub_df.iloc[0][self.fkeys["FNAME"]]:

                fname = os.path.splitext(sub_df.iloc[0][self.fkeys["FNAME"]])[0]
                export_fpath = f"{export_folder}/{fname}"

                # if os.path.exists(export_fpath):
                #     continue
                
                extend_duration = 0
                extend_anot_dfs = []
                extended_aud_signal = np.ndarray([])

                for i, file in sub_df.iterrows():

                    data_fpath = file[self.fkeys["FPATH"]]
                    anot_fpath = file[f"anot--{self.fkeys['FPATH']}"]

                    signal = librosa.load(data_fpath, sr=44100)[0]
                    extended_aud_signal = np.append(extended_aud_signal, signal)

                    anot_df = pd.read_csv(anot_fpath, 
                                        names=["start", "end", "label"], 
                                        sep="\t", header=None)

                    # test
                    # sf.write(f"./test_aud_{i}.wav", signal, 44100)
                    # anot_df.to_csv(f"./test_anot_{i}.txt", index=False, header=False, sep="\t")
                    # /test

                    # UPDATE ANNOTATION TIME STAMPS
                    anot_df["start"] +=  extend_duration
                    anot_df["end"] += extend_duration

                    # UPDATE DURATION AFTER EXTENDING SIGNAL
                    extend_duration += (signal.shape[0] / 44100)

                    # ANOTATION DATAFRAMES
                    extend_anot_dfs.append(anot_df)

                extended_aud_signal = librosa.util.normalize(extended_aud_signal)
                extended_anot_df = pd.concat(extend_anot_dfs)
                
                

                sf.write(f"{export_fpath}.wav", extended_aud_signal, 44100)
                extended_anot_df.to_csv(f"{export_fpath}.txt", index=False, header=False, sep="\t")

            else:
                fname = (sub_df.iloc[0][self.fkeys["FNAME"]])
                export_fpath = f"{export_folder}/{fname}"

                shutil.copy(sub_df.iloc[0][self.fkeys["FPATH"]], f"{export_fpath}")
            
                

            merged_fclass_dfs.append(sub_df.head(1))

            # test
            # break

        return pd.concat(merged_fclass_dfs, axis=1) if len(merged_fclass_dfs) > 0 else fclass_df

    def seperate_fclass(self):

        fclass_DFs = []
        fclasses = self.SOURCE_FILES_DF[self.fkeys["FCLASS"]].unique()

        print(fclasses)
        for fclass in fclasses[:]:

            filt = self.SOURCE_FILES_DF[self.fkeys["FCLASS"]] == fclass
            fclass_df = self.SOURCE_FILES_DF[filt]

            # if len(fclass_df) > 0:
            #     print("processing: ", fclass,  end=" ")
            #     fclass_df =  self.merge_audio_files(fclass_df)
            #     print("done")
            

            fclass_df.columns = fclass_df.columns.str.replace("file", f"{fclass}--file")

            fclass_DFs.append(fclass_df)

            #test
            # break

        return fclass_DFs
    


    def merge_fclass_dfs(self):

        fclass_DFs = self.seperate_fclass()
        

        df_merged = reduce(lambda  left, right: pd.merge(left, right, on=self.merge_cols, how='outer'), fclass_DFs)

        df_cols = list(df_merged.columns.delete(df_merged.columns.str.contains("|".join(self.drop_cols))))

        df_merged = df_merged[df_cols]

        df_merged = df_merged.fillna(False)
        df_merged = df_merged.replace(False, "-")

        df_merged.sort_values(["gender", "age"], inplace=True)

        df_merged.reset_index(inplace=True)
        # df_merged.drop(columns=["index"], inplace=True)
        df_merged.index.name = "index"

        # df_merged = df_merged[self.col_order]

        # print(list(df_merged.columns))

        rm_cols = df_merged.columns.str.contains("|".join(["app_code", "score"]))

        df_merged = df_merged.drop(columns=df_merged.columns[rm_cols])
        df_merged.to_csv(self.MASTER_DATA_EXPORT_PATH, index=True)

        return df_merged

export_path = f"{REPORT_FOLDER}/asqcorpus_muster.csv"
pnoidata_final_process = FinalProcessDataFiles(asqdata_meta_files.SOURCE_FILES_META_DF, export_path)
# pnoidata_final_process.seperate_fclass()

# pnoidata_final_process.SOURCE_FILES_DF

pnoidata_final_process.MASTER_DATA_DF

['cough' 'breath' 'ss' 'yee' 'meta-data' 'uu' 'zz' 'aa' 'ee' 'oo' 'sent'
 '~noise~']


  df_merged = reduce(lambda  left, right: pd.merge(left, right, on=self.merge_cols, how='outer'), fclass_DFs)


Unnamed: 0_level_0,index,sub_id,cough--file_path,anot--cough--file_path,age,height,gender,weight,breath--file_path,anot--breath--file_path,...,aa--file_path,anot--aa--file_path,ee--file_path,anot--ee--file_path,oo--file_path,anot--oo--file_path,sent--file_path,anot--sent--file_path,~noise~--file_path,anot--~noise~--file_path
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,162,sonamgurjar-bdb0ad0d,report/audio_files_v5/sonamgurjar-bdb0ad0d/web...,report/audio_files_v5/sonamgurjar-bdb0ad0d/web...,12,137,f,18,report/audio_files_v5/sonamgurjar-bdb0ad0d/web...,report/audio_files_v5/sonamgurjar-bdb0ad0d/web...,...,report/audio_files_v5/sonamgurjar-bdb0ad0d/web...,report/audio_files_v5/sonamgurjar-bdb0ad0d/web...,report/audio_files_v5/sonamgurjar-bdb0ad0d/web...,report/audio_files_v5/sonamgurjar-bdb0ad0d/web...,report/audio_files_v5/sonamgurjar-bdb0ad0d/web...,report/audio_files_v5/sonamgurjar-bdb0ad0d/web...,-,-,-,-
1,6,jayasurya-99f989e3,report/audio_files_v5/jayasurya-99f989e3/webap...,report/audio_files_v5/jayasurya-99f989e3/webap...,13,150,f,35,report/audio_files_v5/jayasurya-99f989e3/webap...,report/audio_files_v5/jayasurya-99f989e3/webap...,...,report/audio_files_v5/jayasurya-99f989e3/webap...,report/audio_files_v5/jayasurya-99f989e3/webap...,report/audio_files_v5/jayasurya-99f989e3/webap...,report/audio_files_v5/jayasurya-99f989e3/webap...,report/audio_files_v5/jayasurya-99f989e3/webap...,report/audio_files_v5/jayasurya-99f989e3/webap...,-,-,-,-
2,101,aditisatvika-dd09b820,report/audio_files_v5/aditisatvika-dd09b820/we...,report/audio_files_v5/aditisatvika-dd09b820/we...,16,170,f,40,-,-,...,report/audio_files_v5/aditisatvika-dd09b820/we...,report/audio_files_v5/aditisatvika-dd09b820/we...,report/audio_files_v5/aditisatvika-dd09b820/we...,report/audio_files_v5/aditisatvika-dd09b820/we...,report/audio_files_v5/aditisatvika-dd09b820/we...,report/audio_files_v5/aditisatvika-dd09b820/we...,-,-,-,-
3,50,sheen-3d38047e,report/audio_files_v5/sheen-3d38047e/webapp-as...,report/audio_files_v5/sheen-3d38047e/webapp-as...,17,160,f,58,report/audio_files_v5/sheen-3d38047e/webapp-as...,report/audio_files_v5/sheen-3d38047e/webapp-as...,...,report/audio_files_v5/sheen-3d38047e/webapp-as...,report/audio_files_v5/sheen-3d38047e/webapp-as...,report/audio_files_v5/sheen-3d38047e/webapp-as...,report/audio_files_v5/sheen-3d38047e/webapp-as...,report/audio_files_v5/sheen-3d38047e/webapp-as...,report/audio_files_v5/sheen-3d38047e/webapp-as...,-,-,-,-
4,133,neha-d4225d24,report/audio_files_v5/neha-d4225d24/webapp-asq...,report/audio_files_v5/neha-d4225d24/webapp-asq...,17,150,f,41,report/audio_files_v5/neha-d4225d24/webapp-asq...,report/audio_files_v5/neha-d4225d24/webapp-asq...,...,report/audio_files_v5/neha-d4225d24/webapp-asq...,report/audio_files_v5/neha-d4225d24/webapp-asq...,report/audio_files_v5/neha-d4225d24/webapp-asq...,report/audio_files_v5/neha-d4225d24/webapp-asq...,report/audio_files_v5/neha-d4225d24/webapp-asq...,report/audio_files_v5/neha-d4225d24/webapp-asq...,-,-,-,-
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
198,103,harisk-6f82d610,report/audio_files_v5/harisk-6f82d610/webapp-a...,report/audio_files_v5/harisk-6f82d610/webapp-a...,58,175,m,88,report/audio_files_v5/harisk-6f82d610/webapp-a...,report/audio_files_v5/harisk-6f82d610/webapp-a...,...,report/audio_files_v5/harisk-6f82d610/webapp-a...,report/audio_files_v5/harisk-6f82d610/webapp-a...,report/audio_files_v5/harisk-6f82d610/webapp-a...,report/audio_files_v5/harisk-6f82d610/webapp-a...,report/audio_files_v5/harisk-6f82d610/webapp-a...,report/audio_files_v5/harisk-6f82d610/webapp-a...,-,-,-,-
199,47,arhaam-350fc3e6,report/audio_files_v5/arhaam-350fc3e6/webapp-a...,report/audio_files_v5/arhaam-350fc3e6/webapp-a...,6,114,m,30,report/audio_files_v5/arhaam-350fc3e6/webapp-a...,report/audio_files_v5/arhaam-350fc3e6/webapp-a...,...,report/audio_files_v5/arhaam-350fc3e6/webapp-a...,report/audio_files_v5/arhaam-350fc3e6/webapp-a...,report/audio_files_v5/arhaam-350fc3e6/webapp-a...,report/audio_files_v5/arhaam-350fc3e6/webapp-a...,-,-,-,-,-,-
200,120,ravisk-cd186836,report/audio_files_v5/ravisk-cd186836/webapp-a...,report/audio_files_v5/ravisk-cd186836/webapp-a...,60,177,m,83,report/audio_files_v5/ravisk-cd186836/webapp-a...,report/audio_files_v5/ravisk-cd186836/webapp-a...,...,report/audio_files_v5/ravisk-cd186836/webapp-a...,report/audio_files_v5/ravisk-cd186836/webapp-a...,report/audio_files_v5/ravisk-cd186836/webapp-a...,report/audio_files_v5/ravisk-cd186836/webapp-a...,report/audio_files_v5/ravisk-cd186836/webapp-a...,report/audio_files_v5/ravisk-cd186836/webapp-a...,-,-,-,-
201,192,srinivas-b89febe6,report/audio_files_v5/srinivas-b89febe6/webapp...,report/audio_files_v5/srinivas-b89febe6/webapp...,60,172,m,85,report/audio_files_v5/srinivas-b89febe6/webapp...,report/audio_files_v5/srinivas-b89febe6/webapp...,...,report/audio_files_v5/srinivas-b89febe6/webapp...,report/audio_files_v5/srinivas-b89febe6/webapp...,report/audio_files_v5/srinivas-b89febe6/webapp...,report/audio_files_v5/srinivas-b89febe6/webapp...,report/audio_files_v5/srinivas-b89febe6/webapp...,report/audio_files_v5/srinivas-b89febe6/webapp...,-,-,-,-


In [None]:
pnoidata_final_process.MASTER_DATA_DF.columns.tolist()

['index',
 'sub_id',
 'cough--file_path',
 'anot--cough--file_path',
 'age',
 'height',
 'gender',
 'weight',
 'breath--file_path',
 'anot--breath--file_path',
 'ss--file_path',
 'anot--ss--file_path',
 'yee--file_path',
 'anot--yee--file_path',
 'meta-data--file_path',
 'anot--meta-data--file_path',
 'uu--file_path',
 'anot--uu--file_path',
 'zz--file_path',
 'anot--zz--file_path',
 'aa--file_path',
 'anot--aa--file_path',
 'ee--file_path',
 'anot--ee--file_path',
 'oo--file_path',
 'anot--oo--file_path',
 'sent--file_path',
 'anot--sent--file_path',
 '~noise~--file_path',
 'anot--~noise~--file_path']

In [None]:
pnoidata_final_process.seperate_fclass()[1]['sub_id'].value_counts()

['cough' 'breath' 'ss' 'yee' 'meta-data' 'uu' 'zz' 'aa' 'ee' 'oo' 'sent'
 '~noise~']


anish-02df6c90              1
takemi-bcc845d0             1
arjundeori-b1524a19         1
dkumar-2377e2ca             1
amishasinha-94b8bf22        1
                           ..
karthiksvasisht-e7d9c436    1
aryan-7638ca98              1
tony-3dc3d463               1
wajiha-0a4be51c             1
shivendra-49fd0914          1
Name: sub_id, Length: 170, dtype: int64

In [None]:
pnoidata_final_process.MASTER_DATA_DF[pnoidata_final_process.MASTER_DATA_DF["sub_id"] == "sonamgurjar-bdb0ad0d"]

Unnamed: 0_level_0,index,sub_id,cough--file_path,anot--cough--file_path,age,height,gender,weight,breath--file_path,anot--breath--file_path,...,aa--file_path,anot--aa--file_path,ee--file_path,anot--ee--file_path,oo--file_path,anot--oo--file_path,sent--file_path,anot--sent--file_path,~noise~--file_path,anot--~noise~--file_path
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,162,sonamgurjar-bdb0ad0d,report/audio_files_v5/sonamgurjar-bdb0ad0d/web...,report/audio_files_v5/sonamgurjar-bdb0ad0d/web...,12,137,f,18,report/audio_files_v5/sonamgurjar-bdb0ad0d/web...,report/audio_files_v5/sonamgurjar-bdb0ad0d/web...,...,report/audio_files_v5/sonamgurjar-bdb0ad0d/web...,report/audio_files_v5/sonamgurjar-bdb0ad0d/web...,report/audio_files_v5/sonamgurjar-bdb0ad0d/web...,report/audio_files_v5/sonamgurjar-bdb0ad0d/web...,report/audio_files_v5/sonamgurjar-bdb0ad0d/web...,report/audio_files_v5/sonamgurjar-bdb0ad0d/web...,-,-,-,-


In [None]:
pnoidata_final_process.MASTER_DATA_DF.columns.to_list()

['index',
 'sub_id',
 'cough--file_path',
 'anot--cough--file_path',
 'age',
 'height',
 'gender',
 'weight',
 'breath--file_path',
 'anot--breath--file_path',
 'ss--file_path',
 'anot--ss--file_path',
 'yee--file_path',
 'anot--yee--file_path',
 'meta-data--file_path',
 'anot--meta-data--file_path',
 'uu--file_path',
 'anot--uu--file_path',
 'zz--file_path',
 'anot--zz--file_path',
 'aa--file_path',
 'anot--aa--file_path',
 'ee--file_path',
 'anot--ee--file_path',
 'oo--file_path',
 'anot--oo--file_path',
 'sent--file_path',
 'anot--sent--file_path',
 '~noise~--file_path',
 'anot--~noise~--file_path']

In [None]:
pnoidata_files = DataExtractFiles(DATA_PATH, ver="*")
asqdata_source_files = DataSourceFiles(pnoidata_files.ALL_FILES_DF)
pnoidata_pft_files = ProcessPFTFiles(asqdata_source_files.SOURCE_FILES_DF)
asqdata_meta_files = ProcessMETAFiles(pnoidata_pft_files.SOURCE_FILES_PFT_DF)

export_path = f"{REPORT_FOLDER}/pnoicorpus_muster.csv"
pnoidata_final_process = FinalProcessDataFiles(asqdata_meta_files.SOURCE_FILES_META_DF, export_path)

pnoidata_final_process.MASTER_DATA_DF

NameError: name 'ProcessPFTFiles' is not defined