# PREPARE DATASET FOR PNOI EXPERIMENTS

In [175]:
import pandas as pd
import numpy as np
import functools
import matplotlib.pyplot as plt
import librosa
import os
from tqdm import tqdm

mkdir = lambda p: 0 if os.path.exists(p) else (os.mkdir(p), 1)[1]

## PATHS to dataset

In [176]:
REPORTS = "reports"; mkdir(REPORTS) # Path to the reports folder

PNOI_CORPUS_CSV_NAME = "pnoicorpus_muster.csv" # Name of the csv file

PNOI_CORPUS_CSV_PATH = f"{REPORTS}/{PNOI_CORPUS_CSV_NAME}" # Path to the master csv file

## FILTER dataset

In [177]:
class DataFilter:

    EMPTY_VAL = '-'
    PNOI_MUSTER_DF: pd.DataFrame

    FILTER_DATA_CSV_FNAME = "filtered_dataset.csv"

    PNOI_FILT_DF: pd.DataFrame

    def __init__(self, muster_csv_path: str) -> None:
        pnoi_corpus_DF = pd.read_csv(muster_csv_path)
        pnoi_corpus_DF.columns = pnoi_corpus_DF.columns.str.replace("--file_path", "")

        self.PNOI_MUSTER_DF: pd.DataFrame = pnoi_corpus_DF

        self.PNOI_FILT_DF: pd.DataFrame = self.filter_df()

    def filter_df(self):
        # BREATH AUDIO (BA) Columns
        col_str_match = r"anot--LBA_before|anot--VBA_before" # match string
        ba_cols = self.PNOI_MUSTER_DF.columns[self.PNOI_MUSTER_DF.columns.str.contains(col_str_match)] # get columns that match string
        filts = [self.PNOI_MUSTER_DF[col] != self.EMPTY_VAL for col in ba_cols] # filters for non-empty values
        filt = functools.reduce(lambda p, c: p & c, filts) # combine filters

        pnoi_corpus_filt_DF = self.PNOI_MUSTER_DF[filt] # apply filter
        pnoi_corpus_filt_DF = pnoi_corpus_filt_DF.reset_index(drop=True) # reset index
        pnoi_corpus_filt_DF.to_csv(f"{REPORTS}/{self.FILTER_DATA_CSV_FNAME}") # save filtered dataframe to csv
        print(pnoi_corpus_filt_DF.shape)
        
        return pnoi_corpus_filt_DF
    
pnoidata_filter = DataFilter(PNOI_CORPUS_CSV_PATH)

pnoidata_filter.PNOI_FILT_DF.head()

(25, 47)


Unnamed: 0,index,app_code,sub_id,subjectName,subjectGender,subjectAge,subjectType,subjectHeight,subjectWeight,META,...,LBA_after_RU,anot--LBA_after_RU,LBA_after_LL,anot--LBA_after_LL,LBA_after_RL,anot--LBA_after_RL,PFT_before,anot--PFT_before,PFT_after,anot--PFT_after
0,0,pnoistor_feb2023,shreyamgupta_78aa423a,Shreyam Gupta,Female,19,Control,158,70,DATA_PNOISTOR/pnoistor_feb2023/shreyamgupta_78...,...,-,-,-,-,-,-,DATA_PNOISTOR/pnoistor_feb2023/shreyamgupta_78...,DATA_PNOISTOR/pnoistor_feb2023/shreyamgupta_78...,-,-
1,2,pnoistor_dec01,sannashoukat_5213fe84,Sanna Shoukat,Female,21,Control,166,55,DATA_PNOISTOR/pnoistor_dec01/sannashoukat_5213...,...,-,-,-,-,-,-,DATA_PNOISTOR/pnoistor_dec01/sannashoukat_5213...,DATA_PNOISTOR/pnoistor_dec01/sannashoukat_5213...,-,-
2,3,pnoistor_feb2023,sananaushad_80e84b51,Sana Naushad,Female,21,Control,158,60,DATA_PNOISTOR/pnoistor_feb2023/sananaushad_80e...,...,-,-,-,-,-,-,DATA_PNOISTOR/pnoistor_feb2023/sananaushad_80e...,DATA_PNOISTOR/pnoistor_feb2023/sananaushad_80e...,-,-
3,4,pnoistor_dec01,saikeerthanaarun_3364bc1a,Sai Keerthana Arun,Female,22,Control,172,58,DATA_PNOISTOR/pnoistor_dec01/saikeerthanaarun_...,...,-,-,-,-,-,-,-,-,-,-
4,6,pnoistor_dec01,kumarchowdam_53f32e31,Kumar Chowdam,Male,21,Control,162,60,DATA_PNOISTOR/pnoistor_dec01/kumarchowdam_53f3...,...,-,-,-,-,-,-,DATA_PNOISTOR/pnoistor_dec01/kumarchowdam_53f3...,DATA_PNOISTOR/pnoistor_dec01/kumarchowdam_53f3...,-,-


In [178]:
import soundfile as sf

In [179]:
class DataStaticInfo:

    VER = "*"
    SEP = "-"
    META_SEP = "_"
    EXT_SEP = "."
    ANOT_LABELS = ["aa", "ee", "uu", "oo", "ii", "xx", "bb1", "bb2", "bb3", "bb4"]

    fkeys = {
        "APP_CODE": "app_code",
        "SID":"sub_id",
        "FCLASS": "file_class",
        "FID": "file_ID",
        "COMNT": "file_comment",
        "FFMT": "file_format",
        "FNAME": "file_name",
        "FPATH": "file_path",
        "FMATCH": "file_match"
    }

In [180]:
class AudStaticData(DataStaticInfo):
    EMPTY_VAL = '-'
    FNAME_SEP = "-"
    ANOT_SEP = '\t'
    FS_k = "fs"
    BEGIN_k = "begin"; END_k = "end"; LABEL_k = "label"
    ANOTE_COLS = [BEGIN_k, END_k, LABEL_k]

    LUNG_LOCS = ["LU", "RU", "LL", "RL"]

    LBA_k = "LBA"; VBA_k = "VBA"; BA_k = "BA"
    
    AUD_TAG = "aud--"
    ANOT_TAG = "anot--"
    AUDIO_FPATH_k = f"audio--file_path"
    ANOT_FPATH_k = f"anot--file_path"

In [181]:
class AudioDataProcess(AudStaticData):

    DRY_RUN = False
    
    PAD_DUR = 0.2
    GAP_DUR = 12.0
    GAP_k = "gap"
    DUR_k = "duration"
    

    SIGNAL_k = "signal"
    LABEL_DF_k = "label_df"

    AUD_EXPORT_FOLDER = "data_export7"
    PNOI_SPLIT_AUD_CSV_PATH = "pnoiloc_split_aud.csv"

    
    PNOI_FILT_DF: pd.DataFrame
    PNOI_SPLIT_AUD_DF: pd.DataFrame
    
    def __init__(self, data_df: pd.DataFrame) -> None:
        self.PNOI_FILT_DF = data_df

        self.PNOI_SPLIT_AUD_DF = self.process_audio()


    def anot_breath_loc_chunks(self, audio_fp: str, anot_fp: str) -> list[pd.DataFrame]:
        """
        Split audio file into chunks based on duration b/w breaths in the audio file.
        The breaths are annotated in the annotation file.
        The annotation file is a tsv file exported from audacity.
        """

        anot_df: pd.DataFrame = pd.read_csv(anot_fp, sep=self.ANOT_SEP, names=self.ANOTE_COLS) 
        # Read annotation labeled with audacity tsv file

        anot_df[self.AUDIO_FPATH_k] = audio_fp
        anot_df[self.DUR_k] = anot_df[self.END_k] - anot_df[self.BEGIN_k] 
        # Calculate duration of each label marke in annotation

        anot_df[self.GAP_k] = anot_df[self.BEGIN_k].shift(-1) - anot_df[self.END_k] 
        # Calculate gap between previous label end and next label start: give the gap between labels

        gap_filt = (anot_df[self.GAP_k] > self.GAP_DUR) | (anot_df[self.GAP_k].isnull()) 
        # Filter rows where gap b/w labels is above the threshold (GAP)
        anot_gap_df = anot_df.loc[gap_filt] # Apply filter

        i_splits = sorted(set([0] + list(anot_gap_df.index + 1))) 
        # Get row index of those breakpoints (breath location chunks)

        breath_chunks = [anot_df.iloc[i_splits[n]:i_splits[n+1]] for n in range(len(i_splits) - 1)] 
        # Split chunks using the index of breakpoints

        # assert len(chunks) == 4 # check if there are 4 chunks

        return breath_chunks
    
    def extract_signal_chunk(self, chunks_DF: pd.DataFrame, is_plot=False):

        # Get begin and end values of chunk
        t_begin = chunks_DF.iloc[0][self.BEGIN_k]
        t_end = chunks_DF.iloc[-1][self.END_k]

        # Calculate offset (with Padding)
        offset = (t_begin - self.PAD_DUR)
        dur = (t_end - t_begin + 2*self.PAD_DUR)

        # Offset chunk DF
        chunks_DF.loc[:, self.ANOTE_COLS[:-1]] -= offset

        # label DF
        label_DF = chunks_DF.loc[:, self.ANOTE_COLS]
        
        # Extract audio signal from audio file
        audio_fp = chunks_DF.iloc[0][self.AUDIO_FPATH_k]
        signal, fs = librosa.load(audio_fp, sr=None, mono=True, offset=offset, duration=dur)

        # Normalize signal
        signal = librosa.util.normalize(signal)

        # PLOT signals
        if is_plot:
            plt.title(os.path.basename(audio_fp))
            plt.xlabel("t(s)"); plt.ylabel("amp")
            plt.plot(np.linspace(0, dur, len(signal)), signal)
            plt.stem(chunks_DF.iloc[:][self.BEGIN_k], np.ones(len(chunks_DF)))
            plt.stem(chunks_DF.iloc[:][self.END_k], np.ones(len(chunks_DF)), 'r')
            plt.show()

        return {
            self.FS_k: fs, # sampling frequency
            self.AUDIO_FPATH_k: audio_fp, # audio file path
            self.SIGNAL_k: signal, # audio signal
            self.LABEL_DF_k: label_DF, # label dataframe
            }
    
    def export_audio_signals(self, aud_info: dict, loc_i: int) -> tuple[str]:
        fname = os.path.basename(aud_info[self.AUDIO_FPATH_k]) # get filename

        '''
        ["app_code 0", "sub_id 1", "file_class 2", "file_ID 3", "comment 4", "file_format 5"]'''

        fn_parts = fname.split(self.FNAME_SEP) # split filename into parts
        n_fclass = fn_parts[2] # get filename class
        fn_parts[2] = n_fclass if self.LBA_k in n_fclass else f"{n_fclass}_{self.LUNG_LOCS[loc_i]}" # update filename class
        n_aud_fname = self.FNAME_SEP.join(fn_parts[:-1]) # join filename parts

        #create export folder
        export_folder_path = os.path.join(REPORTS, self.AUD_EXPORT_FOLDER); mkdir(export_folder_path)
        sub_folder_path = os.path.join(export_folder_path, fn_parts[1]); mkdir(sub_folder_path)

        
        # export audio file
        n_aud_fpath = os.path.join(sub_folder_path, f"{n_aud_fname}.wav")
        if not self.DRY_RUN: sf.write(n_aud_fpath, aud_info[self.SIGNAL_k], aud_info[self.FS_k]) # export audio file
        
        # export annotation file
        n_anotpath = os.path.join(sub_folder_path, f"{n_aud_fname}.txt")
        anot_df: pd.DataFrame = aud_info[self.LABEL_DF_k]
        if not self.DRY_RUN: anot_df.to_csv(n_anotpath, sep='\t', index=False, header=False) # export annotation file

        return {
            f"{self.AUDIO_FPATH_k}": n_aud_fpath, 
            f"{self.ANOT_FPATH_k}": n_anotpath,
            f"{self.fkeys['FCLASS']}": fn_parts[2],
            f"{self.fkeys['SID']}": fn_parts[1],
            }
        
    def process_audio(self):

        BA_str_match = f"{self.ANOT_TAG}{self.VBA_k}|{self.ANOT_TAG}{self.LBA_k}"
        filt = self.PNOI_FILT_DF.columns.str.contains(BA_str_match)
        BA_cols = self.PNOI_FILT_DF.columns[filt]
        

        for li, loc in enumerate(self.LUNG_LOCS):
            BA_cols = [ f"{li}~{col}" if loc in f"{'_'}~{col}" else col for col in BA_cols]

        for li, loc in enumerate(["before", "after"]):
            BA_cols = [ f"{li}~{col}" if loc in col else col for col in BA_cols]

        for li, loc in enumerate(["VBA", "LBA"]):
            BA_cols = [ f"{li}~{col}" if loc in col else col for col in BA_cols]

        # print(sorted(BA_cols))

        BA_cols = sorted(BA_cols)

        aud_info_dicts = []
        for _, sub_info in tqdm(self.PNOI_FILT_DF.iloc[:].iterrows()):

            all_breath_chunks = []
            for col in BA_cols:

                col = col.split('~')[-1]
                audio_fp = sub_info[col.replace(self.ANOT_TAG, '')]
                anote_fp = sub_info[col]

                if anote_fp == self.EMPTY_VAL: continue

                ba_chunks = self.anot_breath_loc_chunks(audio_fp, anote_fp)

                # all_breath_chunks.extend(ba_chunks)

                # print(BA_cols_n)
                for ci, chunk in enumerate(ba_chunks):
                    chunk_info = self.extract_signal_chunk(chunk, is_plot=False)

                    aud_info = self.export_audio_signals(chunk_info, ci)

                    aud_info_dicts.append(aud_info)

                    # break        
            # break

        aud_info_df = pd.DataFrame(aud_info_dicts)

        aud_info_df.to_csv(f"{REPORTS}/{self.PNOI_SPLIT_AUD_CSV_PATH}")

        return aud_info_df


        
pnoidata_audproc = AudioDataProcess(pnoidata_filter.PNOI_FILT_DF)

# pnoidata_audproc.process_audio()

  signal, fs = librosa.load(audio_fp, sr=None, mono=True, offset=offset, duration=dur)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)
25it [00:25,  1.04s/it]


In [184]:
PNOILOC_SPLIT_AUD_CSV_PATH = "reports/pnoiloc_split_aud.csv"

class SyncBASignals(AudStaticData):

    DRY_RUN = False
    
    FS = 16000
    MATCH = "match"
    EXPORT_FOLDER = "pnoi_sync_data7"

    PLOT_EXPORT_FOLDER = "pnoi_sync_data_plot7"

    PNOI_SYNC_AUD_CSV_PATH = f"{REPORTS}/pnoi_sync_aud.csv"

    PNOI_SPLIT_AUD_DF: pd.DataFrame
    PNOI_SYNC_AUD_DF: pd.DataFrame
    
    def __init__(self, split_aud_csv_path: str) -> None:

        self.PNOI_SPLIT_AUD_DF = pd.read_csv(split_aud_csv_path)

        self.PNOI_SYNC_AUD_DF = self.process_ba_signals()

    # Plot signal and annotation
    def plot_sig(self, aud, anot, fs, scale=1.0):
        t = np.linspace(0, len(aud)/fs, len(aud))
        plt.stem(anot[self.BEGIN_k], np.ones(len(anot))*1.5, 'r')
        plt.stem(anot[self.END_k], np.ones(len(anot))*1.2, 'g')
        plt.plot(t, aud*scale)

    def join_ba_signals(self, rv, is_plot=False):

        # helper functions
        load_audio = lambda fp: librosa.load(fp, sr=self.FS, mono=True)[0] # Load audio signal
        pad_sig = lambda sig, max_len: np.pad(sig, (0, max_len - len(sig)), mode='constant') # Pad signal with zeros
        read_anot = lambda fp: pd.read_csv(fp, sep=self.ANOT_SEP, names=self.ANOTE_COLS) # Read annotation labeled with audacity tsv file

        # Read annotation
        vba_anot = read_anot(rv[f"{self.ANOT_FPATH_k}{self.META_SEP}{self.VBA_k}"])
        lba_anot = read_anot(rv[f"{self.ANOT_FPATH_k}{self.META_SEP}{self.LBA_k}"])

        # Load audio signal
        vba_sig = load_audio(rv[f"{self.AUDIO_FPATH_k}{self.META_SEP}{self.VBA_k}"])
        lba_sig = load_audio(rv[f"{self.AUDIO_FPATH_k}{self.META_SEP}{self.LBA_k}"])

        # Pad signals with zeros to make them equal length
        max_sig_len = max(len(vba_sig), len(lba_sig)) # get max length of the two signals

        vba_sig = pad_sig(vba_sig, max_sig_len)
        lba_sig = pad_sig(lba_sig, max_sig_len)

        

        # Join the two signals
        ba_sig = np.array([vba_sig, lba_sig]).T

        #create export folder
        fname = os.path.basename(rv[f"{self.AUDIO_FPATH_k}{self.META_SEP}{self.VBA_k}"]) # get filename
        n_filename = os.path.splitext(fname.replace(self.VBA_k, self.BA_k))[0] # remove extension and replace vba with ba
        
        export_folder_path = os.path.join(REPORTS, self.EXPORT_FOLDER); mkdir(export_folder_path)
        sub_folder_path = os.path.join(export_folder_path, rv[self.fkeys['SID']]); mkdir(sub_folder_path)

        # Export audio file
        audio_filename = os.path.join(sub_folder_path, f"{n_filename}.wav")
        if not self.DRY_RUN: 
            sf.write(audio_filename, ba_sig, self.FS) # export audio file

        # Export annotation file
        anot_filepath = os.path.join(sub_folder_path, f"{n_filename}.txt")
        if not self.DRY_RUN: 
            vba_anot.to_csv(anot_filepath, sep=self.ANOT_SEP, index=False, header=False)


        # plot signal and annotation
        if is_plot: 
            plt.figure(figsize=(140, 40))

            self.plot_sig(vba_sig, vba_anot, self.FS)
            self.plot_sig(lba_sig, lba_anot, self.FS, scale=0.2)

            plt.title(n_filename, fontsize=128)
            
            plot_folder_path = os.path.join(REPORTS, self.PLOT_EXPORT_FOLDER); mkdir(plot_folder_path)

            plt.savefig(os.path.join(plot_folder_path, f"{n_filename}.png"))
            print(plot_folder_path, f"{n_filename}.png")
            # plt.show()

            plt.close()

        return {
            self.FS_k: self.FS, # sampling frequency
            self.fkeys['SID']: rv[self.fkeys['SID']], # subject ID
            self.AUDIO_FPATH_k: audio_filename, # audio file path
            self.ANOT_FPATH_k: anot_filepath, # label dataframe
            }
    

    def process_ba_signals(self):
        subjects = self.PNOI_SPLIT_AUD_DF[self.fkeys['SID']].unique()

        ba_sync_aud_dicts = []
        for subject in tqdm(subjects[:]):
            filt = self.PNOI_SPLIT_AUD_DF[self.fkeys['SID']] == subject
            sub_df = self.PNOI_SPLIT_AUD_DF[filt]

            ba_match_str = f"{self.VBA_k}|{self.LBA_k}"
            sub_df.loc[:, self.MATCH] = sub_df[self.fkeys['FCLASS']].replace(ba_match_str, '', regex=True)

            filt = sub_df[self.fkeys['FCLASS']].str.contains(self.VBA_k)
            sub_VBA = sub_df.loc[filt]
            sub_LBA = sub_df.loc[~filt]

            merge_cols = [self.MATCH, self.fkeys['SID']]
            merge_sufix = (f"_{self.VBA_k}", f"_{self.LBA_k}")
            sub_df2 = sub_VBA.merge(sub_LBA, how="inner", on=merge_cols, suffixes=merge_sufix)

            for _, rv in sub_df2.iterrows():
                ba_sync_aud_dicts += [self.join_ba_signals(rv, is_plot=True)]



        aud_info_df = pd.DataFrame(ba_sync_aud_dicts)

        aud_info_df.to_csv(f"{self.PNOI_SYNC_AUD_CSV_PATH}")

        return aud_info_df

pnoidata_audsync = SyncBASignals(PNOILOC_SPLIT_AUD_CSV_PATH)

  0%|          | 0/25 [00:00<?, ?it/s]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sub_df.loc[:, self.MATCH] = sub_df[self.fkeys['FCLASS']].replace(ba_match_str, '', regex=True)


reports/pnoi_sync_data_plot7 pnoistor_feb2023-shreyamgupta_78aa423a-BA_before_LU-694c.png
reports/pnoi_sync_data_plot7 pnoistor_feb2023-shreyamgupta_78aa423a-BA_before_RU-694c.png


  4%|▍         | 1/25 [00:13<05:22, 13.46s/it]

reports/pnoi_sync_data_plot7 pnoistor_feb2023-shreyamgupta_78aa423a-BA_before_LL-694c.png


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sub_df.loc[:, self.MATCH] = sub_df[self.fkeys['FCLASS']].replace(ba_match_str, '', regex=True)


reports/pnoi_sync_data_plot7 pnoistor_dec01-sannashoukat_5213fe84-BA_before_LU-ad42.png
reports/pnoi_sync_data_plot7 pnoistor_dec01-sannashoukat_5213fe84-BA_before_RU-ad42.png
reports/pnoi_sync_data_plot7 pnoistor_dec01-sannashoukat_5213fe84-BA_before_LL-ad42.png


  8%|▊         | 2/25 [00:41<08:26, 22.02s/it]

reports/pnoi_sync_data_plot7 pnoistor_dec01-sannashoukat_5213fe84-BA_before_RL-ad42.png


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sub_df.loc[:, self.MATCH] = sub_df[self.fkeys['FCLASS']].replace(ba_match_str, '', regex=True)


reports/pnoi_sync_data_plot7 pnoistor_feb2023-sananaushad_80e84b51-BA_before_LU-f6e2.png
reports/pnoi_sync_data_plot7 pnoistor_feb2023-sananaushad_80e84b51-BA_before_RU-f6e2.png
reports/pnoi_sync_data_plot7 pnoistor_feb2023-sananaushad_80e84b51-BA_before_LL-f6e2.png


 12%|█▏        | 3/25 [01:25<11:49, 32.26s/it]

reports/pnoi_sync_data_plot7 pnoistor_feb2023-sananaushad_80e84b51-BA_before_RL-f6e2.png


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sub_df.loc[:, self.MATCH] = sub_df[self.fkeys['FCLASS']].replace(ba_match_str, '', regex=True)


reports/pnoi_sync_data_plot7 pnoistor_dec01-saikeerthanaarun_3364bc1a-BA_before_LU-cb51.png
reports/pnoi_sync_data_plot7 pnoistor_dec01-saikeerthanaarun_3364bc1a-BA_before_RU-cb51.png
reports/pnoi_sync_data_plot7 pnoistor_dec01-saikeerthanaarun_3364bc1a-BA_before_LL-cb51.png


 16%|█▌        | 4/25 [01:43<09:17, 26.53s/it]

reports/pnoi_sync_data_plot7 pnoistor_dec01-saikeerthanaarun_3364bc1a-BA_before_RL-cb51.png


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sub_df.loc[:, self.MATCH] = sub_df[self.fkeys['FCLASS']].replace(ba_match_str, '', regex=True)


reports/pnoi_sync_data_plot7 pnoistor_dec01-kumarchowdam_53f32e31-BA_before_LU-91bc.png
reports/pnoi_sync_data_plot7 pnoistor_dec01-kumarchowdam_53f32e31-BA_before_RU-91bc.png
reports/pnoi_sync_data_plot7 pnoistor_dec01-kumarchowdam_53f32e31-BA_before_LL-91bc.png


 20%|██        | 5/25 [02:04<08:10, 24.53s/it]

reports/pnoi_sync_data_plot7 pnoistor_dec01-kumarchowdam_53f32e31-BA_before_RL-91bc.png


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sub_df.loc[:, self.MATCH] = sub_df[self.fkeys['FCLASS']].replace(ba_match_str, '', regex=True)


reports/pnoi_sync_data_plot7 pnoistor_feb2023-darshanvshettar_843c416b-BA_before_LU-2815.png
reports/pnoi_sync_data_plot7 pnoistor_feb2023-darshanvshettar_843c416b-BA_before_RU-2815.png


 24%|██▍       | 6/25 [02:25<07:19, 23.16s/it]

reports/pnoi_sync_data_plot7 pnoistor_feb2023-darshanvshettar_843c416b-BA_before_LL-2815.png


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sub_df.loc[:, self.MATCH] = sub_df[self.fkeys['FCLASS']].replace(ba_match_str, '', regex=True)


reports/pnoi_sync_data_plot7 pnoistor_feb2023-atifahmed_ebb5032c-BA_before_LU-fb27.png
reports/pnoi_sync_data_plot7 pnoistor_feb2023-atifahmed_ebb5032c-BA_before_RU-fb27.png
reports/pnoi_sync_data_plot7 pnoistor_feb2023-atifahmed_ebb5032c-BA_before_LL-fb27.png


 28%|██▊       | 7/25 [03:20<10:04, 33.61s/it]

reports/pnoi_sync_data_plot7 pnoistor_feb2023-atifahmed_ebb5032c-BA_before_RL-fb27.png


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sub_df.loc[:, self.MATCH] = sub_df[self.fkeys['FCLASS']].replace(ba_match_str, '', regex=True)


reports/pnoi_sync_data_plot7 pnoistor_oct07-johnkiranborugada_e390538c-BA_before_LU-b62f.png
reports/pnoi_sync_data_plot7 pnoistor_oct07-johnkiranborugada_e390538c-BA_before_RU-b62f.png
reports/pnoi_sync_data_plot7 pnoistor_oct07-johnkiranborugada_e390538c-BA_before_LL-b62f.png


 32%|███▏      | 8/25 [03:48<09:01, 31.84s/it]

reports/pnoi_sync_data_plot7 pnoistor_oct07-johnkiranborugada_e390538c-BA_before_RL-b62f.png


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sub_df.loc[:, self.MATCH] = sub_df[self.fkeys['FCLASS']].replace(ba_match_str, '', regex=True)


reports/pnoi_sync_data_plot7 pnoistor_oct07-amartyaveer_81b8f33c-BA_before_LU-c03e.png
reports/pnoi_sync_data_plot7 pnoistor_oct07-amartyaveer_81b8f33c-BA_before_RU-c03e.png
reports/pnoi_sync_data_plot7 pnoistor_oct07-amartyaveer_81b8f33c-BA_before_LL-c03e.png


 36%|███▌      | 9/25 [04:14<07:59, 29.94s/it]

reports/pnoi_sync_data_plot7 pnoistor_oct07-amartyaveer_81b8f33c-BA_before_RL-c03e.png


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sub_df.loc[:, self.MATCH] = sub_df[self.fkeys['FCLASS']].replace(ba_match_str, '', regex=True)


reports/pnoi_sync_data_plot7 pnoistor_oct07-jesurajabandekar_7273cc8f-BA_before_LU-ed13.png
reports/pnoi_sync_data_plot7 pnoistor_oct07-jesurajabandekar_7273cc8f-BA_before_RU-ed13.png


 40%|████      | 10/25 [04:30<06:25, 25.68s/it]

reports/pnoi_sync_data_plot7 pnoistor_oct07-jesurajabandekar_7273cc8f-BA_before_LL-ed13.png


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sub_df.loc[:, self.MATCH] = sub_df[self.fkeys['FCLASS']].replace(ba_match_str, '', regex=True)


reports/pnoi_sync_data_plot7 pnoistor_oct07-saurabhkumar_92bef9b7-BA_before_LU-edb7.png
reports/pnoi_sync_data_plot7 pnoistor_oct07-saurabhkumar_92bef9b7-BA_before_RU-edb7.png
reports/pnoi_sync_data_plot7 pnoistor_oct07-saurabhkumar_92bef9b7-BA_before_LL-edb7.png


 44%|████▍     | 11/25 [04:48<05:29, 23.51s/it]

reports/pnoi_sync_data_plot7 pnoistor_oct07-saurabhkumar_92bef9b7-BA_before_RL-edb7.png


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sub_df.loc[:, self.MATCH] = sub_df[self.fkeys['FCLASS']].replace(ba_match_str, '', regex=True)


reports/pnoi_sync_data_plot7 pnoistor_dec01-sagarkumar_b77e6507-BA_before_LU-f8bd.png
reports/pnoi_sync_data_plot7 pnoistor_dec01-sagarkumar_b77e6507-BA_before_RU-f8bd.png
reports/pnoi_sync_data_plot7 pnoistor_dec01-sagarkumar_b77e6507-BA_before_LL-f8bd.png


 48%|████▊     | 12/25 [05:19<05:32, 25.59s/it]

reports/pnoi_sync_data_plot7 pnoistor_dec01-sagarkumar_b77e6507-BA_before_RL-f8bd.png


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sub_df.loc[:, self.MATCH] = sub_df[self.fkeys['FCLASS']].replace(ba_match_str, '', regex=True)


reports/pnoi_sync_data_plot7 pnoistor_oct07-muralikadambi_3895019a-BA_before_LU-08c1.png
reports/pnoi_sync_data_plot7 pnoistor_oct07-muralikadambi_3895019a-BA_before_RU-08c1.png
reports/pnoi_sync_data_plot7 pnoistor_oct07-muralikadambi_3895019a-BA_before_LL-08c1.png


 52%|█████▏    | 13/25 [05:34<04:28, 22.39s/it]

reports/pnoi_sync_data_plot7 pnoistor_oct07-muralikadambi_3895019a-BA_before_RL-08c1.png


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sub_df.loc[:, self.MATCH] = sub_df[self.fkeys['FCLASS']].replace(ba_match_str, '', regex=True)


reports/pnoi_sync_data_plot7 pnoistor_apr2023-anitajoby_8bf5d711-BA_before_LU-9304.png
reports/pnoi_sync_data_plot7 pnoistor_apr2023-anitajoby_8bf5d711-BA_before_RU-9304.png
reports/pnoi_sync_data_plot7 pnoistor_apr2023-anitajoby_8bf5d711-BA_before_LL-9304.png
reports/pnoi_sync_data_plot7 pnoistor_apr2023-anitajoby_8bf5d711-BA_before_RL-9304.png
reports/pnoi_sync_data_plot7 pnoistor_apr2023-anitajoby_8bf5d711-BA_after_LU-a97f.png
reports/pnoi_sync_data_plot7 pnoistor_apr2023-anitajoby_8bf5d711-BA_after_RU-a97f.png
reports/pnoi_sync_data_plot7 pnoistor_apr2023-anitajoby_8bf5d711-BA_after_LL-a97f.png


 56%|█████▌    | 14/25 [06:13<05:03, 27.62s/it]

reports/pnoi_sync_data_plot7 pnoistor_apr2023-anitajoby_8bf5d711-BA_after_RL-a97f.png


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sub_df.loc[:, self.MATCH] = sub_df[self.fkeys['FCLASS']].replace(ba_match_str, '', regex=True)


reports/pnoi_sync_data_plot7 pnoistor_may2023-valli_82ee8284-BA_before_LU-c74e.png
reports/pnoi_sync_data_plot7 pnoistor_may2023-valli_82ee8284-BA_before_RU-c74e.png
reports/pnoi_sync_data_plot7 pnoistor_may2023-valli_82ee8284-BA_before_LL-c74e.png
reports/pnoi_sync_data_plot7 pnoistor_may2023-valli_82ee8284-BA_before_RL-c74e.png
reports/pnoi_sync_data_plot7 pnoistor_may2023-valli_82ee8284-BA_after_LU-8748.png
reports/pnoi_sync_data_plot7 pnoistor_may2023-valli_82ee8284-BA_after_RU-8748.png
reports/pnoi_sync_data_plot7 pnoistor_may2023-valli_82ee8284-BA_after_LL-8748.png


 60%|██████    | 15/25 [07:22<06:39, 39.98s/it]

reports/pnoi_sync_data_plot7 pnoistor_may2023-valli_82ee8284-BA_after_RL-8748.png


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sub_df.loc[:, self.MATCH] = sub_df[self.fkeys['FCLASS']].replace(ba_match_str, '', regex=True)


reports/pnoi_sync_data_plot7 pnoistor_may2023-shivaganga_46c140d9-BA_before_LU-1132.png
reports/pnoi_sync_data_plot7 pnoistor_may2023-shivaganga_46c140d9-BA_before_RU-1132.png
reports/pnoi_sync_data_plot7 pnoistor_may2023-shivaganga_46c140d9-BA_before_LL-1132.png
reports/pnoi_sync_data_plot7 pnoistor_may2023-shivaganga_46c140d9-BA_before_RL-1132.png
reports/pnoi_sync_data_plot7 pnoistor_may2023-shivaganga_46c140d9-BA_after_LU-caee.png
reports/pnoi_sync_data_plot7 pnoistor_may2023-shivaganga_46c140d9-BA_after_RU-caee.png
reports/pnoi_sync_data_plot7 pnoistor_may2023-shivaganga_46c140d9-BA_after_LL-caee.png
reports/pnoi_sync_data_plot7 pnoistor_may2023-shivaganga_46c140d9-BA_after_RL-caee.png


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sub_df.loc[:, self.MATCH] = sub_df[self.fkeys['FCLASS']].replace(ba_match_str, '', regex=True)


reports/pnoi_sync_data_plot7 pnoistor_oct07-nanjazhakig_7876351b-BA_before_LU-a3d0.png
reports/pnoi_sync_data_plot7 pnoistor_oct07-nanjazhakig_7876351b-BA_before_RU-a3d0.png
reports/pnoi_sync_data_plot7 pnoistor_oct07-nanjazhakig_7876351b-BA_before_LL-a3d0.png
reports/pnoi_sync_data_plot7 pnoistor_oct07-nanjazhakig_7876351b-BA_before_RL-a3d0.png
reports/pnoi_sync_data_plot7 pnoistor_oct07-nanjazhakig_7876351b-BA_after_LU-b578.png
reports/pnoi_sync_data_plot7 pnoistor_oct07-nanjazhakig_7876351b-BA_after_RU-b578.png
reports/pnoi_sync_data_plot7 pnoistor_oct07-nanjazhakig_7876351b-BA_after_LL-b578.png


 68%|██████▊   | 17/25 [11:06<09:59, 74.99s/it]

reports/pnoi_sync_data_plot7 pnoistor_oct07-nanjazhakig_7876351b-BA_after_RL-b578.png


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sub_df.loc[:, self.MATCH] = sub_df[self.fkeys['FCLASS']].replace(ba_match_str, '', regex=True)


reports/pnoi_sync_data_plot7 pnoistor_dec01-vijayaomkar_b866b679-BA_before_LU-1970.png


In [183]:
pnoidata_audsync.PNOI_SPLIT_AUD_DF

Unnamed: 0.1,Unnamed: 0,audio--file_path,anot--file_path,file_class,sub_id
0,0,reports/data_export7/shreyamgupta_78aa423a/pno...,reports/data_export7/shreyamgupta_78aa423a/pno...,VBA_before_LU,shreyamgupta_78aa423a
1,1,reports/data_export7/shreyamgupta_78aa423a/pno...,reports/data_export7/shreyamgupta_78aa423a/pno...,VBA_before_RU,shreyamgupta_78aa423a
2,2,reports/data_export7/shreyamgupta_78aa423a/pno...,reports/data_export7/shreyamgupta_78aa423a/pno...,VBA_before_LL,shreyamgupta_78aa423a
3,3,reports/data_export7/shreyamgupta_78aa423a/pno...,reports/data_export7/shreyamgupta_78aa423a/pno...,LBA_before_LU,shreyamgupta_78aa423a
4,4,reports/data_export7/shreyamgupta_78aa423a/pno...,reports/data_export7/shreyamgupta_78aa423a/pno...,LBA_before_RU,shreyamgupta_78aa423a
...,...,...,...,...,...
275,275,reports/data_export7/lokeshk_90b4871a/pnoistor...,reports/data_export7/lokeshk_90b4871a/pnoistor...,VBA_before_RL,lokeshk_90b4871a
276,276,reports/data_export7/lokeshk_90b4871a/pnoistor...,reports/data_export7/lokeshk_90b4871a/pnoistor...,LBA_before_LU,lokeshk_90b4871a
277,277,reports/data_export7/lokeshk_90b4871a/pnoistor...,reports/data_export7/lokeshk_90b4871a/pnoistor...,LBA_before_RU,lokeshk_90b4871a
278,278,reports/data_export7/lokeshk_90b4871a/pnoistor...,reports/data_export7/lokeshk_90b4871a/pnoistor...,LBA_before_LL,lokeshk_90b4871a
