In [4]:
# imports
import matplotlib.pyplot as plt 
%matplotlib inline
import pandas as pd
import glob
import os
import csv
import re
import more_itertools as mit

In [None]:
os.getcwd()

In [None]:
# extract all files in the annotation folder and write to a list
path = "../../data/annotations/csv/"

# os.scandir() returns an iterator that holds an open directory handle; the
# context manager releases it as soon as the listing has been collected.
# Entries keep whatever order the OS yields them in, and every regular file
# is kept regardless of its name or extension.
with os.scandir(path) as fileScanner:
    AnnFiles = [file.name for file in fileScanner if file.is_file()]

In [None]:
# show one name
AnnFiles[0:5]

In [None]:
# make sure path is correct
f'../../data/annotations/csv/{AnnFiles[314]}'

In [None]:
# loop over each AnnFiles and transpose files as needed
# A file that parses to exactly 5 rows is treated as row-oriented and is
# rewritten in place as its transpose; every other file is left untouched.
# NOTE(review): a file whose transposed form also has exactly 5 rows would be
# flipped back on a second run of this cell -- confirm no such file exists.
import time
start_time = time.time()

for i, ann_name in enumerate(AnnFiles):
    ann_path = f'../../data/annotations/csv/{ann_name}'

    try:
        df = pd.read_csv(ann_path, header=None)

        if len(df) == 5:
            df = df.T
            df.to_csv(ann_path, header=False, index=False)

    except Exception as exc:
        # Report the index (used by later cells) together with the file name
        # and the reason. Catching Exception rather than using a bare except
        # keeps Ctrl-C / kernel interrupts working.
        print(i, ann_name, repr(exc))

print("--- %s seconds ---" % (time.time() - start_time))

In [15]:
def delete_keys(d):
    """Prune a nested ``{event: {key: info}}`` annotation dict in place.

    Two kinds of entries are removed:

    * every ``d[event][key]`` whose ``info`` dict does not contain both a
      ``'start'`` and a ``'stop'`` entry;
    * every ``event`` whose dict was already empty when the function was
      called (events that only become empty through the pruning above are
      kept, matching the order of checks in the previous implementation).

    Args:
        d: dict mapping event names to dicts of ``{key: info}``; mutated.

    Returns:
        None.
    """
    # Collect first, delete afterwards: a dict must not change size while it
    # is being iterated. The list spans *all* events, not just the last one.
    incomplete = [
        (event, k)
        for event, entries in d.items()
        for k, info in entries.items()
        if not ('start' in info and 'stop' in info)
    ]
    empty_events = [event for event, entries in d.items() if not entries]

    for event, k in incomplete:
        del d[event][k]
    for event in empty_events:
        del d[event]

      
 

In [27]:
"""Accessing and interacting with Record files"""

class Record():
    """Class for interacting with record files.

    A record consists of a signal CSV (loaded into ``self._signalDf``) and an
    annotation CSV. Parsed annotations live in ``self.ann``, a dict of
    ``{event: {annotation_id: {'start': row, 'stop': row}}}``; entries under
    ``'UC'`` additionally get a ``'label'`` once ``add_labels`` has run.
    """

    # A contraction is reported by findMissing() when its FHR trace has more
    # than this many consecutive missing samples. The original comment calls
    # this "over 15 seconds" -- presumably 4 samples/second; TODO confirm.
    MISSING_RUN_LIMIT = 60

    # (regex, event) pairs scanned on every annotation row, in this order.
    # ``None`` means the event is derived from the token's last character
    # through ``self.decel_map``.
    _MARKER_PATTERNS = (
        (r'[\(\)]UC\d+', 'UC'),
        (r'[\(\)]DEC\w+', None),
        (r'[\(\)]ACC\d+', 'ACC'),
        (r'[\(\)]TC\d+', 'TACHY'),
        (r'[\(\)]BC\d+', 'BRADY'),
    )

    def __init__(self, record_name: str) -> None:
        """Initializes the record: loads the signal CSV and the annotations.

        Args:
            record_name: record identifier used to build both file names.
        """
        self.record_name = record_name
        self.ann = { 'UC': {},
                     'DEC_EARLY': {},
                     'DEC_VAR': {},
                     'DEC_LATE': {},
                     'DEC_PROLONG': {},
                     'ACC': {},
                     'TACHY': {},
                     'BRADY': {}
                     }

        # Last character of a DEC token -> deceleration category.
        self.decel_map = {'E':'DEC_EARLY',
              'V':'DEC_VAR',
              'L':'DEC_LATE',
              'P':'DEC_PROLONG'}

        # The literal '0.0' is read as missing data.
        self._signalDf = pd.read_csv(f'../../data/database/signals/{record_name}.csv', na_values=['0.0'])

        # Call to get annotations
        self.__getannotations(self.record_name)

    def __getannotations(self, record_name) -> None:
        """Gets the annotations from the annotation csv file
        and populates the appropriate annotation dict.

        Each CSV row is concatenated into one string and scanned for tokens of
        the form ``(NAME`` (opens an annotation at this row index) and
        ``)NAME`` (closes it, only if it was opened before). Afterwards
        ``delete_keys`` prunes the result and ``add_labels`` labels the UCs.

        Raises:
            KeyError: if a DEC token ends in a character not in ``decel_map``.
        """
        with open(f'../../data/annotations/csv/annotation_{record_name}.csv', newline='',
                  encoding='UTF-8') as csvfile:
            annreader = csv.reader(csvfile, delimiter=',')
            for i, row in enumerate(annreader):
                # NOTE(review): cells are joined without a separator, so a
                # token followed by a digit/word cell would merge with it --
                # confirm the annotation files never contain such rows.
                joined_row = ''.join(row)

                for pattern, event in self._MARKER_PATTERNS:
                    for ann in re.findall(pattern, joined_row):
                        if event is None:
                            target = self.ann[self.decel_map[ann[-1]]]
                        else:
                            target = self.ann[event]
                        key = ann[1:]
                        if ann[0] == '(':
                            target[key] = {'start': i}
                        elif key in target:
                            # ')' marker for an annotation that was opened
                            target[key]['stop'] = i

        delete_keys(self.ann)
        self.add_labels()

    def add_labels(self):
        """Adds a numeric ``'label'`` to every UC annotation.

        A UC gets the code of an event whose start row or stop row falls in
        ``[uc_start, uc_stop)``; when several events qualify the last one
        visited wins. UCs without such an event get label 0.
        """
        event_dict = { 'NONE': 0,
                     'DEC_EARLY': 1,
                     'DEC_VAR': 2,
                     'DEC_LATE': 3,
                     'DEC_PROLONG': 4,
                     'ACC': 5,
                     'TACHY': 6,
                     'BRADY': 7
        }

        # delete_keys() drops events that were empty, so 'UC' may be absent.
        other_events = [name for name in self.ann if name != 'UC']

        for uc in self.ann.get('UC', {}).values():
            uc_start, uc_stop = uc['start'], uc['stop']

            for event in other_events:
                for span in self.ann[event].values():
                    estart, estop = span['start'], span['stop']
                    # NOTE(review): an event that starts before the UC and
                    # ends after it is not matched by this test -- confirm
                    # that this is intended.
                    if (uc_start <= estart < uc_stop) or (uc_start <= estop < uc_stop):
                        uc['label'] = event_dict[event]

            uc.setdefault('label', event_dict['NONE'])

    def plotUC(self, ucNum: int):
        """Shows the plot for contraction ``UC<ucNum>``; no-op if it does not exist."""
        plotNum = 'UC'+str(ucNum)
        uc = self.ann.get('UC', {}).get(plotNum)
        if uc is not None:
            self.createPlot(uc['start'], uc['stop'], plotNum)
            plt.show()

    def createPlot(self, start: int, end: int, plotID: str):
        """Plots FHR (top) and uterine contraction (bottom) for rows start:end.

        Missing samples inside the window are linearly interpolated for
        display only; ``self._signalDf`` is not modified.

        Args:
            start: first row (positional) of the window.
            end: row after the last one of the window.
            plotID: text appended to the figure title.
        """
        segment = self._signalDf.iloc[start:end]
        x = segment['seconds'].to_numpy()
        y_uc = segment['UC'].interpolate(method='linear').to_numpy()
        y_fhr = segment['FHR'].interpolate(method='linear').to_numpy()

        fig, axs = plt.subplots(nrows=2, ncols=1, sharex=True)
        # NOTE(review): the x data is the 'seconds' column but the limits are
        # row positions (+2) -- confirm the two are on the same scale.
        # FHR subplot
        axs[0].set_title(f'Record {self.record_name} {plotID}')
        axs[0].plot(x, y_fhr, '#1f77b4')
        axs[0].set_xlim(start+2, end+2)
        axs[0].set_ylabel('FHR')
        axs[0].set_ylim(0, 220)
        # Uterine Contraction subplot
        axs[1].plot(x, y_uc, '#ff7f0e')
        axs[1].set_xlim(start+2, end+2)
        axs[1].set_ylim(0, 140)
        axs[1].set_ylabel('Uterine Contraction')
        fig.set_size_inches(8, 4)

    def _savePlot(self, start, end, plotID):
        """Renders rows start:end and writes the figure to
        ``../../data/images/record_<record_name>_<plotID>.png``."""
        self.createPlot(start, end, plotID)

        fig = plt.gcf()
        fig.set_size_inches(18, 8)
        fig.savefig(f"../../data/images/record_{self.record_name}_{plotID}.png", bbox_inches='tight')
        # Close explicitly so figures do not pile up when saving many images.
        plt.close(fig)

    def saveImages(self):
        """Saves one image per contraction, skipping those reported by findMissing()."""
        missingdata = self.findMissing()

        for key, uc in self.ann.get('UC', {}).items():
            if int(key[2:]) not in missingdata:
                self._savePlot(uc['start'], uc['stop'], key)

        print(f"Images for Record {self.record_name} have been saved.")

    def clearImages(self):
        """Deletes this record's PNG images below ../../data/images.

        Covers both the per-record directory layout
        (``record_<name>/*.png``) and the flat file names that ``_savePlot``
        writes (``record_<name>_<plotID>.png``). Files that cannot be removed
        are reported and skipped.
        """
        patterns = (
            f'../../data/images/record_{self.record_name}/*.png',
            f'../../data/images/record_{self.record_name}_*.png',
        )

        for pattern in patterns:
            for img in glob.glob(pattern):
                try:
                    os.remove(img)
                except OSError as e:
                    print("Error: %s : %s" % (img, e.strerror))

    def findMissing(self):
        """Finds contractions whose FHR trace has a long gap.

        Returns:
            list[int]: the UC numbers (the digits of the ``'UC<n>'`` key)
            whose ``start:stop`` window contains more than
            ``MISSING_RUN_LIMIT`` consecutive missing FHR samples. Each
            number appears at most once.
        """
        fhr = self._signalDf['FHR']
        uc_list = []

        for key, uc in self.ann.get('UC', {}).items():
            missing = fhr.iloc[uc['start']:uc['stop']].isna().to_numpy()

            # length of the longest run of consecutive missing samples
            longest = run = 0
            for is_missing in missing:
                run = run + 1 if is_missing else 0
                longest = max(longest, run)

            if longest > self.MISSING_RUN_LIMIT:
                uc_list.append(int(key[2:]))

        return uc_list

In [28]:
### testing - HK

rec = Record('1035')
rec.ann

{'UC': {'UC1': {'start': 1694, 'stop': 1976, 'label': 0},
  'UC2': {'start': 2519, 'stop': 2747, 'label': 0},
  'UC4': {'start': 3102, 'stop': 3417, 'label': 0},
  'UC3': {'start': 3739, 'stop': 3939, 'label': 0},
  'UC5': {'start': 4800, 'stop': 4974, 'label': 0},
  'UC6': {'start': 5458, 'stop': 5758, 'label': 0},
  'UC7': {'start': 7068, 'stop': 7294, 'label': 0},
  'UC8': {'start': 8743, 'stop': 9226, 'label': 0},
  'UC9': {'start': 9828, 'stop': 9984, 'label': 0},
  'UC10': {'start': 11164, 'stop': 11692, 'label': 0},
  'UC11': {'start': 11833, 'stop': 11999, 'label': 0},
  'UC12': {'start': 12320, 'stop': 12422, 'label': 0},
  'UC13': {'start': 12621, 'stop': 12698, 'label': 1},
  'UC14': {'start': 12911, 'stop': 13004, 'label': 0},
  'UC15': {'start': 13176, 'stop': 13277, 'label': 0},
  'UC16': {'start': 13715, 'stop': 13951, 'label': 5},
  'UC17': {'start': 14039, 'stop': 14303, 'label': 5},
  'UC18': {'start': 14400, 'stop': 14777, 'label': 0},
  'UC19': {'start': 14879, 'sto

In [None]:
if not rec.ann['TACHY']:
    print('Y')

In [None]:
rec.plotUC(17)

In [None]:
# NOTE(review): scratch draft -- this cell does not parse and cannot run:
#   * `def add_labels(self.ann):` is not a valid parameter list and the
#     function has no indented body;
#   * `self.ann['UC'][]` has an empty subscript.
# It also targets a layout (`record_dict["DEC"][<type>][<id>]`) that
# Record.ann does not have: Record stores decelerations under 'DEC_EARLY',
# 'DEC_VAR', 'DEC_LATE' and 'DEC_PROLONG'. Record.add_labels implements the
# same overlap test with a different label numbering.
record_dict = rec.ann

# Each UC entry is a dict, so this unpacking yields its *keys*, not the
# start/stop row numbers.
uc_start, uc_stop = record_dict['UC']['UC13']

# NONE - 0
# ACC  - 1
# DECE - 2
# DECV - 3
# DECL - 4
# DECP - 5

label = 0
event_dict = {'E':2,
              'V':3,
              'L':4,
              'P':5}



def add_labels(self.ann):
for key in record_dict["DEC"]:
    for dec in record_dict["DEC"][key]:
        estart, estop = record_dict["DEC"][key][dec]
        if (estart >= uc_start and estart < uc_stop) or (estop >= uc_start and estop < uc_stop):
            label = event_dict[key]
            self.ann['UC'][]
            print(label)  
      

In [None]:
# File names look like 'annotation_<record>.csv': strip the 11-character
# prefix and the 4-character extension to get the record number.
record_numbers = [ann_name[11:-4] for ann_name in AnnFiles]

# Record numbers whose images could not be generated.
error_list = []
for i in record_numbers:
    try:
        rec = Record(i)
        rec.saveImages()

        del rec
    except Exception as exc:
        # Keep going with the next record, but say why this one failed.
        # Catching Exception (not a bare except) leaves kernel interrupts
        # working.
        print(f"record {i!r} failed: {exc!r}")
        error_list.append(i)

In [None]:
# deletes bad records (printed above from the loop)
# Filter by name instead of by list position: positions depend on the order
# the OS lists the directory in, and positional deletes remove two more
# entries every time the cell is re-run. This keeps only
# 'annotation_<record>.csv' files (dropping e.g. .DS_Store) and excludes
# record 1314.
AnnFiles[:] = [
    ann_name for ann_name in AnnFiles
    if ann_name.startswith('annotation_')
    and ann_name.endswith('.csv')
    and ann_name != 'annotation_1314.csv'
]

In [None]:
# record number = file name without its first 11 and last 4 characters
record_numbers = [ann_file[11:-4] for ann_file in AnnFiles]
len(record_numbers)

In [None]:
# gets every single plot (just prints - need to figure out how to save)

error_list = [] # record numbers for which plotting raised
for record_number in record_numbers:
    rec = Record(str(record_number))
    try:
        # Iterate over the UC ids that actually exist ('UC<n>' -> n) instead
        # of range(len(...)), which asks for UC0 and never reaches the last
        # contraction.
        for uc_key in list(rec.ann.get('UC', {})):
            rec.plotUC(int(uc_key[2:]))
    except Exception:
        # A separate loop variable for the UCs keeps `record_number` intact,
        # so the failing *record* is what gets recorded here.
        error_list.append(record_number)

In [None]:
# UCs over 100 (1172 UC21)

rec = Record('1172')
rec.plotUC(21)

In [None]:
rec._signalDf['UC'].max() # can we show above 100?? current y-limit at 100

In [None]:
# missing data - 15 consecutive seconds of data loss for UC or FHR (1018 UC21)
rec = Record('1018')

# Plot every annotated contraction by its real id ('UC<n>' -> n); a plain
# range(len(...)) would start at the non-existent UC0 and skip the last one.
for uc_key in rec.ann.get('UC', {}):
    rec.plotUC(int(uc_key[2:]))

In [None]:
def findMissing(record, max_missing_run=60):
    """Finds the contractions of a record whose FHR trace has a long gap.

    Args:
        record: record identifier; converted with ``str`` and passed to
            ``Record``.
        max_missing_run: a contraction is reported when it contains more than
            this many consecutive missing FHR samples. The default of 60 is
            described as "over 15 seconds" by the original comment --
            presumably 4 samples/second; TODO confirm.

    Returns:
        list[int]: UC numbers (the digits of each ``'UC<n>'`` key) with such a
        gap inside their ``start:stop`` window, each listed at most once.
    """
    rec = Record(str(record))
    fhr = rec._signalDf['FHR']

    uc_list = []
    # Each UC entry is a dict with 'start'/'stop' row numbers.
    for key, uc in rec.ann.get('UC', {}).items():
        missing = fhr.iloc[uc['start']:uc['stop']].isna().to_numpy()

        # length of the longest run of consecutive missing samples
        longest = run = 0
        for is_missing in missing:
            run = run + 1 if is_missing else 0
            longest = max(longest, run)

        if longest > max_missing_run:
            # report the annotation's own number, not its list position
            uc_list.append(int(key[2:]))

    return uc_list

In [None]:
findMissing(1001)

In [None]:
rec.ann['UC']

In [None]:
int_df = rec._signalDf.interpolate(method='linear')

In [None]:
rec._signalDf = int_df

In [None]:
rec.plotUC(5)

In [None]:
# Record has no `acc` attribute; accelerations are stored under
# rec.ann['ACC'] (the key is absent when none were annotated).
rec.ann.get('ACC', {})

In [None]:
# NOTE(review): `rec1001` is not defined in any cell of this notebook, so on
# a fresh kernel this cell raises NameError -- presumably
# rec1001 = Record('1001') was run in a cell that has since been removed.
# `interp1d` is imported here but not used in the visible cells.
import numpy as np
from scipy.interpolate import interp1d
uc = rec1001._signalDf['UC']
fhr = rec1001._signalDf['FHR']
# uc.interpolate(method='spline', order=3)
# uc


In [None]:
uc.last_valid_index()

In [None]:
#fhr.iloc[19017]

In [None]:
uc.iloc[14938]

In [None]:
rec1018._signalDf

In [None]:

# NOTE(review): `t` and `rec1018` are not defined in any visible cell
# (presumably t = uc.isna() and rec1018 = Record('1018') -- confirm).
# If t is a boolean mask over `uc`, np.interp returns one value per True
# position only, so the new Series is shorter than the frame and lines up
# with its first rows rather than with the gaps -- verify before relying on it.
rec1018._signalDf['UC'] = pd.Series(np.interp(np.flatnonzero(t), np.flatnonzero(~t), uc[~t]))

In [None]:
rec1018._signalDf

In [None]:
rec1001.plotUC(2)

In [None]:
rec1018.clearImages()

In [None]:
rec1018.saveImages()

In [None]:
# Characters [-8:-4] of each annotation file name, i.e. the four characters
# before the 4-character extension. A list (not a lazy one-shot `map`) so the
# cell that loops over it still sees the values when it is re-run.
record_nums = [name[-8:-4] for name in os.listdir("../../data/annotations/csv")]
# Last four characters of every entry directly below the images folder.
img_dirs = [entry[-4:] for entry in glob.glob('../../data/images/*', recursive=False)]

In [None]:
# Regenerate the images of every record that already has an entry in
# img_dirs. The loop variable is deliberately not called `rec`: that name
# holds a Record instance in other cells and would be overwritten by a
# plain string here.
for record_num in record_nums:
    if record_num in img_dirs:
        record = Record(record_num)
        record.clearImages()
        record.saveImages()
        del record