#### Add defect depths to parsed XML csv
----------------------------
Version 1

### Imports

In [1]:
import numpy as np
import pandas as pd

import os
from os import listdir

### File Paths

Set the file paths to the directory containing the csv outputs, the parsed XML csv file, and the directory where the cleaned file will be written.

In [2]:
# Set to the directory containing the dicoms and the "output" folder
# (the exam csv files are read from the "output" sub-folder of this directory)
output_fp = "/Users/nicolebrye/Desktop/HGC/Data_Management/HFA3_60_4_Defect_Tool/AF_DCM_Matlab20210818-Z7VjKdO6X4"

# Set to the full path of the parsed XML csv file, including the name of the file itself
csv_fp = "/Users/nicolebrye/Desktop/HGC/Data_Management/HFA3_60_4_Defect_Tool/testConvertedXML60-4.csv"

# Set to the directory where the new csv ("defect_depths.csv") will be written
out_fp = "/Users/nicolebrye/Desktop/HGC/Data_Management/HFA3_60_4_Defect_Tool"

### Reading in files

The files are read in, and the paths are checked to ensure they are set correctly.

In [3]:
# Exam csv outputs are expected in the "output" sub-folder of output_fp
path = os.path.join(output_fp, "output")

try:
    # Sorted so the exams are processed in a reproducible order
    # (the order returned by listdir is arbitrary)
    files = sorted(file for file in listdir(path) if file.endswith(".csv"))
except FileNotFoundError:
    print("Output folder directory error - double check and make sure output_fp is pointing to the right spot")
    # Re-raise so execution stops here; continuing would leave `files` undefined
    # (or stale from an earlier run) for the cells below
    raise

try:
    temp = pd.read_csv(csv_fp)
except FileNotFoundError:
    print("Parsed XML directory error - double check and make sure csv_fp is pointing to the right spot")
    # Re-raise so the copy below never runs on a missing or stale `temp`
    raise

# Work on a copy so the parsed XML data as read from disk stays untouched in `temp`
DATA = temp.copy()

### Adding defect depths to parsed XML

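The exam files are looped over; for each one, the values of its defect depth column are written into the defect columns of the matching row (same patient ID, visit date, and exam time) of the parsed XML data frame.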

In [4]:
# Determine which columns to replace data for. The span of defect columns is the
# same for every exam, so it is located once here instead of on every iteration.
start = np.where(DATA.columns == "N30_S42_Defect")[0][0]
end   = np.where(DATA.columns == "T18_I54_Defect")[0][0] + 1
defect_cols = DATA.columns[start:end]

for file_name in files:

    # Read in the exam data
    dat = pd.read_csv(os.path.join(path, file_name))

    # Save important attributes for exam data (identifiers are taken from the first row)
    date  = str(pd.to_datetime(dat["ExamDate"][0]).date())
    ID    = dat["PatientID"][0]
    time  = dat["ExamTime"][0]
    depth = dat["defect_depth"]

    # Rows of the parsed XML data frame that belong to this exam
    match = ((DATA.PATIENT_ID == ID) &
             (DATA.VISIT_DATE == date) &
             (DATA.EXAM_TIME == time))

    # Report exams that have no counterpart in the parsed XML instead of
    # skipping them without any trace
    if not match.any():
        print("No matching row in parsed XML for " + file_name + " - defect depths not added")
        continue

    # One depth value per defect column; a length mismatch raises here
    DATA.loc[match, defect_cols] = list(depth)
    

### Write to csv

The data frame with the defect depths added is written to `defect_depths.csv` in the directory specified by `out_fp`.

In [5]:
# Write the updated data frame to "defect_depths.csv" inside out_fp.
# OSError is caught rather than only FileNotFoundError: recent pandas versions
# report a missing target directory as a plain OSError, which the narrower
# handler would let through without showing the hint below.
try:
    DATA.to_csv(os.path.join(out_fp, "defect_depths.csv"), index = False)
except OSError as err:
    print("Csv output directory error - double check and make sure out_fp is pointing to the right spot")
    # Show the underlying cause as well (e.g. missing directory vs. permissions)
    print(err)
    