In [35]:
from openpyxl.styles import Color, PatternFill, Font, Border
from openpyxl.styles import colors
from openpyxl.cell import Cell

# Solid fills applied to result cells to flag them visually.
# redFill is given as an 8-digit aRGB value (opaque red).
redFill = PatternFill(fill_type='solid',
                      start_color='FFFF0000',
                      end_color='FFFF0000')
# NOTE(review): blueFill is given as a 6-digit value; read as RGB, '00FFFF'
# is cyan rather than pure blue -- confirm this is the intended colour.
blueFill = PatternFill(fill_type='solid',
                       start_color='00FFFF',
                       end_color='00FFFF')

## FUNCTIONS ##
"""
Returns string of hh-mm-ss
"""
from time import gmtime, strftime
def getTime():
    """Return the current UTC time formatted as ``HH-MM-SS``.

    The timestamp is taken from ``gmtime()``, so it is UTC rather than the
    machine's local time.
    """
    utc_now = gmtime()
    return strftime("%H-%M-%S", utc_now)

"""
Splits input string by delimiter
Returns list of these strings 
"""
import re
def split(string):
    """Split *string* into statements on the supported delimiters.

    A delimiter is a comma, a semicolon or a full stop followed by one
    space, or two consecutive spaces. The delimiters themselves are dropped.

    Args:
        string: Text to split.

    Returns:
        List of the substrings between delimiters. Text without any
        delimiter (including the empty string) comes back as a one-element
        list.
    """
    # Raw string: "\." inside a normal string literal is an invalid escape
    # sequence, which newer Python versions warn about.
    return re.split(r', |; |\. |  ', string)

"""
Adds a new column after the current max column of sheet
Labels first row with column_header
"""
def add_column(sheet, column_header):
    """Append a new column to *sheet* and write its header.

    The header goes into row 1 of the column immediately after
    ``sheet.max_column``.

    Args:
        sheet: Worksheet-like object exposing ``max_column`` and
            ``cell(row=..., column=...)``.
        column_header: Value to store in the new column's first row.
    """
    header_cell = sheet.cell(row=1, column=sheet.max_column + 1)
    header_cell.value = column_header

"""
Returns True if statement contains any of words in keywords
Else return False
"""
def contains(statement, keywords):
    """Report whether *statement* contains at least one of *keywords*.

    Args:
        statement: Text (or other container) searched with ``in``.
        keywords: Iterable of words to look for.

    Returns:
        True as soon as one keyword is found in *statement*, otherwise
        False (including when *keywords* is empty).
    """
    return any(word in statement for word in keywords)

"""
Shortens diagnostic_type
admission => addx
discharge => dcdx
"""
def get_shortened_diagnostic_type(diagnostic_type):
    """Map a diagnostic type to its short column prefix.

    "admission" maps to "addx" and "discharge" maps to "dcdx". Any other
    value prints a warning and yields an empty string.
    """
    known_types = (("admission", "addx"), ("discharge", "dcdx"))
    for full_name, short_name in known_types:
        if diagnostic_type == full_name:
            return short_name

    # Unknown type: warn and fall back to an empty prefix.
    print("WARNING: BAD DIAGNOSTIC TYPE: {}".format(diagnostic_type))
    return ""

"""
For each illness in diagnosis, label the corresponding column with a 1
"""
def one_hot_encode_diagnoses(sheet, diagnostic_type, ignore_keywords, illness_keywords):
    """Mark every illness mentioned in each row's diagnosis with a 1.

    For each data row (row 2 onwards) whose ``<diagnostic_type>_diagnosis``
    cell holds text, the lower-cased text is split into statements.
    Statements containing any ignore keyword are skipped; for every other
    statement, each illness whose keyword list matches gets a 1 written to
    its ``<short type>_<illness>`` cell. Other cells are left untouched.

    Column positions are read from the module-level ``column_names``
    mapping of header -> index into the row tuple (presumably built with
    get_column_names -- confirm against the caller).

    Args:
        sheet: Worksheet-like object exposing ``iter_rows`` and ``max_row``.
        diagnostic_type: "admission" or "discharge".
        ignore_keywords: Iterable of terms that exclude a statement.
        illness_keywords: Mapping of illness name -> iterable of keywords.

    Raises:
        KeyError: If a required header is missing from ``column_names``.
    """
    ## Setup diagnostic type
    diagnostic_type_shortened = get_shortened_diagnostic_type(diagnostic_type)
    diagnosis_header = '{}_diagnosis'.format(diagnostic_type)

    for row_cells in sheet.iter_rows(min_row=2, max_row=sheet.max_row):
        diagnosis = row_cells[column_names[diagnosis_header]].value

        ## Ensure valid cell data (skip empty or non-text cells)
        if not isinstance(diagnosis, str):
            continue

        ## Add illnesses for each statement in the lower-cased diagnosis
        for statement in split(diagnosis.lower()):

            # Ignore statements with key exclusion terms
            if contains(statement, ignore_keywords):
                continue

            ## Add all illnesses associated with each statement
            for illness, keywords in illness_keywords.items():
                if contains(statement, keywords):
                    illness_header = '{}_{}'.format(diagnostic_type_shortened, illness)
                    row_cells[column_names[illness_header]].value = 1
                    ## TODO: add exceptions
                                
"""
Find first illness in diagnosis, then label the main diagnosis
"""                           
def set_main_diagnosis(sheet, diagnostic_type, ignore_keywords, illness_keywords):
    """Write the main (first-mentioned) illness of each row's diagnosis.

    For each data row (row 2 onwards) whose ``<diagnostic_type>_diagnosis``
    cell holds text, the lower-cased text is split into statements and the
    statements are scanned in order. The first statement that contains no
    ignore keyword and matches at least one illness decides what goes into
    the ``main_<short type>`` cell:

    * exactly one illness: its name is written, fill unchanged;
    * several illnesses: tie-break rules run (drop "ASD" when "FASD" is
      present, drop "EOS" when "Personality" is present, drop "suicide"
      when it is one of exactly two remaining). If a single illness is
      left, its name is written with ``blueFill``; otherwise the string
      form of the remaining list is written with ``redFill``;
    * no statement matches: the cell gets ``redFill`` and its value is
      left untouched.

    Each tie-break removal is reported with ``print``.

    Column positions are read from the module-level ``column_names``
    mapping of header -> index into the row tuple (presumably built with
    get_column_names -- confirm against the caller).

    Args:
        sheet: Worksheet-like object exposing ``iter_rows`` and ``max_row``.
        diagnostic_type: "admission" or "discharge".
        ignore_keywords: Iterable of terms that exclude a statement.
        illness_keywords: Mapping of illness name -> iterable of keywords.

    Raises:
        KeyError: If a required header is missing from ``column_names``.
    """
    ## Setup diagnostic type
    diagnostic_type_shortened = get_shortened_diagnostic_type(diagnostic_type)
    diagnosis_header = '{}_diagnosis'.format(diagnostic_type)
    main_header = "main_{}".format(diagnostic_type_shortened)

    for row_cells in sheet.iter_rows(min_row=2, max_row=sheet.max_row):
        diagnosis = row_cells[column_names[diagnosis_header]].value

        ## Ensure valid cell data (skip empty or non-text cells)
        if not isinstance(diagnosis, str):
            continue

        main_cell = row_cells[column_names[main_header]]

        ## Get diagnosis statements in lower-case
        diagnoses = split(diagnosis.lower())

        ## Scan statements in order until one yields at least one illness
        illnesses = []
        for statement in diagnoses:
            # The ignore check does not depend on the illness, so do it once
            if contains(statement, ignore_keywords):
                continue
            illnesses = [
                illness
                for illness, keywords in illness_keywords.items()
                if contains(statement, keywords)
            ]
            if illnesses:
                break

        ## If none found, fill with color
        if not illnesses:
            main_cell.fill = redFill
            continue

        ## Single unambiguous illness
        if len(illnesses) == 1:
            main_cell.value = illnesses[0]
            continue

        ## Several candidates: flag in red, then try the tie-break rules
        main_cell.fill = redFill

        ## ASD/FASD
        if "ASD" in illnesses and "FASD" in illnesses:
            illnesses.remove("ASD")
            print("ASD Removal for : {}".format(str(diagnoses)))
            print("**************")

        ## EOS/Personality
        if "EOS" in illnesses and "Personality" in illnesses:
            illnesses.remove("EOS")
            print("EOS Removal for : {}".format(str(diagnoses)))
            print("**************")

        ## Lower suicide ranking
        if len(illnesses) == 2 and "suicide" in illnesses:
            print("Suicide Removal for : {}".format(str(diagnoses)))
            illnesses.remove("suicide")
            print("**************")

        # Every rule above keeps at least one entry (the partner illness
        # always stays), so the list cannot be empty here.
        if len(illnesses) > 1:
            ## Still ambiguous: show the whole list (cell stays red)
            main_cell.value = str(illnesses)
        else:
            ## Resolved by a tie-break rule: mark in blue instead of red
            main_cell.value = illnesses[0]
            main_cell.fill = blueFill
"""
Set all columns with binary headers to 0
"""
def set_binaries_to_zero(sheet, diagnostic_type, illnesses):
    """Write 0 into every ``<short type>_<illness>`` cell of each data row.

    Rows are visited from row 2 to ``sheet.max_row``. Column positions are
    read from the module-level ``column_names`` mapping (header -> index
    into the row tuple).

    Args:
        sheet: Worksheet-like object exposing ``iter_rows`` and ``max_row``.
        diagnostic_type: "admission" or "discharge".
        illnesses: Iterable of illness names.
    """
    prefix = get_shortened_diagnostic_type(diagnostic_type)

    data_rows = sheet.iter_rows(min_row=2, max_row=sheet.max_row)
    for cells in data_rows:
        for name in illnesses:
            header = "{}_{}".format(prefix, name)
            cells[column_names[header]].value = 0
            
"""
Set column headers at end of columns
"""
def set_headers(sheet, diagnostic_type, illnesses):
    """Append the result column headers for one diagnostic type.

    Adds a ``main_<short type>`` column first, followed by one
    ``<short type>_<illness>`` column per entry of *illnesses*, each placed
    after the sheet's current last column via ``add_column``.

    Args:
        sheet: Worksheet passed through to ``add_column``.
        diagnostic_type: "admission" or "discharge".
        illnesses: Iterable of illness names.
    """
    prefix = get_shortened_diagnostic_type(diagnostic_type)

    # Main-diagnosis column goes first
    main_header = "main_{}".format(prefix)
    add_column(sheet, main_header)

    # Then one binary column per illness, in iteration order
    for name in illnesses:
        illness_header = "{}_{}".format(prefix, name)
        add_column(sheet, illness_header)
        
"""
Create a dictionary of column names
"""   
def get_column_names(sheet):
    """Build a mapping of header value -> zero-based column position.

    Columns 1 through ``sheet.max_column`` are visited in order and the
    value of each column's first cell is used as the key. If two columns
    share a header value, the later column's position is kept.

    Args:
        sheet: Worksheet-like object exposing ``iter_cols`` and
            ``max_column``.

    Returns:
        Dict mapping each first-cell value to its 0-based column position.
    """
    columns = sheet.iter_cols(1, sheet.max_column)
    return {column[0].value: index for index, column in enumerate(columns)}

"""
Create key list from dictionary
"""
def get_key_list(dictionary):
    """Return the keys of *dictionary* as a new list, in iteration order.

    Args:
        dictionary: Mapping exposing ``keys()``.

    Returns:
        A fresh list of the keys; changing it does not affect *dictionary*.
    """
    return list(dictionary.keys())