In [1]:
import openpyxl

In [2]:
%run MyFunctions.ipynb

In [3]:
## Import data
filename = "ResearchInChildAndAd_DATA_2018-12-14_1531.xlsx"
workbook = openpyxl.load_workbook(filename)
sheet1 = workbook["ResearchInChildAndAd_DATA_2018-"]
sheet2 = workbook["Tasks"]

In [4]:
## Module 1: Make columns for new data
IGNORE = ['query', 'vs', 'r/o', 'rule out']
KEYWORDS_DICT = {'ADHD': ['attention', 'adhd', 'add'], 
                 'ASD': ['autis', 'asd'],
                 'Anxiety': ['anxiety', 'panic', 'phobi'], 
                 'Bipolar': ['bipolar', 'mani'],
                 'Depression': ['depress', 'MDD', 'PDD', 'mood', 'dysthymi'], 
                 'EOS': ['schizo'], 
                 'FASD': ['fasd', 'fetal', 'alcohol'], 
                 'OCD': ['obsess', 'compuls', 'ocd'], 
                 'ODD': ['opposit', 'defiant', 'odd'], 
                 'Parent-Child': ['parent'], 
                 'Personality': ['cluster', 'personality', 'histrio', 'borderline'], 
                 'Psychosis': ['psychosis', 'psychotic'],
                 'SUD': ['substance', 'SUD', 'use disorder'], 
                 'Trauma': ['trauma', 'adjust', 'ptsd'], 
                 'eating': ['eating', 'anorexi', 'bulimi', 'binge'], 
                 'neurodevelopmental': ['learning', 'delay', 'neurodevelopment', 'intellectual', 'conduct', 'cerebral', 'palsy', 'chromosom'],
                 'somatic': ['somati'], 
                 'suicide': ['suicid']}
# dont forget handle fads and NOT personality for EOS
# read file in instead
BINARY_HEADERS = []
for i in KEYWORDS_DICT.keys():
    BINARY_HEADERS.append(i)

## Add columns
add_column(sheet1, "main_addx")
for i in range(0, len(BINARY_HEADERS)):
    add_column(sheet1, "addx_{}".format(BINARY_HEADERS[i]))
    
add_column(sheet1, "main_dcdx")
for i in range(0, len(BINARY_HEADERS)):
    add_column(sheet1, "dcdx_{}".format(BINARY_HEADERS[i]))

In [5]:
## Create a dictionary of column names
ColNames = {}
Current  = 0
for COL in sheet1.iter_cols(1, sheet1.max_column):
    ColNames[COL[0].value] = Current
    Current += 1

In [6]:
## Set binary values to 0
for row_cells in sheet1.iter_rows(min_row=2, max_row=sheet1.max_row):
    for header in BINARY_HEADERS:
        row_cells[ColNames["addx_{}".format(header)]].value = 0
        row_cells[ColNames["dcdx_{}".format(header)]].value = 0

In [7]:
## Module 2: Fill in main diagnosis

## Admission
for row_cells in sheet1.iter_rows(min_row=2, max_row=sheet1.max_row):
    if type(row_cells[ColNames['admission_diagnosis']].value) is str:
        
        ## Get admission diagnosis in lower-case
        full_diagnosis = row_cells[ColNames['admission_diagnosis']].value.lower()

        ## Split and iterate through statements
        diagnoses = split(full_diagnosis)
        
        if len(diagnoses) > 0:
            main_diagnosis = diagnoses[0]
            
            ## Get main illness NOTE: no check of ignore terms
            num_main = 0
            main_illness = ""
            for illness in KEYWORDS_DICT:
                if not contains(main_diagnosis, IGNORE) and contains(main_diagnosis, KEYWORDS_DICT[illness]):
                    num_main = num_main + 1
                    main_illness = illness

            ## Update spreadsheet
            row_cells[ColNames["main_addx"]].value = main_illness
            
            ## Print warning for multiple illnesses
            if num_main != 1:
                print("****************")
                print("Main diagnosis full description: {}".format(main_diagnosis))
                print("Main illness: {}".format(main_illness))
                print("Non-zero illnesses at record_id = {}".format(row_cells[ColNames['record_id']].value))
        
        else:
            print("Length of 0 diagnosis found for {}".format(row_cells[ColNames['record_id']].value))

****************
Main diagnosis full description: adjustment disorder with disturbance of conduct
Main illness: neurodevelopmental
Non-zero illnesses at record_id = 21296
****************
Main diagnosis full description: fasd
Main illness: FASD
Non-zero illnesses at record_id = 11238
****************
Main diagnosis full description: reactive attachment disorder
Main illness: 
Non-zero illnesses at record_id = 11250
****************
Main diagnosis full description: disinhibited social engagement disorder
Main illness: 
Non-zero illnesses at record_id = 11257
****************
Main diagnosis full description: unspecified depressive disorder (r/o mdd)
Main illness: 
Non-zero illnesses at record_id = 21284
****************
Main diagnosis full description: query unspecified psychosis versus bipolar disorder
Main illness: 
Non-zero illnesses at record_id = 11267
****************
Main diagnosis full description: situational crisis
Main illness: 
Non-zero illnesses at record_id = 11268
********

****************
Main diagnosis full description: xxx syndrome
Main illness: 
Non-zero illnesses at record_id = 21470
****************
Main diagnosis full description: none
Main illness: 
Non-zero illnesses at record_id = 21472
****************
Main diagnosis full description: acute situational crisis
Main illness: 
Non-zero illnesses at record_id = 21476
****************
Main diagnosis full description: r/o borderline personality disorder
Main illness: 
Non-zero illnesses at record_id = 21580
****************
Main diagnosis full description: major depressive disorder with suicidal ideation
Main illness: suicide
Non-zero illnesses at record_id = 21479
****************
Main diagnosis full description: none
Main illness: 
Non-zero illnesses at record_id = 21481
****************
Main diagnosis full description: major depressive disorder with active and acute suicidal ideation
Main illness: suicide
Non-zero illnesses at record_id = 21484
****************
Main diagnosis full description: no

In [8]:
## Discharge
for row_cells in sheet1.iter_rows(min_row=2, max_row=sheet1.max_row):
    if type(row_cells[ColNames['discharge_diagnosis']].value) is str:
        
        ## Get discharge diagnosis in lower-case
        full_diagnosis = row_cells[ColNames['discharge_diagnosis']].value.lower()

        ## Split and iterate through statements
        diagnoses = split(full_diagnosis)
        
        if len(diagnoses) > 0:
            main_diagnosis = diagnoses[0]
            
            ## Get main illness NOTE: no check of ignore terms
            num_main = 0
            main_illness = ""
            for illness in KEYWORDS_DICT:
                if not contains(main_diagnosis, IGNORE) and contains(main_diagnosis, KEYWORDS_DICT[illness]):
                    num_main = num_main + 1
                    main_illness = illness

            ## Update spreadsheet
            row_cells[ColNames["main_dcdx"]].value = main_illness
            
            ## Print warning for multiple illnesses
            if num_main != 1:
                print("****************")
                print("Main diagnosis full description: {}".format(main_diagnosis))
                print("Main illness: {}".format(main_illness))
                print("Non-zero illnesses at record_id = {}".format(row_cells[ColNames['record_id']].value))
        
        else:
            print("Length of 0 diagnosis found for {}".format(row_cells[ColNames['record_id']].value))

****************
Main diagnosis full description: unspecified disruptive
Main illness: 
Non-zero illnesses at record_id = 21296
****************
Main diagnosis full description: fasd
Main illness: FASD
Non-zero illnesses at record_id = 11238
****************
Main diagnosis full description: disruptive behavioural disorder
Main illness: 
Non-zero illnesses at record_id = 21265
****************
Main diagnosis full description: fix
Main illness: 
Non-zero illnesses at record_id = 11244
****************
Main diagnosis full description: none specified
Main illness: 
Non-zero illnesses at record_id = 11245
****************
Main diagnosis full description: query gender dysphoria
Main illness: 
Non-zero illnesses at record_id = 11247
****************
Main diagnosis full description: fasd
Main illness: FASD
Non-zero illnesses at record_id = 11252
****************
Main diagnosis full description: disinhibited social engagement disorder
Main illness: 
Non-zero illnesses at record_id = 11257
*****

****************
Main diagnosis full description: n/a
Main illness: 
Non-zero illnesses at record_id = 21530
****************
Main diagnosis full description: likely gender dysphoria
Main illness: 
Non-zero illnesses at record_id = 21533
****************
Main diagnosis full description: n/a
Main illness: 
Non-zero illnesses at record_id = 21536
****************
Main diagnosis full description: n/a
Main illness: 
Non-zero illnesses at record_id = 21538
****************
Main diagnosis full description: n/a
Main illness: 
Non-zero illnesses at record_id = 21540
****************
Main diagnosis full description: n/a
Main illness: 
Non-zero illnesses at record_id = 21541
****************
Main diagnosis full description: 
Main illness: 
Non-zero illnesses at record_id = 21545
****************
Main diagnosis full description: n/a
Main illness: 
Non-zero illnesses at record_id = 31241
****************
Main diagnosis full description: n/a
Main illness: 
Non-zero illnesses at record_id = 21600
**

In [9]:
## Module 3: Read Diagnosis, break into substrings, fill in

## Admissions
for row_cells in sheet1.iter_rows(min_row=2, max_row=sheet1.max_row):
    if type(row_cells[ColNames['admission_diagnosis']].value) is str:
        ## Get admission diagnosis in lower case
        full_diagnosis = row_cells[ColNames['admission_diagnosis']].value.lower()

        ## Split and iterate through statements
        diagnoses = split(full_diagnosis)
        for statement in diagnoses:

            # Ignore certain statements
            if not contains(statement, IGNORE):

                for illness in KEYWORDS_DICT:
                    if contains(statement, KEYWORDS_DICT[illness]):
                        row_cells[ColNames['addx_{}'.format(illness)]].value = 1
                        if illness == "FASD" and row_cells[ColNames['addx_ASD'.format(illness)]].value == 1:
                            row_cells[ColNames['addx_ASD'.format(illness)]].value = 0

## Discharge
for row_cells in sheet1.iter_rows(min_row=2, max_row=sheet1.max_row):
    if type(row_cells[ColNames['discharge_diagnosis']].value) is str:
        ## Get admission diagnosis in lower case
        full_diagnosis = row_cells[ColNames['discharge_diagnosis']].value.lower()

        ## Split and iterate through statements
        diagnoses = split(full_diagnosis)
        for statement in diagnoses:

            # Ignore certain statements
            if not contains(statement, IGNORE):
                print(statement)
                for illness in KEYWORDS_DICT:
                    if contains(statement, KEYWORDS_DICT[illness]):
                        row_cells[ColNames['dcdx_{}'.format(illness)]].value = 1
                        if illness == "FASD" and row_cells[ColNames['addx_ASD'.format(illness)]].value == 1:
                            row_cells[ColNames['addx_ASD'.format(illness)]].value = 0

axis i: major depressive disorder
 parent-child relational problem
 partial borderline personality traits 
parent-child relational difficulties
major depressive disorder
major depressive disorder
parent-child relational difficulties. 
unspecified depressive disorder
cluster b personallity traits parent-child relational difficulties. 
unspecified psychosis 
generalized anxiety disorder
adhd hyperactive type
community disorder unspecified 
unspecified disruptive
impulse control
and conduct disorder
generalized anxiety disorder
attention deficit hyperactivity disorder
primarily hyperactive type
unspecified trauma and stressor related disorder
unspecified tic disorder
major depressive disorder
cluster b personality disorder traits
parent-child relational problems
unspecified psychosis
mild autistic spectrum disorder
recurrent depressive episodes 
complex ptsd
major depressive disorder with psychosis
amphetamine use disorder
marijuana use disorder
parent-child relational problem
gender dysp

gad with panic symptoms
social anxiety disorder
affect dysregulation
history of mdd
school stress and questions regarding own sexuality
anorexia nervosa
partial response
borderline personality organization.
major depressive disorder
moderate severity
social phobia
unspecified anxiety disorder
unspecified anxiety disorder
likely gad
stressors - transition to high school
conduct disorder
mild to moderate depression by subjective report
possible adhd by history but not seen during admission at cape.
conduct disorder
marijuana use disorder
parent-child relational problem
traumatic brain injury. 
unspecified psychosis
marijuana use disorder
parent-child relational stressors
adjustment disorder with depression and anxiety
mild intellectual delay
unspecified depressive disorder
social anxiety disorder with panic attacks
previous substance use disorder
parent child relationship.
mdd
complicated grief
affect dysregulation
complicated grief
unspecified mood disorder
ocd
generalized anxiety disor

parent-child relational problem. 
adjustment disorder
borderline personality traits
adhd. 
adverse response to fluoxetine with depressed mood and si
unspecified mood disorder: differential diagnosis dysthymia versus major depressive disorder
adjustment disorder
borderline personality traits
intellectual delay (global)
complex neuropsychiatric brain
 odd
 history of previous traumas
adhd.
n/a
major depressive disorder
major depressive disorder.
major depressive disorder
ptsd
behavioural and emotional dysregulation from complex developmental trauma. 
unspecified depressive disorder
likely adjustment disorder
social anxiety disorder with performance anxiety
parent child relational issue
concerning symptoms on admission were likely due to substance induced mood changes
ongoing substance use disorder
ongoing parent-child relational disorder
medical challenges with celiac like disease. 
adjustment disorder with anxious mood
unspecified anxiety disorder
past history of hallucinations
now reso

n/a
n/a
generalized anxiety disorder with occasional panic attacks
significant regressive behaviour in light of severe anxiety as well as acquired behavioural characteristics and personality subtypes (cluster b and c traits very present)
complex brain with this likely social reciprocity learning disorder
perhaps verbal output disorder
but clearly also not normal
parent-child relational difficulties. 
complex neurodevelopmental condition not yet specified with concerns around language and cognitive development as well as coordination. likely adhd
combined type. attachment issues affecting interpersonal relationships. 
complex neurodevelopmental condition not yet specified with concerns around language and cognitive development as well as coordination. likely adhd
combined type. attachment issues affecting interpersonal relationships. 
persistent depressive disorder with intermittent major depressive episodes
generalized anxiety disorder
unspecified trauma and stressor related disorder. 

In [10]:
## Output file
workbook.save("output-{}.xlsx".format(getTime()))

In [11]:
s = (split("R/O Borderline Personality Disorder, R/O PTSD, R/O Conduct Disorder (none in consult   - taken from CAPE discharge summary). "))
for d in s:
    d = d.lower()
    if not contains(d, IGNORE):
        print(d)
    else:
        print("IGNORE {}".format(d))

IGNORE r/o borderline personality disorder
IGNORE r/o ptsd
IGNORE r/o conduct disorder (none in consult
 - taken from cape discharge summary). 
