In [1]:
import re
import csv
from datetime import time
from dataclasses import dataclass
import math
import pandas as pd
import numpy as np
import nltk

In [2]:
#-------------------------Follow-up keywords strings--------------------------------
# Every follow-up pattern in this section shares one capture-group layout:
#   group 1 = short leading context (stops at the separators listed in the character class)
#   group 2 = the keyword alternation, which contains exactly two capturing sub-groups (groups 3 and 4)
#   group 5 = trailing context, captured inside a zero-width lookahead
# With re.findall this puts the leading context at tuple index 0 and the trailing context at tuple index 4,
# which is what the search loops read (hit[0] / hit[4]).
# NOTE: adding or removing a capturing group inside a keyword alternation shifts index 4;
#       use a non-capturing (?:...) group for any new alternative.
# `{,n}` is the same as `{0,n}`.
regAfter = r'([^.;|]{,1})((monitor\b)|(monitored.{,10}every))(?=([^;|]{,25}))' #each pattern has exactly 2 inner capturing groups (see layout note above)
regBefore = r'([^.;|]{,10})((with testing)|(testing.{,10}hvf)|sooner.{,5}if worse)(?=([^;|]{,1}))'
regMed = r'([^.;|]{,14})((eylea.{,10}in)|lucentis in|intraocular pressure in|PRP laser|eylea|avastin|(\boct\b)|HVF|\bprp\b|iop check|\biop\b|\bvf\b|\bhrt\b|7 fields color|Spectralis)(?=([^;|(]{,40}))'
regDil = r'([^.;|]{,12})((\bdil\b)|(dilation)|dilated?|\bdfe\b|dilated fundus exam)(?=([^;|]{,25}))'
regAppt = r'([^.;|]{,14})(appointment|(re)?evaluation|examination|(exam\b)|examine|repeat.{,20}exam|repeat testing|next visit|will repeat|diabetic eye exam)(?=([^;|]{,35}))'
regAmbig =r'([^.;,|]{,8})((back in)|repeat in|refer to.{,15}within|recheck.{,10}in|re-?evaluate|will.{,10}see|see me|will see|come back|recommend(ed)?|\bdue\b.{,10}in|check|\brx\b|extend to|review.{,15}in|see\b.{,15}\bin|surveillance|reassess)(?=([^;|]{,25}))'
regKey2 = r'([^.;)|]{,15})(follow(ed)?[ -]?(up)?\b|follow-up|followup|\bf/?u\b|follow up with|Follow-  up)(?=([^;|]{,50}))'
regKey = r'([^.;)|]{,8})((\brto\b)|(\brtn\b)|\bret\b|return to clinic|return to Dr|return|\brtc\b)(?=([^;|]{,35}))'
# Diabetic-retinopathy keywords with surrounding context; same index-0 / index-4 layout as above.
regDR_FU = r'([^.;)]{,10})(\sdr[\s\n]+(?P<word>\S*)|(juxtatoveal edema)|dm retinopathy|diabetic retinopathy on dilated exam|\bdiabetic retinopathy|\bDME?\b|glucose.{,30}control|\bN?PDR\b|diabetic macular edema|glycemic control|gl\b.{,10}control|blood sugar control|sugar control|diabetic eye disease|proliferative DR)(?=([^;|]{,50}))'

#----------------------------------priorities-----------------------------------------
# Both lists have 8 entries because the search loops iterate range(8).
regSearches = [regKey, regKey2, regDil, regAfter, regBefore, regAmbig, regMed, regAppt] #ordered by priority
regSearchesProg = [regDR_FU, regKey, regDil, regMed, regDR_FU, regDR_FU, regDR_FU, regDR_FU] #the order for progress notes (search based off of DR keywords)

#---------------------------------column keywords----------------------------------
# Patterns matched (case-insensitively) against the header row to locate columns.
regID = r'enterprise mrn' 
regPatID = r'pat_id' #'enterprise_mrn'
regSearch = r'note[- _]text' # column holding the free-text note
regFilter = r'ICD.{,2}10' # column holding the ICD-10 code(s)
regType = r'note.{,2}type' # column holding the note type
regICD = r'E(08|09|10|11|13)' # ICD-10 code prefixes searched for in the ICD column
regProgNote = r'progress notes' # note-type value that marks a progress note

#----------------------------------date keywords--------------------------------------------
# Layout shared by the four unit patterns (as re.findall tuple indexes):
#   index 1  = named group t  : the count (digits, optionally with one decimal) or a count word
#   index 4  = named group t2 : optional second number, e.g. the upper bound in "3 to 4 months"
#   last     = up to 8 characters following the unit (findDate checks this for "after")
regYear = r'((?P<t>\d+(\.\d)?|next|this|every|each|one|two|three|four|five|six|seven|eight|nine|ten|\ba\b)(\W|to){,3}(?P<t2>(\b\d+)\D{,2})?(years?|yrs?\b))([^.;]{,8})' #allow for typed out one
regMonth = r'((?P<t>\d+(\.\d)?|this|several|next|each|one|few|two|three|four|five|six|seven|eight|nine|ten|in the|within the|in a|after a|next|\ba\b)([^a-z0123456789:]|to){,3}(?P<t2>(\b\d+)\D{,2})?(mm?o?o?n?ths?|mos?\b|m\b))([^.;]{,8})' # to allow for w/in the(a) month
regWeek = r'((?P<t>\d+(\.\d)?|this|couple|next|few|one|two|three|four|five|six|seven|eight|nine|ten|\ba\b)(\W|to){,3}(?P<t2>(\b\d+|\btwo\b|\bthree\b|\bfour\b|\bfive\b|\bsix\b|\bseven\b|\beight\b|\bnine\b)\D{,2})?(weeks?|ws?\b|wks?\b))([^.;]{,8})'
regDay = r'((?P<t>\d+(\.\d)?|next|few|one|two|three|four|five|six|seven|eight|nine|ten)(\W|to){,3}(?P<t2>(\b\d+)[^/\d]{,2})?(\bdays?))([^.;]{,8})' #need \b in front of day to differentiate from e.g. monday
# Keyword fallbacks used by findDate when the corresponding unit pattern has no match.
regExtra = r'annual(ly)?|yearly' # -> 1 year
regExtraBi = r'biannual(ly)?|biyearly|twice yearly' # -> 6 months
regExtraWeek = r'(this|next).{,5}(monday|tuesday|wednesday|thursday|friday|\bsat(urday)?\b)' #allow sat but only as a word boundary; -> 1 week
regExtraDay = r'tomorrow' # -> 1 day
regExtraMonth = r'monthly' # -> 1 month
# Order matters: findDate indexes this list with the same position as the unit names ["day", "week", "month", "year"].
regDate = [regDay, regWeek, regMonth, regYear]

#----------------------------------------misc------------------------------------------------ (not used currently)
regMisc = r'(?P<note>(prn|as needed|next available))'


#-------------------------------------Diabetic retinopathy hits--------------------------------
# regDR has three capturing groups only (pre, DR, post), so re.findall yields (pre, keyword, post) tuples.
regDR = r'(?P<pre>[^.;]{,4})(?P<DR>\sdr[\s\n]+(?=\S*)|prolif DR|proliferative retinopathy|mild dr\b|\bno dr\b|DM.{,15}with.?out.{,10}complication|diabetic eye|no evidence.{,5}\bdr\b|diabetic retin|sugar.{,25}well maintained|diabetic.{,5}exam|diabeties.{,10}without complication|bdr\b|bg control|juxtatoveal edema|dm.{,10}dr\b|diabetes mellitus.{,20}retinopathy|dm.{,20}retinopathy|\bdiabetic retinopathy|\bDME\b|glucose.{,30}control|control.{,10}glycemia|\bN?PDR\b|diabetic macular edema|glycemic control|control.{,20}blood sugar|gl\b.{,10}control|blood sugar control|sugar control|diabetic eye disease|proliferative DR)(?P<post>[^.;]{,4})'
# regDM and regR are used together: a note counts as a hit when both match somewhere in the text.
regDM = r'DM|diabetes mellitus|diabetes'
regR = r'(retinopathy|neovascularization|exudates)'
# Looser term list, applied only to non-progress notes whose ICD column matches regICD.
regStab = r'(stable|prolifer|blood sugar|macular edema|glucose|retinop|exudate|NIDDM|DM|diabetes|monitor yearly|\bprp\b|monitor annually|retina exam|avastin|eylea|edema|control|retinal|dilated exam|lucentis)'


In [3]:
def numberVal(val) -> int:
    """Translate a count word or phrase into its integer value.

    Spelled-out numbers "one" through "ten" map to 1..10, and phrases that
    imply a single unit ("next", "every", "in a", "this", "tomorrow", ...)
    map to 1. The lookup is exact and case-sensitive, so callers are expected
    to lower-case the text first.

    Returns None for anything not in the table (for example "few").
    """
    spelled_out = ("one", "two", "three", "four", "five",
                   "six", "seven", "eight", "nine", "ten")
    word_to_count = {word: position for position, word in enumerate(spelled_out, start=1)}

    # Phrases that all mean "one unit from now".
    single_unit_phrases = ("next", "every", "in a", "within the", "after a",
                           "in the", "this", "each", "tomorrow", "a")
    word_to_count.update(dict.fromkeys(single_unit_phrases, 1))

    # Every stored value is a non-zero int, so a plain lookup already yields
    # either the count or None for unknown words such as "few".
    return word_to_count.get(val)
        
def isNaN(s):
    """Return whether `s` equals the literal text "nan" (exact, case-sensitive comparison)."""
    matches_nan_text = (s == "nan")
    return matches_nan_text

def isEqual(numTag, Numcell, dateTag, FUcell):
    """Return True when the two intervals are the same duration in different units.

    Two equivalences are recognised, in either direction:
    4 weeks <-> 1 month, and 12 months <-> 1 year.

    All checks are substring tests (`in`), so the arguments are expected to be
    strings: `numTag`/`dateTag` are one count/unit pair and `Numcell`/`FUcell`
    the other.
    """
    def _is_conversion(count, unit, one_count, one_unit, many, many_unit, single_unit):
        # "<many> <many_unit>" on one side and "1 <single_unit>" on the other.
        return (many in count and many_unit in unit
                and "1" in one_count and single_unit in one_unit)

    conversions = (
        ("4", "week", "month"),
        ("12", "month", "year"),
    )
    for many, many_unit, single_unit in conversions:
        forward = _is_conversion(numTag, dateTag, Numcell, FUcell, many, many_unit, single_unit)
        if forward or _is_conversion(Numcell, FUcell, numTag, dateTag, many, many_unit, single_unit):
            return True
    return False

def isfloat(value):
    """Return True if `value` can be converted with float(), otherwise False.

    Strings that do not parse raise ValueError inside float(); values of an
    unsupported type (e.g. None or a list) raise TypeError. Both are treated
    as "not a float" so this predicate never raises for such inputs.

    Note that float() also accepts "nan" and "inf", so those return True.
    """
    try:
        float(value)
    except (TypeError, ValueError):
        return False
    return True

def findDate(search, i):
    """Extract a follow-up interval from `search` and store it in module globals.

    The four unit patterns in `regDate` are tried in order (day, week, month,
    year). The first unit with at least one regex match is used and the scan
    stops there. For units without a match, a keyword fallback is checked
    instead ("tomorrow", "this/next <weekday>", "monthly", "biannual...",
    "annual/yearly").

    Results are written to the globals `numTag` (count), `numTag2` (optional
    second number of a range) and `dateTag` (unit name taken from the global
    list `dates`). Nothing is returned.

    Parameters:
        search: text to scan.
        i: not used inside this function.
    """
    global dateTag
    global numTag2
    global numTag
    
    for x in range(4):
        raw = re.findall(regDate[x], search, re.IGNORECASE)
        if raw:
            minNum = None # smallest count accepted so far for this unit
            index = x            
            for dateHit in raw:
                # Last tuple element is the text right after the unit; skip hits followed by "after".
                if "after" in dateHit[len(dateHit)-1]:
                    continue
                else:
                    dateTag = dates[index]
                parse = dateHit[1] # named group t: digits or a count word
                if(isfloat(parse)):
                    val = math.ceil(float(parse)) # fractional counts are rounded up
                else:
                    val = numberVal(parse.lower()) #if it says next week interpret as 1 week
                # Accept the hit when nothing usable has been accepted yet, or when it is smaller.
                # NOTE(review): when val is None (count word unknown to numberVal, e.g. "few"),
                # dateTag has already been set above but numTag is left unchanged.
                if (not minNum or (val and val < minNum)):
                    if val:
                        numTag = int(val)                        
                    minNum = val
                    if dateHit[4]:#2nd date is found
                        parse = dateHit[4] # named group t2, may carry trailing non-digit characters
                        if(isfloat(parse)):
                            val = math.ceil(float(parse))
                        else:
                            val = numberVal(parse.lower().strip())
                        if val:
                            numTag2 = int(val)
                    else:
                        numTag2= None                
            break # a unit matched: do not look at larger units
        # No regex match for this unit: try its keyword fallback. Each fallback only applies
        # while numTag is still None.
        # NOTE(review): except for the biannual case these branches do not break, so a later
        # unit's regex match can still overwrite the values set here.
        elif x == 0:
            extraD = re.search(regExtraDay, search, re.IGNORECASE)
            if extraD and numTag is None:
                numTag = 1
                dateTag = "day"
        elif x == 1:
            extraW = re.search(regExtraWeek, search, re.IGNORECASE)
            if extraW and numTag is None:
                numTag = 1
                dateTag = "week"
        elif x == 2:
            extraM = re.search(regExtraMonth, search, re.IGNORECASE)
            if extraM and numTag is None:
                numTag = 1
                dateTag = "month"
        elif x == 3:
            # "biannual" is checked before "annual" because regExtra would also match inside it.
            extraB = re.search(regExtraBi, search, re.IGNORECASE)
            if extraB and numTag is None:
                numTag = 6
                dateTag = "month"
                break
            extra = re.search(regExtra, search, re.IGNORECASE)
            if extra and numTag is None:
                numTag = 1
                dateTag = "year"

In [6]:
# Input workbook. It is read without a header so that row 0 holds the column titles
# and the row numbers used below match the spreadsheet rows.
file = '/home/idies/workspace/Storage/ccai6/NLP in DR/Follow-up and Tag/Input/All notes 1_2_2022.xlsx'
df = pd.read_excel(file, header=None)

# Column indexes, filled in from the header row; None means "not present in this file".
searchCol = None   # free-text note
IDcol = None       # enterprise MRN
Patcol = None      # patient id
DMcol = None       # gold standard: diabetic-retinopathy hit
Numcol = None      # gold standard: follow-up count
Num2col = None     # gold standard: second follow-up count (range)
FUcol = None       # gold standard: follow-up unit
ICDcol = None      # ICD-10 code(s)
Typecol = None     # note type

regDM2 = r'\bDM\b'

# The first matching column wins for every role. The tests use `is None` rather than
# truthiness because 0 is a valid column index: with `if not col:` a match in the first
# column would be treated as "not found yet" and could be overwritten by a later column.
for j in range(df.shape[1]): #iterate over columns
    cell = str(df.iloc[0,j])
    if searchCol is None and re.search(regSearch, cell, re.IGNORECASE):
        searchCol = j
    if IDcol is None and re.search(regID, cell, re.IGNORECASE):
        IDcol = j
    if Patcol is None and re.search(regPatID, cell, re.IGNORECASE):
        Patcol = j
    if ICDcol is None and re.search(regFilter, cell, re.IGNORECASE):
        ICDcol = j
    if Typecol is None and re.search(regType, cell, re.IGNORECASE):
        Typecol = j
    if DMcol is None and re.search(regDM2, cell, re.IGNORECASE):
        #gold standard columns, located relative to the DM column
        DMcol = j
        Numcol = j+1
        Num2col = j+3
        FUcol = j+2 #change these depending on order of gold standard

# The note-text column is required by everything that follows; fail here with a clear
# message instead of an indexing error later on.
if searchCol is None:
    raise ValueError("No note-text column found in the header row of " + file)
            
 


In [7]:
# ---- Per-row output columns (one entry per spreadsheet row, header row included) ----
time1, time2, dateCol = [], [], []      # follow-up count, second count, unit
hitCol, hitWord = [], []                # "Y"/"N" hit flag and the keyword that triggered it
agreeHit, agreeFU = [], []              # agreement with the gold standard

# ---- Per-note records, progress notes ----
hitRow, ICDlist, GS, match = [], [], [], []

# ---- Per-note records, second group ----
hitRow2, ICDlist2, GS2, match2 = [], [], [], []

# ---- Follow-up agreement counters ("Prog" and "Prob" suffixes) ----
truePosProg = falsePosProg = trueNegProg = falseNegProg = 0
truePosProb = falsePosProb = trueNegProb = falseNegProb = 0

# ---- Hit agreement counters, all notes ----
truePosHit = falsePosHit = trueNegHit = falseNegHit = 0

# ---- Hit agreement counters, suffix 2 ----
tot = hit2 = 0
truePos2 = trueNeg2 = falsePos2 = falseNeg2 = 0

# ---- Hit agreement counters, suffixes 3 and 4 ----
tot2 = hit3 = 0
truePos3 = trueNeg3 = falsePos3 = falseNeg3 = 0
truePos4 = trueNeg4 = falsePos4 = falseNeg4 = 0

# ---- Debug switches ----
testN = 765      # row whose intermediate values are printed
testB = True     # print follow-up time info for row testN
testB2 = False   # print glyc hit info for row testN


# Main pass over the spreadsheet: for every note (one row) extract the follow-up interval
# (numTag / numTag2 / dateTag) and the diabetic-retinopathy hit flag, record them in the
# per-row output lists, and tally agreement with the gold-standard columns.
for i in range(df.shape[0]): #iterate over rows
    if i == 0:
        # Row 0 holds the column titles (the file is read with header=None). Append
        # placeholders so the per-row output lists stay aligned with the dataframe rows.
        time1.append(None)
        dateCol.append(None)
        time2.append(None)
        hitCol.append(None)
        agreeHit.append(None)
        agreeFU.append(None)
        hitWord.append(None)
        continue
    
    # Read this row's cells as strings; a column that was not located yields "".
    # NOTE(review): these are truthiness tests, so a column located at index 0 is also
    # treated as missing.
    cell = str(df.iloc[i,searchCol])
    if DMcol:
        DMcell = str(df.iloc[i,DMcol]) # gold standard hit
    else:
        DMcell = ""
    if Numcol:
        Numcell = str(df.iloc[i,Numcol])
    else:
        Numcell = ""
    if Num2col:
        Num2cell = str(df.iloc[i,Num2col]) # gold standard second count
    else:
        Num2cell = ""
    if FUcol:
        FUcell = str(df.iloc[i,FUcol]) # gold standard unit
    else:
        FUcell = ""
    if ICDcol:
        ICDcell = str(df.iloc[i,ICDcol]) # ICD-10 code(s)
    else:
        ICDcell = ""
    if Typecol:
        Typecell = str(df.iloc[i,Typecol]) # note type
    else:
        Typecell = ""
    
    end = ""    
    search = ""
    
    # Per-row reset of the module-level state that findDate reads and writes.
    dates = ["day", "week", "month", "year"]
    index = None
    date = [None] * 4
    dateTag = None
    numTag = None
    numTag2 = None
    noteTag = None
   
    # Despite its name, ICDhit is the match object for "progress notes" in the note type
    # (truthy for progress notes, None otherwise).
    ICDhit = None
    ICDhit2 = None
    ICDhit = re.search(regProgNote, Typecell, re.IGNORECASE)# Determine if its a progress note or not..
    searchesFinal = [None] * 8
    
    # NOTE(review): searchesFinal is assigned here but never read afterwards; every search
    # below indexes regSearches directly.
    if ICDhit:
        searchesFinal = regSearchesProg
    else:
        searchesFinal = regSearches       
    # Build `end`, the tail of the note that is searched first.
    if len(cell) > 6000 and not ICDhit: #if not a progress note
        # Very long non-progress note: drop the last quarter of the text, then keep the last 80 characters.
        temp = int(-0.25 * len(cell))
        cell =  cell.strip()[:temp]
        end = cell[-80:]
    elif len(cell) > 85:
        end = cell.strip()[-85:]
    else:
        end = cell
        
    if i == testN and testB:
        print(end)
        
    if ICDhit:
        # Progress note: collect the context around every DR keyword into ProgSearch
        # ("leading| trailing| " per hit) and look for follow-up keywords only in that text.
        ProgSearch = ""
        raw = re.findall(regDR_FU, cell, re.IGNORECASE)
        for hit in raw:
            ProgSearch += hit[0]
            ProgSearch += "| "
            ProgSearch += hit[4]
            ProgSearch += "| "
        for x in range(8):
            search = ""
            if dateTag:
                break
            raw = None
            raw = re.findall(regSearches[x], ProgSearch, re.IGNORECASE)#do findall here now too??
            for hit in raw:
                search += hit[0]
                search += "| "
                search += hit[4]
                search += "| "                    
            if raw:
                findDate(search, i)
            if numTag:
                # Discard implausible intervals: a count above 12, or more than 1 year.
                if int(numTag) > 12 or (int(numTag) > 1 and "year" in dateTag):            
                    dateTag = None
                    numTag = None
            if i == testN and testB:
                print(x , search)
                print(numTag)
                print(numTag2)
                print(dateTag)

    
    #first search of the end
    # Patterns are tried in priority order until a unit is found.
    # NOTE(review): unlike the other search loops, `search` is not reset per pattern here,
    # so the context from earlier patterns accumulates.
    for x in range(8):        
        if dateTag:            
            break
        raw = None
        raw = re.findall(regSearches[x], end, re.IGNORECASE)#do findall here now too??
        for hit in raw:
            search += hit[0]
            search += "| "
            search += hit[4]
            search += "| "                    
        if i == testN and testB:
            print(x, search)
            
        if raw:
            findDate(search, i)
        if numTag:
            if int(numTag) > 12 or (int(numTag) > 1 and "year" in dateTag):            
                dateTag = None
                numTag = None
            
    #if search:
    #    findDate(search, i)
    if i == testN and testB:
        print(numTag)
        print(numTag2)
        print(dateTag)
        
        
    # Second search: the whole note, same priority order.
    if not dateTag:            
        for x in range(8):
            search = ""
            if dateTag:
                break
            raw = None
            raw = re.findall(regSearches[x], cell, re.IGNORECASE)#do findall here now too??
            for hit in raw:
                search += hit[0]
                search += "| "
                search += hit[4]
                search += "| "                    
            if raw:
                findDate(search, i)
            if numTag:
                if int(numTag) > 12 or (int(numTag) > 1 and "year" in dateTag):            
                    dateTag = None
                    numTag = None
            if i == testN and testB:
                print(x , search)
                print(numTag)
                print(numTag2)
                print(dateTag)
    

    #third search
    # Last resort: the final (up to 15) characters of the note that contain no ')'.
    # `end` is rebound to a match object here.
    regEnd = r'(?P<e>[^)]{,15}$)'
    if not dateTag:
        end = re.search(regEnd, cell.strip(), re.IGNORECASE)
        if end:
            search = end.group('e')            
        if i == testN and testB:
            print("final check" , search)                
    # NOTE(review): this call is outside the `if not dateTag` block, so it also runs on the
    # last `search` string when a unit was already found, and its result is not passed
    # through the >12 / >1 year sanity check used above.
    findDate(search, i)
    

    #---------------------------HIt script is run here------------------------
    # ICDhit2: for non-progress notes, whether the ICD column matches regICD.
    ICDhit2 = None
    if not ICDhit:
        ICDhit2 = re.search(regICD, ICDcell, re.IGNORECASE)
    
    glycHit = False
    hitword = ''
    DRsearch = re.findall(regDR, cell, re.IGNORECASE) # tuples of (pre, keyword, post)
    if i == testN and testB2:
        print(cell)                
    word = None
    for hit in DRsearch:        
        if hit[2]:            
            test = hit[2]
            # Skip the hit when the text after the keyword starts with an upper-case letter that
            # is followed by a lower-case letter (or is the only character).
            # presumably filters out "Dr <Name>"-style matches -- TODO confirm
            if test[0].isupper() and (len(test) < 2 or test[1].islower()):                
                continue
            else:
                glycHit = True
                hitword = hit[1]
                if i == testN and testB2:                
                    print("found", hit[1])
                break
        else:
            glycHit = True
            hitword = hit[1]
            if i == testN and testB2:                
                print(hit[1])                
            break
        
    # Fallback 1: a diabetes term and a retinopathy-type term both occur somewhere in the note.
    if not glycHit:
        pt1 = re.search(regDM, cell, re.IGNORECASE)
        pt2 = re.search(regR, cell, re.IGNORECASE)
        if pt1 and pt2:
            hitword = pt1.group(0) + "+" + pt2.group(0)
            glycHit = True
    
    # Fallback 2: non-progress note with a matching ICD code and any regStab term.
    # hitword is not set in this branch, so it stays ''.
    if not glycHit and ICDhit2:
        stab = re.search(regStab, cell, re.IGNORECASE)
        if stab:
            glycHit = True
            
    if not glycHit and i == testN and testB2:        
        print("no hit")
        
        
        
    # Record this row's results.
    time1.append(numTag)
    time2.append(numTag2)
    dateCol.append(dateTag)
    
    if glycHit:
        hitCol.append("Y")
        hitWord.append(hitword)
    else:
        hitCol.append("N")
        hitWord.append("N/A")
    
   
    
    if i == testN and testB:
        print("final result: ")
        print(numTag)
        print(numTag2)
        print(dateTag)
    
    # Follow-up agreement with the gold standard. The row agrees when any of these holds:
    #   1. nothing was extracted and the gold-standard count cell is "nan";
    #   2. the two intervals are unit conversions of each other (isEqual);
    #   3. the counts contain one another as substrings, the extracted unit occurs in the
    #      gold-standard unit cell, and the second counts agree in the same way.
    # Disagreements are counted as false positives when a unit was extracted and as false
    # negatives otherwise; "Prog" counters are for progress notes, "Prob" for all other notes.
    if (not numTag and isNaN(Numcell)) or isEqual(str(numTag), str(Numcell), str(dateTag), str(FUcell)) or ((numTag and str(numTag) in str(Numcell) or Numcell and str(Numcell) in str(numTag)) and dateTag and dateTag in FUcell and (not numTag2 and isNaN(Num2cell) or (str(numTag2) in str(Num2cell) or str(Num2cell) in str(numTag2)))): 
        agreeFU.append(True)
        if dateTag:
            if ICDhit:
                truePosProg+=1
            else:
                truePosProb+=1
        else:
            if ICDhit:
                trueNegProg+=1
            else:
                trueNegProb+=1            
    else:
        agreeFU.append(False)
        if dateTag:
            if ICDhit:
                falsePosProg+=1
            else:
                falsePosProb+=1            
        else:
            if ICDhit:
                falseNegProg+=1
            else:
                falseNegProb+=1
            
        
    # Hit agreement: a hit agrees when the gold-standard cell contains "Y"; a non-hit agrees
    # when the cell contains "N" or does not contain "Y" (`and` binds tighter than `or`).
    result = False
    if glycHit and "Y" in DMcell or not glycHit and ("N" in DMcell or not "Y" in DMcell):
        if i == testN and testB2:                
            print("its true")
        result = True
        if glycHit:
            truePosHit+=1
        else:
            trueNegHit+=1
    else:
        if i == testN and testB2:                
            print("its false")
        result = False
        if glycHit:            
            falsePosHit+=1
        else:
            falseNegHit+=1
            #print(i+1)            
    agreeHit.append(result)    
    
    # Break the hit agreement down by note group:
    #   suffix 2 = progress notes,
    #   suffix 3 = non-progress notes without a matching ICD code,
    #   suffix 4 = non-progress notes with a matching ICD code.
    if ICDhit:
        hitRow.append(i+1) # i+1 = 1-based row number
        ICDlist.append(glycHit)
        GS.append(DMcell)
        match.append(result)
        tot +=1
        if result:
            hit2 +=1
            if glycHit:
                truePos2 +=1
            else:
                trueNeg2 +=1
        else:
            if glycHit:
                falsePos2 +=1
            else:
                falseNeg2 +=1
    else:
        #not progress note
        if not ICDhit2:
            hitRow2.append(i+1)
            ICDlist2.append(glycHit)
            GS2.append(DMcell)
            match2.append(result)
            tot2 +=1
            if result:
                hit3 +=1
                if glycHit:
                    truePos3 +=1
                else:
                    trueNeg3 +=1
            else:
                if glycHit:                    
                    falsePos3 +=1
                else:
                    falseNeg3 +=1
        else:
            if result:                
                if glycHit:
                    truePos4 +=1
                else:
                    trueNeg4 +=1
            else:                
                if glycHit:                                        
                    falsePos4 +=1
                else:
                    falseNeg4 +=1
    
def _print_fu_metrics(tp, tn, fp, fn):
    """Print the four agreement counts and, when there are positives, the derived rates.

    The rates are printed only when tp + fn is non-zero. Specificity is
    additionally guarded: with no negatives at all (tn + fp == 0) the division
    would raise ZeroDivisionError, so "n/a" is printed instead.
    """
    print("true positives:" , tp)
    print("true negatives:" , tn)
    print("false positives:" , fp)
    print("false negatives:" , fn)
    if tp + fn != 0:
        print("sensitivity:" , tp/(tp+fn))
        if tn + fp != 0:
            print("specificity:" , tn/(tn+fp))
        else:
            print("specificity:" , "n/a")
        print("accuracy:" , (tp+tn)/(fn+fp+tp+tn))

# Follow-up agreement for progress notes, then for all other notes.
_print_fu_metrics(truePosProg, trueNegProg, falsePosProg, falseNegProg)

_print_fu_metrics(truePosProb, trueNegProb, falsePosProb, falseNegProb)

#print("\ntrue positives:" , truePosHit)
#print("true negatives:" , trueNegHit)
#print("false positives:" , falsePosHit)
#print("false negatives:" , falseNegHit)
#print("accuracy:" , (truePosHit+trueNegHit)/(falseNegHit+falsePosHit+truePosHit+trueNegHit))

Dilate next visit.  
0 
1 
2 |  next visit.  | 
3 |  next visit.  | 
4 |  next visit.  | 
5 |  next visit.  | 
6 |  next visit.  | 
7 |  next visit.  | Dilate | .  | 
None
None
None
0 
None
None
None
1 
None
None
None
2 |  next visit.  | 
None
None
None
3 
None
None
None
4 
None
None
None
5 
None
None
None
6 
None
None
None
7 Dilate | .  | 
None
None
None
final check ate next visit.
final result: 
None
None
None
true positives: 0
true negatives: 0
false positives: 64067
false negatives: 79888
sensitivity: 0.0
specificity: 0.0
accuracy: 0.0
true positives: 0
true negatives: 0
false positives: 246557
false negatives: 347190
sensitivity: 0.0
specificity: 0.0
accuracy: 0.0


In [8]:
#for outputing to a csv
endCol = len(df.columns)
df = pd.read_excel(file, header=None)
df.insert(endCol,'time1', time1) 
df.insert(endCol+1,'date', dateCol)
df.insert(endCol+2,'time2', time2)
df.insert(endCol+3,'hit', hitCol)
df.insert(endCol+4,'hit word', hitWord)
#comment out lines below if not matching to gold standard
df.insert(endCol+5,'agree FU', agreeFU) #prints whether FU matches true or false
df.insert(endCol+6,'agree Hit', agreeHit) #prints whether hit matches true or false

df.to_csv('/home/idies/workspace/Storage/ccai6/NLP in DR/Follow-up and Tag/Output/Output_test_3_22.csv')

In [362]:
# Per-note hit records: df2 = progress notes, df3 = non-progress notes without a matching ICD code.
df2 = pd.DataFrame({'row': hitRow, 'hit': ICDlist, 'GS': GS, 'match': match})
df3 = pd.DataFrame({'row': hitRow2, 'hit': ICDlist2, 'GS': GS2, 'match': match2})

# One entry per note group:
# (total to print first or None, TP, TN, FP, FN, accuracy numerator, accuracy denominator)
hit_summaries = (
    (None, truePos2, trueNeg2, falsePos2, falseNeg2,
     hit2, tot),
    (tot2, truePos3, trueNeg3, falsePos3, falseNeg3,
     hit3, truePos3+falseNeg3 + trueNeg3+falsePos3),
    (None, truePos4, trueNeg4, falsePos4, falseNeg4,
     truePos4+trueNeg4, truePos4+falseNeg4 + trueNeg4+falsePos4),
)
for shown_total, tp, tn, fp, fn, n_correct, n_total in hit_summaries:
    if shown_total is not None:
        print("total:" , shown_total)
    print("true positives:" , tp)
    print("true negatives:" , tn)
    print("false positives:" , fp)
    print("false negatives:" , fn)
    if tp + fn != 0:
        print("sensitivity:" , tp/(tp+fn))
        # A group without any negatives has no defined specificity; avoid dividing by zero.
        if tn + fp != 0:
            print("specificity:" , tn/(tn+fp))
        else:
            print("specificity:" , "n/a")
        print("accuracy:" , n_correct/n_total)
#df2.to_csv('/home/idies/workspace/Storage/ccai6/NLP in DR/Output/ProgressNotes_1st_set_1_4.csv')#includes indexes

#df3.to_csv('/home/idies/workspace/Storage/ccai6/NLP in DR/Output/ICDtag_1st_set_1_4.csv')#includes indexes

true positives: 109
true negatives: 47
false positives: 1
false negatives: 10
sensitivity: 0.9159663865546218
specificity: 0.9791666666666666
accuracy: 0.9341317365269461
total: 574
true positives: 9
true negatives: 553
false positives: 5
false negatives: 7
sensitivity: 0.5625
specificity: 0.9910394265232975
accuracy: 0.9790940766550522
true positives: 492
true negatives: 3
false positives: 0
false negatives: 21
sensitivity: 0.9590643274853801
specificity: 1.0
accuracy: 0.9593023255813954
