<a href="https://colab.research.google.com/github/zelal-Eizaldeen/DLH-Project-Reproduce-HurtfulWords/blob/main/filtering_tokenizing_notes.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

------------------------------------------------------------------------------
# Names:
Zilal Eiz Al Din && Payel Chakraborty
# NetIDs:
zelalae2 && payelc2
# Purpose:
 Filters the clinical notes and tokenizes them.



# Resources:
- https://learning.oreilly.com/library/view/hands-on-large-language/9781098150952/ch04.html
- https://learning.oreilly.com/library/view/build-a-large/9781633437166/Text/chapter-5.html#p40
- https://learning.oreilly.com/library/view/transformers-for-natural/9781805128724/Text/Chapter_05.xhtml
- https://learning.oreilly.com/library/view/transformers-for-natural/9781805128724/Text/Chapter_03.xhtml#_idParaDest-62

In [2]:
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [None]:
import json
import re
import  pandas as pd
import numpy as np
from collections import defaultdict
from scipy.stats import wilcoxon


import nltk
nltk.download('punkt')
nltk.download('punkt_tab')
from nltk.tokenize import sent_tokenize


import torch
import torch.nn.functional as F


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


In [None]:
# Load the output from preprocessing_dataset.ipynb
cleaned_one=pd.read_pickle("path_to_the_output_of_preprocessing_dataset_output.pkl")


In [None]:
len(cleaned_one)

23759

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
!nvidia-sm

/bin/bash: line 1: nvidia-sm: command not found


# Filtering the notes
1. Limit notes to the following types: 1) "Nursing"; 2) "Nursing/other"; 3) "Physician"; and 4) "Discharge summary".
2. Concatenate note subsequences starting from the
end of each patient’s period of interest, working backwards, until
we reach a limit of 30 subsequences.
3. Restrict to notes within the first 48 hours of ICU stay.

In [None]:
# Using smaller dataset
PATH_TO_CLEANED_RAW_DATA="path_to_the_output_of_preprocessing_dataset_output.pkl"


In [None]:
# Read the data
data=pd.read_pickle(PATH_TO_CLEANED_RAW_DATA)


In [None]:
icu_stay_df=pd.read_csv('/path_to_/mimic-iii-clinical-database-1.4/ICUSTAYS.csv')

In [None]:
len(data)

23759

In [None]:
# Strip whitespace from the CATEGORY column
data['category'] = data['category'].str.strip()
data['category'].unique()

array(['Discharge summary', 'Echo', 'ECG', 'Nursing', 'Respiratory',
       'General', 'Physician', 'Nutrition', 'Social Work',
       'Case Management', 'Rehab Services', 'Consult', 'Pharmacy',
       'Radiology', 'Nursing/other'], dtype=object)

In [None]:
# Filter note types
allowed_categories = ["Nursing", "Nursing/other", "Physician", "Discharge summary"]
data= data[data['category'].isin(allowed_categories)]

# Reset index
data = data.reset_index(drop=True)

# View the first few rows
data['category'].unique()


array(['Discharge summary', 'Nursing', 'Physician', 'Nursing/other'],
      dtype=object)

In [None]:
data['subject_id_x'] == data['subject_id_y']

Unnamed: 0,0
0,True
1,True
2,True
3,True
4,True
...,...
18402,True
18403,True
18404,True
18405,True


In [None]:
#drop one of the duplicate columns as they are always equal
data = data.drop(columns='subject_id_y')


In [None]:
data = data.rename(columns={'subject_id_x': 'subject_id'})
data.columns

Index(['row_id', 'subject_id', 'hadm_id', 'chartdate', 'charttime',
       'storetime', 'category', 'description', 'cgid', 'iserror', 'text',
       'row_id_x', 'gender', 'dob', 'dod', 'dod_hosp', 'dod_ssn',
       'expire_flag', 'fold', 'row_id_y', 'admittime', 'dischtime',
       'deathtime', 'admission_type', 'admission_location',
       'discharge_location', 'insurance', 'language', 'religion',
       'marital_status', 'ethnicity', 'edregtime', 'edouttime', 'diagnosis',
       'hospital_expire_flag', 'has_chartevents_data', 'dod_merged',
       'ethnicity_to_use', 'age', 'icd9_code', 'language_to_use',
       'icustay_id'],
      dtype='object')

In [None]:
icu_stay_df.columns=icu_stay_df.columns.str.lower()
icu_stay_df.columns

Index(['row_id', 'subject_id', 'hadm_id', 'icustay_id', 'dbsource',
       'first_careunit', 'last_careunit', 'first_wardid', 'last_wardid',
       'intime', 'outtime', 'los'],
      dtype='object')

In [None]:
# Attach the ICU stay id and ICU admission time (`intime`) to every note.
# The join key is (subject_id, hadm_id), so a note is repeated once for each ICU stay
# of its hospital admission, and notes whose admission has no ICU stay are dropped
# by the inner join.
# `data` already has an `icustay_id` column, so pandas suffixes the two copies as
# `icustay_id_x` (from `data`) and `icustay_id_y` (from ICUSTAYS).
data = data.merge(
    icu_stay_df[['subject_id', 'hadm_id', 'icustay_id', 'intime']],
    on=['subject_id', 'hadm_id'],
    how='inner',
)
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20860 entries, 0 to 20859
Data columns (total 44 columns):
 #   Column                Non-Null Count  Dtype         
---  ------                --------------  -----         
 0   row_id                20860 non-null  int64         
 1   subject_id            20860 non-null  int64         
 2   hadm_id               20860 non-null  float64       
 3   chartdate             20860 non-null  datetime64[ns]
 4   charttime             14618 non-null  datetime64[ns]
 5   storetime             14618 non-null  object        
 6   category              20860 non-null  object        
 7   description           20860 non-null  object        
 8   cgid                  14618 non-null  float64       
 9   iserror               8 non-null      float64       
 10  text                  20860 non-null  object        
 11  row_id_x              20860 non-null  int64         
 12  gender                20860 non-null  object        
 13  dob             

Let's restrict to notes within the first 48 hours of the ICU stay:

In [None]:
# Convert CHARTTIME to datetime
data['charttime'] = pd.to_datetime(data['charttime'], errors='coerce')
data['intime'] = pd.to_datetime(data['intime'], errors='coerce')

In [None]:
# Hours elapsed between ICU admission (`intime`) and the time the note was charted.
data['hours_from_icu_admit'] = (
    (data['charttime'] - data['intime']).dt.total_seconds() / 3600
)

# Keep notes charted in the closed interval [0, 48] hours after ICU admission.
# Rows with a missing `charttime` get NaN here and fail the range test, so they are
# removed as well.
data = data[data['hours_from_icu_admit'].between(0, 48)]

# Preview
print(data.head())

      row_id  subject_id   hadm_id  chartdate           charttime  \
6243  316599       29080  181664.0 2163-02-23 2163-02-23 09:11:00   
6244  316573       29080  181664.0 2163-02-23 2163-02-23 04:31:00   
6251  315769       29548  182189.0 2195-01-18 2195-01-18 16:24:00   
6252  315777       29548  182189.0 2195-01-18 2195-01-18 16:50:00   
6253  317118       30699  108670.0 2121-01-07 2121-01-07 05:50:00   

                storetime   category                         description  \
6243  2163-02-23 09:11:59    Nursing               Nursing Progress Note   
6244  2163-02-23 04:31:07    Nursing               Nursing Progress Note   
6251  2195-01-18 16:25:03  Physician   Physician Resident Admission Note   
6252  2195-01-18 16:50:40  Physician  Physician Attending Admission Note   
6253  2121-01-07 05:50:42    Nursing               Nursing Progress Note   

         cgid  iserror  ... has_chartevents_data           dod_merged  \
6243  21297.0      NaN  ...                    1  2163-

In [None]:
len(data)

5018

Concatenate up to 30 note chunks per ICU stay, starting from the latest to earliest, by following this approach:
- For each ICUSTAY_ID, you want:
1. Sorted in reverse chronological order

2. Take up to 30 notes (or fewer if less)

3. Concatenate their TEXT content into one string



In [None]:
data = data.drop(columns='icustay_id_x') # it has only none or nan values

In [None]:
data.icustay_id_y.unique()
data = data.rename(columns={'icustay_id_y': 'icustay_id'})
data.columns

Index(['row_id', 'subject_id', 'hadm_id', 'chartdate', 'charttime',
       'storetime', 'category', 'description', 'cgid', 'iserror', 'text',
       'row_id_x', 'gender', 'dob', 'dod', 'dod_hosp', 'dod_ssn',
       'expire_flag', 'fold', 'row_id_y', 'admittime', 'dischtime',
       'deathtime', 'admission_type', 'admission_location',
       'discharge_location', 'insurance', 'language', 'religion',
       'marital_status', 'ethnicity', 'edregtime', 'edouttime', 'diagnosis',
       'hospital_expire_flag', 'has_chartevents_data', 'dod_merged',
       'ethnicity_to_use', 'age', 'icd9_code', 'language_to_use', 'icustay_id',
       'intime', 'hours_from_icu_admit'],
      dtype='object')

In [None]:
# First, sort notes by ICUSTAY_ID and CHARTTIME (latest first)
notes_sorted = data.sort_values(['icustay_id', 'charttime'], ascending=[True, False])

# Group by icustay_id and collect up to 30 note texts
def concat_notes(group):
    """Join the `text` values of the first 30 rows of `group` into one string.

    Rows are taken in the order they appear in `group`; each value is converted
    with `str()` and the pieces are separated by a single space.

    Args:
        group: DataFrame with a `text` column (one row per note).

    Returns:
        str: the concatenated text, or '' when `group` has no rows.
    """
    first_texts = group['text'].head(30)
    return ' '.join(map(str, first_texts))

# Build one row per ICU stay: the stay id plus its notes (at most 30) joined into one string.
# Selecting [['text']] after the groupby hands concat_notes only the column it reads and
# keeps the grouping column out of the applied frame; applying to the whole frame makes
# recent pandas versions warn that apply "operated on the grouping columns" (deprecated).
# Row order inside each group is the order of `notes_sorted`, so the result is unchanged.
icu_note_texts = (
    notes_sorted
    .groupby('icustay_id')[['text']]
    .apply(concat_notes)
    .reset_index()
)
icu_note_texts.columns = ['icustay_id', 'concat_notes']

# Preview the result: each row holds the concatenated notes (up to 30) for one ICU stay.
icu_note_texts


  icu_note_texts = notes_sorted.groupby('icustay_id').apply(concat_notes).reset_index()


Unnamed: 0,icustay_id,concat_notes
0,200025,Respiratory Care Note\nPt received from OR int...
1,200037,NICU NPN 2300-0700\n\n\nRESP O: Baby remains i...
2,200038,84 yo M with hx of prior intraparenchymal hemo...
3,200096,1 Infant with Potential Sepsis\n2 Alt in Resp ...
4,200102,resp care\nPt initially on pcv but sats and vo...
...,...,...
4358,299883,TITLE:\n Chief Complaint:\n HPI:\n 84 yo...
4359,299907,Admission Note\nNeonatal Attending Note:\nGirl...
4360,299923,Pt was put on morphine gtt and was extubated p...
4361,299929,56 year-old man with a history of hypertension...


In [None]:
data = pd.merge(data, icu_note_texts,
                  on=['icustay_id'], how='left')

data

Unnamed: 0,row_id,subject_id,hadm_id,chartdate,charttime,storetime,category,description,cgid,iserror,...,has_chartevents_data,dod_merged,ethnicity_to_use,age,icd9_code,language_to_use,icustay_id,intime,hours_from_icu_admit,concat_notes
0,661955,44906,148560.0,2197-03-04,2197-03-04 05:48:00,2197-03-04 06:46:23,Nursing,Nursing Progress Note,19650.0,,...,1,,BLACK,57.0,"[7503, 53019, 53550]",English,281111,2197-03-03 21:22:19,8.428056,57 y.o. ma without significant PMHx presenting...
1,484577,87144,132449.0,2173-08-14,2173-08-14 03:08:00,2173-08-14 03:08:01,Nursing,Nursing Progress Note,20951.0,,...,1,2173-08-14 14:45:00,WHITE,62.0,"[99649, 34401, 73741, 1985, V1052, 5859, 2720,...",English,204332,2173-08-12 18:34:53,32.551944,"PMH: hypothyroidism, [**2162**] left nephrecto..."
2,1270535,739,172752.0,2197-11-22,2197-11-22 22:38:00,2197-11-22 22:47:00,Nursing/other,Report,18078.0,,...,1,2197-12-11 03:16:00,WHITE,60.0,"[5100, 1622, 4660, 04111, 51881, 5789, 4928, 7...",Missing,299463,2197-11-22 16:27:00,6.183333,"CTIC/SICU UPDATE NOTE\nNEURO: OFF PROPOFOL, PT..."
3,1553139,22210,163639.0,2119-11-26,2119-11-26 04:30:00,2119-11-26 04:41:00,Nursing/other,Report,19374.0,,...,1,2119-11-29 02:58:00,WHITE,87.0,"[8604, 8074, 2763, 80706, 86101, E8130, 81002,...",Missing,255332,2119-11-25 14:00:57,14.484167,T/SICU RN Admit/Progress Note\nPatient s/p mvc...
4,2016227,25240,155680.0,2199-10-08,2199-10-08 16:38:00,2199-10-08 17:12:00,Nursing/other,Report,16888.0,,...,1,,WHITE,0.0,"[V3001, 7706, 7661, V053]",Missing,245753,2199-10-08 15:22:13,1.263056,Neonatology\nWritten on wrog patient. Disregar...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4703,469445,86402,168718.0,2159-07-02,2159-07-02 13:18:00,2159-07-02 17:39:05,Nursing,Nursing Progress Note,18240.0,,...,1,2160-05-26 00:00:00,WHITE,78.0,"[41071, 49121, 42832, 41401, 40390, 5859, 2724...",English,235449,2159-07-02 11:13:08,2.081111,"Chief Complaint: Dyspnea, chest pain\n HPI:..."
4704,1881545,14362,170500.0,2127-09-03,2127-09-03 05:13:00,2127-09-03 05:27:00,Nursing/other,Report,18720.0,,...,1,,WHITE,0.0,"[V3000, 76527, 7793, 7742, 76519, V502, V053, ...",Missing,280854,2127-09-02 16:55:10,12.297222,NPN:\n\nRESP: Sats 97-100% in RA. Occasional ...
4705,1593817,25410,186484.0,2111-05-30,2111-05-30 17:06:00,2111-05-30 17:25:00,Nursing/other,Report,15654.0,,...,1,2111-06-29 00:00:00,WHITE,84.0,"[43310, 5070, 99811, 2762, 8670, E8788, 4019, ...",Missing,291494,2111-05-29 13:38:26,27.459444,"PT SUFFERED STROKE 2 WEEKS AGO , CAME TO [**Ho..."
4706,730238,42035,140567.0,2117-04-17,2117-04-17 05:07:00,2117-04-17 07:42:33,Physician,Physician Resident Progress Note,20689.0,,...,1,,WHITE,81.0,"[5070, 51881, 4150, 9348, 5180, 4940, 42731, 2...",English,263483,2117-04-15 20:19:01,32.799722,Chief Complaint:\n 24 Hour Events:\n Pt ma...


# Preprocessing the notes based on the paper
1. Replace PHI placeholders with special tokens (e.g., dates
→ [DATE], names → [NAME]; in this notebook the tokens are written as `PHIDATEPHI`, `PHINAMEPHI`, etc.).
2. Drop outpatient notes

In [None]:
def is_date(string):
    """Return True when a de-identification label looks like a date.

    The label is lower-cased and tested against each pattern in turn:
    `YYYY-M-D`, `M-D`, a bare four-digit year, `M/YYYY`, and a keyword pattern.

    Note on the keyword pattern: the leading `^` binds only to `january`. Every
    other alternative (the remaining month names, `month`, `year`, `date range`)
    matches anywhere in the label, so e.g. "day month" counts as a date.

    Args:
        string: label text (str).

    Returns:
        bool: True if any pattern matches, otherwise False.
    """
    lowered = string.lower()
    date_patterns = (
        r'^\d{4}-\d{1,2}-\d{1,2}$',
        r'^\d{1,2}-\d{1,2}$',
        r'^\d{4}$',
        r'^\d{1,2}/\d{4}$',
        r'^january|february|march|april|may|june|july|august|september|october|november|december|month|year|date range',
    )
    return any(re.search(pattern, lowered) is not None for pattern in date_patterns)

def replace_deid(s):
    """Map a de-identification label to a single placeholder token.

    The label is lower-cased and checked against the categories below in order;
    the first category that matches wins, so a label containing both "hospital"
    and "name" is reported as a hospital.

    Args:
        s: label text taken from inside a de-identification placeholder (str).

    Returns:
        str: one of 'PHIDATEPHI', 'PHIHOSPITALPHI', 'PHILOCATIONPHI',
        'PHINAMEPHI', 'PHICONTACTPHI', 'PHINUMBERPHI', 'PHIAGEPHI',
        or 'PHIOTHERPHI' when nothing matches.
    """
    label = s.lower()

    def mentions(*keywords):
        # True when at least one keyword occurs as a substring of the label.
        return any(keyword in label for keyword in keywords)

    if is_date(label) or mentions('holiday'):
        return 'PHIDATEPHI'
    if mentions('hospital'):
        return 'PHIHOSPITALPHI'
    if mentions('location', 'url', 'university', 'address', 'po box', 'state', 'country', 'company'):
        return 'PHILOCATIONPHI'
    if mentions('name', 'dictator info', 'contact info', 'attending info'):
        return 'PHINAMEPHI'
    if mentions('telephone'):
        return 'PHICONTACTPHI'
    # Labels made only of digits, dashes and slashes also count as numbers.
    if mentions('job number', 'number', 'numeric identifier') or re.search(r'^[\d\-\/]+$', label):
        return 'PHINUMBERPHI'
    if mentions('age over 90'):
        return 'PHIAGEPHI'
    return 'PHIOTHERPHI'

def repl(match):
    """Substitution callback: turn one matched placeholder into its PHI token.

    Drops the first three and last three characters of the matched text (the
    `[**` and `**]` delimiters when used with the placeholder regex in
    process_note_helper), trims surrounding whitespace, and classifies what is
    left with replace_deid.

    Args:
        match: regex match object covering the whole placeholder.

    Returns:
        str: the token returned by replace_deid for the inner label.
    """
    placeholder = match.group(0)
    inner = placeholder[3:-3]
    return replace_deid(inner.strip())

In [None]:

def process_note_helper(note):
    """De-identify and clean a note, then split it into sentences.

    Steps, in order:
      1. Replace every `[** ... **]` placeholder (whose inner text has no `*`)
         with a PHI token via `repl`.
      2. Apply the clean-up substitutions listed in `cleanup_rules`.
      3. Split the text with NLTK's `sent_tokenize`.
      4. Strip each sentence and drop the ones that end up empty.

    Args:
        note: note text (str).

    Returns:
        list[str]: the non-empty, stripped sentences.
    """
    text = re.sub(r'\[\*\*[^*]*\*\*\]', repl, note)

    # (pattern, replacement, flags), applied in this exact order.
    cleanup_rules = (
        (r'\n', ' ', 0),                        # newlines become spaces
        # Numbered-list markers such as "1." — NOTE: this also removes the integer
        # part of decimals ("98.6" -> "6") and digits directly before a full stop.
        (r'[0-9]+\.', '', 0),
        (r'(-){2,}|_{2,}|={2,}', '', 0),        # runs of dashes / underscores / equals signs
        (r'\bdr\.', 'doctor', re.IGNORECASE),
        (r'\bm\.d\.', 'md', re.IGNORECASE),
    )
    for pattern, replacement, flags in cleanup_rules:
        text = re.sub(pattern, replacement, text, flags=flags)

    sentences = []
    for sentence in sent_tokenize(text):
        stripped = sentence.strip()
        if stripped:
            sentences.append(stripped)
    return sentences


In [None]:
print(nltk.data.path)


['/root/nltk_data', '/usr/nltk_data', '/usr/share/nltk_data', '/usr/lib/nltk_data', '/usr/share/nltk_data', '/usr/local/share/nltk_data', '/usr/lib/nltk_data', '/usr/local/lib/nltk_data']


In [None]:
data['cleaned_sent'] = data['concat_notes'].apply(process_note_helper)


In [None]:
example_data_note0= data['cleaned_sent'][0]
example_data_note0 #Each row in cleaned_sent will be a list of cleaned, deidentified sentences.



['57 y.o.',
 'ma without significant PMHx presenting with ?',
 'of foreign body    in his throat.',
 'The patient reported he was eating boneless sweet and    sour chicken on the evening PTA when he felt he got something caught in    his throat.',
 'Since that time, he has been unable to swallow liquids or    solids and feels as if these things get stuck in one place and comes    back up when he attempts to swallow.',
 'This is associated with pain in    his neck around the sternal notch.',
 'He denies vomiting, hematemesis, or    problems with secretions.',
 'PHINAMEPHI Problem -  ?',
 'foreign body in esophagus    Assessment:    Pt c/o inability to swallow, spits out oral secretions.',
 'Discomfort in    throat, upper chest when trying to swallow.',
 'c/o nausea, started    wretching up clear secretions a number of times.',
 'Pt was getting low    back spasms when wretching and when trying to move.',
 'SBP 130s-150s    (has not had recent health care re BP), sats 96-98 on RA, no resp

In [None]:
data['bert_input'] = data['cleaned_sent'].apply(lambda sents: ' '.join(sents))
data['bert_input'][0]

'57 y.o. ma without significant PMHx presenting with ? of foreign body    in his throat. The patient reported he was eating boneless sweet and    sour chicken on the evening PTA when he felt he got something caught in    his throat. Since that time, he has been unable to swallow liquids or    solids and feels as if these things get stuck in one place and comes    back up when he attempts to swallow. This is associated with pain in    his neck around the sternal notch. He denies vomiting, hematemesis, or    problems with secretions. PHINAMEPHI Problem -  ? foreign body in esophagus    Assessment:    Pt c/o inability to swallow, spits out oral secretions. Discomfort in    throat, upper chest when trying to swallow. c/o nausea, started    wretching up clear secretions a number of times. Pt was getting low    back spasms when wretching and when trying to move. SBP 130s-150s    (has not had recent health care re BP), sats 96-98 on RA, no resp    distress. HR 70s, nsr, no ectopy. Action:    

In [None]:
data

Unnamed: 0,row_id,subject_id,hadm_id,chartdate,charttime,storetime,category,description,cgid,iserror,...,ethnicity_to_use,age,icd9_code,language_to_use,icustay_id,intime,hours_from_icu_admit,concat_notes,cleaned_sent,bert_input
0,661955,44906,148560.0,2197-03-04,2197-03-04 05:48:00,2197-03-04 06:46:23,Nursing,Nursing Progress Note,19650.0,,...,BLACK,57.0,"[7503, 53019, 53550]",English,281111,2197-03-03 21:22:19,8.428056,57 y.o. ma without significant PMHx presenting...,"[57 y.o., ma without significant PMHx presenti...",57 y.o. ma without significant PMHx presenting...
1,484577,87144,132449.0,2173-08-14,2173-08-14 03:08:00,2173-08-14 03:08:01,Nursing,Nursing Progress Note,20951.0,,...,WHITE,62.0,"[99649, 34401, 73741, 1985, V1052, 5859, 2720,...",English,204332,2173-08-12 18:34:53,32.551944,"PMH: hypothyroidism, [**2162**] left nephrecto...","[PMH: hypothyroidism, PHIDATEPHI left nephrect...","PMH: hypothyroidism, PHIDATEPHI left nephrecto..."
2,1270535,739,172752.0,2197-11-22,2197-11-22 22:38:00,2197-11-22 22:47:00,Nursing/other,Report,18078.0,,...,WHITE,60.0,"[5100, 1622, 4660, 04111, 51881, 5789, 4928, 7...",Missing,299463,2197-11-22 16:27:00,6.183333,"CTIC/SICU UPDATE NOTE\nNEURO: OFF PROPOFOL, PT...","[CTIC/SICU UPDATE NOTE NEURO: OFF PROPOFOL, PT...","CTIC/SICU UPDATE NOTE NEURO: OFF PROPOFOL, PT ..."
3,1553139,22210,163639.0,2119-11-26,2119-11-26 04:30:00,2119-11-26 04:41:00,Nursing/other,Report,19374.0,,...,WHITE,87.0,"[8604, 8074, 2763, 80706, 86101, E8130, 81002,...",Missing,255332,2119-11-25 14:00:57,14.484167,T/SICU RN Admit/Progress Note\nPatient s/p mvc...,[T/SICU RN Admit/Progress Note Patient s/p mvc...,T/SICU RN Admit/Progress Note Patient s/p mvc ...
4,2016227,25240,155680.0,2199-10-08,2199-10-08 16:38:00,2199-10-08 17:12:00,Nursing/other,Report,16888.0,,...,WHITE,0.0,"[V3001, 7706, 7661, V053]",Missing,245753,2199-10-08 15:22:13,1.263056,Neonatology\nWritten on wrog patient. Disregar...,"[Neonatology Written on wrog patient., Disrega...",Neonatology Written on wrog patient. Disregard...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4703,469445,86402,168718.0,2159-07-02,2159-07-02 13:18:00,2159-07-02 17:39:05,Nursing,Nursing Progress Note,18240.0,,...,WHITE,78.0,"[41071, 49121, 42832, 41401, 40390, 5859, 2724...",English,235449,2159-07-02 11:13:08,2.081111,"Chief Complaint: Dyspnea, chest pain\n HPI:...","[Chief Complaint: Dyspnea, chest pain HPI:...","Chief Complaint: Dyspnea, chest pain HPI: ..."
4704,1881545,14362,170500.0,2127-09-03,2127-09-03 05:13:00,2127-09-03 05:27:00,Nursing/other,Report,18720.0,,...,WHITE,0.0,"[V3000, 76527, 7793, 7742, 76519, V502, V053, ...",Missing,280854,2127-09-02 16:55:10,12.297222,NPN:\n\nRESP: Sats 97-100% in RA. Occasional ...,"[NPN: RESP: Sats 97-100% in RA., Occasional ...",NPN: RESP: Sats 97-100% in RA. Occasional br...
4705,1593817,25410,186484.0,2111-05-30,2111-05-30 17:06:00,2111-05-30 17:25:00,Nursing/other,Report,15654.0,,...,WHITE,84.0,"[43310, 5070, 99811, 2762, 8670, E8788, 4019, ...",Missing,291494,2111-05-29 13:38:26,27.459444,"PT SUFFERED STROKE 2 WEEKS AGO , CAME TO [**Ho...","[PT SUFFERED STROKE 2 WEEKS AGO , CAME TO PHIH...","PT SUFFERED STROKE 2 WEEKS AGO , CAME TO PHIHO..."
4706,730238,42035,140567.0,2117-04-17,2117-04-17 05:07:00,2117-04-17 07:42:33,Physician,Physician Resident Progress Note,20689.0,,...,WHITE,81.0,"[5070, 51881, 4150, 9348, 5180, 4940, 42731, 2...",English,263483,2117-04-15 20:19:01,32.799722,Chief Complaint:\n 24 Hour Events:\n Pt ma...,[Chief Complaint: 24 Hour Events: Pt mai...,Chief Complaint: 24 Hour Events: Pt main...


In [None]:
# Save the preprocessed frame as CSV, without the index column.
# NOTE(review): `cleaned_sent` holds Python lists of sentences; CSV stores such cells as
# their string form, so a reader gets text rather than lists — confirm the downstream
# loader parses them back (or only relies on `bert_input`).
data.to_csv('/path_to_the_output_of_filtering_tokeniazing_notes.ipynb/cleaned_data/data.csv',index=False)