In [None]:
import pandas as pd
import ollama
from process_pm import classify_with_extras
from tqdm import tqdm

# Ollama model tag passed to every generation request below.
# Alternative that was tried: 'llama3.2'
model = 'medllama2:7b'

# Ollama client built with default arguments (no host is specified here).
client = ollama.Client()

# Register tqdm with pandas; the script relies on `Series.progress_apply`.
tqdm.pandas()

# Prompt templates keyed by the result field they populate. Dict insertion
# order is the order in which the model is queried; `{}` receives the report
# text. NOTE: a "study number" (study_no) prompt was stubbed out in an earlier
# version of this function but was empty and never sent, so it is not included.
_LABEL_PROMPTS = {
    # Presence of Seizure (T) - seizure_label
    "seizure_label": """
    You are a medical document classifier. Your task is to read the EEG impression and return 'True' if the impression confirms any of the following: 
    - A seizure or seizures were captured, observed, noted, or was otherwise present during the study, whether electrographic or electroclinical.
    Do not infer or deduce based on other findings. Only return 'True' if the impression states that a seizure occurred during this recording.
    Output must be exactly one of the following: `True` or `False`. Do not return "no", "yes", or any other text.

    This is the EEG report: '{}'
    """,
    # Presence of Status Epilepticus (T) - SE
    "SE": """
    You are a medical document classifier. Your task is to read the EEG impression and return 'True' if the impression explicitly confirms any of the following: 
    - Status epilepticus was observed or diagnosed.
    Do not infer or deduce based on other findings. Only return 'True' if the impression states a patient was in status epilepticus during this recording.
    Output must be exactly one of the following: `True` or `False`. Do not return "no", "yes", or any other text.
    This is the EEG report: '{}'
    """,
    # Is the status epilepticus nonconvulsive (T) or convulsive (F) - NCSE
    "NCSE": """
    You are a medical document classifier. Your task is to read the EEG impression and return 'True' if the impression explicitly confirms any of the following: 
    - Nonconvulsive Status epilepticus was observed or diagnosed.
    Do not infer or deduce based on other findings. Only return 'True' if the impression states a patient was in nonconvulsive status epilepticus during this recording.
    Output must be exactly one of the following: `True` or `False`. Do not return "no", "yes", or any other text.
    This is the EEG report: '{}'
    """,
}


def _first_token(raw):
    """Return the lowercased first whitespace-delimited token of *raw* with
    periods removed, or ``None`` when *raw* is empty or only whitespace."""
    tokens = raw.strip().lower().split(maxsplit=1)
    if not tokens:
        # An empty model reply previously raised IndexError on `[0]`.
        return None
    return tokens[0].replace(".", "")


def classify_llm(text):
    """Ask the LLM three True/False questions about an EEG report.

    If the word "impression" (case-insensitive) occurs in *text*, only the
    portion from its first occurrence onward is sent to the model; otherwise
    the full text is used. One request is made per label, each at
    temperature 0.2, using the module-level ``client`` and ``model``.

    Args:
        text: The report text (a ``str``).

    Returns:
        A dict with keys ``'seizure_label'``, ``'SE'`` and ``'NCSE'``. Each
        value is the normalised first token of the model's reply (lowercased,
        periods stripped) -- expected to be ``'true'`` or ``'false'`` but not
        validated -- or ``None`` if the model returned an empty reply.
    """
    start = text.lower().find("impression")
    # A match at index 0 needs no slicing, so `> 0` covers every useful case.
    if start > 0:
        text = text[start:]

    results = {}
    for label, template in _LABEL_PROMPTS.items():
        response = client.generate(
            model=model,
            prompt=template.format(text),
            options={"temperature": 0.2},
        )
        results[label] = _first_token(response.response)
    return results

# Load the exported workbook into a DataFrame.
df = pd.read_excel("EEG Prolonged Procs vs 8_19.xlsx")
# Columns noted by the original author (this list does not mention every
# column referenced below, e.g. "line" -- verify against the workbook):
# NOTE_ID	Note_Entry_Date	Note Author	proc_name	PAT_NAME	note_text	MRN	BIRTH_DATE	LOC_NAME	ETHNICITY	RACE	ETHNIC_BACKGROUND	GENDER_IDENTITY	Problem_List

# Normalise header names to lowercase so the lookups below are uniform.
df.columns = df.columns.str.lower()

# Force note_text to str so it can be joined during aggregation.
df["note_text"] = df["note_text"].astype(str)

# Order rows by note and then by line so the joined text is in line order.
df_sorted = df.sort_values(by=["note_id", "line"])

# Columns for which the first row's value within each note is kept.
_first_value_columns = (
    "note_file_dttm",
    "note_status",
    "note_type",
    "date_of_service_dttm",
    "author_name",
    "encounter_type",
    "pat_id",
    "pat_name",
    "mrn",
    "birth_date",
    "pat_enc_csn_id",
    "loc_name",
    "ethnicity",
    "race",
    "ethnic_background",
    "gender_identity",
    "problem_list",
    "department_name",
    "room",
    "hosp_admsn_time",
    "hosp_disch_time",
    "inpatient_days",
    "discharge_location",
)

# Key order here determines the column order of the aggregated frame.
_agg_spec = {"note_id": "first", "line": "last"}  # "line" keeps the last value
_agg_spec.update(dict.fromkeys(_first_value_columns, "first"))
_agg_spec["note_text"] = " ".join  # concatenate every line of the note

# Collapse to one row per note_id.
df_merged = df_sorted.groupby("note_id", as_index=False).agg(_agg_spec)

# Uncomment to restrict the run to the first 500 merged notes while testing.
#df_merged = df_merged.head(500)

# Both classifiers consume the same merged note text.
_note_texts = df_merged["note_text"]

# Classifier imported from process_pm, applied with a progress bar.
classification_results_pm = _note_texts.progress_apply(classify_with_extras)

# LLM-based classifier; one dict of labels per note.
classification_results = _note_texts.progress_apply(classify_llm)

# Expand the per-note results into columns (LLM labels first, then the
# process_pm output) and place them beside the merged note columns.
df_extras = pd.DataFrame(list(classification_results))
df_extras2 = pd.DataFrame(list(classification_results_pm))
df_final = pd.concat([df_merged, df_extras, df_extras2], axis=1)

# Write the combined table to disk without the index column.
df_final.to_excel("EEG_Classification_Result_llm_pm.xlsx", index=False)
