In [1]:
import pandas as pd
import streamlit as st
from langchain_community.llms import Ollama
from langchain.prompts.few_shot import FewShotPromptTemplate
from langchain.prompts.prompt import PromptTemplate
from langchain.prompts import (
    ChatPromptTemplate,
    FewShotChatMessagePromptTemplate,
)
from io import StringIO
import re
import numpy as np
import requests
from typing import Any, List, Mapping, Optional
from langchain_core.callbacks.manager import CallbackManagerForLLMRun
from langchain_core.language_models.llms import LLM
from langchain.output_parsers import ResponseSchema, StructuredOutputParser
from langchain.prompts import PromptTemplate
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import JsonOutputParser
from langchain.prompts.example_selector import SemanticSimilarityExampleSelector
from langchain.embeddings import HuggingFaceEmbeddings
from chromadb.utils import embedding_functions
from chromadb import Documents, EmbeddingFunction, Embeddings
from typing import Optional, Sequence, Union, TypeVar, List, Dict, Any, Tuple, cast
from langchain.prompts.example_selector.ngram_overlap import NGramOverlapExampleSelector
from langchain.output_parsers.enum import EnumOutputParser
from enum import Enum
from gbnf_compiler import *
from random import sample 


# Load the GBNF grammar text; it is attached to every completion request below.
grammar = ""
with open('json.gbnf', 'r') as grammar_file:
    grammar = grammar_file.read()


#llm connector for Palmetto Cluster (llama.cpp ./server)
class CustomLLM(LLM):
    """LangChain LLM wrapper that posts prompts to a llama.cpp HTTP server.

    Every request carries the module-level ``grammar`` string, so that variable
    must be defined before the first call.

    Fields:
        n: value sent as ``n_predict`` with each request.
        temperature: sampling temperature sent with each request.
        endpoint: URL of the server's completion endpoint.
        timeout: seconds to wait for the server; ``None`` waits indefinitely,
            which is what happens when no timeout is passed to ``requests``.
    """

    n: int
    temperature: float = 0.15
    endpoint: str = "http://localhost:8080/completion"
    timeout: Optional[float] = None

    @property
    def _llm_type(self) -> str:
        return "custom"

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Send ``prompt`` to the server and return the generated text.

        Args:
            prompt: fully rendered prompt text.
            stop: must be ``None``; stop sequences are not supported.
            run_manager: accepted for interface compatibility, unused.
            **kwargs: accepted for interface compatibility, unused.

        Returns:
            The ``content`` field of the JSON response, or ``""`` if absent.

        Raises:
            ValueError: if ``stop`` is provided.
            RuntimeError: if the server answers with a non-200 status.
        """
        if stop is not None:
            raise ValueError("stop kwargs are not permitted.")

        payload = {
            "prompt": prompt,
            "temperature": self.temperature,
            "cache_prompt": True,
            "n_predict": self.n,
            "grammar": grammar,
        }
        response = requests.post(self.endpoint, json=payload, timeout=self.timeout)

        if response.status_code != 200:
            # Include the status code so failures can be told apart in the logs.
            raise RuntimeError(
                f"Error from llama.cpp server (HTTP {response.status_code}): {response.text}"
            )
        return response.json().get("content", "")

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        """Get the identifying parameters."""
        return {
            "n": self.n,
            "temperature": self.temperature,
            "endpoint": self.endpoint,
        }

#create langchain llm instance
llm = CustomLLM(n=128)


In [2]:
#prepare few-shot file
few_shot_examples = pd.read_csv('fixed.csv')
data_column = 'COMMENT1'
data_column_2 = 'COMMENT1'

# Upper bound on the number of few-shot examples placed in the prompt, and the
# seed that makes the selection repeatable across kernel restarts.
FEW_SHOT_COUNT = 30
FEW_SHOT_SEED = 42

# Every column except the comment column is treated as a label field.
headers = [col for col in few_shot_examples.columns if col != data_column_2]

# Draw the examples up front. Clamping to the frame length avoids the
# ValueError that a fixed sample size raises when the file has fewer rows.
few_shot_sample = few_shot_examples.sample(
    n=min(FEW_SHOT_COUNT, len(few_shot_examples)),
    random_state=FEW_SHOT_SEED,
)

# parse dataframe into examples using grammar format
# NOTE(review): missing cells are rendered as the text 'nan' (e.g. an empty
# 'Visit Rating'); confirm that is what the few-shot examples should show.
examples_list = [
    {
        'Comment': row[data_column],
        'Data': '\n'.join(f'{col}: {row[col]}' for col in headers),
    }
    for _, row in few_shot_sample.iterrows()
]

print(few_shot_examples.head())

                                            COMMENT1 Expectations Met  \
0  Very through, no short cuts!  It was if we wer...              Yes   
1                                         No comment          Unknown   
2                                               Poor               No   
3   The nurses were unbelievably helpful and caring.              Yes   
4  i should have been allowed to come back with m...               No   

  Trust in Staff Feeling of Safety Positive Experience    Dirty  \
0        Unknown           Unknown                 Yes  Unknown   
1        Unknown           Unknown             Unknown  Unknown   
2        Unknown           Unknown             Unknown  Unknown   
3            Yes           Unknown                 Yes  Unknown   
4        Unknown           Unknown             Unknown  Unknown   

  Risks/Challenges Actions/Strategies      Facilitators  Visit Rating  
0          unknown            unknown  resources_caring           NaN  
1          unk

In [3]:
# Unlabeled patient comments; only the comment column is kept.
new_data = pd.read_csv('ptinfo.csv')[['COMMENT1']]


# Template that renders one few-shot example as a user turn (the comment)
# followed by an assistant turn (the label block).
qtemp = "<|start_header_id|>user<|end_header_id|>Patient Feedback: {Comment}\n<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n{Data}<|eot_id|>"

# Label columns followed by the comment key.
labels_s = [*headers, "Comment"]

example_prompt = PromptTemplate(
    template=qtemp,
    input_variables=['Comment', 'Data'],
)

# System prompt describing the labeling task, the output layout, and the
# allowed labels for each field. It is used as the prefix of the few-shot
# prompt, so it must not contain literal curly braces.
sys_prompt = '''<|begin_of_text|><|start_header_id|>system<|end_header_id|>
# Task:
The comments that you are given are a patient describing their visit at a hospital.
Label the patient feedback data based on the different fields.
If there is no context for the field, return 'unknown' for the field.
Because the comments are anonymized, [person_name] represents an identify name that has been anonymized.
If the comment is '(BLANK)', '(INAUDIBLE)' or '(unreadable)', just label 'unknown' for all categories unless you can extract meaningful statements.
If the comment appears incomplete or a fragment, just label 'unknown' for all categories unless you can extract meaningful statements.
Consider the sentiment of the patient feedback before labeling.
Return only the label structure as follows:

# Output Structure:
Expectations Met: [Label]
Trust in Staff: [Label]
Feeling of Safety: [Label]
Positive Experience: [Label]
Dirty: [Label]
Risks/Challenges: [Label]
Actions/Strategies: [Label]
Facilitators: [Label]
Visit Rating: [1-5]


# Output Labels:
Expectations Met:
This field evaluates whether the patient's expectations regarding their visit, treatment, or service were met. Possible labels:
yes: The patient's expectations were met. Make sure that the patient is not being sarcastic, as that means that their expectations probably have not been met.
no: The patient's expectations were not met.
unknown: It is unclear or not recorded whether the patient's expectations were met.

Trust in Staff:
This measures the patient's level of trust in the medical staff's competence and intentions. Possible labels:
yes: The patient trusted the staff.
no: The patient did not trust the staff.
unknown: It is unclear or not recorded whether the patient trusted the staff.

Feeling of Safety:
This field assesses the patient's perception of safety during their stay or visit. Possible labels:
yes: The patient felt safe.
no: The patient did not feel safe.
unknown: It is unclear or not recorded whether the patient felt safe.

Positive Experience:
This field indicates whether the overall experience of the patient was positive. Possible labels:
yes: The patient had a positive experience.
no: The patient had a negative experience.
unknown: It is unclear or not recorded what the patient's overall experience was.

Dirty:
This field indicates whether the patient mentions the cleanliness of the hospital. Label as 'unknown' if cleanliness is not mentioned. Possible labels:
yes: Hospital is specifically mentioned as being dirty.
no: Hospital is specifically mentioned as being clean.
unknown: Nothing about cleanliness is mentioned. Default.

Risks/Challenges:
This field identifies any risks or challenges faced during the patient's care or stay. Possible labels:
unknown: Default
user_error
insurance_issue: The feedback mentions an issue with health insurance.
waiting_time: Anything about a long wait time is mentioned.
safety_resources
public_concern
safety_concern: The feedback mentions a safety concern in the hospital.
public_external
resources_waiting
waiting_external
insurance_error
safety_waiting
safety_user
user_waiting

Actions/Strategies:
This field details any actions or strategies employed to address the patient's needs or the circumstances of their care. Possible labels:
unknown: Default
communication_staff: The feedback indicates that communication with hospital staff helped them solve their issue.
communication_interaction
use_resources: The feedback indicates that the hospital was resourceful in treating them.

Facilitators:
This field covers the resources or factors that facilitated the patient's care positively. Possible labels:
unknown: Default
resources_caring: The feedback indicates that the staff were kind or caring in the service provided.
resources_documentation: The feedback specifically mentions staff using documentation or records.
quick_response: The patient was pleased with a short wait time in the feedback.

Visit Rating:
Return a number from 1 to 5 depending on how well the patient's visit went according to the feedback.
1: Worst Treatment
2: Bad Treatment
3. Mediocre Treatment
4. Good Treatment
5. Best Treatment
unknown: Default, if a score cannot be inferred from feedback.

<|eot_id|>
'''

def _escape_braces(value):
    """Return ``value`` as text with literal braces doubled.

    The few-shot template first renders each example and then formats the
    combined text once more, so a literal ``{`` or ``}`` inside an example
    would be read as a placeholder. Doubling the braces makes the second
    formatting pass emit them unchanged.
    """
    return str(value).replace('{', '{{').replace('}', '}}')


# Full prompt: system prefix, the few-shot examples, then the comment to label.
prompt = FewShotPromptTemplate(
    example_prompt=example_prompt,
    examples=[
        {key: _escape_braces(value) for key, value in example.items()}
        for example in examples_list
    ],
    prefix=sys_prompt,
    suffix='<|start_header_id|>user<|end_header_id|>Patient Feedback: {comment}\n<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n',
    input_variables=["comment"],
    partial_variables={}
)

# Render one prompt with a placeholder comment and save it for inspection.
prompt_preview = prompt.format(comment='hospital')
# sys_prompt is rebound to the rendered preview, as before, so any later code
# that reads sys_prompt keeps seeing the same value.
sys_prompt = prompt_preview


with open("prompt.txt", "w", encoding="utf-8") as text_file:
    text_file.write(prompt_preview)

In [4]:
# Function to parse the returned grammar
def parse_data(output: str) -> pd.DataFrame:
    """Turn ``Field: label`` lines of model output into a one-row DataFrame.

    Lines without a ``': '`` separator are ignored. Field names and labels are
    stripped of surrounding whitespace so stray spaces or carriage returns do
    not create mismatched column names or labels. If a field appears more than
    once, the last occurrence wins.

    Args:
        output: raw text returned by the model.

    Returns:
        A DataFrame with one column per parsed field and a single row, or an
        empty DataFrame when no field could be parsed.
    """
    data = {}

    # splitlines() also copes with '\r\n' line endings.
    for line in output.strip().splitlines():
        if ': ' not in line:
            continue
        field, label = line.split(': ', 1)
        field = field.strip()
        if not field:
            continue
        data[field] = [label.strip()]  # Use a list to be compatible with DataFrame

    # Create a DataFrame from the dictionary
    return pd.DataFrame(data)

In [5]:
# Empty frame with the comment column first; parsed label columns are added
# after it when the results are concatenated.
new_d = pd.DataFrame().reindex_like(new_data)
new_d = new_d.head(0)


# Perform Analysis
batch_size = 1  # Adjust batch size according to your requirements and limitations
batches = [new_data[i:i + batch_size]
            for i in range(0, new_data.shape[0], batch_size)]

# One-row frames are collected here and concatenated once at the end instead
# of growing new_d inside the loop.
labeled_rows = []

try:
    for batch in batches:
        # Keep only rows whose comment is text; a missing value (NaN) has no
        # .strip() and would otherwise abort the whole run.
        batch_items = []
        for index, comment in batch[data_column].items():
            if isinstance(comment, str):
                batch_items.append((index, comment))
            else:
                print(f'Skipping row {index}: comment is not text ({comment!r})')
        if not batch_items:
            continue

        # Create a batched prompt for processing
        batch_prompts = [prompt.format(comment=comment.strip())
                            for _, comment in batch_items]

        # Process the batch
        try:
            batch_results = llm.batch(batch_prompts)
        except Exception as e:
            print(f'Error processing batch: {e}')
            continue

        # Pair every result with its own comment, not with the first comment
        # of the batch, so batch sizes above 1 stay correct.
        for (index, comment), result in zip(batch_items, batch_results):
            try:
                new_row = parse_data(result)

                print(f"row {index}:")
                print("Patient Feedback: " + comment)
                print(result)
                print("")

                if new_row.empty:
                    # Nothing parseable came back; say so instead of dropping
                    # the row without a trace.
                    print(f'No labels parsed for row {index}; row skipped')
                    continue

                new_row[data_column] = comment
                labeled_rows.append(new_row)

            except Exception as e:
                print(f'Error analyzing row {index}: {e}')
finally:
    # Also runs when the loop is interrupted (e.g. KeyboardInterrupt), so the
    # rows labeled so far are still available in new_d.
    new_d = pd.concat([new_d, *labeled_rows], ignore_index=True)

row 0:
Patient Feedback:    [person_name] x  was nice and he listened to all my .   concerns and answered all my questions and he took the time to explain what I did not understand...
Expectations Met: yes
Trust in Staff: yes
Feeling of Safety: unknown
Positive Experience: yes
Dirty: unknown
Risks/Challenges: user_error
Actions/Strategies: unknown
Facilitators: resources_caring
Visit Rating: 1


row 1:
Patient Feedback:    They treated me exceptionally well, and in exactly the way I needed. I was extremely dehydrated after a night of stomach illness so bad that I couldn't keep down water. Extreme vomiting and diarrhea...   I knew I needed an IV unit to help me get back in shape and they readily agreed, and suggested an anti-nausea medication I hadn't considered before...   Some quick tests were able to show it wasn't [person name] or the flu. Once I was rehydrated and able to hold down a [person_name], they said I was good to go and helped me find the exit. ..   Excellent staff all aro

KeyboardInterrupt: 

In [None]:
# DataFrame.replace returns a new frame, so the result has to be assigned;
# otherwise the CSV is written with the empty labels still in place.
new_d = new_d.replace('', 'Unknown')
new_d.to_csv('sample_70b.csv')