In [37]:
import json
import logging
import anthropic
from tenacity import retry, wait_random_exponential, stop_after_attempt
import os

# Root logger: timestamped records at INFO level and above.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Shared API client; the key is read from the ANTHROPIC_API_KEY environment
# variable (None is passed through when the variable is unset).
anthropic_client = anthropic.Anthropic(
    api_key=os.environ.get('ANTHROPIC_API_KEY'),
)

# Entity types offered to the model in the prompt (one per line for easy diffing).
entity_labels = [
    "PastDiagnoses",
    "SeizureOnset",
    "SeizureChange",
    "Symptoms",
    "ProvocationSeizureAura",
    "SeizureSeverity",
    "SeizurePropagation",
    "SeizurePalliation",
    "MedicationHistory",
    "Age",
    "EpilepsySurgery",
    "FrequencyOfSeizures",
    "Patient",
]

# Relation types offered to the model in the prompt.
relation_labels = [
    "HAS",
    "EXPERIENCES",
    "PROVOKED_BY",
    "IMPACTS",
    "ASSOCIATED_WITH",
    "IMPROVES",
    "WORSENS",
    "UNDERGOES",
    "LEADS_TO",
]

# Prepare messages
def system_message(entity_labels, relation_labels):
    """Build the instruction text listing the allowed entity and relation types.

    Args:
        entity_labels: Iterable of entity type names; joined with ", ".
        relation_labels: Iterable of relation type names; joined with ", ".

    Returns:
        A multi-line string that starts and ends with a newline.
    """
    entity_list = ", ".join(entity_labels)
    relation_list = ", ".join(relation_labels)
    parts = [
        "",
        "You are an expert in Natural Language Processing. Your task is to identify Named Entities (NER) and relations in a given text.",
        f"The possible Named Entities (NER) types are: ({entity_list}).",
        f"The possible relations are: ({relation_list}).",
        "",
    ]
    # The empty first/last items reproduce the leading and trailing newline.
    return "\n".join(parts)

def assistant_message():
    """Return the worked few-shot example shown to the model.

    The example consists of a sample text followed by the JSON object the
    model is expected to produce for it, and ends with a ``--`` separator.
    Every relation ``source``/``target`` in the example is spelled exactly
    like one of the example's entity strings, so the model is shown
    relations that refer back to extracted entities.

    NOTE(review): the example uses the entity type "SeizureRelatedInjuries",
    which is not in the module-level ``entity_labels`` list -- confirm whether
    the schema or the example should change.

    Returns:
        The example block as a string (starts with a newline).
    """
    # Plain (non-f) string: there is nothing to interpolate, so the JSON braces
    # can be written literally instead of being doubled.
    return """
EXAMPLE:
    Text: 'The 50-year-old patient has a history of probable generalized epilepsy and experiences generalized seizures every two months. She injured herself and bit her tongue during a seizure episode. Her epilepsy began in childhood but reappeared five years ago. Currently, she is on sodium valproate and levetiracetam.'
{
    "Entities": {
        "PastDiagnoses": ["probable generalized epilepsy"],
        "Age": ["50-year-old"],
        "FrequencyOfSeizures": ["every two months"],
        "SeizureOnset": ["began in childhood", "reappeared five years ago"],
        "SeizureRelatedInjuries": ["injured herself", "bit her tongue"],
        "MedicationHistory": ["sodium valproate", "levetiracetam"],
        "Patient": ["She"]
    },
    "Relations": [
        {"type": "HAS", "source": "She", "target": "probable generalized epilepsy"},
        {"type": "EXPERIENCES", "source": "She", "target": "bit her tongue"},
        {"type": "LEADS_TO", "source": "began in childhood", "target": "probable generalized epilepsy"}
    ]
}
--"""

def user_message(text):
    """Wrap ``text`` in the TASK section of the prompt.

    Args:
        text: The text to be annotated.

    Returns:
        A string of the form "\\nTASK:\\n    Text: <text>\\n".
    """
    task_template = "\nTASK:\n    Text: {}\n"
    return task_template.format(text)

# Chat Completion with Claude

def get_entities_relations(entity_labels, relation_labels, text):
    """Ask Claude to extract entities and relations from ``text`` and parse the JSON reply.

    Args:
        entity_labels: Entity type names listed in the prompt.
        relation_labels: Relation type names listed in the prompt.
        text: The text to annotate.

    Returns:
        The dict decoded from the first JSON object in the completion, or
        ``None`` when the completion contains no ``{`` at all (so callers can
        test the result for truthiness).

    Raises:
        json.JSONDecodeError: If the text starting at the first ``{`` is not
            valid JSON.
    """
    # NOTE(review): this goes through the Text Completions endpoint
    # (HUMAN_PROMPT / AI_PROMPT markers, ``completions.create``) -- confirm
    # whether it should be migrated to the Messages API.
    prompt = (
        f"{anthropic.HUMAN_PROMPT}{system_message(entity_labels, relation_labels)}\n"
        f"{assistant_message()}\n"
        f"{user_message(text)}\n"
        "Please output only the JSON object containing the recognized entities and relations.\n"
        f"{anthropic.AI_PROMPT}"
    )

    response = anthropic_client.completions.create(
        model="claude-2",
        prompt=prompt,
        stop_sequences=[anthropic.HUMAN_PROMPT],
        max_tokens_to_sample=1000,
        temperature=0,
        top_p=1,
    )

    response_text = response.completion.strip()
    logging.debug("Assistant's response: %s", response_text)

    # The model may wrap the JSON in prose. Locate the first '{' and decode
    # exactly one JSON value from there; raw_decode ignores anything after it.
    json_start = response_text.find("{")
    if json_start == -1:
        logging.warning("No JSON object found in the model response.")
        return None

    parsed, _end = json.JSONDecoder().raw_decode(response_text, json_start)
    return parsed


def execute_ner(text):
    """Run entity/relation extraction on ``text`` with the module-level label lists.

    Args:
        text: The text to annotate.

    Returns:
        Whatever ``get_entities_relations`` returns for ``text``.
    """
    return get_entities_relations(entity_labels, relation_labels, text)

In [39]:
# get the text from a clinical note in data/clinical_notes
import re 
import json

# Directory holding the clinical notes and the notes to annotate. File names are
# listed explicitly so the run is reproducible and does not depend on the
# order in which os.listdir() happens to return directory entries.
clinical_notes_dir = '../data/clinical_notes/'
clinical_note_filenames = ['EA0009.txt']

for clinical_note_filename in clinical_note_filenames:
    # Construct clinical note path
    clinical_note_path = os.path.join(clinical_notes_dir, clinical_note_filename)

    # Read the whole note, then release the file handle before the API call.
    with open(clinical_note_path, 'r') as clinical_note_file:
        clinical_note_text = clinical_note_file.read()

    # Execute the NER model
    ner_result = execute_ner(clinical_note_text)

    if ner_result:
        print(json.dumps(ner_result, indent=4))
    else:
        print("No extracted JSON found.")
        
             
         

2024-10-27 01:20:50,428 - INFO - HTTP Request: POST https://api.anthropic.com/v1/complete "HTTP/1.1 200 OK"


{
    "Entities": {
        "PastDiagnoses": [
            "frontal lobe brain tumour"
        ],
        "Age": [
            "42-year-old"
        ],
        "SeizureOnset": [
            "around 4 weeks after her operation"
        ],
        "Symptoms": [
            "left-handed stiffness which progresses to her shoulder",
            "lose consciousness",
            "bilateral convulsive seizure"
        ],
        "FrequencyOfSeizures": [
            "2-4 seizures per month",
            "cluster of seizures in August, 2017 where she had 6-9 seizures every week for 3 weeks"
        ],
        "SeizureSeverity": [
            "lose consciousness",
            "bilateral convulsive seizure"
        ],
        "MedicationHistory": [
            "levetiracetam 750 mg twice a day",
            "lamotrigine 100 mg twice a day"
        ],
        "EpilepsySurgery": [
            "craniotomy"
        ],
        "Patient": [
            "she",
            "This woman"
        ]
    },
 