In [15]:
import pandas as pd
import requests
import os
import json

# Input spreadsheet: the processing loop reads the 'Title', 'Abstract',
# 'Parameter' and 'Parameter_definition' columns from each of its rows.
file_path = 'paper dataset.xlsx'
df = pd.read_excel(io=file_path)

# The OpenAI key comes from the environment (never hard-coded);
# a missing variable raises KeyError right here.
api_key = os.environ['OPENAI_API_KEY']
# Define the function to send API call
def send_api_call(system_prompt, input_prompt, timeout=60):
    """Send one chat-completion request to the OpenAI API and return the reply text.

    Parameters
    ----------
    system_prompt : str
        Content of the "system" message.
    input_prompt : str
        Content of the "user" message.
    timeout : float or tuple, optional
        Seconds to wait for the server, passed straight to ``requests.post``.
        Defaults to 60 so a stalled connection cannot hang the notebook forever.

    Returns
    -------
    str or None
        The content of the first choice's message, or ``None`` when the
        request cannot be completed, the server answers with a non-200
        status, or the response body lacks the expected structure. A short
        diagnostic is printed in every failure case.
    """
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }
    payload = {
        "model": "gpt-4o-mini",
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": input_prompt}
        ],
        "max_tokens": 8000,
        "temperature": 0.2
    }

    try:
        response = requests.post(
            "https://api.openai.com/v1/chat/completions",
            headers=headers,
            json=payload,
            timeout=timeout,
        )
    except requests.RequestException as error:
        # Connection errors and timeouts follow the same "return None"
        # failure path as HTTP errors, so one bad row does not abort the run.
        print(f"Request failed before a response was received: {error}")
        return None

    if response.status_code != 200:
        print(f"Request failed with status code {response.status_code}")
        print("Response Body:", response.text)
        return None

    try:
        return response.json()['choices'][0]['message']['content']
    except (ValueError, KeyError, IndexError, TypeError) as error:
        # A 200 response whose body is not JSON or has no choices/message.
        print(f"Unexpected response structure: {error!r}")
        print("Response Body:", response.text)
        return None

# Process each row in the Excel file: build the system and user prompts for
# the row's article and parameter, query the model, and collect the raw reply.
# `outputs` receives exactly one entry per row, in row order; an entry is None
# when send_api_call reports a failure.
outputs = []
for index, row in df.iterrows():
    # Per-row inputs read from the spreadsheet columns.
    title = row['Title']
    abstract = row['Abstract']
    parameter_name = row['Parameter']
    parameter_definition = row['Parameter_definition']
    
    # Create the system prompt. It is an f-string, so category 4 is filled in
    # with this row's parameter name and definition. The leading indentation
    # of every line is part of the text sent to the model.
    system_prompt = f"""
    You are an expert in analyzing medical literature. Your task is to classify the content of medical articles based on specific criteria. For each article, you will assess whether it meets the criteria for being a clinical study, discusses a medical device, explores safety and performance, addresses a specific parameter, mentions vital signs, or involves stress.

    Here’s what each category means:

    1. Clinical Study: This includes observational studies (case-control, cohort, cross-sectional) or clinical trials aimed at understanding disease factors and evaluating the safety and efficacy of investigational drugs, procedures, or devices.
    2. Medical Device: An instrument, apparatus, machine, implant, software, or similar object used in the diagnosis, cure, treatment, or prevention of disease. This excludes products that work through chemical action (such as drugs).
    3. Safety and Performance: This involves evaluating the safety, clinical efficacy, potential side effects, user experiences, regulatory compliance, and comparative effectiveness of a medical device.
    4. {parameter_name}: {parameter_definition}
    5. Vital Signs: Includes metrics like blood pressure, pulse, respiratory rate, temperature, pulse oximetry, and end-tidal CO2.
    6. Stress: A neurobiological response to stressors that disrupts homeostasis, causing physical, mental, or emotional tension and adverse psychological and physiological changes.

    You will be provided with the title and abstract of a medical article and must determine if it meets the criteria for each category.
    Very important:
    1. Do not invent any information.
    2. Strictly follow the above-mentioned definition to extract the required information.
    """
    # NOTE(review): leftover alternative wording for category 5, presumably kept
    # for experimentation: "Vital Signs: Refer to the standard medical definition of vital signs."
    # Create the input prompt. The doubled braces {{ }} are f-string escapes
    # that produce the literal { } of the JSON template; the fourth key is the
    # row's parameter name.
    input_prompt = f"""
    Title:
    {title}

    Abstract:
    {abstract}

    Based on the title and abstract above, classify the following categories and provide the output in JSON format:

    {{
        "Clinical_study": "[yes/no]",
        "Medical_device": "[yes/no]",
        "Safety_and_performance": "[yes/no]",
        "{parameter_name}": "[yes/no]",
        "Vital_signs": "[yes/no]",
        "Stress": "[yes/no]"
    }}
    """
    
    # Send the API call (one blocking HTTP request per row) and keep the raw
    # reply text for the parsing step that follows the loop.
    output = send_api_call(system_prompt, input_prompt)
    outputs.append(output)

# Column order of the result table. "{parameter_name}" is a literal column
# label that holds the name of the row's parameter; "Parameter_addressed"
# holds the yes/no answer given for that parameter.
columns = [
    "Clinical_study",
    "Medical_device",
    "Safety_and_performance",
    "{parameter_name}",
    "Parameter_addressed",
    "Vital_signs",
    "Stress"
]

# Categories requested in every prompt; any other key in a reply is the
# row-specific parameter.
fixed_categories = {
    "Clinical_study",
    "Medical_device",
    "Safety_and_performance",
    "Vital_signs",
    "Stress",
}


def _extract_json_text(raw_output):
    """Return the outermost ``{...}`` span of a model reply, or None.

    Slicing from the first '{' to the last '}' drops Markdown code fences and
    any prose the model wrote around the JSON object. Returns None when the
    reply is not a string (a failed API call yields None) or has no braces.
    """
    if not isinstance(raw_output, str):
        return None
    start = raw_output.find('{')
    end = raw_output.rfind('}')
    if start == -1 or end < start:
        return None
    return raw_output[start:end + 1]


def _parse_classification(json_text):
    """Parse one extracted JSON string into a dict; return {} if unusable."""
    if json_text is None:
        return {}
    try:
        parsed = json.loads(json_text)
    except json.JSONDecodeError:
        return {}
    return parsed if isinstance(parsed, dict) else {}


# Clean and parse the outputs. Unusable replies become empty dicts so that
# the result table keeps one row per input row (filled with NaN).
cleaned_outputs = [_extract_json_text(output) for output in outputs]
parsed_outputs = [_parse_classification(text) for text in cleaned_outputs]

# Move the parameter's answer into "Parameter_addressed" and record the
# parameter's name under the literal "{parameter_name}" column.
for row_number, output in enumerate(parsed_outputs):
    if not output:
        print(f"Row {row_number}: no usable JSON in the model response; row left empty.")
        continue
    # Identify the parameter by name rather than by position, so the result
    # does not depend on the key order chosen by the model.
    parameter_keys = [key for key in output if key not in fixed_categories]
    if not parameter_keys:
        print(f"Row {row_number}: no parameter key found in the model response.")
        continue
    parameter_name = parameter_keys[0]
    output["Parameter_addressed"] = output[parameter_name]
    output["{parameter_name}"] = parameter_name

df = pd.DataFrame(parsed_outputs, columns=columns)
df


Unnamed: 0,Clinical_study,Medical_device,Safety_and_performance,{parameter_name},Parameter_addressed,Vital_signs,Stress
0,yes,no,yes,Imaging_Techniques,yes,no,no
1,yes,yes,yes,Imaging_Techniques,yes,no,no
2,yes,no,no,Patient Monitoring Systems,yes,yes,no


In [62]:
## Alternative prompts using a 'chain-of-thought' approach. Worth trying.
import pandas as pd
import requests
import os
import json

# Input spreadsheet: the processing loop reads the 'Title', 'Abstract',
# 'Parameter' and 'Parameter_definition' columns from each of its rows.
file_path = 'paper dataset.xlsx'
df = pd.read_excel(io=file_path)

# The OpenAI key comes from the environment (never hard-coded);
# a missing variable raises KeyError right here.
api_key = os.environ['OPENAI_API_KEY']
# Define the function to send API call
def send_api_call(system_prompt, input_prompt, timeout=60):
    """Send one chat-completion request to the OpenAI API and return the reply text.

    Parameters
    ----------
    system_prompt : str
        Content of the "system" message.
    input_prompt : str
        Content of the "user" message.
    timeout : float or tuple, optional
        Seconds to wait for the server, passed straight to ``requests.post``.
        Defaults to 60 so a stalled connection cannot hang the notebook forever.

    Returns
    -------
    str or None
        The content of the first choice's message, or ``None`` when the
        request cannot be completed, the server answers with a non-200
        status, or the response body lacks the expected structure. A short
        diagnostic is printed in every failure case.
    """
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }
    payload = {
        "model": "gpt-4o-mini",
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": input_prompt}
        ],
        "max_tokens": 8000,
        "temperature": 0.2
    }

    try:
        response = requests.post(
            "https://api.openai.com/v1/chat/completions",
            headers=headers,
            json=payload,
            timeout=timeout,
        )
    except requests.RequestException as error:
        # Connection errors and timeouts follow the same "return None"
        # failure path as HTTP errors, so one bad row does not abort the run.
        print(f"Request failed before a response was received: {error}")
        return None

    if response.status_code != 200:
        print(f"Request failed with status code {response.status_code}")
        print("Response Body:", response.text)
        return None

    try:
        return response.json()['choices'][0]['message']['content']
    except (ValueError, KeyError, IndexError, TypeError) as error:
        # A 200 response whose body is not JSON or has no choices/message.
        print(f"Unexpected response structure: {error!r}")
        print("Response Body:", response.text)
        return None

# Process each row in the Excel file using the step-by-step ("chain-of-thought")
# prompt variant: build both prompts for the row, query the model, and collect
# the raw reply. `outputs` receives exactly one entry per row, in row order;
# an entry is None when send_api_call reports a failure.
outputs = []
for index, row in df.iterrows():
    # Per-row inputs read from the spreadsheet columns.
    title = row['Title']
    abstract = row['Abstract']
    parameter_name = row['Parameter']
    parameter_definition = row['Parameter_definition']
    
    # System prompt: walks the model through the six categories one by one.
    # It is an f-string, so category 4 is filled in with this row's parameter
    # name and definition. The leading indentation of every line is part of
    # the text sent to the model.
    system_prompt = f"""
    You are an expert in analyzing medical literature. Your task is to carefully classify the content of medical articles based on specific criteria. For each article, you'll follow a step-by-step approach to determine whether it falls into one or more of the following categories: clinical study, medical device, safety and performance, specific parameter, vital signs, or stress.
    Let's start by considering each category in detail:
    1. **Clinical Study**: First, think about whether the article involves research on human subjects, such as observational studies (case-control, cohort, cross-sectional) or clinical trials. These studies aim to understand disease factors or evaluate the safety and efficacy of investigational drugs, procedures, or devices. Does the title or abstract mention such a study?
    2. **Medical Device**: Next, consider whether the article discusses any instrument, apparatus, machine, implant, software, or similar article used in the diagnosis, cure, treatment, or prevention of disease. Remember, this excludes products that work through chemical action, such as drugs. Does the article describe or evaluate such a device?
    3. **Safety and Performance**: Then, evaluate whether the article involves assessing the safety, clinical efficacy, potential side effects, user experiences, regulatory compliance, or comparative effectiveness of a medical device. Does the article focus on how safe or effective a device or method is?
    4. **{parameter_name}**: Reflect on whether the article addresses this specific parameter. Does the title or abstract mention anything about {parameter_name} which can be defined as {parameter_definition}?
    5. **Vital Signs**: Consider if the article discusses vital signs—key metrics such as blood pressure, pulse, respiratory rate, temperature, pulse oximetry, or end-tidal CO2. Does the article measure or analyze any of these vital signs?
    6. **Stress**: Finally, think about whether the article involves stress, defined as a neurobiological response to stressors that disrupt homeostasis, causing physical, mental, or emotional tension. Does the article explore how stress affects individuals or groups?
    As you evaluate the title and abstract of the article, work through each of these categories systematically. Carefully apply the definitions provided, and be sure not to make any assumptions or invent information that is not explicitly mentioned.
    """
    # User prompt: the article text, one question per category, and the JSON
    # template for the answer. The doubled braces {{ }} are f-string escapes
    # that produce the literal { } of the template; the fourth key is the
    # row's parameter name.
    input_prompt = f"""
    Title:
    {title}
    Abstract:
    {abstract}
    Now, let’s classify the content of the article based on the categories we’ve defined. For each category, ask yourself the following questions and answer them based on the information in the title and abstract:
    1. **Clinical Study**: Does this article involve human subjects in research studies, such as observational studies or clinical trials? [yes/no]
    2. **Medical Device**: Does this article describe or evaluate an instrument, apparatus, machine, implant, or software used in medical diagnosis or treatment (excluding drugs)? [yes/no]
    3. **Safety and Performance**: Does this article assess the safety, efficacy, side effects, user experiences, or regulatory compliance of a medical device? [yes/no]
    4. **{parameter_name}**: Does this article discuss {parameter_definition}? [yes/no]
    5. **Vital Signs**: Does this article measure or analyze vital signs like blood pressure, pulse, or temperature? [yes/no]
    6. **Stress**: Does this article explore the effects of stress or stressors on individuals? [yes/no]
    Based on your careful consideration of the definitions of the categories, provide your output in the following JSON format:
    {{
        "Clinical_study": "[yes/no]",
        "Medical_device": "[yes/no]",
        "Safety_and_performance": "[yes/no]",
        "{parameter_name}": "[yes/no]",
        "Vital_signs": "[yes/no]",
        "Stress": "[yes/no]"
    }}
    """
    # Send the API call (one blocking HTTP request per row) and keep the raw
    # reply text for the parsing step that follows the loop.
    output = send_api_call(system_prompt, input_prompt)
    outputs.append(output)

# Column order of the result table. "{parameter_name}" is a literal column
# label that holds the name of the row's parameter; "Parameter_addressed"
# holds the yes/no answer given for that parameter.
columns = [
    "Clinical_study",
    "Medical_device",
    "Safety_and_performance",
    "{parameter_name}",
    "Parameter_addressed",
    "Vital_signs",
    "Stress"
]

# Categories requested in every prompt; any other key in a reply is the
# row-specific parameter.
fixed_categories = {
    "Clinical_study",
    "Medical_device",
    "Safety_and_performance",
    "Vital_signs",
    "Stress",
}


def _extract_json_text(raw_output):
    """Return the outermost ``{...}`` span of a model reply, or None.

    The step-by-step prompt invites the model to write its reasoning before
    the JSON object, and replies are often wrapped in Markdown code fences.
    Slicing from the first '{' to the last '}' drops both. Returns None when
    the reply is not a string (a failed API call yields None) or has no braces.
    """
    if not isinstance(raw_output, str):
        return None
    start = raw_output.find('{')
    end = raw_output.rfind('}')
    if start == -1 or end < start:
        return None
    return raw_output[start:end + 1]


def _parse_classification(json_text):
    """Parse one extracted JSON string into a dict; return {} if unusable."""
    if json_text is None:
        return {}
    try:
        parsed = json.loads(json_text)
    except json.JSONDecodeError:
        return {}
    return parsed if isinstance(parsed, dict) else {}


# Clean and parse the outputs. Unusable replies become empty dicts so that
# the result table keeps one row per input row (filled with NaN).
cleaned_outputs = [_extract_json_text(output) for output in outputs]
parsed_outputs = [_parse_classification(text) for text in cleaned_outputs]

# Move the parameter's answer into "Parameter_addressed" and record the
# parameter's name under the literal "{parameter_name}" column.
for row_number, output in enumerate(parsed_outputs):
    if not output:
        print(f"Row {row_number}: no usable JSON in the model response; row left empty.")
        continue
    # Identify the parameter by name rather than by position, so the result
    # does not depend on the key order chosen by the model.
    parameter_keys = [key for key in output if key not in fixed_categories]
    if not parameter_keys:
        print(f"Row {row_number}: no parameter key found in the model response.")
        continue
    parameter_name = parameter_keys[0]
    output["Parameter_addressed"] = output[parameter_name]
    output["{parameter_name}"] = parameter_name

df = pd.DataFrame(parsed_outputs, columns=columns)
df


Unnamed: 0,Clinical_study,Medical_device,Safety_and_performance,{parameter_name},Parameter_addressed,Vital_signs,Stress
0,yes,no,yes,Imaging_Techniques,yes,no,no
1,yes,yes,yes,Imaging_Techniques,yes,no,no
2,yes,no,no,Patient Monitoring Systems,yes,yes,no
