In [None]:
# import the required libraries

# data loading and tabular data processing
import pandas as pd

# JSON parsing of the model responses
import json

# openAI API integration
import openai
from openai import OpenAI

# **Data Derivation**
During data derivation, gpt-4o-mini is called to extract the side effects and effectiveness from patient reviews. <br>
The derivation process consists of the following steps: <br>
1. Set up the OpenAI API key
2. Initialize the model
3. Provide the prompt for keyword extraction

In [None]:
# Load the cleaned review data into a DataFrame, then show its
# (rows, columns) dimensions as the cell output.
drug_reviews = pd.read_csv(filepath_or_buffer='cleaned dataset.csv')
drug_reviews.shape

In [None]:
# preview the first rows of the loaded dataset
drug_reviews.head()

In [None]:
# summarize column names, dtypes and non-null counts
drug_reviews.info()

# **Call ChatGPT API**

**Steps:** <br>
1. Setup API Key
2. Provide the instruction
3. Initialize the model
4. Extract the generated response
5. Decode the JSON object
6. Store into the dataset
<br>

**Discussion:** <br>
ChatGPT will analyse each review to extract the **side effects** and **effectiveness** of the drug taken by the patient. <br>
Two new columns named **side effects** and **effectiveness** will be added to the dataset to store the output.

In [None]:
! pip install openai
! pip install --upgrade openai

In [None]:
# Create the OpenAI client.
# The API key is deliberately not written into the notebook: when `api_key` is
# omitted, the client reads it from the OPENAI_API_KEY environment variable.
# Set that variable before starting the kernel; the constructor raises an
# error if no key can be found.
client = OpenAI()

In [None]:
# Keep a handle on the review text column; this is the text sent to the model.
patient_reviews = drug_reviews['review']

# Create the two result columns, filled with None until a value is extracted.
for new_column in ('side effects', 'effectiveness'):
    drug_reviews[new_column] = None

In [None]:
# Extract side-effect and effectiveness keywords from every review.
#
# For each review, gpt-4o-mini is asked for a JSON object with the keys
# 'side_effects' and 'effectiveness'. The parsed values are stored in the
# 'side effects' and 'effectiveness' columns of drug_reviews. A failure on a
# single review is reported and skipped so the remaining reviews still run.

# Rewriting the whole CSV after every review makes the run quadratic in the
# number of rows, so progress is checkpointed every CHECKPOINT_EVERY reviews.
# The `finally` clause saves once more when the loop finishes or is interrupted.
CHECKPOINT_EVERY = 25

# NOTE(review): this is the same file the notebook loads its input from, so a
# run overwrites its own input - confirm this is intended.
OUTPUT_PATH = "cleaned dataset.csv"

total_reviews = len(drug_reviews)

try:
    for position, (row_label, text) in enumerate(patient_reviews.items(), start=1):

        print(f"Processing review {position}/{total_reviews}")

        # provide instruction to chatgpt
        prompt = f"""
             Analyze the following drug review and extract keywords that are specifically related to
             side effects and the effectiveness of the drug. Provide the output as a JSON object with
             two keys: 'side_effects' and 'effectiveness'.
             ```{text}``` """

        try:
            # call model
            response = client.chat.completions.create(
                model='gpt-4o-mini',
                messages=[
                    {'role': 'system', 'content': 'Provide output in valid JSON format.'},
                    {'role': 'user', 'content': prompt}],
                # deterministic and focused output
                temperature=0,
                # JSON mode: ask the API for a single valid JSON object
                response_format={'type': 'json_object'},
            )

            # extract the generated message text
            generated_text = response.choices[0].message.content

            # defensive fallback: drop markdown code fences if any are still present
            generated_text = generated_text.replace("```json", "").replace("```", "").strip()
            print(generated_text)

            # decode json
            response_json = json.loads(generated_text)

            # Write each cell with .at instead of chained indexing
            # (drug_reviews[col][i] = ...), which triggers SettingWithCopyWarning
            # and does not update the frame under pandas Copy-on-Write.
            drug_reviews.at[row_label, 'side effects'] = response_json.get('side_effects')
            drug_reviews.at[row_label, 'effectiveness'] = response_json.get('effectiveness')

        # best effort: report the failure and continue with the next review
        except Exception as e:
            print("Error in ", position, ":", e)

        # periodic checkpoint of the results gathered so far
        if position % CHECKPOINT_EVERY == 0:
            drug_reviews.to_csv(OUTPUT_PATH, index=False)

        print("\n")

finally:
    # always persist the results, including on interruption
    drug_reviews.to_csv(OUTPUT_PATH, index=False)

In [None]:
# show the (rows, columns) dimensions of the dataset after the extraction step
drug_reviews.shape

In [None]:
# preview the first rows, including the 'side effects' and 'effectiveness' columns
drug_reviews.head()