### Setting up session

In [1]:
import os
import sys
from pathlib import Path

print(os.getcwd())

# The notebook runs from <project>/src/notebooks, so the project root is two
# directories above the working directory. pathlib handles the platform's
# path separator and never yields an empty string for shallow directories
# (the parent of the filesystem root is the root itself).
path = str(Path.cwd().parent.parent)
if path not in sys.path:
    # Make `src.*` importable; guarded so re-running the cell adds no duplicates.
    sys.path.append(path)
print(sys.path)

/Users/yashmakkar/Documents/Project/GEO/src/notebooks
['/Library/Frameworks/Python.framework/Versions/3.12/lib/python312.zip', '/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12', '/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/lib-dynload', '', '/Users/yashmakkar/Documents/Project/GEO/.venv/lib/python3.12/site-packages', '/Users/yashmakkar/Documents/Project/GEO']


In [13]:
from langchain.chat_models import init_chat_model
from src.utils.config import config
import requests
from bs4 import BeautifulSoup

### Initializing LangChain Models

In [6]:
# Chat models used by this notebook; both API keys are read from the project config.
openai_llm = init_chat_model(
    model="openai:gpt-4o-mini",
    api_key=config.openai_api_key,
)
google_llm = init_chat_model(
    "gemini-2.5-flash-preview-05-20",
    model_provider="google_genai",
    temperature=0,
    api_key=config.google_api_key,
)

### Scraping the target web page

In [9]:
url = "https://www.xtandi.com"
# Browser-like User-Agent sent with the request.
headers = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.6 Safari/605.1.15",
}

# requests has no default timeout, so an unresponsive server would block the
# kernel indefinitely; bound the wait explicitly.
response = requests.get(url=url, headers=headers, timeout=30)
# Fail here on 4xx/5xx instead of silently scraping an error page downstream.
response.raise_for_status()

In [14]:
# Parse the fetched HTML with the standard-library-backed "html.parser".
# NOTE: `response` must still be the requests result from the scraping cell;
# a later cell rebinds `response` to the OpenAI API result, so re-running this
# cell after that one will not parse the web page.
page_content = BeautifulSoup(response.content, "html.parser")


In [21]:
# Reduce the page to its visible text, one trimmed line per entry.
# - splitlines() also handles "\r\n" line endings, which split("\n") leaves as "\r" residue.
# - Whitespace-only lines are dropped too (a plain length check lets them through).
# - page_text is bound once, to a str, instead of being a list first and a str afterwards.
page_text = "\n".join(
    stripped
    for stripped in (line.strip() for line in page_content.text.splitlines())
    if stripped
)

### Generating search queries

In [27]:
# System prompt for query generation. It is a plain string: the text contains no
# placeholders, so an f-string prefix would only make stray braces in future
# edits a syntax/format hazard.
query_generation_prompt = """Given the following website content, generate a diverse set of user queries that humans might type into a search engine to find information from this site. Your queries should include:

- Simple, straightforward questions (e.g., factual lookups, one-step queries)
- Complex, multi-part or exploratory questions that require deeper reasoning or synthesis
- Ensure that the queries are phrased naturally, just as a typical human user would search online.

Output a list of search queries relevant to the content above, clearly showing both normal and complex examples."""

# Chat messages for query generation: the instructions go in the system turn,
# the scraped page text in the user turn.
query_gen_input = [
    {"role": "system", "content": query_generation_prompt},
    {"role": "user", "content": page_text},
]

In [28]:
# Inspect the assembled messages before sending them to the model.
query_gen_input

[{'role': 'system',
  'content': 'Given the following website content, generate a diverse set of user queries that humans might type into a search engine to find information from this site. Your queries should include:\n\n- Simple, straightforward questions (e.g., factual lookups, one-step queries)\n- Complex, multi-part or exploratory questions that require deeper reasoning or synthesis\n- Ensure that the queries are phrased naturally, just as a typical human user would search online.\n\nOutput a list of search queries relevant to the content above, clearly showing both normal and complex examples.'},
 {'role': 'user',
  'content': 'XTANDI® (enzalutamide) | Advanced Prostate Cancer Treatment\nYou are now leaving XTANDI.com\nPlease verify that you are a U.S. Healthcare Professional.\nThis information is intended for U.S. Healthcare Professionals only.\nAre you a U.S. Healthcare Professional?\nNo\nYes\nYou are now leaving XTANDI.com\nThe website you are about to visit is not owned or co

In [30]:
# Ask the OpenAI chat model for candidate search queries based on the scraped page text.
queries = openai_llm.invoke(query_gen_input)

In [37]:
# Inspect the raw model reply before parsing it into individual queries.
queries

AIMessage(content='1. What is XTANDI used for?\n2. How does XTANDI work in treating prostate cancer?\n3. What are the possible side effects of XTANDI?\n4. Can women take XTANDI?\n5. What should I tell my doctor before starting XTANDI?\n6. How do I take XTANDI?\n7. Where can I find the prescribing information for XTANDI?\n8. What should I do if I miss a dose of XTANDI?\n9. Are there any financial support options available for patients taking XTANDI?\n10. How can I sign up for support emails from XTANDI?\n11. What types of prostate cancer can be treated with XTANDI?\n12. How commonly is XTANDI prescribed compared to other prostate cancer treatments?\n13. What resources are available for patients taking XTANDI?\n14. Are there any restrictions on who can receive information about XTANDI?\n15. What are the risks of seizures while taking XTANDI?\n16. If I have a history of heart disease, is it safe to take XTANDI?\n17. How do the side effects of XTANDI compare to those of other prostate canc

In [38]:
import re


def parse_listed_queries(text: str) -> list[str]:
    """Extract the individual queries from a numbered or bulleted list.

    Only the leading list marker ("12. ", "3) ", "- ", "* ") is removed, so
    periods inside a query (for example "U.S.") are preserved. Lines that do
    not start with a list marker (blank lines, headings, commentary) are
    skipped rather than raising an error.

    Args:
        text: Raw model reply containing one query per list line.

    Returns:
        The query strings in their original order, without list markers.
    """
    list_marker = re.compile(r"^\s*(?:\d+[.)]\s*|[-*]\s+)")
    parsed = []
    for line in text.splitlines():
        marker = list_marker.match(line)
        if marker is None:
            continue  # not a list item
        query = line[marker.end():].strip()
        if query:  # ignore list items with no text after the marker
            parsed.append(query)
    return parsed


search_queries = parse_listed_queries(queries.content)
search_queries

['What is XTANDI used for?',
 'How does XTANDI work in treating prostate cancer?',
 'What are the possible side effects of XTANDI?',
 'Can women take XTANDI?',
 'What should I tell my doctor before starting XTANDI?',
 'How do I take XTANDI?',
 'Where can I find the prescribing information for XTANDI?',
 'What should I do if I miss a dose of XTANDI?',
 'Are there any financial support options available for patients taking XTANDI?',
 'How can I sign up for support emails from XTANDI?',
 'What types of prostate cancer can be treated with XTANDI?',
 'How commonly is XTANDI prescribed compared to other prostate cancer treatments?',
 'What resources are available for patients taking XTANDI?',
 'Are there any restrictions on who can receive information about XTANDI?',
 'What are the risks of seizures while taking XTANDI?',
 'If I have a history of heart disease, is it safe to take XTANDI?',
 'How do the side effects of XTANDI compare to those of other prostate cancer medications?',
 'What sho

In [39]:
# First generated query; it is the question used for the generative-engine prompts that follow.
search_queries[0]

'What is XTANDI used for?'

### Generating Generative Engine Response

In [45]:
# Messages for the generative-engine call: a ChatGPT-style system persona with
# web search instructions, followed by the first generated query as the user turn.
ge_prompt = [
    {
        "role": "system",
        "content": """
        You are ChatGPT, an advanced AI language model with web search capabilities. When a user's question requires recent or factual information, perform a search using the web tool, summarize the findings, and respond conversationally. Always include helpful citations and direct links to sources in your answer. Attribute information clearly, integrating context from both the web and the ongoing chat. Respond naturally and accurately, supporting your statements with retrieved evidence and making references easy to follow.
        """,
    },
    {"role": "user", "content": search_queries[0]},
]

ge_prompt

[{'role': 'system',
  'content': "\n        You are ChatGPT, an advanced AI language model with web search capabilities. When a user's question requires recent or factual information, perform a search using the web tool, summarize the findings, and respond conversationally. Always include helpful citations and direct links to sources in your answer. Attribute information clearly, integrating context from both the web and the ongoing chat. Respond naturally and accurately, supporting your statements with retrieved evidence and making references easy to follow.\n        "},
 {'role': 'user', 'content': 'What is XTANDI used for?'}]

In [None]:
from openai import OpenAI
# No api_key is passed here, unlike the LangChain models, which receive config.openai_api_key.
# NOTE(review): presumably the client picks the key up from the environment — confirm.
client = OpenAI()

# NOTE: this rebinds `response`, which earlier held the requests result for the
# scraped page; cells that need the web page must run before this one.
response = client.responses.create(
    model="gpt-4o-mini",
    tools=[{"type": "web_search"}],  # make the web search tool available to the model
    input=ge_prompt,
    tool_choice="required"  # NOTE(review): presumably forces a tool call — confirm against the API docs
)

# Print the answer text of the response.
print(response.output_text)

XTANDI (enzalutamide) is a medication primarily used to treat various forms of prostate cancer. It functions as an androgen receptor inhibitor, blocking the effects of male hormones like testosterone, which can stimulate the growth of prostate cancer cells. ([xtandi.com](https://www.xtandi.com/how-xtandi-works?utm_source=openai))

Specifically, XTANDI is indicated for:

- **Metastatic Castration-Sensitive Prostate Cancer (mCSPC):** Prostate cancer that has spread to other parts of the body but still responds to treatments that lower testosterone levels. ([ema.europa.eu](https://www.ema.europa.eu/en/medicines/human/EPAR/xtandi?utm_source=openai))

- **Non-Metastatic Castration-Resistant Prostate Cancer (nmCRPC):** Prostate cancer that has not spread to other parts of the body but continues to grow despite treatments that lower testosterone levels. ([ema.europa.eu](https://www.ema.europa.eu/en/medicines/human/EPAR/xtandi?utm_source=openai))

- **Metastatic Castration-Resistant Prostate C

In [60]:
# Gather the annotations (URL citations) from every text part of every assistant
# message in the response. Selecting items by type instead of by fixed position
# keeps this working when the number or order of output items changes, e.g.
# several search calls before the message or more than one text part.
citations = [
    annotation
    for item in response.output
    if item.type == "message"
    for part in item.content
    if part.type == "output_text"
    for annotation in part.annotations
]
citations

[AnnotationURLCitation(end_index=331, start_index=258, title='How XTANDI® (enzalutamide) Works', type='url_citation', url='https://www.xtandi.com/how-xtandi-works?utm_source=openai'),
 AnnotationURLCitation(end_index=653, start_index=560, title='Xtandi | European Medicines Agency (EMA)', type='url_citation', url='https://www.ema.europa.eu/en/medicines/human/EPAR/xtandi?utm_source=openai'),
 AnnotationURLCitation(end_index=952, start_index=859, title='Xtandi | European Medicines Agency (EMA)', type='url_citation', url='https://www.ema.europa.eu/en/medicines/human/EPAR/xtandi?utm_source=openai'),
 AnnotationURLCitation(end_index=1254, start_index=1161, title='Xtandi | European Medicines Agency (EMA)', type='url_citation', url='https://www.ema.europa.eu/en/medicines/human/EPAR/xtandi?utm_source=openai'),
 AnnotationURLCitation(end_index=1429, start_index=1356, title='How XTANDI® (enzalutamide) Works', type='url_citation', url='https://www.xtandi.com/how-xtandi-works?utm_source=openai'),
 

### Comparing Generative Engine Responses
1. Response generated from code (OpenAI API)
2. Response obtained from the ChatGPT web app

In [62]:
# Keep the API answer text for the comparison and scoring prompts.
# (The "reponse" spelling is kept because later cells reference this exact name.)
ge_code_reponse = response.output_text

In [63]:
# Answer to the same question taken from the ChatGPT web app, stored as a literal
# (Markdown body plus its reference-style link definitions) so it can be compared
# with the API-generated answer.
chatgpt_response = (
    """
    XTANDI is the brand name for **enzalutamide**, a medication used to treat certain types of prostate cancer. ([Drugs.com][1])

    Here’s what it’s used for, how it works, and some key points:

    ---

    ## What it is used for

    XTANDI is used in adult men with prostate cancer in situations including:

    1. **Castration-resistant prostate cancer (CRPC)**
    When the cancer no longer responds to therapies (surgical or hormonal) that lower testosterone. ([Mayo Clinic][2])

    2. **Metastatic disease**
    When the cancer has spread to other parts of the body, and it’s still responsive to hormone therapy or surgical lowering of testosterone. ([XTANDI][3])

    3. **Non-metastatic disease at high risk**
    For prostate cancer that hasn’t spread, but the patient is at high risk of it spreading, or there’s a biochemical recurrence (rising PSA after initial treatment) and the cancer is sensitive to hormone therapy. ([Xtandi][4])

    4. **Metastatic castration-sensitive prostate cancer (mCSPC)**
    In some cases, XTANDI is combined with testosterone-lowering therapy to treat prostate cancer that’s already spread but still responds to hormone suppression. ([European Medicines Agency (EMA)][5])

    ---

    ## How it works

    * XTANDI is an **androgen receptor inhibitor** (also called an anti-androgen). ([XTANDI][3])
    * Androgens (like testosterone) can promote the growth of prostate cancer. XTANDI blocks the receptor that androgens bind to, reducing their ability to stimulate cancer cells. ([Cancer Research UK][6])

    ---

    ## Important considerations

    * It’s taken by mouth, usually once a day. ([MedlinePlus][7])
    * Patients are typically also undergoing androgen deprivation therapy (ADT) (to lower testosterone), or have already had surgical removal of the testes, depending on their situation. ([Xtandi][4])
    * Like many cancer drugs, it has side effects, and not everyone is a candidate. A physician will consider risks vs benefits. ([Xtandi][4])

    ---

    If you want, I can tell you about the side-effects, how effective it is, or how it’s used in India. Do you want me to pull up that info?

    [1]: https://www.drugs.com/xtandi.html?utm_source=chatgpt.com "Xtandi: Uses, Dosage & Side Effects"
    [2]: https://www.mayoclinic.org/drugs-supplements/enzalutamide-oral-route/description/drg-20075790?utm_source=chatgpt.com "Enzalutamide (oral route) - Side effects & dosage"
    [3]: https://www.xtandi.com/how-xtandi-works?utm_source=chatgpt.com "How XTANDI® (enzalutamide) Works"
    [4]: https://www.xtandihcp.com/?utm_source=chatgpt.com "XTANDI® (enzalutamide) for mCSPC, nmCRPC & mCRPC ..."
    [5]: https://www.ema.europa.eu/en/medicines/human/EPAR/xtandi?utm_source=chatgpt.com "Xtandi | European Medicines Agency (EMA)"
    [6]: https://www.cancerresearchuk.org/about-cancer/treatment/drugs/enzalutamide?utm_source=chatgpt.com "Enzalutamide | Cancer information"
    [7]: https://medlineplus.gov/druginfo/meds/a612033.html?utm_source=chatgpt.com "Enzalutamide: MedlinePlus Drug Information"

    """
)

In [64]:
# Messages asking a model to compare the two answers to the same query.
# The system turn lists the comparison parameters and the requested output format;
# the user turn interpolates the query, the API answer and the ChatGPT web-app answer.
comparision_prompt = [
    {
        "role": "system",
        "content": """
        Given two responses to the same search-driven user query—one generated by the OpenAI API using its web search tool, and another from ChatGPT's application interface with web search enabled—compare them along the following parameters:

        - Factual accuracy and up-to-dateness of the information

        - Depth and relevance of content

        - Use and clarity of source citations and links

        - Completeness of the response to the given user query

        - Fluency, coherence, and overall readability

        - Presence of hallucinations, fabrications, or missing details

        - Responsiveness to requested web search context (real-world retrieval rather than training data)

        Present a structured evaluation highlighting which response performs better for each parameter, and summarize if the API-generated response is suitable for use in generative engine optimization analysis. Use a table for per-parameter scores (e.g., “API”, “ChatGPT”, “Draw”) and include a qualitative summary explaining key differences and suitability.
        """
    },
    {
        "role": "user",
        "content": f"""
        User Query: {search_queries[0]}
        API Response: {ge_code_reponse}
        ChatGPT Response: {chatgpt_response}
        """
    }
]

In [66]:
# Have the OpenAI chat model evaluate the two answers against each other.
comparision = openai_llm.invoke(comparision_prompt)

In [69]:
# Print the comparison message.
comparision.pretty_print()


### Evaluation of Responses

| Parameter                                                          | API Response                    | ChatGPT Response                  | Outcome            |
|-------------------------------------------------------------------|---------------------------------|-----------------------------------|--------------------|
| **Factual accuracy and up-to-dateness**                            | High accuracy, but lacks details on newer indications or studies. | High accuracy and up-to-date with additional notes on indications. | **Draw** |
| **Depth and relevance of content**                                 | Covers main indications but lacks additional context. | Offers detailed insights into usage contexts. | **ChatGPT** |
| **Use and clarity of source citations and links**                 | Provides citations, but fewer sources overall. | Extensive citing, clear links to relevant resources. | **ChatGPT** |
| **Completeness of the response to the given user 

### Measuring API Response on GEO Metrics

In [70]:
# System prompt for scoring a text on seven GEO (Generative Engine Optimization)
# parameters, with explanations and improvement suggestions requested per parameter.
measure_prompt = (
    """
    You are an advanced language model specialized in evaluating generated content using comprehensive GEO (Generative Engine Optimization) metrics and multi-factor content optimization. 

    Given a piece of text, assess it on the following parameters:

    1. Citation Prominence: How clearly and effectively citations and sources are presented and highlighted.
    2. Relevance and Influence of Website Content: How well the text aligns with, and leverages, authoritative and impactful content from relevant websites.
    3. Uniqueness, Click Probability, and Diversity: The presence of unique words, diverse ideas, and likelihood to engage users to click and read further.
    4. Fluency Optimization: The readability, natural flow, and understanding ease of the text.
    5. Authoritative & Technical Language: The use of trust-building, domain-specific expertise and technical terms appropriately.
    6. Quotations & Statistics: Effective integration of real-world facts, numerical data, and quotations to substantiate claims.
    7. Position-Adjusted Word Count: Weight important content appearing earlier in the text more, to reflect prominence.

    Provide a detailed, multi-dimensional score or qualitative commentary for each parameter with explanations. Suggest specific improvements if any, to enhance overall generative content quality based on these GEO strategies.
    """
)

# Messages for the GEO scoring call: the scoring instructions as the system turn,
# and the query, the API answer and the scraped homepage text as the user turn.
measure_input = [
    {
        "role": "system",
        "content": measure_prompt
    },
    {
        "role": "user",
        "content": f"""
        User Query: {search_queries[0]}
        API Response: {ge_code_reponse}
        Target Website Homepage: {page_text}
        """
    }
]

In [73]:
# Score the API answer on the GEO parameters with the OpenAI chat model.
ge_metrics_measurement = openai_llm.invoke(measure_input)

In [75]:
# Print the GEO assessment message.
ge_metrics_measurement.pretty_print()


### Assessment of API Response According to GEO Metrics:

1. **Citation Prominence**: 
   - **Score: 8/10**
   - The citations are effectively presented, providing direct links to valuable sources like the official XTANDI website and the European Medicines Agency. This enhances the credibility of the information. Still, bolding or italicizing the citations would further enhance their visibility.

2. **Relevance and Influence of Website Content**: 
   - **Score: 9/10**
   - The response aligns well with authoritative content, referencing credible medical sources. The inclusion of information from the official site and regulatory body underlines its relevance. Incorporating content from additional high-traffic health websites like Mayo Clinic or WebMD could enhance this aspect.

3. **Uniqueness, Click Probability, and Diversity**: 
   - **Score: 7/10**
   - The content presents diverse facets of XTANDI usage, which captures reader interest. However, the language used is somewhat standar