In [29]:
!pip install langchain-community
!pip install py2neo
!pip install neo4j-driver
!pip install python-dotenv

Collecting python-dotenv>=0.21.0 (from pydantic-settings<3.0.0,>=2.4.0->langchain-community)
  Using cached python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)
Using cached python_dotenv-1.0.1-py3-none-any.whl (19 kB)
Installing collected packages: python-dotenv
Successfully installed python-dotenv-1.0.1


In [74]:
# Langchain
from langchain_community.graphs import Neo4jGraph
from langchain_community.vectorstores import Neo4jVector
from langchain_openai import OpenAIEmbeddings
from langchain.chains import RetrievalQAWithSourcesChain
from langchain_openai import ChatOpenAI
from langchain.prompts.prompt import PromptTemplate
from langchain.chains import GraphCypherQAChain

In [62]:
import textwrap

In [31]:
from dotenv import main
import os

In [49]:
# Load from environment
main.load_dotenv('.env', override=True)
print(os.getenv('NEO4J_URI'))


bolt://localhost:7687


In [34]:
NEO4J_URI = os.getenv('NEO4J_URI')
NEO4J_USERNAME = os.getenv('NEO4J_USERNAME')
NEO4J_PASSWORD = os.getenv('NEO4J_PASSWORD')
NEO4J_DATABASE = os.getenv('NEO4J_DATABASE')

In [35]:
kg = Neo4jGraph(
    url=NEO4J_URI, username=NEO4J_USERNAME, password=NEO4J_PASSWORD, database=NEO4J_DATABASE
)

### Querying the Assets knowledge graph 
- Match all nodes in the graph

In [5]:
cypher = """
  MATCH (n) 
  RETURN count(n)
  """

In [6]:
result = kg.query(cypher)
result

[{'count(n)': 249}]

#### Match only Client Nodes

In [7]:
cypher = """
    MATCH (n:Client) 
    RETURN n
    """
kg.query(cypher)

[{'n': {'name': 'Pfizer'}}, {'n': {'name': 'Johnson & Johnson'}}]

In [8]:
cypher = """
    MATCH (claim:Claim) WHERE claim.text IS NOT NULL
    RETURN claim limit 5
    """
kg.query(cypher)

[{'claim': {'text': 'Helps protect against 20 strains of pneumococcal disease.'}},
 {'claim': {'text': 'Treats moderate to severe plaque psoriasis in adults. (References not provided)'}},
 {'claim': {'text': 'Proven effective for adults aged 18 and older.'}},
 {'claim': {'text': 'Provides relief for rheumatoid arthritis symptoms.'}},
 {'claim': {'text': 'Significantly delays disease progression in HR+/HER2- breast cancer.'}}]

In [None]:
#BASE_LLM_URL = "https://dev.lionis.ai/api/v1/llm/"
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')

KEY

### Create Vector Index

In [None]:
kg.query("""
  CREATE VECTOR INDEX claim_text_embeddings IF NOT EXISTS
  FOR (c:Claim) ON (c.claimTextEmbedding) 
  OPTIONS { indexConfig: {
    `vector.dimensions`: 1536,
    `vector.similarity_function`: 'cosine'
  }}"""
)

In [495]:
kg.query("""
  SHOW VECTOR INDEXES
  """
)

[{'id': 2,
  'name': 'claim_text_embeddings',
  'state': 'ONLINE',
  'populationPercent': 100.0,
  'type': 'VECTOR',
  'entityType': 'NODE',
  'labelsOrTypes': ['Claim'],
  'properties': ['claimTextEmbedding'],
  'indexProvider': 'vector-2.0',
  'owningConstraint': None,
  'lastRead': neo4j.time.DateTime(2024, 11, 28, 17, 28, 48, 336000000, tzinfo=<UTC>),
  'readCount': 2242}]

### Create Embeddings and populate Vector Index

In [496]:
kg.query("""
    MATCH (claim:Claim) WHERE claim.text IS NOT NULL
    WITH claim, genai.vector.encode(
        claim.text, 
        "OpenAI", 
        {
          token: $openAIKey
        }) AS vector
    CALL db.create.setNodeVectorProperty(claim, "claimTextEmbedding", vector)
    """, 
    params={"openAIKey":OPENAI_API_KEY} )

[]

In [497]:
kg.query("SHOW INDEXES")

[{'id': 3,
  'name': 'claimTextSearch',
  'state': 'ONLINE',
  'populationPercent': 100.0,
  'type': 'FULLTEXT',
  'entityType': 'NODE',
  'labelsOrTypes': ['Claim'],
  'properties': ['text'],
  'indexProvider': 'fulltext-1.0',
  'owningConstraint': None,
  'lastRead': neo4j.time.DateTime(2024, 11, 28, 16, 51, 5, 25000000, tzinfo=<UTC>),
  'readCount': 2},
 {'id': 2,
  'name': 'claim_text_embeddings',
  'state': 'ONLINE',
  'populationPercent': 100.0,
  'type': 'VECTOR',
  'entityType': 'NODE',
  'labelsOrTypes': ['Claim'],
  'properties': ['claimTextEmbedding'],
  'indexProvider': 'vector-2.0',
  'owningConstraint': None,
  'lastRead': neo4j.time.DateTime(2024, 11, 28, 17, 28, 48, 336000000, tzinfo=<UTC>),
  'readCount': 2242},
 {'id': 0,
  'name': 'index_343aff4e',
  'state': 'ONLINE',
  'populationPercent': 100.0,
  'type': 'LOOKUP',
  'entityType': 'NODE',
  'labelsOrTypes': None,
  'properties': None,
  'indexProvider': 'token-lookup-1.0',
  'owningConstraint': None,
  'lastRead':

In [None]:
result = kg.query("""
    MATCH (c:Claim) 
    WHERE c.text IS NOT NULL
    RETURN c.text, c.claimTextEmbedding
    LIMIT 1
    """
)

1536

In [13]:
result[0]['c.text']

'Helps protect against 20 strains of pneumococcal disease.'

In [14]:
result[0]['c.claimTextEmbedding'][:10]

[-0.010019322857260704,
 -0.004647746682167053,
 0.01297813467681408,
 -0.03438824787735939,
 0.01973387598991394,
 0.02104184776544571,
 -0.013041628524661064,
 -0.015517886728048325,
 -0.03098497912287712,
 0.00860976055264473]

In [15]:
len(result[0]['c.claimTextEmbedding'])

1536

In [339]:
kg.query("""
    MATCH (advertisement:Advertisement)-[:USES_CLAIM]->(claim:Claim)-[:HAS_REFERENCE]->(reference:Reference),
    (advertisement)-[:HAS_MLR_STATUS]->(mlrStatus:MLRStatus)
    WHERE mlrStatus.status = 'fail' AND reference.text = 'No references'
    DETACH DELETE reference
    """
    )

[]

In [340]:
kg.refresh_schema()
print(kg.schema)

Node properties:
Client {name: STRING}
Brand {name: STRING}
Advertisement {assetSubType: STRING, audience: STRING, indicationAndUsage: STRING, assetFormat: STRING, assetPurpose: STRING, fullPrescribingInformation: STRING, headline: STRING, assetURL: STRING, assetType: STRING, dimensions: STRING}
Claim {text: STRING, claimTextEmbedding: LIST}
Footnote {text: STRING}
Reference {text: STRING}
Channel {name: STRING}
MLRStatus {status: STRING, reason: FLOAT}
Relationship properties:

The relationships:
(:Client)-[:HAS_BRAND]->(:Brand)
(:Brand)-[:HAS_ADVERTISEMENT]->(:Advertisement)
(:Advertisement)-[:USES_CLAIM]->(:Claim)
(:Advertisement)-[:DISPLAYED_ON]->(:Channel)
(:Advertisement)-[:HAS_MLR_STATUS]->(:MLRStatus)
(:Claim)-[:HAS_REFERENCE]->(:Reference)
(:Claim)-[:HAS_FOOTNOTE]->(:Footnote)


In [75]:
query = "WITH genai.vector.encode($inputClaim, 'OpenAI', { token: $openAIKey }) AS inputVector " \
        "MATCH (advertisement:Advertisement)-[:USES_CLAIM]->(claim:Claim), " \
        "(advertisement)-[:HAS_MLR_STATUS]->(mlrStatus:MLRStatus) " \
        "WHERE mlrStatus.status = 'pass' AND claim.claimTextEmbedding IS NOT NULL " \
        "CALL db.index.vector.queryNodes($index_name, $top_k, inputVector) yield node, score " \
        "WITH advertisement, node, score " \
        "WHERE score > $similarityThreshold " \
        "RETURN { assetSubType: advertisement.assetSubType, audience: advertisement.audience, indicationAndUsage: advertisement.indicationAndUsage, " \
        "assetFormat: advertisement.assetFormat, assetPurpose: advertisement.assetPurpose, fullPrescribingInformation: advertisement.fullPrescribingInformation, " \
        "headline: advertisement.headline, assetURL: advertisement.assetURL, assetType: advertisement.assetType, dimensions: advertisement.dimensions } AS advertisementDetails, " \
        "node.text AS claimText, score " \
        "ORDER BY score DESC " \
        "LIMIT $limit"
        
params = {
    "openAIKey": OPENAI_API_KEY,
    "inputClaim": "Helps protect against 18 strains of pneumococcal disease",
    "similarityThreshold": 0.9,
    "index_name": "claim_text_embeddings",
    "top_k": 1,
    "limit": 1
}
kg.query(query, params)

[{'advertisementDetails': {'assetFormat': 'Banner',
   'headline': 'Science for a Better Life',
   'audience': 'Medical Professional',
   'assetType': 'Multi Frame',
   'assetSubType': 'Video',
   'assetURL': 'https://example.com/assets/8253',
   'dimensions': '300x250',
   'fullPrescribingInformation': 'Prevnar 20® is a pneumococcal 20-valent conjugate vaccine indicated for the prevention of invasive disease and pneumonia caused by Streptococcus pneumoniae serotypes covered by the vaccine. It is recommended for use in adults 18 years of age and older. Common side effects include injection site reactions, fatigue, and muscle pain. Please consult the full prescribing information for detailed safety and usage instructions.',
   'assetPurpose': 'Educational',
   'indicationAndUsage': 'Vaccination'},
  'claimText': 'Helps protect against 20 strains of pneumococcal disease.',
  'score': 0.9763641357421875}]

In [214]:
embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)

In [215]:
input_vector = embeddings.embed_query("Helps protect against 18 strains of pneumococcal disease")

In [216]:
query = """MATCH (advertisement:Advertisement)-[:USES_CLAIM]->(claim:Claim),(advertisement)-[:HAS_MLR_STATUS]->(mlrStatus:MLRStatus)
WHERE mlrStatus.status = 'pass' AND claim.claimTextEmbedding IS NOT NULL
CALL db.index.vector.queryNodes($index_name, $top_k, $inputVector) YIELD node, score
WITH advertisement, node, score
WHERE score > 0.9
RETURN advertisement.assetSubType, advertisement.audience, advertisement.indicationAndUsage,
advertisement.assetFormat, advertisement.assetPurpose, advertisement.fullPrescribingInformation,
advertisement.headline, advertisement.assetURL, advertisement.assetType, advertisement.dimensions,
node.text AS claimText, score
ORDER BY score DESC
LIMIT $limit"""

params = {
    "openAIKey": OPENAI_API_KEY,
    "inputVector": input_vector,
    "index_name": "claim_text_embeddings",
    "top_k": 1,
    "limit": 1
}

In [217]:
kg.query(query, params)

[{'advertisement.assetSubType': 'Video',
  'advertisement.audience': 'Medical Professional',
  'advertisement.indicationAndUsage': 'Vaccination',
  'advertisement.assetFormat': 'Banner',
  'advertisement.assetPurpose': 'Educational',
  'advertisement.fullPrescribingInformation': 'Prevnar 20® is a pneumococcal 20-valent conjugate vaccine indicated for the prevention of invasive disease and pneumonia caused by Streptococcus pneumoniae serotypes covered by the vaccine. It is recommended for use in adults 18 years of age and older. Common side effects include injection site reactions, fatigue, and muscle pain. Please consult the full prescribing information for detailed safety and usage instructions.',
  'advertisement.headline': 'Science for a Better Life',
  'advertisement.assetURL': 'https://example.com/assets/8253',
  'advertisement.assetType': 'Multi Frame',
  'advertisement.dimensions': '300x250',
  'claimText': 'Helps protect against 20 strains of pneumococcal disease.',
  'score': 

### Similarity search and setup a RAG to interact with AssetClaim repository
- Calculate embedding for question
- Identify matching advertisment based on similarity of question and `claimTextEmbedding` vectors

In [42]:
# Global constants
VECTOR_INDEX_NAME = 'claim_text_embeddings'
VECTOR_NODE_LABEL = 'Claim'
VECTOR_SOURCE_PROPERTY = 'text'
VECTOR_EMBEDDING_PROPERTY = 'claimTextEmbedding'

In [51]:
neo4j_vector_store = Neo4jVector.from_existing_graph(
    embedding=OpenAIEmbeddings(),
    url=NEO4J_URI,
    username=NEO4J_USERNAME,
    password=NEO4J_PASSWORD,
    index_name=VECTOR_INDEX_NAME,
    node_label=VECTOR_NODE_LABEL,
    text_node_properties=[VECTOR_SOURCE_PROPERTY],
    embedding_node_property=VECTOR_EMBEDDING_PROPERTY,
)

In [None]:
#retriever = neo4j_vector_store.as_retriever()

### Writing Cypher with an LLM

In this section, we'll use few-shot learning to teach an LLM to write Cypher
- We'll use the OpenAI's GPT 3.5 model 
- We'll also use a new Neo4j integration within LangChain called **GraphCypherQAChain**

In [532]:
CYPHER_GENERATION_TEMPLATE = """
Task: Generate a Cypher statement to query a graph database.
Instructions:
- Use only the provided relationship types and properties in the schema. Do not use any other relationship types or properties that are not provided. Consider the relationship direction as well.
- Do not respond to any questions that might ask anything else than for you to construct a Cypher statement.
- Ensure the generated Cypher statement is syntactically correct and compatible with Neo4j.
- Return only the Cypher statement without explanations, apologies, or additional commentary.
- If the query involves vector similarity, use appropriate vector-based querying methods such as `db.index.vector.queryNodes` or `genai.vector.encode`
- If inputClaim and token are provided then use their value while generating cypher query.

Schema:
{schema}

Examples: Here are a few examples of generated Cypher statements for particular questions:

# Retrieve claims and details for Advertisements with MLRStatus 'pass' and a claim similar to the {inputClaim}.
WITH genai.vector.encode("{inputClaim}", 'OpenAI', {token}) AS inputVector
MATCH (advertisement:Advertisement)-[:USES_CLAIM]->(claim:Claim),(advertisement)-[:HAS_MLR_STATUS]->(mlrStatus:MLRStatus)
WHERE mlrStatus.status = 'pass' AND claim.claimTextEmbedding IS NOT NULL
CALL db.index.vector.queryNodes("claim_text_embeddings", 1, inputVector) YIELD node, score
WITH advertisement, node, score
WHERE score > 0.9
RETURN advertisement.assetSubType, advertisement.audience, advertisement.indicationAndUsage,
advertisement.assetFormat, advertisement.assetPurpose, advertisement.fullPrescribingInformation,
advertisement.headline, advertisement.assetURL, advertisement.assetType, advertisement.dimensions,
node.text AS claimText, score
ORDER BY score DESC
LIMIT 1

# Retrieve claims for the Advertisements for which MLRStatus is'pass' and a claim similar to the {inputClaim}.
WITH genai.vector.encode("{inputClaim}", 'OpenAI', {token}) AS inputVector
MATCH (advertisement:Advertisement)-[:USES_CLAIM]->(claim:Claim),(advertisement)-[:HAS_MLR_STATUS]->(mlrStatus:MLRStatus)
WHERE mlrStatus.status = 'pass' AND claim.claimTextEmbedding IS NOT NULL
CALL db.index.vector.queryNodes("claim_text_embeddings", 1, inputVector) YIELD node, score
WITH DISTINCT node, score
WHERE score > 0.9
RETURN node.text AS claimText, score
ORDER BY score DESC
LIMIT 1

# Give me the asset URL and claim for the advertisment for which reference is missing
MATCH (advertisement:Advertisement)
MATCH (advertisement)-[:USES_CLAIM]->(claim:Claim)
OPTIONAL MATCH (claim)-[:HAS_REFERENCE]->(reference:Reference)
WITH advertisement, claim, reference
WHERE reference IS NULL
RETURN advertisement.assetURL, claim.text, reference

# Retrieve claims, references and asset MLR review failure reason for Advertisements with MLR Status as failure
MATCH (advertisement:Advertisement)-[:USES_CLAIM]->(claim:Claim)
MATCH (claim)-[:HAS_REFERENCE]->(reference:Reference)
MATCH (advertisement)-[:HAS_MLR_STATUS]->(mlrStatus:MLRStatus)
WHERE mlrStatus.status = 'fail'
RETURN claim.text AS claimText, reference.text AS referenceText, mlrStatus.reason

# Retrive the most often used claim in any passed advertisement
MATCH (advertisement:Advertisement)-[:HAS_MLR_STATUS]->(mlrStatus:MLRStatus)
MATCH (advertisement)-[:USES_CLAIM]->(claim:Claim)
WHERE mlrStatus.status = 'pass'
WITH claim, COUNT(advertisement) AS usageCount
RETURN claim.text AS claimText, usageCount
ORDER BY usageCount DESC
LIMIT 5

# Give me top 5 headlines used for client Johnson and Johnson
MATCH (client:Client)-[:HAS_BRAND]->(:Brand)-[:HAS_ADVERTISEMENT]->(advertisement:Advertisement)-[:HAS_MLR_STATUS]->(mlrStatus:MLRStatus)
WHERE client.name = 'Johnson & Johnson'and mlrStatus.status = 'pass'
WITH advertisement
MATCH (advertisement)-[:USES_CLAIM]->(:Claim)
RETURN advertisement.headline AS headline, COUNT(advertisement) AS usageCount
ORDER BY usageCount DESC
LIMIT 5

# Tell me in terms of Yes or No if this particular claim text is present for any passed advertisment. Claim text is 'Proven effective for adults aged 18 and older.'
MATCH (advertisement:Advertisement)-[:HAS_MLR_STATUS]->(mlrStatus:MLRStatus)
MATCH (advertisement)-[:USES_CLAIM]->(claim:Claim)
WHERE mlrStatus.status = 'pass' and claim.text= 'Proven effective for adults aged 18 and older.'
RETURN CASE WHEN COUNT(*) > 0 THEN 'Yes' ELSE 'No' END AS ClaimPresentInPassedAdvertisements

Question:
{question}
"""


In [533]:
CYPHER_GENERATION_PROMPT = PromptTemplate(
    input_variables=["schema", "question","inputClaim","token"], 
    template=CYPHER_GENERATION_TEMPLATE
)

In [534]:
cypherChain = GraphCypherQAChain.from_llm(
    ChatOpenAI(temperature=0, openai_api_key=OPENAI_API_KEY),
    graph=kg,
    verbose=True,
    allow_dangerous_requests=True,
    return_direct=True,
    cypher_prompt=CYPHER_GENERATION_PROMPT
)

In [467]:
failedClaim = "Delays disease progression in HR/HER2 cancer."
question = f"Retrieve claims and details for Advertisements with MLRStatus 'pass' and a claim similar to the input claim which is -{failedClaim}"

In [143]:
input_vector = embeddings.embed_query(failedClaim)

In [266]:
token = f"""{{ token: "{OPENAI_API_KEY}" }}"""
token

'{ token: "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" }'

In [535]:
prompt=CYPHER_GENERATION_PROMPT.format(schema=kg.get_schema, question=question, inputClaim=failedClaim, token=token)
print('Prompt:', prompt)

Prompt: 
Task: Generate a Cypher statement to query a graph database.
Instructions:
- Use only the provided relationship types and properties in the schema. Do not use any other relationship types or properties that are not provided. Consider the relationship direction as well.
- Do not respond to any questions that might ask anything else than for you to construct a Cypher statement.
- Ensure the generated Cypher statement is syntactically correct and compatible with Neo4j.
- Return only the Cypher statement without explanations, apologies, or additional commentary.
- If the query involves vector similarity, use appropriate vector-based querying methods such as `db.index.vector.queryNodes` or `genai.vector.encode`
- If inputClaim and token are provided then use their value while generating cypher query.

Schema:
Node properties:
Client {name: STRING}
Brand {name: STRING}
Advertisement {assetSubType: STRING, audience: STRING, indicationAndUsage: STRING, assetFormat: STRING, assetPurpos

In [309]:
def wrap_text_in_dict(d, width=60):
    if isinstance(d, dict):
        return {k: wrap_text_in_dict(v, width) for k, v in d.items()}
    elif isinstance(d, str):
        return textwrap.fill(d, width)
    elif isinstance(d, list):
        return [wrap_text_in_dict(item, width) for item in d]
    else:
        return d  # Return non-string values as is

In [536]:
def prettychain(question: str):
    """Execute the chain's response and pretty print the output."""
    try:
       
        
        response = cypherChain.invoke({
            "query": question,
            "token":token,
            "inputClaim": failedClaim},
            prompt=prompt,
            return_only_outputs = True,)
        
        # Formatting response for better readability
       
        print(response)
        #wrapped_response = wrap_text_in_dict(response, width=60)
        #print(wrapped_response)
    except Exception as e:
        print("An error occurred while generating the query:")
        print(e)
        print(str(e))
        return None


In [257]:
query_params = {
      "question": question,
      "openAIKey": OPENAI_API_KEY,
      "similarityThreshold": 0.9,
      "index_name": "claim_text_embeddings",
      "top_k": 1,
      "limit": 1,
      "inputVector": input_vector
}

In [None]:
kg.refresh_schema()

In [311]:
prettychain(question)



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mWITH genai.vector.encode("Delays disease progression in HR/HER2 cancer.", 'OpenAI', { token: "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" }) AS inputVector
MATCH (advertisement:Advertisement)-[:USES_CLAIM]->(claim:Claim),(advertisement)-[:HAS_MLR_STATUS]->(mlrStatus:MLRStatus)
WHERE mlrStatus.status = 'pass' AND claim.claimTextEmbedding IS NOT NULL
CALL db.index.vector.queryNodes("claim_text_embeddings", 1, inputVector) YIELD node, score
WITH advertisement, node, score
WHERE score > 0.9
RETURN advertisement.assetSubType, advertisement.audience, advertisement.indicationAndUsage,
advertisement.assetFormat, advertisement.assetPurpose, advertisement.fullPrescribingInformation,
advertisement.headline, advertisement.assetURL, advertisement.assetType, advertisement.dimensions,
node.text AS 

In [480]:
failedClaim = "Helps protect against 2 strains of pneumococcal disease."
question = f"Retrieve claims for Advertisements with MLRStatus 'pass' and a claim similar to the input claim which is -{failedClaim}"

In [481]:
prettychain(question)



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mWITH genai.vector.encode("Helps protect against 2 strains of pneumococcal disease.", 'OpenAI', { token: "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" }) AS inputVector
MATCH (advertisement:Advertisement)-[:USES_CLAIM]->(claim:Claim),(advertisement)-[:HAS_MLR_STATUS]->(mlrStatus:MLRStatus)
WHERE mlrStatus.status = 'pass' AND claim.claimTextEmbedding IS NOT NULL
CALL db.index.vector.queryNodes("claim_text_embeddings", 1, inputVector) YIELD node, score
WITH node, score
WHERE score > 0.9
RETURN node.text AS claimText, score[0m

[1m> Finished chain.[0m
{'result': [{'claimText': 'Helps protect against 20 strains of pneumococcal disease.', 'score': 0.9705352783203125}, {'claimText': 'Helps protect against 20 strains of pneumococcal disease.', 'score': 0.9705352783203125}, {'claimText': 'H

In [482]:
failedClaim = "Not needed"
question = f"Retrieve claims and references for Advertisements with MLRStatus 'fail' and also give reason for failure"

In [483]:
prettychain(question)



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (advertisement:Advertisement)-[:USES_CLAIM]->(claim:Claim)
MATCH (claim)-[:HAS_REFERENCE]->(reference:Reference)
MATCH (advertisement)-[:HAS_MLR_STATUS]->(mlrStatus:MLRStatus)
WHERE mlrStatus.status = 'fail'
RETURN claim.text AS claimText, reference.text AS referenceText, mlrStatus.reason[0m

[1m> Finished chain.[0m
{'result': [{'claimText': 'Proven effective for adults aged 18 and older.', 'referenceText': 'https://www.fda.gov/drugs; https://pubmed.ncbi.nlm.nih.gov', 'mlrStatus.reason': 'Non-compliance with regulations'}, {'claimText': 'Provides relief for rheumatoid arthritis symptoms.', 'referenceText': 'https://ard.bmj.com; https://www.rheumatology.org', 'mlrStatus.reason': 'Non-compliance with regulations'}, {'claimText': 'Proven effective for children aged 18 and older.', 'referenceText': 'https://www.fda.gov/drugs; https://pubmed.ncbi.nlm.nih.gov', 'mlrStatus.reason': 'Non-compliance wit

In [537]:
question = f"Return the headline which is used most often in any passed advetisment of Johnson and Johnson and brand Stelara. Give how many times its used. Give top 5 such headline"

In [538]:
prettychain(question)



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (client:Client {name: 'Johnson & Johnson'})-[:HAS_BRAND]->(:Brand {name: 'Stelara'})-[:HAS_ADVERTISEMENT]->(advertisement:Advertisement)-[:HAS_MLR_STATUS]->(mlrStatus:MLRStatus)
WHERE mlrStatus.status = 'pass'
WITH advertisement
MATCH (advertisement)-[:USES_CLAIM]->(:Claim)
RETURN advertisement.headline AS headline, COUNT(advertisement) AS usageCount
ORDER BY usageCount DESC
LIMIT 5[0m

[1m> Finished chain.[0m
{'result': [{'headline': 'Innovative Treatments for Tomorrow', 'usageCount': 10}, {'headline': 'Protect Your Health Today', 'usageCount': 7}, {'headline': 'Relief is Within Reach', 'usageCount': 6}, {'headline': 'Science for a Better Life', 'usageCount': 4}]}


In [486]:
question = f"Give me top 5 mostly used claims for an advertisment for 'Johnson & Johnson' and brand Stelara. This advertisement is for Educational purpose with dimensions 300x250. Status of advertisements should be passed"

In [487]:
prettychain(question)



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (client:Client {name: 'Johnson & Johnson'})-[:HAS_BRAND]->(brand:Brand {name: 'Stelara'})
MATCH (advertisement:Advertisement)-[:HAS_MLR_STATUS]->(mlrStatus:MLRStatus {status: 'pass'})
WHERE advertisement.assetPurpose = 'Educational' AND advertisement.dimensions = '300x250'
MATCH (advertisement)-[:USES_CLAIM]->(claim:Claim)
WITH claim, COUNT(advertisement) AS usageCount
RETURN claim.text AS claimText, usageCount
ORDER BY usageCount DESC
LIMIT 5[0m

[1m> Finished chain.[0m
{'result': [{'claimText': 'Helps protect against 20 strains of pneumococcal disease.', 'usageCount': 3}, {'claimText': 'Improves progression-free survival in combination therapy.', 'usageCount': 2}, {'claimText': 'Proven effective for adults aged 18 and older.', 'usageCount': 1}, {'claimText': "Effective for Crohn's disease after failure of conventional therapy.", 'usageCount': 1}, {'claimText': 'Shown to reduce inflammation in

In [488]:
question = "Tell me in terms of Yes or No if this particular claim text is present for any passed advertisment. Claim text is 'Proven effective for adults aged 18 and older.'"

In [489]:
prettychain(question)



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (advertisement:Advertisement)-[:HAS_MLR_STATUS]->(mlrStatus:MLRStatus)
MATCH (advertisement)-[:USES_CLAIM]->(claim:Claim)
WHERE mlrStatus.status = 'pass' and claim.text= 'Proven effective for adults aged 18 and older.'
RETURN CASE WHEN COUNT(*) > 0 THEN 'Yes' ELSE 'No' END AS ClaimPresentInPassedAdvertisements[0m

[1m> Finished chain.[0m
{'result': [{'ClaimPresentInPassedAdvertisements': 'Yes'}]}


In [540]:
question = "Give me top 5 headlines used in passed advertisements for the client Johnson and Johnson"

In [541]:
prettychain(question)



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (client:Client {name: 'Johnson & Johnson'})-[:HAS_BRAND]->(:Brand)-[:HAS_ADVERTISEMENT]->(advertisement:Advertisement)-[:HAS_MLR_STATUS]->(mlrStatus:MLRStatus)
WHERE mlrStatus.status = 'pass'
RETURN advertisement.headline AS headline, COUNT(advertisement) AS usageCount
ORDER BY usageCount DESC
LIMIT 5[0m

[1m> Finished chain.[0m
{'result': [{'headline': 'Innovative Treatments for Tomorrow', 'usageCount': 16}, {'headline': 'Protect Your Health Today', 'usageCount': 14}, {'headline': 'Relief is Within Reach', 'usageCount': 9}, {'headline': 'Science for a Better Life', 'usageCount': 9}]}


In [490]:
question = "Give me similar claim text which is present for any passed advertisment. Claim text is 'Proven effective for adults aged 1 and older.'"

In [491]:
prettychain(question)



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mWITH genai.vector.encode("Proven effective for adults aged 1 and older.", 'OpenAI', { token: "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" }) AS inputVector
MATCH (advertisement:Advertisement)-[:USES_CLAIM]->(claim:Claim),(advertisement)-[:HAS_MLR_STATUS]->(mlrStatus:MLRStatus)
WHERE mlrStatus.status = 'pass' AND claim.claimTextEmbedding IS NOT NULL
CALL db.index.vector.queryNodes("claim_text_embeddings", 1, inputVector) YIELD node, score
WITH claim.text AS similarClaimText, score
WHERE score > 0.9
RETURN similarClaimText, score
ORDER BY score DESC
LIMIT 1[0m

[1m> Finished chain.[0m
{'result': [{'similarClaimText': 'Helps protect against 20 strains of pneumococcal disease.', 'score': 0.9823150634765625}]}


In [500]:
result = kg.query("""WITH genai.vector.encode("Proven effective for adults aged 18 and older.", 'OpenAI', { token: "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" }) AS inputVector
MATCH (advertisement:Advertisement)-[:USES_CLAIM]->(claim:Claim),(advertisement)-[:HAS_MLR_STATUS]->(mlrStatus:MLRStatus)
WHERE mlrStatus.status = 'pass' AND claim.claimTextEmbedding IS NOT NULL
CALL db.index.vector.queryNodes("claim_text_embeddings", 5, inputVector) YIELD node, score
WITH node.text AS similarClaimText, score
WHERE score > 0.9
RETURN similarClaimText, score
ORDER BY score DESC
LIMIT 5""")

result

[{'similarClaimText': 'Proven effective for adults aged 18 and older.',
  'score': 0.996612548828125},
 {'similarClaimText': 'Proven effective for adults aged 18 and older.',
  'score': 0.996612548828125},
 {'similarClaimText': 'Proven effective for adults aged 18 and older.',
  'score': 0.996612548828125},
 {'similarClaimText': 'Proven effective for adults aged 18 and older.',
  'score': 0.996612548828125},
 {'similarClaimText': 'Proven effective for adults aged 18 and older.',
  'score': 0.996612548828125}]

In [369]:
result = kg.query("""MATCH (advertisement:Advertisement)
WHERE advertisement.assetURL = 'https://example.com/assets/8253'
MATCH (advertisement)-[:USES_CLAIM]->(claim:Claim)
OPTIONAL MATCH (claim)-[:HAS_REFERENCE]->(reference:Reference)
RETURN advertisement.assetURL, claim.text, reference
""")

result

[{'advertisement.assetURL': 'https://example.com/assets/8253',
  'claim.text': 'Helps protect against 20 strains of pneumococcal disease.',
  'reference': {'text': 'https://www.fda.gov; https://www.cdc.gov/vaccines'}},
 {'advertisement.assetURL': 'https://example.com/assets/8253',
  'claim.text': 'Helps protect against 20 strains of pneumococcal disease.',
  'reference': {'text': 'https://www.fda.gov; https://www.cdc.gov/vaccines'}}]

In [361]:
result = kg.query("""MATCH (advertisement:Advertisement)-[:USES_CLAIM]->(claim:Claim)
OPTIONAL MATCH (claim)-[:HAS_REFERENCE]->(reference:Reference)
WHERE reference IS NULL
RETURN advertisement,reference""")

result

[{'advertisement': {'assetSubType': 'Video',
   'audience': 'Medical Professional',
   'indicationAndUsage': 'Vaccination',
   'assetFormat': 'Banner',
   'assetPurpose': 'Educational',
   'fullPrescribingInformation': 'Prevnar 20® is a pneumococcal 20-valent conjugate vaccine indicated for the prevention of invasive disease and pneumonia caused by Streptococcus pneumoniae serotypes covered by the vaccine. It is recommended for use in adults 18 years of age and older. Common side effects include injection site reactions, fatigue, and muscle pain. Please consult the full prescribing information for detailed safety and usage instructions.',
   'headline': 'Science for a Better Life',
   'assetURL': 'https://example.com/assets/8253',
   'assetType': 'Multi Frame',
   'dimensions': '300x250'},
  'reference': None},
 {'advertisement': {'assetSubType': 'Video',
   'audience': 'Medical Professional',
   'indicationAndUsage': 'Vaccination',
   'assetFormat': 'Banner',
   'assetPurpose': 'Educa

In [367]:
result = kg.query("""MATCH (advertisement:Advertisement)
WHERE advertisement.assetURL = 'https://example.com/assets/8253'
RETURN advertisement
""")

result

[{'advertisement': {'assetSubType': 'Video',
   'audience': 'Medical Professional',
   'indicationAndUsage': 'Vaccination',
   'assetFormat': 'Banner',
   'assetPurpose': 'Educational',
   'fullPrescribingInformation': 'Prevnar 20® is a pneumococcal 20-valent conjugate vaccine indicated for the prevention of invasive disease and pneumonia caused by Streptococcus pneumoniae serotypes covered by the vaccine. It is recommended for use in adults 18 years of age and older. Common side effects include injection site reactions, fatigue, and muscle pain. Please consult the full prescribing information for detailed safety and usage instructions.',
   'headline': 'Science for a Better Life',
   'assetURL': 'https://example.com/assets/8253',
   'assetType': 'Multi Frame',
   'dimensions': '300x250'}},
 {'advertisement': {'assetSubType': 'Video',
   'audience': 'Medical Professional',
   'indicationAndUsage': 'Vaccination',
   'assetFormat': 'Banner',
   'assetPurpose': 'Educational',
   'fullPres

In [498]:
result = kg.query("""MATCH (advertisement:Advertisement)-[:USES_CLAIM]->(claim:Claim)
WITH claim, COUNT(advertisement) AS usageCount
RETURN claim.text AS claimText, usageCount
ORDER BY usageCount DESC
LIMIT 5""")

result

[{'claimText': 'Provides relief for rheumatoid arthritis symptoms.',
  'usageCount': 25},
 {'claimText': 'Significantly delays disease progression in HR+/HER2- breast cancer.',
  'usageCount': 25},
 {'claimText': "Effective for Crohn's disease after failure of conventional therapy.",
  'usageCount': 23},
 {'claimText': 'Proven effective for adults aged 18 and older.',
  'usageCount': 21},
 {'claimText': 'Improves progression-free survival in combination therapy.',
  'usageCount': 21}]

In [475]:
result = kg.query("""MATCH (advertisement:Advertisement)-[:HAS_MLR_STATUS]->(mlrStatus:MLRStatus)
MATCH (advertisement)-[:USES_CLAIM]->(claim:Claim)
WHERE mlrStatus.status = 'pass' and claim.text= 'Proven effective for adults aged 18 and older.'
RETURN CASE WHEN COUNT(*) > 0 THEN 'Yes' ELSE 'No' END AS ClaimPresentInPassedAdvertisements""")

result

[{'ClaimPresentInPassedAdvertisements': 'Yes'}]

In [446]:
result = kg.query("""CREATE FULLTEXT INDEX claimTextSearch FOR (c:Claim) ON EACH [c.text]""")

In [448]:
kg.query("""CALL db.index.fulltext.queryNodes("claimTextSearch", "Proven effective for adults aged 18 and older.") YIELD node, score
RETURN node.text, score""")

[{'node.text': 'Proven effective for adults aged 18 and older.',
  'score': 5.733086585998535},
 {'node.text': 'Proven effective for children aged 18 and older.',
  'score': 5.048819065093994},
 {'node.text': 'Proven effective for adults aged 18 and older. (References not provided)',
  'score': 5.014046669006348},
 {'node.text': "Effective for Crohn's disease after failure of conventional therapy.",
  'score': 0.9835132360458374},
 {'node.text': "Effective for Crohn's disease after failure of conventional therapy. (References not provided)",
  'score': 0.8651115298271179},
 {'node.text': 'Treats moderate to severe plaque psoriasis in adults.',
  'score': 0.6842674016952515},
 {'node.text': 'Treats moderate to severe plaque psoriasis in adults. (References not provided)',
  'score': 0.5984470844268799},
 {'node.text': 'Provides relief for rheumatoid arthritis symptoms.',
  'score': 0.4882717728614807},
 {'node.text': 'Provides relief for rheumatoid arthritis symptoms. (References not pr