In [None]:
# Shell escape: show the directory the notebook kernel is running in.
!pwd

In [18]:
import os, random, time

from fastapi import FastAPI, HTTPException
from langchain_core.runnables import RunnablePassthrough
from langchain_core.prompts import ChatPromptTemplate
from pydantic import BaseModel, Field
from langchain_core.output_parsers import StrOutputParser
# from langchain_community.graphs import Neo4jGraph
from langchain_neo4j import Neo4jGraph
# from langchain_community.chat_models import ChatOllama
from langchain_ollama import ChatOllama
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_community.vectorstores import Neo4jVector
from langchain_core.documents import Document
from langchain_ollama import OllamaEmbeddings
from langchain_experimental.llms.ollama_functions import OllamaFunctions

from neo4j import GraphDatabase

In [21]:
### functions

# neo4j database checker
def get_graph_data(tx):
    """Collect every node and every directed relationship reachable via ``tx``.

    Two Cypher queries are issued in order: ``MATCH (n) RETURN n`` and
    ``MATCH (a)-[r]->(b) RETURN a, r, b``.

    Args:
        tx: Object exposing ``run(query)``; the returned result is iterated
            and each record is indexed by column name. Elsewhere in this
            notebook it is supplied by ``session.execute_read``.

    Returns:
        tuple: ``(nodes, edges)``.
            * each node is ``{"id", "labels", "properties"}`` where ``id`` is
              the node's ``element_id``;
            * each edge is ``{"start_id", "end_id", "type", "properties"}``
              where the ids are the endpoint nodes' ``element_id`` values.
    """
    # Nodes: identified by element_id, with labels and properties copied out.
    node_rows = tx.run("MATCH (n) RETURN n")
    nodes = [
        {
            "id": graph_node.element_id,
            "labels": list(graph_node.labels),
            "properties": dict(graph_node),
        }
        for graph_node in (row["n"] for row in node_rows)
    ]

    # Relationships: one entry per directed (a)-[r]->(b) match.
    edges = []
    for row in tx.run("MATCH (a)-[r]->(b) RETURN a, r, b"):
        source, relationship, target = row["a"], row["r"], row["b"]
        edges.append(
            {
                "start_id": source.element_id,
                "end_id": target.element_id,
                "type": relationship.type,
                "properties": dict(relationship),
            }
        )

    return nodes, edges

def set_random_strength_on_edges(uri, user, password, source_node_id_value):
    """Assign a random ``strength`` to the outgoing edges of matching nodes.

    Matches every relationship whose source node has an ``id`` property equal
    to ``source_node_id_value`` and sets ``rel.strength = rand()*0.3+0.7`` on
    it, i.e. a value between 0.7 and 1.0 (not 0.0 to 1.0). Any existing
    ``strength`` on those relationships is overwritten.

    Args:
        uri: Connection URI handed to ``GraphDatabase.driver``.
        user: User name for the ``auth`` tuple.
        password: Password for the ``auth`` tuple.
        source_node_id_value: Value the source node's ``id`` property must equal.

    Returns:
        list[dict]: One dict per updated relationship with the keys
        ``source_id``, ``relationship_type``, ``strength`` and ``target_id``.
        Empty when no relationship matches.

    Raises:
        Any exception raised by the driver propagates to the caller; the
        driver and session are closed first.
    """
    # The Cypher query to find and update the edges
    query = """
    MATCH (source {id: $source_id})-[rel]->(target)
    SET rel.strength = rand()*0.3+0.7
    RETURN source.id, type(rel), rel.strength, target.id
    """

    # Context managers close the session and the driver even when the query
    # raises, so a failed call no longer leaks an open driver.
    with GraphDatabase.driver(uri, auth=(user, password)) as driver:
        with driver.session() as session:
            result = session.run(query, source_id=source_node_id_value)
            # Build the list while the session is still open.
            return [
                {
                    'source_id': record['source.id'],
                    'relationship_type': record['type(rel)'],
                    'strength': record['rel.strength'],
                    'target_id': record['target.id'],
                }
                for record in result
            ]


def set_random_direction_on_edges(uri, user, password, source_node_id_value):
    """Assign a random ``direction`` to the outgoing edges of matching nodes.

    Matches every relationship whose source node has an ``id`` property equal
    to ``source_node_id_value`` and sets ``rel.direction`` to ``'up'`` when
    ``rand() > 0.5`` and to ``'down'`` otherwise. Any existing ``direction``
    is overwritten; ``strength`` is only read back, never modified here.

    Args:
        uri: Connection URI handed to ``GraphDatabase.driver``.
        user: User name for the ``auth`` tuple.
        password: Password for the ``auth`` tuple.
        source_node_id_value: Value the source node's ``id`` property must equal.

    Returns:
        list[dict]: One dict per updated relationship with the keys
        ``source_id``, ``relationship_type``, ``strength``, ``direction`` and
        ``target_id``. Empty when no relationship matches.

    Raises:
        Any exception raised by the driver propagates to the caller; the
        driver and session are closed first.
    """
    # The Cypher query to find and update the edges
    query = """
    MATCH (source {id: $source_id})-[rel]->(target)
    SET rel.direction = CASE WHEN rand() > 0.5 THEN 'up' ELSE 'down' END
    RETURN source.id, type(rel), rel.strength, rel.direction, target.id
    """

    # Context managers close the session and the driver even when the query
    # raises, so a failed call no longer leaks an open driver.
    with GraphDatabase.driver(uri, auth=(user, password)) as driver:
        with driver.session() as session:
            result = session.run(query, source_id=source_node_id_value)
            # Build the list while the session is still open.
            return [
                {
                    'source_id': record['source.id'],
                    'relationship_type': record['type(rel)'],
                    'strength': record['rel.strength'],
                    'direction': record['rel.direction'],
                    'target_id': record['target.id'],
                }
                for record in result
            ]

In [23]:
# Connection settings for the Neo4j instance. Each value can be overridden by
# an environment variable of the same name so real credentials do not have to
# be saved in the notebook; the fallbacks are the local development defaults.
NEO4J_URL = os.environ.get("NEO4J_URL", "bolt://localhost:7687")
NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j")
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "password")

In [4]:
## Graph DB reset - NEO4J by apptainer 
## imageD/neo4j
# Safety latch: the wipe below only runs when `passcode` is edited to exactly
# 'itsokaytodelete'. As saved, the value does not match, so running this cell
# leaves the database untouched.
passcode = 'itsokaytoasdfdelete'
if passcode == 'itsokaytodelete':
    # Context managers close the session and the driver once the wipe is done.
    with GraphDatabase.driver(NEO4J_URL, auth=(NEO4J_USER, NEO4J_PASSWORD)) as driver:
        with driver.session(database="neo4j") as session:  # Specify the database if not the default
            # consume() waits for the delete to finish and surfaces any server
            # error before the success message is printed.
            session.run("MATCH (n) DETACH DELETE n").consume()
            print("Neo4j database reset successfully.")

Neo4j database reset successfully.


In [6]:
# Checking the database side
# Context managers make sure the driver and session are closed after the read
# instead of leaving a new open driver behind on every run of this cell.
with GraphDatabase.driver(NEO4J_URL, auth=(NEO4J_USER, NEO4J_PASSWORD)) as driver:
    with driver.session() as session:
        neo4j_nodes, neo4j_edges = session.execute_read(get_graph_data)

print(f'There are {len(neo4j_nodes)} nodes.')
print(f'There are {len(neo4j_edges)} edges.')

There are 0 nodes.
There are 0 edges.


In [7]:
# LangChain handle on the same Neo4j instance; used further down to write the
# extracted graph documents into the database.
graph = Neo4jGraph(
    url= NEO4J_URL,
    username=NEO4J_USER, #default
    password=NEO4J_PASSWORD #change accordingly
)

In [8]:
# First input text: one "transcription factor X regulates the Y gene" sentence
# per line. It is wrapped in a Document and passed to the LLM graph transformer.
text1 = """
The transcription factor IL1B regulates the HP gene. 
The transcription factor IL1B regulates the IL1RN gene. 
The transcription factor IL1B regulates the IL6 gene. 
The transcription factor IL1B regulates the NOD2 gene. 
The transcription factor IL1B regulates the NFKB1 gene. 
The transcription factor IL1B regulates the PPARA gene. 
The transcription factor IL6 regulates the IL10 gene. 
The transcription factor IL6 regulates the CREBBP gene. 
The transcription factor IL6 regulates the PTEN gene. 
The transcription factor IL6 regulates the VEGFA gene. 
The transcription factor NFKB1 regulates the MYC gene. 
The transcription factor NFKB1 regulates the HIF1A gene. 
The transcription factor NFKB1 regulates the DDIT3 gene. 
"""

In [9]:
# Wrap the raw text in a single Document; the graph transformer takes a list of Documents.
documents = [Document(page_content=text1)]

In [10]:
# Initialize the language model for text-to-graph conversion
# temperature=0 and format="json" are passed to the Ollama chat model; the
# resulting transformer is reused for every text added to the graph below.
llm = ChatOllama(model="phi4:14b", temperature=0, format="json")  
                    # gpt-oss:20b doesn't work. It talks too much.
# llm = ChatOllama(model="llama3.2", temperature=0, format="json")
llm_transformer_filtered = LLMGraphTransformer(llm=llm)

In [11]:
# Convert the text into graph documents
# The result is a list; element 0 carries the `.nodes` and `.relationships`
# previewed in the next cells.
graph_documents = llm_transformer_filtered.convert_to_graph_documents(documents)

In [None]:
# for relationship in graph_documents[0].relationships:
#     print(relationship)

In [12]:
# Preview the first five nodes extracted from the first document.
print('-------  nodes  -------')
graph_documents[0].nodes[0:5]

-------  nodes  -------


[Node(id='Il1B', type='Transcription factor', properties={}),
 Node(id='Hp', type='Gene', properties={}),
 Node(id='Il1Rn', type='Gene', properties={}),
 Node(id='Il6', type='Gene', properties={}),
 Node(id='Nod2', type='Gene', properties={})]

In [13]:
# Preview the first five relationships extracted from the first document.
print('-------  edges  -------')
graph_documents[0].relationships[0:5]

-------  edges  -------


[Relationship(source=Node(id='Il1B', type='Transcription factor', properties={}), target=Node(id='Hp', type='Gene', properties={}), type='REGULATES', properties={}),
 Relationship(source=Node(id='Il1B', type='Transcription factor', properties={}), target=Node(id='Il1Rn', type='Gene', properties={}), type='REGULATES', properties={}),
 Relationship(source=Node(id='Il1B', type='Transcription factor', properties={}), target=Node(id='Il6', type='Gene', properties={}), type='REGULATES', properties={}),
 Relationship(source=Node(id='Il1B', type='Transcription factor', properties={}), target=Node(id='Nod2', type='Gene', properties={}), type='REGULATES', properties={}),
 Relationship(source=Node(id='Il1B', type='Transcription factor', properties={}), target=Node(id='Nfkb1', type='Gene', properties={}), type='REGULATES', properties={})]

In [14]:
# Add the generated graph into Neo4j
# NOTE(review): baseEntityLabel=True presumably accounts for the extra
# `__Entity__` label, and include_source=True for the `Document` node with
# MENTIONS edges, that show up in the database dumps further down; confirm
# against the langchain_neo4j documentation.
graph.add_graph_documents(
    graph_documents,
    baseEntityLabel=True,
    include_source=True
)

In [15]:
# Checking the database side
# Context managers make sure the driver and session are closed after the read
# instead of leaving a new open driver behind on every run of this cell.
with GraphDatabase.driver(NEO4J_URL, auth=(NEO4J_USER, NEO4J_PASSWORD)) as driver:
    with driver.session() as session:
        neo4j_nodes, neo4j_edges = session.execute_read(get_graph_data)

print(f'There are {len(neo4j_nodes)} nodes.')
print(f'There are {len(neo4j_edges)} edges.')

There are 15 nodes.
There are 27 edges.


In [16]:
## Second text to the DB
# Same pipeline as for the first text: wrap in a Document, extract graph
# documents with the LLM transformer, then write them to Neo4j.
# Note: `documents` and `graph_documents` are rebound here, so after this cell
# they refer to the second text only.
text2 = """
The transcription factor NFKB1 regulates the ICAM1 gene. 
The transcription factor NFKB1 regulates the IL6 gene. 
The transcription factor PTEN regulates the MT2A gene. 
The transcription factor PTEN regulates the MTOR gene.
"""

documents = [Document(page_content=text2)]

graph_documents = llm_transformer_filtered.convert_to_graph_documents(documents)

graph.add_graph_documents(
    graph_documents,
    baseEntityLabel=True,
    include_source=True
)

In [17]:
# Checking the database side
# Context managers make sure the driver and session are closed after the read
# instead of leaving a new open driver behind on every run of this cell.
with GraphDatabase.driver(NEO4J_URL, auth=(NEO4J_USER, NEO4J_PASSWORD)) as driver:
    with driver.session() as session:
        neo4j_nodes, neo4j_edges = session.execute_read(get_graph_data)

print(f'There are {len(neo4j_nodes)} nodes.')
print(f'There are {len(neo4j_edges)} edges.')

There are 19 nodes.
There are 37 edges.


In [29]:
# Checking the database side
# Context managers make sure the driver and session are closed after the read
# instead of leaving a new open driver behind on every run of this cell.
with GraphDatabase.driver(NEO4J_URL, auth=(NEO4J_USER, NEO4J_PASSWORD)) as driver:
    with driver.session() as session:
        neo4j_nodes, neo4j_edges = session.execute_read(get_graph_data)

# Prints every node dict (id, labels, properties) on one line.
print(neo4j_nodes)

[{'id': '4:77d6b202-ff1a-4d0f-ae9f-df70c44e56a7:34', 'labels': ['Document'], 'properties': {'text': '\nThe transcription factor IL1B regulates the HP gene. \nThe transcription factor IL1B regulates the IL1RN gene. \nThe transcription factor IL1B regulates the IL6 gene. \nThe transcription factor IL1B regulates the NOD2 gene. \nThe transcription factor IL1B regulates the NFKB1 gene. \nThe transcription factor IL1B regulates the PPARA gene. \nThe transcription factor IL6 regulates the IL10 gene. \nThe transcription factor IL6 regulates the CREBBP gene. \nThe transcription factor IL6 regulates the PTEN gene. \nThe transcription factor IL6 regulates the VEGFA gene. \nThe transcription factor NFKB1 regulates the MYC gene. \nThe transcription factor NFKB1 regulates the HIF1A gene. \nThe transcription factor NFKB1 regulates the DDIT3 gene. \n', 'id': 'c00ba19e6e99a7e47d958126ba265727'}}, {'id': '4:77d6b202-ff1a-4d0f-ae9f-df70c44e56a7:35', 'labels': ['Transcription factor', '__Entity__'], 'pro

In [30]:
# Assign random strengths to every edge leaving the node whose id is 'Il1B'
# and print the updated edges returned by the helper.
updated_edges = set_random_strength_on_edges(uri=NEO4J_URL, user=NEO4J_USER, 
                                             password=NEO4J_PASSWORD, source_node_id_value='Il1B')
print(updated_edges)


[{'source_id': 'Il1B', 'relationship_type': 'REGULATES', 'strength': 0.9177109220703087, 'target_id': 'Nod2'}, {'source_id': 'Il1B', 'relationship_type': 'REGULATES', 'strength': 0.7676950814661141, 'target_id': 'Ppara'}, {'source_id': 'Il1B', 'relationship_type': 'REGULATES', 'strength': 0.7562856480483472, 'target_id': 'Hp'}, {'source_id': 'Il1B', 'relationship_type': 'REGULATES', 'strength': 0.7687594575575096, 'target_id': 'Nfkb1'}, {'source_id': 'Il1B', 'relationship_type': 'REGULATES', 'strength': 0.8986636124245094, 'target_id': 'Il6'}, {'source_id': 'Il1B', 'relationship_type': 'REGULATES', 'strength': 0.7777951275856825, 'target_id': 'Il1Rn'}]


In [31]:
# Assign random strengths to the outgoing edges of the remaining source nodes.
# One loop replaces three copy-pasted calls; the call order and the printed
# output per source node are unchanged.
for source_node_id in ('Nfkb1', 'Pten', 'Il6'):
    updated_edges = set_random_strength_on_edges(
        uri=NEO4J_URL,
        user=NEO4J_USER,
        password=NEO4J_PASSWORD,
        source_node_id_value=source_node_id,
    )
    print(updated_edges)

[{'source_id': 'Nfkb1', 'relationship_type': 'REGULATES', 'strength': 0.7903916725031803, 'target_id': 'Il6'}, {'source_id': 'Nfkb1', 'relationship_type': 'REGULATES', 'strength': 0.9554545677431383, 'target_id': 'Icam1'}, {'source_id': 'Nfkb1', 'relationship_type': 'REGULATES', 'strength': 0.9308901405210024, 'target_id': 'Hif1A'}, {'source_id': 'Nfkb1', 'relationship_type': 'REGULATES', 'strength': 0.722346916416799, 'target_id': 'Ddit3'}, {'source_id': 'Nfkb1', 'relationship_type': 'REGULATES', 'strength': 0.7170101280956744, 'target_id': 'Myc'}]
[{'source_id': 'Pten', 'relationship_type': 'REGULATES', 'strength': 0.8261261095546021, 'target_id': 'Mt2A'}, {'source_id': 'Pten', 'relationship_type': 'REGULATES', 'strength': 0.883770434228994, 'target_id': 'Mtor'}]
[{'source_id': 'Il6', 'relationship_type': 'REGULATES', 'strength': 0.9021094214246788, 'target_id': 'Crebbp'}, {'source_id': 'Il6', 'relationship_type': 'REGULATES', 'strength': 0.8633853230003733, 'target_id': 'Il10'}, {'s

In [33]:
# Assign a random 'up'/'down' direction to the outgoing edges of each source
# node. One loop replaces four copy-pasted calls; the call order and the
# printed output per source node are unchanged.
for source_node_id in ('Il1B', 'Nfkb1', 'Pten', 'Il6'):
    updated_edges = set_random_direction_on_edges(
        uri=NEO4J_URL,
        user=NEO4J_USER,
        password=NEO4J_PASSWORD,
        source_node_id_value=source_node_id,
    )
    print(updated_edges)

[{'source_id': 'Il1B', 'relationship_type': 'REGULATES', 'strength': 0.9177109220703087, 'direction': 'down', 'target_id': 'Nod2'}, {'source_id': 'Il1B', 'relationship_type': 'REGULATES', 'strength': 0.7676950814661141, 'direction': 'up', 'target_id': 'Ppara'}, {'source_id': 'Il1B', 'relationship_type': 'REGULATES', 'strength': 0.7562856480483472, 'direction': 'up', 'target_id': 'Hp'}, {'source_id': 'Il1B', 'relationship_type': 'REGULATES', 'strength': 0.7687594575575096, 'direction': 'down', 'target_id': 'Nfkb1'}, {'source_id': 'Il1B', 'relationship_type': 'REGULATES', 'strength': 0.8986636124245094, 'direction': 'down', 'target_id': 'Il6'}, {'source_id': 'Il1B', 'relationship_type': 'REGULATES', 'strength': 0.7777951275856825, 'direction': 'down', 'target_id': 'Il1Rn'}]
[{'source_id': 'Nfkb1', 'relationship_type': 'REGULATES', 'strength': 0.7903916725031803, 'direction': 'up', 'target_id': 'Il6'}, {'source_id': 'Nfkb1', 'relationship_type': 'REGULATES', 'strength': 0.9554545677431383

In [34]:
# Checking the database side
# Context managers make sure the driver and session are closed after the read
# instead of leaving a new open driver behind on every run of this cell.
with GraphDatabase.driver(NEO4J_URL, auth=(NEO4J_USER, NEO4J_PASSWORD)) as driver:
    with driver.session() as session:
        neo4j_nodes, neo4j_edges = session.execute_read(get_graph_data)

In [35]:
# Display the full edge list fetched in the previous cell.
neo4j_edges

[{'start_id': '4:77d6b202-ff1a-4d0f-ae9f-df70c44e56a7:34',
  'end_id': '4:77d6b202-ff1a-4d0f-ae9f-df70c44e56a7:35',
  'type': 'MENTIONS',
  'properties': {}},
 {'start_id': '4:77d6b202-ff1a-4d0f-ae9f-df70c44e56a7:34',
  'end_id': '4:77d6b202-ff1a-4d0f-ae9f-df70c44e56a7:36',
  'type': 'MENTIONS',
  'properties': {}},
 {'start_id': '4:77d6b202-ff1a-4d0f-ae9f-df70c44e56a7:34',
  'end_id': '4:77d6b202-ff1a-4d0f-ae9f-df70c44e56a7:37',
  'type': 'MENTIONS',
  'properties': {}},
 {'start_id': '4:77d6b202-ff1a-4d0f-ae9f-df70c44e56a7:34',
  'end_id': '4:77d6b202-ff1a-4d0f-ae9f-df70c44e56a7:38',
  'type': 'MENTIONS',
  'properties': {}},
 {'start_id': '4:77d6b202-ff1a-4d0f-ae9f-df70c44e56a7:34',
  'end_id': '4:77d6b202-ff1a-4d0f-ae9f-df70c44e56a7:39',
  'type': 'MENTIONS',
  'properties': {}},
 {'start_id': '4:77d6b202-ff1a-4d0f-ae9f-df70c44e56a7:34',
  'end_id': '4:77d6b202-ff1a-4d0f-ae9f-df70c44e56a7:40',
  'type': 'MENTIONS',
  'properties': {}},
 {'start_id': '4:77d6b202-ff1a-4d0f-ae9f-df70c