In [1]:
import os
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_community.vectorstores import Neo4jVector
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain_community.graphs import Neo4jGraph
from langchain_core.documents import Document
from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from pydantic import BaseModel, Field
from langchain_community.vectorstores.neo4j_vector import remove_lucene_chars
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain_core.runnables import  RunnablePassthrough
from typing import List
from langchain_ollama import ChatOllama
from langchain_openai import ChatOpenAI
from langchain_huggingface import HuggingFacePipeline
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_huggingface import ChatHuggingFace
from yfiles_jupyter_graphs import GraphWidget
from neo4j import GraphDatabase
from dotenv import load_dotenv
import json
import warnings

# Load variables from a local .env file (if present) into the process environment.
load_dotenv()

# Tunable model settings, gathered in one place.
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
CHAT_MODEL_NAME = "gpt-3.5-turbo-0125"
CHAT_TEMPERATURE = 0.6

# Embedding model used for the vector index lookups further down.
embed_model = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

# Disabled alternative: run a local Hugging Face text-generation pipeline instead.
# llm = HuggingFacePipeline.from_model_id(
#     model_id = "meta-llama/Meta-Llama-3-8B",
#     task = "text-generation",
#     pipeline_kwargs={"max_new_tokens":4096}
# )

# Chat model used for entity extraction and for answer generation.
llm = ChatOpenAI(model=CHAT_MODEL_NAME, temperature=CHAT_TEMPERATURE)

  from tqdm.autonotebook import tqdm, trange


In [2]:
# Neo4jGraph() is called without arguments, so the connection settings must come
# from the environment (load_dotenv() runs in the first cell). Keep real values in
# the .env file, never in the notebook. Variables that were set here previously:
# os.environ['NEO4J_URI'] = 'neo4j+s://<instance-id>.databases.neo4j.io'
# os.environ['NEO4J_USERNAME'] = '<username>'
# os.environ['NEO4J_PASSWORD'] = '<password>'
# os.environ['NEO4J_DATABASE'] = '<database>'
# os.environ['AURA_INSTANCEID'] = '<instance-id>'
# os.environ['AURA_INSTANCENAME'] = '<instance-name>'
# NOTE(review): a real password was committed in this cell; rotate that credential.

graph = Neo4jGraph()

In [3]:
# from pypdf import PdfMerger

# pdfs = ['wsu_courses.pdf','international_guide.pdf','international.pdf','english_requirements.pdf']

# merger = PdfMerger()

# for pdf in pdfs:
#     merger.append(pdf)

# merger.write("wsu-data.pdf")
# merger.close()

In [4]:
# from PyPDF2 import PdfReader

# # Open the PDF file
# reader = PdfReader("wsu-data.pdf")

# # Extract text from the PDF
# text_data = ''
# for page in reader.pages:
#     text_data += page.extract_text() + '\n'

# # Save the text to a file
# with open('wsu-data.txt', 'w', encoding="utf-8") as file:
#     file.write(text_data)

In [5]:
# from langchain_community.document_loaders import PyPDFLoader

# loader = PyPDFLoader("wsu-data.pdf")
# documents = loader.load()

In [6]:
# # a simple function that removes \n newline from the content
# def remove_ws(d):
#     text = d.page_content.replace('\n','')
#     d.page_content = text
#     return d

# # applied on the docs
# documents = [remove_ws(d) for d in documents]

In [7]:
# from langchain_experimental.graph_transformers import LLMGraphTransformer

# llm_transformer = LLMGraphTransformer(llm=llm)
# graph_documents2 = llm_transformer.convert_to_graph_documents(documents)

In [8]:
# import pickle

# # Save graph_documents as a Pickle file
# with open("graph_enhanced.pkl", "wb") as file:
#     pickle.dump(graph_documents, file)

In [7]:
import pickle


def _load_pickle(path):
    """Deserialize and return the object stored in the pickle file at ``path``."""
    # NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Previously saved graph documents, one batch per pickle file.
graph_documents = _load_pickle('ultimate_graph.pkl')
graph_documents2 = _load_pickle('graph_enhanced.pkl')

In [5]:
# Write both batches of graph documents into Neo4j with identical settings.
# NOTE(review): presumably baseEntityLabel adds the __Entity__ label queried later
# and include_source links nodes to their source documents — confirm in the docs.
for docs_batch in (graph_documents, graph_documents2):
    graph.add_graph_documents(
        docs_batch,
        baseEntityLabel=True,
        include_source=True,
    )

In [8]:
# Default visualisation query: up to 50 relationships, skipping MENTIONS edges.
default_cypher = "MATCH (s)-[r:!MENTIONS]->(t) RETURN s,r,t LIMIT 50"


def showGraph(cypher: str = default_cypher):
    """Render the result of a Cypher query as an interactive graph widget.

    Args:
        cypher: Cypher query whose result graph is displayed. Defaults to
            ``default_cypher``.

    Returns:
        A ``GraphWidget`` whose node labels are taken from each node's ``id``.

    Raises:
        KeyError: if NEO4J_URI, NEO4J_USERNAME or NEO4J_PASSWORD is not set
            in the environment.
    """
    uri = os.environ["NEO4J_URI"]
    auth = (os.environ["NEO4J_USERNAME"], os.environ["NEO4J_PASSWORD"])
    # Context managers release the session and driver even when the query fails.
    with GraphDatabase.driver(uri=uri, auth=auth) as driver:
        with driver.session() as session:
            # Run the caller's query (not a hard-coded one) and build the widget
            # while the session is still open.
            widget = GraphWidget(graph=session.run(cypher).graph())
    widget.node_label_mapping = 'id'
    return widget


showGraph()

GraphWidget(layout=Layout(height='800px', width='100%'))

In [9]:
# Names of the indexes that already exist in the database.
index_name = "vector"  # default index name
keyword_index_name = "keyword"  # default keyword index name

# Settings for attaching to those indexes; "hybrid" search is given both names.
_index_settings = {
    "index_name": index_name,
    "keyword_index_name": keyword_index_name,
    "search_type": "hybrid",
}

# Embedding alternative that was tried: OpenAIEmbeddings(model = "text-embedding-3-large")
vector_index = Neo4jVector.from_existing_index(embed_model, **_index_settings)


# Disabled alternative: derive the index from Document nodes in the graph.
# vector_index = Neo4jVector.from_existing_graph(
#     embed_model,
#     # OpenAIEmbeddings(model = "text-embedding-3-large"),
#     # OllamaEmbeddings(model = "mxbai-embed-large"),
#     search_type="hybrid",
#     node_label="Document",
#     text_node_properties=["text"],
#     embedding_node_property="embedding"
# )

# Retriever interface over the index, used by the retrieval functions below.
retriever = vector_index.as_retriever()

In [10]:
# Sanity check: list the vector indexes defined in the database (name, state,
# indexed labels and properties) to confirm the "vector" index is ONLINE.
graph.query("""
  SHOW VECTOR INDEXES
  """
)

[{'id': 4,
  'name': 'vector',
  'state': 'ONLINE',
  'populationPercent': 100.0,
  'type': 'VECTOR',
  'entityType': 'NODE',
  'labelsOrTypes': ['Document'],
  'properties': ['embedding'],
  'indexProvider': 'vector-2.0',
  'owningConstraint': None,
  'lastRead': neo4j.time.DateTime(2025, 2, 3, 11, 54, 9, 922000000, tzinfo=<UTC>),
  'readCount': 48}]

In [11]:
# Reload the schema held by the Neo4jGraph wrapper — presumably so it reflects
# the graph documents added earlier; TODO confirm.
graph.refresh_schema()

In [12]:
# Structured-output schema: passed to llm.with_structured_output(...) so the
# model's reply is parsed into this shape.
# NOTE(review): the docstring and the Field description are presumably included
# in the schema sent to the LLM, so rewording them may change extraction
# results — confirm before editing.
class Entities(BaseModel):
    """Identifying information about entities."""

    # Entity names found in the input text; `...` marks the field as required.
    names: list[str] = Field(
        ...,
        description="All the person, organization, or business entities that "
        "appear in the text",
    )
# Message texts for the entity-extraction prompt; {question} is filled at invoke time.
_ENTITY_SYSTEM_MESSAGE = "You are extracting organization and person entities from the text."
_ENTITY_HUMAN_MESSAGE = (
    "Use the given format to extract information from the following "
    "input: {question}"
)

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", _ENTITY_SYSTEM_MESSAGE),
        ("human", _ENTITY_HUMAN_MESSAGE),
    ]
)

# Constrain the LLM output to the Entities schema, then pipe the prompt into it.
structured_llm = llm.with_structured_output(Entities)
entity_chain = prompt | structured_llm

In [13]:
import warnings

# Silence all warnings for the rest of the session. Use the filter mechanism
# rather than replacing warnings.warn with a no-op: the stdlib function stays
# intact for every other library in the process, and the setting can be undone
# with warnings.resetwarnings().
warnings.filterwarnings("ignore")

def generate_full_text_query(input: str) -> str:
    """
    Build a Lucene full-text query that requires every word of ``input``.

    The text is passed through ``remove_lucene_chars``, split on whitespace,
    and the remaining words are combined with the AND operator. No fuzzy
    matching suffix is added, so misspelled words are not tolerated.

    Returns the query string, or an empty string when no words remain.
    The generated query is also printed.
    """
    cleaned = remove_lucene_chars(input)
    terms = list(filter(None, cleaned.split()))
    if len(terms) == 0:
        return ""
    joined = " AND ".join(terms)
    print(f"Generated Query: {joined}")
    return joined.strip()


# Fulltext index query
def graph_retriever(question: str) -> str:
    """
    Collect the graph neighbourhood of every entity mentioned in the question.

    Entities are extracted from ``question`` with ``entity_chain``. Each one is
    looked up in the ``entity`` full-text index, and the incoming and outgoing
    relationships (excluding MENTIONS) of the matching nodes are rendered as
    ``source - TYPE -> target`` lines, at most 50 per entity.

    Returns all lines joined by newlines, or an empty string when nothing matched.
    """
    # IF NOT EXISTS makes index creation a single idempotent statement.
    graph.query(
        "CREATE FULLTEXT INDEX entity IF NOT EXISTS FOR (n:__Entity__) ON EACH [n.id]"
    )
    entities = entity_chain.invoke({"question": question})
    lines = []
    for entity in entities.names:
        full_text_query = generate_full_text_query(entity)
        if not full_text_query:
            # Nothing searchable is left for this entity; do not send an empty
            # query to the full-text index.
            continue
        response = graph.query(
            """
            CALL db.index.fulltext.queryNodes('entity', $query)
            YIELD node,score
            CALL {
              WITH node
              MATCH (node)-[r:!MENTIONS]->(neighbor)
              RETURN node.id + ' - ' + type(r) + ' -> ' + neighbor.id AS output
              UNION ALL
              WITH node
              MATCH (node)<-[r:!MENTIONS]-(neighbor)
              RETURN neighbor.id + ' - ' + type(r) + ' -> ' +  node.id AS output
            }
            RETURN output LIMIT 50
            """,
            {"query": full_text_query},
        )
        # Skip null outputs (a missing id makes the Cypher concatenation null).
        lines.extend(el["output"] for el in response if el["output"] is not None)
    # Join once at the end so results of consecutive entities are kept on
    # separate lines instead of being fused together.
    return "\n".join(lines)

In [14]:
def full_retriever(question: str):
    """Build the combined context string (graph facts plus vector hits) for a question."""
    graph_text = graph_retriever(question)
    passages = []
    for doc in retriever.invoke(question):
        passages.append(doc.page_content)
    sections = [
        "Graph data:",
        f"{graph_text}",
        "vector data:",
        "#Document".join(passages),
    ]
    # The context ends with a newline followed by four spaces.
    return "\n".join(sections) + "\n    "

In [15]:
from langchain_core.runnables import  RunnablePassthrough

# Final step of the answer chain: turns the chat model's output into a plain string.
parser = StrOutputParser()

# Answer-generation prompt. {context} and {question} are filled in by the chain.
# A trailing backslash inside the string joins the next line without a newline.
template = """ 
Answer the question based only on the following context: {context}

Question: {question}

- REMEMBER: You are a representative at Western Sydney University, your job is to provide accurate information about \
Western Sydney University to international students who wish to enrol. If you are asked what your role is \
clearly state your role and what you can do.
- Use natural language.
- For questions that requires simple retrieval, your answer should be concise and informative enough. The entities extracted \
from the query should match exactly with the retrieved information. 
- For example, you should return the exact match of subjects within a particular degree, do not take subjects that don't belong \
to the degree mentioned in the query.
- For question that requires extensive reasoning, try extending your answer to 3 paragraphs, try to connect to \
neighbor entities for more contextually aware response.
- If there's no entity matching the query, return "There is no information in the database regarding what \
you are requesting. Perhaps you should check your spelling or try a different prompt."

Answer:
"""
# Prompt that turns the retrieved context and the question into the final request.
prompt = ChatPromptTemplate.from_template(template)

# Both prompt variables are derived from the single string passed to the chain:
# "context" is computed by full_retriever, "question" is forwarded unchanged.
chain_inputs = {
    "context": full_retriever,
    "question": RunnablePassthrough(),
}

# inputs -> prompt -> chat model -> plain string
chain = chain_inputs | prompt | llm | parser

In [43]:
from langchain.retrievers.contextual_compression import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor

# Wrap the base retriever with an LLM-driven compressor.
# NOTE(review): presumably LLMChainExtractor trims each retrieved document to the
# parts relevant to the query — confirm against the LangChain docs.
# NOTE(review): `compression_retriever` is not referenced by `chain` or
# `full_retriever` in the visible cells; they still use `retriever` directly.
compressor = LLMChainExtractor.from_llm(llm)
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor, base_retriever=retriever
)

In [29]:
# input() already returns a str, so no extra conversion is needed.
query = input("Enter your query:")
# Upper-case only the first character. str.capitalize() would also lower-case the
# rest of the text, mangling proper nouns and acronyms before entity extraction.
normalized_query = query[:1].upper() + query[1:]
answer = chain.invoke(input = normalized_query)



Generated Query: parramatta AND south AND campus




In [30]:
def full_answer():
    """Print the module-level ``answer`` assigned from ``chain.invoke`` in the query cell."""
    print(answer)
    # Disabled: would also print a separator and a preview of each retrieved page.
    # NOTE(review): `retrieved_docs` is not defined in any visible cell.
    # print("-" * 40)  # Adds a line of dashes as a separator 
    # for doc in retrieved_docs:
    #     print(f'Page {doc.metadata["page"]}: {doc.page_content[:300]}\n')

full_answer()

Parramatta South Campus is a part of Western Sydney University and offers a range of courses and programs for students to enroll in. Located in Parramatta, the campus is home to various facilities such as the Moot Court, Institute for Culture and Society, Science and Psychology Building, and more. Students can study a variety of majors and minors at this campus, including Business Studies, Economy and Markets, Global Business, Gender Studies, and more. The campus also provides learning experiences and opportunities for students to engage with industry professionals and gain practical skills relevant to their future careers.

The campus is well-connected to the Parramatta City Campus Precinct and the Parramatta CBD, providing students with access to a vibrant urban environment with various amenities and opportunities. Additionally, the campus offers a supportive learning environment with dedicated staff members such as Dr. Rangika Palliyaarachchi, who works at the campus and can provide

Liverpool Campus at Western Sydney University is located in the heart of the Liverpool Central Business District. The campus offers high-quality, technology-rich, and flexible teaching and learning spaces, as well as a dedicated floor for student life with study areas, a student kitchen, and areas to relax and socialize. It also provides nursing clinical practice units and has a whole floor dedicated to the library. The Ngara Ngura building is close to the Westfield Shopping Centre and the Macquarie Plaza, offering dining options, retail, and community amenities such as childcare. Additionally, it is conveniently located only 800m from the Liverpool transport hub.

As a representative at Western Sydney University, I can provide you with more information about the facilities and programs offered at the Liverpool Campus. Whether you are interested in full-time or part-time study options, we have a range of programs available for you to choose from. The campus also hosts top facilities, including nursing clinical practice units, a whole floor library facility, and a technology-rich learning environment. If you have any specific questions or need assistance with enrolling at the Liverpool Campus, feel free to reach out to me for personalized guidance and support.

If you are considering studying at Western Sydney University's Liverpool Campus, you can expect a modern and vibrant learning environment with access to top facilities and resources. The campus is designed to cater to the needs of over 1,000 students, providing a conducive space for academic growth and personal development. With a focus on technology-rich spaces, flexible study areas, and convenient amenities nearby, the Liverpool Campus offers a dynamic and supportive community for students pursuing their educational goals. Whether you are interested in nursing, criminal justice, community welfare, or other fields of study, the Liverpool Campus at Western Sydney University provides a welcoming and inclusive environment for your academic journey.