In [None]:
import os
import sys
import logging


In [2]:
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG) 
# logging.DEBUG for debug, logging.INFO for info

In [3]:
from llama_index.core import (
    KnowledgeGraphIndex,
    # LLMPredictor,
    ServiceContext,
    SimpleDirectoryReader,
)

In [4]:
from llama_index.core.storage.storage_context import StorageContext
from llama_index.graph_stores.nebula import NebulaGraphStore


In [5]:
from langchain import OpenAI
from IPython.display import Markdown, display

In [23]:
# Install dependencies
# !pip install InstructorEmbedding torch transformers sentence-transformers
# !pip uninstall -y transformers sentence-transformers InstructorEmbedding
# !pip install transformers==4.31.0 sentence-transformers==2.2.2 InstructorEmbedding
# !pip install wikipedia
# %pip install ipython-ngql nebula3-python
# %pip install llama-index-graph-stores-neo4j
# %pip install neo4j



In [7]:
# from typing import Any, List
# from InstructorEmbedding import INSTRUCTOR


# from llama_index.core.bridge.pydantic import PrivateAttr
# from llama_index.core.embeddings import BaseEmbedding


In [8]:
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
from llama_index.core import Settings

In [9]:
import requests
from typing import Any, List
from llama_index.core.embeddings import BaseEmbedding
from llama_index.core.bridge.pydantic import PrivateAttr

In [10]:
# This is a custom embedding class for remote instructor embeddings
# It uses the requests library to make HTTP POST requests to a remote server
# to get the embeddings for the provided texts.
# The class is initialized with the base URL of the server, an API key for authentication,
# the model name, and an instruction string.


class RemoteInstructorEmbeddings(BaseEmbedding):
    _base_url: str = PrivateAttr()
    _api_key: str = PrivateAttr()
    _model_name: str = PrivateAttr()
    _instruction: str = PrivateAttr()

    def __init__(
        self,
        base_url: str,
        api_key: str,
        model_name: str = "snowflake-arctic-embed-s",  # Change as needed
        # model_name: str, 
        instruction: str = "Represent the text for semantic search:",
        **kwargs: Any,
    ):
        super().__init__(**kwargs)
        self._base_url = base_url.rstrip("/")
        self._api_key = api_key
        self._model_name = model_name
        self._instruction = instruction

    def _post_request(self, texts: List[str]) -> List[List[float]]:
        headers = {
            "Authorization": f"Bearer {self._api_key}",
            "Content-Type": "application/json"
        }

        # If you want to keep using instructions
        inputs = [f"{self._instruction} {text}" for text in texts]

        payload = {
            "model": self._model_name,
            "input": inputs if len(inputs) > 1 else inputs[0]  # API accepts string or list
        }

        response = requests.post(
            f"{self._base_url}/embeddings",
            headers=headers,
            json=payload
        )
        response.raise_for_status()
        return response.json()["data"]
    
    async def _aget_query_embedding(self, query: str) -> List[float]:
        return self._get_query_embedding(query)

    async def _aget_text_embedding(self, text: str) -> List[float]:
        return self._get_text_embedding(text)
    
    def _get_query_embedding(self, query: str) -> List[float]:
        return self._post_request([query])[0]

    def _get_text_embedding(self, text: str) -> List[float]:
        return self._post_request([text])[0]

    def _get_text_embeddings(self, texts: List[str]) -> List[List[float]]:
        return self._post_request(texts)

In [11]:
# from llama_index.core import Settings
# from llama_index.core import SimpleDirectoryReader, VectorStoreIndex

embed_model_snow = RemoteInstructorEmbeddings(
    base_url="http://llms-inference.innkube.fim.uni-passau.de",
    api_key="sk-Fr2TEolkvrxjjs235KJqkg",
    model_name="Snowflake/snowflake-arctic-embed-xs",  # or whatever model your server uses
)

embed_model_intfloat = RemoteInstructorEmbeddings(
    base_url="http://llms-inference.innkube.fim.uni-passau.de",
    api_key="sk-Fr2TEolkvrxjjs235KJqkg",
    model_name="intfloat/multilingual-e5-large",  # or whatever model your server uses
)

embed_model_jinaai = RemoteInstructorEmbeddings(
    base_url="http://llms-inference.innkube.fim.uni-passau.de",
    api_key="sk-Fr2TEolkvrxjjs235KJqkg",
    model_name="jinaai/jina-embeddings-v2-base-de",  # or whatever model your server uses
)


# Example usage
sentence = "What did the author do growing up?"



In [12]:
embedding_snow = embed_model_snow._get_query_embedding(sentence)

print(f"Embedding vector:\n{embedding_snow}")

DEBUG:urllib3.connectionpool:Starting new HTTP connection (1): llms-inference.innkube.fim.uni-passau.de:80


DEBUG:urllib3.connectionpool:http://llms-inference.innkube.fim.uni-passau.de:80 "POST /embeddings HTTP/1.1" 308 164
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): llms-inference.innkube.fim.uni-passau.de:443
DEBUG:urllib3.connectionpool:https://llms-inference.innkube.fim.uni-passau.de:443 "POST /embeddings HTTP/1.1" 200 4883
Embedding vector:
{'object': 'embedding', 'index': 0, 'embedding': [0.03755944, 0.03179579, -0.0016535937, -0.006316622, -0.007491683, -0.0060295025, -0.088815525, -0.019279521, 0.062740855, 0.013409528, -0.0010979649, 0.09170799, -0.0106818965, -0.044152547, 0.015897894, 0.018162945, -0.045811456, 0.084051475, -0.10999854, -0.017663145, -0.014483566, 0.08315822, -0.0026611693, 0.029541371, -0.016801788, 0.02788246, -0.047512904, 0.0020483825, 0.013643477, -0.094855666, 0.043025337, 0.011601741, -0.012261052, -0.037495635, -0.027818657, -0.045428634, 0.030519703, -0.00079290086, -0.05478659, -0.052404564, -0.014143277, 0.019917563, 0.0052505587, -0

In [13]:
embedding_intfloat = embed_model_intfloat._get_query_embedding(sentence)

print(f"Embedding vector:\n{embedding_intfloat}")

DEBUG:urllib3.connectionpool:Starting new HTTP connection (1): llms-inference.innkube.fim.uni-passau.de:80
DEBUG:urllib3.connectionpool:http://llms-inference.innkube.fim.uni-passau.de:80 "POST /embeddings HTTP/1.1" 308 164
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): llms-inference.innkube.fim.uni-passau.de:443
DEBUG:urllib3.connectionpool:https://llms-inference.innkube.fim.uni-passau.de:443 "POST /embeddings HTTP/1.1" 200 12859
Embedding vector:
{'object': 'embedding', 'index': 0, 'embedding': [-0.0053951642, -0.013991855, -0.02413849, -0.046007115, 0.026205083, -0.035877418, -0.00933355, 0.08340907, 0.02869516, -0.03106666, 0.026018752, 0.025730783, -0.049191702, -0.034064915, -0.0435001, -0.01627866, -0.02437564, 0.046040993, 0.005115666, -0.011518719, 0.026679384, -0.023511736, -0.02866128, -0.0329808, -0.007199199, -0.003658887, -0.027763499, -0.012416501, -0.012585893, -0.04949661, -0.016837656, -0.009934895, -0.037097048, -0.011459431, -0.047430016, 0.03318407

In [14]:
embedding_jinaai = embed_model_jinaai._get_query_embedding(sentence)

print(f"Embedding vector:\n{embedding_jinaai}")

DEBUG:urllib3.connectionpool:Starting new HTTP connection (1): llms-inference.innkube.fim.uni-passau.de:80
DEBUG:urllib3.connectionpool:http://llms-inference.innkube.fim.uni-passau.de:80 "POST /embeddings HTTP/1.1" 308 164
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): llms-inference.innkube.fim.uni-passau.de:443
DEBUG:urllib3.connectionpool:https://llms-inference.innkube.fim.uni-passau.de:443 "POST /embeddings HTTP/1.1" 200 9678
Embedding vector:
{'object': 'embedding', 'index': 0, 'embedding': [0.01325042, -0.074396856, -0.024057414, 0.046972133, 0.032117072, -0.039483823, -0.06496353, 0.036031418, 0.004974986, 0.02041051, -0.03136338, -0.023328034, 0.041890778, -0.050327286, 0.04631569, 0.0415504, 0.014222928, -0.004838227, 0.03702824, -0.010460538, -0.0030314894, -0.033624463, -0.03722274, 0.017371424, 0.014089208, 0.0014466055, -0.012715541, 0.011159528, -0.014830746, -0.033429958, 0.032433137, -0.08485132, 0.095500275, -0.044808302, 0.017492985, -0.0070871515, -0

In [15]:
import os
from llama_index.core import download_loader

In [16]:
# Load the WikipediaReader
WikipediaReader = download_loader("WikipediaReader")
loader = WikipediaReader()

# Define the page you want to fetch
page_title = 'Guardians of the Galaxy Vol. 3'

# Load data from Wikipedia
documents = loader.load_data(pages=[page_title], auto_suggest=False)

# Prepare folder and filename
folder_name = 'wiki_data'
os.makedirs(folder_name, exist_ok=True)

# Clean filename (remove special characters, keep it safe)
safe_filename = page_title.replace(" ", "_").replace("/", "_") + ".txt"
file_path = os.path.join(folder_name, safe_filename)

# Save the text content
with open(file_path, "w", encoding="utf-8") as f:
    for doc in documents:
        f.write(doc.text + "\n\n")

print(f"Saved to: {file_path}")

  WikipediaReader = download_loader("WikipediaReader")


DEBUG:urllib3.connectionpool:Starting new HTTP connection (1): en.wikipedia.org:80
DEBUG:urllib3.connectionpool:http://en.wikipedia.org:80 "GET /w/api.php?prop=info%7Cpageprops&inprop=url&ppprop=disambiguation&redirects=&titles=Guardians+of+the+Galaxy+Vol.+3&format=json&action=query HTTP/1.1" 301 0
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): en.wikipedia.org:443
DEBUG:urllib3.connectionpool:https://en.wikipedia.org:443 "GET /w/api.php?prop=info%7Cpageprops&inprop=url&ppprop=disambiguation&redirects=&titles=Guardians+of+the+Galaxy+Vol.+3&format=json&action=query HTTP/1.1" 200 299
DEBUG:urllib3.connectionpool:Starting new HTTP connection (1): en.wikipedia.org:80
DEBUG:urllib3.connectionpool:http://en.wikipedia.org:80 "GET /w/api.php?prop=extracts%7Crevisions&explaintext=&rvprop=ids&titles=Guardians+of+the+Galaxy+Vol.+3&format=json&action=query HTTP/1.1" 301 0
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): en.wikipedia.org:443
DEBUG:urllib3.connectionp

In [18]:
from llama_index.llms.openai_like import OpenAILike
from llama_index.core.llms import ChatMessage

In [19]:
llm_qwen = OpenAILike(
  api_base="https://llms-inference.innkube.fim.uni-passau.de",
  api_key="sk-Fr2TEolkvrxjjs235KJqkg",
  model="qwen2.5")

In [46]:
%%time
messages = [
    ChatMessage(
        role="system", content="You are a assitant that helps to provide knowledge and information."
    ),
    ChatMessage(role="user", content="Tell me more about Rocket"),
]
resp = llm_qwen.chat(messages)

DEBUG:openai._base_client:Request options: {'method': 'post', 'url': '/completions', 'files': None, 'idempotency_key': 'stainless-python-retry-5dbb9e5c-6ff1-4941-8da3-76b6b26d4a94', 'json_data': {'model': 'qwen2.5', 'prompt': 'system: You are a assitant that helps to provide knowledge and information.\nuser: Tell me more about Rocket\nassistant: ', 'stream': False, 'temperature': 0.1}}
DEBUG:openai._base_client:Sending HTTP Request: POST https://llms-inference.innkube.fim.uni-passau.de/completions
DEBUG:httpcore.connection:close.started
DEBUG:httpcore.connection:close.complete
DEBUG:httpcore.connection:connect_tcp.started host='llms-inference.innkube.fim.uni-passau.de' port=443 local_address=None timeout=60.0 socket_options=None
DEBUG:httpcore.connection:connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x00000146843D7A90>
DEBUG:httpcore.connection:start_tls.started ssl_context=<ssl.SSLContext object at 0x0000014683DF7BC0> server_hostname='llms-inference.i

In [None]:
# print(dict(resp))
# print(dir(resp.message.blocks[0]))
print(resp.message.blocks[0].text)

['__abstractmethods__', '__annotations__', '__class__', '__class_getitem__', '__class_vars__', '__copy__', '__deepcopy__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__fields__', '__fields_set__', '__format__', '__ge__', '__get_pydantic_core_schema__', '__get_pydantic_json_schema__', '__getattr__', '__getattribute__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__iter__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__pretty__', '__private_attributes__', '__pydantic_complete__', '__pydantic_computed_fields__', '__pydantic_core_schema__', '__pydantic_custom_init__', '__pydantic_decorators__', '__pydantic_extra__', '__pydantic_fields__', '__pydantic_fields_set__', '__pydantic_generic_metadata__', '__pydantic_init_subclass__', '__pydantic_parent_namespace__', '__pydantic_post_init__', '__pydantic_private__', '__pydantic_root_model__', '__pydantic_serializer__', '__pydantic_setattr_handlers__', '__pydantic_validator__', '__reduce__', 

In [25]:
from llama_index.graph_stores.neo4j import Neo4jGraphStore
from llama_index.core.storage.storage_context import StorageContext

graph_store = Neo4jGraphStore(
    username="neo4j",
    password="neo4j123",
    url="bolt://localhost:7687",
    database="neo4j"
)

storage_context = StorageContext.from_defaults(graph_store=graph_store)

DEBUG:neo4j.pool:[#0000]  _: <POOL> created, direct address IPv4Address(('localhost', 7687))
DEBUG:neo4j:[#0000]  _: <WORKSPACE> routing towards fixed database: None
DEBUG:neo4j:[#0000]  _: <WORKSPACE> pinning database: None
DEBUG:neo4j.pool:[#0000]  _: <POOL> acquire direct connection, access_mode='READ', database=AcquisitionDatabase(name=None, guessed=False)
DEBUG:neo4j.pool:[#0000]  _: <POOL> trying to hand out new connection
DEBUG:neo4j.io:[#0000]  _: <RESOLVE> in: localhost:7687
DEBUG:neo4j.io:[#0000]  _: <RESOLVE> dns resolver out: [::1]:7687
DEBUG:neo4j.io:[#0000]  C: <OPEN> [::1]:7687
DEBUG:neo4j.io:[#0000]  S: <ERROR> ConnectionRefusedError 10061 'Es konnte keine Verbindung hergestellt werden, da der Zielcomputer die Verbindung verweigerte' None 10061 None
DEBUG:neo4j.io:[#0000]  C: <CLOSE> [::1]:7687
DEBUG:neo4j.io:[#0000]  S: <CONNECTION FAILED> [::1]:7687 ServiceUnavailable: Failed to establish connection to ResolvedIPv6Address(('::1', 7687, 0, 0)) (reason [WinError 10061] 

In [59]:
print(dir(Neo4jGraphStore))
print(dir(NebulaGraphStore))

['__abstractmethods__', '__annotations__', '__class__', '__class_getitem__', '__del__', '__delattr__', '__dict__', '__dir__', '__doc__', '__enter__', '__eq__', '__exit__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__parameters__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__slots__', '__str__', '__subclasshook__', '__weakref__', '_abc_impl', '_is_protocol', '_is_runtime_protocol', 'client', 'close', 'delete', 'get', 'get_rel_map', 'get_schema', 'persist', 'query', 'refresh_schema', 'schema', 'upsert_triplet']
['__abstractmethods__', '__annotations__', '__class__', '__class_getitem__', '__del__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__parameters__', '__reduce__', '__reduc

In [26]:
# Set global settings (optional, if not passed locally)
Settings.llm = llm_qwen
Settings.embed_model = embed_model_snow
Settings.chunk_size = 512

In [27]:
# NOTE: can take a while!
index = KnowledgeGraphIndex.from_documents(
    documents,
    storage_context=storage_context,
    max_triplets_per_chunk=2,
)

DEBUG:llama_index.core.node_parser.node_utils:> Adding chunk: Guardians of the Galaxy Vol. 3 (marketed as Gua...
DEBUG:llama_index.core.node_parser.node_utils:> Adding chunk: His return was publicly revealed in March 2019,...
DEBUG:llama_index.core.node_parser.node_utils:> Adding chunk: The Guardians travel to Orgocorp's headquarters...
DEBUG:llama_index.core.node_parser.node_utils:> Adding chunk: They are followed by Ayesha and Adam after the ...
DEBUG:llama_index.core.node_parser.node_utils:> Adding chunk: The other Guardians orchestrate a rescue, which...
DEBUG:llama_index.core.node_parser.node_utils:> Adding chunk: The new Guardians—Rocket, Groot, Kraglin, Cosmo...
DEBUG:llama_index.core.node_parser.node_utils:> Adding chunk: Dave Bautista as Drax the Destroyer:A member of...
DEBUG:llama_index.core.node_parser.node_utils:> Adding chunk: Pom Klementieff as Mantis: A member of the Guar...
DEBUG:llama_index.core.node_parser.node_utils:> Adding chunk: Poulter believed there was "a lot 

In [28]:
index.storage_context.persist()

DEBUG:fsspec.local:open file: c:/Research/PHD_Related_research/Experiments/storage/docstore.json
DEBUG:fsspec.local:open file: c:/Research/PHD_Related_research/Experiments/storage/index_store.json
DEBUG:fsspec.local:open file: c:/Research/PHD_Related_research/Experiments/storage/default__vector_store.json
DEBUG:fsspec.local:open file: c:/Research/PHD_Related_research/Experiments/storage/image__vector_store.json


In [29]:
query_engine = index.as_query_engine(
    include_text=False, response_mode="tree_summarize"
)

response = query_engine.query("Tell me more about Rocket")


DEBUG:openai._base_client:Request options: {'method': 'post', 'url': '/completions', 'files': None, 'idempotency_key': 'stainless-python-retry-592c6128-a9cb-4865-8020-790ec57ea3c1', 'json_data': {'model': 'qwen2.5', 'prompt': "A question is provided below. Given the question, extract up to 10 keywords from the text. Focus on extracting the keywords that we can use to best lookup answers to the question. Avoid stopwords.\n---------------------\nTell me more about Rocket\n---------------------\nProvide keywords in the following comma-separated format: 'KEYWORDS: <keywords>'\n", 'stream': False, 'temperature': 0.1}}
DEBUG:openai._base_client:Sending HTTP Request: POST https://llms-inference.innkube.fim.uni-passau.de/completions
DEBUG:httpcore.connection:close.started
DEBUG:httpcore.connection:close.complete
DEBUG:httpcore.connection:connect_tcp.started host='llms-inference.innkube.fim.uni-passau.de' port=443 local_address=None timeout=60.0 socket_options=None
DEBUG:httpcore.connection:con

DEBUG:httpcore.connection:start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x00000146FC76E640>
DEBUG:httpcore.http11:send_request_headers.started request=<Request [b'POST']>
DEBUG:httpcore.http11:send_request_headers.complete
DEBUG:httpcore.http11:send_request_body.started request=<Request [b'POST']>
DEBUG:httpcore.http11:send_request_body.complete
DEBUG:httpcore.http11:receive_response_headers.started request=<Request [b'POST']>
DEBUG:httpcore.http11:receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Tue, 29 Apr 2025 06:48:25 GMT'), (b'Content-Type', b'application/json'), (b'Content-Length', b'406'), (b'Connection', b'keep-alive'), (b'x-litellm-call-id', b'b931c304-8264-490f-8f3c-cf08e70babee'), (b'x-litellm-model-id', b'4fc98ad04997d06d243fa1037a8527856cb537a73abc970ce37122958bbea148'), (b'x-litellm-model-api-base', b'http://qwen2-5-ollama:11434'), (b'x-litellm-version', b'1.42.2'), (b'x-litellm-response-cost', b'0.05532

In [30]:
display(Markdown(f"<b>{response}</b>"))

<b>Based on the provided context, there is no direct mention of Rocket. However, given that we are discussing a character described as "Cruelest mcu villain" who specializes in creating hybrid creatures and has experimented on animal lifeforms to enhance and anthropomorphize them, it's likely that this information pertains to the High Evolutionary, a character from the Marvel Cinematic Universe (MCU).

Rocket, also known as Rocket Raccoon, is a highly intelligent and genetically modified raccoon who is part of the Guardians of the Galaxy. While not directly mentioned in the provided context, Rocket could be one of the "animal lifeforms" that the High Evolutionary has experimented on to enhance and anthropomorphize.

If you're looking for more specific information about Rocket, he is known for his sharp wit, combat skills, and close friendship with Groot. He plays a significant role in the Guardians of the Galaxy films and other MCU projects. If you have any more specific questions or need further details, feel free to ask!</b>

In [55]:
query_engine = index.as_query_engine(
    include_text=True, response_mode="tree_summarize"
)

response = query_engine.query("Tell me more about Rocket")

DEBUG:openai._base_client:Request options: {'method': 'post', 'url': '/completions', 'files': None, 'idempotency_key': 'stainless-python-retry-a9357011-d9be-4381-a4fa-2335948ffb97', 'json_data': {'model': 'qwen2.5', 'prompt': "A question is provided below. Given the question, extract up to 10 keywords from the text. Focus on extracting the keywords that we can use to best lookup answers to the question. Avoid stopwords.\n---------------------\nTell me more about Rocket\n---------------------\nProvide keywords in the following comma-separated format: 'KEYWORDS: <keywords>'\n", 'stream': False, 'temperature': 0.1}}
DEBUG:openai._base_client:Sending HTTP Request: POST https://llms-inference.innkube.fim.uni-passau.de/completions
DEBUG:httpcore.connection:close.started
DEBUG:httpcore.connection:close.complete
DEBUG:httpcore.connection:connect_tcp.started host='llms-inference.innkube.fim.uni-passau.de' port=443 local_address=None timeout=60.0 socket_options=None
DEBUG:httpcore.connection:con

In [56]:
display(Markdown(f"<b>{response}</b>"))

<b>Rocket is a central character in the Guardians of the Galaxy franchise, known for his sharp wit, combat skills, and complex backstory. Here’s a detailed look at Rocket's character:

1. **Background and Origin**:
   - **Experimentation**: As a baby raccoon, Rocket was found and experimented on by the High Evolutionary, the leader of Orgocorp. The High Evolutionary aimed to enhance and anthropomorphize animal lifeforms into "Humanimals" for an ideal society called Counter-Earth.
   - **Batch 89**: Rocket was part of Batch 89, which included other test subjects like Lylla (an otter), Teefs (a walrus), and Floor (a rabbit). The High Evolutionary used Rocket's intelligence to improve later batches of Humanimals.
   - **Escape**: When the High Evolutionary planned to harvest Rocket's brain and incinerate Batch 89, Rocket tried to save his friends. However, Lylla was killed by the High Evolutionary, who mocked Rocket's grief. Enraged, Rocket attacked the High Evolutionary, leading to the deaths of Teefs and Floor during the chaos. Rocket managed to escape in a spaceship.

2. **Personality and Traits**:
   - **Intelligence**: Rocket is highly intelligent and resourceful, often using his skills to solve complex problems.
   - **Combat Skills**: He is a skilled fighter and marksman, contributing significantly to the Guardians' missions.
   - **Emotional Depth**: Despite his tough exterior, Rocket has deep emotional scars from his past. His experiences with the High Evolutionary have left him with a sense of loss and anger.

3. **Role in the Guardians**:
   - **Team Member**: Rocket is a founding member of the Guardians of the Galaxy, working alongside Peter Quill (Star-Lord), Gamora, Drax, and Nebula.
   - **Leadership**: While not the official leader, Rocket often takes on leadership roles during missions, especially when it comes to tactical planning and problem-solving.

4. **Relationships**:
   - **Peter Quill (Star-Lord)**: Rocket has a strong bond with Star-Lord, often acting as his right-hand man and providing support in both combat and emotional situations.
   - **Gamora**: Rocket's relationship with Gamora is one of mutual respect and camaraderie. They have fought together and rely on each other in the field.
   - **Drax and Nebula**: Rocket shares a deep friendship with Drax and Nebula, though their interactions can sometimes be tense due to their different personalities.

5. **Significance in the Franchise**:
   - **Backstory Focus**: In "Guardians of the Galaxy Vol. 3," Rocket's backstory is explored in depth, providing insight into his past and the events that shaped him.
   - **Character Development**: The film delves into Rocket's emotional journey, particularly his struggle with the loss of his friends and the trauma he endured at the hands of the High Evolutionary.

6. **Portrayal**:
   - **Voice Actor**: Rocket is voiced by Bradley Cooper, who brings a mix of humor, toughness, and vulnerability to the character.
   - **CGI Animation**: The character is brought to life through advanced CGI, making him one of the most visually impressive characters in the Marvel Cinematic Universe.

Rocket's complex history and dynamic personality make him a fan favorite and a crucial part of the Guardians of the Galaxy story.</b>

In [60]:
from neo4j import GraphDatabase

# Connect to the database
uri = "bolt://localhost:7687"
username = "neo4j"
password = "neo4j123"

driver = GraphDatabase.driver(uri, auth=(username, password))

DEBUG:neo4j.pool:[#0000]  _: <POOL> created, direct address IPv4Address(('localhost', 7687))


In [62]:
def fetch_graph(tx, limit=50):
    query = """
    MATCH (n)-[r]->(m)
    RETURN n, r, m
    LIMIT $limit
    """
    result = tx.run(query, limit=limit)
    data = []

    for record in result:
        data.append({
            "source": dict(record["n"]),
            "relation": record["r"].type,
            "target": dict(record["m"])
        })

    return data

# Safely extract and store the graph data
with driver.session() as session:
    graph_data = session.read_transaction(fetch_graph)

# Now build the sets for visualization
nodes = set()
edges = []

for entry in graph_data:
    source = entry["source"].get("name", str(entry["source"]))
    target = entry["target"].get("name", str(entry["target"]))
    relation = entry["relation"]

    nodes.add(source)
    nodes.add(target)
    edges.append((source, target, relation))

DEBUG:neo4j:[#0000]  _: <WORKSPACE> routing towards fixed database: None
DEBUG:neo4j:[#0000]  _: <WORKSPACE> pinning database: None
DEBUG:neo4j.pool:[#0000]  _: <POOL> acquire direct connection, access_mode='READ', database=AcquisitionDatabase(name=None, guessed=False)
DEBUG:neo4j.pool:[#F88E]  _: <POOL> picked existing connection bolt-25
DEBUG:neo4j.pool:[#F88E]  _: <POOL> checked re_auth auth=None updated=False force=False
DEBUG:neo4j.pool:[#F88E]  _: <POOL> handing out existing connection
DEBUG:neo4j.io:[#F88E]  C: BEGIN {'mode': 'r'}
DEBUG:neo4j.io:[#F88E]  _: <CONNECTION> client state: READY > TX_READY_OR_TX_STREAMING
DEBUG:neo4j.io:[#F88E]  S: SUCCESS {}
DEBUG:neo4j.io:[#F88E]  _: <CONNECTION> server state: READY > TX_READY_OR_TX_STREAMING
DEBUG:neo4j.io:[#F88E]  C: RUN '\n    MATCH (n)-[r]->(m)\n    RETURN n, r, m\n    LIMIT $limit\n    ' {'limit': 50} {}
DEBUG:neo4j.io:[#F88E]  C: PULL {'n': 1000}
DEBUG:neo4j.io:[#F88E]  S: SUCCESS {'t_first': 0, 'fields': ['n', 'r', 'm'], 'qid

  graph_data = session.read_transaction(fetch_graph)


In [65]:
from pyvis.network import Network
from IPython.display import IFrame

net = Network(notebook=True, height="600px", width="100%", cdn_resources='in_line')

for node in nodes:
    net.add_node(node, label=node)

for source, target, rel in edges:
    net.add_edge(source, target, label=rel)

net.show("neo4j_graph.html")
# IFrame("neo4j_graph.html", width="100%", height="600px")


neo4j_graph.html


In [52]:
print(dir(index))

['__abstractmethods__', '__annotations__', '__class__', '__class_getitem__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__orig_bases__', '__parameters__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__slots__', '__str__', '__subclasshook__', '__weakref__', '_abc_impl', '_build_index_from_nodes', '_callback_manager', '_delete_node', '_docstore', '_embed_model', '_extract_triplets', '_graph_store', '_index_struct', '_insert', '_is_protocol', '_kg_triplet_extract_fn', '_llm', '_llm_extract_triplets', '_max_object_length', '_object_map', '_parse_triplet_response', '_show_progress', '_storage_context', '_transformations', '_vector_store', 'add_node', 'as_chat_engine', 'as_query_engine', 'as_retriever', 'build_index_from_nodes', 'delete', 'delete_nodes', 'delete_ref_doc', 'docstore', 'from_docum

In [54]:
graph = index.get_networkx_graph()
# print(graph.nodes())  # List all nodes in the graph
# print(graph.edges())  # List all edges in the graph
# print(graph.nodes(data=True))  # List all nodes with their attributes
# print(graph.edges(data=True))  # List all edges with their attributes


DEBUG:neo4j:[#0000]  _: <WORKSPACE> routing towards fixed database: neo4j
DEBUG:neo4j:[#0000]  _: <WORKSPACE> pinning database: 'neo4j'
DEBUG:neo4j.pool:[#0000]  _: <POOL> acquire direct connection, access_mode='WRITE', database=AcquisitionDatabase(name='neo4j', guessed=False)
DEBUG:neo4j.pool:[#F672]  _: <POOL> picked existing connection bolt-3
DEBUG:neo4j.pool:[#F672]  _: <POOL> checked re_auth auth=None updated=False force=False
DEBUG:neo4j.pool:[#F672]  _: <POOL> handing out existing connection
DEBUG:neo4j.io:[#F672]  C: BEGIN {'db': 'neo4j', 'bookmarks': ['FB:kcwQBKSVGJtoSZS3C7vmPo+vEskAiJA=']}
DEBUG:neo4j.io:[#F672]  _: <CONNECTION> client state: READY > TX_READY_OR_TX_STREAMING
DEBUG:neo4j.io:[#F672]  C: RUN 'MATCH p=(n1:Entity)-[*1..1]->() WHERE toLower(n1.id) IN [\'guardians of the galaxy vol. 3\', \'superhero film\', \'marvel studios\', \'disneyland paris\', \'united states\', \'rocket\', \'high evolutionary\', \'animal lifeforms\', \'ayesha\', \'adam\', \'sovereign\', \'krag

In [138]:
os.environ['NEBULA_USER'] = "root"
os.environ['NEBULA_PASSWORD'] = "nebula"  # or replace with your password
os.environ['NEBULA_ADDRESS'] = "127.0.0.1:9669"  # Assuming NebulaGraph is running locally

In [None]:
# service_context = ServiceContext.from_defaults(
#     llm_predictor=llm_qwen,
#     embed_model=embed_model_snow
# )

# Set global settings (optional, if not passed locally)
Settings.llm = llm_qwen
Settings.embed_model = embed_model_snow
Settings.chunk_size = 512

space_name = "llamaindex"
edge_types, rel_prop_names = ["relationship"], ["relationship"] # default, could be omit if create from an empty kg
tags = ["entity"] # default, could be omit if create from an empty kg

graph_store = NebulaGraphStore(space_name=space_name, edge_types=edge_types, rel_prop_names=rel_prop_names, tags=tags)
storage_context = StorageContext.from_defaults(graph_store=graph_store)



RuntimeError: The services status exception: [services: ('127.0.0.1', 9669), status: BAD]

In [131]:
kg_index = KnowledgeGraphIndex.from_documents(
    documents,
    storage_context=storage_context,
    max_triplets_per_chunk=10,
    llm = llm_qwen,
    space_name=space_name,
    edge_types=edge_types,
    rel_prop_names=rel_prop_names,
    tags=tags,
    include_embeddings=False,
)

DEBUG:llama_index.core.node_parser.node_utils:> Adding chunk: Guardians of the Galaxy Vol. 3 (marketed as Gua...
DEBUG:llama_index.core.node_parser.node_utils:> Adding chunk: His return was publicly revealed in March 2019,...
DEBUG:llama_index.core.node_parser.node_utils:> Adding chunk: The Guardians travel to Orgocorp's headquarters...
DEBUG:llama_index.core.node_parser.node_utils:> Adding chunk: They are followed by Ayesha and Adam after the ...
DEBUG:llama_index.core.node_parser.node_utils:> Adding chunk: The other Guardians orchestrate a rescue, which...
DEBUG:llama_index.core.node_parser.node_utils:> Adding chunk: The new Guardians—Rocket, Groot, Kraglin, Cosmo...
DEBUG:llama_index.core.node_parser.node_utils:> Adding chunk: Dave Bautista as Drax the Destroyer:A member of...
DEBUG:llama_index.core.node_parser.node_utils:> Adding chunk: Pom Klementieff as Mantis: A member of the Guar...
DEBUG:llama_index.core.node_parser.node_utils:> Adding chunk: Poulter believed there was "a lot 

KeyboardInterrupt: 

In [122]:
print(type(kg_index))

<class 'llama_index.core.indices.knowledge_graph.base.KnowledgeGraphIndex'>


In [124]:
print(dir(kg_index))

['__abstractmethods__', '__annotations__', '__class__', '__class_getitem__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__orig_bases__', '__parameters__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__slots__', '__str__', '__subclasshook__', '__weakref__', '_abc_impl', '_build_index_from_nodes', '_callback_manager', '_delete_node', '_docstore', '_embed_model', '_extract_triplets', '_graph_store', '_index_struct', '_insert', '_is_protocol', '_kg_triplet_extract_fn', '_llm', '_llm_extract_triplets', '_max_object_length', '_object_map', '_parse_triplet_response', '_show_progress', '_storage_context', '_transformations', '_vector_store', 'add_node', 'as_chat_engine', 'as_query_engine', 'as_retriever', 'build_index_from_nodes', 'delete', 'delete_nodes', 'delete_ref_doc', 'docstore', 'from_docum

In [128]:
graph = kg_index.get_networkx_graph()
# print(graph.nodes())  # List all nodes in the graph
# print(graph.edges())  # List all edges in the graph
print(graph.nodes(data=True))  # List all nodes with their attributes
print(graph.edges(data=True))  # List all edges with their attributes


[('Guardians of the galaxy vol. 3', {}), ('American superhero film', {}), ('Marvel comics superhero team guardians of the galaxy', {}), ('Marvel studios', {}), ('Walt disney studios motion pictures', {}), ('Guardians of the galaxy', {}), ('Guardians of the galaxy vol. 2', {}), ('32nd film in mcu', {}), ('Avengers: infinity war', {}), ('Avengers: endgame', {}), ('This iteration of the guardians of the galaxy', {}), ('Dongdaemun design plaza', {}), ('Disneyland paris', {}), ('Dolby theatre', {}), ('Mcu', {}), ('3d', {}), ('Imax', {}), ('1.85:1', {}), ('Walt disney studios home entertainment', {}), ('Digital download', {}), ('Ultra hd blu-ray', {}), ('Dvd', {}), ('Disney+', {}), ("Rocket raccoon's story", {}), ('Marvel.com', {}), ('Imdb', {}), ('Disney a to z', {}), ('Disney.com', {}), ('Disney', {}), ('Vol. 3', {}), ('Character', {}), ('James gunn', {}), ('Post-credits scene', {}), ('Lambshank', {}), ('Stan lee', {}), ('Disney and marvel', {}), ('Different than before', {}), ('The suicid

In [60]:
# from llama_index.llms.openai_like import OpenAILike
# from llama_index.core.llms import ChatMessage

# llm = OpenAILike(
#   api_base="https://llms-inference.innkube.fim.uni-passau.de",
#   api_key="sk-Fr2TEolkvrxjjs235KJqkg",
#   model="qwen2.5")

# messages = [
#     ChatMessage(
#         role="system", content="You are a pirate with a colorful personality"
#     ),
#     ChatMessage(role="user", content="What is your name"),
# ]
# resp = llm.chat(messages)