In [1]:
from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding
from llama_index.llms.azure_openai import AzureOpenAI
from dotenv import load_dotenv
from llama_index.llms.gemini import Gemini
from llama_index.embeddings.gemini import GeminiEmbedding
import os

load_dotenv()

True

In [2]:
# llm = AzureOpenAI(
#     model=os.getenv("DEPLOYMENT_NAME"),
#     deployment_name=os.getenv("DEPLOYMENT_NAME"),
#     api_key=os.getenv("AZURE_OPENAI_API_KEY"),
#     azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
#     api_version=os.getenv("OPENAI_API_VERSION"),
#     temperature=0.0,
# )

llm = Gemini(
    model="models/gemini-1.5-flash-latest",
    temperature=0.0,
)

In [3]:
embed_model = AzureOpenAIEmbedding(
    model="text-embedding-ada-002",
    # deployment_name="my-custom-embedding",
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
    api_version=os.getenv("OPENAI_API_VERSION"),
)
embeddings = embed_model.get_text_embedding("Hello, world!")
len(embeddings)

1536

In [4]:
from llama_index.core import Settings

Settings.llm = llm
Settings.embed_model = embed_model

In [5]:
import nest_asyncio

nest_asyncio.apply()

In [6]:
from llama_index.core import SimpleDirectoryReader

documents = SimpleDirectoryReader(
    "D:\projects\graphrag-poc\data\selection 1"
).load_data()

In [24]:
from llama_index.graph_stores.neo4j import Neo4jPropertyGraphStore

# Note: used to be `Neo4jPGStore`
graph_store = Neo4jPropertyGraphStore(
    username="neo4j",
    password="12345678",
    url="bolt://localhost:7687",
    database="graph-rag-2",
)

ClientError: {code: Neo.ClientError.Database.DatabaseNotFound} {message: Database does not exist. Database name: 'graph-rag-2'.}

In [8]:
from llama_index.core.node_parser import SentenceSplitter

splitter = SentenceSplitter(
    chunk_size=1024,
    chunk_overlap=20,
)
nodes = splitter.get_nodes_from_documents(documents)
len(nodes)
# create a list of lists each of 10 nodes
nodes_list = [nodes[i : i + 10] for i in range(0, len(nodes), 10)]
print(len(nodes_list))

7


In [9]:
from llama_index.core.indices.property_graph import DynamicLLMPathExtractor

dyn_llm_path_extractor = DynamicLLMPathExtractor(
    llm=llm,
    allowed_entity_props=["description"],
    allowed_relation_props=["description"],
)

In [10]:
from llama_index.core import PropertyGraphIndex
# from llama_index.core.indices.property_graph import SimpleLLMPathExtractor

# index = PropertyGraphIndex(
#     nodes_list[0],
#     embed_model=embed_model,
#     kg_extractors=[
#         dyn_llm_path_extractor
#     ],
#     property_graph_store=graph_store,
#     show_progress=True,
# )

index = PropertyGraphIndex.from_existing(
    property_graph_store=graph_store,
    llm=llm,
    embed_model=embed_model,
    kg_extractors=[dyn_llm_path_extractor],
    show_progress=True,
)


In [46]:
from llama_index.core.indices.property_graph import VectorContextRetriever

vector_retriever = VectorContextRetriever(
    index.property_graph_store,
    # only needed when the graph store doesn't support vector queries
    # vector_store=index.vector_store,
    embed_model=embed_model,
    # include source chunk text with retrieved paths
    include_text=True,
    # the number of nodes to fetch
    similarity_top_k=10,
    # the depth of relations to follow after node retrieval
    path_depth=2,
    # can provide any other kwargs for the VectorStoreQuery class
)


In [25]:
from llama_index.core.response_synthesizers import TreeSummarize
tree_summarize = TreeSummarize(verbose=False)

In [35]:
query_engine = index.as_query_engine(
    include_text=True,
    # sub_retrievers=[vector_retriever],
    similarity_top_k=10,
    path_depth=2,
    response_synthesizer=tree_summarize,
)
response = query_engine.query(
    "Provide all the information available related to Viaweb. Answer in 400 words."
)
print(response)

Viaweb, founded by Robert Morris and another individual (age 29 and 30 respectively at the time of founding), was a startup that eventually became Anomaly.  The founders, despite being relatively older, maintained a frugal lifestyle similar to that of 23-year-olds, possessing minimal assets. This lack of financial burden proved advantageous, allowing for flexibility and a cost-conscious approach to business.  Their pricing strategy reflected this mindset; they offered services at a significantly lower price point than competitors, a daring move that ultimately contributed to their success.  This low-cost approach mirrored the strategy of Apple II, which also benefited from its affordability.

Viaweb successfully raised $2.5 million in funding from angel investors without ever agreeing to vesting. This was unusual for such a large funding amount, as vesting is typically the norm.  The founders' inexperience led to their initial aversion to vesting, a decision that ultimately proved bene

# Insert data in batches of 10 in property graph


In [18]:
index._insert_nodes(nodes_list[1])

Extracting and inferring knowledge graph from text: 100%|██████████| 10/10 [01:29<00:00,  8.95s/it]
Generating embeddings: 100%|██████████| 1/1 [00:01<00:00,  1.63s/it]
Generating embeddings: 100%|██████████| 17/17 [00:05<00:00,  3.13it/s]


[TextNode(id_='20cee9c2-b448-4e8e-adbd-e06d9dba14ed', embedding=[-0.0012225176906213164, -0.004941617604345083, 0.025237299501895905, -0.02307920902967453, -0.012013834901154041, 0.014144434593617916, -0.025443486869335175, -0.004707938525825739, -0.007388371042907238, -0.016192559152841568, 0.027230441570281982, 0.02883870154619217, 0.003257756121456623, 0.002979403594508767, 0.014350621961057186, 0.013807662762701511, 0.02881121076643467, -0.015381556935608387, 0.0014312820276245475, -0.020852388814091682, -0.026501914486289024, -0.012309369631111622, -0.015120387077331543, 0.0047629219479858875, -0.022501884028315544, 0.015890151262283325, 0.01737469993531704, -0.01634376309812069, 0.004171852022409439, 0.015202862210571766, 0.024687469005584717, 0.0072234212420880795, 0.010769839398562908, 0.012233767658472061, -0.03373220935463905, -0.016948578879237175, 0.005570488050580025, -0.0017714907880872488, 0.012433081865310669, 0.005254334304481745, 0.021306000649929047, 0.00780074531212

In [19]:
index.insert_nodes(nodes_list[2])

Extracting and inferring knowledge graph from text: 100%|██████████| 10/10 [01:37<00:00,  9.72s/it]
Generating embeddings: 100%|██████████| 1/1 [00:02<00:00,  2.41s/it]
Generating embeddings: 100%|██████████| 15/15 [00:07<00:00,  2.10it/s]


In [20]:
index.insert_nodes(nodes_list[3])

Extracting and inferring knowledge graph from text: 100%|██████████| 10/10 [01:19<00:00,  7.99s/it]
Generating embeddings: 100%|██████████| 1/1 [00:01<00:00,  1.66s/it]
Generating embeddings: 100%|██████████| 13/13 [00:05<00:00,  2.18it/s]


In [None]:
index.insert_nodes(nodes_list[4])
index.insert_nodes(nodes_list[5])
index.insert_nodes(nodes_list[6])