## Installation

In [None]:
!pip install weaviate-client llama-index==0.8.10

## Connect to Weaviate

In [2]:
import weaviate
# Connect to an embedded Weaviate instance using the default embedded options.
client = weaviate.Client(embedded_options=weaviate.embedded.EmbeddedOptions())

Started /root/.cache/weaviate-embedded: process ID 4518


### Create Schema

In [10]:
def _blog_post_class(class_name, site_name):
    """Return the Weaviate class definition for blog posts from one website.

    Args:
        class_name: Name of the Weaviate class to define.
        site_name: Human-readable site name used in the class description.

    Returns:
        A class-definition dict with a single ``content`` text property.
    """
    return {
        "class": class_name,
        "description": f"Blog post from the {site_name} website.",
        "vectorizer": "text2vec-openai",
        "properties": [
            {
                "name": "content",
                "dataType": ["text"],
                "description": "Content from the blog post.",
            }
        ],
    }


# Both blog sources share the same shape; only the class name and site differ.
schema = {
    "classes": [
        _blog_post_class("WeaviateBlogPost", "Weaviate"),
        _blog_post_class("HuggingFaceBlogPost", "HuggingFace"),
    ]
}

# Create only the classes that are missing, so the cell can be re-run (or run
# against an instance that already holds the schema) without failing on
# "class already exists".
existing_classes = {cls["class"] for cls in client.schema.get().get("classes", [])}
for class_definition in schema["classes"]:
    if class_definition["class"] not in existing_classes:
        client.schema.create_class(class_definition)

print("Schema was created.")

Schema was created.


### Load in Data

In [11]:
from llama_index import download_loader, SimpleWebPageReader
from llama_index.node_parser import SimpleNodeParser
from llama_index.vector_stores import WeaviateVectorStore
from llama_index import VectorStoreIndex
from llama_index.storage.storage_context import StorageContext
import openai

import os
from getpass import getpass

# Never hard-code the key in the notebook: read it from the environment and
# fall back to an interactive prompt so it is not saved with the file.
openai.api_key = os.environ.get("OPENAI_API_KEY") or getpass("OpenAI API key: ")

# download_loader returns the loader class by name; this rebinds the
# SimpleWebPageReader name imported at the top of the cell.
SimpleWebPageReader = download_loader("SimpleWebPageReader")

loader = SimpleWebPageReader(html_to_text=True)


def build_blog_index(urls, index_name, text_key="content"):
    """Load web pages and index them into one Weaviate class.

    Args:
        urls: List of page URLs to load with ``loader``.
        index_name: Name of the Weaviate class that stores the documents.
        text_key: Property of the class that holds the document text.

    Returns:
        A tuple ``(documents, vector_store, storage_context, index)``.
    """
    documents = loader.load_data(urls=urls)
    vector_store = WeaviateVectorStore(
        weaviate_client=client, index_name=index_name, text_key=text_key
    )
    storage_context = StorageContext.from_defaults(vector_store=vector_store)
    index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
    return documents, vector_store, storage_context, index


(
    WeaviateBlog,
    WeaviateBlog_vector_store,
    WeaviateBlog_storage_context,
    WeaviateBlogIndex,
) = build_blog_index(
    ['https://weaviate.io/blog/pq-rescoring'], "WeaviateBlogPost"
)

(
    HuggingFaceBlog,
    HuggingFaceBlog_vector_store,
    HuggingFaceBlog_storage_context,
    HuggingFaceBlogIndex,
) = build_blog_index(
    ['https://huggingface.co/blog/ram-efficient-pytorch-fsdp'], "HuggingFaceBlogPost"
)

## Create the Summary Index and Query Engines

In [12]:
from llama_index.schema import IndexNode

# One-sentence description of each blog source; these texts are what the
# top-level retriever indexes.
summaries = dict(
    Weaviate="This node provides blog posts from Weaviate, a Vector Database.",
    HuggingFace="This node provides blog posts from HuggingFace, tools for training Machine Learning models.",
)

# A query engine per blog index.
WeaviateBlogQueryEngine = WeaviateBlogIndex.as_query_engine()
HuggingFaceBlogQueryEngine = HuggingFaceBlogIndex.as_query_engine()

# Maps each IndexNode's index_id to the query engine behind it.
df_id_query_engine_mapping = dict(
    WeaviateBlogs=WeaviateBlogQueryEngine,
    HuggingFaceBlogs=HuggingFaceBlogQueryEngine,
)

# One IndexNode per source: the summary is the node text, and index_id is the
# key used in df_id_query_engine_mapping.
df_nodes = [
    IndexNode(text=summaries[source], index_id=index_id)
    for source, index_id in [
        ("Weaviate", "WeaviateBlogs"),
        ("HuggingFace", "HuggingFaceBlogs"),
    ]
]

# Index the summaries and return only the single best-matching source per query.
Tool_Description_Index = VectorStoreIndex(df_nodes)
Tool_Retriever = Tool_Description_Index.as_retriever(similarity_top_k=1)

## Build Recursive Retriever

In [13]:
from llama_index.retrievers import RecursiveRetriever
from llama_index.query_engine import RetrieverQueryEngine
from llama_index.response_synthesizers import get_response_synthesizer

# Synthesizer that turns the retrieved content into the final answer.
response_synthesizer = get_response_synthesizer(response_mode="compact")

# Retrieval starts at the "vector" retriever (the summary index); query engines
# are looked up in df_id_query_engine_mapping by the retrieved node's index_id.
recursive_retriever = RecursiveRetriever(
    root_id="vector",
    retriever_dict=dict(vector=Tool_Retriever),
    query_engine_dict=df_id_query_engine_mapping,
    verbose=True,
)

query_engine = RetrieverQueryEngine.from_args(
    retriever=recursive_retriever,
    response_synthesizer=response_synthesizer,
)

In [14]:
# Ask a question covered by the Weaviate blog post, then show the answer
# itself rather than relying on the retriever's verbose trace.
response = query_engine.query("What is Product Quantization?").response
print(response)

[36;1m[1;3mRetrieving with query id None: What is Product Quantization?
[0m[38;5;200m[1;3mRetrieved node with id, entering: WeaviateBlogs
[0m[36;1m[1;3mRetrieving with query id WeaviateBlogs: What is Product Quantization?
[0m[32;1m[1;3mGot response: Product Quantization is a method used to compress vectors, which helps to reduce memory requirements. It works by representing vectors in a more compact format, sacrificing some accuracy in order to save memory. This compression technique is often used in applications where memory efficiency is crucial.
[0m

In [15]:
# Ask a question covered by the HuggingFace blog post, then show the answer
# itself rather than relying on the retriever's verbose trace.
response = query_engine.query("What does FSDP do?").response
print(response)

[36;1m[1;3mRetrieving with query id None: What does FSDP do?
[0m[38;5;200m[1;3mRetrieved node with id, entering: HuggingFaceBlogs
[0m[36;1m[1;3mRetrieving with query id HuggingFaceBlogs: What does FSDP do?
[0m[32;1m[1;3mGot response: FSDP, or Fully Sharded Data Parallelism, is a paradigm in which the optimizer states, gradients, and parameters are sharded across devices. During the forward pass, each FSDP unit performs an all-gather operation to get the complete weights, followed by computation and discarding the shards from other devices. In the backward pass, each FSDP unit performs an all-gather operation to get the complete weights, with computation performed to get the local gradients. These local gradients are then averaged and sharded across the devices via a reduce-scatter operation so that each device can update the parameters of its shard. FSDP enables efficient training of large models in a multi-node, multi-GPU setting.
[0m