## Connect to Weaviate

In [1]:
import os
import warnings

import weaviate

# The instance URL can be overridden through the WEAVIATE_URL environment
# variable; when it is unset (or empty) the original demo instance is used.
WEAVIATE_URL = (
    os.getenv("WEAVIATE_URL")
    or "https://llamaindex-sub-question-demo-drx6wazb.weaviate.network"
)

client = weaviate.Client(url=WEAVIATE_URL)

# Fetching the schema doubles as a connection test; as the last expression of
# the cell, the returned schema is shown as the cell output.
client.schema.get()

{'classes': [{'class': 'BlogPost',
   'description': 'Blog post from the Weaviate website.',
   'invertedIndexConfig': {'bm25': {'b': 0.75, 'k1': 1.2},
    'cleanupIntervalSeconds': 60,
    'stopwords': {'additions': None, 'preset': 'en', 'removals': None}},
   'moduleConfig': {'generative-openai': {'model': 'gpt-3.5-turbo'},
    'text2vec-openai': {'model': 'ada',
     'modelVersion': '002',
     'type': 'text',
     'vectorizeClassName': True}},
   'properties': [{'dataType': ['text'],
     'description': 'Content from the blog post',
     'indexFilterable': True,
     'indexSearchable': True,
     'moduleConfig': {'text2vec-openai': {'skip': False,
       'vectorizePropertyName': False}},
     'name': 'content',
     'tokenization': 'word'},
    {'dataType': ['text'],
     'description': "This property was generated by Weaviate's auto-schema feature on Thu Jul  6 15:56:15 2023",
     'indexFilterable': True,
     'indexSearchable': True,
     'moduleConfig': {'text2vec-openai': {'sk

## Create Schema

In [2]:
# Schema with a single "BlogPost" class holding the blog text.
schema = {
    "classes": [
        {
            "class": "BlogPost",
            "description": "Blog post from the Weaviate website.",
            "vectorizer": "text2vec-openai",
            "moduleConfig": {
                "generative-openai": {
                    "model": "gpt-3.5-turbo"
                }
            },
            "properties": [
                {
                    # Lowercase so the declared name matches the
                    # text_key="content" that the vector store is built with
                    # further down in this notebook.
                    "name": "content",
                    "dataType": ["text"],
                    "description": "Content from the blog post",
                }
            ]
        }
    ]
}

# WARNING: delete_all() drops the whole schema of the connected instance, not
# only "BlogPost". Only run this against a disposable/demo instance.
client.schema.delete_all()

client.schema.create(schema)

print("Schema was created.")

Schema was created.


## Load in Data

In [3]:
from llama_index import download_loader, SimpleWebPageReader

# SimpleWebPageReader is already imported from llama_index at the top of this
# cell, so it is used directly instead of being re-fetched (and shadowed) via
# download_loader("SimpleWebPageReader").
loader = SimpleWebPageReader()

# `blog` holds the documents loaded from the single blog-post URL.
blog = loader.load_data(urls=["https://weaviate.io/blog/llamaindex-and-weaviate"])

  for item in lines:


## Parse the Documents 

In [4]:
from llama_index.node_parser import SimpleNodeParser

# Node parser created with its default settings (no arguments passed).
parser = SimpleNodeParser()

# Convert the documents loaded into `blog` into nodes for indexing.
nodes = parser.get_nodes_from_documents(documents=blog)

## Construct Vector Store

In [5]:
from llama_index.vector_stores import WeaviateVectorStore
from llama_index import VectorStoreIndex, StorageContext
from llama_index.storage.storage_context import StorageContext
import os

# Surface a missing OpenAI key early. Presumably the key is needed by the
# embedding/LLM calls made while building and querying the index -- confirm
# against your llama_index configuration.
if not os.getenv("OPENAI_API_KEY"):
    warnings.warn(
        "OPENAI_API_KEY is not set; building or querying the index may fail."
    )

# Vector store backed by the "BlogPost" class; "content" is the property that
# holds the node text.
vector_store = WeaviateVectorStore(
    weaviate_client=client,
    index_name="BlogPost",
    text_key="content",
)

# Storage context that routes the index's vectors to the Weaviate store.
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Build the index from the parsed nodes.
index = VectorStoreIndex(nodes, storage_context=storage_context)

# Default query engine over the index.
query_engine = index.as_query_engine()

## Set up Sub Question Query Engine

In [6]:
from llama_index.tools import QueryEngineTool, ToolMetadata
from llama_index.query_engine import SubQuestionQueryEngine 

# The base engine is built straight from `index` instead of reading the shared
# `query_engine` name: that name is rebound at the end of this cell, so
# re-running the cell would otherwise wrap the sub-question engine in itself.
query_engine_tools = [
    QueryEngineTool(
        query_engine=index.as_query_engine(),
        metadata=ToolMetadata(
            name="BlogPost",
            description="Blog post about the integration of LlamaIndex and Weaviate",
        ),
    )
]

# `query_engine` now refers to the sub-question engine used by the query cells.
query_engine = SubQuestionQueryEngine.from_defaults(query_engine_tools=query_engine_tools)

## Query Time

In [7]:
response = await query_engine.aquery('How does LlamaIndex help data indexing in Weaviate?')

Generated 2 sub questions.
[36;1m[1;3m[BlogPost] Q: What is the integration of LlamaIndex and Weaviate
[0m[36;1m[1;3m[BlogPost] A: 
The integration of LlamaIndex and Weaviate is a combination of Weaviate as the vector database that acts as the external storage provider and LlamaIndex as a data framework for building LLM applications. LlamaIndex provides a comprehensive toolkit for ingestion, management, and querying of external data so that it can be used with an LLM app. It offers connectors to 100+ data sources, ranging from different file formats to APIs to web scrapers. It also supports indexing unstructured, semi-structured, and structured data. Finally, it offers the ability to query the data with a wide variety of data structures and storage integration options.
[0m[33;1m[1;3m[BlogPost] Q: How does LlamaIndex help data indexing in Weaviate
[0m[33;1m[1;3m[BlogPost] A: 
LlamaIndex helps data indexing in Weaviate by providing a comprehensive toolkit for ingestion, manage

In [8]:
# Print the response returned by the query in the previous cell.
print(response)


LlamaIndex helps data indexing in Weaviate by providing a comprehensive toolkit for ingestion, management, and querying of external data. It offers connectors to 100+ data sources, ranging from different file formats (.pdf, .docx, .pptx) to APIs (Notion, Slack, Discord, etc.) to web scrapers (Beautiful Soup, Readability, etc.). These data connectors are primarily hosted on LlamaHub, making it easy for users to integrate data from their existing files and applications. It also supports indexing unstructured, semi-structured, and structured data. Finally, it offers the ability to query the data with a wide variety of data structures and storage integration options.
