## Connect to Weaviate

In [1]:
import weaviate

# Endpoint of the Weaviate instance this notebook talks to.
WEAVIATE_URL = "https://llamaindex-test-hopqzn3c.weaviate.network"

client = weaviate.Client(url=WEAVIATE_URL)

# Fetching the schema doubles as a connection test; as the last expression
# of the cell, its result is displayed below.
client.schema.get()

{'classes': [{'class': 'BlogPost',
   'description': 'Blog post from the Weaviate website.',
   'invertedIndexConfig': {'bm25': {'b': 0.75, 'k1': 1.2},
    'cleanupIntervalSeconds': 60,
    'stopwords': {'additions': None, 'preset': 'en', 'removals': None}},
   'moduleConfig': {'generative-openai': {'model': 'gpt-3.5-turbo'},
    'text2vec-openai': {'model': 'ada',
     'modelVersion': '002',
     'type': 'text',
     'vectorizeClassName': True}},
   'properties': [{'dataType': ['text'],
     'description': 'Content from the blog post',
     'indexFilterable': True,
     'indexSearchable': True,
     'moduleConfig': {'text2vec-openai': {'skip': False,
       'vectorizePropertyName': False}},
     'name': 'content',
     'tokenization': 'word'}],
   'replicationConfig': {'factor': 1},
   'shardingConfig': {'virtualPerPhysical': 128,
    'desiredCount': 1,
    'actualCount': 1,
    'desiredVirtualCount': 128,
    'actualVirtualCount': 128,
    'key': '_id',
    'strategy': 'hash',
    'function': 'murmur3'},
   ... (remaining output truncated)

## Create Schema

In [2]:
# Schema with a single `BlogPost` class: vectorized with text2vec-openai and
# configured for the generative-openai module.
schema = {
    "classes": [
        {
            "class": "BlogPost",
            "description": "Blog post from the Weaviate website.",
            "vectorizer": "text2vec-openai",
            "moduleConfig": {
                "generative-openai": {
                    "model": "gpt-3.5-turbo"
                }
            },
            "properties": [
                {
                    # Lower-case on purpose: this is the spelling shown in the
                    # stored schema and the one passed as `text_key` when the
                    # WeaviateVectorStore is constructed.
                    "name": "content",
                    "dataType": ["text"],
                    "description": "Content from the blog post",
                }
            ]
        }
    ]
}

# WARNING: delete_all() removes the *entire* schema of the instance together
# with all stored data, not just `BlogPost`. Only run this against a
# throw-away sandbox instance.
client.schema.delete_all()

client.schema.create(schema)

print("Schema was created.")

Schema was created.


## Load in Data

In [3]:
from llama_index import download_loader, SimpleWebPageReader

# Use the SimpleWebPageReader imported above directly. Rebinding the name via
# download_loader("SimpleWebPageReader") would shadow this import and fetch
# loader code at run time, which is unnecessary here.
# NOTE(review): `download_loader` is left imported in case another cell
# relies on it -- drop it if nothing does.
loader = SimpleWebPageReader()

# `blog` holds the documents loaded from the given URL.
blog = loader.load_data(urls=['https://weaviate.io/blog/llamaindex-and-weaviate'])

  for item in lines:


## Parse the Documents 

In [4]:
from llama_index.node_parser import SimpleNodeParser

# Node parser created with its default settings.
parser = SimpleNodeParser()

# Turn the documents loaded into `blog` into nodes; `nodes` is what the
# vector index is built from later in the notebook.
nodes = parser.get_nodes_from_documents(blog)

## Construct Vector Store

In [7]:
import getpass
import os

from llama_index.vector_stores import WeaviateVectorStore
from llama_index import VectorStoreIndex, StorageContext

# SECURITY: never hard-code the OpenAI API key in the notebook. Read it from
# the environment and fall back to an interactive prompt (input is not
# echoed). Any key that was previously committed in this cell must be
# considered leaked and should be revoked.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API key: ")

# Vector store backed by the `BlogPost` class; `text_key` names the property
# that holds the node text.
vector_store = WeaviateVectorStore(
    weaviate_client=client,
    index_name="BlogPost",
    text_key="content",
)

# Storage context that routes the index's vectors into Weaviate.
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Build the index from the parsed nodes. This step needs a valid OpenAI key:
# with an invalid one it fails with RetryError / AuthenticationError.
index = VectorStoreIndex(nodes, storage_context=storage_context)

query_engine = index.as_query_engine()

RetryError: RetryError[<Future at 0x13c020ad0 state=finished raised AuthenticationError>]

## Set up Sub Question Query Engine

In [None]:
from llama_index.tools import QueryEngineTool, ToolMetadata
from llama_index.query_engine import SubQuestionQueryEngine

# Build the base engine from `index` under its own name instead of reading the
# global `query_engine`: that name is rebound at the bottom of this cell, so
# reading it here would make a second run wrap the sub-question engine in
# itself.
blog_query_engine = index.as_query_engine()

# One tool per data source; the metadata gives the tool its name and
# description.
query_engine_tools = [
    QueryEngineTool(
        query_engine=blog_query_engine,
        metadata=ToolMetadata(
            name='BlogPost',
            description='Blog post about the integration of LlamaIndex and Weaviate',
        ),
    )
]

# `query_engine` is kept as the name of the final engine because the query
# cell calls `query_engine.aquery(...)`.
query_engine = SubQuestionQueryEngine.from_defaults(query_engine_tools=query_engine_tools)

## Query Time

In [None]:
response = await query_engine.aquery('How does LlamaIndex help data indexing in Weaviate?')

In [None]:
# Print the response obtained from the query cell.
print(response)