## Connect to Weaviate

In [1]:
import weaviate 

# Endpoint of the hosted Weaviate instance used throughout this notebook.
WEAVIATE_URL = "https://llama2-example-tmf15eda.weaviate.network"

client = weaviate.Client(url=WEAVIATE_URL)

# Fetching the schema doubles as a connectivity check; as the last expression
# of the cell, the returned dict is displayed as the cell output.
client.schema.get()

{'classes': [{'class': 'BlogPost',
   'description': 'Blog post from the Weaviate website.',
   'invertedIndexConfig': {'bm25': {'b': 0.75, 'k1': 1.2},
    'cleanupIntervalSeconds': 60,
    'stopwords': {'additions': None, 'preset': 'en', 'removals': None}},
   'moduleConfig': {'text2vec-openai': {'model': 'ada',
     'modelVersion': '002',
     'type': 'text',
     'vectorizeClassName': True}},
   'multiTenancyConfig': {'enabled': False},
   'properties': [{'dataType': ['text'],
     'description': 'Content from the blog post',
     'indexFilterable': True,
     'indexSearchable': True,
     'moduleConfig': {'text2vec-openai': {'skip': False,
       'vectorizePropertyName': False}},
     'name': 'content',
     'tokenization': 'word'}],
   'replicationConfig': {'factor': 1},
   'shardingConfig': {'virtualPerPhysical': 128,
    'desiredCount': 1,
    'actualCount': 1,
    'desiredVirtualCount': 128,
    'actualVirtualCount': 128,
    'key': '_id',
    'strategy': 'hash',
    'function'

## Create Schema 

### Blog Post Schema

In [2]:
# The single text property that holds the body of a blog post.
blog_content_property = {
    "name": "Content",
    "dataType": ["text"],
    "description": "Content from the blog post",
}

# Class definition for blog posts; "vectorizer" selects text2vec-openai.
blog_post_class = {
    "class": "BlogPost",
    "description": "Blog post from the Weaviate website.",
    "vectorizer": "text2vec-openai",
    "properties": [blog_content_property],
}

blog_post_schema = {"classes": [blog_post_class]}

# WARNING: delete_all() clears the existing schema on this instance, so the
# notebook starts from a clean slate before the class is created.
client.schema.delete_all()
client.schema.create(blog_post_schema)

print("Schema was created.")

Schema was created.


### Podcast Schema

In [3]:
# The single text property that holds the podcast content.
podcast_content_property = {
    "name": "Content",
    "dataType": ["text"],
    "description": "Content from the podcasts.",
}

# Class definition for podcasts; "vectorizer" selects text2vec-openai.
podcast_class = {
    "class": "Podcast",
    "description": "Weaviate podcast",
    "vectorizer": "text2vec-openai",
    "properties": [podcast_content_property],
}

podcast_schema = {"classes": [podcast_class]}

# Register the Podcast class on the connected Weaviate instance.
client.schema.create(podcast_schema)

print("Schema was created.")

Schema was created.


## Load Data

### Load blogs

In [4]:
from llama_index import SimpleDirectoryReader

# Point the directory reader at ./data, then load its contents as the blog documents.
blog_reader = SimpleDirectoryReader("./data")
blogs = blog_reader.load_data()

### Load podcasts (release podcasts only)

In [5]:
from llama_index import download_loader

# Obtain the YouTube transcript loader class by name via download_loader.
YoutubeTranscriptReader = download_loader("YoutubeTranscriptReader")

# Videos whose transcripts become the podcast documents.
podcast_urls = [
    "https://www.youtube.com/watch?v=xk28RMhRy1U&t=2302s",
    "https://www.youtube.com/watch?v=Du6IphCcCec",
    "https://www.youtube.com/watch?v=Q7f2JeuMN7E&t=578s",
    "https://www.youtube.com/watch?v=nSCUk5pHXlo&t=22s",
]

loader = YoutubeTranscriptReader()
podcasts = loader.load_data(ytlinks=podcast_urls)

  for item in lines:
  return cls.list_transcripts(video_id, proxies, cookies).find_transcript(languages).fetch()
  return cls.list_transcripts(video_id, proxies, cookies).find_transcript(languages).fetch()
  return cls.list_transcripts(video_id, proxies, cookies).find_transcript(languages).fetch()
  return cls.list_transcripts(video_id, proxies, cookies).find_transcript(languages).fetch()


## Build the Indices

### Blogs Index

In [None]:
from llama_index.vector_stores import WeaviateVectorStore
from llama_index import VectorStoreIndex, ListIndex
from llama_index.storage.storage_context import StorageContext

# Weaviate-backed vector store for the blogs index.
# NOTE(review): class_prefix presumably namespaces the Weaviate class that
# llama_index writes to -- confirm against the installed llama_index version.
vector_store = WeaviateVectorStore(
    weaviate_client=client,
    class_prefix="Blogs_index",
)

# Storage context that routes the index's vectors through the store above.
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Build the vector index over the loaded blog documents.
blogs_index = VectorStoreIndex.from_documents(
    blogs,
    storage_context=storage_context,
)

### Podcast Index

In [None]:
# Weaviate-backed vector store for the podcasts index.
# Note: this rebinds `vector_store` and `storage_context` from the blogs cell.
vector_store = WeaviateVectorStore(
    weaviate_client=client,
    class_prefix="Podcasts_index",
)

# Storage context that routes the index's vectors through the store above.
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Build the vector index over the loaded podcast transcripts.
podcasts_index = VectorStoreIndex.from_documents(
    podcasts,
    storage_context=storage_context,
)

### Meeting Notes Index

In [6]:
from llama_index import SimpleDirectoryReader

# Point the directory reader at ./meeting-notes, then load its contents.
notes_reader = SimpleDirectoryReader("./meeting-notes")
meetingNotes = notes_reader.load_data()

In [7]:
from llama_index.node_parser import SimpleNodeParser

# Convert the loaded meeting-notes documents into nodes with a default-configured parser.
parser = SimpleNodeParser()
nodes = parser.get_nodes_from_documents(meetingNotes)

In [9]:
from llama_index import VectorStoreIndex, ListIndex


# Unlike the blogs and podcasts indices, the meeting notes use a ListIndex
# built directly from the parsed nodes (no Weaviate vector store is passed).
notes_index = ListIndex(nodes)

## Summary of each Index

In [None]:
# Summary text for the blogs index; later collected into `index_summaries`.
blogs_index_summary = (
    "\n"
    "This index contains all of the blog posts that are on Weaviate.io.\n"
)

In [None]:
# Summary text for the podcasts index; later collected into `index_summaries`.
podcasts_index_summary = (
    "\n"
    "This index contains the Weaviate podcasts about new releases.\n"
)

In [None]:
# Summary text for the meeting-notes index; later collected into `index_summaries`.
meeting_index_summary = (
    "\n"
    "This index contains notes from a client named Connor.\n"
)

In [None]:
# Summaries in the order blogs, podcasts, meeting notes -- the same order in
# which the indices are handed to the graph.
index_summaries = [
    blogs_index_summary,
    podcasts_index_summary,
    meeting_index_summary,
]

# Assign each index an explicit, readable id.
for index, index_id in (
    (blogs_index, "blogs_index"),
    (podcasts_index, "podcasts_index"),
    (notes_index, "notes_index"),
):
    index.set_index_id(index_id)

## Query Time

In [33]:
from llama_index.indices.composability import ComposableGraph

# Child indices, listed in the same order as their entries in `index_summaries`.
child_indices = [blogs_index, podcasts_index, notes_index]

# Compose the children under a ListIndex root, supplying one summary per child.
graph = ComposableGraph.from_indices(
    ListIndex,
    child_indices,
    index_summaries=index_summaries,
)

In [34]:
# Query engine for the graph's root index, using the "default" retriever mode.
root_query_engine = graph.root_index.as_query_engine(retriever_mode="default")

# Mapping of index id -> query engine; only the root index is overridden here.
custom_query_engines = {graph.root_id: root_query_engine}

# Graph-level engine that applies the override above.
query_engine = graph.as_query_engine(custom_query_engines=custom_query_engines)

In [35]:
# A question that touches both general documentation and the meeting notes.
question = "What is multi-tenancy? Why is it an important feature for Connor's application?"
response = query_engine.query(question)

# print() converts its argument with str(), so no explicit str() call is needed.
print(response)


Multi-tenancy is a software architecture that allows multiple tenants to use the same instance of an application, with each tenant having their own isolated data and resources. It is an important feature for Connor's application because it allows them to scale to millions of tenants, while still providing each tenant with their own isolated environment. This ensures that each tenant's data is secure and that their experience is tailored to their specific needs. Additionally, multi-tenancy allows for access isolation, speed, easy on and offboarding, resource boundaries, cost-efficiency, GDPR-compliant deletes with one command, efficient querying, and massive scale.


In [36]:
# Show the sources attached to the response, as formatted by the response object.
formatted_sources = response.get_formatted_sources()
print(formatted_sources)

> Source (Doc id: 7b6f1041-f4b3-448d-98f1-dcc51ddef203): Multi-tenancy is a feature that allows multiple distinct users or user groups to be served from a...

> Source (Doc id: 92c81eb0-661b-40bd-ac2a-d69ee0060cb5): Multi-tenancy is a feature that allows an application to serve multiple tenants (users or organiz...

> Source (Doc id: 03be7386-0ab1-4d55-8264-48352bcf674f): Multi-tenancy is a software architecture that allows multiple tenants to use the same instance of...

> Source (Doc id: eb7879c8-16c2-40a1-b1e0-7049a7c6791f): title: Multi-Tenancy Vector Search with millions of tenants


Large-scale setups were always a gr...

> Source (Doc id: f62822a9-c7a0-4f92-8550-17bc39c127a2): would typically query less than 0.01% of the index. What a waste of resources. Additionally, drop...

> Source (Doc id: 46284cbd-ff6f-4fb8-9bbc-d512fdf31ca6): testing you do this before any
release but I think
for for this release this was the most
amount ...

> Source (Doc id: f50e8f31-3f89-4428-b2d9-5cdb