https://docs.nomic.ai/vector_database.html#weaviate

In [None]:
import weaviate
import nomic
from nomic import AtlasProject
import numpy as np
import os
from dotenv import load_dotenv
# Load variables from a .env file (if one is found) into the environment.
load_dotenv()

# Log in to Nomic with the key taken from the NOMIC_API_KEY variable.
api_key = os.environ.get('NOMIC_API_KEY')
nomic.login(api_key)

print("Let's a go!")

# The tutorial talks to a Weaviate instance served at localhost:8080.
print("Connecting to weaviate instance on localhost:8080...")
client = weaviate.Client("http://localhost:8080")
print("Client created")

Next we'll gather all of the classes and their respective properties from the database. To do this we will iterate through the database schema, appending each class name to the classes list and the names of that class's text properties to the props list.

In [None]:
schema = client.schema.get()

# Class names, in the order the schema lists them.
classes = [entry["class"] for entry in schema["classes"]]

# For every class (same order as `classes`), the names of the properties
# whose dataType is exactly ["text"].
props = [
    [
        field["name"]
        for field in entry["properties"]
        if field["dataType"] == ["text"]
    ]
    for entry in schema["classes"]
]

Now we will make a helper function that allows us to map classes that are larger than 10,000 data points. It queries the database and accepts a cursor, which lets us keep our place between calls.

In [None]:
def get_batch_with_cursor(
    client, class_name, class_properties, batch_size, cursor=None
):
    """Run one Get query for ``class_name`` and return its result.

    The query asks for ``class_properties`` and additionally requests the
    ``vector`` and ``id`` fields, limited to ``batch_size`` objects.  When
    ``cursor`` is not ``None`` it is handed to ``with_after`` before the
    query runs; otherwise no ``with_after`` clause is added.

    Returns whatever the query's ``do()`` call returns.
    """
    page_query = client.query.get(class_name, class_properties)
    page_query = page_query.with_additional(["vector", "id"])
    page_query = page_query.with_limit(batch_size)

    # Without a cursor there is no position to resume from.
    if cursor is None:
        return page_query.do()
    return page_query.with_after(cursor).do()


The rest of the tutorial will be inside of a for loop. This allows us to create an Atlas Map for all of the classes in the database.

If you would like to map only a single class, set c equal to the class name and p equal to a list with the class properties.

In [None]:
for c, p in zip(classes, props):
    # One embedding project per class, named after the class and using the
    # "id" field as the unique identifier of each data point.
    project = AtlasProject(
        name=c,
        unique_id_field="id",
        modality="embedding",
    )

Now we use a while loop to read all of the data from each class in batches, using our helper function. Here the batch size is 25, but it could be as large as 10,000. We break out of the while loop when a call to the helper function returns no values.
Otherwise, we set our cursor to the id of the last data point in the batch and collect the batch's vectors in a list, which we then convert into a numpy array.

## To Not Include Properties
To exclude a property, add its name to the list named not_data. If the property is an additional property (one returned under `_additional`), add its name to un_data instead.

We then parse our data only including the properties we want. Finally we add the embeddings to our atlas project along with our parsed data.

In [None]:
# Names listed here are left out of the rows sent to Atlas:
#   not_data - top-level keys of each returned object
#   un_data  - keys inside each object's "_additional" entry
not_data = ["_additional"]
un_data = ["vector"]

cursor = None
while True:
    response = get_batch_with_cursor(client, c, p, 25, cursor)
    batch = response["data"]["Get"][c]
    if len(batch) == 0:
        # An empty batch ends the loop.
        break

    # The id of the last object becomes the cursor for the next request.
    cursor = batch[-1]["_additional"]["id"]
    embeddings = np.array([obj["_additional"]["vector"] for obj in batch])

    data = []
    for obj in batch:
        # Start from the kept top-level keys, then fold in the kept
        # "_additional" keys (these win if a key appears in both).
        row = {
            key: value for key, value in obj.items() if key not in not_data
        }
        row.update(
            (key, value)
            for key, value in obj["_additional"].items()
            if key not in un_data
        )
        data.append(row)

    with project.wait_for_project_lock():
        project.add_embeddings(
            embeddings=embeddings,
            data=data,
        )


Finally we will build our map with the given parameters using create_index()

Add Topic Labels

If you want topic labels on your Atlas map, add the following argument to the create_index() call, using the name of the property that the labels should be built from: topic_label_field="PROPERTY NAME"

In [None]:
# Build the map for this project: it is named after the class `c`, the
# property names in `p` are passed as colorable fields, and a topic model
# is requested.
project.create_index(
    name=c,
    colorable_fields=p,
    build_topic_model=True,
)

https://raw.githubusercontent.com/nomic-ai/maps/main/maps/weaviate_script.py

In [None]:
import weaviate
from nomic import AtlasProject
import numpy as np
import nomic
import os
from dotenv import load_dotenv
# Read a .env file (if one is found) so NOMIC_API_KEY can be supplied there.
load_dotenv()

# Nomic login uses whatever value NOMIC_API_KEY holds in the environment.
api_key = os.environ.get('NOMIC_API_KEY')
nomic.login(api_key)

print("Let's a go!")

# Connect to the Weaviate instance served at localhost:8080.
print("Connecting to weaviate instance on localhost:8080...")
client = weaviate.Client("http://localhost:8080")
print("Client created")

schema = client.schema.get()

# Parallel lists: classes[i] is a class name and props[i] holds the names
# of that class's properties whose dataType is exactly ["text"].
classes, props = [], []
for class_def in schema["classes"]:
    classes.append(class_def["class"])
    props.append(
        [
            prop["name"]
            for prop in class_def["properties"]
            if prop["dataType"] == ["text"]
        ]
    )


def get_batch_with_cursor(
    client, class_name, class_properties, batch_size, cursor=None
):
    """Query up to ``batch_size`` objects of ``class_name``.

    ``class_properties`` are the properties requested for each object; the
    ``vector`` and ``id`` additional fields are always requested as well.
    A ``cursor`` other than ``None`` is passed to ``with_after`` before the
    query is executed.

    Returns the result of the query's ``do()`` call.
    """
    builder = (
        client.query.get(class_name, class_properties)
        .with_additional(["vector", "id"])
        .with_limit(batch_size)
    )

    # Attach the resume position only when the caller provided one.
    if cursor is not None:
        builder = builder.with_after(cursor)
    return builder.do()

# If you get one of these errors:
# nomic.project:wait_for_project_lock:904 - DriftBenchmark: Waiting for Project Lock Release.
# All you can do is restart your Python kernel
# And change the name of the project, specified below by 'c'
# In this drift benchmark project, there should only be one class: DriftBenchmark
for c, p in zip(classes, props):
    # Every batch read for this class is uploaded into this project.
    project = AtlasProject(
        name='DriftBenchmark002',  # change this
        unique_id_field="id",
        modality="embedding",
    )

    # Keys left out of the uploaded rows: `not_data` applies to the
    # top-level keys of an object, `un_data` to its "_additional" keys.
    not_data = ["_additional"]
    un_data = ["vector"]

    count = 0  # number of batch requests made, including the final empty one
    cursor = None
    while True:
        response = get_batch_with_cursor(client, c, p, 25, cursor)
        count += 1
        batch = response["data"]["Get"][c]
        if len(batch) == 0:
            # An empty batch ends the loop.
            break

        # The id of the last object becomes the cursor for the next request.
        cursor = batch[-1]["_additional"]["id"]
        embeddings = np.array(
            [obj["_additional"]["vector"] for obj in batch]
        )

        data = []
        for obj in batch:
            # Kept top-level keys first, then the kept "_additional" keys
            # (these win if a key appears in both).
            row = {
                key: value
                for key, value in obj.items()
                if key not in not_data
            }
            row.update(
                (key, value)
                for key, value in obj["_additional"].items()
                if key not in un_data
            )
            data.append(row)

        with project.wait_for_project_lock():
            project.add_embeddings(
                embeddings=embeddings,
                data=data,
            )

    # Once all batches are uploaded, rebuild the maps and create the index
    # named after the class.
    project.rebuild_maps()
    project.create_index(
        name=c,
        colorable_fields=p,
        build_topic_model=True,
    )

If all goes well, you should see something like this:
nomic.project:_create_project:779 - Creating project `DriftBenchmark002` in organization `zaidanseiko`
nomic.project:_add_data:1401 - Upload succeeded.
nomic.project:rebuild_maps:1472 - Updating maps in project `DriftBenchmark002`
nomic.project:create_index:1111 - Created map `DriftBenchmark` in project `DriftBenchmark002`: 
https://atlas.nomic.ai/map/13567d0e-61f6-425a-b27d-54483931ef0a/3d26669f-746a-48d6-9755-9a6d60305a1b

Open that URL and explore your benchmark data.
You should see something like this:

![Shows a visualization of the vectors generated above](img/frightened-bronco.png)