## Dependencies

In [None]:
!pip install weaviate-client

## Configuration

In [None]:
import json
import os

import weaviate

# Connection settings are read from environment variables when they are set, so
# real secrets do not have to be typed into (and saved with) the notebook.
# Each placeholder string below is only used when its variable is missing.
client = weaviate.Client(
    url=os.environ.get("WEAVIATE_URL", "WEAVIATE-INSTANCE-URL"),  # URL of your Weaviate instance
    auth_client_secret=weaviate.AuthApiKey(
        api_key=os.environ.get("WEAVIATE_API_KEY", "AUTH-KEY")
    ),  # (Optional) If the Weaviate instance requires authentication
    additional_headers={
        "X-PALM-Api-Key": os.environ.get("PALM_API_KEY", "PALM-API-KEY"),  # Replace with your PALM key
    },
)

client.schema.get()  # Get the schema to test connection

### Expired Google Cloud Token

Google Cloud's OAuth 2.0 access tokens have a lifetime of only **one** hour. This means you have to replace the expired token with a valid one and pass it to Weaviate by re-instantiating the client.

#### Option 1: With Google Cloud CLI

In [None]:
import subprocess
import weaviate

def refresh_token() -> "str | None":
    """Fetch a fresh Google Cloud access token through the gcloud CLI.

    Runs ``gcloud auth print-access-token`` and returns what it prints, with
    surrounding whitespace removed.

    Returns:
        The access token, or ``None`` when no token could be obtained: the
        gcloud executable could not be started, it exited with a non-zero
        status, or it printed nothing. The reason is printed in each case.
    """
    try:
        result = subprocess.run(
            ["gcloud", "auth", "print-access-token"], capture_output=True, text=True
        )
    except OSError as exc:
        # Raised when the gcloud executable cannot be started (e.g. it is not on PATH).
        print(f"Error refreshing token: {exc}")
        return None

    if result.returncode != 0:
        print(f"Error refreshing token: {result.stderr}")
        return None

    token = result.stdout.strip()
    if not token:
        # A successful exit with no output is still unusable as a token.
        print("Error refreshing token: gcloud returned an empty token")
        return None
    return token

def re_instantiate_weaviate(url: str = "https://some-endpoint.weaviate.network") -> weaviate.Client:
    """Create a new Weaviate client that carries a freshly issued access token.

    Args:
        url: Address of the Weaviate instance. Replace the default with your
            own Weaviate URL.

    Returns:
        A new ``weaviate.Client`` whose ``X-Palm-Api-Key`` header holds the
        token returned by ``refresh_token()``.

    Raises:
        RuntimeError: If ``refresh_token()`` did not return a token. Failing
            here avoids building a client with an unusable header value.
    """
    token = refresh_token()
    if not token:
        raise RuntimeError(
            "Could not obtain a Google Cloud access token; the Weaviate client was not re-created."
        )

    return weaviate.Client(
        url=url,
        additional_headers={
            "X-Palm-Api-Key": token,
        },
    )

# Run this every ~60 minutes: it rebinds `client` to a new instance
# that carries a freshly fetched access token.
client = re_instantiate_weaviate()

Then you could run the below cell periodically.

In [None]:
# Re-create the client so that it uses a fresh access token.
client = re_instantiate_weaviate()

#### Option 2: With `google-auth`

See the google-auth library documentation for [Python](https://google-auth.readthedocs.io/en/master/index.html) and [Node.js](https://cloud.google.com/nodejs/docs/reference/google-auth-library/latest).

In [None]:
from google.auth.transport.requests import Request
from google.oauth2.service_account import Credentials
import weaviate

def get_credentials() -> Credentials:
    """Load the service-account credentials from disk and refresh them.

    Returns:
        The ``Credentials`` object built from the service-account JSON file,
        after ``refresh`` has been called on it.
    """
    service_account_credentials = Credentials.from_service_account_file(
        'path/to/your/service-account.json',
        scopes=['openid'],
    )
    # Refresh right away; the caller reads `.token` from the returned object.
    service_account_credentials.refresh(Request())
    return service_account_credentials

def re_instantiate_weaviate() -> weaviate.Client:
    """Create a new Weaviate client using a token obtained through google-auth.

    Returns:
        A new ``weaviate.Client`` whose ``X-Palm-Api-Key`` header is set to the
        token of the credentials returned by ``get_credentials()``.
    """
    # NOTE: this function has the same name as the gcloud-CLI variant defined
    # earlier in the notebook; whichever definition ran last is the one in effect.
    palm_headers = {
        "X-Palm-Api-Key": get_credentials().token,
    }
    return weaviate.Client(
        url="https://some-endpoint.weaviate.network",  # Replace with your Weaviate URL
        additional_headers=palm_headers,
    )

# Run this every ~60 minutes: it rebinds `client` to a new instance
# that carries a freshly refreshed access token.
client = re_instantiate_weaviate()

Then run the below periodically:

In [None]:
# Re-create the client so that it uses a fresh access token.
client = re_instantiate_weaviate()

## Schema

In [None]:
# resetting the schema. CAUTION: THIS WILL DELETE YOUR DATA
client.schema.delete_all()

# Property names are written in lowercase so that they match the keys used
# when importing the data and in the queries further down.
schema = {
    "classes": [
        {
            "class": "JeopardyQuestion",
            "description": "List of jeopardy questions",
            "vectorizer": "text2vec-palm",
            "moduleConfig": {  # specify the vectorizer and model type you're using
                "text2vec-palm": {
                    "projectId": "YOUR-GOOGLE-CLOUD-PROJECT-ID",  # required. replace with your value: (e.g. "cloud-large-language-models")
                    "apiEndpoint": "YOUR-API-ENDPOINT",  # optional. defaults to "us-central1-aiplatform.googleapis.com".
                    "modelId": "YOUR-GOOGLE-CLOUD-MODEL-ID",  # optional. defaults to "textembedding-gecko".
                }
            },
            "properties": [
                {
                    "name": "category",
                    "dataType": ["text"],
                    "description": "Category of the question",
                },
                {
                    "name": "question",
                    "dataType": ["text"],
                    "description": "The question",
                },
                {
                    "name": "answer",
                    "dataType": ["text"],
                    "description": "The answer",
                },
            ],
        }
    ]
}

client.schema.create(schema)

print("Successfully created the schema.")

## Import the Data

In [None]:
import requests
# Download the sample questions.
url = 'https://raw.githubusercontent.com/weaviate/weaviate-examples/main/jeopardy_small_dataset/jeopardy_tiny.json'
resp = requests.get(url, timeout=30)  # do not hang forever on a stalled connection
resp.raise_for_status()  # stop here on an HTTP error instead of parsing an error page as JSON
data = resp.json()

if client.is_ready():
    # Configure a batch process
    with client.batch as batch:
        batch.batch_size = 100
        # Batch import all Questions
        for i, d in enumerate(data):
            print(f"importing question: {i+1}")

            properties = {
                "answer": d["Answer"],
                "question": d["Question"],
                "category": d["Category"],
            }

            # Add through the handle bound by the `with` statement.
            batch.add_data_object(properties, "JeopardyQuestion")
else:
    print("The Weaviate cluster is not connected.")

## Query Weaviate: Similarity Search (Text objects)

Similarity search options for text objects in **Weaviate**:

1. [nearText](https://weaviate.io/developers/weaviate/api/graphql/vector-search-parameters#neartext)

2. [nearObject](https://weaviate.io/developers/weaviate/api/graphql/vector-search-parameters#nearobject)

3. [nearVector](https://weaviate.io/developers/weaviate/api/graphql/vector-search-parameters#nearvector)

### nearText Example

Find `JeopardyQuestion` objects related to the concept "question about animals". Limit the result to 2 objects and report each object's distance and id.

In [None]:
# Semantic search: look up JeopardyQuestion objects by the concept below.
near_text_filter = {
    "concepts": ["question about animals"]
}

near_text_query = client.query.get("JeopardyQuestion", ["question", "answer"])
near_text_query = near_text_query.with_near_text(near_text_filter)
near_text_query = near_text_query.with_limit(2)  # keep only 2 results
# Also return the distance of the query vector to each object, plus the object's id.
near_text_query = near_text_query.with_additional(["distance", "id"])
response = near_text_query.do()

print(json.dumps(response, indent=2))

### nearObject Example

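Search through the `JeopardyQuestion` class to find the top 2 objects closest to the object with id `5e99ed1d-aef8-41b2-a55b-105810e41560`. (The id was taken from the query above; replace it with an id returned by your own query, since your ids will likely differ.)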

In [None]:
# Object-to-object search: use an existing object as the reference point.
# The id below came from the nearText query; swap in an id from your own results.
near_object_filter = {
    "id": "5e99ed1d-aef8-41b2-a55b-105810e41560"
}

near_object_query = client.query.get("JeopardyQuestion", ["question", "answer"])
near_object_query = near_object_query.with_near_object(near_object_filter)
near_object_query = near_object_query.with_limit(2)  # keep only 2 results
# Also return the distance of the query vector to each object.
near_object_query = near_object_query.with_additional(["distance"])
response = near_object_query.do()

print(json.dumps(response, indent=2))

### nearVector Example

Search through the `JeopardyQuestion` class to find the top 2 objects closest to the query vector `[-0.0125526935, -0.021168863, ... ]`

In [None]:
# `with_near_vector` needs a complete vector. The previous placeholder list ended
# in a literal `...` (Python's Ellipsis object), which is not a number, so the
# cell could not run as written. Instead, reuse the stored vector of one object
# that is already in the database as the query vector.
sample = (
    client.query
    .get("JeopardyQuestion", ["question"])
    .with_additional(["vector"])  # ask for the stored vector of the object
    .with_limit(1)
    .do()
)
stored_objects = sample["data"]["Get"]["JeopardyQuestion"]
if not stored_objects:
    raise RuntimeError("No JeopardyQuestion objects found; import the data before running this query.")
query_vector = stored_objects[0]["_additional"]["vector"]

response = (
    client.query
    .get("JeopardyQuestion", ["question", "answer"])
    .with_near_vector({
        "vector": query_vector
    })
    .with_limit(2) # limit the output to only 2
    .with_additional(["distance"]) # output the distance of the query vector to the objects in the database
    .do()
)

print(json.dumps(response, indent=2))