# Tutorials

## Schemas in detail

In [1]:
import weaviate
import json
import os

# Read the OpenAI API key from the environment instead of hard-coding it.
api_tkn = os.environ["OPENAI_API_KEY"]

# Extra header handed to the Weaviate client. Use "X-Cohere-Api-Key" or
# "X-HuggingFace-Api-Key" as the header name for those providers instead.
inference_headers = {"X-OpenAI-Api-Key": api_tkn}

# Replace the URL with your own endpoint.
client = weaviate.Client(
    url="https://jptemp.weaviate.network/",
    additional_headers=inference_headers,
)

In [2]:
# Remove the "Question" class from the schema before it is (re)created below.
# WARNING: THIS WILL DELETE ALL DATA IN THIS CLASS.
client.schema.delete_class("Question")  # Replace "Question" with your own class name

In [3]:
# Fetch the current schema and pretty-print it as indented JSON.
schema = client.schema.get()
schema_json = json.dumps(schema, indent=4)
print(schema_json)

{
    "classes": []
}


In [4]:
# Continues with the `client` created in the first cell. To run this cell on
# its own, import `weaviate` and `json` and create the client first, e.g.
# weaviate.Client("https://jptemp.weaviate.network/")  (replace with your endpoint).

# Every property of the class holds text; listed as (name, description) pairs.
question_properties = [
    ("question", "The question"),
    ("answer", "The answer"),
    ("category", "The category"),
]

# Definition of the class "Question".
class_obj = {
    "class": "Question",
    "description": "Information from a Jeopardy! question",  # description of the class
    "properties": [
        {"dataType": ["text"], "description": description, "name": name}
        for name, description in question_properties
    ],
    "vectorizer": "text2vec-openai",
}

# Add the class to the schema, then read the schema back and print it.
client.schema.create_class(class_obj)
schema = client.schema.get()
print(json.dumps(schema, indent=4))

{
    "classes": [
        {
            "class": "Question",
            "description": "Information from a Jeopardy! question",
            "invertedIndexConfig": {
                "bm25": {
                    "b": 0.75,
                    "k1": 1.2
                },
                "cleanupIntervalSeconds": 60,
                "stopwords": {
                    "additions": null,
                    "preset": "en",
                    "removals": null
                }
            },
            "moduleConfig": {
                "text2vec-openai": {
                    "model": "ada",
                    "modelVersion": "002",
                    "type": "text",
                    "vectorizeClassName": true
                }
            },
            "properties": [
                {
                    "dataType": [
                        "text"
                    ],
                    "description": "The question",
                    "moduleConfig": {
                    

## Imports in detail

In [5]:
# Continues with the `client` created in the first cell (that cell shows the
# required imports and the API-key header).

# ===== import data =====
# `requests` is imported here because only this cell uses it.
import requests

# Download the sample Jeopardy! questions.
url = 'https://raw.githubusercontent.com/weaviate-tutorials/quickstart/main/data/jeopardy_tiny.json'
resp = requests.get(url, timeout=30)  # do not hang forever on a stalled connection
resp.raise_for_status()  # stop on an HTTP error instead of failing later with a JSON decode error
data = resp.json()

# Batch import all Questions
with client.batch as batch:
    batch.batch_size = 100
    for d in data:
        # Map the dataset's capitalised keys onto the class's property names.
        properties = {
            "answer": d["Answer"],
            "question": d["Question"],
            "category": d["Category"],
        }

        # Add through the context-managed `batch` handle rather than reaching
        # back to `client.batch`.
        batch.add_data_object(properties, "Question")

In [6]:
# Fetch the "question" and "answer" properties of the stored Question objects.
# Each property is its own list entry, matching the other queries in this notebook.
client.query.get("Question", ["question", "answer"]).do()

{'data': {'Get': {'Question': [{'answer': 'the diamondback rattler',
     'question': 'Heaviest of all poisonous snakes is this North American rattlesnake'},
    {'answer': 'Antelope',
     'question': 'Weighing around a ton, the eland is the largest species of this animal in Africa'},
    {'answer': 'wire',
     'question': 'A metal that is ductile can be pulled into this while cold & under pressure'},
    {'answer': 'the nose or snout',
     'question': 'The gavial looks very much like a crocodile except for this bodily feature'},
    {'answer': 'Liver',
     'question': 'This organ removes excess glucose from the blood & stores it as glycogen'},
    {'answer': 'DNA',
     'question': 'In 1953 Watson & Crick built a model of the molecular structure of this, the gene-carrying substance'},
    {'answer': 'the atmosphere',
     'question': 'Changes in the tropospheric layer of this are what gives us weather'},
    {'answer': 'species',
     'question': "2000 news: the Gunnison sage grou

## Queries in detail

In [7]:
# Continues with the `client` created in the first cell; to run this cell on
# its own, first create a weaviate.Client with your endpoint URL and an
# "X-OpenAI-Api-Key" header holding your API key.

# Search by the concept "biology" and return at most two Question objects.
nearText = {"concepts": ["biology"]}

get_query = client.query.get("Question", ["question", "answer", "category"])
get_query = get_query.with_near_text(nearText)
get_query = get_query.with_limit(2)
result = get_query.do()

print(json.dumps(result, indent=4))

{
    "data": {
        "Get": {
            "Question": [
                {
                    "answer": "DNA",
                    "category": "SCIENCE",
                    "question": "In 1953 Watson & Crick built a model of the molecular structure of this, the gene-carrying substance"
                },
                {
                    "answer": "species",
                    "category": "SCIENCE",
                    "question": "2000 news: the Gunnison sage grouse isn't just another northern sage grouse, but a new one of this classification"
                }
            ]
        }
    }
}


In [8]:
import openai

# Create the embedding for "biology" directly with OpenAI, using the same API
# key that was read from the environment in the first cell.
openai.api_key = api_tkn
model = "text-embedding-ada-002"
oai_resp = openai.Embedding.create(input=["biology"], model=model)
oai_embedding = oai_resp['data'][0]['embedding']

# Search with the raw vector and a certainty threshold of 0.7, returning at
# most two Question objects.
near_vector = {"vector": oai_embedding, "certainty": 0.7}
result = (
    client.query.get("Question", ["question", "answer"])
    .with_near_vector(near_vector)
    .with_limit(2)
    .do()
)

In [9]:
# Display the `result` of the vector search assigned in the previous cell.
result

{'data': {'Get': {'Question': [{'answer': 'DNA',
     'question': 'In 1953 Watson & Crick built a model of the molecular structure of this, the gene-carrying substance'},
    {'answer': 'species',
     'question': "2000 news: the Gunnison sage grouse isn't just another northern sage grouse, but a new one of this classification"}]}}}

In [10]:
# "biology" concept search that also requests the certainty of each match.
nearText = {"concepts": ["biology"]}
returned_properties = ["question", "answer", "category"]

certainty_query = client.query.get("Question", returned_properties)
certainty_query = certainty_query.with_near_text(nearText).with_limit(2)
certainty_query = certainty_query.with_additional(['certainty'])
result = certainty_query.do()

print(json.dumps(result, indent=4))

{
    "data": {
        "Get": {
            "Question": [
                {
                    "_additional": {
                        "certainty": 0.9016839563846588
                    },
                    "answer": "DNA",
                    "category": "SCIENCE",
                    "question": "In 1953 Watson & Crick built a model of the molecular structure of this, the gene-carrying substance"
                },
                {
                    "_additional": {
                        "certainty": 0.8992441892623901
                    },
                    "answer": "species",
                    "category": "SCIENCE",
                    "question": "2000 news: the Gunnison sage grouse isn't just another northern sage grouse, but a new one of this classification"
                }
            ]
        }
    }
}


In [11]:
# Filter on the "category" property being equal to "ANIMALS".
where_filter = dict(path=["category"], operator="Equal", valueText="ANIMALS")

# Fetch question, answer and category for the objects passing the filter.
filtered_query = client.query.get("Question", ["question", "answer", "category"])
result = filtered_query.with_where(where_filter).do()

print(json.dumps(result, indent=4))

{
    "data": {
        "Get": {
            "Question": [
                {
                    "answer": "the diamondback rattler",
                    "category": "ANIMALS",
                    "question": "Heaviest of all poisonous snakes is this North American rattlesnake"
                },
                {
                    "answer": "the nose or snout",
                    "category": "ANIMALS",
                    "question": "The gavial looks very much like a crocodile except for this bodily feature"
                },
                {
                    "answer": "Antelope",
                    "category": "ANIMALS",
                    "question": "Weighing around a ton, the eland is the largest species of this animal in Africa"
                },
                {
                    "answer": "Elephant",
                    "category": "ANIMALS",
                    "question": "It's the only living mammal in the order Proboseidea"
                }
            ]
   

In [12]:
# Combine a "biology" concept search with a filter on category == "ANIMALS",
# returning at most two objects together with their certainty.
nearText = dict(concepts=["biology"])
where_filter = dict(path=["category"], operator="Equal", valueText="ANIMALS")

combined_query = client.query.get("Question", ["question", "answer", "category"])
combined_query = combined_query.with_near_text(nearText)
combined_query = combined_query.with_limit(2)
combined_query = combined_query.with_additional(['certainty'])
combined_query = combined_query.with_where(where_filter)
result = combined_query.do()

print(json.dumps(result, indent=4))

{
    "data": {
        "Get": {
            "Question": [
                {
                    "_additional": {
                        "certainty": 0.8898123800754547
                    },
                    "answer": "the nose or snout",
                    "category": "ANIMALS",
                    "question": "The gavial looks very much like a crocodile except for this bodily feature"
                },
                {
                    "_additional": {
                        "certainty": 0.8876416385173798
                    },
                    "answer": "Elephant",
                    "category": "ANIMALS",
                    "question": "It's the only living mammal in the order Proboseidea"
                }
            ]
        }
    }
}


In [13]:
# Aggregate over the "Question" class, requesting only the meta count field.
count_query = client.query.aggregate("Question").with_fields("meta {count}")
result = count_query.do()

print(json.dumps(result, indent=4))

{
    "data": {
        "Aggregate": {
            "Question": [
                {
                    "meta": {
                        "count": 10
                    }
                }
            ]
        }
    }
}


In [14]:
# Same meta count aggregation, restricted by a filter on category == "ANIMALS".
where_filter = dict(path=["category"], operator="Equal", valueText="ANIMALS")

filtered_count_query = client.query.aggregate("Question")
filtered_count_query = filtered_count_query.with_fields("meta {count}")
filtered_count_query = filtered_count_query.with_where(where_filter)
result = filtered_count_query.do()

print(json.dumps(result, indent=4))

{
    "data": {
        "Aggregate": {
            "Question": [
                {
                    "meta": {
                        "count": 4
                    }
                }
            ]
        }
    }
}
