In [1]:
# Prerequisite: start the local Weaviate container before running this notebook (`docker-compose up`).
import weaviate
import weaviate.classes as wvc
import os
import requests
import json
from dotenv import load_dotenv

# Read configuration from a local .env file (if any) into the process environment.
load_dotenv()
openai_api_key = os.getenv("OPENAI_KEY")
if not openai_api_key:
    # os.getenv returns None for a missing variable; fail here with a clear
    # message instead of forwarding None as the API-key header value below.
    raise RuntimeError(
        "OPENAI_KEY is not set. Export it or add it to a .env file before running this notebook."
    )

# Connect to the Weaviate instance on localhost (HTTP on 8080, gRPC on 50051).
# The OpenAI key is forwarded as a request header so the OpenAI-backed modules
# configured on the collection can use it.
client = weaviate.connect_to_local(
    port=8080,
    grpc_port=50051,
    additional_config=weaviate.config.AdditionalConfig(timeout=(10, 25)),
    headers={
        "X-OpenAI-Api-Key": openai_api_key,
    },
)

In [2]:
# Get-or-create the "Question" collection so this cell can be re-run against a
# container that already holds it (a plain create() fails when the name exists).
# NOTE: an existing collection is reused as-is; its data and config are not reset.
if client.collections.exists("Question"):
    questions = client.collections.get("Question")
else:
    questions = client.collections.create(
        name="Question",
        # Vectorize objects with OpenAI embeddings; with a "none" vectorizer you
        # would have to supply vectors yourself. Any other text2vec-* module also works.
        vectorizer_config=wvc.config.Configure.Vectorizer.text2vec_openai(),
        # Enables the generative-openai module, needed by the `generate` queries below.
        generative_config=wvc.config.Configure.Generative.openai(),
    )

In [3]:
# Download the tiny Jeopardy sample dataset from the Weaviate quickstart repo.
resp = requests.get(
    'https://raw.githubusercontent.com/weaviate-tutorials/quickstart/main/data/jeopardy_tiny.json',
    timeout=30,  # never hang the kernel indefinitely on a stalled connection
)
resp.raise_for_status()  # surface HTTP errors here rather than as a confusing JSON parse error
data = resp.json()

# Map the dataset's capitalised keys onto the lower-case property names stored in Weaviate.
question_objs = [
    {
        "answer": d["Answer"],
        "question": d["Question"],
        "category": d["Category"],
    }
    for d in data
]

questions = client.collections.get("Question")
insert_result = questions.data.insert_many(question_objs)

# insert_many reports per-object failures in `.errors` instead of raising,
# so check it explicitly to avoid silently importing only part of the data.
if insert_result.errors:
    raise RuntimeError(
        f"{len(insert_result.errors)} object(s) failed to import: {insert_result.errors}"
    )

insert_result  # display the batch result (UUIDs, elapsed time) as the cell output

BatchObjectReturn(all_responses=[UUID('68fab113-99bf-42f3-b4cb-033ae93afb26'), UUID('fb606e50-ed78-4a18-a2e8-83496b6a02cd'), UUID('324a4d24-6ddc-4468-befd-dc591c16ab09'), UUID('d3be47a4-b6e0-4d9c-97f8-5c995ae989ef'), UUID('b7f85617-e5c9-4063-be2f-e018bead71e9'), UUID('18e4a4f8-a62f-4565-9eff-7c975758a298'), UUID('6072f010-9b41-49d4-a193-147f5d642714'), UUID('2cdcc266-301c-4bdb-9e65-d00b859b4386'), UUID('4298ab2d-4082-413a-8ad6-d76e60dd2b64'), UUID('a28b10df-df9d-42b5-ac65-dbddcced06c3')], elapsed_seconds=0.5401656627655029, errors={}, uuids={0: UUID('68fab113-99bf-42f3-b4cb-033ae93afb26'), 1: UUID('fb606e50-ed78-4a18-a2e8-83496b6a02cd'), 2: UUID('324a4d24-6ddc-4468-befd-dc591c16ab09'), 3: UUID('d3be47a4-b6e0-4d9c-97f8-5c995ae989ef'), 4: UUID('b7f85617-e5c9-4063-be2f-e018bead71e9'), 5: UUID('18e4a4f8-a62f-4565-9eff-7c975758a298'), 6: UUID('6072f010-9b41-49d4-a193-147f5d642714'), 7: UUID('2cdcc266-301c-4bdb-9e65-d00b859b4386'), 8: UUID('4298ab2d-4082-413a-8ad6-d76e60dd2b64'), 9: UUID('a2

In [10]:
# Semantic (vector) search: ask for the two stored objects closest to "technology".
response = questions.query.near_text(query="technology", limit=2)

# Show the properties of the first returned object.
top_match = response.objects[0]
print(top_match.properties)

{'answer': 'Sound barrier', 'question': 'In 70-degree air, a plane traveling at about 1,130 feet per second breaks it', 'category': 'SCIENCE'}


In [11]:
# Semantic search for "biology", restricted to objects whose `category`
# property equals "ANIMALS".
animals_only = wvc.query.Filter.by_property("category").equal("ANIMALS")
response = questions.query.near_text(query="biology", limit=2, filters=animals_only)

# Show the properties of the first returned object.
top_match = response.objects[0]
print(top_match.properties)

{'answer': 'the nose or snout', 'question': 'The gavial looks very much like a crocodile except for this bodily feature', 'category': 'ANIMALS'}


In [14]:
# Generative search with a per-object prompt: each retrieved object carries its
# own `generated` text. `{answer}` is a template placeholder; per Weaviate's
# generative-search docs it refers to the object's `answer` property.
eli5_prompt = "Explain {answer} as you might to a five-year-old."
response = questions.generate.near_text(query="biology", limit=2, single_prompt=eli5_prompt)

# Show the text generated for the first returned object.
top_match = response.objects[0]
print(top_match.generated)

DNA is like a recipe book that tells our bodies how to grow and work. It is made up of tiny instructions that are passed down from our parents and help make us who we are. Just like how a recipe tells you how to make a cake, DNA tells our bodies how to make us!


In [15]:
# Generative search with a grouped task: a single generated text for the whole
# result set, read from the response itself rather than from individual objects.
tweet_task = "Write a tweet with emojis about these facts."
response = questions.generate.near_text(query="biology", limit=2, grouped_task=tweet_task)

# Show the single generated text for the result set.
print(response.generated)

🧬 In 1953 Watson & Crick built a model of the molecular structure of DNA, the gene-carrying substance! 🧬🔬

🐦 2000 news: the Gunnison sage grouse isn't just another northern sage grouse, but a new species! 🦆🌿 #ScienceFacts #DNA #SpeciesDiscovery


In [2]:
# from langchain_community.document_loaders import PyPDFLoader
# from langchain_community.embeddings import OllamaEmbeddings
# from langchain_text_splitters import CharacterTextSplitter
# from langchain_community.vectorstores import FAISS


# embeddings = OllamaEmbeddings()

# path_to_pdf = 'Pregnancy and social welfare payments.pdf'
# loader = PyPDFLoader(path_to_pdf)
# pages = loader.load_and_split()
# text_splitter = CharacterTextSplitter(chunk_size = 1000, chunk_overlap = 100)
# docs = text_splitter.split_documents(pages)
# retriever = FAISS.from_documents(docs, embeddings).as_retriever()