In [1]:
# Pip install required dependencies
! pip install openai
! pip install pinecone-client[grpc]
! pip install pd 


import os
import csv

import openai
import pinecone 
from kaggle_secrets import UserSecretsClient
import pandas as pd

# Pull the OpenAI and Pinecone API keys from Kaggle's secret store so that
# no credential is ever written into the notebook itself.
secrets_client = UserSecretsClient()
OPENAI_API_KEY = secrets_client.get_secret("OPENAI_API_KEY")
PINECONE_API_KEY = secrets_client.get_secret("PINECONE_API_KEY")

# Name of the Pinecone index queried by the cells below.
index_name = "the-office-oracle"

# Configure the Pinecone client, then open a handle on the index.
pinecone.init(
    api_key=PINECONE_API_KEY,
    environment='us-west4-gcp-free',
)
index = pinecone.Index(index_name)

Collecting openai
  Downloading openai-0.27.8-py3-none-any.whl (73 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m73.6/73.6 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: openai
Successfully installed openai-0.27.8
[0mCollecting pinecone-client[grpc]
  Downloading pinecone_client-2.2.2-py3-none-any.whl (179 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m179.1/179.1 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
Collecting loguru>=0.5.0 (from pinecone-client[grpc])
  Downloading loguru-0.7.0-py3-none-any.whl (59 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.0/60.0 kB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
Collecting dnspython>=2.0.0 (from pinecone-client[grpc])
  Downloading dnspython-2.3.0-py3-none-any.whl (283 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m283.7/283.7 kB[0m [31m20.3 MB/s[0m eta [36m0:00:00[0m
Collecting grpc-gateway-protoc-gen-openapiv2==0.1.0 (f

  from tqdm.autonotebook import tqdm


In [3]:
# Query the Pinecone index directly: embed a question with OpenAI, then
# print the text stored in the metadata of the closest matches.
user_query = "What did Dwight find in the ceiling?"

# Embedding model used for the query.
# NOTE(review): presumably the same model the index was populated with — confirm.
model_name = "text-embedding-ada-002"

# Turn the question into an embedding vector.
vector = openai.Embedding.create(
    api_key=OPENAI_API_KEY,
    model=model_name,
    input=user_query,
)

# Fetch the three nearest neighbours. Only each match's metadata is read
# below, so the stored vector values are not requested.
query_response = index.query(
    top_k=3,
    include_values=False,
    include_metadata=True,
    vector=vector.data[0].embedding,
)

# Only touch the matches when a response actually came back, so the check
# protects the loop that dereferences it.
if query_response is not None:
    print("Got query_response...")

    for match in query_response['matches']:
        print(f'text: {match.metadata["text"]}')

Got query_response...
text: Dwight spots animal droppings on the carpet. He peeks above the ceiling tiles and initially concludes that there is a bird stuck in the air vents, but is surprised to discover that it is actually a bat. The bat escapes, and everyone panics (except for Jim, Oscar, and a somewhat amused Creed). Kelly and Meredith scream, Kevin runs into the camera, Pam hides near the coat rack, Karen hides under her desk, and Angela falls flat on the floor and prays. Stanley heads home (with a very dry "Good-bye") as Dwight shuts the bat in a conference room. Angela puts on a clear plastic rain bonnet so the bat won't poop on her head.
Jim calls animal control and tells Dwight that it won't be at the office until 6 p.m. As Jim and Dwight talk, Jim tells Dwight that he doesn't feel good. He tells Dwight he might have been bitten by the bat and implies he might be turning into a vampire. Dwight looks on in confusion. Jim and Karen continue pranking Dwight, speaking in slow, deli

In [4]:
! pip install langchain
! pip install sentence_transformers 

Collecting langchain
  Downloading langchain-0.0.219-py3-none-any.whl (1.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m27.8 MB/s[0m eta [36m0:00:00[0m00:01[0m
Collecting langchainplus-sdk>=0.0.17 (from langchain)
  Downloading langchainplus_sdk-0.0.17-py3-none-any.whl (25 kB)
Collecting openapi-schema-pydantic<2.0,>=1.2 (from langchain)
  Downloading openapi_schema_pydantic-1.2.4-py3-none-any.whl (90 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m90.0/90.0 kB[0m [31m7.3 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: openapi-schema-pydantic, langchainplus-sdk, langchain
Successfully installed langchain-0.0.219 langchainplus-sdk-0.0.17 openapi-schema-pydantic-1.2.4
[0mCollecting sentence_transformers
  Downloading sentence-transformers-2.2.2.tar.gz (85 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.0/86.0 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (s

In [6]:
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Pinecone

# Wrap the existing Pinecone index in a LangChain vector store, using
# OpenAI embeddings to encode incoming queries.
embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)
db = Pinecone.from_existing_index(index_name=index_name, embedding=embeddings)

# Run a similarity search and show every document that comes back.
query = "What happens with Ryan and his desk?"
docs = db.similarity_search(query)

for doc in docs:
    print(f'Retrieved doc:{doc}')


Retrieved doc:page_content='Ryan redirects the speech into a question-and-answer session. Michael solemnly states that "real business is done on paper," and tells the students to write that fact down. The audience quickly complies by typing it on their laptops.\nThe Q&A session intensifies, with students wondering aloud how a small business could survive against the "five Goliaths" of the paper industry. Michael defends his company, commenting at the students\' inability to understand the situation based upon their age and lack of experience. He also states that the United States faces "five goliaths" as well but only states four: Al-Qaeda, Global Warming, sex predators, and mercury poisoning. When a student uses Ryan\'s prediction of Dunder Mifflin\'s obsolescence as proof of his point, Michael is shocked to hear that Ryan had made such a statement. He informs the room that Ryan still hasn\'t made a sale, started a fire in his attempt to make a cheese pita, and "a lot of people think 

In [7]:
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.llms import OpenAI  # no longer used here; retained so any later cell relying on this name keeps working

# "gpt-3.5-turbo-16k" is a chat model, so it is driven through ChatOpenAI
# rather than the completion-style OpenAI wrapper.
llm = ChatOpenAI(
    openai_api_key=OPENAI_API_KEY,
    model_name="gpt-3.5-turbo-16k",
)

# "stuff" chain: the retrieved documents are placed into a single prompt
# together with the question.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=db.as_retriever(),
)

query = "List some dangerous situations Dwight has caused"
resp = qa.run(query)
print(resp)



1. Dwight's actions that could have burned down the building or killed Stanley.
2. Dwight mutilating the CPR dummy, costing the company $3500.
3. Dwight tricking his coworkers into signing an apology sheet by pretending it was an attendance sheet.
4. Dwight nearly suffocating Clark with shrink wrap during a hazing plan.
5. Dwight attempting to ride a bicycle across a tight-rope and needing to be rescued by firefighters.
