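This notebook uses `OpenAIEmbeddings`, so an OpenAI API key is required. The next cell reads it from the environment (or a `.env` file) and prompts for it if it is not set.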

In [3]:
import getpass
import os
from urllib.parse import quote

from dotenv import load_dotenv

# Load variables from a .env file; override=True asks python-dotenv to let
# those values take precedence over ones already present in the environment.
load_dotenv(override=True)

# If the key is still missing (or set to an empty string), ask for it
# interactively so it never has to be typed into the notebook itself.
if os.environ.get("OPENAI_API_KEY", "") == "":
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API Key:")

In [4]:
from langchain.docstore.document import Document
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings

from langchain_iris import IRISVector


In [5]:
# Read the speech transcript from disk as UTF-8 text.
loader = TextLoader("../data/state_of_the_union.txt", encoding="utf-8")
documents = loader.load()

# Split the loaded text into chunks (target size 400, overlap 20) so that
# each chunk can be embedded and retrieved on its own.
text_splitter = CharacterTextSplitter(chunk_overlap=20, chunk_size=400)
docs = text_splitter.split_documents(documents)

# Embedding model used for both indexing and querying.
# Presumably picks up OPENAI_API_KEY from the environment - confirm.
embeddings = OpenAIEmbeddings()

  embeddings = OpenAIEmbeddings()


In [6]:
# IRIS connection settings. Every value can be overridden through an
# environment variable (as the hostname already could); the defaults are the
# demo values, so an unconfigured environment yields the same URL as before.
username = os.getenv('IRIS_USERNAME', 'demo')
password = os.getenv('IRIS_PASSWORD', 'demo')
hostname = os.getenv('IRIS_HOSTNAME', 'localhost')
port = os.getenv('IRIS_PORT', '1972')
namespace = os.getenv('IRIS_NAMESPACE', 'USER')

# Percent-encode the credentials so characters such as '@', ':' or '/' in a
# real password cannot break the URL structure. Plain alphanumeric values
# (like the demo defaults) are left unchanged by quote().
CONNECTION_STRING = (
    f"iris://{quote(username, safe='')}:{quote(password, safe='')}"
    f"@{hostname}:{port}/{namespace}"
)

In [7]:
# Show the assembled connection URL as a sanity check.
# NOTE: this writes the password in plain text into the notebook output;
# clear the output before sharing if real credentials are in use.
print(CONNECTION_STRING)

iris://demo:demo@localhost:1972/USER


In [8]:
# The collection name is used as a SQL table name under the hood,
# so it CANNOT contain a '.'.
COLLECTION_NAME = "state_of_the_union_test"

# Build a persistent vector store (a SQL table) from the split documents.
# Run this cell ONCE only.
db = IRISVector.from_documents(
    documents=docs,
    embedding=embeddings,
    connection_string=CONNECTION_STRING,
    collection_name=COLLECTION_NAME,
)

In [9]:
# On later runs, reconnect to the existing collection with the constructor below (uncomment it) instead of calling `from_documents` again.

# db = IRISVector(
#     embedding_function=embeddings,
#     dimension=1536,
#     collection_name=COLLECTION_NAME,
#     connection_string=CONNECTION_STRING,
# )

In [10]:
# To add documents to an existing vector store:

# db.add_documents(docs)

In [11]:
# Fetch the stored entries and report how many ids the collection holds.
stored_ids = db.get()["ids"]
print(f"Number of docs in vector store: {len(stored_ids)}")

Number of docs in vector store: 114


In [12]:
# Free-text query and the number of matches to request.
query = "new technology"
num_results = 2

# Each result is a (document, score) pair.
docs_with_score = db.similarity_search_with_score(query, num_results)

In [13]:
# Print each match between two 80-character rules, score first.
separator = "-" * 80
for doc, score in docs_with_score:
    print(separator)
    print("Score: ", score)
    print(doc.page_content)
    print(separator)

--------------------------------------------------------------------------------
Score:  0.171027508784767
Up to eight state-of-the-art factories in one place. 10,000 new good-paying jobs. 

Some of the most sophisticated manufacturing in the world to make computer chips the size of a fingertip that power the world and our everyday lives. 

Smartphones. The Internet. Technology we have yet to invent. 

But that’s just the beginning.
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Score:  0.200533043742121
But to compete for the best jobs of the future, we also need to level the playing field with China and other competitors. 

That’s why it is so important to pass the Bipartisan Innovation Act sitting in Congress that will make record investments in emerging technologies and American manufacturing. 

Let me give you one example of why it’s so important to pass it.
---------

In [14]:
# Insert a one-off document, then search for its exact text.
# Note: every run of this cell adds another "foo" document to the store.
probe_doc = Document(page_content="foo")
db.add_documents([probe_doc])

docs_with_score = db.similarity_search_with_score("foo")

# Display the best match.
docs_with_score[0]

(Document(page_content='foo'), 0.0)

In [15]:
# Display every (document, score) pair returned by the previous search.
# No result count was passed there, so the store's default applies.
docs_with_score

[(Document(page_content='foo'), 0.0),
 (Document(metadata={'source': '../data/state_of_the_union.txt'}, page_content='Up to eight state-of-the-art factories in one place. 10,000 new good-paying jobs. \n\nSome of the most sophisticated manufacturing in the world to make computer chips the size of a fingertip that power the world and our everyday lives. \n\nSmartphones. The Internet. Technology we have yet to invent. \n\nBut that’s just the beginning.'),
  0.232852233974567),
 (Document(metadata={'source': '../data/state_of_the_union.txt'}, page_content='Powered by people I’ve met like JoJo Burgess, from generations of union steelworkers from Pittsburgh, who’s here with us tonight. \n\nAs Ohio Senator Sherrod Brown says, “It’s time to bury the label “Rust Belt.” \n\nIt’s time. \n\nBut with all the bright spots in our economy, record job growth and higher wages, too many families are struggling to keep up with the bills.'),
  0.235663888984952),
 (Document(metadata={'source': '../data/sta

In [16]:
# Wrap the vector store in a retriever object and print it; the printed
# form shows its tags and the underlying vector store instance.
retriever = db.as_retriever()
print(retriever)

tags=['IRISVector'] vectorstore=<langchain_iris.vectorstores.IRISVector object at 0x1441386b0>
