# Lesson 3: Vectorstores and embeddings

# Vectorstore ingestion

In [None]:
import "dotenv/config";

In [None]:
import { OpenAIEmbeddings } from "@langchain/openai";

// Embeddings client constructed with no explicit options.
// NOTE(review): presumably credentials come from the environment populated
// by the earlier `import "dotenv/config"` cell — confirm.
const embeddings = new OpenAIEmbeddings();

// Embed one sample string; the awaited result is the cell's final expression.
await embeddings.embedQuery("This is some sample text");

In [None]:
import { similarity } from "ml-distance";

// Embed a question and a deliberately unrelated sentence so the two vectors
// can be compared in the following cells.
// The two requests do not depend on each other, so they are started together
// and awaited with Promise.all instead of one after the other. Promise.all
// preserves input order, so the destructured names match the original ones.
const [vector1, unrelatedVector] = await Promise.all([
    embeddings.embedQuery(
        "What are vectors useful for in machine learning?"
    ),
    embeddings.embedQuery(
        "A group of parrots is called a pandemonium."
    ),
]);

In [None]:
// Cosine similarity between the question vector and the unrelated sentence's
// vector; as the only expression in the cell, its value is the cell's result.
similarity.cosine(vector1, unrelatedVector);

In [None]:
// Embed a sentence on the same topic as the question embedded into vector1.
const similarVector = await embeddings.embedQuery(
    "Vectors are representations of information."
);

// Cosine similarity for the on-topic pair; the cell's final expression.
similarity.cosine(vector1, similarVector);

In [None]:
// Peer dependency
import * as parse from "pdf-parse";
import { PDFLoader } from "langchain/document_loaders/fs/pdf";
import { 
    RecursiveCharacterTextSplitter
} from "langchain/text_splitter";

// Splitter configuration: chunkSize 128 with no overlap between chunks.
const splitter = new RecursiveCharacterTextSplitter({ chunkSize: 128, chunkOverlap: 0 });

// Load the lecture PDF from the local data directory.
const loader = new PDFLoader("./data/MachineLearning-Lecture01.pdf");
const rawCS229Docs = await loader.load();

// Break the loaded documents into chunks for ingestion into the vector store.
const splitDocs = await splitter.splitDocuments(rawCS229Docs);

In [None]:
import { MemoryVectorStore } from "langchain/vectorstores/memory";

// Create the vector store (imported from "langchain/vectorstores/memory"),
// handing it the embeddings client created in an earlier cell.
const vectorstore = new MemoryVectorStore(embeddings);

In [None]:
// Ingest the split chunks into the vector store.
// NOTE(review): presumably each chunk is embedded with `embeddings` as it is
// added — confirm against the MemoryVectorStore documentation.
await vectorstore.addDocuments(splitDocs);

In [None]:
// Similarity search against the store for the question, passing 4 as the
// requested number of results.
const retrievedDocs = await vectorstore.similaritySearch("What is deep learning?", 4);

// Keep only the text of each returned document.
const pageContents = retrievedDocs.map(({ pageContent }) => pageContent);

// Final expression of the cell: the list of retrieved texts.
pageContents

# Retrievers

In [None]:
// Obtain a retriever backed by the vector store built above.
const retriever = vectorstore.asRetriever();

In [None]:
// Ask the retriever the same question used in the similarity search cell;
// the awaited result is the cell's final expression.
await retriever.invoke("What is deep learning?")