In [None]:
import logging
import json
import os
from pathlib import Path

import tiktoken
import faiss
import numpy as np
from tenacity import (
    before_sleep_log,
    retry,
    retry_if_exception_type,
    stop_after_attempt,
    wait_exponential,
)
import openai

# Module-level logger; the retry policy defined further down logs through it.
logger = logging.getLogger(__name__)
# Read the API key from the environment; raises KeyError if OPENAI_API_KEY is unset.
openai.api_key = os.environ['OPENAI_API_KEY']

## Helpers

In [None]:
# OpenAI API functions
# Shared retry policy for the OpenAI helpers: at most 4 attempts, exponential
# backoff with waits clamped to the 4-10 second range, retrying only on the
# openai.error exception types listed here. A WARNING is logged before each sleep.
retry_openai_decorator = retry(
    retry=retry_if_exception_type(
        (
            openai.error.Timeout,
            openai.error.APIError,
            openai.error.APIConnectionError,
            openai.error.RateLimitError,
            openai.error.ServiceUnavailableError,
        )
    ),
    wait=wait_exponential(multiplier=1, min=4, max=10),
    stop=stop_after_attempt(4),
    before_sleep=before_sleep_log(logger, logging.WARNING),
    reraise=True,
)

@retry_openai_decorator
def openai_embed(model, input):
    """Request embeddings for ``input`` from ``model`` via ``openai.Embedding.create``.

    The call is wrapped in ``retry_openai_decorator``; the API response is
    returned unchanged.
    """
    response = openai.Embedding.create(model=model, input=input)
    return response

@retry_openai_decorator
def openai_chatcompletion(model, messages):
    """Send ``messages`` to the chat model ``model`` and return the raw API response.

    Args:
        model: Name of the chat model to query (e.g. ``"gpt-3.5-turbo"``).
        messages: List of ``{"role": ..., "content": ...}`` dicts forming the chat.

    The call is wrapped in ``retry_openai_decorator``.
    """
    # Forward the caller's model; previously it was ignored and
    # "gpt-3.5-turbo" was always used regardless of the argument.
    return openai.ChatCompletion.create(
        model=model,
        messages=messages,
    )

In [None]:
# Helper to efficiently embed a set of documents using the OpenAI embedding API
# This is from langchain

# Maximum number of tokens per chunk; embed_texts slices longer token lists
# into pieces of at most this length.
embedding_ctx_length = 8191
# Embedding model name used by the cells below.
OPENAI_EMBEDDING_MODEL = "text-embedding-ada-002"
# Number of token chunks sent to the embedding API per request in embed_texts.
chunk_size = 1000

from typing import List

def embed_texts(texts: List[str], embedding_model: str) -> List[List[float]]:
    """Embed each text with the OpenAI embedding API and return unit-length vectors.

    Every text is tokenized with the tiktoken encoding for ``embedding_model``
    and split into chunks of at most ``embedding_ctx_length`` tokens. The
    chunks are embedded in batches of ``chunk_size`` per API request. For each
    text the chunk embeddings are averaged, weighted by chunk token count, and
    the average is scaled to unit L2 norm.

    Args:
        texts: Texts to embed.
        embedding_model: Name of the OpenAI embedding model.

    Returns:
        One embedding (list of floats) per input text, in input order.
    """
    encoding = tiktoken.model.encoding_for_model(embedding_model)

    # Flat list of token chunks, plus the index of the text each chunk came from.
    tokens = []
    indices = []
    for i, text in enumerate(texts):
        if embedding_model.endswith("001"):
            # See: https://github.com/openai/openai-python/issues/418#issuecomment-1525939500
            # replace newlines, which can negatively affect performance.
            text = text.replace("\n", " ")
        token = encoding.encode(
            text,
            disallowed_special="all",
        )
        for j in range(0, len(token), embedding_ctx_length):
            tokens.append(token[j : j + embedding_ctx_length])
            indices.append(i)

    # Embed the chunks batch by batch; results stay aligned with ``tokens``.
    batched_embeddings = []
    for i in range(0, len(tokens), chunk_size):
        response = openai_embed(
            embedding_model,
            input=tokens[i : i + chunk_size],
        )
        batched_embeddings.extend(r["embedding"] for r in response["data"])

    # Regroup chunk embeddings (and their token counts) by originating text.
    results: List[List[List[float]]] = [[] for _ in range(len(texts))]
    num_tokens_in_batch: List[List[int]] = [[] for _ in range(len(texts))]
    for chunk_embedding, chunk_tokens, text_index in zip(batched_embeddings, tokens, indices):
        results[text_index].append(chunk_embedding)
        num_tokens_in_batch[text_index].append(len(chunk_tokens))

    embeddings: List[List[float]] = []
    for i in range(len(texts)):
        if not results[i]:
            # The text produced no tokens, so there is nothing to average;
            # embed the empty string instead. This branch used to call the
            # undefined name ``embed_with_retry`` and raised NameError.
            # NOTE(review): confirm the API accepts an empty input string.
            average = np.asarray(
                openai_embed(embedding_model, input="")["data"][0]["embedding"]
            )
        else:
            average = np.average(
                results[i], axis=0, weights=num_tokens_in_batch[i]
            )
        embeddings.append((average / np.linalg.norm(average)).tolist())

    return embeddings

# Minimal end-to-end version

In [None]:
# Get markdown files from our docs repo

# Root of the docs checkout. Set the DOC_DIR environment variable to point at
# your own copy; the fallback is the path this notebook was written against.
DOC_DIR = os.environ.get('DOC_DIR', '/Users/shawn/code2/docodile')
DOC_SUFFIX = '.md'
# Only the first MAX_DOCS matching files are loaded.
MAX_DOCS = 100

# glob() order is not guaranteed, so sort first: the capped subset is then the
# same on every run. Truncating before reading also avoids loading files that
# would be thrown away.
doc_paths = sorted(Path(DOC_DIR).glob('**/*' + DOC_SUFFIX))[:MAX_DOCS]

# Decode explicitly as UTF-8 rather than relying on the platform default.
docs = [
    {'path': doc_path.name, 'contents': doc_path.read_text(encoding='utf-8')}
    for doc_path in doc_paths
]

len(docs)

In [None]:
# Actually embed the docs

# One embedding per loaded document, in the same order as `docs`.
doc_embeddings = embed_texts([d['contents'] for d in docs], OPENAI_EMBEDDING_MODEL)
#doc_embeddings

In [None]:
# Create a vector index using FAISS

# Index dimensionality is taken from the first embedding, so `doc_embeddings`
# must be non-empty here.
faiss_index = faiss.IndexFlatL2(len(doc_embeddings[0]))
# Convert to a float32 array before adding the vectors to the index.
doc_embeddings_vector = np.array(doc_embeddings, dtype=np.float32)
faiss_index.add(doc_embeddings_vector)

In [None]:
# Embed our query

query = 'Who are you?'
# The query embedding is taken straight from the API response; unlike the
# documents, it does not go through embed_texts.
query_embedding = openai_embed(OPENAI_EMBEDDING_MODEL, query)['data'][0]['embedding']

In [None]:
# Find the most similar documents to our embedded query

# Number of nearest documents to retrieve for the prompt context.
TOP_K = 4

# FAISS searches a batch of queries, so wrap the single embedding in a
# one-row float32 array.
query_vector = np.array([query_embedding], dtype=np.float32)
query_result_scores, query_result_indices = faiss_index.search(query_vector, TOP_K)

# FAISS pads the result with -1 when the index holds fewer than TOP_K vectors.
# Skip those slots: docs[-1] would otherwise silently return the last document.
query_result_docs = [docs[i] for i in query_result_indices[0] if i >= 0]

In [None]:
# Put the documents into a prompt along with our question

# Template with two placeholders: {context} (the retrieved document text) and
# {question} (the user's query).
prompt_template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

{context}

Question: {question}
Helpful Answer:"""

# Join the retrieved documents' contents with blank lines and fill the template.
prompt = prompt_template.format(
    context='\n\n'.join([d['contents'] for d in query_result_docs]),
    question=query
)
#prompt

In [None]:
# Get the response
# Send the filled-in prompt as a single user message; the bare `response`
# on the last line displays the raw API result.
response = openai_chatcompletion(
    model="gpt-3.5-turbo", # chat model name handed to the helper
    messages = [{"role": "user", "content": prompt}]
)
response

# Object-oriented version

In [None]:
import dataclasses
import typing

@dataclasses.dataclass
class Document:
    """A piece of text together with arbitrary caller-defined metadata."""
    # Raw text of the document.
    contents: str
    # Free-form extra information; no structure is imposed here.
    metadata: typing.Any
        
class Embeddings:
    """Interface for objects that turn text into embedding vectors."""

    def embed_texts(self, texts: List[str]) -> List[List[float]]:
        """Return one embedding per entry of ``texts``. Subclasses must override."""
        raise NotImplementedError

    def embed_query(self, query: str) -> List[float]:
        """Return the embedding of a single query string.

        Implemented on top of ``embed_texts`` so every subclass gets it for free.
        """
        # embed_texts expects a list of texts. Passing the bare string would
        # make it iterate character by character and return the embedding of
        # the first character only.
        return self.embed_texts([query])[0]


@dataclasses.dataclass
class OpenAIEmbeddings(Embeddings):
    """Embeddings backed by the OpenAI embedding API."""
    # Name of the OpenAI embedding model to use.
    model: str = "text-embedding-ada-002"

    def embed_texts(self, texts: List[str]) -> List[List[float]]:
        """Embed ``texts`` with the module-level ``embed_texts`` helper and ``self.model``."""
        return embed_texts(texts, self.model)

class ChatMessage(typing.TypedDict):
    """A single chat message, shaped as a ``{"role": ..., "content": ...}`` dict."""
    # Speaker of the message; the cells in this notebook only ever use "user".
    role: str
    # Text of the message.
    content: str
    
@dataclasses.dataclass
class ChatModel:
    """Chat-completion wrapper around ``openai_chatcompletion``."""
    # Stored on the instance but not forwarded: openai_chatcompletion accepts
    # only a model name and a message list.
    temperature: float = 0.7

    def complete(self, messages: list[ChatMessage]) -> typing.Any:
        """Send ``messages`` to the chat model and return the raw API response.

        Args:
            messages: The conversation so far, as role/content dicts.
        """
        # Use the caller's messages. Previously the argument was ignored and a
        # notebook-global ``prompt`` was sent instead.
        return openai_chatcompletion(
            model="gpt-3.5-turbo",
            messages=messages,
        )


class ChatOpenAI(ChatModel):
    """OpenAI chat model.

    Inherits ``complete`` from ``ChatModel``. It used to override it with a
    stub that raised NotImplementedError, which made instances unusable.
    """

class VectorIndex:
    """Base class for nearest-neighbour indexes; subclasses implement ``search``."""
    def search(self, query: str) -> list[int]:
        """Look up the entries closest to ``query``. Not implemented on the base class.

        NOTE(review): the annotations say ``str -> list[int]``, but
        ``VectorStore.search`` passes a float32 array and unpacks the result as
        a ``(scores, indices)`` pair. Confirm the intended contract and update
        the annotations accordingly.
        """
        raise NotImplementedError

@dataclasses.dataclass
class FAISS(VectorIndex):
    """VectorIndex backed by a FAISS ``IndexFlatL2``."""
    # The populated FAISS index that searches are run against.
    faiss_index: faiss.IndexFlatL2

    def search(self, query: np.ndarray, k: int = 4) -> typing.Tuple[np.ndarray, np.ndarray]:
        """Return the ``k`` nearest neighbours of each query vector.

        Args:
            query: Query vectors as a 2-D float32 array, one row per query.
            k: Number of neighbours to return per query. Defaults to 4, the
                value that used to be hard-coded.

        Returns:
            The ``(scores, indices)`` pair exactly as returned by the
            underlying FAISS index.
        """
        return self.faiss_index.search(query, k)
    
def make_faiss(docs: list[Document], embedder: Embeddings) -> "VectorStore":
    """Embed ``docs`` and build a FAISS-backed ``VectorStore`` over them.

    Args:
        docs: Documents to index. Each item is either a ``Document`` or a dict
            with a ``'contents'`` key (the form the loading cell produces).
        embedder: Supplies ``embed_texts`` for the documents and ``embed_query``
            for later searches.

    Returns:
        A ``VectorStore`` wrapping the new index, the documents and the
        embedder's query function. The return annotation is a string because
        ``VectorStore`` is defined after this function.

    Raises:
        IndexError: If ``docs`` is empty (no embedding to read the dimension from).
    """
    # Accept both Document instances and plain dicts. Subscripting alone fails
    # on the Document dataclass named in the signature.
    texts = [d.contents if hasattr(d, 'contents') else d['contents'] for d in docs]
    doc_embeddings = embedder.embed_texts(texts)

    # Index dimensionality comes from the first embedding.
    faiss_index = faiss.IndexFlatL2(len(doc_embeddings[0]))
    doc_embeddings_vector = np.array(doc_embeddings, dtype=np.float32)
    faiss_index.add(doc_embeddings_vector)
    return VectorStore(FAISS(faiss_index), docs, embedder.embed_query)
    
@dataclasses.dataclass
class VectorStore:
    """Pairs a vector index with the documents it was built from."""
    # Index whose result positions line up with ``docs``.
    index: VectorIndex
    # Documents in the order their embeddings were added to the index.
    docs: list[Document]
    # Maps a query string to its embedding vector.
    embed_fn: typing.Callable[[str], typing.List[float]]

    def search(self, query: str) -> list[Document]:
        """Return the documents nearest to ``query``.

        The query is embedded with ``embed_fn``, wrapped in a one-row float32
        array and passed to ``index.search``; the returned positions are mapped
        back to entries of ``docs``.
        """
        embedded_query = self.embed_fn(query)
        query_vector = np.array([embedded_query], dtype=np.float32)
        _scores, indices = self.index.search(query_vector)
        # FAISS pads with -1 when it has fewer vectors than requested. Skip
        # those slots: self.docs[-1] would silently return the last document.
        return [self.docs[i] for i in indices[0] if i >= 0]

@dataclasses.dataclass
class QA:
    """Retrieval-augmented question answering over a ``VectorStore``."""
    # Source of context documents for each question.
    vector_store: VectorStore
    # Stored alongside the pipeline; ``query`` does not consult it (see below).
    chat_model: ChatModel
    # Template with ``{context}`` and ``{question}`` placeholders.
    prompt_template: str

    def query(self, query: str) -> typing.Any:
        """Answer ``query`` using documents retrieved from the vector store.

        Retrieves the nearest documents, fills ``prompt_template`` with their
        contents and the question, and sends the result as a single user
        message. Previously the argument, the vector store and the template
        were all ignored and a notebook-global ``prompt`` was sent instead.

        Returns:
            The raw chat-completion API response.
        """
        retrieved_docs = self.vector_store.search(query)
        # Items may be Document instances or plain dicts with a 'contents' key.
        context = '\n\n'.join(
            d.contents if hasattr(d, 'contents') else d['contents']
            for d in retrieved_docs
        )
        filled_prompt = self.prompt_template.format(context=context, question=query)
        # As before, the request goes straight to openai_chatcompletion;
        # self.chat_model is not used here.
        return openai_chatcompletion(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": filled_prompt}],
        )
        
def make_qa(vector_store: VectorStore, chat_model: ChatModel, prompt_template: str) -> QA:
    """Bundle a vector store, a chat model and a prompt template into a ``QA``."""
    qa = QA(
        vector_store=vector_store,
        chat_model=chat_model,
        prompt_template=prompt_template,
    )
    return qa
    
def doc_qa(vector_store: VectorStore, prompt_template: str, query: str):
    """Answer ``query`` with context retrieved from ``vector_store``.

    Args:
        vector_store: Store searched for the documents nearest to the query.
        prompt_template: Template with ``{context}`` and ``{question}`` placeholders.
        query: The user's question.

    Returns:
        The raw chat-completion API response.
    """
    query_result_docs = vector_store.search(query)
    # Accept both Document instances and plain dicts with a 'contents' key.
    # Subscripting alone fails on the Document dataclass.
    context = '\n\n'.join(
        d.contents if hasattr(d, 'contents') else d['contents']
        for d in query_result_docs
    )
    prompt = prompt_template.format(context=context, question=query)
    return openai_chatcompletion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": prompt}],
    )

In [None]:
# Build the embedder once and reuse it; previously `embeddings` was created
# but a second OpenAIEmbeddings() was passed to make_faiss.
embeddings = OpenAIEmbeddings()
vs = make_faiss(docs, embeddings)

In [None]:
# ChatOpenAI() is built with no arguments, so it takes ChatModel's default temperature.
chat_model = ChatOpenAI()
# Bundle the store, chat model and template; `qa` is not queried in the cells shown here.
qa = make_qa(vs, chat_model, prompt_template)

In [None]:
# Run the function-style pipeline end to end; the cell displays the raw API response.
doc_qa(vs, prompt_template, "Who are you?")