## Install dependencies

In [64]:
!pip install openai
!pip install langchain
!pip install unstructured
!pip install tiktoken
!pip install chromadb



## Imports

In [1]:
from pathlib import Path

from IPython.display import display, Markdown

from langchain.vectorstores import Chroma
from langchain.text_splitter import MarkdownTextSplitter
from langchain.document_loaders import DirectoryLoader
from langchain.document_loaders import TextLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import MarkdownTextSplitter
from langchain.schema import Document

import openai
import tiktoken

# Store OpenAI key in env var: OPENAI_API_KEY

## Load Docs

#### First, we load the docs and split any that are longer than 1500 characters
The same approach works for other document formats: swap in the text splitter that matches the format.

Note: Because ChatGPT can understand markdown, we are loading the docs with the raw
`TextLoader` instead of something like the `UnstructuredMarkdownLoader` which strips
out the markdown and leaves plain text.

In [2]:
# Splitter for markdown text: chunks of up to 1500 characters, overlapping by 500.
# Documents that are short enough could be used whole; longer ones have to be chunked.
# Reference: https://python.langchain.com/en/latest/modules/indexes/text_splitters/examples/markdown.html
text_splitter = MarkdownTextSplitter(chunk_size=1500, chunk_overlap=500)

# Loader that reads every .md file under markdown_docs/ with the raw TextLoader
loader = DirectoryLoader('markdown_docs/', glob="**/*.md", loader_cls=TextLoader)

# Load the files, then split them; `docs` holds the resulting chunks
docs = text_splitter.split_documents(loader.load())

#### Then, we create a ChromaDB vector store with source metadata

In [3]:
# Build the Chroma vector database, embedding each chunk of text

embeddings = OpenAIEmbeddings()  # embeddings stay on the default text-embedding-ada-002

database_persistent_directory = ".db_storage"

# Text of every chunk, in the same order as the metadata below
chunk_texts = [doc.page_content for doc in docs]

# One metadata dict per chunk: the first component of the local source path is dropped
# and the remainder is appended to the GitHub docs URL of the provider repository.
chunk_metadatas = [
    {"source": f"https://github.com/duplocloud/terraform-provider-duplocloud/blob/develop/docs/{Path(*Path(doc.metadata['source']).parts[1:]).as_posix()}" }
    for doc in docs
]

vector_db = Chroma.from_texts(
    chunk_texts,
    embeddings,
    persist_directory=database_persistent_directory,
    metadatas=chunk_metadatas,
)

# Write the database to the persist directory so a later cell can reload it
vector_db.persist()

Using embedded DuckDB with persistence: data will be stored in: .db_storage


In [28]:
# Reload path: when the database was already persisted, open it from local storage
# instead of building it again.
database_persistent_directory = ".db_storage"

vector_db = Chroma(
    embedding_function=embeddings,
    persist_directory=database_persistent_directory,
)

Using embedded DuckDB with persistence: data will be stored in: .db_storage


## OpenAI Functions

Some custom functions to make interacting with the OpenAI API easier

In [55]:
from typing import List, Dict

def _print_wrapped(pieces, line_length=80):
    """Print text pieces as they arrive, soft-wrapping the output, and return the joined text.

    Args:
        pieces: Iterable of strings (for example streamed completion deltas).
        line_length: Number of characters after which a line break is inserted
            in the *printed* output.

    Returns:
        The concatenation of all pieces, without the inserted line breaks.
    """
    # A piece that starts with one of these keeps that character on the old line when wrapping
    wrap_lead_chars = (" ", ".", "!", "?", ",", ";", ":")
    column = 0
    collected = []
    for piece in pieces:
        collected.append(piece)

        if "\n" in piece:
            # Everything before the last newline is printed as complete lines;
            # only the tail continues on the current line.
            *finished_lines, piece = piece.split("\n")
            for line in finished_lines:
                print(line, end="\n", flush=True)
            column = 0
        if column + len(piece) > line_length:
            lead = piece[:1] if piece[:1] in wrap_lead_chars else ""
            print(lead, end="\n", flush=True)
            piece = piece[len(lead):]
            column = 0
        column += len(piece)
        print(piece, end="", flush=True)
    return "".join(collected)


def get_response(question: str, history: List[Dict[str, str]], model: str="gpt-3.5-turbo", temperature: float=0.5, stream: bool=True, timeout=None) -> str:
    """Send a question to the OpenAI chat API, print the reply wrapped at 80 columns, and return it.

    Args:
        question: Text sent as the final user message.
        history: Earlier chat messages (dicts with 'role' and 'content'); not modified.
        model: Chat model name passed to the API.
        temperature: Sampling temperature passed to the API.
        stream: When True the reply is printed piece by piece as it is streamed;
            when False the complete reply is printed once it has arrived.
        timeout: Request timeout in seconds. A falsy value selects 2.0 for
            streaming requests and 60.0 otherwise.

    Returns:
        The reply text exactly as produced by the model; the line breaks added
        for printing are not part of it.
    """
    if not timeout:
        timeout = 2.0 if stream else 60.0
    response = openai.ChatCompletion.create(
        model=model,
        messages=history + [
            {'role': 'user', 'content': f"{question}"},
        ],
        temperature=temperature,
        stream=stream,
        request_timeout=timeout
    )

    if stream:
        # Some deltas carry no text (missing key or None); treat those as empty pieces.
        deltas = (chunk["choices"][0]["delta"].get("content") or "" for chunk in response)
        return _print_wrapped(deltas)

    content = response["choices"][0]["message"]["content"] or ""
    # Feed the reply word by word so it is wrapped the same way as a streamed one,
    # but return the original text: rebuilding it from the words appended a stray space.
    _print_wrapped(f"{word} " for word in content.split(" "))
    return content

In [72]:
# Ask the model for a Markdown sample; the reply is printed while it streams
markdown_prompt = "Give an example of some basic Markdown syntax, using Markdown for formatting."
res = get_response(
    markdown_prompt,
    [],
    model="gpt-3.5-turbo",
    temperature=0.5,
    stream=True,
)

# Render the returned text so the Markdown formatting shows up in the notebook
display(Markdown(res))


# Heading 1
## Heading 2
### Heading 3

*Italic*
**Bold**
***Bold and Italic***

- List item 1
- List item 2
- List item 3

[Link](https://www.example.com)

> Blockquote

`Inline code`

```
Code block
```

# Heading 1
## Heading 2
### Heading 3

*Italic*
**Bold**
***Bold and Italic***

- List item 1
- List item 2
- List item 3

[Link](https://www.example.com)

> Blockquote

`Inline code`

```
Code block
```

In [71]:
def num_tokens_in_text(text):
    """Return how many tokens `text` encodes to with the cl100k_base encoding."""
    # cl100k_base is the encoding used for the chat models
    chat_encoding = tiktoken.get_encoding("cl100k_base")
    tokens = chat_encoding.encode(text)
    return len(tokens)

In [70]:
# Function from OpenAI to get the number of tokens used by a list of messages

def num_tokens_from_messages(messages, model="gpt-3.5-turbo-0301"):
    """Count the tokens a list of chat messages uses for the given model.

    The floating names "gpt-3.5-turbo" and "gpt-4" are redirected (with a printed
    warning) to "gpt-3.5-turbo-0301" and "gpt-4-0314". Any other model without
    known per-message constants raises NotImplementedError.
    """
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        print("Warning: model not found. Using cl100k_base encoding.")
        encoding = tiktoken.get_encoding("cl100k_base")

    # Floating model name -> (dated model to count for, warning to print)
    floating_models = {
        "gpt-3.5-turbo": (
            "gpt-3.5-turbo-0301",
            "Warning: gpt-3.5-turbo may change over time. Returning num tokens assuming gpt-3.5-turbo-0301.",
        ),
        "gpt-4": (
            "gpt-4-0314",
            "Warning: gpt-4 may change over time. Returning num tokens assuming gpt-4-0314.",
        ),
    }
    if model in floating_models:
        dated_model, warning = floating_models[model]
        print(warning)
        return num_tokens_from_messages(messages, model=dated_model)

    # Dated model -> (tokens_per_message, tokens_per_name)
    overheads = {
        # every message follows <|start|>{role/name}\n{content}<|end|>\n;
        # if there's a name, the role is omitted (hence -1)
        "gpt-3.5-turbo-0301": (4, -1),
        "gpt-4-0314": (3, 1),
    }
    if model not in overheads:
        raise NotImplementedError(f"""num_tokens_from_messages() is not implemented for model {model}. See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens.""")
    tokens_per_message, tokens_per_name = overheads[model]

    total = 0
    for message in messages:
        total += tokens_per_message
        for field, text in message.items():
            total += len(encoding.encode(text))
            if field == "name":
                total += tokens_per_name
    # every reply is primed with <|start|>assistant<|message|>
    return total + 3

## Document Lookup Examples

In [None]:
# Similarity search: fetch the top k documents most similar to the query

similarity_query = "Does Duplocloud have a terraform provider?"
doc_lookup = vector_db.similarity_search(query=similarity_query, k=4)

# Show each hit: source URL first, then the chunk rendered as Markdown
for doc in doc_lookup:
    source_url = doc.metadata['source']
    print(f"{source_url}")
    display(Markdown(doc.page_content))
    print("")

In [None]:
# MMR search: also returns the top k documents for the query, but applies a diversity
# penalty so the results are not too similar to each other

mmr_query = "How do I initialize a the duplocloud terraform provider?"
doc_lookup_mmr = vector_db.max_marginal_relevance_search(query=mmr_query, k=4)

# Show each hit: source URL first, then the chunk rendered as Markdown
for doc in doc_lookup_mmr:
    source_url = doc.metadata['source']
    print(f"{source_url}")
    display(Markdown(doc.page_content))
    print("")

## TEMPLATES

In [84]:
def create_document_list(documents: List[Document], max_tokens = 1000):
    """Format documents into one numbered text block that stays within a token budget.

    Documents are added in the given order. As soon as adding the next one would
    push the running token count above `max_tokens`, it and all later documents
    are left out.

    Returns:
        The concatenated sections (an empty string when nothing fits).
    """
    template = "----------------------------------------\nDocument {index}: {source}\n{doc.page_content}\n\n"
    sections = []
    tokens_used = 0
    for number, document in enumerate(documents, start=1):
        section = template.format(index=number, source=document.metadata["source"], doc=document)
        tokens_used += num_tokens_in_text(section)
        if tokens_used > max_tokens:
            break
        sections.append(section)
    return "".join(sections)

## Question Asking

In [92]:
def question_from_sources(question):
    """Answer `question` with the chat model, using retrieved documents as context.

    Looks up 4 documents with an MMR search, formats them into a context block
    limited to 1000 tokens, prints the sources of the retrieved documents, and
    returns the model's streamed reply.
    """
    retrieved = vector_db.max_marginal_relevance_search(query=question, k=4)
    context = create_document_list(retrieved, max_tokens=1000)

    instruction = f"Please use the following documents as well as your existing knowledge to answer this question: {question}\n\n"
    prompt = instruction + context

    source_lines = "\n".join(f"{doc.metadata['source']}" for doc in retrieved)
    print("Generating answer using the following documents:\n" + source_lines)

    return get_response(prompt, [], model="gpt-3.5-turbo", temperature=0.5, stream=True)

In [95]:
# Ask a documentation-backed question, then render the returned answer as Markdown
tf_example_request = "Please write a short example .tf file for the Duplocloud terraform provider."
res = question_from_sources(tf_example_request)
display(Markdown(res))

Generating answer using the following documents:
https://github.com/duplocloud/terraform-provider-duplocloud/blob/develop/docs/resources/k8_ingress.md
https://github.com/duplocloud/terraform-provider-duplocloud/blob/develop/docs/resources/aws_cloudfront_distribution.md
https://github.com/duplocloud/terraform-provider-duplocloud/blob/develop/docs/resources/azure_virtual_machine_scale_set.md
https://github.com/duplocloud/terraform-provider-duplocloud/blob/develop/docs/resources/other_agents.md
Example .tf file for Duplocloud Terraform Provider:

```terraform
provider "duplocloud" {
  # add your provider configuration here
}

resource "duplocloud_duplo_service" "example_service" {
  tenant_id = "your_tenant_id"
  name = "example_service"
  replicas = 2
  lb_synced_deployment = true
  cloud_creds_from_k8s_service_account = false
  is_daemonset = false
  agent_platform = 7
  cloud = 0
  docker_image = "nginx:latest"
}

resource "duplocloud_duplo_service_lbconfigs" "example_service_lb" {
  t