In [1]:
import os
import requests
import json
from dotenv import load_dotenv


# Load variables from a local .env file into os.environ; later cells read
# OPENAI_API_KEY and the Azure/OpenAI settings from the environment.
load_dotenv()

True

## Doc search + prompt

In [2]:
from app import get_docsearch


# One prior exchange, in the {"prompt", "response"} shape that the prompt-building
# cell further down iterates over.
history = [
    dict(
        prompt="who are you?",
        response="Hi there! I'm IntegrateAIAssistant, a friendly and helpful AI assistant by Integrate.ai. I'm here to help you with documents. If you have any questions, please let me know!",
    )
]

# Sample request payload; both keys are read from OPENAI_API_KEY.
data = dict(
    question="How do I get started?",
    history=history,
    api_key=os.environ["OPENAI_API_KEY"],
    embeddings_key=os.environ["OPENAI_API_KEY"],
    vectorstore_path="",
)

# Pull the individual fields out of the payload for use in later cells.
question, history, api_key, embeddings_key = (
    data[field] for field in ("question", "history", "api_key", "embeddings_key")
)

# Load the document index for the given vectorstore path.
# Note: if the index was built with embeddings other than OpenAI, the embeddings
# configuration has to be changed to match.
docsearch = get_docsearch(data["vectorstore_path"], embeddings_key)

loaded settings: {'LLM_NAME': 'openai_chat', 'EMBEDDINGS_NAME': 'openai_text-embedding-ada-002', 'CELERY_BROKER_URL': 'redis://localhost:6379/0', 'CELERY_RESULT_BACKEND': 'redis://localhost:6379/1', 'MONGO_URI': 'mongodb://localhost:27017/docsgpt', 'MODEL_PATH': './models/gpt4all-model.bin', 'TOKENS_MAX_HISTORY': 150, 'API_URL': 'http://localhost:7091', 'API_KEY': None, 'EMBEDDINGS_KEY': None, 'AZURE_OPENAI_API_BASE': None, 'AZURE_OPENAI_API_VERSION': None, 'AZURE_DEPLOYMENT_NAME': 'text-davinci-003', 'AZURE_EMBEDDINGS_DEPLOYMENT_NAME': 'text-embedding-ada-002'}
environ({'COMMAND_MODE': 'unix2003', 'HOME': '/Users/xshe', 'HOMEBREW_CELLAR': '/opt/homebrew/Cellar', 'HOMEBREW_PREFIX': '/opt/homebrew', 'HOMEBREW_REPOSITORY': '/opt/homebrew', 'INFOPATH': '/opt/homebrew/share/info:', 'LESS': '-R', 'LOGNAME': 'xshe', 'LSCOLORS': 'Gxfxcxdxbxegedabagacad', 'MANPATH': '/opt/homebrew/share/man::', 'MallocNanoZone': '0', 'OLDPWD': '/', 'ORIGINAL_XDG_CURRENT_DESKTOP': 'undefined', 'PAGER': 'cat', '

In [3]:
# Retriever over the loaded index, limited to the 5 best matches per query.
retriever = docsearch.as_retriever(search_kwargs={"k": 5})

In [4]:
# Query the vector store directly for the 20 closest chunks and their scores
# (a wider window than the retriever's k=5), to sanity-check retrieval quality.
# NOTE(review): the recorded scores increase down the list, so they look like
# distances (lower = closer) — confirm for the store in use.
docsearch.similarity_search_with_score(query=data["question"], k=20)

[(Document(page_content='\n\nRequirements\n\nThis section outlines the setup steps required to configure your working environment. Steps that are performed in the AWS platform are not explained in detail. Refer to the AWS documentation as needed. \n\nThe requirements are tool-agnostic - that is, you can complete the steps through the AWS console, or through a tool such as Terraform or AWS CloudFormation. \n\n', metadata={'title': 'inputs/iai_doc/aws-batch-manual.md'}),
  0.46205106),
 (Document(page_content='\n\nInstall components\n\nInstall the integrate.ai command-line tool (CLI), the SDK, and the client. For detailed instructions, see .\n\n', metadata={'title': 'inputs/iai_doc/user-auth.md'}),
  0.49679202),
 (Document(page_content='\n\nRunning a training server on AWS Fargate\n\nSet up the Fargate environment, as described in .\n\n', metadata={'title': 'inputs/iai_doc/aws-fargate-sdk.md'}),
  0.50277495),
 (Document(page_content='\n\nDeployment Scenarios\n\n\n', metadata={'title': 

In [5]:
# Show the documents the configured retriever returns for the sample question.
retriever.get_relevant_documents(data["question"])

[Document(page_content='\n\nRequirements\n\nThis section outlines the setup steps required to configure your working environment. Steps that are performed in the AWS platform are not explained in detail. Refer to the AWS documentation as needed. \n\nThe requirements are tool-agnostic - that is, you can complete the steps through the AWS console, or through a tool such as Terraform or AWS CloudFormation. \n\n', metadata={'title': 'inputs/iai_doc/aws-batch-manual.md'}),
 Document(page_content='\n\nInstall components\n\nInstall the integrate.ai command-line tool (CLI), the SDK, and the client. For detailed instructions, see .\n\n', metadata={'title': 'inputs/iai_doc/user-auth.md'}),
 Document(page_content='\n\nRunning a training server on AWS Fargate\n\nSet up the Fargate environment, as described in .\n\n', metadata={'title': 'inputs/iai_doc/aws-fargate-sdk.md'}),
 Document(page_content='\n\nDeployment Scenarios\n\n\n', metadata={'title': 'inputs/iai_doc/deployment.md'}),
 Document(page_

In [6]:
from langchain.llms import AzureOpenAI


# Azure OpenAI completion model. Every setting comes from a required environment
# variable, so a missing one fails immediately with KeyError.
llm = AzureOpenAI(
    deployment_name=os.environ["AZURE_DEPLOYMENT_NAME"],
    temperature=0,
    model_kwargs={
        kwarg: os.environ[env_name]
        for kwarg, env_name in (
            ("api_key", "OPENAI_API_KEY"),
            ("api_base", "OPENAI_API_BASE"),
            ("api_type", "OPENAI_API_TYPE"),
        )
    },
    openai_api_version=os.environ["OPENAI_API_VERSION"],
)

In [7]:
from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
    AIMessagePromptTemplate,
)
from app import chat_combine_template, settings


def _escape_braces(text):
    """Escape literal braces so history text is not parsed as template variables."""
    return text.replace("{", "{{").replace("}", "}}")


# The system message comes first; its template receives the retrieved context
# through the `summaries` variable when the prompt is formatted.
messages_combine = [SystemMessagePromptTemplate.from_template(chat_combine_template)]
if history:
    tokens_current_history = 0
    # Walk the history newest-first so the most recent exchanges get the token budget.
    # reversed() is used instead of history.reverse(): the in-place reverse also
    # mutated data["history"] (same list object) and flipped the order again on
    # every re-run of this cell.
    # NOTE(review): kept exchanges are appended newest-first, so with several
    # exchanges the prompt lists them in reverse chronological order — confirm
    # this is intended.
    for i in reversed(history):
        if "prompt" in i and "response" in i:
            tokens_batch = llm.get_num_tokens(i["prompt"]) + llm.get_num_tokens(i["response"])
            # An exchange that does not fit is skipped; older, smaller ones may still be added.
            if tokens_current_history + tokens_batch < settings.TOKENS_MAX_HISTORY:
                tokens_current_history += tokens_batch
                # History is literal text, not a template: escape braces so content such as
                # code snippets or JSON does not introduce unexpected input variables.
                messages_combine.append(HumanMessagePromptTemplate.from_template(_escape_braces(i["prompt"])))
                messages_combine.append(AIMessagePromptTemplate.from_template(_escape_braces(i["response"])))
# The live user question is the final message and stays a real template variable.
messages_combine.append(HumanMessagePromptTemplate.from_template("{question}"))

p_chat_combine = ChatPromptTemplate.from_messages(messages_combine)

In [8]:
# Render the assembled chat prompt with placeholder values to eyeball its final layout.
print(p_chat_combine.format(question="my question", summaries="my summary"))

System: You are IntegrateaiAssistant, friendly and helpful AI assistant by Integrate.ai that provides help with documents. You give thorough answers with code examples if possible. If there is no code example available, don't try to make up any code.
Use the following pieces of context to help answer the users question. If its not relevant to the question, just say that there is not enough information, don't try to make up an answer.
When using code examples, use the following format:
```(language)
(code)
```
----------------
my summary
Human: who are you?
AI: Hi there! I'm IntegrateAIAssistant, a friendly and helpful AI assistant by Integrate.ai. I'm here to help you with documents. If you have any questions, please let me know!
Human: my question


In [9]:
from langchain.chains import LLMChain, ConversationalRetrievalChain
from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT
from langchain.chains.question_answering import load_qa_chain


# Building blocks: an LLMChain on LangChain's stock CONDENSE_QUESTION_PROMPT, and a
# map_reduce QA chain whose combine step uses the chat prompt assembled earlier.
question_generator = LLMChain(prompt=CONDENSE_QUESTION_PROMPT, llm=llm)
doc_chain = load_qa_chain(
    llm,
    chain_type="map_reduce",
    verbose=True,
    combine_prompt=p_chat_combine,
)
chain = ConversationalRetrievalChain(
    combine_docs_chain=doc_chain,
    question_generator=question_generator,
    retriever=retriever,
    return_source_documents=True,
)

# The chain itself is invoked with an empty chat history; the sample `history`
# only enters through the combine prompt.
chat_history = list()
result = chain(dict(question=question, chat_history=chat_history))
# Async alternative (currently disabled):
# result = run_async_chain(chain, question, chat_history)



[1m> Entering new  chain...[0m


[1m> Entering new  chain...[0m
Prompt after formatting:
[32;1m[1;3mUse the following portion of a long document to see if any of the text is relevant to answer the question. 
Return any relevant text verbatim.


Requirements

This section outlines the setup steps required to configure your working environment. Steps that are performed in the AWS platform are not explained in detail. Refer to the AWS documentation as needed. 

The requirements are tool-agnostic - that is, you can complete the steps through the AWS console, or through a tool such as Terraform or AWS CloudFormation. 


Question: How do I get started?
Relevant text, if any:[0m
Prompt after formatting:
[32;1m[1;3mUse the following portion of a long document to see if any of the text is relevant to answer the question. 
Return any relevant text verbatim.


Install components

Install the integrate.ai command-line tool (CLI), the SDK, and the client. For detailed instructions, see .

In [10]:
# Inspect the chain output: question, chat_history, answer and source_documents.
result

{'question': 'How do I get started?',
 'chat_history': [],
 'answer': "\nAI: To get started, you'll need to install the integrate.ai command-line tool (CLI), the SDK, and the client. For detailed instructions, see the documentation. After that, you'll need to set up the Fargate environment, as described in the documentation.",
 'source_documents': [Document(page_content='\n\nRequirements\n\nThis section outlines the setup steps required to configure your working environment. Steps that are performed in the AWS platform are not explained in detail. Refer to the AWS documentation as needed. \n\nThe requirements are tool-agnostic - that is, you can complete the steps through the AWS console, or through a tool such as Terraform or AWS CloudFormation. \n\n', metadata={'title': 'inputs/iai_doc/aws-batch-manual.md'}),
  Document(page_content='\n\nInstall components\n\nInstall the integrate.ai command-line tool (CLI), the SDK, and the client. For detailed instructions, see .\n\n', metadata={'t

## API

In [None]:
from IPython.display import display, Markdown


def query(question, url="http://0.0.0.0:7091/api/answer", history=None, timeout=None):
    """Send a question to the answer API, render the answer, and return the response.

    Args:
        question: The question text to send.
        url: Endpoint to POST to.
        history: Prior conversation, sent unchanged under the "history" key
            (``None`` is serialised as JSON ``null``).
        timeout: Optional timeout in seconds passed to ``requests.post``.
            ``None`` (the default) waits indefinitely, as before.

    Returns:
        The decoded JSON response body as a dict.

    Raises:
        KeyError: If ``OPENAI_API_KEY`` is not set, or the response has no "answer" key.
        requests.HTTPError: If the server responds with a 4xx/5xx status.
    """
    headers = {
        "Content-Type": "application/json; charset=utf-8"
    }

    # The same OpenAI key is sent for both the LLM and the embeddings.
    payload = {
        "question": question,
        "history": history,
        "api_key": os.environ["OPENAI_API_KEY"],
        "embeddings_key": os.environ["OPENAI_API_KEY"],
    }

    res = requests.post(url=url, data=json.dumps(payload), headers=headers, timeout=timeout)
    if not res.ok:
        # Surface the server's error body right away; otherwise the failure only shows
        # up later as a JSON decode error or a KeyError on "answer".
        raise requests.HTTPError(
            f"{res.status_code} {res.reason} for {url}: {res.text[:500]}", response=res
        )
    res_json = res.json()
    # Render the answer as Markdown in the cell output.
    display(Markdown(res_json["answer"]))
    return res_json

## IAI DOC

In [None]:
# Manual smoke-test questions, each POSTed through query() with its defaults
# (no chat history). Assigning the result to `res` keeps the returned dict from
# being echoed as cell output; the bare query(...) cells also echo it.
# NOTE(review): presumably these expect the API to be serving the integrate.ai
# docs index — confirm before comparing answers.
res = query("Who are you?")

In [None]:
res = query("How do I get started?")

In [None]:
query("How do I deploy this in AWS?")

In [None]:
query("How do I use a custom model?")

In [None]:
query("how do I generate a non-admin token?")

In [None]:
query("How do I renew my token?")

In [None]:
query("How do I set my differential privacy parameter?")

In [None]:
query("If I wanted to try multi-client training, where one client has Y, X1 and another client has X2 features, how would I setup the data schema and client train commands?")

In [None]:
query("I got Error code 401. What does it mean")

In [None]:
query("give me an example of running a session on AWS BATCH")

In [None]:
query("can you give the template for building a custom model")

In [None]:
query("what evaluation metrics are supported")

In [None]:
query("what does GLM mean")

In [None]:
query("how do I train a GLM with integrateai")

In [None]:
query("how to create an EDA session")

In [None]:
query("give me an example of data config for PRL sessions")

In [None]:
query("which strategies are currently supported for HFL")

In [None]:
query("what is a VFL session")

## Default (Pandas)

In [None]:
# Same smoke test, with pandas questions sent through query() with its defaults.
# NOTE(review): presumably these target the API's default (pandas) docs index
# rather than the integrate.ai one — confirm which index the server has loaded.
query("what is pandas")

In [None]:
query("how to load parquet files?")

In [None]:
query("how to load parquet files directly with pandas")

In [None]:
query("gimme an example of computing the moving average of all columns in a dataframe")