In [None]:
%load_ext autoreload
%autoreload 2

from keys import *
from pathlib import Path
from urllib.parse import urlparse
from tinydb import TinyDB

import pinecone
import requests
from langchain import LLMChain, PromptTemplate
from langchain.memory import ConversationBufferWindowMemory
from langchain.chains import (ConversationalRetrievalChain,
                              SimpleSequentialChain)
from langchain.document_loaders import (PagedPDFSplitter, TextLoader,
                                        UnstructuredHTMLLoader,
                                        UnstructuredMarkdownLoader,
                                        UnstructuredPDFLoader)
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import HuggingFacePipeline, OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Pinecone
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import torch

from keys import HUGGINGFACE_HEADERS
from slackbot import obj

from pprint import PrettyPrinter
pp = PrettyPrinter()

# https://python.langchain.com/en/latest/modules/indexes/vectorstores/examples/pinecone.html?highlight=pinecone

In [None]:
class TestModel:
    """Bundle a locally cached Hugging Face model with a LangChain LLMChain.

    The constructor populates these attributes:
        llm_model_id: Hugging Face model identifier that is loaded.
        model:        causal LM loaded from ``llm_model_id``.
        tokenizer:    tokenizer loaded from ``llm_model_id``.
        pipe:         "text-generation" pipeline built from model and tokenizer.
        llm:          ``HuggingFacePipeline`` wrapping ``pipe``.
        memory:       ``ConversationBufferWindowMemory`` created with ``k=2``.
        prompt:       ``PromptTemplate`` with the single variable ``question``.
        llm_chain:    verbose ``LLMChain`` tying prompt, llm and memory together.
    """

    def __init__(self):
        # gpt2 gives poor answers, but it is free and relatively small, which
        # is what matters for these tests.
        # Alternatives that were considered:
        #   llm = OpenAI()
        #   model_id = "decapoda-research/llama-7b-hf"
        #   model_id = "decapoda-research/llama-13b-hf"
        self.llm_model_id = "gpt2"
        model_id = self.llm_model_id

        # Both loaders receive local_files_only=True, so the model files are
        # expected to be available locally already.
        self.model = AutoModelForCausalLM.from_pretrained(
            model_id,
            device_map='auto',
            torch_dtype=torch.float16,
            local_files_only=True,
        )
        self.tokenizer = AutoTokenizer.from_pretrained(
            model_id,
            local_files_only=True,
        )

        # Generation settings are collected in one place and handed to the
        # pipeline factory as keyword arguments.
        generation_settings = {
            "model": self.model,
            "tokenizer": self.tokenizer,
            "max_new_tokens": 16,
            "device_map": "auto",
            "early_stopping": True,
        }
        self.pipe = pipeline("text-generation", **generation_settings)
        self.llm = HuggingFacePipeline(pipeline=self.pipe)

        self.memory = ConversationBufferWindowMemory(k=2)

        question_template = """Q: {question} A:"""
        self.prompt = PromptTemplate(
            template=question_template,
            input_variables=["question"],
        )
        self.llm_chain = LLMChain(
            prompt=self.prompt,
            llm=self.llm,
            verbose=True,
            memory=self.memory,
        )


# Build the model bundle once; later cells reuse `t.llm` and `t.llm_chain`.
t = TestModel()

In [None]:
from tru_chain import TruChain, Record

In [None]:
# Open the TinyDB file and drop its "records" table before wrapping the chain
# (presumably so records from earlier runs do not accumulate — TODO confirm).
db = TinyDB("db.json")
db.drop_table("records")
# Wrap the memory-backed LLMChain built by TestModel, handing over the db.
tc = TruChain(t.llm_chain, db=db)

# Last expression of the cell: display the wrapper's `model` attribute.
tc.model

In [None]:
# Call the wrapped chain twice. Only the second call's return value is shown,
# because a notebook cell displays just its last expression.
tc("hello there")
tc("hello there general kanobi")

In [None]:
# Display the wrapper's `records` — presumably one entry per call made above
# (TODO confirm against TruChain).
tc.records

In [None]:
# Query the wrapper: each positional argument is a path rooted at `Record`,
# and `where` supplies a filter condition.
# NOTE(review): `!= None` (instead of `is not None`) looks deliberate —
# `Record` presumably overloads comparison operators to build a query
# expression, which `is not` cannot do. Confirm before "fixing" it.
tc.select(
    Record.chain.prompt.template,
    Record.chain.llm._call.input.prompt,
    Record.chain._call.input.inputs.question,
    Record.chain._call.output.text,
    where=Record.chain._call.output.text != None
)

In [None]:
# First link: the same Q/A prompt text as in TestModel, but this LLMChain is
# created without memory and without verbose output.
template = """Q: {question} A:"""
prompt = PromptTemplate(template=template,
                        input_variables=["question"])
llm_chain = LLMChain(prompt=prompt, llm=t.llm)

# Second link: asks the model to reverse whatever text it is given.
template_2 = """Reverse this sentence: {sentence}."""
prompt_2 = PromptTemplate(template=template_2,
                            input_variables=["sentence"])
llm_chain_2 = LLMChain(prompt=prompt_2, llm=t.llm)

# Chain the two links; the composite takes "question" and returns "answer".
seq_chain = SimpleSequentialChain(chains=[llm_chain, llm_chain_2],
                                    input_key="question",
                                    output_key="answer")

# Rebinds `tc`: it previously wrapped t.llm_chain and now wraps seq_chain.
tc = TruChain(seq_chain, db=db)
tc.model

In [None]:
# Two calls through the wrapper; only the second call's result is displayed.
tc("hello there")
tc("hello there mister bond")

In [None]:
# Display the wrapper's `records` after the calls above.
tc.records

In [None]:
# Select along a path that indexes into the wrapped chain's `chains` list;
# index 1 is `llm_chain_2` when `tc` wraps `seq_chain` as built earlier.
# The trailing `_call[1]` presumably picks the second recorded call — TODO confirm.
tc.select(Record.chain.chains[1]._call[1])

In [None]:
# Display the wrapper's `model` attribute again, now that calls have been made.
tc.model

# Notes

1. LangChain does not support classification models: https://python.langchain.com/en/latest/modules/models.html

    - We will have to figure out out-of-band retrieval and execution of feedback models that are not LLMs.

2. Steps can be added to a chain to capture text at various points in it: https://python.langchain.com/en/latest/reference/modules/chains.html#langchain.chains.SequentialChain .


# Links

- https://huggingface.co/docs/transformers/v4.28.1/en/model_doc/llama#transformers.LlamaForCausalLM

- https://huggingface.co/docs/transformers/main_classes/text_generation


# Pinecone



In [None]:
from slackbot import obj
import langchain
import dill

In [None]:
from langchain.chains import (ConversationalRetrievalChain,
                              SimpleSequentialChain)

# Single switch for the `verbose` flag of both chains built in this cell.
verb = True

# Rebuilds the two-step Q/A + "reverse" chains, this time with verbose output.
# These assignments rebind `template`, `prompt`, `llm_chain`, `template_2`,
# `prompt_2`, `llm_chain_2` and `seq_chain` from the earlier cell.
template = """Q: {question} A:"""
prompt = PromptTemplate(template=template, input_variables=["question"])
llm_chain = LLMChain(prompt=prompt, llm=t.llm, verbose=verb)

template_2 = """Reverse this sentence: {sentence}."""
prompt_2 = PromptTemplate(template=template_2, input_variables=["sentence"])
llm_chain_2 = LLMChain(prompt=prompt_2, llm=t.llm, verbose=verb)

# print(llm_chain.run(question="What is the average air speed velocity of a laden swallow?"))

# Exercise the second link on its own before running the composite chain.
print(llm_chain_2.run(sentence="How are you doing?"))

seq_chain = SimpleSequentialChain(chains=[llm_chain, llm_chain_2], input_key="question", output_key="answer")
# Last expression: the composite chain's result is displayed as the cell output.
seq_chain.run(question="What is the average air speed velocity of a laden swallow?")

In [None]:
# Wrap the sequential chain without passing `db`, unlike the earlier
# TruChain(..., db=db) constructions.
tru_chain_2 = TruChain(seq_chain)

In [None]:
# Run once on the bare chain and once through the wrapper; only the wrapper's
# result is displayed because it is the cell's last expression.
seq_chain.run(question="What is the average air speed velocity of a laden swallow? again")
tru_chain_2.run(question="What is the average air speed velocity of a laden swallow?")

In [None]:
# Dump every record held by the wrapper, one pretty-printed block per record.
for record in tru_chain_2.records:
    formatted = pp.pformat(record)
    print(formatted)

In [None]:
# Ask the same question through the wrapper once more and display the result.
tru_chain_2.run(question="What is the average air speed velocity of a laden swallow?")

In [None]:
# Display the `model` attribute of the db-less wrapper.
tru_chain_2.model

# TruBot testing

In [131]:
from langchain.memory import ConversationBufferMemory, ConversationBufferWindowMemory, ConversationSummaryBufferMemory
from langchain.chains import ConversationChain

# Connect to Pinecone. PINECONE_API_KEY and PINECONE_ENV are not defined in
# this notebook — presumably they come from `from keys import *` (TODO confirm).
pinecone.init(
    api_key=PINECONE_API_KEY,  # find at app.pinecone.io
    environment=PINECONE_ENV  # next to api key in console
)

# Name of the existing Pinecone index that is queried below.
index_name = "llmdemo"

# Verbosity switch shared by the LLM and the chains built in get_convo.
verb = True

embedding = OpenAIEmbeddings(model='text-embedding-ada-002')  # 1536 dims

# Attach to the existing index (nothing is uploaded here) using the embedding above.
docsearch = Pinecone.from_existing_index(
    index_name=index_name, embedding=embedding
)

llm = OpenAI(temperature=0, max_tokens=128, verbose=verb)
retriever = docsearch.as_retriever()

# Conversation cache keyed by conversation id; consulted by get_convo.
convos = dict()

# Rebinds `db` (earlier cells used "db.json"). The `default` callable turns an
# object into a placeholder string — presumably TinyDB forwards it to the JSON
# encoder as the fallback for non-serializable values (TODO confirm).
db = TinyDB("test.records.json", default=lambda o: f"NON-SERIALIZED OBJECT: {o}")

def get_convo(cid):
    """Return the TruChain-wrapped conversation for ``cid``, creating it on first use.

    A new conversation is a ``ConversationalRetrievalChain`` over the
    module-level ``llm`` and ``retriever`` with its own summary-buffer memory,
    wrapped in a ``TruChain`` that records to the module-level ``db``. The
    wrapper is stored in ``convos`` so that later calls with the same ``cid``
    return the same object and its accumulated memory.

    Args:
        cid: Conversation identifier used as the key in ``convos``.

    Returns:
        The ``TruChain`` wrapping the conversation chain for ``cid``.
    """
    if cid in convos:
        return convos[cid]

    # The chain is built with return_source_documents=True, so it has more
    # than one output; output_key names the one the memory should store.
    memory = ConversationSummaryBufferMemory(
        max_token_limit=650,
        llm=llm,
        memory_key="chat_history",
        output_key='answer',
    )
    chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=retriever,
        verbose=verb,
        return_source_documents=True,
        memory=memory,
        # Pass the history through unchanged instead of letting the chain
        # reformat it.
        get_chat_history=lambda h: h,
        max_tokens_limit=4096,
    )

    # Fix: remember the new conversation. Previously nothing was ever written
    # to `convos`, so the cache lookup above could never succeed and every call
    # started a fresh conversation with empty memory.
    convo = TruChain(chain, db=db)
    convos[cid] = convo
    return convo

# Create the wrapped conversation for the id "piotrm".
c1 = get_convo("piotrm")
# Earlier ConversationChain variant, kept for reference. Note that it is
# commented out, so no module-level `chain` is created here.
#chain = ConversationChain(
#    llm=llm, memory=memory, verbose=verb
#)



In [132]:
# Check the token limit on the wrapped chain (get_convo passes max_tokens_limit=4096).
c1.chain.max_tokens_limit

4096

In [None]:
# Display the wrapper's dict form; `json=True` presumably asks for a
# JSON-compatible version (TODO confirm against TruChain.dict).
c1.dict(json=True)

In [133]:
# Ask a question through the wrapper and keep the result for inspection in
# the next cell.
ret = c1(dict(question="What is QII?"))
# chain.predict(input="Who is Piotr?")

Calling wrapped chain.


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mUse the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

uence (QII)ative Input In
As stressed in the previous section, at the heart of our framework developed in this paper is an
uence (QII) approach. Weon approach - the Quantitative Input In
outline it in this section, before moving to combining it with other approaches. Traditionally,
uence measures have been studied for feature selection, i.e. informing the choice of which
uence measures have been used as explain-cently, in
uence measures explain the behavior ofex models. In
models by indicating the relative importance of inputs and their direction. While the space
uence measures is quite large, we point out two requirements that they need
to satisfy: (i) taking into acco

In [134]:
# Display the result dict returned by the wrapped chain call.
ret

{'question': 'What is QII?',
 'chat_history': '',
 'answer': ' QII stands for Quantitative Input Influence and is a measure of the influence of a set of inputs on a quantity of interest. It is used to explain the behavior of models by indicating the relative importance of inputs and their direction.',
 'source_documents': [Document(page_content='2.2 Quantitative Input In\ruence (QII)\nAs stressed in the previous section, at the heart of our framework developed in this paper is an\ninstance-based explanation approach - the Quantitative Input In\ruence (QII) approach. We\noutline it in this section, before moving to combining it with other approaches. Traditionally,\nin\ruence measures have been studied for feature selection, i.e. informing the choice of which\nvariables to include in the model [8]. Recently, in\ruence measures have been used as explain-\nability mechanisms [1, 7, 9] for complex models. In\ruence measures explain the behavior of\nmodels by indicating the relative importa

In [None]:
# Display the records held by the conversation wrapper.
c1.records

In [None]:
# NOTE(review): `tc` is the wrapper around the gpt2 sequential chain from the
# earlier section, not the conversation wrapper `c1` used in the surrounding
# cells — confirm whether `c1.model` was intended here.
tc.model

In [None]:
# Show the raw messages held by the conversation's memory. `memory` exists
# only as a local inside get_convo, so a bare `memory` raises NameError on a
# fresh kernel; reach the same object through the wrapped chain instead.
c1.chain.memory.chat_memory.messages

In [None]:
# Ask a follow-up in the same conversation. No module-level `chain` is defined
# (it appears only in commented-out code and as a local in get_convo), so the
# question goes through the wrapper, using the same call form as the earlier
# "What is QII?" cell.
c1(dict(question="What does he work on?"))

In [None]:
# Display the conversation's memory object. It is reached through the wrapped
# chain because `memory` is only a local variable inside get_convo.
c1.chain.memory