In [None]:
%load_ext autoreload
%autoreload 2

from keys import *
from pathlib import Path
from urllib.parse import urlparse
from tinydb import TinyDB

import pinecone
import requests
from langchain import LLMChain, PromptTemplate
from langchain.memory import ConversationBufferWindowMemory
from langchain.chains import (ConversationalRetrievalChain,
                              SimpleSequentialChain)
from langchain.document_loaders import (PagedPDFSplitter, TextLoader,
                                        UnstructuredHTMLLoader,
                                        UnstructuredMarkdownLoader,
                                        UnstructuredPDFLoader)
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import HuggingFacePipeline, OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Pinecone
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import torch

from keys import HUGGINGFACE_HEADERS
# from slackbot import obj

from pprint import PrettyPrinter
pp = PrettyPrinter()

# https://python.langchain.com/en/latest/modules/indexes/vectorstores/examples/pinecone.html?highlight=pinecone

In [None]:
class TestModel():
    """Bundle a local ``gpt2`` text-generation pipeline with a LangChain LLMChain.

    Construction loads the model and tokenizer, wraps them in a Hugging Face
    ``text-generation`` pipeline, adapts that pipeline to LangChain through
    ``HuggingFacePipeline`` and builds a single-prompt ``LLMChain`` that uses a
    windowed conversation memory.

    Attributes set by ``__init__``: ``llm_model_id``, ``model``, ``tokenizer``,
    ``pipe``, ``llm``, ``memory``, ``prompt`` and ``llm_chain``.
    """

    def __init__(self):
        # llm = OpenAI()

        # gpt2 produces poor text, but it is free and relatively small, which
        # is what matters for tests.
        self.llm_model_id = "gpt2"

        # model_id = "decapoda-research/llama-7b-hf"
        # model_id = "decapoda-research/llama-13b-hf"

        # local_files_only=True: the checkpoint is expected to already be
        # available locally.
        self.model = AutoModelForCausalLM.from_pretrained(
            self.llm_model_id,
            device_map='auto',
            torch_dtype=torch.float16,
            local_files_only=True,
        )
        self.tokenizer = AutoTokenizer.from_pretrained(
            self.llm_model_id,
            local_files_only=True,
        )

        # Each generation call is capped at 16 new tokens.
        self.pipe = pipeline(
            "text-generation",
            model=self.model,
            tokenizer=self.tokenizer,
            max_new_tokens=16,
            device_map="auto",
            early_stopping=True,
        )
        self.llm = HuggingFacePipeline(pipeline=self.pipe)

        # Windowed conversation memory configured with k=2.
        self.memory = ConversationBufferWindowMemory(k=2)

        qa_template = """Q: {question} A:"""
        self.prompt = PromptTemplate(
            input_variables=["question"],
            template=qa_template,
        )
        self.llm_chain = LLMChain(
            llm=self.llm,
            prompt=self.prompt,
            memory=self.memory,
            verbose=True,
        )


# Shared instance whose `llm` and `llm_chain` are used by the cells below.
t = TestModel()

In [18]:
from tru_chain import TruChain
from tru_db import Record, TruTinyDB

In [None]:
# The result is not assigned, so this cell only displays whatever `exists()` returns.
# NOTE(review): presumably this builds a query object testing for a `chain` field
# on a record — confirm against tru_db.
Record.chain.exists()

In [19]:
# Wrap the test LLMChain in a TruChain, handing it a TruTinyDB created from "temp.json".
tc = TruChain(t.llm_chain, db=TruTinyDB("temp.json"))

# Display the wrapper's `model` attribute (the saved output is a nested dict describing the chain).
tc.model



{'memory': None,
 'verbose': False,
 'chain': {'memory': {'chat_memory': {'messages': [{'content': 'hello there',
      'additional_kwargs': {}},
     {'content': ' [laughing]\n\nDRAWING: [laughing] [l',
      'additional_kwargs': {}},
     {'content': 'hello there general kanobi', 'additional_kwargs': {}},
     {'content': ' Well thank You! Hm.\n\nQ: hello there katak',
      'additional_kwargs': {}}]},
   'output_key': None,
   'input_key': None,
   'return_messages': False,
   'human_prefix': 'Human',
   'ai_prefix': 'AI',
   'memory_key': 'history',
   'k': 2},
  'verbose': True,
  'prompt': {'input_variables': ['question'],
   'output_parser': None,
   'partial_variables': {},
   'template': 'Q: {question} A:',
   'template_format': 'f-string',
   'validate_template': True,
   '_type': 'prompt'},
  'llm': {'model_id': 'gpt2',
   'model_kwargs': None,
   '_type': 'huggingface_pipeline'},
  'output_key': 'text',
  '_type': 'llm_chain'},
 'model_name': 'model_hash_a3f90d6f479823ee60d

In [20]:
# Call the wrapped chain twice; the saved output reports one record written to the
# TruTinyDB per call. Only the second call's return value is displayed.
tc("hello there")
tc("hello there general kanobi")

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Calling wrapped chain.


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mQ: hello there A:[0m

[1m> Finished chain.[0m
Wrote 1 record(s) to <tru_db.TruTinyDB object at 0x7f197f57b100>.
Calling wrapped chain.


[1m> Entering new LLMChain chain...[0m


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt after formatting:
[32;1m[1;3mQ: hello there general kanobi A:[0m

[1m> Finished chain.[0m
Wrote 1 record(s) to <tru_db.TruTinyDB object at 0x7f197f57b100>.


{'question': 'hello there general kanobi',
 'history': 'Human: hello there general kanobi\nAI:  Well thank You! Hm.\n\nQ: hello there katak\nHuman: hello there\nAI:  hello there B: welcome your fellow workers! A: hello there C: what',
 'text': ' aww i think there is a link A: there so A: and i'}

In [30]:
# Project two fields from the stored records: the question given to the chain and
# the text it produced (the saved output shows one row per recorded call).
tc.db.select(Record.chain._call.input.inputs.question, Record.chain._call.output.text)

Unnamed: 0,Record.chain._call.input.inputs.question,Record.chain._call.output.text
0,hello there,hello there B: welcome your fellow workers! A...
1,hello there general kanobi,aww i think there is a link A: there so A: and i


In [None]:
import json
# Scratch check of json's `default=` hook: the TinyDB class itself is not JSON
# serializable, so the hook is consulted and its placeholder string is dumped.
json.dumps(obj=TinyDB, default=lambda _unserializable: "lol")

In [None]:
# Select the prompt template, the prompt passed to the LLM, the chain's input
# question and its output text.
# NOTE(review): `!= None` (rather than `is not None`) looks deliberate — presumably
# `Record` paths overload `!=` to build a query condition, which `is not` cannot
# do; confirm against tru_db before "fixing" it.
# NOTE(review): an earlier cell calls `tc.db.select(...)` while this one calls
# `tc.select(...)`; confirm both entry points exist on TruChain.
tc.select(
    Record.chain.prompt.template,
    Record.chain.llm._call.input.prompt,
    Record.chain._call.input.inputs.question,
    Record.chain._call.output.text,
    where=Record.chain._call.output.text != None
)

In [None]:
# First link: answer the question using the same Q/A prompt as TestModel.
template = """Q: {question} A:"""
prompt = PromptTemplate(template=template,
                        input_variables=["question"])
llm_chain = LLMChain(prompt=prompt, llm=t.llm)

# Second link: ask the model to reverse the first link's output.
template_2 = """Reverse this sentence: {sentence}."""
prompt_2 = PromptTemplate(template=template_2,
                            input_variables=["sentence"])
llm_chain_2 = LLMChain(prompt=prompt_2, llm=t.llm)

# Feed the output of the first chain into the second one.
seq_chain = SimpleSequentialChain(chains=[llm_chain, llm_chain_2],
                                    input_key="question",
                                    output_key="answer")

# A global `db` is only assigned in the "TruBot testing" section much further
# down, so `db=db` raised NameError here on a fresh kernel (Restart & Run All).
# Use the same TruTinyDB store as the earlier TruChain cell instead.
# Note that this rebinds `tc` from the single-chain wrapper to the sequential one.
tc = TruChain(seq_chain, db=TruTinyDB("temp.json"))
tc.model

In [None]:
# Run the wrapper on two inputs (`tc` was rebound to the sequential-chain wrapper
# in the previous cell); only the second call's return value is displayed.
tc("hello there")
tc("hello there mister bond")

In [None]:
# Display the wrapper's `records` attribute.
# NOTE(review): presumably the records captured by the calls above — confirm against tru_chain.
tc.records

In [None]:
# NOTE(review): presumably selects, for the sub-chain at index 1 (the "reverse"
# chain), the entry at index 1 of its recorded calls — confirm the indexing
# semantics of `Record` paths against tru_db.
tc.select(Record.chain.chains[1]._call[1])

In [None]:
# Display the wrapper's `model` attribute again, this time after the calls above.
tc.model

# Notes

1. Langchain does not have support for classification models: https://python.langchain.com/en/latest/modules/models.html

    - We will need to work out out-of-band retrieval and execution of feedback models that are not LLMs.

2. We can add steps to a chain to capture the text at various points in that chain: https://python.langchain.com/en/latest/reference/modules/chains.html#langchain.chains.SequentialChain .


# Links

- https://huggingface.co/docs/transformers/v4.28.1/en/model_doc/llama#transformers.LlamaForCausalLM

- https://huggingface.co/docs/transformers/main_classes/text_generation


# Pinecone



In [None]:
from slackbot import obj
import langchain
import dill

In [None]:
from langchain.chains import (ConversationalRetrievalChain,
                              SimpleSequentialChain)

# Verbosity flag forwarded to both LLMChain instances below.
verb = True

# Link 1: question -> answer.
template = """Q: {question} A:"""
prompt = PromptTemplate(
    input_variables=["question"],
    template=template,
)
llm_chain = LLMChain(llm=t.llm, prompt=prompt, verbose=verb)

# Link 2: ask the model to reverse whatever sentence it is given.
template_2 = """Reverse this sentence: {sentence}."""
prompt_2 = PromptTemplate(
    input_variables=["sentence"],
    template=template_2,
)
llm_chain_2 = LLMChain(llm=t.llm, prompt=prompt_2, verbose=verb)

# print(llm_chain.run(question="What is the average air speed velocity of a laden swallow?"))

# Exercise the second link on its own before composing the two.
print(llm_chain_2.run(sentence="How are you doing?"))

# Compose both links and run the pipeline end to end; the result of the final
# `run` is what the cell displays.
seq_chain = SimpleSequentialChain(
    chains=[llm_chain, llm_chain_2],
    input_key="question",
    output_key="answer",
)
seq_chain.run(question="What is the average air speed velocity of a laden swallow?")

In [None]:
# Wrap the sequential chain without passing `db`, unlike the earlier TruChain cells.
# NOTE(review): presumably TruChain falls back to some default store — confirm against tru_chain.
tru_chain_2 = TruChain(seq_chain)

In [None]:
# Run the chain once directly and once through the TruChain wrapper.
# NOTE(review): presumably this checks whether calling `seq_chain` directly is
# also recorded by the wrapper — confirm against tru_chain.
seq_chain.run(question="What is the average air speed velocity of a laden swallow? again")
tru_chain_2.run(question="What is the average air speed velocity of a laden swallow?")

In [None]:
# Pretty-print each entry of the wrapper's `records`, one after another.
for record in tru_chain_2.records:
    formatted = pp.pformat(record)
    print(formatted)

In [None]:
# Run the wrapped chain again with the same question as the earlier wrapped call.
tru_chain_2.run(question="What is the average air speed velocity of a laden swallow?")

In [None]:
# Display the `model` attribute of the sequential-chain wrapper.
tru_chain_2.model

# TruBot testing

In [None]:
from langchain.memory import ConversationBufferMemory, ConversationBufferWindowMemory, ConversationSummaryBufferMemory
from langchain.chains import ConversationChain

# Initialise the Pinecone client.
# NOTE(review): PINECONE_API_KEY / PINECONE_ENV presumably come from the
# star-import of `keys` at the top of the notebook — confirm.
pinecone.init(api_key=PINECONE_API_KEY,  # find at app.pinecone.io
              environment=PINECONE_ENV)  # next to api key in console

# Verbosity flag shared by the LLM and the chains built by get_convo.
verb = True

index_name = "llmdemo"

# Attach to the already-populated index and expose it as a retriever.
embedding = OpenAIEmbeddings(model='text-embedding-ada-002')  # 1536 dims
docsearch = Pinecone.from_existing_index(embedding=embedding,
                                         index_name=index_name)
retriever = docsearch.as_retriever()

llm = OpenAI(temperature=0, max_tokens=128, verbose=verb)

# Conversation wrappers, looked up by conversation id in get_convo.
convos = {}

# Record store handed to every TruChain built by get_convo.
# NOTE(review): `default=` is presumably forwarded to the JSON serializer so that
# non-serializable objects are written as a placeholder string — confirm.
db = TinyDB(
    "test.records.json",
    default=lambda o: f"NON-SERIALIZED OBJECT: {o}",
)

def get_convo(cid):
    """Return the TruChain-wrapped retrieval chain for conversation `cid`.

    The first call for a given `cid` builds a ConversationalRetrievalChain with
    its own ConversationSummaryBufferMemory, wraps it in a TruChain that records
    to the module-level `db`, and caches the wrapper in the module-level
    `convos` dict. Later calls with the same `cid` return that cached wrapper,
    so the conversation keeps its memory.

    Args:
        cid: Hashable conversation identifier used as the key in `convos`.

    Returns:
        The TruChain wrapper for this conversation.
    """
    if cid in convos:
        return convos[cid]

    # output_key='answer' tells the memory which chain output to store; the
    # chain returns source documents as well (return_source_documents=True).
    memory = ConversationSummaryBufferMemory(
        max_token_limit=650,
        llm=llm,
        memory_key="chat_history",
        output_key='answer',
    )
    chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=retriever,
        verbose=verb,
        return_source_documents=True,
        memory=memory,
        # Pass the history supplied by the memory through unchanged.
        get_chat_history=lambda h : h,
        max_tokens_limit=4096
    )

    # The cache was previously read but never written, so every call built a
    # brand-new chain with empty memory. Store the wrapper before returning it.
    convo = TruChain(chain, db=db)
    convos[cid] = convo
    return convo

# Get the conversation wrapper for conversation id "piotrm".
c1 = get_convo("piotrm")
# Earlier ConversationChain-based setup, left commented out:
#chain = ConversationChain(
#    llm=llm, memory=memory, verbose=verb
#)

In [None]:
# Inspect `max_tokens_limit` on the chain held by the wrapper (get_convo passes 4096).
c1.chain.max_tokens_limit

In [None]:
# NOTE(review): presumably dumps the wrapper as a JSON-compatible dict — confirm
# the meaning of `json=True` against tru_chain.
c1.dict(json=True)

In [None]:
# Ask a question through the wrapper, passing the input as a dict keyed by
# "question", and keep the result for inspection in the next cell.
ret = c1(dict(question="What is QII?"))
# Call form from the earlier ConversationChain-based setup, left commented out:
# chain.predict(input="Who is Piotr?")

In [None]:
# Display the result returned by the wrapped chain call above.
ret

In [None]:
# Display the `records` attribute of the conversation wrapper.
c1.records

In [None]:
# NOTE(review): `tc` is the wrapper from the earlier sequential-chain section,
# not `c1`; confirm this is the object meant to be inspected here.
tc.model

In [None]:
# `memory` only exists as a local inside get_convo, so the bare name raised
# NameError on a fresh kernel. Reach the conversation's memory through the chain
# held by the wrapper instead.
c1.chain.memory.chat_memory.messages

In [None]:
# `chain` only exists as a local inside get_convo, and `predict(input=...)` is the
# call form of the commented-out ConversationChain setup. Ask the follow-up
# through the wrapper, using the same call form as the "What is QII?" cell.
c1(dict(question="What does he work on?"))

In [None]:
# `memory` only exists as a local inside get_convo, so the bare name raised
# NameError on a fresh kernel. Show the memory attached to this conversation's chain.
c1.chain.memory