In [1]:
%load_ext autoreload
%autoreload 2

from keys import *
from pathlib import Path
from urllib.parse import urlparse

import pinecone
import requests
from langchain import LLMChain, PromptTemplate
from langchain.chains import (ConversationalRetrievalChain,
                              SimpleSequentialChain)
from langchain.document_loaders import (PagedPDFSplitter, TextLoader,
                                        UnstructuredHTMLLoader,
                                        UnstructuredMarkdownLoader,
                                        UnstructuredPDFLoader)
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import HuggingFacePipeline, OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Pinecone
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import torch

from keys import HUGGINGFACE_HEADERS
from slackbot import chain

from pprint import PrettyPrinter
# Shared pretty-printer; a later cell uses `pp.pformat` to format TruChain records.
pp = PrettyPrinter()

# Reference: LangChain documentation for the Pinecone vector store example.
# https://python.langchain.com/en/latest/modules/indexes/vectorstores/examples/pinecone.html?highlight=pinecone

got OPENAI_API_KEY
got PINECONE_API_KEY
got PINECONE_ENV
got HUGGINGFACE_API_KEY
got SLACK_TOKEN
got SLACK_SIGNING_SECRET


  from tqdm.autonotebook import tqdm


In [40]:
class TestModel():
    """Local HuggingFace causal LM wrapped as a LangChain LLM for quick tests.

    Construction loads the model and tokenizer, builds a ``text-generation``
    pipeline around them, wraps that pipeline in ``HuggingFacePipeline`` and
    finally creates a single-prompt ``LLMChain`` using ``Q: {question} A:``.

    Attributes set by ``__init__``:
        llm_model_id: HuggingFace model id that was loaded.
        model: the ``AutoModelForCausalLM`` instance.
        tokenizer: the matching ``AutoTokenizer`` instance.
        pipe: the transformers ``text-generation`` pipeline.
        llm: ``HuggingFacePipeline`` wrapper around ``pipe``.
        prompt: ``PromptTemplate`` with the single input variable ``question``.
        llm_chain: ``LLMChain`` combining ``prompt`` and ``llm``.
    """

    def __init__(self,
                 llm_model_id="gpt2",
                 max_new_tokens=16,
                 local_files_only=True,
                 verbose=True):
        """Load the model and assemble the pipeline, LLM wrapper and chain.

        Args:
            llm_model_id: HuggingFace model id. The default "gpt2" is pretty
                bad but is free and relatively small, which is what the tests
                need. Larger candidates that were considered:
                "decapoda-research/llama-7b-hf",
                "decapoda-research/llama-13b-hf".
            max_new_tokens: generation budget passed to the pipeline.
            local_files_only: when True (default) the model and tokenizer are
                loaded with ``local_files_only=True`` (no download attempt).
            verbose: forwarded to ``LLMChain(verbose=...)``.
        """
        # llm = OpenAI()

        self.llm_model_id = llm_model_id

        self.model = AutoModelForCausalLM.from_pretrained(
            self.llm_model_id,
            device_map='auto',
            torch_dtype=torch.float16,
            local_files_only=local_files_only)

        self.tokenizer = AutoTokenizer.from_pretrained(
            self.llm_model_id,
            local_files_only=local_files_only)

        # Pass an explicit pad token so generation does not print
        # "Setting `pad_token_id` to `eos_token_id`" on every call. Fall back
        # to the EOS token when the tokenizer defines no pad token.
        pad_token_id = self.tokenizer.pad_token_id
        if pad_token_id is None:
            pad_token_id = self.tokenizer.eos_token_id

        # NOTE(review): `early_stopping` is documented as a beam-search option
        # and `device_map` is already applied to the loaded model above; both
        # are kept exactly as in the original call to avoid changing behavior.
        self.pipe = pipeline("text-generation",
                             model=self.model,
                             tokenizer=self.tokenizer,
                             max_new_tokens=max_new_tokens,
                             device_map="auto",
                             early_stopping=True,
                             pad_token_id=pad_token_id)

        self.llm = HuggingFacePipeline(pipeline=self.pipe)

        template = """Q: {question} A:"""
        self.prompt = PromptTemplate(template=template, input_variables=["question"])
        self.llm_chain = LLMChain(prompt=self.prompt, llm=self.llm, verbose=verbose)

# Build the test model once; other cells reuse `t.llm` and `t.llm_chain`.
t = TestModel()

In [67]:
from tru_chain import TruChain

In [68]:
# Wrap the test model's LLMChain in TruChain, then call the wrapper with one
# sample input so there is something recorded to inspect.
tc = TruChain(t.llm_chain)
tc("hello there")

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt after formatting:
[32;1m[1;3mQ: hello there A:[0m


{'question': 'hello there',
 'text': ' hello there\n\n\nPTSD: hello there\n\n[16:39'}

In [69]:
# Nested-dict description of the wrapped chain (cls, module, ident, address, fields, ...).
tc.model_dict

{'cls': 'LLMChain',
 'module': 'langchain.chains.llm',
 'value': None,
 'dump': None,
 'ident': 140367896061136,
 'address': (),
 'fields': {'memory': {'cls': 'NoneType',
   'module': 'builtins',
   'value': None,
   'dump': None,
   'ident': 7653056,
   'address': ('memory',),
   'fields': None},
  'callback_manager': {'cls': 'SharedCallbackManager',
   'module': 'langchain.callbacks.shared',
   'value': <langchain.callbacks.shared.SharedCallbackManager at 0x7fa9ba4233d0>,
   'dump': None,
   'ident': 140366951101392,
   'address': ('callback_manager',),
   'fields': None},
  'verbose': True,
  'prompt': {'cls': 'PromptTemplate',
   'module': 'langchain.prompts.prompt',
   'value': None,
   'dump': None,
   'ident': 140367896058832,
   'address': ('prompt',),
   'fields': {'input_variables': ['question'],
    'partial_variables': {'cls': 'dict',
     'module': 'builtins',
     'value': {},
     'dump': None,
     'ident': 140367853876608,
     'address': ('prompt', 'partial_variables'

In [70]:
# Calls recorded by the TruChain wrapper (input, output, timing, call stack).
tc.records

[defaultdict(list,
             {(): [2176693/2176693 2023-05-06 20:48:51.621800 - 2023-05-06 20:48:51.756299
               input:
                 {'inputs': {'question': 'hello there'}}
               output:
                 {'text': ' hello there\n\n\nPTSD: hello there\n\n[16:39'}
               chain call stack:
                 [()]]})]

In [72]:
from tru_chain import Selection

# Query the wrapper with two `param` selections and one `record` selection.
# NOTE(review): `_select` is a private TruChain helper; presumably `param`
# addresses attributes of the wrapped chain and `record` addresses fields of
# a recorded call — confirm against tru_chain.py.
tc._select(select=[
    Selection(param=("chain", "prompt", "template")),
    Selection(param=("chain", "llm", "model_id")),
    Selection(record=("input", "inputs", "question"))
])

{(): [{'input': {'inputs': {'question': 'hello there'}}, 'output': {'text': ' hello there\n\n\nPTSD: hello there\n\n[16:39'}, 'error': None, 'stack': [()], 'start_time': datetime.datetime(2023, 5, 6, 20, 48, 51, 621800), 'end_time': datetime.datetime(2023, 5, 6, 20, 48, 51, 756299), 'pid': 2176693, 'tid': 2176693}]}


In [66]:
# Tree-style summary of the wrapped chain and its components.
tc.model

langchain.chains.llm.LLMChain (at ('chain',), loaded at 140367896061136)
  memory: builtins.NoneType (at ('chain', 'memory'), loaded at 7653056)
  callback_manager: langchain.callbacks.shared.SharedCallbackManager (at ('chain', 'callback_manager'), loaded at 140366951101392) = <langchain.callbacks.shared.SharedCallbackManager object at 0x7fa9ba4233d0>
  verbose = True
  prompt: langchain.prompts.prompt.PromptTemplate (at ('chain', 'prompt'), loaded at 140367896058832)
    input_variables = ['question']
    partial_variables: builtins.dict (at ('chain', 'prompt', 'partial_variables'), loaded at 140367853876608) = {}
    template = 'Q: {question} A:'
    template_format = 'f-string'
    validate_template = True
  llm: langchain.llms.huggingface_pipeline.HuggingFacePipeline (at ('chain', 'llm'), loaded at 140367896059936)
    verbose = False
    pipeline: transformers.pipelines.text_generation.TextGenerationPipeline (at ('chain', 'llm', 'pipeline'), loaded at 140367937543888) = <transform

In [None]:
# Build a two-step sequential chain on top of the shared test LLM (`t.llm`)
# and wrap it in TruChain. Every name assigned here is a kernel-global, and
# `tc` is rebound to the new wrapper.

# Step 1: question/answer prompt.
template = """Q: {question} A:"""
prompt = PromptTemplate(template=template,
                        input_variables=["question"])
llm_chain = LLMChain(prompt=prompt, llm=t.llm)

# Step 2: "reverse this sentence" prompt.
template_2 = """Reverse this sentence: {sentence}."""
prompt_2 = PromptTemplate(template=template_2,
                            input_variables=["sentence"])
llm_chain_2 = LLMChain(prompt=prompt_2, llm=t.llm)

# Run the two chains in order; presumably each step's output feeds the next
# (see LangChain's SimpleSequentialChain documentation).
seq_chain = SimpleSequentialChain(chains=[llm_chain, llm_chain_2],
                                    input_key="question",
                                    output_key="answer")

# Wrap the sequential chain and display its structure as the cell output.
tc = TruChain(seq_chain)
tc.model

In [None]:
# Run one sample input through the TruChain wrapper currently bound to `tc`.
tc("hello there")

In [None]:
# Inspect the calls recorded by the wrapper currently bound to `tc`.
tc.records

In [None]:
# Look up the object stored at address ("chains", 0).
# NOTE(review): `_get_obj_at_address` is a private TruChain helper; presumably
# this resolves to the first sub-chain of the wrapped chain — confirm.
tc._get_obj_at_address(address=("chains", 0, ))

In [None]:
# Display the structure of the chain wrapped by `tc`.
tc.model

# Notes

1. LangChain does not support classification models: https://python.langchain.com/en/latest/modules/models.html

    - We will have to figure out out-of-band retrieval and execution of feedback models that are not LLMs.

2. Steps can be added to a chain to capture text at various points in it: https://python.langchain.com/en/latest/reference/modules/chains.html#langchain.chains.SequentialChain


# Links

- https://huggingface.co/docs/transformers/v4.28.1/en/model_doc/llama#transformers.LlamaForCausalLM

- https://huggingface.co/docs/transformers/main_classes/text_generation


# Pinecone



In [None]:
from slackbot import chain
import langchain
import dill

In [None]:
from langchain.chains import (ConversationalRetrievalChain,
                              SimpleSequentialChain)

# Build two LLMChains on the shared test LLM (`t.llm`), try the second one on
# its own, then combine both into a sequential chain and run it once.
# Every name assigned here is a kernel-global that other cells read.

# Single switch for the `verbose` flag of both LLMChains below.
verb = False

# Step 1: question/answer prompt.
template = """Q: {question} A:"""
prompt = PromptTemplate(template=template, input_variables=["question"])
llm_chain = LLMChain(prompt=prompt, llm=t.llm, verbose=verb)

# Step 2: "reverse this sentence" prompt.
template_2 = """Reverse this sentence: {sentence}."""
prompt_2 = PromptTemplate(template=template_2, input_variables=["sentence"])
llm_chain_2 = LLMChain(prompt=prompt_2, llm=t.llm, verbose=verb)

# print(llm_chain.run(question="What is the average air speed velocity of a laden swallow?"))

# Smoke-test the second chain by itself.
print(llm_chain_2.run(sentence="How are you doing?"))

# Combine both steps; the result of the final `run` is the cell output.
seq_chain = SimpleSequentialChain(chains=[llm_chain, llm_chain_2], input_key="question", output_key="answer")
seq_chain.run(question="What is the average air speed velocity of a laden swallow?")

In [None]:
# Wrap the sequential chain in TruChain.
tru_chain_2 = TruChain(seq_chain)

In [None]:
# Call the bare chain first, then the TruChain wrapper, with slightly
# different questions. Only the second call's result is the cell output.
seq_chain.run(question="What is the average air speed velocity of a laden swallow? again")
tru_chain_2.run(question="What is the average air speed velocity of a laden swallow?")

In [None]:
# Pretty-print every record captured by the TruChain wrapper, one per entry.
for record in tru_chain_2.records:
    formatted = pp.pformat(record)
    print(formatted)

In [None]:
# Run the sample question through the TruChain wrapper.
tru_chain_2.run(question="What is the average air speed velocity of a laden swallow?")

In [None]:
# Display the structure of the chain wrapped by `tru_chain_2`.
tru_chain_2.model

In [None]:
import inspect
# Walk the current call stack and, for each frame, print the function name
# on one line and the names of that frame's local variables on the next.
for frame_info in inspect.stack():
    frame = frame_info.frame
    print(frame_info.function, frame.f_locals.keys(), sep="\n")