In [None]:
%load_ext autoreload
%autoreload 2

from keys import *
from pathlib import Path
from urllib.parse import urlparse
from tinydb import TinyDB

import pinecone
import requests
from langchain import LLMChain, PromptTemplate
from langchain.memory import ConversationBufferWindowMemory
from langchain.chains import (ConversationalRetrievalChain,
                              SimpleSequentialChain)
from langchain.document_loaders import (PagedPDFSplitter, TextLoader,
                                        UnstructuredHTMLLoader,
                                        UnstructuredMarkdownLoader,
                                        UnstructuredPDFLoader)
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import HuggingFacePipeline, OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Pinecone
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import torch

from keys import HUGGINGFACE_HEADERS
# from slackbot import obj

from pprint import PrettyPrinter
pp = PrettyPrinter()

# https://python.langchain.com/en/latest/modules/indexes/vectorstores/examples/pinecone.html?highlight=pinecone

In [None]:
class TestModel:
    """Local Hugging Face text-generation model wired into a LangChain ``LLMChain``.

    Construction loads the causal LM and its tokenizer with
    ``local_files_only=True``, so both must already be present in the local
    Hugging Face cache. They are wrapped in a ``transformers``
    text-generation pipeline, exposed to LangChain through
    ``HuggingFacePipeline``, and combined with a one-variable prompt and a
    windowed conversation memory into ``self.llm_chain``.

    Args:
        llm_model_id: Hugging Face model id to load. The default ``"gpt2"``
            is pretty bad but is used for tests because it is free and
            relatively small. Other candidates noted for this notebook:
            ``"decapoda-research/llama-7b-hf"`` and
            ``"decapoda-research/llama-13b-hf"``.
        max_new_tokens: ``max_new_tokens`` handed to the generation pipeline.
        memory_window: ``k`` handed to ``ConversationBufferWindowMemory``.
    """

    def __init__(self, llm_model_id="gpt2", max_new_tokens=16, memory_window=2):
        # llm = OpenAI()

        self.llm_model_id = llm_model_id

        self.model = AutoModelForCausalLM.from_pretrained(
            self.llm_model_id,
            device_map='auto',
            torch_dtype=torch.float16,
            local_files_only=True)

        self.tokenizer = AutoTokenizer.from_pretrained(self.llm_model_id,
                                                       local_files_only=True)

        self.pipe = pipeline("text-generation",
                             model=self.model,
                             tokenizer=self.tokenizer,
                             max_new_tokens=max_new_tokens,
                             device_map="auto",
                             early_stopping=True)

        # LangChain-facing wrapper around the transformers pipeline.
        self.llm = HuggingFacePipeline(pipeline=self.pipe)

        self.memory = ConversationBufferWindowMemory(k=memory_window)

        # Single-variable question/answer prompt used by the demo chain.
        template = """Q: {question} A:"""
        self.prompt = PromptTemplate(template=template, input_variables=["question"])
        self.llm_chain = LLMChain(prompt=self.prompt, llm=self.llm, verbose=True, memory=self.memory)

t = TestModel()

In [None]:
from tru_chain import TruChain
from tru_db import Record, TruTinyDB

In [None]:
db = TruTinyDB("temp.json")
tc = TruChain(t.llm_chain)

tc.model

In [None]:
"something" in db.models

In [None]:
db.insert_model(model_name=tc.model_name, model=tc.model)

In [None]:
tc.model_name

In [None]:
db.models.contains(doc_id=tc.model_name)

In [None]:
tc("hello there")
tc("hello there general kanobi")

In [None]:
tc.db.select(Record.chain._call.args.inputs, Record.chain._call.rets)

In [None]:
import json
# Debug probe of the `default=` hook: the `TinyDB` class object itself is not
# JSON-serializable, so `json.dumps` passes it to the lambda, which replaces
# it with a placeholder string. Nothing from the record store is read here.
json.dumps(TinyDB, default=lambda o: "lol")

In [None]:
# Select, per record: the prompt template, the prompt passed to the LLM call,
# the input question and the returned text, filtered on the returned text.
# NOTE: keep `!= None` as written. The comparison is applied to a `Record`
# path expression to build the `where` filter (presumably through an
# overloaded `!=` — confirm in tru_db); `is not None` cannot be overloaded and
# would be evaluated by Python immediately instead of being turned into a query.
tc.db.select(
    Record.chain.prompt.template,
    Record.chain.llm._call.args.prompt,
    Record.chain._call.args.inputs.question,
    Record.chain._call.rets.text,
    where=Record.chain._call.rets.text != None
)

In [None]:
# Build a two-step chain on the local test LLM and wrap it for recording.

# Step 1: question -> answer.
template = """Q: {question} A:"""
prompt = PromptTemplate(template=template,
                        input_variables=["question"])
llm_chain = LLMChain(prompt=prompt, llm=t.llm)

# Step 2: ask the model to reverse whatever sentence it is given.
template_2 = """Reverse this sentence: {sentence}."""
prompt_2 = PromptTemplate(template=template_2,
                            input_variables=["sentence"])
llm_chain_2 = LLMChain(prompt=prompt_2, llm=t.llm)

# Run the two steps back to back; the combined chain takes "question" and
# reports its result under "answer".
seq_chain = SimpleSequentialChain(chains=[llm_chain, llm_chain_2],
                                    input_key="question",
                                    output_key="answer")

# NOTE: this rebinds `tc`, which earlier wrapped `t.llm_chain`; cells below
# that use `tc` now refer to the sequential chain.
tc = TruChain(seq_chain)
tc.model

In [None]:
tc("hello there")
tc("hello there mister bond")

In [None]:
tc.db.select(Record)

# Notes

1. Langchain does not have support for classification models: https://python.langchain.com/en/latest/modules/models.html

    - Will have to figure out out-of-band retrieval and execution of feedback models that are not LLMs.

2. Can add steps to chain to capture text at various points in a chain: https://python.langchain.com/en/latest/reference/modules/chains.html#langchain.chains.SequentialChain .


# Links

- https://huggingface.co/docs/transformers/v4.28.1/en/model_doc/llama#transformers.LlamaForCausalLM

- https://huggingface.co/docs/transformers/main_classes/text_generation


# Pinecone



In [None]:
# from slackbot
import langchain
import dill

In [None]:
from langchain.chains import (ConversationalRetrievalChain,
                              SimpleSequentialChain)

# Same two-step chain as the earlier sequential-chain cell, rebuilt with
# verbose output and run directly (without the TruChain wrapper).
# NOTE: this rebinds `template`, `prompt`, `llm_chain`, `llm_chain_2` and
# `seq_chain` at notebook scope.
verb = True

# Step 1: question -> answer.
template = """Q: {question} A:"""
prompt = PromptTemplate(template=template, input_variables=["question"])
llm_chain = LLMChain(prompt=prompt, llm=t.llm, verbose=verb)

# Step 2: ask the model to reverse whatever sentence it is given.
template_2 = """Reverse this sentence: {sentence}."""
prompt_2 = PromptTemplate(template=template_2, input_variables=["sentence"])
llm_chain_2 = LLMChain(prompt=prompt_2, llm=t.llm, verbose=verb)

# print(llm_chain.run(question="What is the average air speed velocity of a laden swallow?"))

# Sanity-check the second step on its own before chaining.
print(llm_chain_2.run(sentence="How are you doing?"))

# Chain both steps; the last expression is shown as the cell output.
seq_chain = SimpleSequentialChain(chains=[llm_chain, llm_chain_2], input_key="question", output_key="answer")
seq_chain.run(question="What is the average air speed velocity of a laden swallow?")

In [None]:
tru_chain_2 = TruChain(seq_chain)

In [None]:
seq_chain.run(question="What is the average air speed velocity of a laden swallow? again")
tru_chain_2.run(question="What is the average air speed velocity of a laden swallow?")

In [None]:
for r in tru_chain_2.db.select(Record).iterrows():
    print(pp.pformat(r[1][0]))

In [None]:
tru_chain_2.run(question="What is the average air speed velocity of a laden swallow?")

# TruBot testing

In [None]:
from langchain.memory import ConversationBufferMemory, ConversationBufferWindowMemory, ConversationSummaryBufferMemory
from langchain.chains import ConversationChain

# Shared state for the TruBot experiment: Pinecone-backed retriever, OpenAI
# LLM, and the per-conversation registry used by get_convo().
# PINECONE_API_KEY / PINECONE_ENV are not assigned in this notebook; they
# presumably come from `from keys import *` — verify against keys.py.
pinecone.init(
    api_key=PINECONE_API_KEY,  # find at app.pinecone.io
    environment=PINECONE_ENV  # next to api key in console
)

# Name of an index that must already exist (it is opened, not created, below).
index_name = "llmdemo"

verb = True

embedding = OpenAIEmbeddings(model='text-embedding-ada-002')  # 1536 dims

docsearch = Pinecone.from_existing_index(
    index_name=index_name, embedding=embedding
)

llm = OpenAI(temperature=0, max_tokens=128, verbose=verb)
retriever = docsearch.as_retriever()

# Conversation id -> chain registry consulted by get_convo().
convos = dict()

# db = TinyDB("test.records.json", default=lambda o: f"NON-SERIALIZED OBJECT: {o}")
# NOTE(review): rebinds the earlier `db` (opened on "temp.json") to a
# TruTinyDB built with no arguments; get_convo() does not pass `db` to
# TruChain, so whether this instance is used is not visible here — confirm.
db = TruTinyDB()

def get_convo(cid):
    """Return the instrumented conversation chain for conversation id ``cid``.

    The first request for a given ``cid`` builds a
    ``ConversationalRetrievalChain`` with its own
    ``ConversationSummaryBufferMemory``, wraps it in ``TruChain`` and stores
    it in the notebook-level ``convos`` dict. Later requests for the same
    ``cid`` return that stored object, so the conversation keeps its memory.

    Args:
        cid: Hashable conversation identifier used as the ``convos`` key.

    Returns:
        The ``TruChain`` wrapping this conversation's retrieval chain.
    """
    if cid in convos:
        return convos[cid]

    memory = ConversationSummaryBufferMemory(
        max_token_limit=650,
        llm=llm,
        memory_key="chat_history",
        output_key='answer')
    chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=retriever,
        verbose=verb,
        return_source_documents=True,
        memory=memory,
        # Identity function: the history value is handed to the chain as-is.
        get_chat_history=lambda h: h,
        max_tokens_limit=4096)

    # Register the new conversation. The original never wrote to `convos`, so
    # the lookup above could not hit and every call started from empty memory.
    convo = TruChain(chain)
    convos[cid] = convo
    return convo

c1 = get_convo("piotrm")
#chain = ConversationChain(
#    llm=llm, memory=memory, verbose=verb
#)

In [None]:
c1("How can I measure performance?")

In [None]:
c1("What metrics can I use to measure it?")

In [None]:
len(c1.db.select())

In [None]:
ret = c1(dict(question="What is QII?"))
# chain.predict(input="Who is Piotr?")

In [None]:
model = c1.db.select(Record).iloc[-1][0]

In [None]:
from tru_db import TruDB

for p, v in TruDB.leafs(model):
    print(f"{p}:{type(v)} = {pp.pformat(v)}")

In [None]:
tc.model

In [None]:
from tru_db import TruTinyDB, Record
from IPython.display import JSON
db = TruTinyDB("slackbot.json")

In [None]:
# Render the last six selected rows as collapsed JSON widgets: chain inputs,
# chain outputs, then one widget per retrieved document.
# NOTE: the loop variable `record` is left bound after the loop and is read
# by the following cell, so it must keep this name.
# NOTE(review): `display` is not imported in this notebook; presumably the
# IPython-provided builtin — confirm if this is ever run outside IPython.
for idx, record in list(db.select(
    Record.chain._call.args.inputs,
    Record.chain.combine_docs_chain._call.args.inputs.input_documents,
    Record.chain._call.rets
    ).iterrows())[-6:]:
    # Columns are read by position, in the order passed to select() above.
    ins = record[0]
    docs = record[1]
    ans = record[2]
    #print("\nINPUTS")#
    display(JSON(ins, expanded=False, root="inputs"))#", ins, "\n")
    #print("\nOUTPUTS")#, ans, "\n")
    display(JSON(ans, expanded=False, root="outputs", hide=['']))
    for doc in docs:
        display(JSON(doc, expanded=False, root="chunk"))
        #display(JSON(doc['metadata']))
        #display(JSON(doc['page_content']))
        # print("\n")
    
    # Blank lines to visually separate consecutive records in the output.
    print("\n")
    print("\n")
    # display(JSON(record, expanded=False))
    # display(JSON(sources))
    #for k, v in TruDB.leafs(record):
    #    print(f"{k}:{type(v)} = ")
    #    pp.pprint(v)
    # print(record)

In [None]:
JSON(data=record, expanded=False)

In [None]:
question: str = "How can I measure performance?"
# NOTE(review): no notebook-level `chain` is assigned in the visible cells —
# the only `chain` is a local inside get_convo(), and the ConversationChain
# assignment is commented out — so on a fresh kernel this line raises
# NameError. Presumably it should go through `c1`; confirm what a TruChain
# call returns before indexing the result with 'answer'.
response = chain(question)['answer']


In [None]:
llm("Who is Reimi?")

In [None]:
prompt = """
You are a RELEVANCE classifier, providing the relevance of the given statement to the given question.
Provide all responses only as a number from 1 to 10 where 1 is the least relevant and 10 is the most relevant.
Never elaborate. 

STATEMENT: Most recently, Shameek was Group Chief Data Officer at Standard Chartered Bank, where he helped the bank explore and adopt AI in multiple areas (e.g., credit, financial crime compliance, customer analytics, surveillance), and shaped the bank’s internal approach to responsible AI.

QUESTION: Who is Shayak?

RELEVANCE:"""
llm(prompt)

In [None]:
prompt = """
You are a RELEVANCE classifier, providing the relevance of the given statement to the given question.
Provide all responses only as a number from 1 to 10 where 1 is the least relevant and 10 is the most relevant.
Never elaborate. 

STATEMENT: When Shayak started building production grade machine learning models for algorithmic trading 10 years ago, he realized the need for putting the ‘science’ back in ‘data science’. Since then, he has been building systems and leading research to make machine learning and big data systems more explainable, privacy compliant, and fair. Shayak’s research at Carnegie Mellon University introduced a number of pioneering breakthroughs to the field of explainable AI. Shayak obtained his PhD in Computer Science from Carnegie Mellon University and BTech in Computer Science from the Indian Institute of Technology, Delhi.

QUESTION: Who is Shayak?

RELEVANCE:"""
llm(prompt)

In [None]:
prompt = """
You are a RELEVANCE classifier, providing the relevance of the given statement to the given question.
Provide all responses only as a number from 1 to 10 where 1 is the least relevant and 10 is the most relevant.
Never elaborate. 

STATEMENT: Shameek has spent most of his career in driving responsible adoption of data analytics/ AI in the financial services industry. Most recently, Shameek was Group Chief Data Officer at Standard Chartered Bank, where he helped the bank explore and adopt AI in multiple areas and shaped the bank’s internal approach to responsible AI. He plays an active role in the future of AI as a member of the Bank of England’s AI Public-Private Forum and the OECD Global Partnership on AI.

QUESTION: Who is Shayak?

RELEVANCE:"""
llm(prompt)

In [None]:
# Parameterised form of the relevance-classifier prompt used literally in the
# cells above: fill the {statement} and {question} placeholders before
# sending it to the LLM. It is only assigned here, not formatted or sent.
# NOTE(review): unlike the literal prompts, this text ends with a newline
# after "RELEVANCE:" — confirm whether that difference is intended.
prompt = """
You are a RELEVANCE classifier, providing the relevance of the given statement to the given question.
Provide all responses only as a number from 1 to 10 where 1 is the least relevant and 10 is the most relevant.
Never elaborate. 

STATEMENT: {statement}

QUESTION: {question}

RELEVANCE:
"""

In [None]:
prompt = """
Translate the following statement to the same language as the question.

Statement: 2.2 Quantitative Input In\ruence (QII)\nAs stressed in the previous section, at the heart of our framework developed in this paper is an\ninstance-based explanation approach - the Quantitative Input In\ruence (QII) approach. We\noutline it in this section, before moving to combining it with other approaches. Traditionally,\nin\ruence measures have been studied for feature selection, i.e. informing the choice of which\nvariables to include in the model [8]. Recently, in\ruence measures have been used as explain-\nability mechanisms [1, 7, 9] for complex models. In\ruence measures explain the behavior of\nmodels by indicating the relative importance of inputs and their direction. While the space\nof potential in\ruence measures is quite large, we point out two requirements that they need\nto satisfy: (i) taking into account variable correlations, and (ii) capturing feature interactions.\nWhen inputs to a model tend to move together (e.g. income and loan size), simple measures\nof association (such as the correlation between income and defaults) do not distinguish the di-\nrection in which each a\u000bects outcomes. In complex non-linear classi\fers e\u000bects arise out of the\ninteraction between inputs (e.g. if only individuals with high age andhigh income are deemed\ncreditworthy), and therefore in\ruence measures should account for these.\nQII [1] controls for correlations by employing randomising interventions on inputs, and\naccounts for input interactions by measuring the average marginal contributions of inputs over\nall sets of features they may interact with. In other words, with this technique, we attempt\nvarious changes of input variables and analyse what changes to output variables they produce.\nTo see how this works in practice, we focus on the example highlighted in this paper,\nmortgage defaults. 
Suppose we are interested in the in\ruence of a particular input|say, the\nborrower's income|on the probability of default estimated by the model, which becomes our\nquantity of interest . (Any property of the system conditional on a particular distribution of\ninputs can be a quantity of interest for measuring QII.) In what follows, we start by concen-\ntrating on individual outcomes, i.e. on questions such as \\why was the loan application of this\nindividual rejected?\" or \\what would it take for that particular individual to lower their default\nprobability?\". (These are type-1 explanations according to Table 1.) We will then build up\nfrom these individual-level questions to the the overall functioning of the model, and focus on\n7

Question: Por favor expliqueme, que es QII?

Translated Statement:"""
llm(prompt)

In [None]:
from keys import *
import tru_chain
from langchain.chains import LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.prompts import HumanMessagePromptTemplate, ChatPromptTemplate, PromptTemplate



def run_chain(prompt):
    """Send ``prompt`` through a freshly built, TruChain-wrapped chat chain.

    Each call builds a single-message chat prompt, a ``gpt-3.5-turbo`` chat
    model (temperature 0.9) and an ``LLMChain`` over them, wraps the chain in
    ``tru_chain.TruChain`` and invokes the wrapper with ``prompt``.

    Args:
        prompt: Text substituted for ``{prompt}`` in the human message.

    Returns:
        Whatever calling the ``TruChain`` wrapper returns.
    """
    # Human message carrying the user's text inside a fixed instruction.
    instruction = PromptTemplate(
        template="Provide a helpful response with relevant background information for the following: {prompt}",
        input_variables=["prompt"],
    )
    human_message = HumanMessagePromptTemplate(prompt=instruction)
    chat_prompt = ChatPromptTemplate.from_messages([human_message])

    chat_llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0.9)

    wrapped_chain = tru_chain.TruChain(LLMChain(llm=chat_llm, prompt=chat_prompt))
    return wrapped_chain(prompt)


In [None]:
run_chain("hello")