### Necessary imports

In [None]:
!pip install -q -U torch datasets tensorflow langchain playwright html2text sentence_transformers faiss-cpu
!pip install -q peft==0.4.0 bitsandbytes==0.40.2 trl==0.4.7
!pip install -U git+https://github.com/huggingface/transformers.git
!pip install -U git+https://github.com/huggingface/accelerate.git

In [2]:
import os
import torch
from transformers import (
  AutoTokenizer,
  AutoModelForCausalLM,
  BitsAndBytesConfig,
  pipeline
)

from transformers import BitsAndBytesConfig

from langchain.text_splitter import CharacterTextSplitter
from langchain.document_transformers import Html2TextTransformer
from langchain.document_loaders import AsyncChromiumLoader

from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

from langchain.prompts import PromptTemplate
from langchain.schema.runnable import RunnablePassthrough
from langchain.llms import HuggingFacePipeline
from langchain.chains import LLMChain

import nest_asyncio
import transformers

In [None]:
#################################################################
# Tokenizer
#################################################################

model_name='mistralai/Mistral-7B-Instruct-v0.1'

model_config = transformers.AutoConfig.from_pretrained(
    model_name,
)

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

In [4]:
from langchain.llms import HuggingFaceHub
from google.colab import userdata
import os

os.environ['HUGGINGFACEHUB_API_TOKEN'] = userdata.get('HUGGINGFACEHUB_API_TOKEN')

model = HuggingFaceHub(repo_id=model_name, model_kwargs={
    # "tokenizer":tokenizer,
    "temperature":0.01,"repetition_penalty":1.1,"return_full_text":True,"max_new_tokens":1000})

  warn_deprecated(


In [None]:
import locale
locale.getpreferredencoding = lambda: "UTF-8"
!npx playwright install

In [None]:
from langchain.text_splitter import CharacterTextSplitter
from langchain.document_loaders import AsyncChromiumLoader
from langchain.document_transformers import Html2TextTransformer
from langchain.vectorstores import FAISS
import nest_asyncio

nest_asyncio.apply()

articles = ["https://www.fantasypros.com/2023/11/rival-fantasy-nfl-week-10/",
            "https://www.fantasypros.com/2023/11/5-stats-to-know-before-setting-your-fantasy-lineup-week-10/",
            "https://www.fantasypros.com/2023/11/nfl-week-10-sleeper-picks-player-predictions-2023/",
            "https://www.fantasypros.com/2023/11/nfl-dfs-week-10-stacking-advice-picks-2023-fantasy-football/",
            "https://www.fantasypros.com/2023/11/players-to-buy-low-sell-high-trade-advice-2023-fantasy-football/"]

# Scrapes the blogs above
loader = AsyncChromiumLoader(articles)
docs = loader.load()

# Converts HTML to plain text
html2text = Html2TextTransformer()
docs_transformed = html2text.transform_documents(docs)

# Chunk text
text_splitter = CharacterTextSplitter(chunk_size=100,
                                      chunk_overlap=0)
chunked_documents = text_splitter.split_documents(docs_transformed)

# Load chunked documents into the FAISS index
db = FAISS.from_documents(chunked_documents,
                          HuggingFaceEmbeddings(model_name='sentence-transformers/all-mpnet-base-v2'))


# Connect query to FAISS index using a retriever
retriever = db.as_retriever(
    search_type="similarity",
    search_kwargs={'k': 4}
)

In [10]:
query = "What did Laporta say?"
docs = db.similarity_search(query)
print(docs[0].page_content)

. I tapped LaPorta due to a more favorable matchup.


In [71]:
from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain.embeddings.huggingface import HuggingFaceEmbeddings

prompt_template = """
### [INST]
Instruction: Answer the question based on your
fantasy football knowledge. Here is context to help:

{context}

### QUESTION:
{question}

[/INST]
 """

# Create prompt from prompt template
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

# Create llm chain
llm_chain = LLMChain(llm=model, prompt=prompt)

In [72]:
llm_chain.invoke({"context":"",
                  "question": "Should I pick up Alvin Kamara for my fantasy team?"})

{'context': '',
 'question': 'Should I pick up Alvin Kamara for my fantasy team?',
 'text': '\n### [INST]\nInstruction: Answer the question based on your\nfantasy football knowledge. Here is context to help:\n\n\n\n### QUESTION:\nShould I pick up Alvin Kamara for my fantasy team?\n\n[/INST]\n \nBased on your fantasy football knowledge, it depends on several factors such as the specific league rules, roster requirements, and the current performance of Alvin Kamara. However, in general, Alvin Kamara can be a valuable addition to any fantasy football team due to his versatility and potential for high production. He is a dual-threat running back who can also catch passes, making him a valuable asset in PPR (Punt Returner) leagues. Additionally, he has shown consistency in his performance over the past few seasons, with multiple top-24 finishes in standard leagues. Therefore, if you have an open spot on your roster and are looking for a reliable player to add depth to your offense, Alvin Ka

In [34]:
standalone_query_generation_llm = HuggingFaceHub(repo_id=model_name, model_kwargs={
    "temperature":0.01,"repetition_penalty":1.1,"return_full_text":True,"max_new_tokens":1000})

response_generation_llm = HuggingFaceHub(repo_id=model_name, model_kwargs={
    "temperature":0.01,"repetition_penalty":1.1,"return_full_text":True,"max_new_tokens":1000})

In [63]:
from langchain.prompts.prompt import PromptTemplate
from langchain_core.prompts.chat import ChatPromptTemplate
_template = """
[INST]
Given the following conversation and a follow up question,
rephrase the follow up question to be a standalone question, in its original language,
that can be used to query a FAISS index. This query will be used to retrieve documents with additional context.

Let me share a couple examples.

If you do not see any chat history, you MUST return the "Follow Up Input" as is:
```
Chat History:
Follow Up Input: How is Lawrence doing?
Standalone Question:
How is Lawrence doing?
```

If this is the second question onwards, you should properly rephrase the question like this:
```
Chat History:
Human: How is Lawrence doing?
AI:
Lawrence is injured and out for the season.
Follow Up Input: What was his injury?
Standalone Question:
What was Lawrence's injury?
```

Now, with those examples, here is the actual chat history and input question.
Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:
[your response here]
[/INST]
"""

STANDALONE_QUESTION_PROMPT = PromptTemplate.from_template(_template)

In [64]:
from langchain.schema import format_document
from langchain_core.messages import AIMessage, HumanMessage, get_buffer_string
from langchain_core.runnables import RunnableParallel
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain.memory import ConversationBufferMemory
from operator import itemgetter


# Instantiate ConversationBufferMemory
memory = ConversationBufferMemory(
 return_messages=True, output_key="answer", input_key="question"
)
# First, load the memory to access chat history
loaded_memory = RunnablePassthrough.assign(
 chat_history=RunnableLambda(memory.load_memory_variables) | itemgetter("history"),
)
# Define the standalone_question step to process the question and chat history
standalone_question = {
 "standalone_question": {
 "question": lambda x: x["question"],
 "chat_history": lambda x: get_buffer_string(x["chat_history"]),
 }
 | STANDALONE_QUESTION_PROMPT,
}
# Finally, output the result of the CONDENSE_QUESTION_PROMPT
output_prompt = {
 "standalone_question_prompt_result": itemgetter("standalone_question"),
}
# Combine the steps into a final chain
standalone_query_generation_prompt = loaded_memory | standalone_question | output_prompt

In [65]:
from langchain.memory import ConversationBufferMemory

# Instantiate ConversationBufferMemory
memory = ConversationBufferMemory(
 return_messages=True, output_key="answer", input_key="question"
)
# First, load the memory to access chat history
loaded_memory = RunnablePassthrough.assign(
 chat_history=RunnableLambda(memory.load_memory_variables) | itemgetter("history"),
)

In [69]:
inputs = {"question": "how is mahomes doing?"}
memory.save_context(inputs, {"answer": "mahomes is not looking great! bench him!"})

In [70]:
inputs = {"question": "who should I replace him with?"}
standalone_query_generation_prompt.invoke(inputs)['standalone_question_prompt_result']

StringPromptValue(text='\n[INST]\nGiven the following conversation and a follow up question,\nrephrase the follow up question to be a standalone question, in its original language,\nthat can be used to query a FAISS index. This query will be used to retrieve documents with additional context.\n\nLet me share a couple examples.\n\nIf you do not see any chat history, you MUST return the "Follow Up Input" as is:\n```\nChat History:\nFollow Up Input: How is Lawrence doing?\nStandalone Question:\nHow is Lawrence doing?\n```\n\nIf this is the second question onwards, you should properly rephrase the question like this:\n```\nChat History:\nHuman: How is Lawrence doing?\nAI:\nLawrence is injured and out for the season.\nFollow Up Input: What was his injury?\nStandalone Question:\nWhat was Lawrence\'s injury?\n```\n\nNow, with those examples, here is the actual chat history and input question.\nChat History:\n\nFollow Up Input: who should I replace him with?\nStandalone question:\n[your respon

In [31]:
def runnable():
  standalone_query_generation_chain = (
    loaded_memory
    | {
    "question": lambda x: x["question"],
    "chat_history": lambda x: get_buffer_string(x["chat_history"]),
    }
    | STANDALONE_QUESTION_PROMPT
    | standalone_query_generation_llm
  )
  return standalone_query_generation_chain

inputs = {"question": "who should I replace him with?"}
runnable().invoke(inputs)

'\n[INST]\nGiven the following conversation and a follow up question,\nrephrase the follow up question to be a standalone question, in its original language,\nthat can be used to query a FAISS index. This query will be used to retrieve documents with additional context.\n\nLet me share a couple examples.\n\nIf you do not see any chat history, you MUST return the "Follow Up Input" as is:\n```\nChat History:\nFollow Up Input: How is Lawrence doing?\nStandalone Question:\nHow is Lawrence doing?\n```\n\nIf this is the second question onwards, you should properly rephrase the question like this:\n```\nChat History:\nHuman: How is Lawrence doing?\nAI:\nLawrence is injured and out for the season.\nFollow Up Input: What was his injury?\nStandalone Question:\nWhat was Lawrence\'s injury?\n```\n\nNow, with those examples, here is the actual chat history and input question.\nChat History:\nHuman: how is mahomes doing?\nAI: mahomes is not looking great! bench him!\nFollow Up Input: who should I re

In [50]:
template = """
[INST]
Answer the question based only on the following context:
{context}

Question: {standalone_question}
[/INST]
"""

RESPONSE_PROMPT = PromptTemplate.from_template(template)

DEFAULT_DOCUMENT_PROMPT = PromptTemplate.from_template(template="{page_content}")
def _combine_documents(
 docs, document_prompt=DEFAULT_DOCUMENT_PROMPT, document_separator="\n\n"
):
 doc_strings = [format_document(doc, document_prompt) for doc in docs]
 return document_separator.join(doc_strings)

# First we add a step to load memory
# This adds a "memory" key to the input object
loaded_memory = RunnablePassthrough.assign(
 chat_history=RunnableLambda(memory.load_memory_variables) | itemgetter("history"),
)

# Now we calculate the standalone question
standalone_question = {
    "standalone_question": {
        "question": lambda x: x["question"],
        "chat_history": lambda x: get_buffer_string(x["chat_history"]),
    }
    | STANDALONE_QUESTION_PROMPT
    | standalone_query_generation_llm,
}
# Now we retrieve the documents
retrieved_documents = {
    "docs": itemgetter("standalone_question") | retriever,
    "standalone_question": lambda x: x["standalone_question"],
}
# Now we construct the inputs for the final prompt
final_inputs = {
    "context": lambda x: _combine_documents(x["docs"]),
    "standalone_question": itemgetter("standalone_question"),
}

# And finally, we do the part that returns the answers
answer = {
    "answer": final_inputs | RESPONSE_PROMPT | response_generation_llm,
    "standalone_question": itemgetter("standalone_question"),
    "context": final_inputs["context"]
}
# And now we put it all together!
final_chain = loaded_memory | standalone_question | retrieved_documents | answer

In [51]:
inputs = {"question": "How is Mahomes doing?"}
result = final_chain.invoke(inputs)
result

{'answer': '\n[INST]\nAnswer the question based only on the following context:\nCurrent Article\n\n|\n\n__2 min read\n\nCurrent Article\n\n|\n\n__3 min read\n\nCurrent Article\n\n|\n\n__3 min read\n\nCurrent Article\n\n|\n\n__3 min read\n\nQuestion: \n[INST]\nGiven the following conversation and a follow up question,\nrephrase the follow up question to be a standalone question, in its original language,\nthat can be used to query a FAISS index. This query will be used to retrieve documents with additional context.\n\nLet me share a couple examples.\n\nIf you do not see any chat history, you MUST return the "Follow Up Input" as is:\n```\nChat History:\nFollow Up Input: How is Lawrence doing?\nStandalone Question:\nHow is Lawrence doing?\n```\n\nIf this is the second question onwards, you should properly rephrase the question like this:\n```\nChat History:\nHuman: How is Lawrence doing?\nAI:\nLawrence is injured and out for the season.\nFollow Up Input: What was his injury?\nStandalone Q

In [None]:
# Save previous question and answer to memory
memory.save_context(inputs, {"answer": result["answer"]})

inputs = {"question": "Who are good alternatives to him right now?"}
result = final_chain.invoke(inputs)
result

In [73]:
standalone_query_generation_llm = HuggingFaceHub(repo_id=model_name, model_kwargs={
    "temperature":0.01,"repetition_penalty":1.1,
    # "return_full_text":True,
    "max_new_tokens":1000
    })

response_generation_llm = HuggingFaceHub(repo_id=model_name, model_kwargs={
    "temperature":0.01,"repetition_penalty":1.1,
    # "return_full_text":True,
    "max_new_tokens":1000
    })
# Instantiate ConversationBufferMemory
memory = ConversationBufferMemory(
 return_messages=True, output_key="answer", input_key="question"
)
loaded_memory = RunnablePassthrough.assign(
 chat_history=RunnableLambda(memory.load_memory_variables) | itemgetter("history"),
)
standalone_question = {
    "standalone_question": {
        "question": lambda x: x["question"],
        "chat_history": lambda x: get_buffer_string(x["chat_history"]),
    }
    | STANDALONE_QUESTION_PROMPT
    | standalone_query_generation_llm,
}
# Now we retrieve the documents
retrieved_documents = {
    "docs": itemgetter("standalone_question") | retriever,
    "standalone_question": lambda x: x["standalone_question"],
}
# Now we construct the inputs for the final prompt
final_inputs = {
    "context": lambda x: _combine_documents(x["docs"]),
    "standalone_question": itemgetter("standalone_question"),
}
answer = {
    "answer": final_inputs | RESPONSE_PROMPT | response_generation_llm
}
final_chain = loaded_memory | standalone_question | retrieved_documents | answer
inputs = {"question": "Should I start Gibbs next week for fantasy?"}
result = final_chain.invoke(inputs)
result

{'answer': '\n[INST]\nAnswer the question based only on the following context:\nCurrent Article\n\n|\n\n__2 min read\n\nCurrent Article\n\n|\n\n__3 min read\n\nCurrent Article\n\n|\n\n__3 min read\n\nCurrent Article\n\n|\n\n__3 min read\n\nQuestion: \n[INST]\nGiven the following conversation and a follow up question,\nrephrase the follow up question to be a standalone question, in its original language,\nthat can be used to query a FAISS index. This query will be used to retrieve documents with additional context.\n\nLet me share a couple examples.\n\nIf you do not see any chat history, you MUST return the "Follow Up Input" as is:\n```\nChat History:\nFollow Up Input: How is Lawrence doing?\nStandalone Question:\nHow is Lawrence doing?\n```\n\nIf this is the second question onwards, you should properly rephrase the question like this:\n```\nChat History:\nHuman: How is Lawrence doing?\nAI:\nLawrence is injured and out for the season.\nFollow Up Input: What was his injury?\nStandalone Q

In [59]:
mistral_llm = HuggingFaceHub(repo_id=model_name, model_kwargs={
    "temperature":0.01,"repetition_penalty":1.1,
    # "return_full_text":True,"max_new_tokens":1000
    })

prompt_template = """
### [INST] Instruction: Answer the question based on your fantasy football knowledge. Here is context to help:

{context}

### QUESTION:
{question} [/INST]
 """

# Create prompt from prompt template
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

# Create llm chain
llm_chain = LLMChain(llm=mistral_llm, prompt=prompt)

rag_chain = (
 {"context": retriever, "question": RunnablePassthrough()}
    | llm_chain
)

result = rag_chain.invoke("Should I start Gibbs next week for fantasy?")

In [60]:
result['text']

"\n### [INST] Instruction: Answer the question based on your fantasy football knowledge. Here is context to help:\n\n[Document(page_content='This week, Harris faces the bottom-of-the-barrel Packers’ run defense that\\nallows the ninth-most fantasy points per game to the running back position.\\nHarris will give you a higher-volume RB with a low rostership percentage this\\nweek.', metadata={'source': 'https://www.fantasypros.com/2023/11/nfl-dfs-week-10-stacking-advice-picks-2023-fantasy-football/'}), Document(page_content='could start cutting into his workload. Furthermore, his rest of the season\\nschedule isn’t fantasy-friendly. Try to flip Edwards and a WR3 for Kenneth\\nWalker or Tony Pollard', metadata={'source': 'https://www.fantasypros.com/2023/11/players-to-buy-low-sell-high-trade-advice-2023-fantasy-football/'}), Document(page_content='“**Gus Edwards** has been on fire lately. He is the RB1 over the past three\\nweeks, averaging 22.2 half-point PPR fantasy points and two rushi