In [1]:
import os

import torch
from langchain import PromptTemplate, HuggingFaceHub, LLMChain
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFacePipeline
from transformers import (
    AutoModelForCausalLM,
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
    pipeline,
)

  from .autonotebook import tqdm as notebook_tqdm


Examples are taken from this YouTube video: https://www.youtube.com/watch?v=Kn7SX2Mx_Jk

## Using HuggingFaceHub

In [None]:
# Shared prompt text: wraps the user's question and ends with a cue asking
# the model to reason step by step.
template = (
    "Question: {question}\n"
    "\n"
    "Answer: Let's think step by step."
)

# {question} is the only placeholder the template exposes.
prompt_tpl = PromptTemplate(
    input_variables=["question"],
    template=template,
)

In [None]:
#
# Example from video using HuggingFaceHub (NOT OFFLINE): the model is served
# remotely by the Hugging Face Hub instead of being loaded on this machine.
#
# NOTE(review): the HuggingFaceHub wrapper needs a Hub API token, normally
# supplied via the HUGGINGFACEHUB_API_TOKEN environment variable -- confirm
# it is set before running this cell.
#
llm_chain = LLMChain(
    prompt=prompt_tpl,
    llm=HuggingFaceHub(
        # The checkpoint suffix is the letters "xl" (extra large), not "x"
        # followed by the digit one.
        repo_id="google/flan-t5-xl",
        # Generation settings handed to the hosted model.
        model_kwargs={
            "temperature": 0.0,
            "max_length": 64,
        },
    ),
)

## Using an Offline LLM

In [None]:
# Send a sample question through the current llm_chain and print its reply.
question = "What is the capital of France?"
answer = llm_chain.run(question)
print(answer)

In [None]:
#
# TODO Look into other models that are available
#
# Flan T5 Large is an encoder and decoder model
# https://huggingface.co/google/flan-t5-large
#
model_id = "google/flan-t5-large"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForSeq2SeqLM.from_pretrained(
    model_id,
    load_in_8bit=True,  # optional, depending on GPU RAM
    # 8-bit loading is placed on devices through a device map; some
    # transformers releases refuse to load an 8-bit model unless one is
    # given explicitly, so request automatic placement.
    device_map="auto",
)

#
# The pipeline bundles tokenizer and model behind a single call, so callers
# pass plain text instead of handling token ids themselves.
#
pipe = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=100,
)

# Wrap the transformers pipeline so it can be used as a LangChain LLM.
local_llm = HuggingFacePipeline(pipeline=pipe)

In [None]:
# Example WITHOUT a prompt: the bare question is handed directly to local_llm
raw_answer = local_llm("What is the capital of France?")
print(raw_answer)

In [None]:

# Example WITH a prompt: the question is first formatted by the template,
# then answered by the local model.
prompt_tpl = PromptTemplate(
    input_variables=["question"],
    template=template,
)
llm_chain = LLMChain(llm=local_llm, prompt=prompt_tpl)

question = "What is the capital of England?"
answer = llm_chain.run(question)
print(answer)

In [None]:
# Decoder-only (causal language model) example
model_id = "gpt2"
model = AutoModelForCausalLM.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Causal models use the "text-generation" task rather than "text2text-generation"
pipe = pipeline(
    task="text-generation",
    tokenizer=tokenizer,
    model=model,
    max_length=100,
)

# Rebind local_llm to the GPT-2 pipeline.
local_llm = HuggingFacePipeline(pipeline=pipe)

In [None]:
# Encoder-decoder example using a distilled Blenderbot checkpoint
model_id = "facebook/blenderbot-1B-distill"
model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Seq2seq models are driven through the "text2text-generation" task.
pipe = pipeline(
    task="text2text-generation",
    tokenizer=tokenizer,
    model=model,
    max_length=100,
)

# Rebind local_llm to the Blenderbot pipeline.
local_llm = HuggingFacePipeline(pipeline=pipe)

In [None]:
# Rebuild the prompt and chain so they use whichever model local_llm
# currently points at, then ask a sample question.
prompt_tpl = PromptTemplate(
    input_variables=["question"],
    template=template,
)
llm_chain = LLMChain(llm=local_llm, prompt=prompt_tpl)

question = "What area of France is best for growing wine?"
answer = llm_chain.run(question)
print(answer)

## Embedding Models

In [None]:
# Build the embedding model through LangChain's HuggingFaceEmbeddings wrapper.
# The class is imported with the other dependencies in the notebook's first
# cell, so all imports live in one place.
model_name = "sentence-transformers/all-mpnet-base-v2"
hf = HuggingFaceEmbeddings(model_name=model_name)

In [None]:
# Embed a single query string; the result is displayed as the cell output.
query_text = "What is the capital of France?"
hf.embed_query(query_text)

In [None]:
# Embed several documents in one call; the result is displayed as the cell output.
documents = [
    "Paris is the capital of France.",
    "London is the capital of England.",
]
hf.embed_documents(documents)