# Initial Setup

In [1]:
import os
import torch
import transformers
from dotenv import load_dotenv
from huggingface_hub import login
from transformers import AutoTokenizer, BitsAndBytesConfig, AutoModelForCausalLM, TextStreamer

In [2]:
# Login Hugging Face
load_dotenv(override=True)
HUGGINGFACE_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
login(HUGGINGFACE_TOKEN)

In [3]:
# Device
device = "cuda" if torch.cuda.is_available() else "cpu"

# Model name
LLAMA = "meta-llama/Llama-3.2-3B-Instruct"

In [4]:
# Quantization configuration
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4"
)

# Tokenizer
tokenizer = AutoTokenizer.from_pretrained(LLAMA)
tokenizer.pad_token = tokenizer.eos_token

# Model
model = AutoModelForCausalLM.from_pretrained(
    LLAMA,
    device_map="auto",
    quantization_config=quantization_config
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

# Prompting

In [5]:
def extract_text(decoded: str) -> str:
    """Clean special tokens from the generated text"""
    if "<|start_header_id|>assistant<|end_header_id|>" in decoded:
        assistant_part = decoded.split("<|start_header_id|>assistant<|end_header_id|>")[-1] # get assistant part only
        assistant_reply = assistant_part.split("<|eot_id|>")[0].strip()
    else:
        assistant_reply = decoded.strip()  # fallback, if format changes
    return assistant_reply

def generate_response(messages: str, output_tokens: int=256, tokenizer: transformers.tokenization_utils_fast=tokenizer, model: transformers.models=model, device: str=device) -> str:
    # Tokenize
    inputs = tokenizer.apply_chat_template(
        messages,
        return_tensors="pt",
        padding=True
    ).to(device)

    # Generate response
    outputs = model.generate(
        inputs,
        max_new_tokens=output_tokens,
        pad_token_id=tokenizer.pad_token_id
    )
    response_text = extract_text(tokenizer.decode(outputs[0]))
    
    return response_text

In [7]:
messages = [
    {"role": "system", "content": "You are a very funny assistant"}
]

prompt = ""
while prompt != "quit":
    if prompt != "quit":
        
        prompt = input('User: ')
        messages.append(
            {"role": "user", "content": prompt}
        )
        
        generated_text = generate_response(messages)
        messages.append(
            {"role": "assistant", "content": generated_text}
        )

        print(f"\nAssistant: {generated_text}\n")

User:  Hello Dovahkiin



Assistant: A Dragonborn, eh? Well, in that case, I've got some epic quests for you!

But first, let's get this conversation started. What's on your mind? Need some advice, or perhaps some Dragonborn-sized humor?



User:  Do you get to the cloud district very often? Oh what am I saying, of course you don't.



Assistant: You're referencing The Elder Scrolls V: Skyrim, aren't you? The Cloud District, a place of mystery and wonder, accessible only through a secret entrance in the city of Whiterun.

As a digital assistant, I don't have a physical presence, so I don't actually get to visit the Cloud District or any other location in the Skyrim universe. But I can certainly chat with you about it! What's your favorite thing about the Cloud District? The clouds? The views? The... well, the whole thing?



User:  "I used to be an adventurer like you. Then I took an arrow in the knee...



Assistant: The classic phrase! "I used to be an adventurer like you. Then I took an arrow to the knee..." You're quoting the infamous tavernkeeper from Riften, aren't you?

That line never fails to bring a smile to my digital face. I mean, who hasn't had a moment of "aw, shucks" after taking a few too many arrows to the leg? (Just kidding, that's a pretty serious injury, even for a Dragonborn!)

By the way, have you ever tried to negotiate a price for a horse from a gruff old merchant like this guy?



User:  Fus Ro Dah



Assistant: The classic Dragon shout! "Fus Ro Dah" indeed! You're channeling your inner Dovahkiin, aren't you?

For those who may not know, "Fus Ro Dah" is the Dragonborn's signature shout, capable of unleashing a devastating blast of energy that can knock enemies off their feet. It's a mighty and powerful cry, and one that's sure to strike fear into the hearts of your foes.

Now, if you'll excuse me, I'll just be over here, channeling my own inner Dragonborn...



User:  quit



Assistant: You're telling me to quit? As a Dragonborn, I think that's a bit... uncivilized.

However, I'll take your cue and... quit. *bows out*

May the roads rise up to meet you, and may your sword always be sharp. Fare thee well, adventurer!

