In [None]:
from huggingface_hub import login
login()  # paste your HF token here (https://huggingface.co/settings/tokens)
# 1. Install Unsloth and dependencies
!pip install -q unsloth accelerate datasets peft bitsandbytes trl

# 2. Load the 4-bit Unsloth build of Llama-3.2 3B Instruct
from unsloth import FastLanguageModel
from transformers import AutoTokenizer

# NOTE(review): the previous id, "unsloth/llama-3-3b-Instruct-bnb-4bit",
# combined the Llama 3 name with a 3B size; the 3B Instruct checkpoint is
# published under the Llama-3.2 name. The output directories used later in
# this notebook are named "3B", so 3B is assumed to be the intended size —
# confirm before a long training run.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Llama-3.2-3B-Instruct-bnb-4bit",  # Unsloth quantized
    max_seq_length = 2048,
    dtype = None,          # None lets Unsloth choose the dtype for this GPU
    load_in_4bit = True,
)

# Only fall back to EOS for padding when the tokenizer has no pad token.
# Overwriting an existing pad token with EOS makes EOS indistinguishable from
# padding, and collators that mask pad tokens would then mask EOS as well.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# 3. Bring the tax dataset into the Colab runtime
from google.colab import files
from datasets import load_dataset

# Opens the Colab file picker; choose itr1_full_finetune_dataset.jsonl
uploaded = files.upload()

# 4. Read the uploaded JSON-lines file as the "train" split
dataset = load_dataset(
    "json",
    data_files="itr1_full_finetune_dataset.jsonl",
    split="train",
)

def format_prompt(example, eos_token=None):
    """Build the full training text for one dataset row.

    The trainer in this notebook reads the ``"prompt"`` column as its
    training text, so that column has to contain the whole exchange: system
    message, user instruction AND the assistant's answer, terminated by the
    end-of-sequence token. With only the prompt in that column the model is
    never shown the answers it is supposed to learn.

    Args:
        example: Row with ``"instruction"`` and ``"output"`` string fields.
        eos_token: Marker appended after the answer so the model learns where
            to stop. Defaults to ``tokenizer.eos_token`` of the notebook-level
            tokenizer.

    Returns:
        Dict with ``"prompt"`` (complete training text) and ``"output"``
        (the answer, unchanged).
    """
    if eos_token is None:
        # Resolved lazily so the function can also be called with an explicit
        # marker when no tokenizer is loaded.
        eos_token = tokenizer.eos_token

    header = (
        "<|system|>\nYou are a helpful tax advisor AI.\n"
        f"<|user|>\n{example['instruction']}\n<|assistant|>\n"
    )
    return {
        "prompt": f"{header}{example['output']}{eos_token}",
        "output": example["output"],
    }

# Run format_prompt over every row of the dataset
dataset = dataset.map(format_prompt)

# 5. Wrap the base model with LoRA adapters for fine-tuning
# Names of the projection modules that receive adapters.
lora_targets = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
]

model = FastLanguageModel.get_peft_model(
    model,
    r=8,
    lora_alpha=16,
    lora_dropout=0,
    target_modules=lora_targets,
    use_gradient_checkpointing=True,
    use_rslora=False,
    loftq_config=None,
    random_state=42,
)

# 6. Fine-tune with TRL (SFTTrainer)
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

# The model was loaded with dtype=None, i.e. Unsloth picks the dtype for the
# current GPU. The trainer's mixed-precision flag has to agree with that
# choice, so select bf16 on GPUs that support it and fp16 otherwise instead of
# hard-coding fp16.
use_bf16 = is_bfloat16_supported()

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "prompt",   # column holding the training text
    max_seq_length = 2048,
    dataset_num_proc = 2,
    packing = False,
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,   # effective batch size of 8 per device
        num_train_epochs = 3,
        learning_rate = 2e-4,
        fp16 = not use_bf16,
        bf16 = use_bf16,
        logging_steps = 10,
        output_dir = "llama3-taxbot-lora",
        save_strategy = "epoch",
        save_total_limit = 1,              # keep only the latest checkpoint
    )
)

trainer.train()

# 7. Save the fine-tuned adapter and tokenizer, then publish them
from huggingface_hub import HfApi
from peft import PeftModel

# One directory variable for both saving and uploading, so the upload cannot
# point at a different location than the one the files were written to
# (the upload previously used a hard-coded "/content/..." path).
adapter_dir = "llama3-3B-taxbot-lora"
hub_repo_id = "ypavanr/llama3-3B-taxbot-lora"

model.save_pretrained(adapter_dir)
tokenizer.save_pretrained(adapter_dir)
# NOTE(review): this second save writes into the trainer's checkpoint
# directory, and `save_adapter` does not appear to be a documented
# save_pretrained argument — confirm whether this line is still needed.
model.save_pretrained("llama3-taxbot-lora", save_adapter=True)

api = HfApi()
# upload_folder needs an existing repository; exist_ok=True makes this a
# no-op when the repo is already there, so the cell can be re-run safely.
api.create_repo(repo_id=hub_repo_id, repo_type="model", exist_ok=True)
api.upload_folder(
    folder_path=adapter_dir,
    repo_id=hub_repo_id,
    repo_type="model",
)


In [None]:
from transformers import AutoTokenizer

# 1. Switch the fine-tuned model to Unsloth's inference mode before generating
#    (relies on `FastLanguageModel`, `model` and `tokenizer` from the training
#    cell above).
FastLanguageModel.for_inference(model)

prompt = "<|system|>\nYou are a helpful tax advisor AI.\n<|user|>\nlist all the deductions\n<|assistant|>\n"

# 2. Tokenize the input
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# 3. Generate output
outputs = model.generate(
    **inputs,
    max_new_tokens=300,
    do_sample=True,
    temperature=0.7,
    pad_token_id=tokenizer.pad_token_id,
)

# 4. Decode only the newly generated tokens. Slicing off the prompt avoids
#    re-parsing the prompt text, which would pick the wrong segment if the
#    user's text itself contained one of the role markers.
prompt_len = inputs["input_ids"].shape[1]
response = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)

# The role markers are ordinary text for this tokenizer, so the model may keep
# writing further turns; keep only the first assistant turn.
for marker in ("<|user|>", "<|assistant|>", "<|system|>"):
    response = response.split(marker)[0]
response = response.strip()

# 5. Print the result
print(response)
