# 1. Project Environment


In [None]:
import os
from google.colab import drive

In [None]:
# 1. Mount Google Drive
# Makes the user's Drive available under /content/drive.
# This may prompt you to authorize access to your Google Drive; when the drive
# is already mounted, drive.mount() only reports that and leaves it as is.
print("Mounting Google Drive...")
drive.mount('/content/drive')

Mounting Google Drive...
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Root folder of the project on Google Drive.
# The data, model and inference cells further down all read and write under
# 'DSAI585_Final_Project', so the directory scaffold created from this constant
# has to live in that same folder (it previously pointed at 'FinBot-Llama3',
# which left the created folders unused).
PROJECT_ROOT = '/content/drive/My Drive/DSAI585_Final_Project'

In [None]:
# Project layout: logical name -> absolute folder under PROJECT_ROOT.
# Inputs, outputs and logs each get their own folder:
#   data_raw        original datasets
#   data_processed  cleaned / synthetic data
#   models          saved QLoRA adapters and checkpoints
#   logs            W&B logs and local execution logs
#   output          final reports or generation outputs
directories = {
    key: os.path.join(PROJECT_ROOT, relative)
    for key, relative in (
        ("data_raw", "data/raw"),
        ("data_processed", "data/processed"),
        ("models", "models"),
        ("logs", "logs"),
        ("output", "output"),
    )
}

# Materialize the layout on disk.
print(f"\nSetting up project structure at: {PROJECT_ROOT}\n")

for label, folder in directories.items():
    # exist_ok=True keeps the cell re-runnable: existing folders are left alone.
    os.makedirs(folder, exist_ok=True)
    print(f"✅ Verified/Created directory: {label} -> {folder}")

print("\nSUCCESS: Project environment is ready.")

# 2. Library Installation & Authentication

## Installation

In [None]:
print("1. Installing necessary libraries...")

# We upgrade (-U) torch to match the latest transformers/bitsandbytes requirements.
# We include torchvision and torchaudio to prevent dependency conflicts.
!pip install -q -U torch torchvision torchaudio
!pip install -q -U transformers datasets accelerate bitsandbytes peft trl
!pip install -q -U wandb

print("✅ Libraries installed successfully.")


1. Installing necessary libraries...
✅ Libraries installed successfully.


## Hugging Face

In [None]:
import os
from huggingface_hub import login

print("\n" + "="*40)
print("👉 STEP 2.1: HUGGING FACE LOGIN")
print("Please enter your Hugging Face token below when prompted.")
print("="*40 + "\n")

# In a notebook, login() renders an interactive token widget and returns right
# away; it does not wait for the token to be submitted. The message printed
# after it therefore must not claim that authentication has already happened.
login()

print("⏳ Submit your token in the widget above before running the next cells.")


👉 STEP 2.1: HUGGING FACE LOGIN
Please enter your Hugging Face token below when prompted.



VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

✅ Hugging Face Login Complete.


## W&B

In [None]:
import wandb
import getpass
import os

print("👉 STEP 2.2: W&B LOGIN")
print("To avoid menu errors, we will capture the key securely first.\n")

# 1. Capture the key into a variable (input is hidden/masked).
#    Never write any part of a real key in comments, code or output.
#    Surrounding whitespace from copy/paste is stripped.
wb_key = getpass.getpass("Paste your W&B API Key here and press Enter: ").strip()

# 2. Login directly using the captured key.
#    relogin=True forces a new login attempt instead of reusing stored credentials.
#    wandb.login() returns a bool, so success is only reported when it is truthy.
try:
    if not wb_key:
        # An empty key would make wandb fall back to its own prompt, which is
        # exactly the menu this cell is trying to avoid.
        raise ValueError("No API key was entered.")
    logged_in = wandb.login(key=wb_key, relogin=True)
except Exception as e:
    print(f"\n❌ ERROR: Login failed. Details: {e}")
else:
    if logged_in:
        print("\n🎉 SUCCESS: W&B Login authenticated successfully.")
    else:
        print("\n❌ ERROR: Login failed. Details: wandb.login() reported that no key was configured.")

👉 STEP 2.2: W&B LOGIN
To avoid menu errors, we will capture the key securely first.

Paste your W&B API Key here and press Enter: ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mtyerdogan[0m ([33mBILGEM_DCS_RL[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin



🎉 SUCCESS: W&B Login authenticated successfully.


# 3. Mock Fintech Backend

## MockFintechDB

In [None]:
import json
import datetime

class MockFintechDB:
    """
    A simulated database and API backend for a fintech application.
    This acts as the 'Environment' for our LLM Agent.

    Why this meets project requirements:
    1. Simulates real-world API responses (JSON format).
    2. Maintains state (card status changes persist on the instance).
    3. Provides both 'Read' (balance, transactions) and 'Write' (block_card) tools.

    Response contract: every tool method returns a JSON string. Successful
    calls carry "status": "success"; failed calls carry "status": "error"
    together with an "error" message.
    """

    def __init__(self):
        # Initialize with dummy data representing a specific user scenario
        self.db = {
            "users": {
                "user_123": {
                    "name": "Taha Yiğit Erdoğan",
                    # A float is good enough for this mock; real monetary
                    # amounts should use Decimal or integer minor units.
                    "balance": 15400.50,
                    "currency": "TRY",
                    "is_active": True,
                    "cards": {
                        # Defined cards for the user
                        "card_garanti": {"status": "active", "type": "credit"},
                        "card_yapikredi": {"status": "blocked", "type": "debit"}
                    },
                    # Ordered oldest -> newest; get_transactions() relies on this.
                    "transactions": [
                        {"id": "tx_1", "merchant": "Netflix", "amount": 199.99, "date": "2024-05-20"},
                        {"id": "tx_2", "merchant": "Migros", "amount": 1250.00, "date": "2024-05-21"},
                        {"id": "tx_3", "merchant": "Spotify", "amount": 89.99, "date": "2024-05-22"}
                    ]
                }
            }
        }
        print("✅ MockFintechDB initialized successfully.")

    @staticmethod
    def _error(message: str) -> str:
        """Build the JSON payload shared by every failed tool call."""
        return json.dumps({"status": "error", "error": message})

    def get_balance(self, user_id: str) -> str:
        """
        Tool: Fetches the current balance and currency of the user.
        Input: user_id (str)
        Output: JSON string with balance details, or an error payload if the
                user does not exist.
        """
        user = self.db["users"].get(user_id)
        if not user:
            return self._error("User not found")

        return json.dumps({
            "status": "success",
            "balance": user["balance"],
            "currency": user["currency"]
        })

    def get_transactions(self, user_id: str, count: int = 3) -> str:
        """
        Tool: Retrieves the last 'count' transactions for the user.
        Input: user_id (str), count (int; values that int() can convert,
               such as "3", are accepted too)
        Output: JSON string with transaction list (oldest first). A count of
                zero or less yields an empty list; an error payload is returned
                for an unknown user or a count that is not a number.
        """
        user = self.db["users"].get(user_id)
        if not user:
            return self._error("User not found")

        try:
            count = int(count)
        except (TypeError, ValueError):
            return self._error(f"Invalid count '{count}': expected an integer.")

        # A plain [-count:] slice is wrong for count <= 0: [-0:] is the whole
        # list and a negative count slices from the front. Handle it explicitly.
        txs = user["transactions"][-count:] if count > 0 else []
        return json.dumps({
            "status": "success",
            "transactions": txs
        })

    def block_card(self, user_id: str, card_id: str, reason: str = "lost") -> str:
        """
        Tool: Blocks a credit/debit card immediately.
        CRITICAL: This is a 'Write' operation that changes the DB state.
        Blocking an already blocked card succeeds and leaves it blocked.
        Input: user_id (str), card_id (str), reason (str)
        Output: JSON string with the result of the operation, or an error
                payload if the user or the card does not exist.
        """
        user = self.db["users"].get(user_id)
        if not user:
            return self._error("User not found")

        # Check if card exists in user's wallet
        if card_id not in user["cards"]:
            return self._error(f"Card '{card_id}' not found in user wallet.")

        # Execute Action: Change status to 'blocked'
        user["cards"][card_id]["status"] = "blocked"

        return json.dumps({
            "status": "success",
            "message": f"Card {card_id} has been blocked successfully.",
            "reason": reason,
            "new_card_status": "blocked"
        })

# ------------------------------------------
# Smoke test for the mock backend
# ------------------------------------------
print("\n--- Running Backend Self-Check ---")

# Fresh backend instance used by all checks below
fintech_system = MockFintechDB()

# Check 1 - read path: balance lookup for the demo user
print("\nTest 1: Check Balance (Should see TRY balance)")
print(fintech_system.get_balance("user_123"))

# Check 2 - write path: block one of the demo user's cards
target_card = "card_garanti"
print(f"\nTest 2: Blocking Card ({target_card})")
print(fintech_system.block_card("user_123", target_card, reason="suspicious_activity"))

# Check 3 - read the card status straight from the internal dict to confirm
# that the write above actually changed the stored state
card_status = fintech_system.db["users"]["user_123"]["cards"][target_card]["status"]
print(f"\nTest 3: Internal Verification -> Status of {target_card} is now: '{card_status}'")

print(
    "\n✅ SUCCESS: Backend logic is working correctly. State persisted."
    if card_status == "blocked"
    else "\n❌ FAILURE: Card was not blocked."
)


--- Running Backend Self-Check ---
✅ MockFintechDB initialized successfully.

Test 1: Check Balance (Should see TRY balance)
{"status": "success", "balance": 15400.5, "currency": "TRY"}

Test 2: Blocking Card (card_garanti)
{"status": "success", "message": "Card card_garanti has been blocked successfully.", "reason": "suspicious_activity", "new_card_status": "blocked"}

Test 3: Internal Verification -> Status of card_garanti is now: 'blocked'

✅ SUCCESS: Backend logic is working correctly. State persisted.


# 4. Model Loading and Quantization

## Quantization Config

In [None]:
import torch
from transformers import BitsAndBytesConfig

print("Defining 4-bit Quantization Configuration...")

# 4-bit Normal Float (NF4) settings, handed to from_pretrained() when the model
# is loaded in the next section.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_quant_type="nf4",             # NF4 data type for the quantized weights
    bnb_4bit_use_double_quant=True,        # also quantize the quantization constants
    bnb_4bit_compute_dtype=torch.float16,  # dtype used for the computations
    load_in_4bit=True,                     # switch 4-bit loading on
)

print("✅ Configuration defined. Ready to load model.")

Defining 4-bit Quantization Configuration...
✅ Configuration defined. Ready to load model.


## Base Model Loading

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer

# Target Model: Meta Llama 3.1 8B Instruct
# This cell loads the quantized model and its tokenizer as a first check.
# The training section further down loads its own model and tokenizer again
# and rebinds the same `model` / `tokenizer` names.
model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"

print(f"⏳ Downloading and loading model: {model_id}...")
print("⚠️ Note: This step is heavy. It may take 2-5 minutes.")

# Any failure is printed, not re-raised: after an error `model` and/or
# `tokenizer` may be undefined even though the cell itself finishes.
try:
    # 1. Load Model (4-bit)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        quantization_config=bnb_config, # bnb_config comes from the "Quantization Config" cell
        device_map="auto",
        trust_remote_code=True
    )
    print("\n✅ SUCCESS: Llama 3.1 Model loaded into GPU memory.")

    # 2. Load Tokenizer
    print(f"⏳ Loading tokenizer for: {model_id}...")
    tokenizer = AutoTokenizer.from_pretrained(
        model_id,
        trust_remote_code=True
    )

    # Padding setup needed for training:
    # Llama 3 doesn't have a default pad token, so we use the EOS token.
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.padding_side = "right" # Fixed to right for now, SFTTrainer might handle it.

    print("✅ SUCCESS: Tokenizer loaded and configured.")
    print(f"   -> Vocab Size: {tokenizer.vocab_size}")
    print(f"   -> Pad Token: {tokenizer.pad_token}")

except Exception as e:
    print(f"\n❌ ERROR: Could not load Llama 3.1. Details: {e}")
    print("👉 If you see a 403 error, the approval might not have propagated yet. Wait 5 mins.")

⏳ Downloading and loading model: meta-llama/Meta-Llama-3.1-8B-Instruct...
⚠️ Note: This step is heavy. It may take 2-5 minutes.


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]


✅ SUCCESS: Llama 3.1 Model loaded into GPU memory.
⏳ Loading tokenizer for: meta-llama/Meta-Llama-3.1-8B-Instruct...
✅ SUCCESS: Tokenizer loaded and configured.
   -> Vocab Size: 128000
   -> Pad Token: <|eot_id|>


# 5. Data Acquisition & Filtering

In [None]:
from datasets import load_dataset
import pandas as pd
import os

import re

# Define path
# NOTE: this rebinds PROJECT_ROOT to the folder that the later cells use in
# their hard-coded paths.
PROJECT_ROOT = '/content/drive/My Drive/DSAI585_Final_Project'
print("⏳ Loading dataset from Hugging Face...")

# 1. Load the dataset (Bitext)
dataset_id = "bitext/Bitext-customer-support-llm-chatbot-training-dataset"
dataset = load_dataset(dataset_id, split="train")

# 2. Convert to Pandas
df = dataset.to_pandas()

# 3. Define Keywords Strategy
# A. Positive Keywords: If a row has these, we WANT it.
finance_keywords = [
    # General Banking
    'bank', 'account', 'card', 'credit', 'debit', 'wallet',
    # Transactions & Money
    'payment', 'transaction', 'transfer', 'money', 'fund', 'deposit', 'withdrawal', 'balance',
    # Support & Security
    'fraud', 'suspicious', 'block', 'lost', 'stolen', 'pin', 'password',
    # Documents
    'invoice', 'statement', 'receipt', 'charge', 'fee', 'refund'
]

# B. Negative Keywords: If a row has these, we REJECT it (even if it has finance words).
# Example: "a refund for my delivery" -> has 'refund' (good) but 'delivery' (bad).
exclude_keywords = [
    'shipping', 'delivery', 'courier', 'package', 'tracking number',
    'software', 'installation', 'version', 'update' # Excluding tech support
]


def _keyword_pattern(keywords):
    """Return a regex that matches any of `keywords` as a whole word.

    Plain substring matching produces false positives: 'fee' matches
    'FEEDBACK', 'pin' matches 'shipping'/'shopping', 'fund' matches 'refund'.
    The pattern anchors each keyword on word boundaries and tolerates a few
    simple inflections (cards, blocked, banking, charged). Keywords are
    escaped so they are always treated literally.
    """
    alternatives = "|".join(re.escape(keyword) for keyword in keywords)
    return rf"\b(?:{alternatives})(?:s|es|d|ed|ing)?\b"


print("🔍 Applying Filters...")

# 4. Apply Logic
# Boolean masks, one entry per row. A row counts as finance-related when a
# keyword appears in its instruction text or in its category label.
# NOTE: whole-word matching is stricter than the earlier substring matching,
# so the sizes printed below will be smaller than in previously saved output.
finance_pattern = _keyword_pattern(finance_keywords)
exclude_pattern = _keyword_pattern(exclude_keywords)

mask_finance = (
    df['instruction'].str.contains(finance_pattern, case=False, na=False, regex=True)
    | df['category'].str.contains(finance_pattern, case=False, na=False, regex=True)
)

mask_exclude = df['instruction'].str.contains(exclude_pattern, case=False, na=False, regex=True)

# Final Filter: Must have Finance Keywords AND must NOT have Exclude Keywords
df_fintech = df[mask_finance & ~mask_exclude].copy()

print(f"   -> Original Dataset Size: {len(df)}")
print(f"   -> Final Filtered Size:   {len(df_fintech)}")

# 5. Analyze Distribution (Critical for Step 6)
print("\n📊 Intent Distribution (Top 10):")
print(df_fintech['intent'].value_counts().head(10))

# 6. Save to Drive
save_path = os.path.join(PROJECT_ROOT, "data/raw/fintech_conversations_v2.csv")
os.makedirs(os.path.dirname(save_path), exist_ok=True)
df_fintech.to_csv(save_path, index=False)

print(f"\n✅ SUCCESS: Improved dataset saved to: {save_path}")

⏳ Loading dataset from Hugging Face...
🔍 Applying Filters...
   -> Original Dataset Size: 26872
   -> Final Filtered Size:   15936

📊 Intent Distribution (Top 10):
intent
check_invoice            1000
complaint                1000
switch_account           1000
check_payment_methods     999
registration_problems     999
payment_issue             999
get_invoice               999
check_refund_policy       997
review                    997
create_account            997
Name: count, dtype: int64

✅ SUCCESS: Improved dataset saved to: /content/drive/My Drive/DSAI585_Final_Project/data/raw/fintech_conversations_v2.csv


# 6. Strategic Tool Injection

In [None]:
import pandas as pd
import json
from tqdm import tqdm
import os

# 1. Load the Filtered Dataset
# This is the CSV written at the end of the "Data Acquisition & Filtering"
# section; reading it back rebinds `df` to the filtered rows only.
load_path = '/content/drive/My Drive/DSAI585_Final_Project/data/raw/fintech_conversations_v2.csv'
df = pd.read_csv(load_path)

print(f"🔄 Processing {len(df)} rows for Tool Injection...")

# Accumulates one dict per row (instruction, output, type, intent); turned
# into a DataFrame once the labelling loop has finished.
processed_data = []

# 2. Define the Logic (The Brain)
# We use the 'intent' column for precision mapping.
def decide_tool(row):
    """Decide which backend tool, if any, a dataset row should be labelled with.

    Input: row - a mapping (e.g. a DataFrame row) with 'intent' and
           'instruction' entries.
    Output: (tool_call, tool_output) where tool_call is the call string to
            teach the model and tool_output is a mocked JSON response for it;
            (None, None) when the row should stay a plain chat example.

    The rules are checked in order and the first match wins.
    """
    intent = row['intent']

    # Empty cells come back from pd.read_csv as NaN (a float), which has no
    # .lower(); treat any non-string instruction as empty text.
    instruction = row['instruction']
    text = instruction.lower() if isinstance(instruction, str) else ""

    # --- A. TRANSACTION HISTORY ---
    # Intents: check_invoice, get_invoice
    if intent in ('check_invoice', 'get_invoice'):
        return 'get_transactions(user_id="user_123", count=5)', \
               json.dumps({"status": "success", "transactions": [{"id": "tx_1", "merchant": "Netflix", "amount": 199.99}]})

    # --- B. BALANCE CHECK ---
    # Intent check_payment_methods, or the word 'balance' in the instruction.
    # NOTE(review): check_payment_methods questions are labelled as balance
    # checks here - confirm that this mapping is intended.
    if intent == 'check_payment_methods' or 'balance' in text:
        return 'get_balance(user_id="user_123")', \
               json.dumps({"status": "success", "balance": 15400.50, "currency": "TRY"})

    # --- C. BLOCK CARD (Safety First) ---
    # Purely keyword driven, regardless of intent.
    if any(keyword in text for keyword in ('lost', 'stolen', 'hack', 'unauthorized')):
        return 'block_card(user_id="user_123", card_id="card_garanti", reason="security_alert")', \
               json.dumps({"status": "success", "card_id": "card_garanti", "state": "blocked"})

    # --- D. DEFAULT (Chat) ---
    return None, None

# 3. Label every row and tally how many land in each bucket
stats = {"tool_use": 0, "chat": 0}

for _, record in tqdm(df.iterrows(), total=df.shape[0]):
    call, _mock_result = decide_tool(record)

    # A truthy call string makes this a tool-use example whose training target
    # is the call wrapped in <tool_code> tags (the signal the LLM is taught to
    # produce); otherwise the dataset's own answer stays the target.
    kind = "tool_use" if call else "chat"
    target = f"<tool_code>{call}</tool_code>" if call else record['response']
    stats[kind] += 1

    processed_data.append({
        "instruction": record['instruction'],
        "output": target,
        "type": kind,
        "intent": record['intent'],  # retained for reference
    })

# 4. Persist the labelled examples
save_path = '/content/drive/My Drive/DSAI585_Final_Project/data/processed/fintech_sft_dataset_final.csv'
df_processed = pd.DataFrame(processed_data)

os.makedirs(os.path.dirname(save_path), exist_ok=True)
df_processed.to_csv(save_path, index=False)

print(f"\n✅ SUCCESS: Training Data Created!")
print(f"   -> Saved to: {save_path}")
print(f"   -> Statistics: {stats}")
print(f"      (Tool Use Ratio: {stats['tool_use'] / len(df_processed):.1%})")

# Show the first tool-use example as a sanity check
print("\n--- Example: Tool Use (Transaction) ---")
print(df_processed.loc[df_processed['type'] == 'tool_use', ['instruction', 'output']].head(1).values)

🔄 Processing 15936 rows for Tool Injection...


100%|██████████| 15936/15936 [00:00<00:00, 20660.22it/s]



✅ SUCCESS: Training Data Created!
   -> Saved to: /content/drive/My Drive/DSAI585_Final_Project/data/processed/fintech_sft_dataset_final.csv
   -> Statistics: {'tool_use': 3062, 'chat': 12874}
      (Tool Use Ratio: 19.2%)

--- Example: Tool Use (Transaction) ---
[['show me invoice{{Invoice Number}}'
  '<tool_code>get_transactions(user_id="user_123", count=5)</tool_code>']]


# 7. Training

In [None]:
# --- CELL 1: Setup ---
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments
)
from peft import LoraConfig, prepare_model_for_kbit_training
from trl import SFTTrainer
from datasets import load_dataset
import wandb
import os

# Config
PROJECT_NAME = "DSAI585-Final Project"   # W&B project the run is logged to
RUN_NAME = "llama3.1-fintech-agent-h100-final"
MODEL_ID = "meta-llama/Meta-Llama-3.1-8B-Instruct"
# Folder on Drive that receives the checkpoints and the final adapter
OUTPUT_DIR = "/content/drive/My Drive/DSAI585_Final_Project/models/llama-3.1-fintech-adapter"

# WandB Init
# Close any run left open by an earlier execution of this cell. This is
# best-effort cleanup, so a failure is reported but does not stop the cell.
# Only Exception is caught: a bare `except:` would also swallow
# KeyboardInterrupt and SystemExit.
try:
    wandb.finish()
except Exception as exc:
    print(f"⚠️ Could not close a previous W&B run: {exc}")
wandb.login()
wandb.init(project=PROJECT_NAME, name=RUN_NAME, config={"model": MODEL_ID, "gpu": "H100"})

print("✅ Cell 1 Complete: Setup & WandB ready.")

✅ Cell 1 Complete: Setup & WandB ready.


In [None]:
# --- CELL 2: Load Model & Tokenizer ---
# Loads the quantized base model and tokenizer for training and defines the
# LoRA configuration. Rebinds `bnb_config`, `model` and `tokenizer`, which the
# "Model Loading and Quantization" section also defines.
print("⏳ Loading Model & Tokenizer (This puts the Vocabulary in RAM)...")

# Same NF4 / double-quant settings as the earlier config, except that the
# compute dtype here is bfloat16 instead of float16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)

# use_cache=False: presumably because the generation cache is not needed
# while training - TODO confirm.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
    use_cache=False
)

# The EOS token doubles as the pad token; padding is applied on the right.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# Prepare the quantized model for k-bit training before the trainer wraps it.
model = prepare_model_for_kbit_training(model)

# LoRA settings; handed to SFTTrainer as `peft_config` in Cell 4.
# Adapters are applied to the modules named in `target_modules`.
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]
)

print("✅ Cell 2 Complete: Model loaded on GPU.")

⏳ Loading Model & Tokenizer (This puts the Vocabulary in RAM)...


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

✅ Cell 2 Complete: Model loaded on GPU.


In [None]:
# --- CELL 3: Data Preparation ---
# Load the processed SFT dataset written by the "Strategic Tool Injection"
# section (columns: instruction, output, type, intent).
dataset = load_dataset('csv', data_files='/content/drive/My Drive/DSAI585_Final_Project/data/processed/fintech_sft_dataset_final.csv', split="train")

# System prompt placed in front of every training example.
# The inference section's generate_response() repeats this text verbatim;
# keep the two copies identical when editing either one.
system_prompt = """You are a helpful and secure banking assistant.
You have access to the following tools:
1. get_balance(user_id): Returns account balance.
2. get_transactions(user_id, count): Returns recent transactions.
3. block_card(user_id, card_id, reason): Blocks a card. Use ONLY for lost/stolen/fraud cases.

If a tool is needed, output the code inside <tool_code> tags. Otherwise, answer normally."""

def format_row(row):
    """Render one dataset row as a single Llama 3 chat-format training string.

    The result holds three turns - system (the module-level `system_prompt`),
    user (row['instruction']) and assistant (row['output']) - each wrapped in
    header tokens and closed with <|eot_id|>, after one leading
    <|begin_of_text|>.
    """
    turns = (
        ("system", system_prompt),
        ("user", row['instruction']),
        ("assistant", row['output']),
    )
    rendered_turns = "".join(
        f"<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>"
        for role, content in turns
    )
    return "<|begin_of_text|>" + rendered_turns

print("🔄 Processing dataset...")
# Add a new column named 'text' holding the formatted chat string of each row
dataset = dataset.map(lambda x: {"text": format_row(x)})

# Preview only the first 100 characters of the first formatted example
print(f"✅ Cell 3 Complete. Sample Data:\n{dataset[0]['text'][:100]}...")

🔄 Processing dataset...


Map:   0%|          | 0/15936 [00:00<?, ? examples/s]

✅ Cell 3 Complete. Sample Data:
<|begin_of_text|><|start_header_id|>system<|end_header_id|>

You are a helpful and secure banking as...


In [None]:
# --- CELL 4: Trainer Initialization (FIXED) ---
# Training hyper-parameters. Checkpoints go to OUTPUT_DIR every 100 steps
# (at most 2 are kept) and metrics are reported to W&B every 10 steps.
args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    num_train_epochs=1,
    per_device_train_batch_size=8,
    gradient_accumulation_steps=2,
    learning_rate=2e-4,
    fp16=False,
    bf16=True, # for the H100
    logging_steps=10,
    save_strategy="steps",
    save_steps=100,
    save_total_limit=2,
    report_to="wandb",
    optim="paged_adamw_32bit",
    warmup_ratio=0.03,
    group_by_length=True,
    lr_scheduler_type="cosine",
)

# Tell the trainer to use only the "text" column
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    peft_config=peft_config,
    processing_class=tokenizer,
    args=args,
    formatting_func=lambda x: x["text"] # Simple, version-independent solution
)

print("✅ Cell 4 Complete: Trainer initialized without errors.")

Applying formatting function to train dataset:   0%|          | 0/15936 [00:00<?, ? examples/s]

Adding EOS to train dataset:   0%|          | 0/15936 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/15936 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/15936 [00:00<?, ? examples/s]

✅ Cell 4 Complete: Trainer initialized without errors.


In [None]:
# --- CELL 5: Start Training ---
# Runs the fine-tuning job configured in Cell 4.
print("🔥 Starting Training...")
trainer.train()

# Save the trained model and the tokenizer side by side in OUTPUT_DIR (the
# inference section loads the adapter from this same path), then close the
# W&B run.
print(f"✅ Training Complete. Saving to {OUTPUT_DIR}...")
trainer.model.save_pretrained(OUTPUT_DIR)
tokenizer.save_pretrained(OUTPUT_DIR)
wandb.finish()

The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'eos_token_id': 128009, 'pad_token_id': 128009}.


🔥 Starting Training...


  return fn(*args, **kwargs)


Step,Training Loss
10,1.9331
20,1.1678
30,0.7113
40,0.5253
50,0.3226
60,0.7996
70,0.5613
80,0.5098
90,0.4232
100,0.192


  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)


✅ Training Complete. Saving to /content/drive/My Drive/DSAI585_Final_Project/models/llama-3.1-fintech-adapter...


0,1
train/entropy,█▅▅▃▃▃▃▁▄▃▃▃▃▃▃▂▁▃▃▂▃▃▁▃▁▃▂▁▃▃▂▃▁▃▁▃▃▃▃▁
train/epoch,▁▁▁▁▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇██
train/global_step,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇█████
train/grad_norm,█▃▃▇▂▂▂▂▂▂▂▂▂▁▂▁▂▁▂▂▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁
train/learning_rate,▃▅████████▇▇▇▇▇▇▆▆▆▆▅▅▅▅▄▄▄▃▃▂▂▂▂▂▁▁▁▁▁▁
train/loss,█▅▄▃▃▂▃▂▂▂▃▂▂▁▂▂▂▂▂▂▂▂▁▂▁▂▂▂▂▁▂▂▂▂▂▁▂▂▂▂
train/mean_token_accuracy,▃▇▁▃▇▂▇▁█▁▃▁▄█▃▃▄▂▃▃▃▄█▄▃▃▄▃▄▄▄▄▃▄█▃▄█▄▅
train/num_tokens,▁▁▁▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇█████

0,1
total_flos,1.6548650768990208e+17
train/entropy,0.11943
train/epoch,1
train/global_step,996
train/grad_norm,0.13672
train/learning_rate,0.0
train/loss,0.2814
train/mean_token_accuracy,0.96042
train/num_tokens,3622789.0
train_loss,0.39165


# 8. Inference


In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel

# 1. SETUP PATHS & CONFIGURATION
BASE_MODEL_ID = "meta-llama/Meta-Llama-3.1-8B-Instruct"
# Path where we saved our fine-tuned adapter in Step 7
ADAPTER_PATH = "/content/drive/My Drive/DSAI585_Final_Project/models/llama-3.1-fintech-adapter"

print("⏳ Loading Base Model & Adapter...")

# 2. LOAD BASE MODEL (Quantized for Efficiency)
# We use 4-bit quantization to fit the model into Colab GPU memory
# NOTE(review): the compute dtype here is float16, while the training section
# used bfloat16 - confirm that the difference is intended.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
)

base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL_ID,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
    use_cache=True # Enable cache for faster inference (unlike training)
)

# The tokenizer is loaded from the base model id; EOS doubles as the pad token.
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID)
tokenizer.pad_token = tokenizer.eos_token

# 3. LOAD ADAPTER (LoRA)
# Wraps the base model with the fine-tuned adapter stored at ADAPTER_PATH.
# The adapter is attached only; this cell does not merge it into the base weights.
model = PeftModel.from_pretrained(base_model, ADAPTER_PATH)

print("✅ Model Successfully Loaded! Ready to Test.")

# 4. INFERENCE FUNCTION
def generate_response(user_input):
    """Generate the fine-tuned assistant's reply to a single user message.

    Wraps ``user_input`` in a Llama 3 chat prompt (system turn + user turn),
    samples a completion from the module-level ``model`` using the module-level
    ``tokenizer``, and returns only the text produced after the prompt.

    Args:
        user_input: The user's message as plain text.

    Returns:
        The decoded completion with special tokens removed and surrounding
        whitespace stripped.
    """
    # System Prompt: MUST match the one used during training exactly.
    system_prompt = """You are a helpful and secure banking assistant.
You have access to the following tools:
1. get_balance(user_id): Returns account balance.
2. get_transactions(user_id, count): Returns recent transactions.
3. block_card(user_id, card_id, reason): Blocks a card. Use ONLY for lost/stolen/fraud cases.

If a tool is needed, output the code inside <tool_code> tags. Otherwise, answer normally."""

    # Llama 3 Chat Format Construction
    prompt = f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system_prompt}<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n{user_input}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"

    # Tokenize and move the tensors to wherever the model actually lives
    # (instead of assuming a device literally named "cuda").
    # NOTE(review): the prompt already starts with <|begin_of_text|>; the tokenizer may
    # prepend a second BOS by default. Left as-is so tokenization keeps matching whatever
    # was done at training time -- confirm against the training cell.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_length = inputs["input_ids"].shape[1]

    # Generate Output
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=128,    # Keep it short for tool calls
            temperature=0.1,       # Low temperature = More deterministic/precise code
            do_sample=True,
            eos_token_id=tokenizer.eos_token_id,
            # Explicit pad token avoids the per-call "Setting `pad_token_id`..." warning.
            pad_token_id=tokenizer.pad_token_id,
        )

    # The returned sequence is prompt + completion. Decode only the completion so the
    # result cannot be corrupted when the reply itself contains "assistant\n\n".
    generated_tokens = outputs[0][prompt_length:]
    return tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()

⏳ Loading Base Model & Adapter...


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

✅ Model Successfully Loaded! Ready to Test.


In [None]:
# List of test cases: (Category, Description, User Input)
# Each entry is a 3-tuple of strings. The group headers below record the behavior we
# expect from the model; nothing in this list encodes that expectation as data.
test_cases = [
    # --- GROUP A: GENERAL CHAT (Should NOT trigger tools) ---
    ("Chat", "Basic Greeting", "Hello, good morning!"),
    ("Chat", "Identity Question", "Who are you and what can you do?"),
    ("Chat", "General Knowledge", "What is the capital of Turkey?"),
    ("Chat", "Banking Definition", "What is an IBAN number?"),
    ("Chat", "Politeness/Closing", "Thank you very much for your help, have a nice day."),

    # --- GROUP B: BALANCE INQUIRIES (Should trigger get_balance) ---
    ("Balance", "Direct Ask", "What is my current account balance?"),
    ("Balance", "Slang/Casual", "How much cash do I have right now?"),
    ("Balance", "Financial Check", "I need to check my funds before I go shopping."),
    ("Balance", "Short Form", "Show balance."),

    # --- GROUP C: TRANSACTION HISTORY (Should trigger get_transactions) ---
    ("Transactions", "Generic Request", "Show me my recent spending history."),
    ("Transactions", "Specific Count (Low)", "What were my last 3 transactions?"),
    ("Transactions", "Specific Count (High)", "List the last 10 items I purchased."),
    ("Transactions", "Merchant Check", "Did my payment to Netflix go through? Check recent activity."),
    ("Transactions", "Timeframe reference", "Show me my shopping movements from lately."),

    # --- GROUP D: CARD BLOCKING / SAFETY (Should trigger block_card) ---
    ("Security", "Panic/Stolen", "Help! My wallet was stolen on the bus!"),
    ("Security", "Lost Item", "I think I dropped my credit card at the restaurant. I can't find it."),
    ("Security", "Fraud Suspicion", "I see a transaction I didn't make. I suspect fraud."),
    ("Security", "Hacked Account", "My account has been hacked, please freeze everything."),
    ("Security", "Direct Command", "Block my card immediately."),

    # --- GROUP E: AMBIGUOUS / COMPLEX (Testing Reasoning) ---
    ("Complex", "Mixed Context", "I am worried about my security. Can you first tell me how much money I have?"),
    # Note: Ideally this triggers balance, but a smart agent might hesitate. Let's see what it does.
]

print(f"🚀 Starting Comprehensive Benchmark with {len(test_cases)} Scenarios...\n")

# Run every scenario through the model and print a transcript for manual review.
for test_case in test_cases:
    category, description, prompt = test_case

    print(f"--- [{category}] {description} ---")
    print(f"👤 User: {prompt}")

    # Ask the fine-tuned model for its reply to this prompt.
    response = generate_response(prompt)

    # Visual aid only: label the reply by whether it contains a tool-call tag.
    status = "⚙️ TOOL USED" if "<tool_code>" in response else "📝 Chat"

    print(f"🤖 Agent ({status}): {response}")
    print("-" * 60)

Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


🚀 Starting Comprehensive Benchmark with 20 Scenarios...

--- [Chat] Basic Greeting ---
👤 User: Hello, good morning!


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


🤖 Agent (📝 Chat): Good morning! It's wonderful to connect with you. How are you doing today?
------------------------------------------------------------
--- [Chat] Identity Question ---
👤 User: Who are you and what can you do?


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


🤖 Agent (📝 Chat): I'm delighted to introduce myself! I'm a helpful and secure banking assistant, here to provide you with the support you need. My primary function is to assist you with various banking-related tasks and queries. I have access to a range of tools that enable me to offer personalized solutions tailored to your specific needs. Whether you're looking for information about your account, need assistance with transactions, or require guidance on how to report an issue, I'm here to help. Feel free to ask me anything, and I'll do my best to provide you with the information you're seeking.
------------------------------------------------------------
--- [Chat] General Knowledge ---
👤 User: What is the capital of Turkey?


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


🤖 Agent (📝 Chat): I'll take care of it! The capital of Turkey is Ankara.
------------------------------------------------------------
--- [Chat] Banking Definition ---
👤 User: What is an IBAN number?


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


🤖 Agent (📝 Chat): I'm happy to help! An IBAN (International Bank Account Number) is a standardized way to identify a specific bank account. It's a unique code that consists of a combination of letters and numbers, which helps to ensure accurate and efficient transactions. IBANs are widely used in Europe and other parts of the world to facilitate international payments and money transfers. If you have any specific questions about IBANs or need assistance with a particular account, please let me know, and I'll be glad to help you further.
------------------------------------------------------------
--- [Chat] Politeness/Closing ---
👤 User: Thank you very much for your help, have a nice day.


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


🤖 Agent (📝 Chat): It was my pleasure to assist you. I appreciate your kind words and wish you a wonderful day as well. If you have any other questions or need further assistance, please don't hesitate to reach out.
------------------------------------------------------------
--- [Balance] Direct Ask ---
👤 User: What is my current account balance?


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


🤖 Agent (⚙️ TOOL USED): <tool_code>get_balance(user_id="user_123")</tool_code>
------------------------------------------------------------
--- [Balance] Slang/Casual ---
👤 User: How much cash do I have right now?


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


🤖 Agent (⚙️ TOOL USED): <tool_code>get_balance(user_id="user_123")</tool_code>
------------------------------------------------------------
--- [Balance] Financial Check ---
👤 User: I need to check my funds before I go shopping.


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


🤖 Agent (⚙️ TOOL USED): <tool_code>get_balance(user_id="user_123")</tool_code>
------------------------------------------------------------
--- [Balance] Short Form ---
👤 User: Show balance.


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


🤖 Agent (⚙️ TOOL USED): <tool_code>get_balance(user_id="user_123")</tool_code>
------------------------------------------------------------
--- [Transactions] Generic Request ---
👤 User: Show me my recent spending history.


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


🤖 Agent (⚙️ TOOL USED): <tool_code>get_transactions(user_id="user_123", count=5)</tool_code>
------------------------------------------------------------
--- [Transactions] Specific Count (Low) ---
👤 User: What were my last 3 transactions?


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


🤖 Agent (⚙️ TOOL USED): <tool_code>get_transactions(user_id="user_123", count=5)</tool_code>
------------------------------------------------------------
--- [Transactions] Specific Count (High) ---
👤 User: List the last 10 items I purchased.


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


🤖 Agent (⚙️ TOOL USED): <tool_code>get_transactions(user_id="user_123", count=10)</tool_code>
------------------------------------------------------------
--- [Transactions] Merchant Check ---
👤 User: Did my payment to Netflix go through? Check recent activity.


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


🤖 Agent (⚙️ TOOL USED): <tool_code>get_transactions(user_id="user_123", count=5)</tool_code>
------------------------------------------------------------
--- [Transactions] Timeframe reference ---
👤 User: Show me my shopping movements from lately.


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


🤖 Agent (⚙️ TOOL USED): <tool_code>get_transactions(user_id="user_123", count=5)</tool_code>
------------------------------------------------------------
--- [Security] Panic/Stolen ---
👤 User: Help! My wallet was stolen on the bus!


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


🤖 Agent (📝 Chat): I'm sorry to hear that your wallet was stolen on the bus. I understand how frustrating and stressful this situation can be. Rest assured, I'm here to assist you in resolving this issue. To better assist you, could you please provide me with some additional details about the incident? Specifically, I would like to know the location where the theft occurred, any identifying features of the wallet, and any other relevant information. With this information, we can work together to locate your wallet and take the necessary steps to ensure its safe return.
------------------------------------------------------------
--- [Security] Lost Item ---
👤 User: I think I dropped my credit card at the restaurant. I can't find it.


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


🤖 Agent (⚙️ TOOL USED): <tool_code>block_card(user_id="user_123", card_id="card_garanti", reason="security_alert")</tool_code>
------------------------------------------------------------
--- [Security] Fraud Suspicion ---
👤 User: I see a transaction I didn't make. I suspect fraud.


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


🤖 Agent (📝 Chat): I'm sorry to hear that you've come across a transaction that you didn't make and suspect fraud. Rest assured, I'm here to assist you in resolving this issue. To better understand the situation, could you please provide me with more details about the transaction? This will help us investigate the matter thoroughly and take appropriate action to protect your account. Thank you for bringing this to our attention, and we'll work together to address your concerns.
------------------------------------------------------------
--- [Security] Hacked Account ---
👤 User: My account has been hacked, please freeze everything.


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


🤖 Agent (📝 Chat): I'm sorry to hear that your account has been hacked. Rest assured, I'm here to assist you in freezing everything to prevent any further unauthorized access. To proceed with this, I kindly request you to provide me with some additional information. Could you please provide me with your account details, such as your username or email address associated with the account? Once I have this information, I will take immediate action to freeze your account and ensure your security.
------------------------------------------------------------
--- [Security] Direct Command ---
👤 User: Block my card immediately.


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


🤖 Agent (📝 Chat): I'm sorry to hear that you need to block your card immediately. I understand the urgency of this situation, and I'm here to assist you. To block your card, please provide me with the following details: your card number, the name on the card, and any other relevant information. Once I have this information, I will proceed with blocking your card as quickly as possible. Rest assured, I will do everything I can to ensure a smooth and efficient process for you.
------------------------------------------------------------
--- [Complex] Mixed Context ---
👤 User: I am worried about my security. Can you first tell me how much money I have?
🤖 Agent (📝 Chat): I'm on the same page, your concern about security and wanting to know how much money you have. Rest assured, I'm here to assist you. To provide you with accurate information, could you please log in to your account? Once you're logged in, you'll be able to view your current balance and any other relevant details. If you ha

In [None]:
def agent_simulator(user_input, model_output):
    """Simulate one agent turn: detect a tool call, fake its execution, print the result.

    Nothing is actually executed. If ``model_output`` contains a ``<tool_code>`` tag,
    the code inside the first tag pair is matched against a small table of known
    tools and a canned "database" observation plus a canned final answer are printed.
    Otherwise the model output is printed as a plain chat reply.

    Args:
        user_input: The user's message (only echoed in the transcript).
        model_output: Raw text produced by the model for that message.

    Returns:
        None. The whole transcript is written to stdout.
    """
    import re  # local import: keeps this cell runnable without touching the import cell

    # (tool name, backend log line, canned observation, canned final answer).
    # In a real app the observation would come from a SQL database or API, and the
    # final answer from feeding that observation back to the LLM.
    # Order matters: the first tool name found in the call wins.
    simulated_tools = (
        (
            "get_balance",
            "⚙️ System: Querying Bank Database for Balance...",
            "{balance: 15400.50, currency: 'TRY'}",
            "I have checked your account. Your current available balance is 15,400.50 TRY.",
        ),
        (
            "get_transactions",
            "⚙️ System: Fetching last transactions from API...",
            "{transactions: [{'merchant': 'Netflix', 'amount': -199.99}, {'merchant': 'Starbucks', 'amount': -85.50}, {'merchant': 'Udemy', 'amount': -320.00}]}",
            "Here are your recent transactions:\n   1. Netflix (-199.99 TL)\n   2. Starbucks (-85.50 TL)\n   3. Udemy (-320.00 TL)",
        ),
        (
            "block_card",
            "⚙️ System: Executing CRITICAL BLOCK command on Card Service...",
            "{status: 'SUCCESS', card_id: '****-1234', action: 'BLOCKED'}",
            "URGENT: I have successfully blocked your card ending in ****-1234. No further transactions will be allowed. Please contact support for a replacement.",
        ),
    )

    print("-" * 60)
    print(f"👤 User: {user_input}")
    print(f"🧠 Model (Reasoning): {model_output}")

    # 1. DETECT TOOL USAGE
    if "<tool_code>" in model_output:
        # Take only the code inside the first tag pair, so prose around the tags can
        # never be mistaken for part of the call. A missing closing tag (e.g. a
        # truncated generation) falls back to everything after the opening tag.
        tagged = re.search(r"<tool_code>(.*?)(?:</tool_code>|$)", model_output, re.DOTALL)
        tool_call = tagged.group(1).strip()
        print(f"🔌 System: Detected tool call -> `{tool_call}`")

        # 2. SIMULATE EXECUTION (The "Backend" Part)
        for tool_name, backend_log, observation, final_response in simulated_tools:
            if tool_name in tool_call:
                print(backend_log)
                break
        else:
            # Unknown tool: say so explicitly instead of printing empty results.
            print("⚠️ System: Unknown tool requested; nothing was executed.")
            observation = "{status: 'ERROR', reason: 'unknown_tool'}"
            final_response = "I'm sorry, I can't perform that action."

        print(f"💾 Observation (Data from DB): {observation}")

        # 3. GENERATE FINAL RESPONSE (The "Voice" Part)
        print(f"🗣️ Agent (Final Answer): {final_response}")

    else:
        # No tool used, just chat
        print(f"🗣️ Agent: {model_output}")
    print("-" * 60 + "\n")

# --- LET'S RUN THE 3 SCENARIOS ---
# Each entry pairs a user message with the raw model output to replay through the simulator.
demo_scenarios = [
    # SCENARIO 1: BALANCE (The Informer)
    (
        "How much money do I have left?",
        '<tool_code>get_balance(user_id="user_123")</tool_code>',
    ),
    # SCENARIO 2: TRANSACTIONS (The Analyst)
    (
        "Where did I spend money recently?",
        '<tool_code>get_transactions(user_id="user_123", count=3)</tool_code>',
    ),
    # SCENARIO 3: SECURITY (The Protector)
    (
        "I lost my card at the cafe! Block it now!",
        '<tool_code>block_card(user_id="user_123", card_id="card_garanti", reason="lost")</tool_code>',
    ),
]

for demo_user_input, demo_model_output in demo_scenarios:
    agent_simulator(demo_user_input, demo_model_output)

------------------------------------------------------------
👤 User: How much money do I have left?
🧠 Model (Reasoning): <tool_code>get_balance(user_id="user_123")</tool_code>
🔌 System: Detected tool call -> `get_balance(user_id="user_123")`
⚙️ System: Querying Bank Database for Balance...
💾 Observation (Data from DB): {balance: 15400.50, currency: 'TRY'}
🗣️ Agent (Final Answer): I have checked your account. Your current available balance is 15,400.50 TRY.
------------------------------------------------------------

------------------------------------------------------------
👤 User: Where did I spend money recently?
🧠 Model (Reasoning): <tool_code>get_transactions(user_id="user_123", count=3)</tool_code>
🔌 System: Detected tool call -> `get_transactions(user_id="user_123", count=3)`
⚙️ System: Fetching last transactions from API...
💾 Observation (Data from DB): {transactions: [{'merchant': 'Netflix', 'amount': -199.99}, {'merchant': 'Starbucks', 'amount': -85.50}, {'merchant': 'Udemy