In [1]:
# # Install required packages
# !pip install --upgrade pip
# !pip install flashinfer-python
# !pip install unsloth
# !pip install --no-deps xformers trl peft accelerate bitsandbytes


In [2]:
import os
import torch
from unsloth import FastLanguageModel
from unsloth.chat_templates import get_chat_template
from datasets import load_dataset
from trl import SFTTrainer
from transformers import TrainingArguments, EarlyStoppingCallback
from datetime import datetime

ðŸ¦¥ Unsloth: Will patch your computer to enable 2x faster free finetuning.
Unsloth: Your Flash Attention 2 installation seems to be broken?
A possible explanation is you have a new CUDA version which isn't
yet compatible with FA2? Please file a ticket to Unsloth or FA2.
We shall now use Xformers instead, which does not have any performance hits!
We found this negligible impact by benchmarking on 1x A100.
Switching to PyTorch attention since your Xformers is broken.

/opt/conda/lib/python3.11/site-packages/flash_attn_2_cuda.cpython-311-x86_64-linux-gnu.so: undefined symbol: _ZNK3c106SymInt6sym_neERKS0_
ðŸ¦¥ Unsloth Zoo will now patch everything to make training faster!


In [3]:
print(f"PyTorch Version: {torch.__version__}")
print(f"CUDA Available: {torch.cuda.is_available()}")

if torch.cuda.is_available():
    print(f"CUDA Version: {torch.version.cuda}")
    print(f"GPU Name: {torch.cuda.get_device_name(0)}")
    print(f"VRAM: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB")


PyTorch Version: 2.9.0+cu128
CUDA Available: True
CUDA Version: 12.8
GPU Name: NVIDIA L40S
VRAM: 44.5 GB


In [4]:
# ==============================================================================
# CONFIGURATION
# ==============================================================================

class Config:
    # Model
    MODEL_NAME = "Qwen/Qwen2.5-3B-Instruct"
    MAX_SEQ_LENGTH = 1536  # Qwen 2.5 supports up to 32K, but 2K is optimal for training
    DTYPE = None  # Auto-detect (bfloat16 for modern GPUs)
    LOAD_IN_4BIT = True  # Enable 4-bit quantization
    
    # Dataset
    DATASET_NAME = "Vishva007/RBI-Circular-QA-Dataset"
    DATASET_SPLIT = "train"
    EVAL_SIZE = 0.1  # 10% for evaluation
    SEED = 42
    
    # LoRA Configuration
    LORA_R = 16  # Rank
    LORA_ALPHA = 32  # Alpha (2x rank for stronger learning)
    LORA_DROPOUT = 0.1  # Dropout for regularization
    TARGET_MODULES = [
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj"
    ]
    USE_RSLORA = True  # Enable RS-LoRA for better stability
    
    # Training
    OUTPUT_DIR = "./qwen2.5-3b-rbi-qa"
    NUM_EPOCHS = 1  # 1 epoch for 47K samples
    BATCH_SIZE = 32  # Per device
    GRADIENT_ACCUMULATION = 1  # Effective batch size = 32
    LEARNING_RATE = 2e-4
    WARMUP_RATIO = 0.05  # 5% warmup
    LR_SCHEDULER = "cosine"
    WEIGHT_DECAY = 0.01
    MAX_GRAD_NORM = 1.0
    
    # Evaluation & Saving
    EVAL_STEPS = 250
    SAVE_STEPS = 250
    SAVE_TOTAL_LIMIT = 3
    LOGGING_STEPS = 50
    
    # Early Stopping
    EARLY_STOPPING_PATIENCE = 5
    EARLY_STOPPING_THRESHOLD = 0.005
    
    # Output
    REPO_ID = "Vishva007/Qwen2.5-3B-Instruct-RBI-QA"

print("Configuration loaded!")
print(f"Model: {Config.MODEL_NAME}")
print(f"Dataset: {Config.DATASET_NAME}")
print(f"Output: {Config.OUTPUT_DIR}")


Configuration loaded!
Model: Qwen/Qwen2.5-3B-Instruct
Dataset: Vishva007/RBI-Circular-QA-Dataset
Output: ./qwen2.5-3b-rbi-qa


In [5]:
# ==============================================================================
# LOAD MODEL & TOKENIZER
# ==============================================================================

print("Loading Qwen 2.5 3B model...")

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=Config.MODEL_NAME,
    max_seq_length=Config.MAX_SEQ_LENGTH,
    dtype=Config.DTYPE,
    load_in_4bit=Config.LOAD_IN_4BIT,
)

print("âœ“ Model loaded successfully")
print(f"âœ“ Model device: {model.device}")
print(f"âœ“ Model dtype: {model.dtype}")

# Check tokenizer
print("\nTokenizer details:")
print(f"  Vocab size: {len(tokenizer)}")
print(f"  Pad token: {tokenizer.pad_token}")
print(f"  EOS token: {tokenizer.eos_token}")
print(f"  BOS token: {tokenizer.bos_token}")


Loading Qwen 2.5 3B model...
==((====))==  Unsloth 2025.11.3: Fast Qwen2 patching. Transformers: 4.57.1.
   \\   /|    NVIDIA L40S. Num GPUs = 1. Max memory: 44.521 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.9.0+cu128. CUDA: 8.9. CUDA Toolkit: 12.8. Triton: 3.5.0
\        /    Bfloat16 = TRUE. FA [Xformers = None. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/2.36G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/271 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

added_tokens.json:   0%|          | 0.00/605 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/614 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/11.4M [00:00<?, ?B/s]

âœ“ Model loaded successfully
âœ“ Model device: cuda:0
âœ“ Model dtype: torch.bfloat16

Tokenizer details:
  Vocab size: 151665
  Pad token: <|vision_pad|>
  EOS token: <|im_end|>
  BOS token: None


In [6]:
# ==============================================================================
# APPLY LORA ADAPTERS
# ==============================================================================

print("Applying LoRA configuration...")

model = FastLanguageModel.get_peft_model(
    model,
    r=Config.LORA_R,
    target_modules=Config.TARGET_MODULES,
    lora_alpha=Config.LORA_ALPHA,
    lora_dropout=Config.LORA_DROPOUT,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=Config.SEED,
    use_rslora=Config.USE_RSLORA,
    loftq_config=None,
)

print("âœ“ LoRA adapters applied")
print(f"  Rank: {Config.LORA_R}")
print(f"  Alpha: {Config.LORA_ALPHA}")
print(f"  Dropout: {Config.LORA_DROPOUT}")
print(f"  RS-LoRA: {Config.USE_RSLORA}")

# Count trainable parameters
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
total_params = sum(p.numel() for p in model.parameters())
print(f"\nTrainable params: {trainable_params:,} ({100 * trainable_params / total_params:.2f}%)")
print(f"Total params: {total_params:,}")


Unsloth: Dropout = 0 is supported for fast patching. You are using dropout = 0.1.
Unsloth will patch all other layers, except LoRA matrices, causing a performance hit.


Applying LoRA configuration...


Unsloth 2025.11.3 patched 36 layers with 0 QKV layers, 0 O layers and 0 MLP layers.


âœ“ LoRA adapters applied
  Rank: 16
  Alpha: 32
  Dropout: 0.1
  RS-LoRA: True

Trainable params: 29,933,568 (1.64%)
Total params: 1,830,055,936


In [8]:
# ==============================================================================
# SETUP CHAT TEMPLATE FOR QWEN 2.5
# ==============================================================================

print("Setting up Qwen 2.5 chat template...")

tokenizer = get_chat_template(
    tokenizer,
    chat_template="qwen-2.5",  # Use Qwen 2.5 specific template
)

print("âœ“ Chat template configured")

# Test the template
test_messages = [
    {"role": "system", "content": "You are an expert on RBI regulations."},
    {"role": "user", "content": "What is the RBI's policy on inflation?"},
]

formatted = tokenizer.apply_chat_template(
    test_messages,
    tokenize=False,
    add_generation_prompt=True
)

print("\nSample formatted prompt:")
print(formatted[:500] + "...")


Setting up Qwen 2.5 chat template...
âœ“ Chat template configured

Sample formatted prompt:
<|im_start|>system
You are an expert on RBI regulations.<|im_end|>
<|im_start|>user
What is the RBI's policy on inflation?<|im_end|>
<|im_start|>assistant
...


In [9]:
# ==============================================================================
# LOAD & FORMAT DATASET
# ==============================================================================

def format_rbi_dataset(examples):
    """
    Format RBI QA dataset for Qwen 2.5 chat template
    """
    texts = []
    
    system_msg = """You are a highly knowledgeable AI assistant with expertise in Indian banking and 
    financial regulations, particularly those outlined in Reserve Bank of India (RBI) circulars. 
    Your task is to answer questions based on the RBI circulars and related financial regulations.
    Provide accurate, specific answers including relevant dates, amounts, and institutional details."""
    
    for i in range(len(examples['question'])):
        # Create chat messages
        messages = [
            {"role": "system", "content": system_msg},
            {"role": "user", "content": examples['question'][i]},
            {"role": "assistant", "content": examples['answer'][i]}
        ]
        
        # Apply chat template
        text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=False  # Include assistant response
        )
        
        texts.append(text)
    
    return {"text": texts}


# Load dataset
print(f"Loading dataset: {Config.DATASET_NAME}")
dataset = load_dataset(Config.DATASET_NAME, split=Config.DATASET_SPLIT)

print(f"Total records: {len(dataset)}")
print(f"Columns: {dataset.column_names}")

# Apply formatting
print("\nFormatting dataset...")
dataset = dataset.map(
    format_rbi_dataset,
    batched=True,
    remove_columns=dataset.column_names,
    num_proc=4,  # Parallel processing
    desc="Formatting dataset"
)

print(f"âœ“ Dataset formatted: {len(dataset)} samples")

# Show sample
print("\n" + "="*70)
print("SAMPLE FORMATTED EXAMPLE:")
print("="*70)
print(dataset[0]['text'][:500] + "...")


Loading dataset: Vishva007/RBI-Circular-QA-Dataset


README.md: 0.00B [00:00, ?B/s]

data/train-00000-of-00001.parquet:   0%|          | 0.00/25.0M [00:00<?, ?B/s]

data/eval-00000-of-00001.parquet:   0%|          | 0.00/2.48M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/47934 [00:00<?, ? examples/s]

Generating eval split:   0%|          | 0/1000 [00:00<?, ? examples/s]

Total records: 47934
Columns: ['document', 'filename', 'model_name', 'regulation_area', 'applicable_to', 'issued_on', 'key_topics', 'chunks_text', 'is_table', 'question', 'answer', 'evaluation_criteria', 'category', 'estimated_difficulty', 'rephrased_question', 'rephrased_answer', 'data_source']

Formatting dataset...


Formatting dataset (num_proc=4):   0%|          | 0/47934 [00:00<?, ? examples/s]

âœ“ Dataset formatted: 47934 samples

SAMPLE FORMATTED EXAMPLE:
<|im_start|>system
You are a highly knowledgeable AI assistant with expertise in Indian banking and 
    financial regulations, particularly those outlined in Reserve Bank of India (RBI) circulars. 
    Your task is to answer questions based on the RBI circulars and related financial regulations.
    Provide accurate, specific answers including relevant dates, amounts, and institutional details.<|im_end|>
<|im_start|>user
What relaxations were provided by the Reserve Bank of India regarding the ...


In [10]:
# ==============================================================================
# TRAIN/EVAL SPLIT
# ==============================================================================

print("Splitting dataset into train/eval...")

dataset_splits = dataset.train_test_split(
    test_size=Config.EVAL_SIZE,
    seed=Config.SEED,
    shuffle=True
)

print(f"Train samples: {len(dataset_splits['train'])}")
print(f"Eval samples: {len(dataset_splits['test'])}")

# Calculate training steps
steps_per_epoch = len(dataset_splits['train']) // (Config.BATCH_SIZE * Config.GRADIENT_ACCUMULATION)
total_steps = steps_per_epoch * Config.NUM_EPOCHS

print("\nTraining details:")
print(f"  Steps per epoch: {steps_per_epoch}")
print(f"  Total steps: {total_steps}")
print(f"  Eval every: {Config.EVAL_STEPS} steps")
print(f"  Save every: {Config.SAVE_STEPS} steps")


Splitting dataset into train/eval...
Train samples: 43140
Eval samples: 4794

Training details:
  Steps per epoch: 1348
  Total steps: 1348
  Eval every: 250 steps
  Save every: 250 steps


In [11]:
# ==============================================================================
# TRAINING ARGUMENTS
# ==============================================================================

training_args = TrainingArguments(
    # Output
    output_dir=Config.OUTPUT_DIR,
    run_name="qwen2.5-3b-rbi-qa",
    overwrite_output_dir=True,
    
    # Training
    num_train_epochs=Config.NUM_EPOCHS,
    per_device_train_batch_size=Config.BATCH_SIZE,
    gradient_accumulation_steps=Config.GRADIENT_ACCUMULATION,
    
    # Optimization
    learning_rate=Config.LEARNING_RATE,
    lr_scheduler_type=Config.LR_SCHEDULER,
    warmup_ratio=Config.WARMUP_RATIO,
    weight_decay=Config.WEIGHT_DECAY,
    max_grad_norm=Config.MAX_GRAD_NORM,
    optim="paged_adamw_8bit",
    
    # Memory & Performance
    gradient_checkpointing=True,
    gradient_checkpointing_kwargs={"use_reentrant": False},
    fp16=False,
    bf16=True,
    
    # Evaluation
    eval_strategy="steps",
    eval_steps=Config.EVAL_STEPS,
    per_device_eval_batch_size=Config.BATCH_SIZE,
    
    # Saving
    save_strategy="steps",
    save_steps=Config.SAVE_STEPS,
    save_total_limit=Config.SAVE_TOTAL_LIMIT,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False,
    
    # Logging
    logging_steps=Config.LOGGING_STEPS,
    logging_strategy="steps",
    report_to="tensorboard",
    
    # Reproducibility
    seed=Config.SEED,
    data_seed=Config.SEED,
)

print("Training arguments configured!")
print(f"  Effective batch size: {Config.BATCH_SIZE * Config.GRADIENT_ACCUMULATION}")
print(f"  Learning rate: {Config.LEARNING_RATE}")
print(f"  Scheduler: {Config.LR_SCHEDULER}")
print("  Precision: BF16")


Training arguments configured!
  Effective batch size: 32
  Learning rate: 0.0002
  Scheduler: cosine
  Precision: BF16


In [12]:
# ==============================================================================
# SETUP TRAINER
# ==============================================================================

print("Setting up SFTTrainer...")

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset_splits['train'],
    eval_dataset=dataset_splits['test'],
    dataset_text_field="text",
    max_seq_length=Config.MAX_SEQ_LENGTH,
    dataset_num_proc=4,
    packing=False,  # Disable packing for better quality
    args=training_args,
)

# Add early stopping
early_stopping = EarlyStoppingCallback(
    early_stopping_patience=Config.EARLY_STOPPING_PATIENCE,
    early_stopping_threshold=Config.EARLY_STOPPING_THRESHOLD,
)
trainer.add_callback(early_stopping)

print("âœ“ Trainer configured")
print("  Model: Qwen 2.5 3B with LoRA")
print(f"  Train samples: {len(dataset_splits['train'])}")
print(f"  Eval samples: {len(dataset_splits['test'])}")
print(f"  Early stopping patience: {Config.EARLY_STOPPING_PATIENCE}")


Setting up SFTTrainer...


Unsloth: Tokenizing ["text"] (num_proc=116):   0%|          | 0/43140 [00:00<?, ? examples/s]

Unsloth: Tokenizing ["text"] (num_proc=116):   0%|          | 0/4794 [00:00<?, ? examples/s]

âœ“ Trainer configured
  Model: Qwen 2.5 3B with LoRA
  Train samples: 43140
  Eval samples: 4794
  Early stopping patience: 5


In [13]:
# ==============================================================================
# START TRAINING
# ==============================================================================

print("\n" + "="*70)
print("STARTING TRAINING - QWEN 2.5 3B ON RBI QA DATASET")
print("="*70)
print(f"Start time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print("="*70 + "\n")

# Train
trainer_stats = trainer.train()

print("\n" + "="*70)
print("âœ… TRAINING COMPLETE!")
print("="*70)
print(f"End time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print(f"Total time: {trainer_stats.metrics['train_runtime']:.2f} seconds")
print(f"Samples/sec: {trainer_stats.metrics['train_samples_per_second']:.2f}")
print(f"Final train loss: {trainer_stats.metrics.get('train_loss', 'N/A')}")
print("="*70)

# Get final evaluation
print("\nRunning final evaluation...")
eval_results = trainer.evaluate()
print(f"Final eval loss: {eval_results['eval_loss']:.4f}")


The model is already on multiple devices. Skipping the move to device specified in `args`.



STARTING TRAINING - QWEN 2.5 3B ON RBI QA DATASET
Start time: 2025-11-24 16:21:13



==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 43,140 | Num Epochs = 1 | Total steps = 1,349
O^O/ \_/ \    Batch size per device = 32 | Gradient accumulation steps = 1
\        /    Data Parallel GPUs = 1 | Total batch size (32 x 1 x 1) = 32
 "-____-"     Trainable parameters = 29,933,568 of 3,115,872,256 (0.96% trained)


Step,Training Loss,Validation Loss
250,0.7918,0.781934
500,0.6964,0.688187
750,0.6327,0.629984
1000,0.5896,0.592467
1250,0.5724,0.579091



âœ… TRAINING COMPLETE!
End time: 2025-11-24 17:14:01
Total time: 3167.23 seconds
Samples/sec: 13.62
Final train loss: 0.708442140809689

Running final evaluation...


Final eval loss: 0.5791


In [14]:
# ==============================================================================
# SAVE MODEL - MERGED 16-BIT FOR INFERENCE
# ==============================================================================

print("\n" + "="*70)
print("SAVING MODEL")
print("="*70)

output_path = f"{Config.OUTPUT_DIR}/merged-16bit"
print(f"Saving to: {output_path}")

# Save as merged 16-bit model (best for inference/vLLM/SGLang)
model.save_pretrained_merged(
    output_path,
    tokenizer,
    save_method="merged_16bit",
)

print(f"âœ“ Model saved to {output_path}")
print("  Format: Merged 16-bit (ready for vLLM/SGLang)")



SAVING MODEL
Saving to: ./qwen2.5-3b-rbi-qa/merged-16bit


config.json:   0%|          | 0.00/757 [00:00<?, ?B/s]

Found HuggingFace hub cache directory: /root/.cache/huggingface/hub


model.safetensors.index.json: 0.00B [00:00, ?B/s]

Checking cache directory for required files...
Cache check failed: model-00001-of-00002.safetensors not found in local cache.
Not all required files found in cache. Will proceed with downloading.
Checking cache directory for required files...
Cache check failed: tokenizer.model not found in local cache.
Not all required files found in cache. Will proceed with downloading.


Unsloth: Preparing safetensor model files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/3.97G [00:00<?, ?B/s]

Unsloth: Preparing safetensor model files:  50%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆ     | 1/2 [00:07<00:07,  7.87s/it]

model-00002-of-00002.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

Unsloth: Preparing safetensor model files: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 2/2 [00:12<00:00,  6.20s/it]


Note: tokenizer.model not found (this is OK for non-SentencePiece models)


Unsloth: Merging weights into 16bit: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 2/2 [00:11<00:00,  5.69s/it]


Unsloth: Merge process complete. Saved to `/workspace/qwen2.5-3b-rbi-qa/merged-16bit`
âœ“ Model saved to ./qwen2.5-3b-rbi-qa/merged-16bit
  Format: Merged 16-bit (ready for vLLM/SGLang)


In [16]:
# ==============================================================================
# PUSH TO HUGGING FACE HUB (Optional)
# ==============================================================================

from huggingface_hub import notebook_login
       
# Login
notebook_login()
    
print(f"\nPushing to: {Config.REPO_ID}")
    
# Push merged model
model.push_to_hub_merged(
        Config.REPO_ID,
        tokenizer,
        save_method="merged_16bit",
        token=True,  # Use saved token
    )
    
print(f"âœ“ Model pushed to {Config.REPO_ID}")
print(f"ðŸ”— View at: https://huggingface.co/{Config.REPO_ID}")

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.svâ€¦


Pushing to: Vishva007/Qwen2.5-3B-Instruct-RBI-QA


Processing Files (0 / 0): |          |  0.00B /  0.00B            

New Data Upload: |          |  0.00B /  0.00B            

Found HuggingFace hub cache directory: /root/.cache/huggingface/hub


model.safetensors.index.json: 0.00B [00:00, ?B/s]

Checking cache directory for required files...
Cache check failed: model-00001-of-00002.safetensors not found in local cache.
Not all required files found in cache. Will proceed with downloading.
Checking cache directory for required files...
Cache check failed: tokenizer.model not found in local cache.
Not all required files found in cache. Will proceed with downloading.


Unsloth: Preparing safetensor model files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/3.97G [00:00<?, ?B/s]

Unsloth: Preparing safetensor model files:  50%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆ     | 1/2 [00:07<00:07,  7.22s/it]

model-00002-of-00002.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

Unsloth: Preparing safetensor model files: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 2/2 [00:12<00:00,  6.02s/it]


Note: tokenizer.model not found (this is OK for non-SentencePiece models)


Unsloth: Merging weights into 16bit:   0%|          | 0/2 [00:00<?, ?it/s]

Processing Files (0 / 0): |          |  0.00B /  0.00B            

New Data Upload: |          |  0.00B /  0.00B            

Unsloth: Merging weights into 16bit:  50%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆ     | 1/2 [00:49<00:49, 49.68s/it]

Processing Files (0 / 0): |          |  0.00B /  0.00B            

New Data Upload: |          |  0.00B /  0.00B            

Unsloth: Merging weights into 16bit: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 2/2 [01:21<00:00, 40.84s/it]


Unsloth: Merge process complete. Saved to `/workspace/Vishva007/Qwen2.5-3B-Instruct-RBI-QA`
âœ“ Model pushed to Vishva007/Qwen2.5-3B-Instruct-RBI-QA
ðŸ”— View at: https://huggingface.co/Vishva007/Qwen2.5-3B-Instruct-RBI-QA
