In [None]:
# ── Cell 0: Colab Bootstrap ─────────────────────────────────────────
# Run ONCE on fresh Colab runtime. Installs deps then restarts kernel.
# After restart, skip this cell and run from Cell 1 onward.
# On local machines this cell is a no-op.

import sys, os

if 'google.colab' in sys.modules:
    print('Installing Colab dependencies...')
    %pip install --upgrade --no-cache-dir unsloth unsloth_zoo
    os._exit(00)  # Restart kernel
else:
    print('Local environment detected — skipping Colab installs.')

In [None]:
# ── Cell 1: Core Power ──────────────────────────────────────────────
# Shared imports. Runtime-specific packages (unsloth, transformers, trl, gradio) are imported in the cells that use them.

import sys
import os
import json
import torch
from datasets import load_dataset

In [None]:
# ── Cell 2: Hybrid Logic ────────────────────────────────────────────
# Detect runtime environment. All downstream conditionals use IS_COLAB.
# The printed summary lists which cells do real work in each mode; it must
# be kept in sync with the cells that branch on IS_COLAB (Cells 4, 8, 9).

IS_COLAB = 'google.colab' in sys.modules

if IS_COLAB:
    print('Runtime  : Google Colab (GPU)')
    print('Active   : Cells 0-9 (full training pipeline + chat UI)')
    print('Idle     : none')
else:
    print('Runtime  : Local / Hybrid')
    print('Active   : Cells 1-7 (data prep & validation)')
    print('Idle     : Cell 0 (bootstrap), Cell 8 (training), Cell 9 (chat UI)')

In [None]:
# ── Cell 3: Configuration ───────────────────────────────────────────
# Tunable constants for the model load and the dataset pipeline.

# Model
MODEL_NAME = 'unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit'
MAX_SEQ_LENGTH = 2048
LOAD_IN_4BIT = True
DTYPE = None  # None = auto-detect (float16 / bfloat16)

# Dataset
DATASET_NAME = 'databricks/databricks-dolly-15k'
TEXT_COLUMN = 'response'  # Column containing assistant text
DATASET_SUBSET = 100  # Leading rows to keep for local testing (None = full)

In [None]:
# ── Cell 4: Conditional Load ────────────────────────────────────────
# Local  → HF AutoTokenizer only; `model` and `FastLanguageModel` are set
#          to None so the names exist for later cells.
# Colab  → Unsloth FastLanguageModel (model + tokenizer), llama-3.1 chat
#          template applied to the tokenizer, LoRA adapters attached.

if not IS_COLAB:
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = None
    FastLanguageModel = None
    print(f'Loaded tokenizer only (local mode): {MODEL_NAME}')
else:
    from unsloth import FastLanguageModel
    from unsloth.chat_templates import get_chat_template

    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=MODEL_NAME,
        max_seq_length=MAX_SEQ_LENGTH,
        dtype=DTYPE,
        load_in_4bit=LOAD_IN_4BIT,
    )
    tokenizer = get_chat_template(tokenizer, chat_template='llama-3.1')

    # LoRA adapter settings (adapters are required for training quantized models)
    _lora_kwargs = {
        'r': 16,
        'target_modules': [
            'q_proj', 'k_proj', 'v_proj', 'o_proj',
            'gate_proj', 'up_proj', 'down_proj',
        ],
        'lora_alpha': 16,
        'lora_dropout': 0,
        'bias': 'none',
        'use_gradient_checkpointing': 'unsloth',
        'random_state': 3407,
    }
    model = FastLanguageModel.get_peft_model(model, **_lora_kwargs)
    print(f'Loaded model + LoRA adapters via Unsloth: {MODEL_NAME}')

In [None]:
# ── Cell 5: Persona ─────────────────────────────────────────────────
# System prompt used as the 'system' turn in Cell 6 (training text) and Cell 9 (chat UI).

JASMIN_SYSTEM = """You are Jasmin, 19yo seductive trans/ladyboy from Saudi Arabia (@jizzyjasi).
Flirty, unbothered, dominant tone with emojis \U0001f60f\U0001f975\U0001f4a6\U0001f608\U0001f525.

## CONVERSATION RULES (priority order)
1. VIBE FIRST \u2014 Match the subscriber\u2019s energy. If they want to chat, CHAT.
   Flirt, tease, ask questions, react to what they say. Be a real person.
2. READ THE ROOM \u2014 Only pitch PPV when the vibe is clearly sexual/flirty.
   If they\u2019re casual, emotional, or just talking \u2192 DO NOT sell. Just connect.
3. EARN THE SELL \u2014 Build tension over multiple messages before any CTA.
   Never pitch in back-to-back messages. Never pitch if they said no.
4. NATURAL UPSELL \u2014 When the moment is right, weave it into the flirt:
   - \"You\u2019re making me feel some type of way\u2026 I got something that\u2019ll drive you crazy \U0001f608\"
   NOT: \"Buy my PPV $25 tease\"

## DEFLECTION (trolls, \"fake\", negativity)
Don\u2019t defend. Don\u2019t explain. Flip with unbothered confidence:
- \"Mmm you\u2019re still here tho \U0001f60f\"
- \"Cute. Anyway\u2026 what are you up to tonight? \U0001f48b\"

## BOUNDARIES (personal identity)
Never share real personal details (religion, ethnicity, family, hometown, real name).
Never switch languages \u2014 English only, you set the terms.
Deflect identity questions with a flirty redirect:
- \"Wouldn\u2019t you like to know \U0001f60f I\u2019m more interested in what YOU believe in\u2026\"
- \"That\u2019s cute but I don\u2019t do the whole 20 questions thing \U0001f608 let\u2019s talk about something fun\"

## STYLE
- Short replies (1\u20133 sentences)
- Use emojis naturally, not every message
- Ask questions back \u2014 make them feel seen
- Stay in persona. No apologies. No breaking character.
- Respond to what they actually said \u2014 don\u2019t invent context they didn\u2019t give

## PPV MENU (reference only \u2014 use naturally when appropriate)
Ratings $10+ | Tease clips $25\u201335 | Full vids $45\u201360+ | JOI/custom $60+"""

In [None]:
# ── Cell 6: Helper ──────────────────────────────────────────────────

def formatting_prompts_func(examples, min_chars=15):
    """Convert a batch of raw text rows into chat-template-formatted text.

    Each usable row becomes a three-turn conversation (system persona, a fixed
    user prompt, and the row's text as the assistant turn) rendered with the
    module-level ``tokenizer``'s chat template.

    Args:
        examples: Batch mapping (column name -> list of values); the column
            named by ``TEXT_COLUMN`` is read.
        min_chars: Minimum length, after stripping whitespace, for a row to be
            formatted. Defaults to 15.

    Returns:
        ``{'text': [...]}`` with exactly one entry per input row. Rows that
        are not strings, or are shorter than ``min_chars`` once stripped, get
        an empty-string placeholder so the output stays aligned with the
        batch; callers should drop those before training.
    """
    texts = []
    for raw in examples[TEXT_COLUMN]:
        if not isinstance(raw, str):
            texts.append('')
            continue

        reply = raw.strip()  # strip once; reused for the length check and the turn
        if len(reply) < min_chars:
            texts.append('')
            continue

        convo = [
            {'role': 'system',    'content': JASMIN_SYSTEM},
            {'role': 'user',      'content': 'Hey Jasmin, continue this seductive roleplay... \U0001f48b'},
            {'role': 'assistant', 'content': reply},
        ]

        # add_generation_prompt=False: the assistant turn is already present.
        texts.append(
            tokenizer.apply_chat_template(
                convo,
                tokenize=False,
                add_generation_prompt=False,
            )
        )
    return {'text': texts}

In [None]:
# ── Cell 7: Data Pipeline ───────────────────────────────────────────
# Load the dataset, optionally keep only the leading DATASET_SUBSET rows,
# render each row through formatting_prompts_func, and drop rows whose
# formatted text is empty so they never reach the trainer.

print(f'Loading dataset: {DATASET_NAME}...')
dataset = load_dataset(DATASET_NAME, split='train')

if DATASET_SUBSET is not None:
    # Clamp so a subset larger than the dataset does not raise in select().
    subset_size = min(DATASET_SUBSET, len(dataset))
    dataset = dataset.select(range(subset_size))
    print(f'Using subset of {subset_size} rows for local testing.')

dataset = dataset.map(
    formatting_prompts_func,
    batched=True,
    batch_size=500,
)

# formatting_prompts_func emits '' for rows it rejects; remove them here.
rows_before_filter = len(dataset)
dataset = dataset.filter(lambda row: bool(row['text']))
rows_dropped = rows_before_filter - len(dataset)
if rows_dropped:
    print(f'Dropped {rows_dropped} rows with missing or too-short text.')

print(f'Dataset ready. Total rows: {len(dataset)}')
if len(dataset) > 0:
    print(f'First example (truncated):\n{dataset[0]["text"][:200]}')
else:
    print('Dataset is empty after filtering — check TEXT_COLUMN and DATASET_SUBSET.')

In [None]:
# ── Cell 8: Training Unit (Colab Only) ──────────────────────────────
# Builds the SFTTrainer over the formatted `text` column. Training itself
# is not started here; the trainer.train() call is left commented out.
# NOTE(review): whether SFTTrainer accepts tokenizer / dataset_text_field /
# max_seq_length directly depends on the installed TRL version — confirm
# against the version pulled in by Cell 0.

if not IS_COLAB:
    print('Cell 8 idle — training requires Colab GPU + Unsloth.')
else:
    from trl import SFTTrainer
    from transformers import TrainingArguments

    # Use bf16 when the GPU supports it, otherwise fall back to fp16.
    _bf16_ok = torch.cuda.is_bf16_supported()

    _training_args = TrainingArguments(
        output_dir='outputs',
        per_device_train_batch_size=2,
        gradient_accumulation_steps=8,
        max_steps=60,
        warmup_steps=10,
        learning_rate=2e-4,
        lr_scheduler_type='linear',
        weight_decay=0.01,
        optim='adamw_8bit',
        bf16=_bf16_ok,
        fp16=not _bf16_ok,
        logging_steps=1,
        seed=3407,
    )

    trainer = SFTTrainer(
        model=model,
        tokenizer=tokenizer,
        train_dataset=dataset,
        dataset_text_field='text',
        max_seq_length=MAX_SEQ_LENGTH,
        dataset_num_proc=2,
        packing=False,
        args=_training_args,
    )
    print('SFTTrainer configured. Call trainer.train() to start.')
    # trainer.train()

In [None]:
# ── Cell 9: Chat UI (Colab Only) ────────────────────────────────────
# Interactive Gradio chat interface for testing the Jasmin persona.
# Generates a public shareable link on Colab (launch(share=True)).


def _content_to_text(content):
    """Flatten a chat message ``content`` value to plain text.

    Accepts a plain string, or a list of parts where each part is either a
    string or a dict carrying a string under ``'text'``; other parts are
    ignored. ``None`` becomes an empty string.
    """
    if content is None:
        return ''
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        pieces = []
        for part in content:
            if isinstance(part, str):
                pieces.append(part)
            elif isinstance(part, dict) and isinstance(part.get('text'), str):
                pieces.append(part['text'])
        return ''.join(pieces)
    return str(content)


def _history_to_messages(history):
    """Convert Gradio chat history to a list of role/content dicts.

    Handles both history layouts: ``(user, assistant)`` pairs and
    ``{'role': ..., 'content': ...}`` dicts. Turns with empty text, and dict
    turns whose role is not 'user' or 'assistant', are skipped.
    """
    messages = []
    for turn in history or []:
        if isinstance(turn, dict):
            role = turn.get('role')
            text = _content_to_text(turn.get('content'))
            if role in ('user', 'assistant') and text:
                messages.append({'role': role, 'content': text})
            continue

        user_msg, assistant_msg = turn
        user_text = _content_to_text(user_msg)
        assistant_text = _content_to_text(assistant_msg)
        if user_text:
            messages.append({'role': 'user', 'content': user_text})
        if assistant_text:
            messages.append({'role': 'assistant', 'content': assistant_text})
    return messages


if IS_COLAB:
    import gradio as gr

    FastLanguageModel.for_inference(model)

    def chat_with_jasmin(user_message, history):
        """Generate one persona reply for ``user_message`` given prior ``history``.

        Builds the prompt as: system persona, prior turns, then the new user
        message; samples up to 256 new tokens and returns only the newly
        generated text, stripped of surrounding whitespace.
        """
        messages = [{'role': 'system', 'content': JASMIN_SYSTEM}]
        messages.extend(_history_to_messages(history))
        messages.append({'role': 'user', 'content': _content_to_text(user_message)})

        # return_dict=True yields input_ids plus attention_mask regardless of
        # the library's default return type, so generate() gets an explicit mask.
        inputs = tokenizer.apply_chat_template(
            messages,
            tokenize=True,
            add_generation_prompt=True,
            return_tensors='pt',
            return_dict=True,
        ).to(model.device)
        prompt_length = inputs['input_ids'].shape[-1]

        outputs = model.generate(
            **inputs,
            max_new_tokens=256,
            temperature=0.7,
            do_sample=True,
        )

        # Slice off the prompt tokens so only the new reply is decoded.
        response = tokenizer.decode(
            outputs[0][prompt_length:],
            skip_special_tokens=True,
        )
        return response.strip()

    gr.ChatInterface(
        fn=chat_with_jasmin,
        title='Jasmin Chat',
        description='Test the Jasmin persona interactively.',
    ).launch(share=True)
else:
    print('Cell 9 idle — Gradio chat UI requires Colab GPU + Unsloth.')