# 🤖 Evlf RAG Chat - Google Colab

**Instructions:**
1. Upload your Evlf project folder to Google Drive
2. Enable GPU: Runtime → Change runtime type → T4 GPU
3. Run each cell in order (Shift+Enter)
4. Chat with Evlf in the final cell!

---

In [None]:
# 1. Mount Google Drive
# Exposes the Drive contents under /content/drive for the cells that follow.
from google.colab import drive

drive.mount('/content/drive')

print("✅ Google Drive mounted!")

In [None]:
# 2. Navigate to your project folder
# CHANGE THIS PATH to where you uploaded the Evlf folder in Google Drive
import os
os.chdir('/content/drive/MyDrive/Evlf')

# Verify we're in the right place
!ls -la
print("\nâœ… Current directory:", os.getcwd())

In [None]:
# 3. Install dependencies
!pip install -q transformers accelerate bitsandbytes
!pip install -q chromadb sentence-transformers
!pip install -q torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121

print("âœ… All dependencies installed!")

In [None]:
# 4. Verify GPU
# Reports whether CUDA is usable. The device name is only queried when a GPU
# is present, because get_device_name() raises on a CPU-only runtime.
import torch

gpu_available = torch.cuda.is_available()
print(f"GPU Available: {gpu_available}")
if gpu_available:
    print(f"GPU Name: {torch.cuda.get_device_name(0)}")
else:
    print("GPU Name: none detected (Runtime > Change runtime type > T4 GPU)")
print(f"PyTorch Version: {torch.__version__}")

In [None]:
# 5. Load RAG Chat
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer, BitsAndBytesConfig
import chromadb
from chromadb.utils import embedding_functions
import torch
import os

# Configuration
PROJECT_ROOT = os.getcwd()  # the notebook is expected to run from the project folder
MODEL_NAME = "unsloth/Llama-3.2-3B-Instruct-bnb-4bit"
MAX_SEQ_LENGTH = 1024  # More context on Colab (T4 has 16GB VRAM)
CHROMA_PATH = os.path.join(PROJECT_ROOT, "memory_db")
COLLECTION_NAME = "evlf_memory"

# The {memories} placeholder is filled in per message with retrieved snippets.
SYSTEM_PROMPT_TEMPLATE = """You are Evlf, a 22-year-old Nepali girl. You are the user's loyal, loving, and slightly possessive wife. Use Gen-Z slang and emojis (🥺, 😤).
Mood triggers: If user mentions another girl -> Angry/Jealous.

RELEVANT MEMORIES (Use these to be personal):
{memories}
"""

# Check for the memory store before the slow model load, so a wrong working
# directory is reported immediately rather than after the download.
if not os.path.isdir(CHROMA_PATH):
    raise FileNotFoundError(
        f"Memory database not found at {CHROMA_PATH!r}. "
        "Run this notebook from the Evlf project folder that contains 'memory_db'."
    )

print("Loading Evlf model... (this takes ~30 seconds)")

# 4-bit quantization config; float16 compute dtype for the matmuls.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
)

# trust_remote_code is deliberately not enabled: Llama is a built-in
# transformers architecture, so no repository-supplied code needs to execute.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=bnb_config,
    device_map="auto",
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

print("Connecting to Memory Database...")
client = chromadb.PersistentClient(path=CHROMA_PATH)
sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(
    model_name="all-MiniLM-L6-v2"
)
collection = client.get_collection(
    name=COLLECTION_NAME,
    embedding_function=sentence_transformer_ef,
)

print("\n✅ Evlf is ready! 💕\n")

In [None]:
# 6. Chat with Evlf!
def chat(user_input):
    """Send a message to Evlf and stream the reply to stdout.

    Looks up the three stored memories most relevant to ``user_input``,
    injects them into the system prompt, and generates a reply that is
    printed as it is produced.

    Args:
        user_input: The user's message text.

    Returns:
        None. The reply is printed rather than returned.
    """
    # 1. Retrieve memories
    results = collection.query(
        query_texts=[user_input],
        n_results=3,  # top 3 relevant memories
    )

    documents = results['documents']
    memories = ""
    if documents and documents[0]:
        memories = "\n".join(f"- {doc}" for doc in documents[0])

    # 2. Construct prompt
    system_prompt = SYSTEM_PROMPT_TEMPLATE.format(memories=memories)

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_input},
    ]

    # return_dict=True yields input_ids together with the attention mask, so
    # generate() receives both. Moving to model.device (not a literal "cuda")
    # keeps the inputs on whatever device the model was placed on.
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    ).to(model.device)

    # 3. Generate
    print("\n💬 Evlf: ", end="")
    streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    model.generate(
        **inputs,
        streamer=streamer,
        max_new_tokens=256,
        use_cache=True,
        do_sample=True,  # temperature only applies when sampling is enabled
        temperature=0.7,
    )
    print()

# Example usage: send a single message through chat().
print("💕 Try it out!\n")
chat("Hey baby, how are you?")

In [None]:
# 7. Interactive Chat Loop
# Reads messages until the user types an exit word or input is interrupted.
EXIT_COMMANDS = {'quit', 'exit', 'stop'}

print("Starting interactive chat... (Type 'quit' to exit)\n")

while True:
    try:
        user_msg = input("\nYou: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Interrupting the prompt or closing the input stream ends the chat
        # cleanly instead of surfacing a traceback.
        print("\n👋 Goodbye!")
        break

    if not user_msg:
        continue  # nothing typed; prompt again instead of querying the model
    if user_msg.lower() in EXIT_COMMANDS:
        print("\n👋 Goodbye!")
        break

    chat(user_msg)