# 🚀 Laravel RAG LLM - Google Colab Version

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ubaidillahfaris/LLM/blob/main/notebooks/Colab_Quick_Start.ipynb)

Notebook ini di-optimize untuk **Google Colab** dengan auto-detection path.

**Just run all cells! 🎉**

## Step 1: Clone Repository (Skip jika sudah ada)

In [None]:
import os

# Check if already cloned
if not os.path.exists('/content/LLM'):
    print("üì• Cloning repository...")
    !git clone https://github.com/ubaidillahfaris/LLM.git /content/LLM
    print("‚úÖ Repository cloned!")
else:
    print("‚úÖ Repository already exists!")

# Change to project directory
%cd /content/LLM
!ls -la

## Step 2: Install Dependencies

⏰ **Wait time**: 2-3 minutes di Colab

In [None]:
# Install dependencies
!pip install -q transformers datasets pandas numpy tqdm

print("‚úÖ All dependencies installed!")

## Step 3: Setup & Import

In [None]:
import sys
import os
import json
import torch


def detect_project_root(start=None, candidates=('/content/LLM', '/home/user/LLM')):
    """Return the directory to treat as the project root.

    Args:
        start: Directory from which to search upwards when none of the
            well-known locations exist. Defaults to the current working
            directory.
        candidates: Well-known checkout locations, tried in order; the
            first one that exists is returned.

    Returns:
        The first existing candidate; otherwise the nearest directory named
        ``LLM`` at or above ``start``; otherwise ``start`` itself.
    """
    for candidate in candidates:
        if os.path.exists(candidate):
            return candidate

    start = os.getcwd() if start is None else start
    current = start
    while os.path.basename(current) != 'LLM':
        parent = os.path.dirname(current)
        # dirname() of a filesystem root returns the root itself on every
        # platform, so this terminates on Windows drive roots as well as '/'.
        if parent == current:
            # No ancestor is literally named 'LLM' (e.g. the path only
            # contains 'LLM-fork'): fall back to the starting directory
            # instead of silently selecting the filesystem root.
            return start
        current = parent
    return current


# Auto-detect project root (works in Colab, local, and other environments)
project_root = detect_project_root()

# Add src to path so the project's modules can be imported by bare name
src_path = os.path.join(project_root, 'src')
if src_path not in sys.path:
    sys.path.insert(0, src_path)

print("üìÅ Environment Detection:")
print(f"   Project root: {project_root}")
print(f"   Source path: {src_path}")
print(f"   Current dir: {os.getcwd()}")

# Verify structure: report each required directory and remember whether
# any of them is missing.
required_dirs = ['src', 'data', 'configs']
all_exist = True
for d in required_dirs:
    path = os.path.join(project_root, d)
    exists = os.path.exists(path)
    status = "‚úì" if exists else "‚úó"
    print(f"   {status} {d}/")
    if not exists:
        all_exist = False

if not all_exist:
    print("\n‚ö†Ô∏è  Some directories are missing! Did you clone the repo?")
else:
    print("\n‚úÖ Project structure validated!")

# Import modules. Only the imports are guarded; the device report runs in
# the `else` branch so an unrelated failure there is not mislabelled as an
# import error.
try:
    from config_loader import ConfigLoader
    from retrieval import RAGRetriever
    from model_utils import ModelManager, RAGGenerator
except ImportError as e:
    print(f"\n‚ùå Import error: {e}")
    print("\nüîß Try running Step 2 again or restart runtime.")
else:
    print("\n‚úÖ All modules imported successfully!")

    # Check device
    device = "cuda" if torch.cuda.is_available() else "cpu"
    if device == "cuda":
        gpu_name = torch.cuda.get_device_name(0)
        print(f"üöÄ GPU detected: {gpu_name}")
    else:
        print("üíª Running on CPU")

## Step 4: Load Configuration

In [None]:
# The config file lives under <project_root>/configs.
config_path = os.path.join(project_root, 'configs', 'config.json')

print(f"üìã Loading config from: {config_path}")
config = ConfigLoader(config_path=config_path)

print("\n‚úÖ Configuration loaded!")
# Echo the three settings that the later cells read from the config.
for label, key in (
    ("Model", 'model.name'),
    ("Temperature", 'model.temperature'),
    ("Max tokens", 'generation.max_new_tokens'),
):
    print(f"   {label}: {config.get(key)}")

## Step 5: Load GPT-2 Model

⏰ **Wait time**: 1-2 minutes (first time download ~500MB)

In [None]:
# Use the GPU when torch can see one; otherwise run on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

print("üì¶ Initializing model manager...")
model_manager = ModelManager(model_name="gpt2", device=device)

# Announce the load before starting it, since it can take a while.
print("\nüì• Loading GPT-2 model...")
print("   (This may take 1-2 minutes on first run)")
model_manager.load_model(from_pretrained=True)

print("\n‚úÖ Model loaded and ready!")
print(f"   Device: {device}")
print(f"   Model: {model_manager.model_name}")

## Step 6: Setup RAG System

In [None]:
# Location of the local knowledge base inside the checkout.
kb_relative_parts = ('data', 'knowledge_base', 'local_db.json')
kb_path = os.path.join(project_root, *kb_relative_parts)

print("üìö Loading knowledge base from:")
print(f"   {kb_path}")

# Build the retriever on top of that knowledge base file.
retriever = RAGRetriever(kb_path=kb_path)

print("\nüìñ Knowledge Base Contents:")
retriever.kb.show_all()

# Pair the loaded model with the retriever.
rag_generator = RAGGenerator(model_manager, retriever)

print("\n‚úÖ RAG system ready!")

## Step 7: Test RAG System 🧪

In [None]:
# Smoke test with a sample query
test_query = "Bagaimana cara install Laravel?"

print(f"‚ùì Test Question: {test_query}\n")
print("ü§ñ Generating answer...\n")

# Generation settings are read from the config, with hard-coded fallbacks
# for keys the config does not provide.
generation_settings = {
    'max_new_tokens': config.get('generation.max_new_tokens', 200),
    'temperature': config.get('model.temperature', 0.7),
}
result = rag_generator.generate_with_context(query=test_query, **generation_settings)

separator = "=" * 60
print(separator)
print(f"üìä Confidence: {result['confidence']:.2%}")
print(f"üîç Retrieval Method: {result['method']}")
print(f"\nüí° Answer:\n{result['answer']}")
print(separator)

## Step 8: Interactive Q&A 🎯

Sekarang lu bisa tanya apapun tentang Laravel!

In [None]:
def ask(question: str, show_context: bool = False):
    """
    Answer a Laravel question through the RAG pipeline and print the outcome.

    Args:
        question: The question text forwarded to the RAG generator.
        show_context: When True, also print the first 200 characters of the
            retrieved context, if any was returned (default: False).

    Returns:
        The result produced by ``rag_generator.generate_with_context``.
    """
    banner = '=' * 60
    print(f"\n{banner}")
    print(f"‚ùì {question}")
    print(f"{banner}\n")

    # Generation settings are read from the config, with hard-coded
    # fallbacks for keys the config does not provide.
    generation_settings = {
        'max_new_tokens': config.get('generation.max_new_tokens', 200),
        'temperature': config.get('model.temperature', 0.7),
    }
    result = rag_generator.generate_with_context(query=question, **generation_settings)

    print(f"üìä Confidence: {result['confidence']:.2%} | Method: {result['method']}")

    # The context entry is only read when the caller asked to see it.
    context = result['context'] if show_context else None
    if context:
        print("\nüìö Retrieved Context:")
        print(f"{context[:200]}...\n")

    print(f"\nüí° Answer:\n{result['answer']}\n")

    return result


# Tell the user the helper is defined and how to call it.
print("‚úÖ Function 'ask()' ready!")
print("\nüìù Usage: ask('Your question here')")
print("   Example: ask('Apa itu Eloquent ORM?')")

In [None]:
# Try different questions!
# ask() prints the answer and returns the result; as the last expression
# of the cell, that return value is also displayed by the notebook.
ask("Apa itu Eloquent ORM?")

In [None]:
# Example question about creating a controller (context output left off).
ask("Bagaimana cara membuat controller?")

In [None]:
# Same helper, but show_context=True also prints the start of the
# retrieved context before the answer.
ask("Bagaimana cara membuat middleware?", show_context=True)

In [None]:
# Your custom question here!
# Replace the placeholder string with your own question before running.
ask("YOUR QUESTION HERE")

## Bonus: Explore Dataset 📊

In [None]:
import pandas as pd

# Load raw dataset
dataset_path = os.path.join(project_root, 'data', 'raw', 'laravel_qa_dataset.json')
# Read explicitly as UTF-8: without `encoding`, open() uses the platform's
# locale encoding, which can mis-decode or reject non-ASCII text in the
# dataset on machines whose default is not UTF-8.
with open(dataset_path, 'r', encoding='utf-8') as f:
    dataset = json.load(f)

# One row per record of the loaded JSON; the statistics below read the
# 'question', 'category' and 'difficulty' columns.
df = pd.DataFrame(dataset)

print("üìä Dataset Statistics:")
print(f"   Total QA pairs: {len(df)}")
print(f"   Categories: {df['category'].nunique()}")
print(f"\nüìà Distribution by category:")
print(df['category'].value_counts())

print(f"\nüìà Distribution by difficulty:")
print(df['difficulty'].value_counts())

# Show dataframe: as the cell's last expression, the notebook renders the
# first ten rows of the selected columns.
print("\nüìã Sample Data:")
df[['question', 'category', 'difficulty']].head(10)

## 🎊 Success!

RAG system sekarang running di Google Colab!

### 🚀 What's Next?

1. **Ask more questions** - Edit cells di atas dengan pertanyaan lu
2. **Add knowledge** - Edit `data/knowledge_base/local_db.json`
3. **Expand dataset** - Tambah QA ke `data/raw/laravel_qa_dataset.json`
4. **Fine-tune model** - Buka `Laravel_RAG_LLM_Complete.ipynb` untuk training

### 📚 Resources

- [GitHub Repo](https://github.com/ubaidillahfaris/LLM)
- [Laravel Docs](https://laravel.com/docs)
- [Transformers Docs](https://huggingface.co/docs/transformers)

---

**Happy Coding! 🎉**