In [None]:
from google.colab import drive
import os

# Mount Google Drive at /content/drive so that later cells can read and write
# the dataset folder under /content/drive/MyDrive.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
if not os.path.exists('/content/drive/MyDrive/Nuscenes'):
  !mkdir -p /content/drive/MyDrive/Nuscenes  # Make the directory to store the nuScenes dataset in.

  !wget https://www.nuscenes.org/data/v1.0-mini.tgz  # Download the nuScenes mini split.

  !tar -xf v1.0-mini.tgz -C /content/drive/MyDrive/Nuscenes  # Uncompress the nuScenes mini split.

  !pip install nuscenes-devkit &> /dev/null  # Install nuScenes.

In [None]:
# 1) Get the repo and deps
!git clone -b baseline-evalutation https://github.com/yasinshahid/OpenEMMA.git
%cd OpenEMMA
!python -m pip install --upgrade pip
!pip install --no-cache-dir -r requirements.txt

# 2) Apply meta tensor fix
print("🔧 Applying meta tensor error fix...")
!curl -L -o main_fixed.py "https://raw.githubusercontent.com/yasinshahid/OpenEMMA/main/main_fixed.py" || echo "Using local fix"

# Template source text for a patched main.py: the usual imports/constants plus
# a fix_meta_tensors() helper that replaces meta-device parameters and buffers
# with zero tensors on a real device.
# NOTE(review): within the visible notebook this string is only assigned --
# nothing writes it to disk or hands it to main.py, and it ends with a
# placeholder comment rather than complete model-loading code.
fixed_main_content = '''
import base64
import os.path
import re
import argparse
import signal
import time
import gc
from datetime import datetime
from math import atan2

import cv2
import numpy as np
import matplotlib.pyplot as plt
import torch
from openai import OpenAI
from nuscenes import NuScenes
from pyquaternion import Quaternion
from scipy.integrate import cumulative_trapezoid

import json
from openemma.YOLO3D.inference import yolo3d_nuScenes
from utils import EstimateCurvatureFromTrajectory, IntegrateCurvatureForPoints, OverlayTrajectory, WriteImageSequenceToVideo
from transformers import MllamaForConditionalGeneration, AutoProcessor, Qwen2VLForConditionalGeneration, Qwen2_5_VLForConditionalGeneration, AutoTokenizer
from PIL import Image
from qwen_vl_utils import process_vision_info
from llava.model.builder import load_pretrained_model
from llava.constants import IMAGE_TOKEN_INDEX, DEFAULT_IMAGE_TOKEN, DEFAULT_IM_START_TOKEN, DEFAULT_IM_END_TOKEN, IMAGE_PLACEHOLDER
from llava.utils import disable_torch_init
from llava.mm_utils import tokenizer_image_token, process_images, get_model_name_from_path
from llava.conversation import conv_templates

client = OpenAI(api_key="[your-openai-api-key]")

OBS_LEN = 10
FUT_LEN = 10
TTL_LEN = OBS_LEN + FUT_LEN

def fix_meta_tensors(model):
    """Fix meta tensors by moving them to proper device"""
    if hasattr(model, \'parameters\'):
        device = next(model.parameters()).device if any(model.parameters()) else torch.device(\'cuda\' if torch.cuda.is_available() else \'cpu\')
        
        # Move any meta tensors to the proper device
        for name, param in model.named_parameters():
            if param.device.type == \'meta\':
                print(f"⚠️ Fixing meta tensor: {name}")
                # Create a new tensor with the same shape and dtype on the proper device
                with torch.no_grad():
                    new_param = torch.zeros_like(param, device=device, dtype=param.dtype)
                    param.data = new_param
        
        # Same for buffers
        for name, buffer in model.named_buffers():
            if buffer.device.type == \'meta\':
                print(f"⚠️ Fixing meta buffer: {name}")
                with torch.no_grad():
                    new_buffer = torch.zeros_like(buffer, device=device, dtype=buffer.dtype)
                    buffer.data = new_buffer
    
    return model

# Enhanced model loading with meta tensor fixes...
'''

# Keep a pristine copy of main.py next to it before anything modifies it.
import os
import shutil

if os.path.exists('main_backup.py'):
    # Re-running the cell must not replace the first backup with a main.py
    # that may have been edited in the meantime.
    print("ℹ️ main_backup.py already exists - keeping the existing backup")
else:
    # copyfile copies bytes, so it does not depend on the default text encoding
    # and leaves no file handle open (the original inner open() was never closed).
    shutil.copyfile('main.py', 'main_backup.py')
    print("✅ Backup created and fix prepared")

# 3) Memory optimization for Colab
# Releases cached CUDA blocks held by this notebook kernel and reports the
# GPU's total memory plus what this process currently has allocated.
import gc
import torch
if torch.cuda.is_available():
    torch.cuda.empty_cache()
    # True division: the original used `//`, which floored the value before the
    # `.1f` formatting (a 15.8 GB card was reported as "15.0GB").
    total_gb = torch.cuda.get_device_properties(0).total_memory / 1e9
    allocated_mb = torch.cuda.memory_allocated(0) / 1e6
    print(f"GPU Memory: {total_gb:.1f}GB")
    # memory_allocated() is what this process holds, not what is free, so the
    # line is labelled accordingly.
    print(f"Allocated by this process: {allocated_mb:.1f}MB")

# 4) Set dataset path (edit as needed)
NUSCENES_DIR = "/content/drive/MyDrive/Nuscenes"

# 5) Set environment variables for better memory management
import os
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:512"
os.environ["TRANSFORMERS_CACHE"] = "/tmp/transformers_cache"
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"  # Better error reporting

print("🚀 Using LLaVA model (free and optimized for Colab)")
print("🔧 Meta tensor fixes applied, using safer model loading...")

!python main.py \
  --model-path llava \
  --dataroot "$NUSCENES_DIR" \
  --version v1.0-mini \
  --method openemma

In [None]:
# 🔧 QUICK FIX FOR META TENSOR ERROR
# Run this cell if you get "Cannot copy out of meta tensor; no data!" error

print("🔧 Applying quick fix for meta tensor error...")

# 1. Clear all GPU memory
# This only affects the notebook kernel's own process; main.py is launched as
# a separate `!python` process in the earlier cell.
import torch
import gc
# Collect unreachable Python objects first: tensors freed by the collector hand
# their blocks back to PyTorch's caching allocator, and only a *subsequent*
# empty_cache() can return those blocks to the driver.
gc.collect()
if torch.cuda.is_available():
    torch.cuda.empty_cache()
    torch.cuda.ipc_collect()
    print("✅ GPU memory cleared")

# 2. Set optimal memory settings
# Same four variables as before, written as individual assignments.
import os
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
os.environ["TRANSFORMERS_OFFLINE"] = "0"
os.environ["HF_DATASETS_OFFLINE"] = "0"

# 3. Patch for the LLaVA inference path, kept here as source text.
# patch_code holds a vlm_inference_patched() definition that (a) replaces any
# meta-device parameters/buffers with zero tensors on the model's device,
# (b) runs LLaVA generation, and (c) moves the model to CPU and retries when
# the raised error message mentions "meta tensor".
# NOTE(review): in the visible notebook this string is never exec'd or written
# into main.py, so assigning it does not change what main.py runs.
patch_code = '''
# Apply this patch to main.py if meta tensor error persists
def vlm_inference_patched(text=None, images=None, sys_message=None, processor=None, model=None, tokenizer=None, args=None):
    if "llava" in args.model_path:
        try:
            # Enhanced device and tensor handling
            device = next(model.parameters()).device
            
            # Fix meta tensors if they exist
            for name, param in model.named_parameters():
                if param.device.type == 'meta':
                    print(f"Fixing meta parameter: {name}")
                    param.data = torch.zeros_like(param, device=device)
            
            for name, buffer in model.named_buffers():
                if buffer.device.type == 'meta':
                    print(f"Fixing meta buffer: {name}")
                    buffer.data = torch.zeros_like(buffer, device=device)
            
            # Continue with normal inference...
            conv_mode = "mistral_instruct"
            image_token_se = DEFAULT_IM_START_TOKEN + DEFAULT_IMAGE_TOKEN + DEFAULT_IM_END_TOKEN
            
            if IMAGE_PLACEHOLDER in text:
                if model.config.mm_use_im_start_end:
                    text = re.sub(IMAGE_PLACEHOLDER, image_token_se, text)
                else:
                    text = re.sub(IMAGE_PLACEHOLDER, DEFAULT_IMAGE_TOKEN, text)
            else:
                if model.config.mm_use_im_start_end:
                    text = image_token_se + "\\n" + text
                else:
                    text = DEFAULT_IMAGE_TOKEN + "\\n" + text

            conv = conv_templates[conv_mode].copy()
            conv.append_message(conv.roles[0], text)
            conv.append_message(conv.roles[1], None)
            prompt = conv.get_prompt()

            input_ids = tokenizer_image_token(prompt, tokenizer, IMAGE_TOKEN_INDEX, return_tensors='pt')
            input_ids = input_ids.to(device).unsqueeze(0) if len(input_ids.shape) == 1 else input_ids.to(device)
            
            image = Image.open(images).convert('RGB')
            image_tensor = process_images([image], processor, model.config)[0]
            image_tensor = image_tensor.to(device=device, dtype=torch.float16)

            with torch.inference_mode():
                output_ids = model.generate(
                    input_ids,
                    images=image_tensor.unsqueeze(0),
                    image_sizes=[image.size],
                    do_sample=True,
                    temperature=0.2,
                    top_p=None,
                    num_beams=1,
                    max_new_tokens=512,  # Reduced to save memory
                    use_cache=True,
                    pad_token_id=tokenizer.eos_token_id,
                )

            outputs = tokenizer.batch_decode(output_ids, skip_special_tokens=True)[0].strip()
            return outputs
            
        except Exception as e:
            if "meta tensor" in str(e).lower():
                print(f"🔄 Retrying with CPU fallback due to meta tensor error: {e}")
                # Move model to CPU as fallback
                model = model.to('cpu')
                return vlm_inference_patched(text, images, sys_message, processor, model, tokenizer, args)
            else:
                raise e
'''

# Troubleshooting checklist shown to the user, emitted with a single print call
# (one message per output line, exactly as before).
print("\n".join([
    "📝 Meta tensor patch ready. If error persists:",
    "1. Restart runtime: Runtime > Restart Runtime",
    "2. Re-run setup cells",
    "3. Try with smaller model or CPU mode",
    "💡 The error usually indicates insufficient GPU memory or model loading issues.",
]))

In [None]:
# After the model execution completes, run this cell to analyze results
import time
import os

def _newest_results_dir(results_dir=None):
    """Return the results directory holding the newest ``ade_results.jsonl``, or None.

    When ``results_dir`` is given only that directory tree is searched.
    Otherwise every ``*_results`` directory in the current working directory is
    a candidate (``./qwen_results/`` was the only location searched originally).
    """
    if results_dir is not None:
        candidates = [results_dir]
    else:
        candidates = [
            os.path.join(".", entry)
            for entry in sorted(os.listdir("."))
            if entry.endswith("_results") and os.path.isdir(entry)
        ]

    newest_dir, newest_mtime = None, None
    for candidate in candidates:
        # os.walk yields nothing for a missing directory, so no exists() check is needed.
        for root, _dirs, files in os.walk(candidate):
            if "ade_results.jsonl" not in files:
                continue
            try:
                mtime = os.path.getmtime(os.path.join(root, "ade_results.jsonl"))
            except OSError:
                # The file vanished between the walk and the stat; ignore it.
                continue
            if newest_mtime is None or mtime > newest_mtime:
                newest_dir, newest_mtime = candidate, mtime
    return newest_dir


def wait_for_results_and_analyze(results_dir=None, max_wait_time=3600, check_interval=30,
                                 settle_delay=5, analyzer=None):
    """Poll for an ``ade_results.jsonl`` file and analyze it once it appears.

    Args:
        results_dir: Directory tree to watch. ``None`` (default) watches every
            ``*_results`` directory in the current working directory.
            NOTE(review): this assumes main.py writes to ``<model-path>_results/``;
            the original code only looked in ``./qwen_results/`` although the
            launch cell passes ``--model-path llava`` - confirm against main.py.
        max_wait_time: Maximum number of seconds to keep polling.
        check_interval: Seconds to sleep between two checks.
        settle_delay: Seconds to wait after the file is found before reading it.
        analyzer: Callable taking the results directory and returning a
            DataFrame or None. Defaults to the notebook's
            ``analyze_evaluation_results``.

    Returns:
        Whatever ``analyzer`` returns once results are found, or ``None`` when
        the timeout is reached first.

    Raises:
        NameError: If no ``analyzer`` is passed and ``analyze_evaluation_results``
            has not been defined yet. This is raised before any waiting so the
            problem is visible immediately instead of after the results show up.
    """
    if analyzer is None:
        analyzer = globals().get("analyze_evaluation_results")
        if analyzer is None:
            raise NameError(
                "analyze_evaluation_results is not defined yet - run the "
                "'Evaluation and Results Analysis' cell first, then re-run this cell."
            )

    print("⏳ Waiting for OpenEMMA execution to complete...")
    print("🎯 This will automatically run evaluation once results are available.")

    started = time.monotonic()
    next_progress_report = 120  # seconds; report roughly every 2 minutes

    while True:
        # Check before sleeping and once more after the final sleep, so results
        # that appear during the last interval are not missed.
        found_dir = _newest_results_dir(results_dir)
        if found_dir is not None:
            print("✅ Results found! Analyzing evaluation metrics...")
            time.sleep(max(0, settle_delay))  # Give it a moment to ensure file is complete

            df = analyzer(found_dir)

            if df is not None:
                print("\n" + "="*60)
                print("🎉 BASELINE EVALUATION COMPLETE!")
                print("="*60)
                print("📈 Your baseline metrics have been calculated and saved.")
                print("📊 Visualizations show performance across different time horizons.")
                print("💾 Results saved to 'baseline_results_summary.json' for future comparison.")
                print("\n🚀 You can now proceed with your thesis research and optimization work!")
                print("📋 Use the saved baseline to measure improvements from your optimizations.")

            return df

        elapsed_time = time.monotonic() - started
        remaining = max_wait_time - elapsed_time
        if remaining <= 0:
            break

        time.sleep(max(0, min(check_interval, remaining)))
        elapsed_time = time.monotonic() - started
        if elapsed_time >= next_progress_report:
            print(f"⏱️  Still waiting... ({int(elapsed_time) // 60} minutes elapsed)")
            next_progress_report = elapsed_time + 120

    hint = results_dir if results_dir is not None else "./<model>_results/"
    print(f"⚠️ Timeout reached. Please manually run analyze_evaluation_results('{hint}') after execution completes.")
    return None

# Announce what is about to happen, then block until results are available
# (or the wait times out). The call stays the last expression of the cell so
# its return value is displayed as before.
print("\n".join([
    "🚀 Starting automatic evaluation analysis...",
    "📝 This will wait for the OpenEMMA execution to complete, then provide comprehensive baseline metrics.",
]))
wait_for_results_and_analyze()

In [None]:
# Evaluation and Results Analysis
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from glob import glob
import os

# Metric columns every results record is expected to carry (besides 'name').
_ADE_COLUMNS = ('ade1s', 'ade2s', 'ade3s', 'avgade')


def _load_jsonl_records(results_file):
    """Return the parsed JSON object of every non-blank line in ``results_file``."""
    with open(results_file, 'r') as f:
        return [json.loads(line) for line in f if line.strip()]


def _infer_model_name(results_dir, fallback='qwen'):
    """Derive the model name from a ``<model>_results`` directory name.

    Falls back to ``fallback`` (the value that used to be hard-coded) when the
    directory is not named that way.
    """
    base = os.path.basename(os.path.normpath(results_dir))
    suffix = '_results'
    if base.endswith(suffix) and len(base) > len(suffix):
        return base[:-len(suffix)]
    return fallback


def _json_number(value):
    """Convert a pandas/NumPy scalar to float, mapping NaN to None (JSON null)."""
    return None if pd.isna(value) else float(value)


def _plot_ade_summary(df):
    """Draw the 2x2 overview figure: horizon bars, per-scene bars, histogram, table."""
    fig, axes = plt.subplots(2, 2, figsize=(12, 8))
    fig.suptitle('OpenEMMA Baseline Evaluation Results', fontsize=16)

    # ADE comparison across time horizons
    metrics = ['ade1s', 'ade2s', 'ade3s']
    mean_values = [df[metric].mean() for metric in metrics]
    std_values = [df[metric].std() for metric in metrics]

    axes[0,0].bar(['1s', '2s', '3s'], mean_values, yerr=std_values, capsize=5, color=['skyblue', 'lightcoral', 'lightgreen'])
    axes[0,0].set_title('ADE by Time Horizon')
    axes[0,0].set_ylabel('ADE (meters)')
    axes[0,0].grid(True, alpha=0.3)

    # Per-scene comparison
    scenes = df['name'].tolist()
    x_pos = np.arange(len(scenes))
    width = 0.25

    axes[0,1].bar(x_pos - width, df['ade1s'], width, label='ADE 1s', color='skyblue')
    axes[0,1].bar(x_pos, df['ade2s'], width, label='ADE 2s', color='lightcoral')
    axes[0,1].bar(x_pos + width, df['ade3s'], width, label='ADE 3s', color='lightgreen')
    axes[0,1].set_title('ADE by Scene')
    axes[0,1].set_ylabel('ADE (meters)')
    axes[0,1].set_xticks(x_pos)
    axes[0,1].set_xticklabels([s.replace('scene-', '') for s in scenes])
    axes[0,1].legend()
    axes[0,1].grid(True, alpha=0.3)

    # Overall ADE distribution
    axes[1,0].hist(df['avgade'], bins=10, alpha=0.7, color='purple', edgecolor='black')
    axes[1,0].set_title('Distribution of Average ADE')
    axes[1,0].set_xlabel('Average ADE (meters)')
    axes[1,0].set_ylabel('Frequency')
    axes[1,0].axvline(df['avgade'].mean(), color='red', linestyle='--', label=f'Mean: {df["avgade"].mean():.3f}')
    axes[1,0].legend()
    axes[1,0].grid(True, alpha=0.3)

    # Summary table
    axes[1,1].axis('tight')
    axes[1,1].axis('off')
    summary_data = [['Metric', 'Mean', 'Std Dev', 'Min', 'Max']]
    for label, column in (('ADE 1s', 'ade1s'), ('ADE 2s', 'ade2s'), ('ADE 3s', 'ade3s'), ('Avg ADE', 'avgade')):
        series = df[column]
        summary_data.append([
            label,
            f'{series.mean():.4f}',
            f'{series.std():.4f}',
            f'{series.min():.4f}',
            f'{series.max():.4f}',
        ])
    table = axes[1,1].table(cellText=summary_data, loc='center', cellLoc='center')
    table.auto_set_font_size(False)
    table.set_fontsize(9)
    table.scale(1, 1.5)
    axes[1,1].set_title('Summary Statistics')

    plt.tight_layout()
    plt.show()


def analyze_evaluation_results(results_dir, show_plots=True,
                               summary_path='baseline_results_summary.json',
                               model_name=None):
    """Analyze and summarize evaluation results from an OpenEMMA run.

    Args:
        results_dir: Directory searched recursively for ``ade_results.jsonl``.
            When several runs are present, the most recently modified file is
            used (the original picked whichever file glob listed first).
        show_plots: Draw the matplotlib overview figure (default). Pass False
            for headless / scripted use.
        summary_path: Where the JSON baseline summary is written.
        model_name: Value stored under ``'model'`` in the summary. ``None``
            (default) derives it from a ``<model>_results`` directory name and
            falls back to ``'qwen'``, the previously hard-coded value.

    Returns:
        A pandas DataFrame with one row per JSONL record, or ``None`` when no
        results file exists or the file contains no records.

    Raises:
        KeyError: If the records lack one of the columns
            ``name, ade1s, ade2s, ade3s, avgade``.
    """
    # Find the results file
    jsonl_files = glob(os.path.join(results_dir, "**/ade_results.jsonl"), recursive=True)

    if not jsonl_files:
        print(f"No evaluation results found in {results_dir}")
        return None

    # glob order is unspecified, so pick the newest run explicitly.
    results_file = max(jsonl_files, key=os.path.getmtime)
    print(f"Loading results from: {results_file}")

    results = _load_jsonl_records(results_file)
    if not results:
        print("No results found in the file")
        return None

    # Convert to DataFrame for analysis
    df = pd.DataFrame(results)

    missing = [column for column in ('name',) + _ADE_COLUMNS if column not in df.columns]
    if missing:
        raise KeyError(f"{results_file} is missing expected field(s): {missing}")

    # Summary statistics
    print("\n=== BASELINE EVALUATION RESULTS ===")
    print(f"Number of scenes evaluated: {len(df)}")
    print(f"Scene names: {df['name'].tolist()}")

    print("\n--- Average Displacement Error (ADE) Metrics ---")
    print(f"ADE 1s (2 timesteps): {df['ade1s'].mean():.4f} ± {df['ade1s'].std():.4f} meters")
    print(f"ADE 2s (4 timesteps): {df['ade2s'].mean():.4f} ± {df['ade2s'].std():.4f} meters")
    print(f"ADE 3s (6 timesteps): {df['ade3s'].mean():.4f} ± {df['ade3s'].std():.4f} meters")
    print(f"Overall Average ADE: {df['avgade'].mean():.4f} ± {df['avgade'].std():.4f} meters")

    print("\n--- Per-Scene Results ---")
    for _, row in df.iterrows():
        print(f"Scene {row['name']}:")
        print(f"  ADE1s: {row['ade1s']:.4f}m, ADE2s: {row['ade2s']:.4f}m, ADE3s: {row['ade3s']:.4f}m, Avg: {row['avgade']:.4f}m")

    if show_plots:
        _plot_ade_summary(df)

    # Save baseline results for future comparison
    baseline_summary = {
        'model': model_name if model_name is not None else _infer_model_name(results_dir),
        'method': 'openemma',
        'scenes_evaluated': len(df),
        'scene_names': df['name'].tolist(),
    }
    for column in _ADE_COLUMNS:
        baseline_summary[f'{column}_mean'] = _json_number(df[column].mean())
        # With a single scene the sample std is NaN; store null so the file
        # stays valid JSON for strict parsers.
        baseline_summary[f'{column}_std'] = _json_number(df[column].std())

    # Save baseline summary
    with open(summary_path, 'w') as f:
        json.dump(baseline_summary, f, indent=2)

    print(f"\n✅ Baseline summary saved to: {summary_path}")
    print("Use this file to compare against future optimized versions!")

    return df

# Closing notice for the analysis cell; this cell only defines the analysis
# function and prints these two lines.
print("\n".join([
    "🔄 Evaluation will run automatically after the model execution completes...",
    "📊 This will generate comprehensive metrics and visualizations for your baseline.",
]))