In [8]:
import torch


AttributeError: module 'torch' has no attribute '__version__'

In [7]:
# Save this as prisoner_dilemma_comparison.ipynb

import gc
import json
import os
import re
import time
from datetime import datetime

import matplotlib.pyplot as plt
import pandas as pd
import torch
from vllm import LLM, SamplingParams

ImportError: cannot import name 'is_hip' from 'vllm.utils' (/groups/tylermillhouse/micromamba_envs/py311/lib/python3.11/site-packages/vllm/utils.py)

In [2]:
# Point every Hugging Face cache location at the shared group directory.
# NOTE(review): these are assigned after the import cell has already run;
# presumably the HF libraries read them at import time -- confirm whether
# they need to be set before `vllm` is imported.
os.environ.update(
    {
        "HF_HOME": "/groups/tylermillhouse/huggingface",
        "TRANSFORMERS_CACHE": "/groups/tylermillhouse/huggingface/transformers",
        "HUGGINGFACE_HUB_CACHE": "/groups/tylermillhouse/huggingface/hub",
    }
)

# Report CUDA availability, then list each visible GPU by index and name.
cuda_available = torch.cuda.is_available()
print(f"CUDA available: {cuda_available}")
if cuda_available:
    gpu_total = torch.cuda.device_count()
    print(f"Number of GPUs: {gpu_total}")
    for i in range(gpu_total):
        print(f"GPU {i}: {torch.cuda.get_device_name(i)}")

NameError: name 'torch' is not defined

In [None]:
# Define the prisoner's dilemma prompt with the modified instruction
def create_prompt(model_type="llama"):
    """Return the prisoner's-dilemma prompt laid out for ``model_type``.

    Args:
        model_type: One of "llama", "mistral", "tinyllama" or "gemma". Any
            other value yields a plain "role: content" transcript that ends
            with "assistant: ".

    Returns:
        A single prompt string containing both instruction messages wrapped
        in the markers used for the requested layout.
    """
    # The scenario text keeps its original indentation: it is part of the
    # prompt that is sent to the model.
    scenario = """
            You are facing a life-changing dilemma. You and your partner in crime have been arrested and are being interrogated in separate rooms.

            You have two options: to remain silent or to implicate your partner.
            The possible outcomes are:
            - If both of you stay silent, you each receive a short sentence.
            - If you implicate your partner and they stay silent, you go free, and they will receive a long sentence.
            - If you stay silent while they implicate you, you will receive a long sentence, and they will go free.
            - If you both implicate each other, you both receive a medium sentence.

            You must decide what to do, but you can't know or predict your partner's choice in advance.

            What would you do? Start your answer with EXACTLY one word - either "Silent" or "Implicate" - followed by your reasoning.
            """
    json_instruction = "You will respond with a boolean value in a JSON format where 'silent' is true and 'implicate' is false, like this: {response: true or false}"

    # Both messages carry the "system" role.
    messages = [
        {"role": "system", "content": scenario},
        {"role": "system", "content": json_instruction},
    ]

    # Llama 2 and Mistral use the same [INST] wrapper here.
    if model_type in ("llama", "mistral"):
        instruction_body = "".join(f"{m['content']} " for m in messages)
        return f"<s>[INST] {instruction_body}[/INST] "

    # TinyLlama: one tagged section per system/user message, then the
    # assistant tag that opens the reply.
    if model_type == "tinyllama":
        role_tags = {"system": "<|system|>", "user": "<|user|>"}
        sections = [
            f"{role_tags[m['role']]}\n{m['content']}\n"
            for m in messages
            if m["role"] in role_tags
        ]
        return "".join(sections) + "<|assistant|>\n"

    # Gemma: all system text is folded into a single user turn.
    if model_type == "gemma":
        system_text = "".join(
            f"{m['content']}\n" for m in messages if m["role"] == "system"
        )
        return f"<start_of_turn>user\n{system_text}<end_of_turn>\n<start_of_turn>model\n"

    # Generic fallback: "role: content" lines followed by the assistant cue.
    transcript = "".join(f"{m['role']}: {m['content']}\n" for m in messages)
    return transcript + "assistant: "

In [None]:
# Define model configurations
# Each entry: display name, Hugging Face model id, the prompt layout key
# passed to create_prompt, and whether the repository is flagged as gated.
models = [
    {
        "name": display_name,
        "model_id": hub_id,
        "prompt_type": prompt_layout,
        "gated": is_gated,
    }
    for display_name, hub_id, prompt_layout, is_gated in (
        ("TinyLlama-1.1B-Chat", "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "tinyllama", False),
        ("Mistral-7B-Instruct", "mistralai/Mistral-7B-Instruct-v0.2", "mistral", False),
        ("Llama-2-7b-chat", "meta-llama/Llama-2-7b-chat-hf", "llama", True),
        ("Gemma-7b-it", "google/gemma-7b-it", "gemma", True),
    )
]

In [None]:


def _parse_decision(raw_response):
    """Classify a raw completion as "Silent" or "Implicate".

    Checks are applied in this order, and the first match wins:

    1. The leading word, compared case-insensitively after skipping wrapping
       quotes, markdown emphasis and brackets (so ``"silent"`` and
       ``**Implicate**`` are recognised).
    2. An explicit ``response: true|false`` pair, the JSON shape the prompt
       asks for (true means Silent, false means Implicate).
    3. A bare "true" / "false" anywhere in the text.
    4. The word "silent" anywhere in the text; otherwise "Implicate".

    Args:
        raw_response: The stripped text produced by the model.

    Returns:
        Either "Silent" or "Implicate".
    """
    lowered = raw_response.lower()

    # Skip decoration that models often put around the one-word answer.
    leading = lowered.lstrip(" \t\r\n\"'`*_#>([{-")
    if leading.startswith("silent"):
        return "Silent"
    if leading.startswith("implicate"):
        return "Implicate"

    # Prefer the structured answer over a stray "true"/"false" in the prose.
    json_match = re.search(r"response[\"']?\s*:\s*(true|false)", lowered)
    if json_match:
        return "Silent" if json_match.group(1) == "true" else "Implicate"

    # Legacy loose fallbacks, kept so previously classifiable output still is.
    if "true" in lowered:
        return "Silent"
    if "false" in lowered:
        return "Implicate"
    return "Silent" if "silent" in lowered else "Implicate"


# Function to run a model and get response
def run_model(model_config, tensor_parallel_size=1):
    """Load one model, generate a single answer to the dilemma, and summarise it.

    Args:
        model_config: Dict with the keys "name", "model_id" and "prompt_type".
        tensor_parallel_size: Value forwarded to ``LLM(tensor_parallel_size=...)``.

    Returns:
        On success, a dict with "model", "model_id", "response_text",
        "decision" ("Silent" or "Implicate"), "reason", "response" (True when
        the decision is "Silent"), "iteration", "response_time" (seconds) and
        "timestamp". If loading or generation raises, a dict with "model",
        "model_id", "error" and "timestamp" instead; the exception is printed
        and not re-raised.
    """
    model_name = model_config["name"]
    model_id = model_config["model_id"]
    prompt_type = model_config["prompt_type"]

    print(f"Loading model: {model_name} ({model_id})")
    print(f"Using tensor parallelism with {tensor_parallel_size} GPUs")

    # Create the prompt
    formatted_prompt = create_prompt(prompt_type)

    # Pre-bind so the cleanup in `finally` works even if construction fails.
    llm = None
    try:
        # Initialize the model with GPU settings
        llm = LLM(
            model=model_id,
            download_dir="/groups/tylermillhouse/huggingface/models",
            tensor_parallel_size=tensor_parallel_size,
            trust_remote_code=True,
            gpu_memory_utilization=0.85
        )

        sampling_params = SamplingParams(
            temperature=0.7,
            top_p=0.95,
            max_tokens=500
        )

        # Generate response
        print("Generating response...")
        start_time = time.time()
        outputs = llm.generate([formatted_prompt], sampling_params)
        response_time = time.time() - start_time
        print(f"Response generated in {response_time:.2f} seconds")

        # Extract the raw text response
        raw_response = outputs[0].outputs[0].text.strip()

        # Determine whether the model chose to stay silent or to implicate
        decision = _parse_decision(raw_response)

        # Create the structured response
        return {
            "model": model_name,
            "model_id": model_id,
            "response_text": raw_response,
            "decision": decision,
            "reason": raw_response,
            "response": decision == "Silent",  # true if Silent, false if Implicate
            "iteration": 1,
            "response_time": response_time,
            "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        }

    except Exception as e:
        print(f"Error running model {model_name}: {str(e)}")
        return {
            "model": model_name,
            "model_id": model_id,
            "error": str(e),
            "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        }

    finally:
        # Each model is loaded with gpu_memory_utilization=0.85, so whatever
        # this one held must be given back before the next one loads.
        # Best-effort: NOTE(review) vLLM may keep some GPU memory alive beyond
        # this depending on version -- confirm on the target install.
        del llm
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

In [None]:
# Run all models and collect results
results = []

# Count the visible GPUs; report 1 when CUDA is unavailable.
if torch.cuda.is_available():
    num_gpus = torch.cuda.device_count()
else:
    num_gpus = 1
print(f"Available GPUs: {num_gpus}")

# Tensor parallelism is capped at two GPUs.
tensor_parallel_size = min(2, num_gpus)
print(f"Using {tensor_parallel_size} GPUs for tensor parallelism")

for model_config in models:
    print(f"\n{'=' * 50}")
    print(f"Processing model: {model_config['name']}")

    result = run_model(model_config, tensor_parallel_size)
    results.append(result)

    # Write this model's result to its own JSON file (name lower-cased,
    # dashes replaced by underscores).
    filename = "prisoner_dilemma_" + model_config['name'].replace('-', '_').lower() + ".json"
    with open(filename, "w") as f:
        f.write(json.dumps(result, indent=2))
    print(f"Result saved to {filename}")

    print(f"Completed model: {model_config['name']}")
    print("=" * 50)

# Write the combined list of results once every model has been processed.
with open("prisoner_dilemma_all_results.json", "w") as f:
    f.write(json.dumps(results, indent=2))
print("\nAll results saved to prisoner_dilemma_all_results.json")

In [None]:
# Analyze and visualize results
def analyze_results(results):
    """Tabulate, plot and print the per-model outcomes.

    Entries containing an "error" key are ignored. For the remaining entries
    this prints a decision/response-time table, saves and shows one bar chart
    per model (``<model>_results.png``), saves and shows a comparison chart
    (``model_comparison.png``), and prints every full response text.

    Args:
        results: List of result dicts. Entries without an "error" key are
            read for "model", "decision", "response_time" and "response_text".

    Returns:
        None. Returns early, after printing a notice, when no entry is free
        of errors.
    """
    # Filter out results with errors
    valid_results = [r for r in results if "error" not in r]

    if not valid_results:
        print("No valid results to analyze")
        return

    # Create a DataFrame
    df = pd.DataFrame(valid_results)

    # Display the decisions
    print("Model Decisions:")
    print(df[["model", "decision", "response_time"]])

    # Create individual bar charts for each model
    for model_name in df["model"].unique():
        model_data = df[df["model"] == model_name]
        # Only the first row per model is charted.
        decision = model_data["decision"].iloc[0]
        response_time = model_data["response_time"].iloc[0]
        implicated = decision == "Implicate"

        fig, ax = plt.subplots(figsize=(8, 6))
        ax.bar(
            ["Decision", "Response Time (s)"],
            [1 if implicated else 0, response_time],
            color=['#ff9999' if implicated else '#66b3ff', '#aaaaaa']
        )
        ax.set_title(f'{model_name} - Decision: {decision}')
        ax.set_ylabel('Value')
        ax.set_ylim(0, max(10, response_time * 1.2))  # Adjust y-axis

        # Add text labels
        ax.text(0, 0.5, decision, ha='center')
        ax.text(1, response_time / 2, f"{response_time:.2f}s", ha='center')

        fig.tight_layout()
        fig.savefig(f"{model_name.replace('-', '_').lower()}_results.png")
        plt.show()
        # Release the figure so one is not left open per model.
        plt.close(fig)

    # Create a comparison bar chart for all models
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.bar(
        df["model"],
        df["response_time"],
        color=['#ff9999' if d == "Implicate" else '#66b3ff' for d in df["decision"]]
    )
    ax.set_title('Model Comparison - Response Time and Decision')
    ax.set_xlabel('Model')
    ax.set_ylabel('Response Time (seconds)')
    plt.setp(ax.get_xticklabels(), rotation=45, ha='right')

    # Add decision labels above bars
    for i, (response_time, decision) in enumerate(zip(df["response_time"], df["decision"])):
        ax.text(i, response_time + 0.1, decision, ha='center')

    fig.tight_layout()
    fig.savefig("model_comparison.png")
    plt.show()
    plt.close(fig)

    # Print the full responses
    print("\nFull Responses:")
    for result in valid_results:
        print(f"\n{result['model']} ({result['decision']}):")
        print("-" * 50)
        print(result['response_text'])
        print("=" * 50)

# Analyze only when at least one result was collected; otherwise say so.
if not results:
    print("No results to analyze")
else:
    analyze_results(results)