In [1]:
import html
import os
import time

import matplotlib.pyplot as plt
import pandas as pd
from IPython.display import display, HTML
from llama_cpp import Llama

In [2]:
# Location of the GGUF model file that the assistant loads.
# Set the CODELLAMA_MODEL_PATH environment variable to point at your own
# model file; when it is unset, the default path below is used.
MODEL_PATH = os.environ.get(
    "CODELLAMA_MODEL_PATH",
    "/models/codellama-7b-instruct.Q4_K_M.gguf",
)

class CodeAssistant:
    """Small wrapper around a llama.cpp model for code-oriented prompts.

    The instance keeps the loaded model in ``self.llm`` and offers helpers to
    query it and to render text as an HTML code block in the notebook.
    """

    # Stop sequences used when the caller does not supply its own.
    DEFAULT_STOP = ("```", "Human:", "\n\n")

    def __init__(self, model_path, n_ctx=2048, n_threads=4, n_gpu_layers=0):
        """Load the model.

        Args:
            model_path: Path to the model file handed to ``Llama``.
            n_ctx: Context window size.
            n_threads: Number of CPU threads; adjust based on your CPU.
            n_gpu_layers: Layers to offload; set higher when using a GPU.
        """
        self.llm = Llama(
            model_path=model_path,
            n_ctx=n_ctx,
            n_threads=n_threads,
            n_gpu_layers=n_gpu_layers,
        )

    def generate_response(self, prompt, max_tokens=512, temperature=0.7, stop=None):
        """Generate a response from the model.

        Args:
            prompt: Text sent to the model.
            max_tokens: Upper bound on generated tokens.
            temperature: Sampling temperature.
            stop: Optional stop sequence(s). When ``None`` the class default
                ``DEFAULT_STOP`` is used, which includes a blank line and so
                ends generation at the first empty line.

        Returns:
            The text of the first choice with surrounding whitespace removed.
        """
        stop_sequences = list(self.DEFAULT_STOP) if stop is None else stop
        response = self.llm(
            prompt,
            max_tokens=max_tokens,
            temperature=temperature,
            stop=stop_sequences,
            echo=False
        )
        return response['choices'][0]['text'].strip()

    def format_code(self, code):
        """Format code for display.

        The text is HTML-escaped first so characters such as ``<``, ``>`` and
        ``&`` show up literally instead of being interpreted as markup.

        Args:
            code: Text to render inside a ``<pre>`` element.

        Returns:
            An ``HTML`` display object wrapping the escaped text.
        """
        escaped = html.escape(str(code))
        return HTML(f"""
        <div style="background-color: #f8f9fa; padding: 10px; border-radius: 5px; font-family: monospace;">
            <pre>{escaped}</pre>
        </div>
        """)

In [None]:
# Prompt templates, one per task; placeholders are filled with str.format.
EXAMPLE_PROMPTS = dict(
    # Takes a `code` value.
    explain_code="""
    Explain what this code does:
    ```python
    {code}
    ```
    """,
    # Takes a `code` value.
    optimize_code="""
    Optimize this code for better performance:
    ```python
    {code}
    ```
    """,
    # Takes a `task` value.
    generate_code="""
    Write Python code to {task}
    """,
)

# Build the assistant from MODEL_PATH (the model file must already be in place).
assistant = CodeAssistant(MODEL_PATH)

# Code generation demo: fill the template, query the model, render the reply.
task = "create a function that calculates the Fibonacci sequence up to n terms using dynamic programming"
generation_prompt = EXAMPLE_PROMPTS["generate_code"].format(task=task)
response = assistant.generate_response(generation_prompt)
display(assistant.format_code(response))

# Code explanation demo: ask the model to describe a quicksort implementation.
code_to_explain = """
def quicksort(arr):
    if len(arr) <= 1:
        return arr
    pivot = arr[len(arr) // 2]
    left = [x for x in arr if x < pivot]
    middle = [x for x in arr if x == pivot]
    right = [x for x in arr if x > pivot]
    return quicksort(left) + middle + quicksort(right)
"""
explain_prompt = EXAMPLE_PROMPTS["explain_code"].format(code=code_to_explain)
explanation = assistant.generate_response(explain_prompt)
print("Explanation:", explanation)

# Code optimization demo: hand the model a quadratic duplicate finder and
# render whatever it sends back.
slow_code = """
def find_duplicates(arr):
    duplicates = []
    for i in range(len(arr)):
        for j in range(i + 1, len(arr)):
            if arr[i] == arr[j] and arr[i] not in duplicates:
                duplicates.append(arr[i])
    return duplicates
"""
optimize_prompt = EXAMPLE_PROMPTS["optimize_code"].format(code=slow_code)
optimized = assistant.generate_response(optimize_prompt)
display(assistant.format_code(optimized))

In [4]:
# Performance comparison function
def _load_function(source, func_name):
    """Execute `source` in a fresh namespace and return the function it defines."""
    # One dict serves as both globals and locals, so top-level imports and
    # helpers in the snippet stay visible to the function when it is called.
    namespace = dict(globals())
    # Drop any same-named global so a snippet that fails to define the
    # function raises KeyError instead of silently reusing an existing one.
    namespace.pop(func_name, None)
    # NOTE(security): this runs arbitrary code; only pass snippets you have
    # reviewed, especially when the text comes from a language model.
    exec(source, namespace)
    return namespace[func_name]


def compare_performance(original_code, optimized_code, test_data,
                        func_name='find_duplicates'):
    """Compare execution time of original vs optimized code.

    Args:
        original_code: Python source defining the baseline function.
        optimized_code: Python source defining the candidate function.
        test_data: Argument passed to both functions.
        func_name: Name of the function each snippet must define.

    Returns:
        dict with keys 'original_time' and 'optimized_time' (seconds),
        'speedup' (original / optimized, ``inf`` when the optimized time is
        zero) and 'results_match' (result of comparing both return values
        with ``==``).

    Raises:
        SyntaxError: If either snippet is not valid Python.
        KeyError: If a snippet does not define ``func_name``.
    """
    original_func = _load_function(original_code, func_name)
    optimized_func = _load_function(optimized_code, func_name)

    # perf_counter is a monotonic, high-resolution clock meant for intervals.
    start = time.perf_counter()
    original_result = original_func(test_data)
    original_time = time.perf_counter() - start

    start = time.perf_counter()
    optimized_result = optimized_func(test_data)
    optimized_time = time.perf_counter() - start

    return {
        'original_time': original_time,
        'optimized_time': optimized_time,
        'speedup': original_time / optimized_time if optimized_time > 0 else float('inf'),
        # A speedup is only meaningful when both versions agree on the answer.
        'results_match': original_result == optimized_result,
    }

In [9]:
# Ad-hoc check: send a bare question to the model without using any of the
# EXAMPLE_PROMPTS templates.
# NOTE(review): the recorded output for this cell looks like a continuation of
# a JSON-style Q&A list rather than a direct answer — confirm whether the
# model needs an instruction-style prompt.
assistant.generate_response('what is 10*10?')

Llama.generate: 6 prefix-match hit, remaining 5 prompt tokens to eval
llama_perf_context_print:        load time =    6279.49 ms
llama_perf_context_print: prompt eval time =       0.00 ms /     5 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   140 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =   20278.31 ms /   145 tokens


'",\n                    "answer": "100"\n                },\n                {\n                    "question": "What is 10 x 100?",\n                    "answer": "1000"\n                },\n                {\n                    "question": "What is 100 x 10?",\n                    "answer": "1000"\n                },\n                {\n                    "question": "What is 100 x 100?",\n                    "answer": "10000"\n                },\n            ]\n        }\n    ]\n}'

In [5]:
# Benchmark both implementations on a list in which every value occurs twice.
test_data = 2 * list(range(1000))
results = compare_performance(slow_code, optimized, test_data)

# Bar chart of the two measured timings.
fig, ax = plt.subplots(figsize=(10, 5))
bar_labels = ['Original', 'Optimized']
bar_heights = [results['original_time'], results['optimized_time']]
ax.bar(bar_labels, bar_heights)
ax.set_title('Performance Comparison')
ax.set_ylabel('Execution Time (seconds)')
plt.show()

print(f"Speedup factor: {results['speedup']:.2f}x")

SyntaxError: invalid syntax (<string>, line 1)