In [2]:
import json
import subprocess
import time
from pathlib import Path
from typing import Dict, Optional

import pandas as pd
import requests
from tqdm import tqdm

In [3]:
def start_ollama_server():
    """Start the Ollama server locally as a background child process.

    Launches ``ollama serve``, waits a fixed 5 seconds for it to initialise,
    and returns the process handle so the caller can terminate it later.

    Returns:
        subprocess.Popen: handle of the launched ``ollama serve`` process.

    Raises:
        Exception: whatever ``subprocess.Popen`` raised (for example
            ``FileNotFoundError`` when the ``ollama`` binary is not on PATH);
            the error is printed and then re-raised unchanged.
    """
    try:
        # The server's output is discarded rather than piped: nothing in this
        # notebook ever reads the pipes, and an unread PIPE blocks the child
        # once the OS pipe buffer fills up during a long run.
        server_process = subprocess.Popen(
            ["ollama", "serve"],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL
        )
        print("Ollama server started locally.")
        time.sleep(5)  # Allow time for the server to initialize

        # A non-None poll() means the child has already exited. Only warn
        # (do not raise) so that a server which is already running outside
        # this notebook can still be used by the caller.
        exit_code = server_process.poll()
        if exit_code is not None:
            print(
                f"Warning: 'ollama serve' exited early with code {exit_code}; "
                "another Ollama instance may already be running.")
        return server_process
    except Exception as e:
        print(f"Error starting Ollama server: {e}")
        raise

In [6]:
class LeetCodeSolutionGenerator:
    """Generate LeetCode solutions with a local Ollama chat model and store them.

    For every row of a problems DataFrame the generator builds a prompt, posts
    it to the Ollama chat endpoint, writes the model's reply to
    ``<output_dir>/<title>.py`` and collects a metadata document. All
    documents are written to ``<output_dir>/leetcode_documents.json``.
    """

    # Output location used when the caller does not pass ``output_dir``.
    DEFAULT_OUTPUT_DIR = "/Users/uditrawat/Desktop/RAG-Project/research/leetcode_solutions"

    # Path separators and characters rejected by common filesystems; each one
    # is mapped to "_" when a problem title is turned into a file name.
    _UNSAFE_FILENAME_CHARS = '<>:"/\\|?*'
    _FILENAME_TABLE = str.maketrans(
        _UNSAFE_FILENAME_CHARS, "_" * len(_UNSAFE_FILENAME_CHARS))

    def __init__(self, model_name="qwen2.5-coder:1.5b", output_dir=None,
                 request_timeout=300):
        """Initialize the solution generator.

        Args:
            model_name: Ollama model tag sent with every request.
            output_dir: directory (str or Path) for generated files; defaults
                to ``DEFAULT_OUTPUT_DIR``. Created, including missing parent
                directories, if it does not exist.
            request_timeout: seconds to wait for a single generation request
                before giving up on that problem.
        """
        self.model_name = model_name
        self.api_url = "http://localhost:11434/api/chat"
        self.request_timeout = request_timeout
        self.output_dir = Path(
            output_dir if output_dir is not None else self.DEFAULT_OUTPUT_DIR)
        self.output_dir.mkdir(parents=True, exist_ok=True)

    def create_prompt(self, problem: pd.Series) -> str:
        """Create a detailed prompt for code generation.

        Args:
            problem: row providing the ``title``, ``difficulty``,
                ``description`` and ``related_topics`` fields.

        Returns:
            The prompt text, including the solution template the model is
            asked to follow.
        """
        prompt = f"""Generate the most optimal Python solution for this LeetCode problem.

Problem:
Title: {problem['title']}
Difficulty: {problem['difficulty']}
Description: {problem['description']}
Topics: {problem['related_topics']}

Requirements:
1. Most optimal solution (best time and space complexity)
2. Well-documented with complexity analysis
3. Include test cases
4. Handle all edge cases

Write the solution in this format:
```python
def solution(params):
    \"\"\"
    {problem['title']}
    
    Time: O(?)  # Specify exact complexity
    Space: O(?) # Specify exact complexity
    
    Approach:
    1. Step-by-step explanation
    2. Why this approach is optimal
    3. How edge cases are handled
    \"\"\"
    # Implementation
    pass

# Test cases
if __name__ == "__main__":
    assert solution(input1) == expected1
    assert solution(input2) == expected2
```"""
        return prompt

    def generate_solution(self, prompt: str) -> Optional[str]:
        """Generate solution using the local Ollama server.

        Args:
            prompt: user message sent to the chat endpoint.

        Returns:
            The model's reply text (possibly empty), or ``None`` when the
            request fails, times out, or returns a non-200 status. Failures
            are printed rather than raised so a batch run can continue.
        """
        try:
            response = requests.post(
                self.api_url,
                json={
                    "model": self.model_name,
                    "stream": False,
                    "messages": [{"role": "user", "content": prompt}]
                },
                # Without a timeout a stalled generation blocks the whole run.
                timeout=self.request_timeout
            )
            if response.status_code != 200:
                print(f"Error: {response.status_code} - {response.text}")
                return None

            payload = response.json()
            # The chat endpoint nests the reply under message.content; the
            # top-level "response" key is only kept as a fallback.
            message = payload.get("message") or {}
            return message.get("content") or payload.get("response", "")
        except Exception as e:
            print(f"Error generating solution: {e}")
            return None

    def create_document(self, problem: pd.Series, solution: str) -> Dict:
        """Create a structured document for storage.

        Args:
            problem: row providing ``title``, ``difficulty``, ``description``,
                ``related_topics``, ``companies``, ``url`` and
                ``similar_questions``.
            solution: generated solution text.

        Returns:
            A dict combining the problem fields, the solution and a
            ``metadata`` entry (source, generation date, model name).
        """
        return {
            "title": problem['title'],
            "difficulty": problem['difficulty'],
            "description": problem['description'],
            "solution": solution,
            "topics": problem['related_topics'],
            "companies": problem['companies'],
            "url": problem['url'],
            "similar_questions": problem['similar_questions'],
            "metadata": {
                "source": "leetcode",
                "generated_date": time.strftime("%Y-%m-%d"),
                "model": self.model_name
            }
        }

    def _solution_path(self, title: str) -> Path:
        """Return the output path for a title: lower-cased, spaces and unsafe characters as ``_``."""
        stem = title.lower().replace(' ', '_').translate(self._FILENAME_TABLE)
        return self.output_dir / f"{stem}.py"

    def _save_documents(self, documents) -> None:
        """Write the collected documents to ``leetcode_documents.json`` in the output directory."""
        with open(self.output_dir / "leetcode_documents.json", 'w',
                  encoding="utf-8") as f:
            # default=str keeps one non-JSON-native value (e.g. a NumPy
            # scalar from the DataFrame) from aborting the whole dump.
            json.dump(documents, f, indent=2, default=str)

    def process_problems(self, df: pd.DataFrame):
        """Process all problems and generate solutions.

        Each successfully generated solution is written to its own file and
        recorded as a document. A failure on one problem is printed and the
        loop moves on. If the run is interrupted (e.g. KeyboardInterrupt),
        the documents collected so far are still saved before the
        interruption propagates.

        Args:
            df: one problem per row, with the columns read by
                ``create_prompt`` and ``create_document``.
        """
        documents = []
        finished = False

        try:
            for _, problem in tqdm(df.iterrows(), total=len(df)):
                try:
                    # Generate solution
                    prompt = self.create_prompt(problem)
                    solution = self.generate_solution(prompt)

                    if solution:
                        # Save individual solution file
                        solution_file = self._solution_path(problem['title'])
                        with open(solution_file, 'w', encoding="utf-8") as f:
                            f.write(solution)

                        # Create document for vector store
                        documents.append(
                            self.create_document(problem, solution))

                    # Add small delay between generations
                    time.sleep(0.5)

                except Exception as e:
                    print(f"Error processing {problem['title']}: {e}")
                    continue
            finished = True
        finally:
            # A completed run always writes the file (even when empty). An
            # interrupted run writes only if there is something to keep, so
            # an existing file is not replaced by an empty list.
            if finished or documents:
                self._save_documents(documents)
                print(
                    f"Generated {len(documents)} solutions. Check {self.output_dir} for outputs.")

In [7]:
def main(csv_path='/Users/uditrawat/Desktop/RAG-Project/assets/data/leetcode_dataset - lc.csv'):
    """Run the full pipeline: start Ollama, generate solutions, stop Ollama.

    Args:
        csv_path: path of the problems CSV passed to ``pd.read_csv``.
            Defaults to the dataset location this notebook was written for.
    """
    # Start Ollama server
    server_process = start_ollama_server()

    try:
        # Read the dataset
        df = pd.read_csv(csv_path)

        # Initialize and run the solution generator
        generator = LeetCodeSolutionGenerator()
        generator.process_problems(df)
    finally:
        # Terminate the server and wait for it, so the child is reaped and
        # has really exited before "stopped" is reported.
        server_process.terminate()
        try:
            server_process.wait(timeout=10)
        except subprocess.TimeoutExpired:
            # The server ignored the polite request; force it down.
            server_process.kill()
            server_process.wait()
        print("Ollama server stopped.")


if __name__ == "__main__":
    main()

Ollama server started locally.


  0%|          | 9/1825 [03:25<11:30:24, 22.81s/it]


Ollama server stopped.


KeyboardInterrupt: 