In [12]:
# pip install -qU groq python-docx "langchain[groq]"

In [13]:
import os
import json
import re
from groq import Groq
import threading
from queue import Queue

In [14]:
# Shared Groq client; the API key is read from the environment so it never appears in the notebook.
client = Groq(api_key=os.getenv("GROQ_API_KEY"))

In [15]:
# Working directory: the scan report is read from here and this notebook's
# JSON results are written here as well.
output_dir = "output"
os.makedirs(output_dir, exist_ok=True)

# Load the repository scan report (presumably produced by an earlier scanning
# step -- confirm). JSON is decoded as UTF-8 explicitly so the result does not
# depend on the platform's locale encoding.
repo_scan_report = os.path.join(output_dir, "repo_scan_report.json")
with open(repo_scan_report, "r", encoding="utf-8") as file:
    repo_data = json.load(file)

In [16]:
def call_groq(prompt):
    """Send ``prompt`` to Groq as a single user message and return the reply text.

    Args:
        prompt: Text placed in the only (user) message of the chat request.

    Returns:
        The ``content`` of the first choice's message in the completion.
    """
    user_message = {
        "role": "user",
        "content": [{"type": "text", "text": prompt}],
    }
    response = client.chat.completions.create(
        messages=[user_message],
        model="llama3-8b-8192",
    )
    first_choice = response.choices[0]
    return first_choice.message.content

In [17]:
def _build_split_prompt(file, file_list_json, max_content_chars=2000):
    """Render the LLM prompt asking whether ``file`` should be split.

    Args:
        file: Mapping with ``"name"`` and ``"path"`` keys; ``"content"`` is
            optional and a missing or null value is treated as empty text.
        file_list_json: JSON-encoded list of every file name in the repository,
            embedded in the prompt as project context.
        max_content_chars: Maximum number of content characters embedded, to
            keep the prompt from exceeding the model's token limit.

    Returns:
        The complete prompt string.
    """
    name = file["name"]
    path = file["path"]
    content_excerpt = (file.get("content") or "")[:max_content_chars]

    # The template's leading whitespace is part of the prompt text, so its
    # lines are intentionally indented deeper than the surrounding code.
    return f"""
        You are an expert software architect analyzing a monolithic codebase to plan its migration to a microservices architecture. 

        ### Task:
        Determine whether the given file should be split into smaller, more modular files as part of a microservices-based system.

        ### Given File:
        - **Name**: {name}
        - **Path**: {path}
        - **Content**: {content_excerpt}

        ### Project Context:
        Here is a list of all the files in the repository to provide context:
        {file_list_json}

        ### Instructions:
        - **Analyze** the given file and decide whether it needs to be split.
        - **If splitting is needed**, generate new file paths and names that describe their intended purpose.
        - **If the file does NOT need to be split**, return the file in the same path.
        - **Additionally**, provide a short description for each new file, explaining its functionality.
        - **Ensure that the output strictly follows the JSON format below**.

        ### Response Format:
        Your response must be in **valid JSON format**, following this structure:
        ```json
        {{
            "microservice_mapping": {{
                "{path}": ["new_file_path_with_name.py", "new_file_path_with_name.py"]
            }},
            "file_descriptions": {{
                "new_file_path_with_name.py": "Short description of its functionality",
                "new_file_path_with_name.py": "Another short description"
            }}
        }}
        ```

        ### Rules:
        1. **Only split if necessary** : If the file is already modular, return its original path without modifications.
        2. **Maintain file relationships** : If splitting, ensure logically related functions remain together.
        3. **Use meaningful names** : New filenames should clearly reflect their role in the microservices architecture.
        4. **Provide concise descriptions** : Explain the functionality of each new file in one sentence.
        5. **No explanations or extra text** : Only return valid JSON.

        ### Example Responses:

        #### **Case 1: The file needs to be split**
        ```json
        {{
            "microservice_mapping": {{
                "{path}": ["services/auth/user_auth.py", "services/auth/token_handler.py"]
            }},
            "file_descriptions": {{
                "services/auth/user_auth.py": "Handles user authentication and session management.",
                "services/auth/token_handler.py": "Manages JWT token generation and validation."
            }}
        }}
        ```

        #### **Case 2: The file does NOT need to be split**
        ```json
        {{
            "microservice_mapping": {{
                "{path}": ["{path}"]
            }},
            "file_descriptions": {{
                "{path}": "This file does not need splitting and remains unchanged."
            }}
        }}
        ```

        Now, analyze the given file and generate the response in the requested format.
        """


def _extract_json_object(text):
    """Return the first JSON object that can be decoded from ``text``.

    Decoding is attempted at each ``{`` in turn, so code fences, prose, or
    stray braces before or after the object do not prevent it being found.

    Raises:
        ValueError: If no position in ``text`` starts a decodable JSON object.
    """
    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            parsed, _ = decoder.raw_decode(text, start)
            return parsed
        except json.JSONDecodeError:
            # Not a JSON object at this brace; try the next candidate.
            start = text.find("{", start + 1)
    raise ValueError("No valid JSON found in the response.")


def determine_microservice_splits(repo_data):
    """Ask the LLM, file by file, how a monolith's files map to microservice files.

    Args:
        repo_data: Mapping with a ``"files"`` list; each entry provides
            ``"name"``, ``"path"`` and (optionally) ``"content"``.

    Returns:
        A ``(microservices, file_descriptions)`` tuple of dicts merged from the
        ``"microservice_mapping"`` and ``"file_descriptions"`` sections of each
        successfully parsed response.

    A file whose request or response handling fails is reported with an
    ``[ERROR]`` line on stdout and skipped; the remaining files are still
    processed.
    """
    microservices = {}  # Stores file mappings
    file_descriptions = {}  # Stores descriptions of new files

    # Loop-invariant project context: serialize the file list once.
    file_list_json = json.dumps([file["name"] for file in repo_data["files"]])

    for file in repo_data["files"]:
        try:
            # Built inside the try so a malformed entry only skips this file.
            prompt = _build_split_prompt(file, file_list_json)
            response_data = _extract_json_object(call_groq(prompt))

            mapping = response_data.get("microservice_mapping", {})
            descriptions = response_data.get("file_descriptions", {})
            # Validate both sections before merging so a bad response never
            # leaves the two result dicts partially updated.
            if not isinstance(mapping, dict) or not isinstance(descriptions, dict):
                raise ValueError(
                    "'microservice_mapping' and 'file_descriptions' must be JSON objects."
                )

            microservices.update(mapping)
            file_descriptions.update(descriptions)

        except Exception as e:
            print(f"[ERROR] Failed to process {file['name']}: {e}")

    return microservices, file_descriptions

In [18]:
# EXECUTE PIPELINE
# Destination files for the two result dictionaries, both under output_dir.
microservices_path = os.path.join(output_dir, "microservice_splits.json")
file_descriptions_path = os.path.join(output_dir, "file_descriptions.json")

# Query the model once per file in the loaded scan report.
print("[INFO] Determining microservice splits...")
microservices, file_descriptions = determine_microservice_splits(repo_data)

# Persist each result as pretty-printed JSON.
with open(microservices_path, "w") as splits_file:
    json.dump(microservices, splits_file, indent=4)

with open(file_descriptions_path, "w") as descriptions_file:
    json.dump(file_descriptions, descriptions_file, indent=4)

[INFO] Determining microservice splits...
