In [1]:
# using ollama
%pip install -U langchain-ollama

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip available: 22.2.2 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [6]:
import os
from langchain_community.chat_models import ChatOllama
from langchain.schema import SystemMessage, HumanMessage

In [17]:
# Requirements text for the metro TicketDistributor case study. It is passed
# verbatim to the identify_* functions in the cells below as the LLM's input.
problem_statement = """Metro station wants to establish a TicketDistributor machine that issues tickets for
passengers travelling on metro rails. Travellers have options of selecting a ticket for a single
trip, round trips or multiple trips. They can also issue a metro pass for regular passengers or
a time card for a day, a week or a month according to their requirements. The discounts on
tickets will be provided to frequent travelling passengers. The machine is also supposed to
read the metro pass and time cards issued by the metro counters or machine. The ticket rates
differ based on whether the traveller is a child or an adult. The machine is also required to
recognize original as well as fake currency notes. The typical transaction consists of a user
using the display interface to select the type and quantity of tickets and then choosing a
payment method of either cash, credit/debit card or smartcard. The tickets are printed and
dispensed to the user. Also, the messaging facilities after every transaction are required on
the registered number. The system can also be operated comfortably by a touch-screen. A
large number of heavy components are to be used. We do not want our system to slow down,
and also the usability of the machine.
The TicketDistributor must be able to handle several exceptions, such as aborting the
transaction for incomplete transactions, the insufficient amount given by the travellers to the
machine, money return in case of an aborted transaction, change return after a successful
transaction, showing insufficient balance in the card, updated information printed on the
tickets e.g. departure time, date, time, price, valid from, valid till, validity duration, ticket
issued from and destination station. In case of exceptions, an error message is to be displayed.
We do not want user feedback after every development stage but after every two stages to
save time. The machine is required to work in a heavy load environment such that in the
morning and evening time on weekdays, and weekends performance and efficiency would
not be affected."""

In [24]:
import ollama
import json
import re

def identify_classes(problem_statement):
    """
    Identifies potential class names from a given problem statement using Ollama LLaMA 3.2.

    Args:
        problem_statement: Free-text requirements; interpolated verbatim into the prompt.

    Returns:
        list: The value stored under "classes" in the first JSON object of the model's
        reply that has that key. Returns [] when the reply contains no such object or
        when the value is not a JSON array.
    """
    prompt = f"""
    You are an expert software analyst. Your task is to extract potential class names from a given problem statement. 
    Follow this step-by-step approach:

    1. Identify the key **nouns** representing domain concepts.
    2. If a noun represents a role or system entity, classify it as a **class**.
    3. Ignore verbs and adjectives.
    4. **Output only valid JSON** in the following format without any explanation:
       {{
           "classes": ["Class1", "Class2", "Class3"]
       }}

    **Problem Statement:**
    {problem_statement}

    Now extract the class names based on the above rules.  
    **Do NOT include any explanation — output JSON only.**
    """

    # Generate response using Ollama LLaMA 3.2; the sampling options pin the
    # decoding so repeated runs give the same answer.
    response = ollama.chat(
        model="llama3.2",
        messages=[{"role": "user", "content": prompt}],
        options={
            "temperature": 0,
            "top_p": 1,
            "top_k": 1
        }
    )

    # Debugging: Print raw response
    response_text = response['message']['content'].strip()
    print("Raw Response from LLaMA 3.2:\n", response_text)

    # Try each "{" in turn and let the JSON decoder decide where that object ends.
    # A greedy first-brace-to-last-brace match fails whenever the reply carries any
    # extra "}" (surrounding prose, a second object), discarding otherwise valid JSON.
    decoder = json.JSONDecoder()
    first_error = None
    saw_brace = False
    start = response_text.find("{")
    while start != -1:
        saw_brace = True
        try:
            candidate, end = decoder.raw_decode(response_text, start)
        except json.JSONDecodeError as e:
            if first_error is None:
                first_error = e
        else:
            if isinstance(candidate, dict) and "classes" in candidate:
                print("Cleaned JSON:\n", repr(response_text[start:end]))
                classes = candidate["classes"]
                if isinstance(classes, list):
                    return classes
                # e.g. "classes": null — never hand a non-list to callers
                print('Error: "classes" is not a JSON array.')
                return []
        start = response_text.find("{", start + 1)

    if not saw_brace:
        print("Error: No valid JSON found in response.")
    elif first_error is not None:
        print("JSON parsing error:", str(first_error))
    else:
        print('Error: No JSON object with a "classes" key found in response.')
    return []

# Run class extraction on the problem statement; later cells reuse
# `identified_classes` as input to attribute and relationship extraction.
identified_classes = identify_classes(problem_statement)
print("Identified Classes:", identified_classes)


Raw Response from LLaMA 3.2:
 {
  "classes": [
    "MetroStation",
    "TicketDistributor",
    "Traveller",
    "Passenger",
    "Child",
    "Adult",
    "MetroPass",
    "TimeCard",
    "Transaction",
    "User",
    "Machine",
    "CurrencyNote"
  ]
}
Cleaned JSON:
 '{\n  "classes": [\n    "MetroStation",\n    "TicketDistributor",\n    "Traveller",\n    "Passenger",\n    "Child",\n    "Adult",\n    "MetroPass",\n    "TimeCard",\n    "Transaction",\n    "User",\n    "Machine",\n    "CurrencyNote"\n  ]\n}'
Identified Classes: ['MetroStation', 'TicketDistributor', 'Traveller', 'Passenger', 'Child', 'Adult', 'MetroPass', 'TimeCard', 'Transaction', 'User', 'Machine', 'CurrencyNote']


In [21]:
import ollama
import json
import re

def identify_attributes(problem_statement, classes):
    """
    Identifies attributes for each class extracted from the problem statement using Ollama LLaMA 3.2.
    Ensures deterministic output and structured JSON format.

    Args:
        problem_statement: Free-text requirements; interpolated verbatim into the prompt.
        classes: Previously identified class names; interpolated into the prompt
            using their default string formatting.

    Returns:
        dict: The value stored under "attributes" in the first JSON object of the
        model's reply that has that key. Returns {} when the reply contains no such
        object or when the value is not a JSON object.
    """
    prompt = f"""
    You are an expert software analyst. Your task is to extract **attributes** for each identified class from the given problem statement.

    **Rules to Identify Attributes:**
    1. Attributes describe **properties or characteristics** of a class.
    2. Ignore verbs, complex entities, and relationships.
    3. Common attributes include **names, IDs, types, and statuses**.
    4. **Output ONLY valid JSON** in the following format (without any explanation):
       {{
           "attributes": {{
               "Class1": ["attribute1", "attribute2"],
               "Class2": ["attribute1", "attribute2"]
           }}
       }}

    **Classes Identified:**
    {classes}

    **Problem Statement:**
    {problem_statement}

    Now extract the attributes for each class.
    **Do NOT include any explanation — output JSON only.**
    """

    # Generate response using Ollama LLaMA 3.2; the sampling options pin the
    # decoding so repeated runs give the same answer.
    response = ollama.chat(
        model="llama3.2",
        messages=[{"role": "user", "content": prompt}],
        options={
            "temperature": 0,
            "top_p": 1,
            "top_k": 1
        }
    )

    # Debugging: Print raw response
    response_text = response['message']['content'].strip()
    print("Raw Response from LLaMA 3.2:\n", response_text)

    # Try each "{" in turn and let the JSON decoder decide where that object ends.
    # A greedy first-brace-to-last-brace match fails whenever the reply carries any
    # extra "}" (surrounding prose, a second object), discarding otherwise valid JSON.
    decoder = json.JSONDecoder()
    first_error = None
    saw_brace = False
    start = response_text.find("{")
    while start != -1:
        saw_brace = True
        try:
            candidate, end = decoder.raw_decode(response_text, start)
        except json.JSONDecodeError as e:
            if first_error is None:
                first_error = e
        else:
            if isinstance(candidate, dict) and "attributes" in candidate:
                print("Cleaned JSON:\n", repr(response_text[start:end]))
                attributes = candidate["attributes"]
                if isinstance(attributes, dict):
                    return attributes
                # A list/null here would break dict-style lookups downstream
                print('Error: "attributes" is not a JSON object.')
                return {}
        start = response_text.find("{", start + 1)

    if not saw_brace:
        print("Error: No valid JSON found in response.")
    elif first_error is not None:
        print("JSON parsing error:", str(first_error))
    else:
        print('Error: No JSON object with an "attributes" key found in response.')
    return {}

# Extract attributes for the classes found earlier; `identified_attributes`
# is reused by the relationship and diagram cells.
identified_attributes = identify_attributes(problem_statement, identified_classes)

print("\nExtracted Attributes:\n", identified_attributes)


Raw Response from LLaMA 3.2:
 {
  "attributes": {
    "MetroStation": ["ID", "name"],
    "TicketDistributor": ["ID", "type", "status", "capacity"],
    "Traveller": ["ID", "age", "type"],
    "Passenger": ["ID", "type"],
    "Child": ["ID", "type"],
    "Adult": ["ID", "type"],
    "MetroPass": ["ID", "type", "duration"],
    "TimeCard": ["ID", "type", "duration"],
    "Transaction": ["ID", "status", "amount"],
    "User": ["ID", "name", "registered_number"],
    "Machine": ["ID", "type", "load_environment"],
    "DisplayInterface": ["ID", "type"],
    "PaymentMethod": ["ID", "type"],
    "Smartcard": ["ID", "type"],
    "CurrencyNote": ["ID", "type"],
    "TouchScreen": ["ID", "type"]
  }
}
Cleaned JSON:
 '{\n  "attributes": {\n    "MetroStation": ["ID", "name"],\n    "TicketDistributor": ["ID", "type", "status", "capacity"],\n    "Traveller": ["ID", "age", "type"],\n    "Passenger": ["ID", "type"],\n    "Child": ["ID", "type"],\n    "Adult": ["ID", "type"],\n    "MetroPass": ["ID", 

In [22]:
import ollama
import json
import re

def identify_relationships(problem_statement, classes, attributes):
    """
    Identifies relationships between extracted classes using Ollama LLaMA 3.2.
    Extracts Association, Generalization (Inheritance), Aggregation, and Composition.

    Args:
        problem_statement: Free-text requirements; interpolated verbatim into the prompt.
        classes: Previously identified class names; interpolated into the prompt.
        attributes: Previously identified attributes; interpolated into the prompt.

    Returns:
        list: The value stored under "relationships" in the first JSON object of the
        model's reply that has that key. Always a list: [] is returned when the reply
        contains no such object or when the value is not a JSON array.
    """
    prompt = f"""
    You are an expert software analyst. Your task is to identify **all types of relationships** between the given classes based on the problem statement.

    **Types of Relationships:**
    1️⃣ **Association** - One class interacts with another. Example: "User borrows Book".
    2️⃣ **Generalization (Inheritance)** - One class is a specialized form of another. Example: "Admin is a subclass of User".
    3️⃣ **Aggregation** - One class is made up of another, but they have independent lifecycles. Example: "Library has Books, but Books exist independently".
    4️⃣ **Composition** - A stronger form of Aggregation, where the part **cannot exist** without the whole. Example: "House has Rooms, Rooms cannot exist without a House".

    **Rules:**
    - Identify the **verbs** and **context clues** in the problem statement to find relationships.
    - Classify the relationship into one of the four types above.
    - **Output ONLY valid JSON** in the following format (without any explanation):
      {{
          "relationships": [
              {{"source": "ClassA", "target": "ClassB", "type": "Generalization", "description": "ClassA is a subclass of ClassB"}},
              {{"source": "ClassC", "target": "ClassD", "type": "Aggregation", "description": "ClassC contains ClassD but ClassD can exist independently"}},
              {{"source": "ClassE", "target": "ClassF", "type": "Composition", "description": "ClassE owns ClassF and ClassF cannot exist without ClassE"}}
          ]
      }}

    **Classes Identified:**
    {classes}

    **Attributes Identified:**
    {attributes}

    **Problem Statement:**
    {problem_statement}

    Now extract **all types of relationships** between the classes.
    **Do NOT include any explanation — output JSON only.**
    """

    # Generate response using Ollama LLaMA 3.2; the sampling options pin the
    # decoding so repeated runs give the same answer.
    response = ollama.chat(
        model="llama3.2",
        messages=[{"role": "user", "content": prompt}],
        options={
            "temperature": 0,
            "top_p": 1,
            "top_k": 1
        }
    )

    # Debugging: Print raw response
    response_text = response['message']['content'].strip()
    print("Raw Response from LLaMA 3.2:\n", response_text)

    # Try each "{" in turn and let the JSON decoder decide where that object ends.
    # A greedy first-brace-to-last-brace match fails whenever the reply carries any
    # extra "}" (surrounding prose, a second object), discarding otherwise valid JSON.
    decoder = json.JSONDecoder()
    first_error = None
    saw_brace = False
    start = response_text.find("{")
    while start != -1:
        saw_brace = True
        try:
            candidate, end = decoder.raw_decode(response_text, start)
        except json.JSONDecodeError as e:
            if first_error is None:
                first_error = e
        else:
            if isinstance(candidate, dict) and "relationships" in candidate:
                print("Cleaned JSON:\n", repr(response_text[start:end]))
                relationships = candidate["relationships"]
                if isinstance(relationships, list):
                    return relationships
                print('Error: "relationships" is not a JSON array.')
                return []
        start = response_text.find("{", start + 1)

    # Every failure path returns [] so the return type matches the success path.
    if not saw_brace:
        print("Error: No valid JSON found in response.")
    elif first_error is not None:
        print("JSON parsing error:", str(first_error))
    else:
        print('Error: No JSON object with a "relationships" key found in response.')
    return []


# Extract relationships using the classes and attributes found in earlier cells;
# `identified_relationships` is consumed by the PlantUML generation cell.
identified_relationships = identify_relationships(problem_statement, identified_classes, identified_attributes)

print("\nRelationships Identified:\n", identified_relationships)


Raw Response from LLaMA 3.2:
 {
  "relationships": [
    {"source": "MetroStation", "target": "TicketDistributor", "type": "Aggregation", "description": "Metro station contains TicketDistributor"},
    {"source": "Traveller", "target": "Passenger", "type": "Generalization", "description": "Traveller is a subclass of Passenger"},
    {"source": "Child", "target": "Adult", "type": "Generalization", "description": "Child is a subclass of Adult"},
    {"source": "TicketDistributor", "target": "MetroPass", "type": "Aggregation", "description": "TicketDistributor contains MetroPass"},
    {"source": "TicketDistributor", "target": "TimeCard", "type": "Aggregation", "description": "TicketDistributor contains TimeCard"},
    {"source": "User", "target": "Machine", "type": "Aggregation", "description": "User contains Machine"},
    {"source": "Machine", "target": "DisplayInterface", "type": "Aggregation", "description": "Machine contains DisplayInterface"},
    {"source": "PaymentMethod", "targe

In [27]:
import os
import json

def generate_plantuml(classes, attributes, relationships, output_file="class_diagram.puml"):
    """
    Generates a PlantUML class diagram from identified classes, attributes, and relationships.

    Args:
        classes: Iterable of class names; one `class` block is emitted per entry.
        attributes: Mapping of class name -> iterable of attribute names. Only entries
            whose key appears in `classes` are emitted. None or a non-dict value is
            treated as "no attributes".
        relationships: Iterable of dicts with "source", "target" and "type" keys.
            "type" is matched case-insensitively against Association, Generalization,
            Aggregation and Composition. Entries that are not dicts, lack a key, or
            carry another type are skipped with a printed notice.
        output_file: Path the diagram text is written to (overwritten if present).

    Returns:
        None. The diagram is written to `output_file` and a confirmation is printed.
    """
    # (arrow, label) for each supported relationship type, keyed by lower-cased name
    arrow_styles = {
        "association": ("-->", "uses"),
        "generalization": ("--|>", "inherits"),
        "aggregation": ("o--", "has"),
        "composition": ("*--", "owns"),
    }
    attrs_by_class = attributes if isinstance(attributes, dict) else {}

    lines = ["@startuml", ""]

    # Define classes and their attributes
    for cls in classes:
        lines.append(f"class {cls} {{")
        lines.extend(f"  {attr}" for attr in (attrs_by_class.get(cls) or []))
        lines.append("}")
        lines.append("")

    # Define relationships (simplified labels). The entries come from LLM output,
    # so one malformed item must not abort the whole diagram with a KeyError.
    for rel in relationships or []:
        if not isinstance(rel, dict):
            print(f"Skipping malformed relationship: {rel!r}")
            continue
        source = rel.get("source")
        target = rel.get("target")
        rel_type = rel.get("type")
        if not (source and target and isinstance(rel_type, str)):
            print(f"Skipping malformed relationship: {rel!r}")
            continue
        style = arrow_styles.get(rel_type.strip().lower())
        if style is None:
            print(f"Skipping relationship with unknown type: {rel!r}")
            continue
        arrow, label = style
        lines.append(f"{source} {arrow} {target} : {label}")

    lines.append("")
    lines.append("@enduml")

    # Save to file
    with open(output_file, "w") as file:
        file.write("\n".join(lines))

    print(f"PlantUML file '{output_file}' generated successfully.")


# ✅ Debugging Check: show the three inputs that will be turned into the diagram
print("Classes Extracted:", identified_classes)
print("Attributes Extracted:", identified_attributes)
print("Relationships Extracted:", identified_relationships)

# ✅ Generate UML (no output_file given, so the default "class_diagram.puml" is used)
generate_plantuml(identified_classes, identified_attributes, identified_relationships)

# 🖥️ To render, use: `plantuml class_diagram.puml` in the terminal


Classes Extracted: ['MetroStation', 'TicketDistributor', 'Traveller', 'Passenger', 'Child', 'Adult', 'MetroPass', 'TimeCard', 'Transaction', 'User', 'Machine', 'CurrencyNote']
Attributes Extracted: {'MetroStation': ['ID', 'name'], 'TicketDistributor': ['ID', 'type', 'status', 'capacity'], 'Traveller': ['ID', 'age', 'type'], 'Passenger': ['ID', 'type'], 'Child': ['ID', 'type'], 'Adult': ['ID', 'type'], 'MetroPass': ['ID', 'type', 'duration'], 'TimeCard': ['ID', 'type', 'duration'], 'Transaction': ['ID', 'status', 'amount'], 'User': ['ID', 'name', 'registered_number'], 'Machine': ['ID', 'type', 'load_environment'], 'DisplayInterface': ['ID', 'type'], 'PaymentMethod': ['ID', 'type'], 'Smartcard': ['ID', 'type'], 'CurrencyNote': ['ID', 'type'], 'TouchScreen': ['ID', 'type']}
Relationships Extracted: [{'source': 'MetroStation', 'target': 'TicketDistributor', 'type': 'Association', 'description': 'issues'}, {'source': 'Traveller', 'target': 'Passenger', 'type': 'Aggregation', 'description': 

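### Debugging

Note: the cell below redefines `identify_relationships` with a more detailed, operation-style prompt, replacing the earlier definition. Its execution count (26) precedes that of the PlantUML cell above (27), so run it before generating the diagram to reproduce the saved output.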

In [26]:
import ollama
import json
import re

def identify_relationships(problem_statement, classes, attributes):
    """
    Identifies relationships between extracted classes using Ollama LLaMA 3.2.
    Extracts Association, Generalization (Inheritance), Aggregation, and Composition.

    NOTE: this definition shadows the earlier `identify_relationships` in this
    notebook; it asks the model for short operation-style descriptions.

    Args:
        problem_statement: Free-text requirements; interpolated verbatim into the prompt.
        classes: Previously identified class names; interpolated into the prompt.
        attributes: Previously identified attributes; interpolated into the prompt.

    Returns:
        list: The value stored under "relationships" in the first JSON object of the
        model's reply that has that key. Always a list: [] is returned when the reply
        contains no such object or when the value is not a JSON array.
    """
    prompt = f"""
    You are an expert software analyst. Your task is to identify **all types of relationships** between the given classes based on the problem statement.

    ### **Types of Relationships:**
    1️⃣ **Association** - One class interacts with another. Example: "User borrows Book".
    2️⃣ **Generalization (Inheritance)** - One class is a specialized form of another. Example: "Admin is a subclass of User".
    3️⃣ **Aggregation** - One class is made up of another, but they have independent lifecycles. Example: "Library has Books, but Books exist independently".
    4️⃣ **Composition** - A stronger form of Aggregation, where the part **cannot exist** without the whole. Example: "House has Rooms, Rooms cannot exist without a House".

    ### **Rules:**
    ✅ Identify the **verbs** and **context clues** in the problem statement to find relationships.  
    ✅ Classify the relationship into one of the four types above.  
    ✅ **Use clear operation-style descriptions** instead of full sentences.  
    ✅ Try to cover **all identified classes** and **attributes** in the relationships.  

    ### **Follow this Step-by-Step Approach:**
    1. Carefully analyze the problem statement to extract interaction patterns between classes.
    2. Identify the type of relationship (Association, Generalization, Aggregation, or Composition).
    3. Formulate a short, clear description using an **operation-style** phrase like "issues", "contains", "inherits", etc.
    4. Ensure that every identified class is considered for possible relationships.

    ### **Output Format:**
    Output **ONLY valid JSON** without any explanation:
    ```json
    {{
        "relationships": [
            {{"source": "ClassA", "target": "ClassB", "type": "Association", "description": "issues"}},
            {{"source": "ClassC", "target": "ClassD", "type": "Aggregation", "description": "contains"}},
            {{"source": "ClassE", "target": "ClassF", "type": "Generalization", "description": "inherits"}}
        ]
    }}
    ```

    ### **Classes Identified:**
    {classes}

    ### **Attributes Identified:**
    {attributes}

    ### **Problem Statement:**
    {problem_statement}

    Now extract **all types of relationships** between the classes.  
    **Do NOT include any explanation — output JSON only.**
    """

    # Generate response using Ollama LLaMA 3.2; the sampling options pin the
    # decoding so repeated runs give the same answer.
    response = ollama.chat(
        model="llama3.2",
        messages=[{"role": "user", "content": prompt}],
        options={
            "temperature": 0,
            "top_p": 1,
            "top_k": 1
        }
    )

    # Debugging: Print raw response
    response_text = response['message']['content'].strip()
    print("Raw Response from LLaMA 3.2:\n", response_text)

    # Try each "{" in turn and let the JSON decoder decide where that object ends.
    # The prompt shows a fenced ```json example, so the reply may be wrapped in a
    # fence or followed by prose; a greedy first-brace-to-last-brace match would
    # swallow any trailing "}" and make parsing fail.
    decoder = json.JSONDecoder()
    first_error = None
    saw_brace = False
    start = response_text.find("{")
    while start != -1:
        saw_brace = True
        try:
            candidate, end = decoder.raw_decode(response_text, start)
        except json.JSONDecodeError as e:
            if first_error is None:
                first_error = e
        else:
            if isinstance(candidate, dict) and "relationships" in candidate:
                print("Cleaned JSON:\n", repr(response_text[start:end]))
                relationships = candidate["relationships"]
                if isinstance(relationships, list):
                    return relationships
                print('Error: "relationships" is not a JSON array.')
                return []
        start = response_text.find("{", start + 1)

    # Every failure path returns [] so the return type matches the success path.
    if not saw_brace:
        print("Error: No valid JSON found in response.")
    elif first_error is not None:
        print("JSON parsing error:", str(first_error))
    else:
        print('Error: No JSON object with a "relationships" key found in response.')
    return []


# Example usage: re-run relationship extraction with the function defined in this
# cell, overwriting `identified_relationships` from the earlier run.
identified_relationships = identify_relationships(problem_statement, identified_classes, identified_attributes)

print("\nRelationships Identified:\n", identified_relationships)


Raw Response from LLaMA 3.2:
 {
  "relationships": [
    {"source": "MetroStation", "target": "TicketDistributor", "type": "Association", "description": "issues"},
    {"source": "Traveller", "target": "Passenger", "type": "Aggregation", "description": "contains"},
    {"source": "User", "target": "Transaction", "type": "Association", "description": "issues"},
    {"source": "Machine", "target": "DisplayInterface", "type": "Aggregation", "description": "contains"},
    {"source": "PaymentMethod", "target": "Smartcard", "type": "Composition", "description": "is-a"},
    {"source": "MetroPass", "target": "TimeCard", "type": "Aggregation", "description": "contains"},
    {"source": "TicketDistributor", "target": "User", "type": "Association", "description": "issues"},
    {"source": "Traveller", "target": "Child", "type": "Composition", "description": "is-a"},
    {"source": "Traveller", "target": "Adult", "type": "Composition", "description": "is-a"},
    {"source": "MetroStation", "targ