In [None]:
!pip install -U langchain-ollama

In [None]:
import pandas as pd

def generate_llm_prompt(scenario_id, scenarios_df, threats_df, vulnerabilities_df):
    """
    Build the LLM prompt for one scenario, embedding the full list of threats.

    The scenario text is read from the 'User' column of the first row of
    ``scenarios_df`` whose 'Scenario ID' equals ``scenario_id``. Every row of
    ``threats_df`` is rendered into the prompt.

    Args:
        scenario_id (str): The scenario ID to pull the correct description.
        scenarios_df (pd.DataFrame): Scenarios; must contain the columns
            'Scenario ID' and 'User'.
        threats_df (pd.DataFrame): Threats; each row must provide
            'THREAT ID', 'THREAT' and 'DESCRIPTION'.
        vulnerabilities_df (pd.DataFrame): Vulnerabilities. Accepted so the
            signature stays stable for callers, but not inserted into the
            prompt text at present.

    Returns:
        str: The LLM prompt formatted as a string.

    Raises:
        IndexError: If no row of ``scenarios_df`` has the given scenario ID.
    """
    # Select the rows for this scenario and fail with a message that names the
    # missing ID instead of pandas' generic "indexer is out-of-bounds" error.
    matching_rows = scenarios_df[scenarios_df['Scenario ID'] == scenario_id]
    if matching_rows.empty:
        raise IndexError(f"Scenario ID {scenario_id!r} not found in scenarios_df")
    scenario_description = matching_rows.iloc[0]['User']

    # Render every threat as a three-line entry; entries are newline-separated.
    threats_text = "\n".join(
        f"THREAT ID: {row['THREAT ID']}\nTHREAT: {row['THREAT']}\nDESCRIPTION: {row['DESCRIPTION']}"
        for _, row in threats_df.iterrows()
    )

    # NOTE(review): the instructions below mention a vulnerabilities list and a
    # "Vulnerabilities" output key, but only the threats list is embedded and
    # the response format shows only "Threats". The text is kept unchanged so
    # model behaviour is not altered here - confirm the intended scope.
    prompt = f"""


    Scenario: "{scenario_description}"

    
    Beginning of list of Threats
    Threats:
    {threats_text}

    End list of Threats


    ROLE: You are an assistant in security risk analysis. 

    For each given scenario, determine which **threats** and **vulnerabilities** are present. 
    You have access to two lists: one for **threats** and another for **vulnerabilities**. Use these lists to identify the relevant threats and vulnerabilities for each scenario.

    ### **Instructions:**
    1. For each scenario provided, identify which threats and vulnerabilities from the lists match the scenario.
    2. If a **threat** or **vulnerability** applies, include its description, explaining why it is relevant to the scenario.
    3. Provide a **JSON** output with the following structure:
    - **Threats**: A list of threats that apply to the scenario.
    - **Vulnerabilities**: A list of vulnerabilities that apply to the scenario.

    Each item in the list should contain:
    - **ThreatID** / **VulnID**: The identifier for the threat/vulnerability.
    - **Threat** / **Vulnerability**: The name of the threat/vulnerability.
    - **Description**: A detailed explanation of the threat/vulnerability.

    **If no threats or vulnerabilities apply**, respond with an empty array for that category.
    "Generate a JSON object with the following information."
    ### **Format of the Response:**
        {{
            "ScenarioID": "[Scenario ID]",
            "Threats": [
                {{
                    "ThreatID": "[Threat ID]",
                    "Threat": "[Threat Name]",
                    "Description": "[Threat Description]"
                }}
            ]
            
        }}
        """

    return prompt


def generate_prompts_for_all_scenarios(scenarios_df, threats_df, vulnerabilities_df):
    """
    Build one LLM prompt per entry of the 'Scenario ID' column.

    Args:
        scenarios_df (pd.DataFrame): DataFrame containing scenarios.
        threats_df (pd.DataFrame): DataFrame containing threats.
        vulnerabilities_df (pd.DataFrame): DataFrame containing vulnerabilities.

    Returns:
        list: The prompts, in the same order as the 'Scenario ID' column.
    """
    # Each ID is resolved against the full scenarios table by generate_llm_prompt.
    return [
        generate_llm_prompt(current_id, scenarios_df, threats_df, vulnerabilities_df)
        for current_id in scenarios_df['Scenario ID']
    ]


# Example usage:

# The input CSV files are semicolon-separated.
scenarios_df = pd.read_csv('scen1.csv', sep=';')
threats_df = pd.read_csv('threat.csv', sep=';')
vulnerabilities_df = pd.read_csv('vulnerability.csv', sep=';')

# Strip surrounding whitespace from the header names of all three tables so
# the column lookups used when building prompts match exactly.
for _frame in (scenarios_df, threats_df, vulnerabilities_df):
    _frame.columns = _frame.columns.str.strip()

# Build one prompt per scenario row.
prompts = generate_prompts_for_all_scenarios(scenarios_df, threats_df, vulnerabilities_df)

# Show the first generated prompt as a sample.
print(prompts[0])


In [None]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_ollama.llms import OllamaLLM
import csv



# Wrap the generated scenario prompt in a question/answer template that ends
# with a step-by-step reasoning cue.
template = """Question: {question}
Answer: Let's think step by step"""

prompt = ChatPromptTemplate.from_template(template)
model = OllamaLLM(model="marco-o1")
chain = prompt | model

# Only the first generated prompt is sent to the model.
answer = chain.invoke({"question": prompts[0]})
print(answer)

csv_filename = "answers.csv"

# mode="w" overwrites any existing file; newline="" lets the csv module
# control line endings itself.
with open(csv_filename, mode="w", newline="", encoding="utf-8") as file:
    writer = csv.writer(file)
    writer.writerow([answer])  # One single-column row holding only the answer

print(f"Answer saved to {csv_filename}")