# Analytic Plan Generator



## Background



## Environment Setup

Ensure you have imported a Gemini API key from AI Studio and stored it as a Colab secret named `GOOGLE_API_KEY` (the name this notebook reads). You can do this directly in the Secrets tab on the left.

In [1]:
!pip install -U -q "google"
!pip install -U -q "google.genai"
!pip install mitreattack-python

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.3/45.3 kB[0m [31m1.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m164.4/164.4 kB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting mitreattack-python
  Downloading mitreattack_python-4.0.0-py3-none-any.whl.metadata (6.9 kB)
Collecting deepdiff (from mitreattack-python)
  Downloading deepdiff-8.4.2-py3-none-any.whl.metadata (7.1 kB)
Collecting drawsvg>=2.0.0 (from mitreattack-python)
  Downloading drawsvg-2.4.0-py3-none-any.whl.metadata (19 kB)
Collecting loguru (from mitreattack-python)
  Downloading loguru-0.7.3-py3-none-any.whl.metadata (22 kB)
Collecting stix2 (from mitreattack-python)
  Downloading stix2-3.0.1-py2.py3-none-any.whl.metadata (10 kB)
Collecting xlsxwriter (from mitreattack-python)
  Downloading XlsxWriter-3.2.3-py3-none-any.whl.metadata (2.7 kB)
Collecting orderly-set<6,>=5.3.0 (from deepdiff->mitreattack-python)
  Downloading orderly_set-5.4.0-p

In [2]:
import os
from google.colab import userdata
from google.colab import drive
import requests
from mitreattack.stix20 import MitreAttackData
import json
import base64
from google import genai
from google.genai import types
import datetime
import time

In [3]:
# Rate limits: https://ai.google.dev/gemini-api/docs/rate-limits
# Pricing: https://ai.google.dev/gemini-api/docs/pricing
# Usage: https://console.cloud.google.com/apis/api/generativelanguage.googleapis.com/metrics?project=gen-lang-client-0497172401
# The Colab secret is named "GOOGLE_API_KEY", but it is exported under the
# environment variable "GEMINI_API_KEY", which is the name read back when the
# Gemini client is created.
os.environ["GEMINI_API_KEY"] = userdata.get("GOOGLE_API_KEY")

In [4]:
def log(message, end="\n"):
  """
  Print a message to stdout, prefixed with the current local timestamp.

  The prefix is `datetime.datetime.now().isoformat()` wrapped in square
  brackets, e.g. "[2025-05-05T10:58:49.659795] message".

  Args:
    message (str): The text to log.
    end (str): Line terminator handed to `print`. Output is flushed only
      when this is a newline.
  """
  # isoformat() yields an ISO 8601 string, including microseconds when non-zero
  stamp = datetime.datetime.now().isoformat()
  print(f"[{stamp}] {message}", end=end, flush=(end == "\n"))

In [5]:
# Mount Google Drive and move into the Google AI Studio folder. Everything
# below reads and writes files relative to this working directory.
OUTPUT_DIR = "/content/drive/MyDrive/Google AI Studio/techniques"

drive.mount("/content/drive")
# Create the folder on first use so a fresh Drive does not fail on chdir.
os.makedirs(OUTPUT_DIR, exist_ok=True)
os.chdir(OUTPUT_DIR)

Mounted at /content/drive


## ATT&CK Matrix Retrieval

This section first retrieves the latest MITRE ATT&CK Matrix for Enterprise from MITRE's GitHub repository (unless a cached copy of `enterprise-attack.json` already exists in the working folder). Then, it builds a nested dictionary that stores each technique with its parent tactic(s), the technique's description, and detection suggestions.

In [6]:
ATTACK_BUNDLE_PATH = "enterprise-attack.json"
ATTACK_BUNDLE_URL = "https://raw.githubusercontent.com/mitre/cti/master/enterprise-attack/enterprise-attack.json"

# Download the Enterprise ATT&CK STIX bundle only when no local copy exists;
# later runs reuse the cached file.
if not os.path.exists(ATTACK_BUNDLE_PATH):
    # The timeout keeps the cell from hanging indefinitely, and
    # raise_for_status() stops an HTTP error page from being parsed as the bundle.
    bundle_response = requests.get(ATTACK_BUNDLE_URL, timeout=60)
    bundle_response.raise_for_status()
    attack_json = bundle_response.json()

    # Persist a pretty-printed copy next to the generated plans.
    with open(ATTACK_BUNDLE_PATH, "w", encoding="utf-8") as outfile:
        json.dump(attack_json, outfile, indent=4)

mad = MitreAttackData(ATTACK_BUNDLE_PATH)

In [7]:
# Lookup table: lower-cased tactic name -> ATT&CK ID.
# The ID is taken from the first external reference of each tactic object.
tactic_name_to_id = dict(
    (tactic.name.lower(), tactic.external_references[0]["external_id"])
    for tactic in mad.get_tactics()
)

# Helper to extract ATT&CK ID
def attack_id(stix_obj):
    """Return the `external_id` of the first "mitre-attack" external reference.

    Args:
        stix_obj: A mapping-like object exposing `.get("external_references", ...)`.

    Returns:
        The `external_id` value of the first reference whose `source_name` is
        "mitre-attack", or None when there is no such reference.
    """
    mitre_refs = (
        ref
        for ref in stix_obj.get("external_references", [])
        if ref.get("source_name") == "mitre-attack"
    )
    first_match = next(mitre_refs, None)
    if first_match is None:
        return None
    return first_match.get("external_id")

# Final output: "<technique ID> - <name>" -> {"tactic", "description", "detection"}
technique_dict = {}

# Official tactic display names keyed by ATT&CK tactic ID. Used so that tactic
# labels read "TA0001 - Initial Access" (the format the prompt asks the model to
# echo) instead of the title-cased kill-chain slug ("TA0001 - Initial-Access",
# "TA0011 - Command-And-Control").
tactic_id_to_name = {
    tactic.external_references[0]["external_id"]: tactic.name
    for tactic in mad.get_tactics()
}

# Combine techniques and sub-techniques.
# NOTE(review): get_techniques() may already include sub-techniques by default;
# any duplicates are harmless here because technique_dict is keyed by ID and
# name, so a repeated entry simply overwrites itself — confirm against the
# mitreattack-python documentation.
all_techniques = mad.get_techniques(remove_revoked_deprecated=True) + \
                 mad.get_subtechniques(remove_revoked_deprecated=True)

for tech in all_techniques:
    tid = attack_id(tech)                         # T1055.011
    # "/" is replaced because the key is later used as an output file name.
    name = tech.get("name", "").strip().replace("/", "-")           # Extra Window Memory Injection
    full_key = f"{tid} - {name}"                  # T1055.011 - Extra Window Memory Injection

    description = tech.get("description", "").strip()
    detection = tech.get("x_mitre_detection", "").strip()

    # Collect one "<tactic ID> - <tactic name>" label per parent tactic.
    tactic_labels = set()
    for phase in tech.get("kill_chain_phases", []):
        if phase["kill_chain_name"] != "mitre-attack":
            continue
        # Kill-chain phase names are hyphenated slugs ("initial-access");
        # tactic_name_to_id is keyed by the lower-cased display name.
        phase_name = phase["phase_name"].lower().replace("-", " ")
        tactic_id = tactic_name_to_id.get(phase_name)
        if tactic_id:
            # Fall back to the title-cased phase name if the ID is unknown.
            tactic_name = tactic_id_to_name.get(tactic_id, phase_name.title())
            tactic_labels.add(f"{tactic_id} - {tactic_name}")

    tactic_str = ", ".join(sorted(tactic_labels))

    technique_dict[full_key] = {
        "tactic": tactic_str,
        "description": description,
        "detection": detection,
    }

In [8]:
# technique_dict["T1133 - External Remote Services"]

## Build Prompts

This section generates a prompt for each technique in the MITRE ATT&CK matrix. It first defines a base prompt that provides context to the model, and then generates a specific prompt crafted for each individual technique.

In [9]:
# Shared instruction text that every per-technique prompt starts with. It
# defines the ASOM components, shows a JSON template, and states how many
# PIRs / indicators / forms of evidence the model should produce.
# NOTE(review): the text uses "IR" and "PIR" interchangeably, and the template
# places the evidence entries directly under "Indicators" — confirm this matches
# the output structure expected by the code that consumes the generated files.
base_prompt = """\
Generate an Analytic Scheme of Maneuver (ASOM) based on the following definitions of its components. For each component, provide specific examples relevant to a scenario where we suspect a sophisticated external threat is attempting to gain unauthorized access to our organization's sensitive data or negatively impact the network.

ASOM Component Definitions:

1.  Information Requirement (IR): These identify the information about the enemy or the terrain that the commander considers most important. For example, "Has the adversary gained initial access to the network?" These should be tagged with MITRE ATT&CK tactic numbers; for example, (TA0001 - Initial Access). A complete PIR looks like this: "Has the adversary gained initial access to the network? (TA0001 - Initial Access)".

2.  Indicators: These are positive or negative evidence of threat activity pertaining to one or more information requirements. They are observable clues related to a specific information requirement. For the IR "Has the adversary gained initial access to the network? (TA0001 - Initial Access)", the indicator should be a technique within that tactic; for example, "T1190 - Exploit Public Facing Application".

3.  Evidence: This is the concrete information that supports or refutes an indicator. It provides the "proof" and can vary in complexity. For the IR "Has the adversary gained initial access to the network?" and the indicator "T1190 - Exploit Public Facing Application" beneath it, appropriate evidence could be:
    * "Anomalous login attempts from unusual geographic locations."
    * "Network traffic involving known malicious command and control (C2) infrastructure."

4.  Data: This describes the precise data necessary to identify evidence. Specificity here is key (e.g., Zeek Conn logs, Sysmon event ID 4624, Active Directory security logs). For the evidence, focus your ASOM on the following data sources: network logs, specifically Zeek logs; host logs, specifically Windows Event IDs. Write only the data name. For example, Windows Event ID 4688; Zeek conn.log

5. Data Platform: This describes the type of system from which the data can be collected. It should be one of the following options: "Endpoints", "Servers", "Network devices".

6.  Named Areas of Interest (NAIs): These are areas where data that will satisfy a specific information requirement can be collected. For all NAIs, use a dummy value of "Insert site-specific NAI here".

7.  Actions: These are high-level instructions that guide the analysts' search for evidence. Focus mostly on simple detections, but also look for opportunities to incorporate basic statistical methods data science techniques here, such as descriptive statistics, Inter‑quartile range & box‑plots, entropy measures, correlation analysis, linear regression, time series analysis, and other, similar methods. For the evidence above, appropriate actions could be:
    * "Identify all source IP addresses for failed and successful login events. Geolocate logain sources, then identify rare source countries by low frequency of occurence or percentiles to flag anomalous login events from unusual geographic locations."
    * "Inner join a list of source and destination IP addresses from public-facing devices with a list of known malicious command and control (C2) servers from a cyber threat intelligence provider updated within at least 30 days. Investigate all sessions where connections involved C2 servers."

Based on these definitions, please generate a detailed ASOM in the JSON format. The keys in the JSON object should correspond to the following ASOM components in this order: IR, Indicators, Evidence, Data Platform, Data, NAIs, Actions. Note that the key for IR should be replaced by the actual IR based on the description above. Also note that the key for evidence should be replaced with the actual form of evidence based on the description above. Here is an example template:

{
    "(Insert IR here)": {
        "Indicators": {
            "(Insert form of evidence here)": {
                "Data": "",
                "Data Platform": "",
                "NAI": "",
                "Action": ""
            },
            "(Insert form of evidence here)": {
                "Data": "",
                "Data Platform": "",
                "NAI": "",
                "Action": ""
            }
        }
    }
}

For each MITRE technique below, generate one PIR (a general question tagged with the parent tactic's T-code, in the format "Has the adversary gained initial access to the network? (TA0001 - Initial Access)") for each tactic. For example, if there is one parent tactic, generate one PIR; if there are two parent tactics, generate two PIRs. For each PIR, generate two indicators (the MITRE technique provided). For each indicator, generate one form of evidence. Each form of evidence should have Data, NAI, and Actions. I have also provided a helpful description of the technique labeled "Description:", and a potential idea for detecting it, labeled "Detection:"."""

In [10]:
# One full prompt per technique: the shared base prompt followed by that
# technique's name, tactic(s), description and detection notes.
prompt_library = {}

for technique, details in technique_dict.items():
    technique_context = (
        f"\n\nTechnique: {technique}"
        f"\n\nTactic(s): {details['tactic']}"
        f"\n\nDescription: {details['description']}"
        f"\n\nDetection: {details['detection']}"
    )
    prompt_library[technique] = base_prompt + technique_context

In [11]:
# print(prompt_library["T1190 - Exploit Public-Facing Application"])

## Generate the ASOM

This section generates one PIR per parent tactic and two indicators (each with evidence, data, data platform, NAI, and action) for each technique in the MITRE ATT&CK matrix for Enterprise.

In [12]:
def generate(prompt: str, target_model: str):
    """Send one prompt to a Gemini model and return the text of its reply.

    A new `genai.Client` is created on every call, using the API key stored in
    the GEMINI_API_KEY environment variable. The request is a three-turn
    conversation:

    1. a hard-coded "user" turn containing the full ASOM instructions plus the
       details of technique T1133 (a worked example),
    2. a hard-coded "model" turn with two text parts: a written plan, then the
       JSON answer for that example,
    3. a "user" turn containing `prompt`.

    The generation config requests an "application/json" response and supplies
    the JSON template as the system instruction.

    Args:
        prompt: Text of the final user turn.
        target_model: Model name passed to `generate_content`.

    Returns:
        `response.text` of the `generate_content` call.
    """
    # Built per call; the key is read from the environment rather than hard-coded.
    client = genai.Client(
        api_key=os.environ.get("GEMINI_API_KEY"),
    )

    model = target_model
    contents = [
        # Turn 1 (user): worked-example request for T1133.
        types.Content(
            role="user",
            parts=[
                types.Part.from_text(text="""Generate an Analytic Scheme of Maneuver (ASOM) based on the following definitions of its components. For each component, provide specific examples relevant to a scenario where we suspect a sophisticated external threat is attempting to gain unauthorized access to our organization's sensitive data or negatively impact the network.

ASOM Component Definitions:

1.  Information Requirement (IR): These identify the information about the enemy or the terrain that the commander considers most important. For example, \"Has the adversary gained initial access to the network?\" These should be tagged with MITRE ATT&CK tactic numbers; for example, (TA0001 - Initial Access). A complete PIR looks like this: \"Has the adversary gained initial access to the network? (TA0001 - Initial Access)\".

2.  Indicators: These are positive or negative evidence of threat activity pertaining to one or more information requirements. They are observable clues related to a specific information requirement. For the IR \"Has the adversary gained initial access to the network? (TA0001 - Initial Access)\", the indicator should be a technique within that tactic; for example, \"T1190 - Exploit Public Facing Application\".

3.  Evidence: This is the concrete information that supports or refutes an indicator. It provides the \"proof\" and can vary in complexity. For the IR \"Has the adversary gained initial access to the network?\" and the indicator \"T1190 - Exploit Public Facing Application\" beneath it, appropriate evidence could be:
    * \"Anomalous login attempts from unusual geographic locations.\"
    * \"Network traffic involving known malicious command and control (C2) infrastructure.\"

4.  Data: This describes the precise data necessary to identify evidence. Specificity here is key (e.g., Zeek Conn logs, Sysmon event ID 4624, Active Directory security logs). For the evidence, focus your ASOM on the following data sources: network logs, specifically Zeek logs; host logs, specifically Windows Event IDs. Write only the data name. For example, Windows Event ID 4688; Zeek conn.log

5. Data Platform: This describes the type of system from which the data can be collected. It should be one of the following options: \"Endpoints\", \"Servers\", \"Network devices\".

6.  Named Areas of Interest (NAIs): These are areas where data that will satisfy a specific information requirement can be collected. For all NAIs, use a dummy value of \"Insert site-specific NAI here\".

7.  Actions: These are high-level instructions that guide the analysts' search for evidence. Focus mostly on simple detections, but also look for opportunities to incorporate basic statistical methods data science techniques here, such as descriptive statistics, Inter‑quartile range & box‑plots, entropy measures, correlation analysis, linear regression, time series analysis, and other, similar methods. For the evidence above, appropriate actions could be:
    * \"Identify all source IP addresses for failed and successful login events. Geolocate logain sources, then identify rare source countries by low frequency of occurence or percentiles to flag anomalous login events from unusual geographic locations.\"
    * \"Inner join a list of source and destination IP addresses from public-facing devices with a list of known malicious command and control (C2) servers from a cyber threat intelligence provider updated within at least 30 days. Investigate all sessions where connections involved C2 servers.\"

Based on these definitions, please generate a detailed ASOM in the JSON format. The keys in the JSON object should correspond to the following ASOM components in this order: IR, Indicators, Evidence, Data Platform, Data, NAIs, Actions. Note that the key for IR should be replaced by the actual IR based on the description above. Also note that the key for evidence should be replaced with the actual form of evidence based on the description above. Here is an example template:

{
    \"(Insert IR here)\": {
        \"Indicators\": {
            \"(Insert form of evidence here)\": {
                \"Data\": \"\",
                \"Data Platform\": \"\",
                \"NAI\": \"\",
                \"Action\": \"\"
            },
            \"(Insert form of evidence here)\": {
                \"Data\": \"\",
                \"Data Platform\": \"\",
                \"NAI\": \"\",
                \"Action\": \"\"
            }
        }
    }
}

For each MITRE technique below, generate one PIR (a general question tagged with the parent tactic's T-code, in the format \"Has the adversary gained initial access to the network? (TA0001 - Initial Access)\") for each tactic. For example, if there is one parent tactic, generate one PIR; if there are two parent tactics, generate two PIRs. For each PIR, generate two indicators (the MITRE technique provided). For each indicator, generate one form of evidence. Each form of evidence should have Data, NAI, and Actions. I have also provided a helpful description of the technique labeled \"Description:\", and a potential idea for detecting it, labeled \"Detection:\".

Technique: T1133 - External Remote Services

Tactic: TA0001 - Initial-Access, TA0003 - Persistence

Description: Adversaries may leverage external-facing remote services to initially access and/or persist within a network. Remote services such as VPNs, Citrix, and other access mechanisms allow users to connect to internal enterprise network resources from external locations. There are often remote service gateways that manage connections and credential authentication for these services. Services such as [Windows Remote Management](https://attack.mitre.org/techniques/T1021/006) and [VNC](https://attack.mitre.org/techniques/T1021/005) can also be used externally.(Citation: MacOS VNC software for Remote Desktop)

Access to [Valid Accounts](https://attack.mitre.org/techniques/T1078) to use the service is often a requirement, which could be obtained through credential pharming or by obtaining the credentials from users after compromising the enterprise network.(Citation: Volexity Virtual Private Keylogging) Access to remote services may be used as a redundant or persistent access mechanism during an operation.

Access may also be gained through an exposed service that doesn’t require authentication. In containerized environments, this may include an exposed Docker API, Kubernetes API server, kubelet, or web application such as the Kubernetes dashboard.(Citation: Trend Micro Exposed Docker Server)(Citation: Unit 42 Hildegard Malware)

Detection: Follow best practices for detecting adversary use of [Valid Accounts](https://attack.mitre.org/techniques/T1078) for authenticating to remote services. Collect authentication logs and analyze for unusual access patterns, windows of activity, and access outside of normal business hours.

When authentication is not required to access an exposed remote service, monitor for follow-on activities such as anomalous external use of the exposed API or application."""),
            ],
        ),
        # Turn 2 (model): example answer — a written plan part followed by the JSON part.
        types.Content(
            role="model",
            parts=[
                types.Part.from_text(text="""The user wants an Analytic Scheme of Maneuver (ASOM) in JSON format.
The ASOM should follow a specific structure: IR -> Indicators -> Evidence -> Data, Data Platform, NAI, Action.
The IR key should be the actual Information Requirement string.
The Evidence key should be the actual evidence string.
The scenario involves detecting a sophisticated external threat attempting unauthorized access or network impact.
I need to generate PIRs, Indicators, Evidence, Data, Data Platform, NAI, and Actions based on the provided technique T1133 - External Remote Services.

**Plan:**

1.  **Identify Tactics:** T1133 belongs to TA0001 (Initial Access) and TA0003 (Persistence).
2.  **Formulate PIRs:** Create one PIR for each tactic, using the general question format provided and the tactic T-code.
    *   PIR 1 (TA0001): \"Has the adversary gained initial access to the network via external remote services? (TA0001 - Initial Access)\"
    *   PIR 2 (TA0003): \"Is the adversary maintaining persistence using external remote services? (TA0003 - Persistence)\"
3.  **Define Indicators:** For each PIR, use the provided technique T1133 as the indicator. Since the user asked for *two* indicators per PIR, but only provided *one* technique (T1133), I will use T1133 twice for each PIR, but focus on different aspects or detection methods derived from the description and detection notes.
    *   PIR 1 -> Indicator 1: T1133 - External Remote Services (focus on login anomalies)
    *   PIR 1 -> Indicator 2: T1133 - External Remote Services (focus on exposed services)
    *   PIR 2 -> Indicator 1: T1133 - External Remote Services (focus on persistent unusual access)
    *   PIR 2 -> Indicator 2: T1133 - External Remote Services (focus on redundant access mechanisms)
4.  **Develop Evidence:** For each indicator, create a specific form of evidence based on the detection notes.
    *   PIR1/Indicator1 (T1133 - Login Anomalies): \"Anomalous login attempts to external remote services (VPN, Citrix, RDP) from unusual geographic locations or outside normal business hours.\"
    *   PIR1/Indicator2 (T1133 - Exposed Services): \"Anomalous external usage patterns targeting potentially exposed remote services (e.g., Docker API, Kubernetes API) that do not require authentication.\"
    *   PIR2/Indicator1 (T1133 - Persistent Unusual Access): \"Successful logins to external remote services exhibiting unusual access patterns (e.g., multiple failed logins followed by success, logins at odd hours consistently over time) matching known Valid Accounts.\"
    *   PIR2/Indicator2 (T1133 - Redundant Access): \"Connections via external remote services originating from IP addresses associated with other known suspicious activities or previously compromised accounts.\"
5.  **Specify Data Sources:** Identify the specific logs needed for each piece of evidence, focusing on Windows Event IDs and Zeek logs as requested.
    *   Evidence 1 (Login Anomalies): Windows Event ID 4624, Windows Event ID 4625, VPN Logs, Citrix Logs, Zeek conn.log
    *   Evidence 2 (Exposed Services): Zeek conn.log, Web Server Access Logs (if applicable for K8s dashboard etc.)
    *   Evidence 3 (Persistent Unusual Access): Windows Event ID 4624, Windows Event ID 4625, VPN Logs, Citrix Logs
    *   Evidence 4 (Redundant Access): Zeek conn.log, VPN Logs, Citrix Logs, Threat Intel Feeds
6.  **Define Data Platform:** Assign the appropriate platform (\"Endpoints\", \"Servers\", \"Network devices\").
    *   Evidence 1: Servers (for AD/Event Logs), Network devices (VPN/Citrix/Zeek)
    *   Evidence 2: Network devices (Zeek), Servers (Web server logs)
    *   Evidence 3: Servers (for AD/Event Logs), Network devices (VPN/Citrix)
    *   Evidence 4: Network devices (Zeek/VPN/Citrix), Servers (Threat Intel platform)
7.  **Set NAI:** Use the placeholder \"Insert site-specific NAI here\".
8.  **Create Actions:** Define actions to find the evidence, incorporating basic analysis and statistical methods where applicable.
    *   Evidence 1 Action: \"Collect remote service authentication logs (Windows Event ID 4624, 4625, VPN/Citrix). Identify source IP addresses for successful and failed logins. Geolocate source IPs. Calculate frequency distribution of source countries/regions and login times. Flag logins from low-frequency locations or occurring outside typical business hours (e.g., using IQR or percentile analysis on login times).\"
    *   Evidence 2 Action: \"Monitor Zeek conn.log for connections to known ports associated with exposed services (e.g., Docker API 2375/2376, K8s API 6443/443, kubelet 10250). Analyze connection volume, source IPs, and request patterns. Use time series analysis to detect sudden spikes or changes in traffic patterns from external IPs to these services.\"
    *   Evidence 3 Action: \"Correlate successful logins (Event ID 4624, VPN/Citrix logs) with preceding failed attempts (Event ID 4625). Analyze login timestamps for patterns inconsistent with normal user behavior (e.g., logins consistently occurring late at night or on weekends). Use descriptive statistics to establish baseline login behavior per user/group and identify deviations.\"
    *   Evidence 4 Action: \"Cross-reference source IP addresses from remote service logs (Zeek conn.log, VPN/Citrix logs) against a list of IP addresses identified in other security alerts or known compromised host lists. Inner join connection logs with Threat Intelligence feeds for known malicious IPs connecting via remote services.\"
9.  **Assemble JSON:** Construct the final JSON object following the specified structure and order. Use the PIRs as the top-level keys. Use the evidence strings as the keys under \"Indicators\"."""),
                types.Part.from_text(text="""[
  {
    \"Has the adversary gained initial access to the network via external remote services? (TA0001 - Initial Access)\": {
      \"Indicators\": {
        \"T1133 - External Remote Services\": {
          \"Anomalous login attempts to external remote services (VPN, Citrix, RDP) from unusual geographic locations or outside normal business hours.\": {
            \"Data\": \"Windows Event ID 4624, Windows Event ID 4625, Zeek conn.log\",
            \"Data Platform\": \"Servers, Network devices\",
            \"NAI\": \"Insert site-specific NAI here\",
            \"Action\": \"Collect remote service authentication logs (Windows Event ID 4624, 4625, VPN/Citrix). Identify source IP addresses for successful and failed logins. Geolocate source IPs. Calculate frequency distribution of source countries/regions and login times. Flag logins from low-frequency locations or occurring outside typical business hours (e.g., using IQR or percentile analysis on login times).\"
          },
          \"Anomalous external usage patterns targeting potentially exposed remote services (e.g., Docker API, Kubernetes API) that do not require authentication.\": {
            \"Data\": \"Zeek conn.log\",
            \"Data Platform\": \"Network devices\",
            \"NAI\": \"Insert site-specific NAI here\",
            \"Action\": \"Monitor Zeek conn.log for connections to known ports associated with exposed services (e.g., Docker API 2375/2376, K8s API 6443/443, kubelet 10250). Analyze connection volume, source IPs, and request patterns. Use time series analysis to detect sudden spikes or changes in traffic patterns from external IPs to these services.\"
          }
        }
      }
    }
  },
  {
    \"Is the adversary maintaining persistence using external remote services? (TA0003 - Persistence)\": {
      \"Indicators\": {
        \"T1133 - External Remote Services\": {
          \"Successful logins to external remote services exhibiting unusual access patterns (e.g., multiple failed logins followed by success, logins at odd hours consistently over time) matching known Valid Accounts.\": {
            \"Data\": \"Windows Event ID 4624, Windows Event ID 4625\",
            \"Data Platform\": \"Servers\",
            \"NAI\": \"Insert site-specific NAI here\",
            \"Action\": \"Correlate successful logins (Event ID 4624, VPN/Citrix logs) with preceding failed attempts (Event ID 4625). Analyze login timestamps for patterns inconsistent with normal user behavior (e.g., logins consistently occurring late at night or on weekends). Use descriptive statistics to establish baseline login behavior per user/group and identify deviations.\"
          },
          \"Connections via external remote services originating from IP addresses associated with other known suspicious activities or previously compromised accounts.\": {
            \"Data\": \"Zeek conn.log\",
            \"Data Platform\": \"Network devices\",
            \"NAI\": \"Insert site-specific NAI here\",
            \"Action\": \"Cross-reference source IP addresses from remote service logs (Zeek conn.log, VPN/Citrix logs) against a list of IP addresses identified in other security alerts or known compromised host lists. Inner join connection logs with Threat Intelligence feeds for known malicious IPs connecting via remote services.\"
          }
        }
      }
    }
  }
]"""),
            ],
        ),
        # Turn 3 (user): the caller-supplied prompt that the model actually answers.
        types.Content(
            role="user",
            parts=[
                types.Part.from_text(text=prompt),
            ],
        ),
    ]
    # Request a JSON response and pass the JSON template as the system instruction.
    generate_content_config = types.GenerateContentConfig(
        response_mime_type="application/json",
        system_instruction=[
            types.Part.from_text(text="""Based on these definitions, please generate a detailed ASOM in the JSON format. The keys in the JSON object should correspond to the following ASOM components in this order: IR, Indicators, Evidence, Data Platform, Data, NAIs, Actions. Note that the key for IR should be replaced by the actual IR based on the description above. Also note that the key for evidence should be replaced with the actual form of evidence based on the description above. Here is an example template:

{
    \"(Insert IR here)\": {
        \"Indicators\": {
            \"(Insert form of evidence here)\": {
                \"Data\": \"\",
                \"Data Platform\": \"\",
                \"NAI\": \"\",
                \"Action\": \"\"
            },
            \"(Insert form of evidence here)\": {
                \"Data\": \"\",
                \"Data Platform\": \"\",
                \"NAI\": \"\",
                \"Action\": \"\"
            }
        }
    }
}"""),
        ],
    )

    # Single, non-streaming request; only the text of the reply is returned.
    # NOTE(review): response.text may be None when the reply has no text — confirm
    # against the SDK documentation and handle it in the caller if so.
    response = client.models.generate_content(
        model=model,
        contents=contents,
        config=generate_content_config,
    )
    return response.text

In [13]:
# --- Configuration ---
TARGET_MODEL = "gemini-2.5-flash-preview-04-17" # alternative: "gemini-2.5-pro-exp-03-25"

# --- Rate Limiting Logic ---
REQUESTS_PER_MINUTE = 10
MIN_SECONDS_BETWEEN_REQUESTS = 60 / REQUESTS_PER_MINUTE

last_request_time = 0 # Initialize to ensure the first request is not delayed
request_count_in_loop = 0 # Plans generated and saved *within this run*

# --- Main Loop ---
limit = 200 # Limit on the number of *successful* generations in this run

for technique in prompt_library:
    # --- Check if file exists ---
    # An existing file marks the technique as done, which makes the cell resumable.
    output_filename = f"{technique}.json"
    if os.path.exists(output_filename):
        log(f"Skipping {technique}: {output_filename} already exists.")
        continue

    # --- Rate Limiting Check ---
    elapsed_time = time.time() - last_request_time
    if elapsed_time < MIN_SECONDS_BETWEEN_REQUESTS:
        wait_time = MIN_SECONDS_BETWEEN_REQUESTS - elapsed_time
        log(f"Rate limit check: Waiting for {wait_time:.2f} seconds...")
        time.sleep(wait_time)

    # --- Make the Request ---
    log(f"Processing #{request_count_in_loop + 1}/{limit}. Started generating plan for {technique} ...")
    # Update last request time *before* making the request so that failed
    # requests also count towards the rate limit.
    last_request_time = time.time()
    try:
        response = generate(prompt_library[technique], target_model=TARGET_MODEL)

        # Validate *before* opening the output file: opening in "w" mode creates
        # the file immediately, so an empty reply would otherwise leave an empty
        # .json behind that every later run skips as "already exists".
        if not response:
            raise ValueError("model returned an empty response")

        # --- Save the Response ---
        # Written as UTF-8 explicitly, matching how the files are read back later.
        with open(output_filename, "w", encoding="utf-8") as f:
            f.write(response)
        request_count_in_loop += 1 # Count only plans that were actually saved
        log(f"Successfully generated plan for '{technique}' and saved as {output_filename}")

    except Exception as e:
        log(f"Error generating plan for {technique}: {e}")
        # Stop the run on the first error. Techniques that were already saved
        # are skipped when the cell is run again.
        break

    # --- Check Loop Limit ---
    if request_count_in_loop >= limit:
        log(f"Generation limit of {limit} reached. Exiting.")
        break

log("Processing finished.")

[2025-05-05T10:58:49.659795] Skipping T1055.011 - Extra Window Memory Injection: T1055.011 - Extra Window Memory Injection.json already exists.
[2025-05-05T10:58:49.660964] Skipping T1053.005 - Scheduled Task: T1053.005 - Scheduled Task.json already exists.
[2025-05-05T10:58:49.661937] Skipping T1205.002 - Socket Filters: T1205.002 - Socket Filters.json already exists.
[2025-05-05T10:58:49.662873] Skipping T1560.001 - Archive via Utility: T1560.001 - Archive via Utility.json already exists.
[2025-05-05T10:58:49.663925] Skipping T1021.005 - VNC: T1021.005 - VNC.json already exists.
[2025-05-05T10:58:49.664908] Skipping T1047 - Windows Management Instrumentation: T1047 - Windows Management Instrumentation.json already exists.
[2025-05-05T10:58:49.665891] Skipping T1113 - Screen Capture: T1113 - Screen Capture.json already exists.
[2025-05-05T10:58:49.666769] Skipping T1027.011 - Fileless Storage: T1027.011 - Fileless Storage.json already exists.
[2025-05-05T10:58:49.667821] Skipping T103

In [18]:
def update_json_files_structure(root_dir, dict_to_add_or_update):
    """
    Recursively finds JSON files in root_dir and adds or updates keys
    from dict_to_add_or_update into the dictionary value of the primary
    key within each item of the main list.

    A file is only touched when its root object is a list. Within that list,
    every dictionary item is inspected, and every value of that item that is
    itself a dictionary receives the key/value pairs. Non-conforming files,
    items, or values are logged as warnings and left as they were.

    Files are rewritten in place (indent=2, UTF-8, non-ASCII preserved) only
    when at least one key was added or changed. The new content is serialized
    in memory *before* the file is opened for writing, so a value that cannot
    be encoded as JSON never leaves a truncated file behind.

    Args:
        root_dir (str): The path to the directory to start searching from.
        dict_to_add_or_update (dict): A dictionary containing key-value pairs
                                      to add or update.

    Returns:
        None. Progress is reported through log() and a summary is printed.
        Per-file failures are caught, logged, and counted rather than raised.
    """
    if not isinstance(dict_to_add_or_update, dict):
        log("Error: 'dict_to_add_or_update' must be a dictionary.")
        return

    if not dict_to_add_or_update:
        # Deliberately not returning: the scan still reports which files are
        # malformed even though nothing can be modified.
        log("Warning: 'dict_to_add_or_update' is empty. No changes will be made.")

    if not os.path.isdir(root_dir):
        log(f"Error: Directory '{root_dir}' not found.")
        return

    log(f"Starting scan in directory: {root_dir}")
    log(f"Data to add/update: {dict_to_add_or_update}")
    files_processed = 0
    files_modified = 0
    errors_encountered = 0
    error_files = []
    warning_files = []

    for subdir, _, files in os.walk(root_dir):
        for filename in files:
            if not filename.lower().endswith('.json'):
                continue

            file_path = os.path.join(subdir, filename)
            log(f"Processing file: {file_path}")
            files_processed += 1
            modified_in_this_file = False
            try:
                with open(file_path, 'r', encoding='utf-8') as read_f:
                    data = json.load(read_f)

                if not isinstance(data, list):
                    log(f"  Warning: Root object in {file_path} is not a list. Skipping this file.")
                    warning_files.append(file_path)
                    continue  # Skip to the next file

                for item in data:
                    if not isinstance(item, dict):
                        log(f"  Warning: Found an item in the list that is not a dictionary in {file_path}. Skipping this item.")
                        warning_files.append(file_path)
                        continue

                    # Snapshot the keys so the item can be inspected safely
                    # while its nested dictionaries are being mutated.
                    for primary_key in list(item.keys()):
                        target_dict = item[primary_key]
                        if not isinstance(target_dict, dict):
                            log(f"  Warning: Value for key '{primary_key}' in {file_path} is not a dictionary. Skipping update for this key.")
                            warning_files.append(file_path)
                            continue

                        for key_to_update, value_to_update in dict_to_add_or_update.items():
                            # Only write (and mark the file dirty) when the key
                            # is missing or currently holds a different value.
                            if key_to_update not in target_dict or target_dict[key_to_update] != value_to_update:
                                target_dict[key_to_update] = value_to_update
                                modified_in_this_file = True

                if modified_in_this_file:
                    # Serialize first: opening with 'w' truncates the file, so
                    # any encoding failure must happen before that point.
                    serialized = json.dumps(data, indent=2, ensure_ascii=False)
                    with open(file_path, 'w', encoding='utf-8') as write_f:
                        write_f.write(serialized)
                    log(f"  Successfully updated: {file_path}")
                    files_modified += 1
                else:
                    log(f"  No changes needed for: {file_path}")

            except json.JSONDecodeError:
                log(f"  Error: Invalid JSON format in {file_path}. Skipping.")
                error_files.append(file_path)
                errors_encountered += 1
            except IOError as e:
                log(f"  Error: Could not read/write file {file_path}. Reason: {e}. Skipping.")
                error_files.append(file_path)
                errors_encountered += 1
            except Exception as e:
                # Best-effort batch job: report the exception type and move on
                # to the next file instead of aborting the whole scan.
                log(f"  Error: An unexpected {type(e).__name__} occurred processing {file_path}. Reason: {e}. Skipping.")
                error_files.append(file_path)
                errors_encountered += 1

    # A file can trigger many warnings; list each path once, in first-seen order.
    warning_files = list(dict.fromkeys(warning_files))

    print("\n--- Processing Summary ---")
    print(f"Total files scanned: {files_processed}")
    print(f"Files successfully modified: {files_modified}")
    # Everything that was neither rewritten nor counted as an error.
    skipped_unchanged = files_processed - files_modified - errors_encountered
    print(f"Files skipped, unchanged, or with warnings: {skipped_unchanged}")
    print(f"Errors encountered during processing: {errors_encountered}")
    print("--------------------------")
    print("Error files:\n",error_files)
    print()
    print("Warning files:\n",warning_files)

In [19]:
# Directory to scan; "./" is the notebook's current working directory,
# and every subdirectory beneath it is searched as well.
target_directory = "./"

# Key/value pairs to add to (or overwrite in) the nested dictionaries
# of each JSON file found under target_directory.
data_to_add = dict(version="1.0", last_updated="2025-05-04")

# Apply the metadata in place and print the processing summary.
update_json_files_structure(
    root_dir=target_directory,
    dict_to_add_or_update=data_to_add,
)

[2025-05-05T11:13:49.935571] Starting scan in directory: ./
[2025-05-05T11:13:49.936533] Data to add/update: {'version': '1.0', 'last_updated': '2025-05-04'}
[2025-05-05T11:13:49.965092] Processing file: ./enterprise-attack.json
[2025-05-05T11:13:51.732676] Processing file: ./T1033 - System Owner-User Discovery.json
[2025-05-05T11:13:52.227507]   Successfully updated: ./T1033 - System Owner-User Discovery.json
[2025-05-05T11:13:52.231365] Processing file: ./T1564.012 - File-Path Exclusions.json
[2025-05-05T11:13:52.511501]   Successfully updated: ./T1564.012 - File-Path Exclusions.json
[2025-05-05T11:13:52.518692] Processing file: ./T1055.011 - Extra Window Memory Injection.json
[2025-05-05T11:13:52.821366]   Successfully updated: ./T1055.011 - Extra Window Memory Injection.json
[2025-05-05T11:13:52.826719] Processing file: ./T1053.005 - Scheduled Task.json
[2025-05-05T11:13:53.203023]   Successfully updated: ./T1053.005 - Scheduled Task.json
[2025-05-05T11:13:53.206174] Processing fil