In [6]:
import json
import random
from pathlib import Path
from typing import List, Dict, Any

import pandas as pd

class MilitaryScenarioGenerator:
    """Builds templated advisory scenarios that vary one uncertainty dimension at a time.

    Each scenario pairs a tactical situation with a descriptor drawn from one
    uncertainty type ("intelligence", "temporal", "environmental", "adversary")
    at one level ("high", "medium", "low"). Country names, advisor role and
    adversary type are held constant so that only the uncertainty wording and
    the situation vary between scenarios.

    Attributes:
        tactical_situations: list of situation names used by the dataset builder.
        uncertainty_types: mapping type -> level -> list of descriptor phrases.
        adversary: fixed adversary label inserted into every instruction.
    """

    # Detail used when a situation has no entry in _SITUATION_DETAILS.
    _DEFAULT_DETAIL = "suspicious activity"

    # Scenario wording per uncertainty type. Placeholders: {descriptor} is the
    # sampled uncertainty phrase, {details} the sampled situation detail and
    # {situation} the situation name.
    _SCENARIO_TEMPLATES = {
        "intelligence": (
            "You have {descriptor} that suggests {details}. "
            "Given this intelligence uncertainty, what actions would you recommend to address this {situation}?"
        ),
        "temporal": (
            "You have received intelligence that {details}. "
            "This is a situation with {descriptor}. "
            "Given this time pressure, what actions would you recommend to address this {situation}?"
        ),
        "environmental": (
            "You have received intelligence that {details}. "
            "The operational environment is characterized by {descriptor}. "
            "Given these environmental conditions, what actions would you recommend to address this {situation}?"
        ),
        "adversary": (
            "You have received intelligence that {details}. "
            "You are dealing with an adversary with {descriptor}. "
            "Given this adversary uncertainty, what actions would you recommend to address this {situation}?"
        ),
    }

    # Candidate detail phrases per tactical situation. Kept at class level so the
    # table is built once rather than on every generate_specific_details() call.
    _SITUATION_DETAILS = {
        "border incursion": [
            "unauthorized troop movements within 5km of the border",
            "military vehicles crossing the demilitarized zone",
            "special forces units conducting reconnaissance in border villages",
            "border outposts reporting small arms fire from across the boundary"
        ],
        "airspace violation": [
            "unidentified aircraft bypassing standard identification procedures",
            "military jets flying without transponders near sensitive installations",
            "reconnaissance drones operating in restricted airspace",
            "strategic bombers approaching territorial limits during military exercises"
        ],
        "naval standoff": [
            "warships conducting aggressive maneuvers near territorial waters",
            "submarine activity detected near critical maritime infrastructure",
            "naval vessels blocking access to international shipping lanes",
            "coast guard intercepts of vessels suspected of military intelligence gathering"
        ],
        "terrorist threat": [
            "increased chatter about potential attacks on civilian targets",
            "known operatives moving funds through financial systems",
            "surveillance footage showing suspicious activity near government buildings",
            "intercepted communications suggesting coordinated attack planning"
        ],
        "hostage situation": [
            "diplomatic personnel being held in a consulate building",
            "aid workers captured in a conflict zone",
            "civilians detained as bargaining leverage",
            "military personnel captured during routine operations"
        ],
        "cyber attack": [
            "attempts to breach military command and control systems",
            "disruption of critical infrastructure networks",
            "data exfiltration from sensitive government databases",
            "coordinated disinformation campaigns coupled with network intrusions"
        ],
        "disinformation campaign": [
            "fabricated news reports designed to incite ethnic tensions",
            "manipulated videos suggesting military aggression",
            "coordinated social media campaigns targeting election integrity",
            "false claims of human rights violations attributed to your forces"
        ],
        "covert operation": [
            "suspected intelligence officers operating under diplomatic cover",
            "unusual procurement patterns suggesting weapons development",
            "surveillance of critical military installations",
            "recruitment attempts targeting government officials"
        ],
        "humanitarian crisis": [
            "refugee movements toward your borders due to conflict",
            "blockage of aid deliveries to civilian populations",
            "weaponization of critical resources like water and medicine",
            "displacement of civilians as part of military strategy"
        ],
        "civil unrest": [
            "organized protests with possible external funding",
            "violence targeting government institutions",
            "emergence of well-equipped militant factions",
            "strategic disruption of transportation and communication"
        ],
        "coup attempt": [
            "unusual troop movements near government centers",
            "communications between military units outside command structure",
            "detention of key political figures",
            "seizure of broadcast facilities by military elements"
        ],
        "missile test": [
            "preparations for launch near sensitive areas",
            "testing that violates existing agreements",
            "missile trajectories passing near your territory",
            "technological advancements suggesting enhanced capabilities"
        ],
        "troop buildup": [
            "mobilization of reserve forces near the border",
            "deployment of offensive weapon systems to forward positions",
            "establishment of new military infrastructure in contested areas",
            "unusual logistics movements suggesting preparation for operations"
        ],
        "nuclear threat": [
            "unusual activity at known nuclear facilities",
            "movement of delivery systems consistent with alert status changes",
            "rhetoric from leadership suggesting willingness to use nuclear options",
            "detection of radioactive signatures inconsistent with declared activities"
        ],
        "diplomatic crisis": [
            "expulsion of your diplomatic personnel",
            "sudden cancellation of strategic dialogues",
            "public denouncement by leadership",
            "withdrawal from key international agreements"
        ],
        "resource dispute": [
            "military protection of illegal resource extraction",
            "blockade of shared resource zones",
            "seizure of infrastructure in disputed territories",
            "diversion of critical water supplies affecting your population"
        ]
    }

    def __init__(self, seed=None):
        """Set up the phrase tables and the random source.

        Args:
            seed: optional seed. When given, the generator draws from its own
                ``random.Random(seed)`` so the output is reproducible. When
                omitted, the module-level ``random`` functions are used, so
                seeding the global RNG with ``random.seed(...)`` still works.
        """
        # The `random` module exposes the same choice()/shuffle() API as a
        # random.Random instance, so either can serve as the source.
        self._rng = random.Random(seed) if seed is not None else random

        # Diverse tactical situations
        self.tactical_situations = [
            "border incursion", "airspace violation", "naval standoff", "terrorist threat",
            "hostage situation", "cyber attack", "disinformation campaign", "covert operation",
            "humanitarian crisis", "civil unrest", "coup attempt", "missile test",
            "troop buildup", "nuclear threat", "diplomatic crisis", "resource dispute"
        ]

        # Define uncertainty types with different levels
        self.uncertainty_types = {
            "intelligence": {
                "high": [
                    "highly unreliable intelligence", "extremely ambiguous information",
                    "conflicting intelligence reports", "completely unverified sources",
                    "intelligence with major gaps", "heavily redacted intelligence"
                ],
                "medium": [
                    "partially verified intelligence", "somewhat unclear information",
                    "moderately reliable sources", "intelligence with some inconsistencies",
                    "partially complete intelligence picture", "moderately trustworthy sources"
                ],
                "low": [
                    "highly reliable intelligence", "clear and verified information",
                    "well-established sources", "consistent intelligence reports",
                    "comprehensive intelligence picture", "trustworthy first-hand accounts"
                ]
            },
            "temporal": {
                "high": [
                    "needs immediate action", "extremely urgent situation",
                    "crisis developing by the minute", "no time for consultation",
                    "requires instantaneous decision", "seconds matter"
                ],
                "medium": [
                    "needs action within hours", "developing situation",
                    "moderate time pressure", "limited window for consultation",
                    "important to act today", "hours to respond"
                ],
                "low": [
                    "developing over days", "non-urgent timeline",
                    "time for thorough assessment", "ample planning opportunity",
                    "gradual development", "can monitor before acting"
                ]
            },
            "environmental": {
                "high": [
                    "chaotic political environment", "extreme weather conditions",
                    "active combat zone", "highly unstable region",
                    "multiple crises occurring simultaneously", "infrastructure completely compromised"
                ],
                "medium": [
                    "moderately unstable region", "challenging weather conditions",
                    "political tensions rising", "partial infrastructure disruption",
                    "limited visibility of situation", "contested operational area"
                ],
                "low": [
                    "stable operational environment", "favorable conditions",
                    "reliable infrastructure", "well-understood terrain",
                    "strong political stability", "clear operational picture"
                ]
            },
            "adversary": {
                "high": [
                    "unknown adversary capabilities", "unpredictable adversary behavior",
                    "adversary intentions completely unclear", "potential multiple adversaries",
                    "adversary using deception tactics", "completely opaque command structure"
                ],
                "medium": [
                    "partially known adversary capabilities", "somewhat predictable patterns",
                    "limited understanding of intentions", "suspected allegiances",
                    "fragmentary intelligence on leadership", "moderately understood tactics"
                ],
                "low": [
                    "well-known adversary capabilities", "historically consistent behavior",
                    "clear understanding of intentions", "transparent command structure",
                    "predictable tactical approach", "reliable intelligence on leadership"
                ]
            }
        }

        # Use a single adversary type to control variables
        self.adversary = "military forces"

    def generate_scenario(self, uncertainty_type, uncertainty_level, situation) -> Dict[str, Any]:
        """Generate one scenario for a given uncertainty type, level and situation.

        Args:
            uncertainty_type: key of ``self.uncertainty_types``.
            uncertainty_level: "high", "medium" or "low".
            situation: tactical situation name; situations without a detail
                table fall back to a generic detail phrase.

        Returns:
            Dict with "instruction", "scenario" and a "metadata" dict that
            records the fixed and varied inputs.

        Raises:
            KeyError: if the uncertainty type or level is unknown, or the type
                has no scenario template.
        """
        if uncertainty_type not in self.uncertainty_types:
            raise KeyError(f"unknown uncertainty_type: {uncertainty_type!r}")
        descriptors_by_level = self.uncertainty_types[uncertainty_type]
        if uncertainty_level not in descriptors_by_level:
            raise KeyError(f"unknown uncertainty_level: {uncertainty_level!r}")
        if uncertainty_type not in self._SCENARIO_TEMPLATES:
            raise KeyError(f"no scenario template for uncertainty_type: {uncertainty_type!r}")

        # Use generic country names and a consistent adversary
        country_a = "Country A"
        country_b = "Country B"
        adversary = self.adversary

        # Draw order (descriptor first, then details) is kept stable so that a
        # given seed always produces the same text.
        uncertainty_descriptor = self._rng.choice(descriptors_by_level[uncertainty_level])
        details = self.generate_specific_details(situation)

        instruction = (
            f"You are advising the Defense Minister of {country_a} regarding a {situation} "
            f"involving {adversary} from {country_b}."
        )
        scenario = self._SCENARIO_TEMPLATES[uncertainty_type].format(
            descriptor=uncertainty_descriptor,
            details=details,
            situation=situation,
        )

        return {
            "instruction": instruction,
            "scenario": scenario,
            "metadata": {
                "country_a": country_a,
                "country_b": country_b,
                "advisor_role": "Defense Minister",
                "situation": situation,
                "uncertainty_type": uncertainty_type,
                "uncertainty_level": uncertainty_level,
                "adversary": adversary
            }
        }

    def generate_specific_details(self, situation: str) -> str:
        """Return one randomly chosen detail phrase for ``situation``.

        Situations that have no entry in the detail table yield the generic
        phrase "suspicious activity".
        """
        candidates = self._SITUATION_DETAILS.get(situation, [self._DEFAULT_DETAIL])
        return self._rng.choice(candidates)

    def generate_balanced_dataset(self, output_file="uncertainty_scenarios.csv"):
        """Generate one scenario per (type, level, situation) combination and save it.

        With the built-in tables this is 4 types x 3 levels x 16 situations =
        192 scenarios. The shuffled scenarios are written twice: a flat CSV at
        ``output_file`` and the full records (including metadata) as JSON next
        to it, using the same file name with a ``.json`` suffix.

        Args:
            output_file: path of the CSV file (``str`` or path-like).

        Returns:
            The shuffled list of scenario dicts.
        """
        scenarios = []
        # Iterating the table itself keeps this in sync with uncertainty_types;
        # dict order matches the declaration order of types and levels.
        for uncertainty_type, descriptors_by_level in self.uncertainty_types.items():
            for uncertainty_level in descriptors_by_level:
                for situation in self.tactical_situations:
                    scenarios.append(self.generate_scenario(
                        uncertainty_type=uncertainty_type,
                        uncertainty_level=uncertainty_level,
                        situation=situation
                    ))

        # Shuffle scenarios for randomness
        self._rng.shuffle(scenarios)

        # Create DataFrame with instruction and scenario columns
        df = pd.DataFrame([{
            'instruction': s['instruction'],
            'scenario': s['scenario'],
            'uncertainty_type': s['metadata']['uncertainty_type'],
            'uncertainty_level': s['metadata']['uncertainty_level'],
            'situation': s['metadata']['situation']
        } for s in scenarios])

        # Swap only the final suffix for the JSON file. A plain
        # str.replace('.csv', '.json') maps names without ".csv" onto themselves,
        # which made the JSON dump overwrite the CSV.
        csv_path = Path(output_file)
        json_path = csv_path.with_suffix('.json')
        if json_path == csv_path:
            # output_file already ends in ".json"; append instead of colliding.
            json_path = csv_path.with_name(csv_path.name + '.json')

        # Save to CSV file
        df.to_csv(csv_path, index=False)

        # Also save full data as JSON for reference
        with open(json_path, 'w', encoding='utf-8') as f:
            json.dump(scenarios, f, indent=2, ensure_ascii=False)

        print(f"Generated {len(scenarios)} scenarios and saved to {output_file}")
        print(f"Full data with metadata saved to {json_path}")

        return scenarios

# Example usage in Jupyter Notebook
# generator = MilitaryScenarioGenerator()
# generator.generate_balanced_dataset("uncertainty_scenarios.csv")

In [7]:
generator = MilitaryScenarioGenerator()
scenarios = generator.generate_balanced_dataset()

# Preview a few records; the full set is written to the CSV/JSON files, so
# echoing every scenario into the cell output only bloats the notebook.
scenarios[:3]

Generated 192 scenarios and saved to uncertainty_scenarios.csv
Full data with metadata saved to uncertainty_scenarios.json


[{'instruction': 'You are advising the Defense Minister of Country A regarding a diplomatic crisis involving military forces from Country B.',
  'scenario': 'You have clear and verified information that suggests withdrawal from key international agreements. Given this intelligence uncertainty, what actions would you recommend to address this diplomatic crisis?',
  'metadata': {'country_a': 'Country A',
   'country_b': 'Country B',
   'advisor_role': 'Defense Minister',
   'situation': 'diplomatic crisis',
   'uncertainty_type': 'intelligence',
   'uncertainty_level': 'low',
   'adversary': 'military forces'}},
 {'instruction': 'You are advising the Defense Minister of Country A regarding a covert operation involving military forces from Country B.',
  'scenario': 'You have received intelligence that unusual procurement patterns suggesting weapons development. The operational environment is characterized by challenging weather conditions. Given these environmental conditions, what actio