# In [19]:
import json
import os
import random
import networkx as nx
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
from mesa import Agent, Model
from mesa.space import NetworkGrid
from mesa.datacollection import DataCollector
from mesa.time import SimultaneousActivation

# ===== Load NLP parameters =====
# Reads the NLP stage's artifacts from `input_dir`:
#   - model_params.json     -> topic count, coherence score, ESG keywords, co-occurrence map
#   - sentiment_results.csv -> one row per review with "sentiment" and "topic_distribution"
# Any artifact that is missing is replaced by demonstration data so the rest of
# the file can still run.
input_dir = "./output"
params_path = os.path.join(input_dir, "model_params.json")
sentiment_path = os.path.join(input_dir, "sentiment_results.csv")

if not os.path.exists(input_dir):
    os.makedirs(input_dir, exist_ok=True)
    print(f"Created directory: {input_dir}")

# Check each file individually: an output directory that exists but lacks one of
# the artifacts would otherwise crash on the loads below.
created_dummy_files = False
if not os.path.exists(params_path):
    dummy_params = {
        "best_topic_count": 10, "coherence_score": 0.5,
        "esg_keywords": [], "keyword_cooccurrence": {}
    }
    with open(params_path, "w", encoding="utf-8") as f:
        json.dump(dummy_params, f)
    created_dummy_files = True

with open(params_path, "r", encoding="utf-8") as f:
    params = json.load(f)

if not os.path.exists(sentiment_path):
    # Size the dummy topic vectors from the loaded parameters so they stay
    # consistent with whichever model_params.json is actually in use.
    dummy_topic_count = params["best_topic_count"]
    dummy_sentiment_df = pd.DataFrame({
        "sentiment": [random.uniform(4.0, 4.8) for _ in range(100)],
        "topic_distribution": [[random.random() for _ in range(dummy_topic_count)] for _ in range(100)]
    })
    dummy_sentiment_df.to_csv(sentiment_path, index=False)
    created_dummy_files = True

if created_dummy_files:
    print("Created dummy NLP output files for demonstration.")

BEST_TOPIC_COUNT = params["best_topic_count"]
COHERENCE_SCORE = params["coherence_score"]
ESG_KEYWORDS = params["esg_keywords"]
KEYWORD_COOC = params["keyword_cooccurrence"]
print(f"Loaded model parameters: {BEST_TOPIC_COUNT} topics, coherence={COHERENCE_SCORE:.4f}")

# Load sentiment and topic data from NLP output
sentiment_df = pd.read_csv(sentiment_path)
sentiment_values = sentiment_df["sentiment"].tolist()
# The CSV round-trip stores each topic vector as a string; values that are
# already non-strings are passed through unchanged.
topic_vectors = sentiment_df["topic_distribution"].apply(lambda x: json.loads(x) if isinstance(x, str) else x).tolist()

# ===== ABM Agent and Model Classes =====
class ReviewerAgent(Agent):
    """Reviewer whose sentiment and topic mix drift toward like-minded neighbours.

    The agent follows the two-phase protocol expected by
    ``SimultaneousActivation``: ``step()`` only *stages* the next state from the
    current state of its neighbours, and ``advance()`` commits it. This keeps an
    agent's update independent of the order in which agents are stepped.

    Attributes:
        sentiment: Current sentiment; clamped to [1.0, 5.0] in ``advance()``.
        topic_vector: Current topic weights; renormalised to sum to 1 in
            ``advance()`` whenever the sum is positive.
        intervened: Flag set by the model when an intervention hits this agent.
    """

    def __init__(self, unique_id, model, initial_sentiment, topic_vector):
        super().__init__(unique_id, model)
        self.sentiment = initial_sentiment
        self.topic_vector = topic_vector
        self.intervened = False  # Track intervention status
        # Staged state written by step() and committed by advance().
        self._next_sentiment = initial_sentiment
        self._next_topic_vector = topic_vector

    def step(self):
        """Stage the next sentiment and topic vector without applying them.

        Only neighbours whose sentiment lies within the model's
        ``confidence_threshold`` of this agent's sentiment exert influence. With
        no such neighbour the current state is carried forward unchanged.
        """
        # Re-stage from the live state on every step so that changes made
        # directly on the agent between steps (e.g. model interventions) are
        # picked up.
        self._next_sentiment = self.sentiment
        self._next_topic_vector = self.topic_vector

        neighbor_agents = self.model.grid.get_neighbors(self.pos, include_center=False)
        if not neighbor_agents:
            return

        confidence_threshold = self.model.confidence_threshold
        relevant_neighbors = [
            a for a in neighbor_agents
            if abs(a.sentiment - self.sentiment) <= confidence_threshold
        ]
        if not relevant_neighbors:
            return

        influence = self.model.influence_strength

        # Move a fraction `influence` of the way toward the neighbours' mean.
        avg_neighbor_sentiment = np.mean([a.sentiment for a in relevant_neighbors])
        self._next_sentiment = self.sentiment + influence * (avg_neighbor_sentiment - self.sentiment)

        # Uniform per-topic weights; the blended vector is renormalised in advance().
        esg_weights = [1 / BEST_TOPIC_COUNT] * BEST_TOPIC_COUNT
        avg_neighbor_topics = np.mean([a.topic_vector for a in relevant_neighbors], axis=0)
        self._next_topic_vector = [
            (1 - influence) * self.topic_vector[i]
            + influence * esg_weights[i] * avg_neighbor_topics[i]
            for i in range(BEST_TOPIC_COUNT)
        ]

    def advance(self):
        """Commit the staged state, clamping sentiment and renormalising topics."""
        self.sentiment = max(1.0, min(5.0, self._next_sentiment))
        staged_topics = self._next_topic_vector
        total = sum(staged_topics)
        if total > 0:
            self.topic_vector = [p / total for p in staged_topics]
        else:
            # Nothing to normalise by; keep the staged weights as they are.
            self.topic_vector = staged_topics

class ESGReviewModel(Model):
    """Opinion-dynamics model of reviewers on a Barabasi-Albert network.

    One ``ReviewerAgent`` is placed on every node of a ``barabasi_albert_graph``
    (``m=2``) and initialised from the module-level ``sentiment_values`` and
    ``topic_vectors``. Each model step may apply a random intervention to 10% of
    the agents, then records the model reporters and advances the schedule.

    Args:
        N: Number of agents / graph nodes.
        influence_strength: Fraction by which an agent moves toward its
            neighbours per step.
        confidence_threshold: Maximum sentiment gap for a neighbour to count.
        intervention_rate: Per-step probability that an intervention fires.
        baseline_df: Model-level dataframe of a no-intervention run, used by
            ``get_intervention_impact``. May be ``None``.
        seed: Seed for the graph generator. Pass it by keyword: Mesa seeds
            ``self.random`` from the ``seed`` keyword argument, and that
            generator drives the interventions.

    Raises:
        ValueError: If no sentiment/topic data is available.
    """

    INTERVENTION_FRACTION = 0.1  # share of agents hit by one intervention
    SENTIMENT_BOOST = 0.5        # added to sentiment, capped at 5.0
    TOPIC_BOOST = 0.15           # added to each boosted topic weight
    BOOSTED_TOPICS = (4, 5)      # topic indices promoted by an intervention
    ADOPTION_THRESHOLD = 0.25    # topic weight above which a topic counts as adopted

    def __init__(self, N=500, influence_strength=0.1, confidence_threshold=1.0, intervention_rate=0.1, baseline_df=None, seed=None):
        super().__init__()
        if not sentiment_values or not topic_vectors:
            raise ValueError("No sentiment/topic data available to initialise agents.")

        self.num_agents = N
        self.influence_strength = influence_strength
        self.confidence_threshold = confidence_threshold
        self.intervention_rate = intervention_rate
        self.G = nx.barabasi_albert_graph(n=N, m=2, seed=seed)
        self.grid = NetworkGrid(self.G)
        self.schedule = SimultaneousActivation(self)
        self.intervention_steps = []
        self.baseline_df = baseline_df
        self.seed = seed

        # Reuse the NLP profiles cyclically so N may exceed the number of rows
        # in the data instead of raising IndexError.
        profile_count = min(len(sentiment_values), len(topic_vectors))
        uniform_topics = [1 / BEST_TOPIC_COUNT] * BEST_TOPIC_COUNT
        for i, node in enumerate(self.G.nodes()):
            profile = i % profile_count
            raw_topics = topic_vectors[profile]
            total = sum(raw_topics)
            normalized_topics = [p / total for p in raw_topics] if total > 0 else list(uniform_topics)
            agent = ReviewerAgent(i, self, sentiment_values[profile], normalized_topics)
            self.schedule.add(agent)
            self.grid.place_agent(agent, node)

        model_reporters = {
            "AverageSentiment": lambda m: np.mean([a.sentiment for a in m.schedule.agents]),
            # Mean Shannon entropy of the agents' topic vectors, normalised by log(topic count).
            "TopicDiversity": lambda m: np.mean([-sum(p * np.log(p + 1e-10) for p in a.topic_vector) / np.log(BEST_TOPIC_COUNT) for a in m.schedule.agents]) if m.schedule.agents else 0.0
        }
        threshold = self.ADOPTION_THRESHOLD
        for i in range(BEST_TOPIC_COUNT):
            # `idx=i` binds the topic index now; a plain closure would see only the last i.
            model_reporters[f"Topic_{i}_Adoption"] = lambda m, idx=i: sum(1 for a in m.schedule.agents if a.topic_vector[idx] > threshold) / m.num_agents

        self.datacollector = DataCollector(model_reporters=model_reporters)
        self.degree_centrality = nx.degree_centrality(self.G)
        # Computed on first access; see the `betweenness_centrality` property.
        self._betweenness_centrality = None

    @property
    def betweenness_centrality(self):
        """Betweenness centrality per node, computed once on first access.

        Deferred because it is far more expensive than degree centrality and
        nothing in the model itself reads it.
        """
        if self._betweenness_centrality is None:
            self._betweenness_centrality = nx.betweenness_centrality(self.G)
        return self._betweenness_centrality

    def step(self):
        """Maybe intervene, record the reporters, then advance all agents."""
        # Draw from the model's own generator rather than the global `random`
        # module so that runs with the same seed repeat exactly.
        if self.random.random() < self.intervention_rate:
            self.intervention_steps.append(self.schedule.steps)
            sample_size = int(self.num_agents * self.INTERVENTION_FRACTION)
            agents_to_intervene = self.random.sample(list(self.schedule.agents), sample_size)
            boost_topics = BEST_TOPIC_COUNT > max(self.BOOSTED_TOPICS)
            for agent in agents_to_intervene:
                agent.sentiment = min(5.0, agent.sentiment + self.SENTIMENT_BOOST)
                # Work on a copy so no other reference to the old list is mutated.
                boosted = list(agent.topic_vector)
                if boost_topics:
                    for topic in self.BOOSTED_TOPICS:
                        boosted[topic] += self.TOPIC_BOOST
                agent.intervened = True
                total = sum(boosted)
                if total > 0:
                    boosted = [p / total for p in boosted]
                agent.topic_vector = boosted

        # Data is collected before the schedule advances, so row k holds the
        # state at the start of step k (after any intervention in that step).
        self.datacollector.collect(self)
        self.schedule.step()

    def get_intervention_impact(self):
        """Return cumulative intervention effects relative to the baseline.

        For every recorded intervention step ``s`` the mean of a metric from
        row ``s`` onward is compared between this run and ``baseline_df``; the
        differences are integrated with the trapezoidal rule (``dx=1``) and
        divided by the number of recorded rows.

        Returns:
            A ``(cie_adoption, cie_sentiment)`` tuple: a dict keyed
            ``"CIE_Topic_<i>"`` and a float. Both are zero when no intervention
            happened or no baseline was supplied.

        NOTE(review): with exactly one intervention the trapezoidal integral of
        a single sample is 0 — confirm this is the intended definition.
        """
        if not self.intervention_steps or self.baseline_df is None:
            return {f"CIE_Topic_{i}": 0.0 for i in range(BEST_TOPIC_COUNT)}, 0.0

        current_df = self.datacollector.get_model_vars_dataframe()
        total_steps = len(current_df)
        baseline_df = self.baseline_df
        # NumPy 2.0 renamed trapz to trapezoid; support both.
        integrate = getattr(np, "trapezoid", None) or np.trapz

        def cumulative_effect(column):
            # Positional slicing: "from the intervention step to the end of the run".
            gaps = [
                current_df[column].iloc[max(0, s):].mean() - baseline_df[column].iloc[max(0, s):].mean()
                for s in self.intervention_steps
            ]
            return integrate(gaps, dx=1) / total_steps

        cie_adoption = {
            f"CIE_Topic_{i}": cumulative_effect(f"Topic_{i}_Adoption")
            for i in range(BEST_TOPIC_COUNT)
        }
        cie_sentiment = cumulative_effect("AverageSentiment")

        return cie_adoption, cie_sentiment

# ===== Run Simulation & Generate Plots =====
# Script entry point: runs a no-intervention baseline, then each scenario,
# averaging every configuration over seeded replications, writes one CSV per
# scenario and saves the figures into `input_dir`.
if __name__ == "__main__":
    N_RUNS = 10    # seeded replications per configuration
    N_STEPS = 100  # model steps per replication

    # --- 1. Run Baseline Simulation Globally ---
    print("--- Running global baseline simulation ---")
    baseline_runs = []
    for run_seed in range(N_RUNS):
        baseline_model = ESGReviewModel(N=500, influence_strength=0.1, confidence_threshold=1.0, intervention_rate=0.0, seed=run_seed)
        for _ in range(N_STEPS):
            baseline_model.step()
        baseline_runs.append(baseline_model.datacollector.get_model_vars_dataframe())
    # Average the replications row by row (the index is the step number).
    baseline_df = pd.concat(baseline_runs).groupby(level=0).mean()

    # --- 2. Run Main Simulations ---
    scenarios_config = [
        {"name": "Scenario 1 (Baseline)", "params": {"N": 500, "influence_strength": 0.15, "confidence_threshold": 1.0, "intervention_rate": 0.1}},
        {"name": "Scenario 2 (Stagnation)", "params": {"N": 500, "influence_strength": 0.05, "confidence_threshold": 0.5, "intervention_rate": 0.0}},
        {"name": "Scenario 3 (Acceleration)", "params": {"N": 500, "influence_strength": 0.2, "confidence_threshold": 1.5, "intervention_rate": 0.2}}
    ]

    scenario_results = []
    print("--- Running all simulation scenarios ---")
    for scenario in scenarios_config:
        print(f"Running {scenario['name']}...")
        run_results = []
        for run_seed in range(N_RUNS):
            model = ESGReviewModel(**scenario['params'], baseline_df=baseline_df, seed=run_seed)
            for _ in range(N_STEPS):
                model.step()
            results_df = model.datacollector.get_model_vars_dataframe()
            cie_adoption, cie_sentiment = model.get_intervention_impact()
            # The impact figures are per-run scalars; they are broadcast to every row.
            for key, value in cie_adoption.items():
                results_df[key] = value
            results_df["CIESentiment"] = cie_sentiment
            run_results.append(results_df)
        avg_results_df = pd.concat(run_results).groupby(level=0).mean()
        scenario_results.append({"name": scenario['name'], "data": avg_results_df})

        # The second word of the scenario name is its number ("1", "2", "3").
        output_file = os.path.join(input_dir, f"abm_results_{scenario['name'].split(' ')[1]}.csv")
        avg_results_df.to_csv(output_file, index=True)
        print(f"Results saved to {output_file}")

    # --- 3. Generate Plots ---
    print("\n--- Generating plots ---")

    plt.style.use('ggplot')
    plt.rcParams.update({
        'font.size': 12, 'axes.labelsize': 14, 'axes.titlesize': 16,
        'legend.fontsize': 11, 'xtick.labelsize': 12, 'ytick.labelsize': 12,
        'axes.linewidth': 1.5, 'lines.linewidth': 2.0, 'figure.figsize': (11, 7),
        'font.family': 'serif',
    })

    distinct_colors = [
        '#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd',
        '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf'
    ]

    # === Individual Scenario Plots (Adoption and Sentiment Trends) ===
    for result in scenario_results:
        name, df = result['name'], result['data']
        fig, ax1 = plt.subplots(figsize=(11, 7))

        # Left axis: one line per topic adoption series.
        adoption_cols = [col for col in df.columns if 'Adoption' in col]
        ax1.set_xlabel('Simulation Step')
        ax1.set_ylabel('Topic Adoption (Proportion)', color='k')
        for i, col in enumerate(adoption_cols):
            ax1.plot(df.index, df[col], label=col.replace('_', ' ').replace('Topic ', 'T'), color=distinct_colors[i % len(distinct_colors)])
        ax1.tick_params(axis='y', labelcolor='k')
        ax1.set_ylim(0, 1.05)
        ax1.grid(True, linestyle='--', alpha=0.6)

        # Right axis: average sentiment on its own scale.
        ax1_twin = ax1.twinx()
        ax1_twin.set_ylabel('Average Sentiment (1-5 Scale)', color='crimson')
        ax1_twin.plot(df.index, df['AverageSentiment'], label='Avg Sentiment', color='crimson', linestyle='--', linewidth=2.5)
        ax1_twin.tick_params(axis='y', labelcolor='crimson')
        ax1_twin.set_ylim(4, 5)

        plt.title(f'{name}: Adoption and Sentiment Trends')
        # Merge both axes' handles into a single legend below the plot.
        lines1, labels1 = ax1.get_legend_handles_labels()
        lines2, labels2 = ax1_twin.get_legend_handles_labels()
        ax1.legend(lines1 + lines2, labels1 + labels2, loc='upper center', bbox_to_anchor=(0.5, -0.15), frameon=True, ncol=5)
        fig.tight_layout(rect=[0, 0.1, 1, 1])

        fig.savefig(os.path.join(input_dir, f"{name.replace(' ', '_')}_trends.png"), dpi=300, bbox_inches='tight')
        plt.close(fig)

    # === Network Centrality Plot ===
    # NOTE: this model is constructed but never stepped, so 'AdoptionInfluence'
    # reflects the agents' initial topic vectors.
    model = ESGReviewModel(**scenarios_config[0]['params'], baseline_df=baseline_df, seed=0)  # Use seed 0 for consistency
    centrality_df = pd.DataFrame({
        'DegreeCentrality': list(model.degree_centrality.values()),
        'BetweennessCentrality': list(model.betweenness_centrality.values()),
        'AdoptionInfluence': [sum(a.topic_vector[i] > 0.25 for i in range(BEST_TOPIC_COUNT)) / BEST_TOPIC_COUNT for a in model.schedule.agents]
    })
    # Drop agents with no topic above the adoption threshold.
    centrality_df_filtered = centrality_df[centrality_df['AdoptionInfluence'] > 0.0]

    centrality_fig = plt.figure(figsize=(8, 6))
    sns.scatterplot(data=centrality_df_filtered, x='DegreeCentrality', y='BetweennessCentrality', size='AdoptionInfluence',
                    hue='AdoptionInfluence', palette='viridis', alpha=0.8)
    plt.title('Network Centrality vs. Adoption Influence')
    plt.xlabel('Degree Centrality')
    plt.ylabel('Betweenness Centrality')
    plt.tight_layout()

    plt.savefig(os.path.join(input_dir, "network_centrality.png"), dpi=300, bbox_inches='tight')
    plt.close(centrality_fig)

    # === Evolution of Topic Diversity Across Scenarios Plot ===
    fig, ax = plt.subplots(figsize=(11, 7))
    # enumerate() supplies the colour index directly; looking the dict up with
    # list.index() would compare dicts that contain DataFrames by equality.
    for position, result in enumerate(scenario_results):
        name, df = result['name'], result['data']
        ax.plot(df.index, df['TopicDiversity'], label=name, color=distinct_colors[position % len(distinct_colors)])

    ax.set_xlabel('Simulation Step')
    ax.set_ylabel('Topic Diversity (Normalized Entropy)')
    ax.set_title('Evolution of Topic Diversity Across Scenarios')
    ax.set_ylim(0.5, 0.85)
    ax.legend()
    ax.grid(True, linestyle='--', alpha=0.6)
    fig.tight_layout()
    fig.savefig(os.path.join(input_dir, "topic_diversity_evolution.png"), dpi=300, bbox_inches='tight')
    plt.close(fig)

    print("Generated all plots and results.")

# Loaded model parameters: 10 topics, coherence=0.4635
# --- Running global baseline simulation ---
# --- Running all simulation scenarios ---
# Running Scenario 1 (Baseline)...
# Results saved to ./output/abm_results_1.csv
# Running Scenario 2 (Stagnation)...
# Results saved to ./output/abm_results_2.csv
# Running Scenario 3 (Acceleration)...
# Results saved to ./output/abm_results_3.csv
#
# --- Generating plots ---
# Generated all plots and results.
