In [5]:
!pip install seaborn

# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx
from sklearn.ensemble import IsolationForest
import seaborn as sns
import logging
from pathlib import Path
import os
import warnings

# Suppress all warnings
warnings.filterwarnings('ignore')

# Enable inline plotting
%matplotlib inline
plt.style.use('seaborn')

# Switch into the folder that holds the CSV inputs so the relative file names
# in CONFIG resolve against it
working_dir = r"C:\Users\Service Casket\Desktop\Network Analysis Tool"
os.chdir(working_dir)

# Analysis settings: node/edge CSV files for the clean and infected networks,
# the contamination value handed to the anomaly detector, and the folder that
# receives plots and the report
CONFIG = dict(
    clean_nodes='ICS_OT Nodes.csv',
    clean_edges='ICS_OT Edges.csv',
    infected_nodes='ICS_OT NodesInfected.csv',
    infected_edges='ICS_OT EdgesInfected.csv',
    anomaly_threshold=0.1,
    output_dir='network_analysis_outputs',
)

# Make sure the output folder exists (a no-op when it is already there)
Path(CONFIG['output_dir']).mkdir(exist_ok=True)

def load_graph(nodes_file, edges_file):
    """Build an undirected graph from a node CSV and an edge CSV.

    Args:
        nodes_file: Path to a CSV with ``Id`` and ``Label`` columns.
        edges_file: Path to a CSV with ``Source`` and ``Target`` columns and
            an optional ``Weight`` column.

    Returns:
        tuple: ``(G, nodes_df, edges_df)`` where ``G`` is the ``nx.Graph`` and
        the two DataFrames are the raw CSV contents.

    Each node stores its label in the ``label`` attribute and each edge stores
    its weight in the ``weight`` attribute (1 when the edge file has no
    ``Weight`` column).
    """
    nodes_df = pd.read_csv(nodes_file)
    edges_df = pd.read_csv(edges_file)
    G = nx.Graph()

    # Iterate column-wise instead of with iterrows(): iterrows() builds one
    # Series per row and does not preserve dtypes, so integer ids are turned
    # into floats whenever the row also holds a float (e.g. a Weight column).
    G.add_nodes_from(
        (node_id, {'label': label})
        for node_id, label in zip(nodes_df['Id'], nodes_df['Label'])
    )

    if 'Weight' in edges_df.columns:
        weights = edges_df['Weight']
    else:
        # No weight column: every edge gets the default weight of 1.
        weights = [1] * len(edges_df)

    G.add_weighted_edges_from(
        zip(edges_df['Source'], edges_df['Target'], weights)
    )

    return G, nodes_df, edges_df

def calculate_centrality(G):
    """Return a per-node table of centrality and clustering scores.

    The result is a DataFrame indexed by node (sorted by index) with one
    column for each measure computed on ``G``.
    """
    metric_functions = (
        ('Degree Centrality', nx.degree_centrality),
        ('Betweenness Centrality', nx.betweenness_centrality),
        ('Closeness Centrality', nx.closeness_centrality),
        ('Eigenvector Centrality', nx.eigenvector_centrality),
        ('Clustering Coefficient', nx.clustering),
    )
    # Each metric function returns a {node: score} mapping; the DataFrame
    # constructor lines them up by node.
    scores = {name: compute(G) for name, compute in metric_functions}
    table = pd.DataFrame(scores)
    return table.sort_index()

def visualize_network_matplotlib(G, title, node_color='lightblue', figsize=(12, 8)):
    """Draw ``G`` with a spring layout, save it as a PNG and show it.

    The image is written to ``CONFIG['output_dir']`` under a file name derived
    from ``title`` with spaces replaced by underscores.
    """
    plt.figure(figsize=figsize)

    layout = nx.spring_layout(G, k=1, iterations=50)
    node_labels = nx.get_node_attributes(G, 'label')
    draw_options = {
        'node_color': node_color,
        'node_size': 1000,
        'with_labels': True,
        'labels': node_labels,
        'font_size': 8,
        'font_weight': 'bold',
        'edge_color': 'gray',
    }
    nx.draw(G, layout, **draw_options)
    plt.title(title)

    file_name = f"{title.replace(' ', '_')}.png"
    output_path = os.path.join(CONFIG['output_dir'], file_name)
    plt.savefig(output_path, format="png", dpi=300, bbox_inches='tight')

    # Display the figure, then release it.
    plt.show()
    plt.close()

def visualize_centrality_heatmap(centrality_changes):
    """Plot the centrality differences as an annotated heatmap.

    The figure is saved as ``centrality_changes_heatmap.png`` in
    ``CONFIG['output_dir']`` and then shown.
    """
    plt.figure(figsize=(12, 8))

    # Diverging palette centred on zero, cells annotated to three decimals.
    heatmap_options = {
        'annot': True,
        'cmap': 'coolwarm',
        'center': 0,
        'fmt': '.3f',
    }
    sns.heatmap(centrality_changes, **heatmap_options)
    plt.title('Changes in Centrality Measures')
    plt.tight_layout()

    target = os.path.join(CONFIG['output_dir'], 'centrality_changes_heatmap.png')
    plt.savefig(target, format="png", dpi=300, bbox_inches='tight')

    # Display the figure, then release it.
    plt.show()
    plt.close()

def detect_anomalies(df, contamination=0.1):
    """Label each row of ``df`` with an Isolation Forest verdict.

    Returns a copy of ``df`` with an extra ``anomaly`` column holding the
    labels produced by ``IsolationForest.fit_predict``; ``df`` itself is left
    unchanged.
    """
    model = IsolationForest(contamination=contamination, random_state=42)
    labels = model.fit_predict(df)
    # assign() returns a new DataFrame, so the caller's frame is not modified.
    return df.assign(anomaly=labels)

def explain_anomalies(df, threshold=1.5):
    """Explain each flagged row by listing the columns that deviate strongly.

    Args:
        df: DataFrame of numeric feature columns plus an ``anomaly`` column in
            which ``-1`` marks a flagged row.
        threshold: Minimum absolute z-score for a column to be reported.

    Returns:
        dict: Maps the index label of every flagged row to a list of
        human-readable strings, one per column whose z-score (computed against
        the mean and sample standard deviation of the whole column) exceeds
        ``threshold`` in absolute value. The list is empty when no column
        stands out.
    """
    # Exclude the label column by name so the result does not depend on where
    # 'anomaly' sits among the columns.
    feature_cols = [col for col in df.columns if col != 'anomaly']

    # Column statistics are the same for every row, so compute them once.
    means = {col: df[col].mean() for col in feature_cols}
    stds = {col: df[col].std() for col in feature_cols}

    explanations = {}
    for idx, row in df[df['anomaly'] == -1].iterrows():
        reasons = []
        for col in feature_cols:
            spread = stds[col]
            # A zero or undefined (NaN) spread gives no meaningful z-score;
            # skip the column instead of dividing by it.
            if not spread > 0:
                continue
            z_score = (row[col] - means[col]) / spread
            if abs(z_score) > threshold:
                change = "increased" if z_score > 0 else "decreased"
                reasons.append(f"{col} {change} significantly (z-score: {z_score:.2f})")
        explanations[idx] = reasons

    return explanations

def analyze_networks():
    """Compare the clean and infected networks and write the report.

    Loads both graphs from the files named in ``CONFIG``, computes the
    per-node change in every centrality measure, saves the two network plots
    and the change heatmap, flags anomalous nodes, and writes
    ``analysis_report.md`` into ``CONFIG['output_dir']``.

    Returns:
        tuple: ``(centrality_changes, comparison, anomaly_explanations)``
        where ``centrality_changes`` is the infected-minus-clean DataFrame,
        ``comparison`` is a dict of added/removed edge and node sets, and
        ``anomaly_explanations`` maps each flagged node to its explanations.
    """
    # Load graphs (only the graphs are needed here, not the raw tables)
    print("Loading and analyzing networks...\n")
    G1, _, _ = load_graph(CONFIG['clean_nodes'], CONFIG['clean_edges'])
    G2, _, _ = load_graph(CONFIG['infected_nodes'], CONFIG['infected_edges'])

    # Calculate centrality measures
    centrality_df1 = calculate_centrality(G1)
    centrality_df2 = calculate_centrality(G2)
    # A node that exists in only one network is treated as having zero
    # centrality in the other. A plain subtraction would leave such rows as
    # NaN, which then flow into the heatmap and the anomaly detector.
    centrality_changes = centrality_df2.sub(centrality_df1, fill_value=0)

    # Create visualizations
    print("Generating network visualizations...")
    visualize_network_matplotlib(G1, "Clean Network")
    visualize_network_matplotlib(G2, "Infected Network", node_color='lightcoral')
    visualize_centrality_heatmap(centrality_changes)

    # Detect and explain anomalies
    anomalies = detect_anomalies(centrality_changes, CONFIG['anomaly_threshold'])
    anomaly_explanations = explain_anomalies(anomalies)
    flagged = anomalies[anomalies['anomaly'] == -1]

    # Compare graph structures. The graphs are undirected, so (a, b) and
    # (b, a) are the same edge; has_edge() ignores orientation, whereas a set
    # difference of edge tuples would report a re-oriented edge as both added
    # and removed.
    comparison = {
        'added_edges': {edge for edge in G2.edges() if not G1.has_edge(*edge)},
        'removed_edges': {edge for edge in G1.edges() if not G2.has_edge(*edge)},
        'added_nodes': set(G2.nodes()) - set(G1.nodes()),
        'removed_nodes': set(G1.nodes()) - set(G2.nodes()),
    }

    # Generate and save analysis report
    report_parts = [f"""# ICS/OT Network Analysis Report

## Network Statistics
- Clean Network: {G1.number_of_nodes()} nodes, {G1.number_of_edges()} edges
- Infected Network: {G2.number_of_nodes()} nodes, {G2.number_of_edges()} edges

## Structure Changes
- Added Edges: {comparison['added_edges']}
- Removed Edges: {comparison['removed_edges']}
- Added Nodes: {comparison['added_nodes']}
- Removed Nodes: {comparison['removed_nodes']}

## Detected Anomalies
{flagged.to_string()}

## Anomaly Explanations
"""]

    for node, explanations in anomaly_explanations.items():
        report_parts.append(f"\nNode {node}:\n")
        report_parts.extend(f"  - {explanation}\n" for explanation in explanations)

    report_path = os.path.join(CONFIG['output_dir'], 'analysis_report.md')
    with open(report_path, 'w', encoding='utf-8') as f:
        f.write("".join(report_parts))

    return centrality_changes, comparison, anomaly_explanations

def display_analysis_results(centrality_changes, comparison, anomaly_explanations):
    """Print a plain-text summary of the analysis.

    Lists every entry of ``comparison``, then the explanations recorded for
    each node in ``anomaly_explanations``, and ends with the output folder
    taken from ``CONFIG``. ``centrality_changes`` is accepted but not printed.
    """
    def print_heading(text):
        # Section title followed by its dashed underline.
        print(text)
        print("-----------------")

    print("\nAnalysis Results Summary")
    print("=====================\n")

    print_heading("Structure Changes:")
    for change_type, members in comparison.items():
        print(f"{change_type}: {members}")

    print_heading("\nDetected Anomalies:")
    for node, reasons in anomaly_explanations.items():
        print(f"\nNode {node}:")
        for reason in reasons:
            print(f"  - {reason}")

    output_location = CONFIG['output_dir']
    print(f"\nDetailed results have been saved to: {output_location}")

# Execute the full pipeline, then print a summary of what it produced
(
    centrality_changes,
    comparison,
    anomaly_explanations,
) = analyze_networks()
display_analysis_results(
    centrality_changes=centrality_changes,
    comparison=comparison,
    anomaly_explanations=anomaly_explanations,
)



OSError: 'seaborn' is not a valid package style, path of style file, URL of style file, or library style name (library styles are listed in `style.available`)