In [9]:
# Required packages:
# pip install networkx numpy pandas matplotlib scikit-learn seaborn pyvis python-louvain

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler
import seaborn as sns
from pyvis.network import Network
import community.community_louvain as community_louvain
from collections import Counter
import os
import logging
from pathlib import Path
import warnings
from itertools import combinations

# Suppress warnings
warnings.filterwarnings('ignore')

class NetworkAnalysisTool:
    def __init__(self, base_path):
        """Set up output and logging, then read the input tables and build the graph.

        Args:
            base_path: Directory holding the input CSV files. A leading ``~``
                is expanded. An ``output`` subdirectory is created beneath it
                if it does not already exist.
        """
        resolved_root = os.path.expanduser(base_path)
        results_dir = os.path.join(resolved_root, 'output')
        self.base_path = resolved_root
        self.output_path = results_dir
        os.makedirs(results_dir, exist_ok=True)

        # Configure logging to write to analysis.log inside the output directory.
        log_file = os.path.join(results_dir, 'analysis.log')
        logging.basicConfig(
            filename=log_file,
            level=logging.INFO,
            format='%(asctime)s - %(levelname)s - %(message)s',
        )

        # The graph is built from the data frames, so load them first.
        self.load_data()
        self.create_graph()
        
    def load_data(self):
        """Load nodes and edges data"""
        try:
            self.nodes_df = pd.read_csv(os.path.join(self.base_path, 'ICS_OT Nodes.csv'))
            self.edges_df = pd.read_csv(os.path.join(self.base_path, 'ICS_OT Edges.csv'))
        except FileNotFoundError as e:
            raise FileNotFoundError(f"Error loading CSV files: {e}")
            
    def create_graph(self):
        """Build ``self.G`` from the loaded node and edge tables.

        Each row of ``self.nodes_df`` becomes a node keyed by its ``Id`` with
        ``label`` and ``shape`` attributes (``shape`` falls back to
        ``'ellipse'`` when the row has no ``Shape`` entry). Each row of
        ``self.edges_df`` becomes an edge from ``Source`` to ``Target``.
        """
        self.G = nx.Graph()

        # Nodes are supplied as (id, attribute-dict) pairs.
        self.G.add_nodes_from(
            (
                record['Id'],
                {'label': record['Label'], 'shape': record.get('Shape', 'ellipse')},
            )
            for _, record in self.nodes_df.iterrows()
        )

        # Edges are supplied as (source, target) pairs.
        self.G.add_edges_from(
            (record['Source'], record['Target'])
            for _, record in self.edges_df.iterrows()
        )
            
    def analyze_network_structure(self):
        """Summarise the graph's topology and cache its centrality measures.

        Side effect: stores the degree, betweenness, closeness and eigenvector
        centrality dictionaries on ``self.centrality_measures``.

        Returns:
            dict with the keys ``components``, ``cycles``, ``endpoints``,
            ``density``, ``is_tree`` and ``is_forest``.
        """
        logging.info("Analyzing network structure")

        graph = self.G
        component_sets = list(nx.connected_components(graph))
        basis_cycles = list(nx.cycle_basis(graph))
        # Endpoints are the nodes whose degree is exactly one.
        leaf_nodes = [vertex for vertex, deg in graph.degree() if deg == 1]

        # Compute every measure before publishing them on the instance.
        degree_c = nx.degree_centrality(graph)
        betweenness_c = nx.betweenness_centrality(graph)
        closeness_c = nx.closeness_centrality(graph)
        eigenvector_c = nx.eigenvector_centrality(graph, max_iter=1000)
        self.centrality_measures = {
            'Degree_Centrality': degree_c,
            'Betweenness_Centrality': betweenness_c,
            'Closeness_Centrality': closeness_c,
            'Eigenvector_Centrality': eigenvector_c,
        }

        summary = {
            'components': component_sets,
            'cycles': basis_cycles,
            'endpoints': leaf_nodes,
        }
        summary['density'] = nx.density(graph)
        summary['is_tree'] = nx.is_tree(graph)
        summary['is_forest'] = nx.is_forest(graph)
        return summary
        
    def calculate_node_criticality(self):
        """Score each node by how its removal changes the component count.

        Returns:
            dict mapping every node to ``(connected components after removing
            the node) - (connected components in the full graph)``.
        """
        baseline = nx.number_connected_components(self.G)

        def _component_delta(target):
            # Work on a copy so the real graph is never modified.
            pruned = self.G.copy()
            pruned.remove_node(target)
            return nx.number_connected_components(pruned) - baseline

        return {candidate: _component_delta(candidate) for candidate in self.G.nodes()}
        
    def calculate_security_metrics(self):
        """Compute per-node security metrics for the whole graph.

        Nodes are grouped into PLCs, HMIs and field devices by comparing their
        ``label`` attribute with fixed label lists; the per-node metrics are
        then produced by ``_calculate_node_security_metrics``.

        Returns:
            dict mapping each node to its metrics dictionary.
        """
        control_systems = ['PLC 1', 'PLC 2', 'PLC 3', 'PLC 4', 'PLC 5']
        hmi_systems = ['HMI 1', 'HMI 2', 'HMI 3']
        field_devices = [
            'Sensor 1', 'Actuator 1',
            'Device 1', 'Device 2', 'Device 3', 'Device 4', 'Device 5', 'Device 6',
            'Device 7', 'Device 8', 'Device 9', 'Device 10', 'Device 11',
        ]

        # Classify every node in one pass; each label is tested against all
        # three groups independently.
        plc_nodes, hmi_nodes, field_nodes = [], [], []
        for node_id, attrs in self.G.nodes(data=True):
            label = attrs['label']
            if label in control_systems:
                plc_nodes.append(node_id)
            if label in hmi_systems:
                hmi_nodes.append(node_id)
            if label in field_devices:
                field_nodes.append(node_id)

        return {
            node_id: self._calculate_node_security_metrics(
                node_id, plc_nodes, hmi_nodes, field_nodes,
                control_systems, hmi_systems, field_devices
            )
            for node_id in self.G.nodes()
        }
        
    def _calculate_node_security_metrics(self, node, plc_nodes, hmi_nodes, field_nodes,
                                       control_systems, hmi_systems, field_devices):
        """Calculate security metrics for a single node.

        Args:
            node: Node identifier in ``self.G``.
            plc_nodes: Identifiers of the nodes classified as PLCs.
            hmi_nodes: Identifiers of the nodes classified as HMIs.
            field_nodes: Identifiers of the nodes classified as field devices.
            control_systems: Labels that mark a node as a control system.
            hmi_systems: Labels that mark a node as an HMI.
            field_devices: Labels that mark a node as a field device.

        Returns:
            dict with the number of directly connected PLC / HMI / field
            nodes, the hop count to the nearest reachable PLC and HMI
            (``float('inf')`` when none is reachable), and 0/1 flags for the
            node's own category.
        """
        node_label = self.G.nodes[node]['label']

        # A single breadth-first search gives the hop count to every reachable
        # node, instead of a has_path + shortest_path_length pair per target.
        hops = nx.single_source_shortest_path_length(self.G, node)
        unreachable = float('inf')
        min_path_to_plc = min(
            (hops[plc] for plc in plc_nodes if plc in hops), default=unreachable)
        min_path_to_hmi = min(
            (hops[hmi] for hmi in hmi_nodes if hmi in hops), default=unreachable)

        # Sets make the per-neighbour membership tests constant time.
        plc_set = set(plc_nodes)
        hmi_set = set(hmi_nodes)
        field_set = set(field_nodes)
        neighbours = list(self.G.neighbors(node))

        return {
            'plc_connections': sum(1 for n in neighbours if n in plc_set),
            'hmi_connections': sum(1 for n in neighbours if n in hmi_set),
            'field_connections': sum(1 for n in neighbours if n in field_set),
            'min_path_to_plc': min_path_to_plc,
            'min_path_to_hmi': min_path_to_hmi,
            'is_control_system': int(node_label in control_systems),
            'is_hmi': int(node_label in hmi_systems),
            'is_field_device': int(node_label in field_devices)
        }
        
    def detect_anomalies(self, security_metrics):
        """Score every node for anomalies using centrality and security features.

        Reads ``self.centrality_measures``, so ``analyze_network_structure``
        must have been called first.

        Args:
            security_metrics: dict mapping each node in ``self.G`` to its
                metrics dictionary (as returned by
                ``calculate_security_metrics``).

        Returns:
            pandas.DataFrame indexed by node with the feature columns plus
            ``anomaly`` (``IsolationForest.fit_predict`` output),
            ``anomaly_score`` (``score_samples`` output) and ``risk_score``
            (anomaly score rescaled to 0-100; the node with the lowest
            anomaly score gets 100, the one with the highest gets 0).
        """
        # Finite stand-in for "no path" so the column can be standardised.
        unreachable_hops = 1000

        features = pd.DataFrame()

        # Add centrality measures
        for measure, values in self.centrality_measures.items():
            features[measure] = pd.Series(values)

        # Add security metrics
        for node in self.G.nodes():
            for metric, value in security_metrics[node].items():
                if metric not in features:
                    # Seed as float: every value written below is a float.
                    features[metric] = 0.0
                features.loc[node, metric] = float(
                    unreachable_hops if value == float('inf') else value)

        def _path_factor(hop_count):
            # Closer targets weigh more; unreachable targets contribute nothing.
            return 1 / (hop_count + 1) if hop_count < unreachable_hops else 0

        # Calculate exposure scores
        features['plc_path_factor'] = features['min_path_to_plc'].apply(_path_factor)
        features['hmi_path_factor'] = features['min_path_to_hmi'].apply(_path_factor)

        features['exposure_score'] = (
            features['plc_connections'] * 3 +
            features['hmi_connections'] * 2 +
            features['field_connections'] +
            features['plc_path_factor'] * 2 +
            features['hmi_path_factor'] * 1.5
        )

        # Standardize features
        scaler = StandardScaler()
        features_scaled = scaler.fit_transform(features)

        # Detect anomalies
        iso_forest = IsolationForest(contamination=0.1, n_estimators=100, random_state=42)
        features['anomaly'] = iso_forest.fit_predict(features_scaled)
        features['anomaly_score'] = iso_forest.score_samples(features_scaled)

        # Convert to risk score (0-100)
        min_score = features['anomaly_score'].min()
        max_score = features['anomaly_score'].max()
        score_span = min_score - max_score
        if score_span == 0:
            # All nodes scored identically: dividing would give 0/0 = NaN for
            # every node, so report a uniform risk of zero instead.
            features['risk_score'] = 0.0
        else:
            features['risk_score'] = 100 * (features['anomaly_score'] - max_score) / score_span

        return features
        
    def visualize_risk_levels(self, risk_scores):
        """Draw the graph coloured and sized by risk tier and save it as a PNG.

        Args:
            risk_scores: Table indexed by node with a ``risk_score`` column.

        The image is written to ``risk_level_visualization.png`` in
        ``self.output_path``.
        """
        plt.figure(figsize=(15, 10))

        layout = nx.spring_layout(self.G, k=2, iterations=50)

        # (minimum score, colour, marker size), highest tier first; anything
        # that reaches no tier gets the fallback style.
        tiers = (
            (75, 'red', 3000),
            (50, 'orange', 2500),
            (25, 'yellow', 2000),
        )
        fallback_style = ('green', 1500)

        def _style_for(score):
            for floor, colour, marker_size in tiers:
                if score >= floor:
                    return colour, marker_size
            return fallback_style

        risk_colors = []
        node_sizes = []
        for vertex in self.G.nodes():
            colour, marker_size = _style_for(risk_scores.loc[vertex, 'risk_score'])
            risk_colors.append(colour)
            node_sizes.append(marker_size)

        # Edges first, then nodes, then the text labels.
        nx.draw_networkx_edges(self.G, layout, edge_color='gray', alpha=0.5)
        nx.draw_networkx_nodes(self.G, layout,
                               node_color=risk_colors,
                               node_size=node_sizes)
        label_map = nx.get_node_attributes(self.G, 'label')
        nx.draw_networkx_labels(self.G, layout, label_map, font_size=8)

        # One legend entry per risk tier.
        legend_spec = [('Critical', 'red'), ('High', 'orange'),
                       ('Moderate', 'yellow'), ('Low', 'green')]
        legend_elements = []
        for level, swatch in legend_spec:
            legend_elements.append(
                plt.Line2D([0], [0], marker='o', color='w',
                           label=f'{level} Risk',
                           markerfacecolor=swatch, markersize=10)
            )
        plt.legend(handles=legend_elements, loc='upper left',
                   title='Risk Levels', bbox_to_anchor=(1, 1))

        plt.title('Network Risk Level Visualization')
        plt.axis('off')
        plt.tight_layout()

        # Write the figure to disk and release it.
        target_file = os.path.join(self.output_path, 'risk_level_visualization.png')
        plt.savefig(target_file, bbox_inches='tight', dpi=300)
        plt.close()

    def generate_comprehensive_report(self, results):
        """Generate a comprehensive network analysis report in markdown format.

        Reads ``self.centrality_measures``, so ``analyze_network_structure``
        must have been called first.

        Args:
            results: dict with the keys ``structure_analysis``,
                ``criticality_scores``, ``security_metrics`` and
                ``risk_scores`` (as assembled by ``run_analysis``).

        Returns:
            The report text. It is also written to
            ``comprehensive_analysis_report.md`` in ``self.output_path`` and
            the saved path is printed.
        """
        timestamp = pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S')
        structure_analysis = results['structure_analysis']
        criticality_scores = results['criticality_scores']
        security_metrics = results['security_metrics']
        risk_scores = results['risk_scores']

        components = structure_analysis['components']
        component_count = len(components)

        # nx.diameter and nx.average_shortest_path_length raise on a graph
        # that is not connected, so only compute them for a single component.
        if component_count == 1:
            diameter = nx.diameter(self.G)
            avg_path_length = round(nx.average_shortest_path_length(self.G), 2)
        else:
            diameter = avg_path_length = 'N/A (graph is not connected)'

        def _hops_or_na(hop_count):
            # Unreachable targets are stored as infinity in the metrics.
            return hop_count if hop_count != float('inf') else 'N/A'

        # The report is collected line by line and joined once at the end.
        lines = [
            "# ICS/OT Network Analysis Report",
            "",
            "## Network Statistics",
            f"- Total Nodes: {self.G.number_of_nodes()}",
            f"- Total Edges: {self.G.number_of_edges()}",
            f"- Network Density: {structure_analysis['density']:.3f}",
            f"- Network Diameter: {diameter}",
            f"- Is Tree: {structure_analysis['is_tree']}",
            f"- Is Forest: {structure_analysis['is_forest']}",
            f"- Number of Endpoints: {len(structure_analysis['endpoints'])}",
            "",
            "## Network Structure",
            "### Components",
            f"- Number of Connected Components: {component_count}",
            f"- Largest Component Size: {max((len(comp) for comp in components), default=0)}",
            "",
            "### Cycles",
            f"- Number of Cycles: {len(structure_analysis['cycles'])}",
        ]

        # Add cycle details
        for i, cycle in enumerate(structure_analysis['cycles'], 1):
            cycle_nodes = [self.G.nodes[n]['label'] for n in cycle]
            lines.append(f"- Cycle {i}: {' -> '.join(cycle_nodes)} -> {cycle_nodes[0]}")

        lines += ["", "## Critical Nodes Analysis"]
        # Most critical nodes first.
        ranked_nodes = sorted(criticality_scores.items(), key=lambda item: item[1], reverse=True)
        for node, score in ranked_nodes:
            # The score is the change in component count, so the count left
            # after removing the node is the original count plus the score.
            lines += [
                f"- {self.G.nodes[node]['label']}:",
                f"  * Splits network into {component_count + score} components",
                f"  * Degree: {self.G.degree(node)}",
                f"  * Betweenness Centrality: {self.centrality_measures['Betweenness_Centrality'][node]:.3f}",
            ]

        lines += ["", "## Security Risk Assessment"]
        # Highest risk first.
        high_risk_nodes = risk_scores[risk_scores['risk_score'] >= 50].sort_values('risk_score', ascending=False)

        lines.append("### High Risk Nodes (Risk Score >= 50)")
        for node in high_risk_nodes.index:
            metrics = security_metrics[node]
            lines += [
                "",
                f"#### {self.G.nodes[node]['label']}",
                f"Risk Score: {risk_scores.loc[node, 'risk_score']:.1f}/100",
                "Security Metrics:",
                f"- PLC Connections: {metrics['plc_connections']}",
                f"- HMI Connections: {metrics['hmi_connections']}",
                f"- Field Device Connections: {metrics['field_connections']}",
                f"- Minimum Path to PLC: {_hops_or_na(metrics['min_path_to_plc'])}",
                f"- Minimum Path to HMI: {_hops_or_na(metrics['min_path_to_hmi'])}",
                f"- Exposure Score: {risk_scores.loc[node, 'exposure_score']:.2f}",
            ]

        lines += ["", "## Detected Anomalies"]
        # Rows whose anomaly flag is -1 are listed as anomalous.
        anomalous_nodes = risk_scores[risk_scores['anomaly'] == -1].sort_values('risk_score', ascending=False)

        centrality_cols = ['Degree_Centrality', 'Betweenness_Centrality',
                          'Closeness_Centrality', 'Eigenvector_Centrality']

        # Fixed-width table inside a fenced code block.
        header = "Node Label".ljust(20) + "".join(
            col.replace('_', ' ').ljust(25) for col in centrality_cols)
        lines += [
            "### Centrality Measures for Anomalous Nodes",
            "```",
            header,
            "-" * 120,
        ]
        for node in anomalous_nodes.index:
            node_label = self.G.nodes[node]['label']
            row = node_label[:19].ljust(20) + "".join(
                f"{self.centrality_measures[col][node]:.6f}".ljust(25)
                for col in centrality_cols)
            lines.append(row)
        lines.append("```")

        lines += ["", "## Path Analysis"]
        # Add path analysis for critical assets
        critical_assets = ['Crown Jewel', 'SCADA Server', 'HMI 1', 'HMI 2', 'HMI 3']
        critical_nodes = [node for node, attr in self.G.nodes(data=True)
                         if attr['label'] in critical_assets]

        lines.append("### Shortest Paths to Critical Assets")
        for start_node in self.G.nodes():
            lines += ["", f"From {self.G.nodes[start_node]['label']}:"]
            for end_node in critical_nodes:
                end_label = self.G.nodes[end_node]['label']
                if nx.has_path(self.G, start_node, end_node):
                    path = nx.shortest_path(self.G, start_node, end_node)
                    path_labels = [self.G.nodes[n]['label'] for n in path]
                    lines.append(f"- To {end_label}: {' -> '.join(path_labels)} ({len(path)-1} hops)")
                else:
                    lines.append(f"- To {end_label}: No path exists")

        lines += [
            "",
            "## Analysis Details",
            f"- Analysis completed at: {timestamp}",
            f"- Output directory: {self.output_path}",
            "",
            "## Generated Files",
            "- risk_level_visualization.png",
            "- network_analysis_report.txt",
            "- security_analysis_report.txt",
            "- comprehensive_analysis_report.md",
        ]

        # Add summary statistics
        lines += [
            "",
            "## Summary Statistics",
            f"- Average node degree: {round(np.mean([d for n, d in self.G.degree()]), 2)}",
            f"- Graph diameter: {diameter}",
            f"- Average shortest path length: {avg_path_length}",
            f"- Graph density: {round(nx.density(self.G), 3)}",
            f"- Number of high-risk nodes: {len(high_risk_nodes)}",
            f"- Number of anomalous nodes: {len(anomalous_nodes)}",
        ]

        report = "\n".join(lines) + "\n"

        # Save the report
        report_path = os.path.join(self.output_path, 'comprehensive_analysis_report.md')
        with open(report_path, 'w', encoding='utf-8') as f:
            f.write(report)

        print(f"\nComprehensive report saved to: {report_path}")
        return report

    def run_analysis(self):
        """Execute every analysis stage in order and return the collected results.

        Runs the structure analysis, node criticality, security metrics and
        anomaly detection, then produces the risk visualization and the
        comprehensive report. Progress messages are printed along the way.

        Returns:
            dict with the keys ``structure_analysis``, ``criticality_scores``,
            ``security_metrics`` and ``risk_scores``.
        """
        print("Starting network analysis...")

        results = {}
        # Topology first: it also caches the centrality measures used later.
        results['structure_analysis'] = self.analyze_network_structure()
        results['criticality_scores'] = self.calculate_node_criticality()

        # The anomaly detector consumes the per-node security metrics.
        metrics_by_node = self.calculate_security_metrics()
        results['security_metrics'] = metrics_by_node
        risk_table = self.detect_anomalies(metrics_by_node)
        results['risk_scores'] = risk_table

        print("Generating network visualizations...")
        self.visualize_risk_levels(risk_table)

        print("Generating comprehensive analysis report...")
        self.generate_comprehensive_report(results)

        print("\nAnalysis complete. All reports and visualizations have been saved.")
        return results

# Main execution
if __name__ == "__main__":
    try:
        # Initialize and run the analysis
        base_path = os.path.join(os.path.expanduser('~'), 'Desktop', 'Network Analysis Tool')
        analyzer = NetworkAnalysisTool(base_path)
        results = analyzer.run_analysis()

    except Exception as e:
        # logging.exception logs at ERROR level and appends the traceback, so
        # the log keeps the failure location as well as the message.
        logging.exception("Error during analysis: %s", e)
        raise

Starting network analysis...
Generating network visualizations...
Generating comprehensive analysis report...

Comprehensive report saved to: C:\Users\Service Casket\Desktop\Network Analysis Tool\output\comprehensive_analysis_report.md

Analysis complete. All reports and visualizations have been saved.
