In [6]:
import pandas as pd
import csv
from collections import defaultdict
from typing import Dict, List, Tuple

class FlowLogsAnalyzer:
    """Tag flow-log records by (destination port, protocol) and tally them.

    A lookup table (CSV with ``dstport``, ``protocol`` and ``tag`` columns)
    maps each port/protocol pair to a tag. Log records are whitespace-separated
    lines that are assumed to follow the default AWS VPC flow log version-2
    field order, in which the destination port is the 7th field and the
    protocol number is the 8th.
    """

    # IANA protocol numbers mapped to the lowercase names used as lookup keys.
    _PROTOCOL_NAMES = {6: 'tcp', 17: 'udp', 1: 'icmp'}

    # Zero-based field positions in a whitespace-split version-2 record:
    # version account-id interface-id srcaddr dstaddr srcport dstport protocol ...
    _DSTPORT_INDEX = 6
    _PROTOCOL_INDEX = 7
    _MIN_FIELDS = 8

    def __init__(self, lookup_file: str):
        """Initialize analyzer with lookup table file."""
        self.lookup_map = self._load_lookup_table(lookup_file)

    def _load_lookup_table(self, lookup_file: str) -> Dict[Tuple[int, str], str]:
        """Load the lookup CSV into a ``{(port, protocol): tag}`` dictionary.

        Column names are stripped of surrounding whitespace, protocols are
        lower-cased so matching is case-insensitive, and tags are stripped.
        Rows missing any of the three required values are skipped. When the
        same (port, protocol) pair appears more than once, the last row wins.

        Raises:
            KeyError: if a required column is absent from the file.
            ValueError: if a ``dstport`` value cannot be converted to ``int``.
        """
        # pandas opens the path itself; no separate file handle is needed.
        df = pd.read_csv(lookup_file)
        df.columns = df.columns.str.strip()

        # An empty cell is read as NaN, which has no .strip(); drop such rows
        # instead of failing on them.
        df = df.dropna(subset=['dstport', 'protocol', 'tag'])

        lookup_map = {}
        for port, protocol, tag in zip(df['dstport'], df['protocol'], df['tag']):
            key = (int(port), str(protocol).strip().lower())
            lookup_map[key] = str(tag).strip()
        return lookup_map

    def _parse_flow_log_line(self, line: str) -> Tuple[int, str]:
        """Parse one flow log line and return ``(destination_port, protocol)``.

        The protocol number is translated to ``'tcp'``, ``'udp'`` or
        ``'icmp'``; any other number yields ``'unknown'``.

        Raises:
            ValueError: if the line is blank, has fewer than eight fields, or
                the port/protocol fields are not integers.
        """
        parts = line.split()
        if not parts:
            raise ValueError("Empty line")
        if len(parts) < self._MIN_FIELDS:
            raise ValueError(f"Invalid log line format: {line}")

        # Index 5 is the *source* port in the version-2 layout; the
        # destination port is the following field.
        dst_port = int(parts[self._DSTPORT_INDEX])
        protocol_number = int(parts[self._PROTOCOL_INDEX])
        protocol = self._PROTOCOL_NAMES.get(protocol_number, 'unknown')

        return dst_port, protocol

    def process_logs(self, log_file: str) -> Tuple[Dict[str, int], Dict[Tuple[int, str], int]]:
        """Count tags and port/protocol combinations found in ``log_file``.

        Lines that cannot be parsed (blank, too short, non-numeric fields) are
        skipped. Records whose (port, protocol) pair is not in the lookup table
        are counted under the tag ``'Untagged'``.

        Returns:
            A pair ``(tag_counts, combo_counts)`` of plain dictionaries.
        """
        tag_counts = defaultdict(int)
        combo_counts = defaultdict(int)

        with open(log_file, 'r') as f:
            for line in f:
                try:
                    key = self._parse_flow_log_line(line)
                except (ValueError, IndexError):
                    # Best effort: ignore blank and malformed lines.
                    continue

                combo_counts[key] += 1
                tag_counts[self.lookup_map.get(key, 'Untagged')] += 1

        return dict(tag_counts), dict(combo_counts)

    def generate_report(self, tag_counts: Dict[str, int], combo_counts: Dict[Tuple[int, str], int], output_file: str) -> None:
        """Write both count tables to ``output_file`` as two CSV sections.

        Each section has a title line and a header row, followed by its rows
        in sorted order. Rows are emitted through ``csv.writer`` so that a tag
        containing a comma or quote is quoted rather than splitting into extra
        columns.
        """
        with open(output_file, 'w', newline='') as f:
            # "\n" keeps the line endings the report has always used.
            writer = csv.writer(f, lineterminator="\n")

            f.write("Tag Counts:\n")
            writer.writerow(["Tag", "Count"])
            writer.writerows(sorted(tag_counts.items()))

            f.write("\nPort/Protocol Combination Counts:\n")
            writer.writerow(["Port", "Protocol", "Count"])
            for (port, protocol), count in sorted(combo_counts.items()):
                writer.writerow([port, protocol, count])

def main():
    """Run the full analysis using the default file names.

    Builds an analyzer from ``lookup_table.csv``, processes ``log_data.csv``
    and writes ``analysis_report.csv``. Any exception raised along the way is
    reported on stdout instead of being propagated.
    """
    lookup_file, log_file, output_file = (
        "lookup_table.csv",
        "log_data.csv",
        "analysis_report.csv",
    )

    try:
        # Load the lookup table, then tally the log records.
        analyzer = FlowLogsAnalyzer(lookup_file)
        tags, combos = analyzer.process_logs(log_file)

        # Persist both tables and confirm where they went.
        analyzer.generate_report(tags, combos, output_file)
        print(f"Analysis complete. Results saved to {output_file}")
    except Exception as err:
        print("Error: " + str(err))

# Run the analysis only when this file is executed directly, not when it is
# imported as a module.
if __name__ == "__main__":
    main()

Analysis complete. Results saved to analysis_report.csv
