# In [1]:
#!/usr/bin/env python3

import os
import csv
import re
import sys
from datetime import datetime

def _classify_disk(mount_pt):
    """Return 'nvme', 'ssd' or 'hdd' if that tag occurs in *mount_pt*, else None."""
    lowered = mount_pt.lower()
    # Checked in this order so the first matching tag wins.
    for tag in ('nvme', 'ssd', 'hdd'):
        if tag in lowered:
            return tag
    return None


def parse_darshan_log(file_path):
    """
    Parse a single tab-separated Darshan text dump and summarise POSIX records.

    Each data line is expected to carry at least eight tab-separated fields:
    module, rank, record id, counter name, counter value, file name, mount
    point and filesystem type. Lines starting with '#', blank lines, lines
    with fewer than eight fields and lines whose module is not 'POSIX' are
    skipped. Fields beyond the eighth are ignored.

    Counters are grouped by record id. A record is reported as a 'read' when
    its POSIX_READS counter is positive, otherwise as a 'write' when its
    POSIX_WRITES counter is positive; records with neither are dropped.

    Args:
        file_path: Path of the text file to parse.

    Returns:
        A list of dicts with the keys 'record_id', 'filename', 'disk_type',
        'op_type', 'response_io', 'start_time', 'end_time' and 'disk_time'.
        Counter values are kept as the strings read from the file; a counter
        that is missing from the record is reported as the integer 0.
        If the file cannot be read or a counter cannot be converted to an
        integer, the error is printed to stderr and an empty list is returned.
    """
    # (operation name, counter-name infix, byte-count counter), in priority order.
    operations = (
        ('read', 'READ', 'POSIX_BYTES_READ'),
        ('write', 'WRITE', 'POSIX_BYTES_WRITTEN'),
    )
    records = {}

    try:
        with open(file_path, 'r') as f:
            for line in f:
                # Skip comments and empty lines
                if line.startswith('#') or not line.strip():
                    continue

                parts = line.strip().split('\t')

                # All eight fields are unpacked below, so require all eight;
                # a short line is skipped instead of aborting the whole file.
                if len(parts) < 8:
                    continue

                (module, _rank, record_id, counter, value,
                 filename, mount_pt, _fs_type) = parts[:8]

                # Only process POSIX module
                if module != 'POSIX':
                    continue

                record = records.get(record_id)
                if record is None:
                    # File name, mount point and disk type come from the first
                    # line seen for this record id.
                    record = records[record_id] = {
                        'filename': filename,
                        'mount_pt': mount_pt,
                        'disk_type': _classify_disk(mount_pt),
                    }

                # Later lines for the same counter overwrite earlier ones.
                record[counter] = value

        parsed_data = []
        for record_id, record in records.items():
            for op_type, infix, bytes_counter in operations:
                if int(record.get(f'POSIX_{infix}S', 0)) <= 0:
                    continue
                parsed_data.append({
                    'record_id': record_id,
                    'filename': record.get('filename', 'N/A'),
                    'disk_type': record.get('disk_type', 'N/A'),
                    'op_type': op_type,
                    'response_io': record.get(bytes_counter, 0),
                    'start_time': record.get(f'POSIX_F_{infix}_START_TIMESTAMP', 0),
                    'end_time': record.get(f'POSIX_F_{infix}_END_TIMESTAMP', 0),
                    'disk_time': record.get(f'POSIX_F_{infix}_TIME', 0),
                })
                # One row per record: a record with reads is never also
                # reported as a write.
                break

        return parsed_data

    except (OSError, ValueError) as e:
        # OSError: file missing/unreadable. ValueError: undecodable bytes or a
        # non-integer operation counter.
        print(f"Error parsing {file_path}: {e}", file=sys.stderr)
        return []

def main(parsed_logs_dir="darshan_parsed_24_all_20241124_173911",
         output_csv="darshan_summary_24_all_20241124_173911.csv"):
    """
    Summarise every '*_parsed.txt' file in a directory into one CSV file.

    Each matching file is passed to parse_darshan_log and the returned rows
    are concatenated and written to *output_csv* with a header row. Files are
    processed in sorted name order so the CSV row order is reproducible
    between runs (os.listdir itself returns entries in arbitrary order).

    Args:
        parsed_logs_dir: Directory containing the parsed Darshan log files.
        output_csv: Path of the CSV file to create. It is only written when
            at least one row was collected.

    Returns:
        None. Progress is printed to stdout; problems (unreadable directory,
        nothing to write, CSV write failure) are printed to stderr.
    """
    try:
        entries = sorted(os.listdir(parsed_logs_dir))
    except OSError as e:
        # Report a missing/unreadable directory instead of dying with a traceback.
        print(f"Cannot list directory {parsed_logs_dir}: {e}", file=sys.stderr)
        return

    # Collect the rows of every parsed log file in the directory.
    all_parsed_data = []
    for filename in entries:
        if filename.endswith('_parsed.txt'):
            file_path = os.path.join(parsed_logs_dir, filename)
            all_parsed_data.extend(parse_darshan_log(file_path))

    if not all_parsed_data:
        print("No data to write to CSV.", file=sys.stderr)
        return

    csv_columns = [
        'record_id', 'filename', 'disk_type', 'op_type',
        'response_io', 'start_time', 'end_time', 'disk_time',
    ]

    try:
        # newline='' lets the csv module control line endings itself.
        with open(output_csv, 'w', newline='') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=csv_columns)
            writer.writeheader()
            writer.writerows(all_parsed_data)
    except OSError as e:
        print(f"I/O error occurred while writing CSV: {e}", file=sys.stderr)
        return

    print(f"CSV file created successfully: {output_csv}")
    print(f"Total records processed: {len(all_parsed_data)}")

# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()

# Output of the run above:
#   CSV file created successfully: darshan_summary_24_all_20241124_173911.csv
#   Total records processed: 118272
