In [3]:
import pandas as pd
import io

def parse_stress_ng_log(log_content):
    """
    Parse stress-ng log content and extract the metrics row into a pandas DataFrame

    The metrics table is located by its header line (the one containing
    "stressor" followed by "bogo ops"). The line after the header holds the
    units, and the line after that is the data row that gets parsed. Only
    that first data row is returned.

    Parameters:
    log_content (str): The content of the stress-ng log file

    Returns:
    pandas.DataFrame: Single-row DataFrame containing the metrics

    Raises:
    ValueError: If the metrics header or its data row cannot be found, if the
        data row has fewer fields than expected, or if a metric value is not
        numeric
    """
    # Define the columns we expect based on the log format
    columns = [
        'stressor',
        'bogo_ops',
        'real_time',
        'usr_time',
        'sys_time',
        'bogo_ops_real_time',
        'bogo_ops_usr_sys_time',
        'cpu_used_per_instance',
        'rss_max',
    ]

    lines = log_content.split('\n')

    metrics_data = None
    for i, line in enumerate(lines):
        # Collapse whitespace runs so the match does not depend on the exact
        # column padding stress-ng happened to use.
        if 'stressor bogo ops' in ' '.join(line.split()):
            # Layout: header (i), units sub-header (i + 1), data row (i + 2).
            # A truncated log may end before the data row.
            if i + 2 < len(lines):
                metrics_data = lines[i + 2]
            break

    if metrics_data is None:
        raise ValueError("Could not find metrics data in log content")

    # Drop the "stress-ng: metrc: [<pid>]" prefix. The PID differs on every
    # run, so cut at the closing bracket instead of matching a specific PID.
    _prefix, bracket, remainder = metrics_data.partition(']')
    data = remainder if bracket else metrics_data

    parts = data.split()
    if len(parts) < len(columns):
        raise ValueError(
            f"Expected {len(columns)} fields in metrics row, "
            f"found {len(parts)}: {metrics_data!r}"
        )

    # First field is the stressor name; the remaining ones are numeric.
    row_data = [parts[0], *(float(value) for value in parts[1:len(columns)])]

    # Create DataFrame with a single row
    return pd.DataFrame([row_data], columns=columns)

# Sample stress-ng output (one "aio" stressor run) used to exercise the parser
log_content = """stress-ng: info:  [63919] setting to a 1 min run per stressor
stress-ng: info:  [63919] dispatching hogs: 24 aio
stress-ng: metrc: [63919] stressor       bogo ops real time  usr time  sys time   bogo ops/s     bogo ops/s CPU used per       RSS Max
stress-ng: metrc: [63919]                           (secs)    (secs)    (secs)   (real time) (usr+sys time) instance (%)          (KB)
stress-ng: metrc: [63919] aio              131245     60.05      0.40      0.98      2185.70       95571.29         0.10          3284"""

# Turn the sample log into a one-row DataFrame
df = parse_stress_ng_log(log_content)

# Show the heading and the DataFrame, each on its own line
print("\nExtracted metrics as DataFrame:", df, sep="\n")


Extracted metrics as DataFrame:
  stressor  bogo_ops  real_time  usr_time  sys_time  bogo_ops_real_time  \
0      aio  131245.0      60.05       0.4      0.98              2185.7   

   bogo_ops_usr_sys_time  cpu_used_per_instance  rss_max  
0               95571.29                    0.1   3284.0  
