In [1]:
import re
import numpy as np

def parse_execution_times_sec(log_file_path):
    """
    Count logged execution times per one-second bucket.

    Scans the log file for entries of the form
    ``Execution time of <name>: <digits> ns`` (any function name) and counts how
    many of the reported durations fall into each half-open one-second bucket
    ``[k, k + 1)``: 0-1s, 1-2s, etc. Lines that do not match are ignored, and only
    the first match on a line is used.

    :param log_file_path: Path to the log file.
    :return: Dictionary mapping ``(start_sec, end_sec)`` tuples of ``np.float64``
        to the number of entries in that bucket, in ascending bucket order. Only
        non-empty buckets are included; the dictionary is empty when no line
        matches.
    """
    ns_per_sec = 1_000_000_000

    # Regex pattern to extract execution time for any function
    pattern = re.compile(r"Execution time of (\w+):\s*(\d+)\s*ns")

    bucket_starts = []

    # errors="replace" keeps a single undecodable byte in the log from aborting
    # the whole parse; the pattern itself only needs the plain-text part of a line.
    with open(log_file_path, "r", errors="replace") as file:
        for line in file:
            match = pattern.search(line)
            if match:
                # Bucket with exact integer arithmetic: floor(ns / 1e9) is the
                # lower edge of the bucket, so a duration of exactly k seconds
                # belongs to [k, k + 1) and 0 ns belongs to [0, 1).
                bucket_starts.append(int(match.group(2)) // ns_per_sec)

    # Return empty dictionary if no valid data
    if not bucket_starts:
        return {}

    # np.unique returns the distinct bucket starts sorted ascending together with
    # how often each occurs, i.e. exactly the non-empty buckets.
    starts, counts = np.unique(
        np.asarray(bucket_starts, dtype=np.int64), return_counts=True
    )

    return {
        (np.float64(start), np.float64(start + 1)): count
        for start, count in zip(starts, counts)
    }

In [5]:
# Bucket the execution times recorded in the put_batch executions log.
parse_execution_times_sec(
    log_file_path="/tmp/zellular-simulation-logs/put_batch_executions.log"
)

{(np.float64(0.0), np.float64(1.0)): np.int64(157455)}

In [6]:
# Bucket the execution times recorded in the overall simulations log.
parse_execution_times_sec(
    log_file_path="/tmp/zellular-simulation-logs/simulations.log"
)

{(np.float64(0.0), np.float64(1.0)): np.int64(158896),
 (np.float64(1.0), np.float64(2.0)): np.int64(2),
 (np.float64(3.0), np.float64(4.0)): np.int64(1),
 (np.float64(6.0), np.float64(7.0)): np.int64(13),
 (np.float64(13.0), np.float64(14.0)): np.int64(5),
 (np.float64(19.0), np.float64(20.0)): np.int64(4)}