In [1]:

import numpy as np
import time
import os

def generate_count_mat(rows, cols, min_val=0, max_val=100, seed=42):
    """Return a ``rows`` x ``cols`` array of pseudo-random integer counts.

    Values are drawn from the closed interval ``[min_val, max_val]`` using a
    NumPy ``Generator`` seeded with ``seed``, so identical arguments always
    yield identical matrices.
    """
    generator = np.random.default_rng(seed)
    # Generator.integers excludes the upper bound by default, hence the +1.
    exclusive_upper = max_val + 1
    return generator.integers(low=min_val, high=exclusive_upper, size=(rows, cols))

def write_matrix_to_txt(matrix, filename="temp_matrix.txt"):
    """Write ``matrix`` to ``filename`` as tab-separated integer text.

    Each element is formatted with ``%d``; rows are written one per line.
    """
    np.savetxt(fname=filename, X=matrix, delimiter="\t", fmt="%d")

def read_matrix_from_txt(filename="temp_matrix.txt"):
    """Load a tab-separated integer matrix from ``filename``.

    The result is always at least 2-D: without ``ndmin=2``, ``np.loadtxt``
    squeezes a single-row file down to a 1-D array, so a 1 x N matrix written
    with ``np.savetxt`` would not come back with its original shape.

    Returns:
        A NumPy integer array with one row per line of the file.
    """
    return np.loadtxt(filename, dtype=int, delimiter="\t", ndmin=2)

def profile_operations(matrix_row, matrix_col, label="Matrix", n_iter=100, filename_prefix="temp_matrix", io_n_iter=10):
    """Time a fixed set of NumPy operations on two matrices and print the results.

    For each operation the wall-clock time of every run is measured with
    ``time.perf_counter`` and the mean and standard deviation are printed.

    Args:
        matrix_row: 2-D array reduced along ``axis=1`` in the "row-major" cases.
        matrix_col: 2-D array reduced along ``axis=0`` in the "col-major" cases.
        label: Name printed in the report header.
        n_iter: Number of timed runs for the in-memory operations.
        filename_prefix: Prefix of the temporary text files used by the
            read/write operations (``<prefix>_row.txt`` / ``<prefix>_col.txt``).
        io_n_iter: Number of timed runs for operations whose name contains
            "read" or "write" (text I/O is much slower than the other cases).

    Notes:
        * The "row-major" / "col-major" labels only identify which argument
          is used; this function does not inspect or enforce the arrays'
          memory layout.
        * ``reshape(-1, 50)`` requires the total element count to be a
          multiple of 50; NumPy raises ``ValueError`` otherwise.
        * The temporary files are removed even if an operation raises.
    """
    row_path = f"{filename_prefix}_row.txt"
    col_path = f"{filename_prefix}_col.txt"

    print(f"--- {label} Test ({matrix_row.shape[0]} x {matrix_row.shape[1]}) ---")
    print("Matrix shapes:")
    print(f"- row-major: {matrix_row.shape}")
    print(f"- col-major: {matrix_col.shape}")

    # Insertion order matters: the write entries must run before the read
    # entries so that the files exist when they are loaded.
    operations = {
        "row-major sum": lambda: np.sum(matrix_row, axis=1),
        "col-major sum": lambda: np.sum(matrix_col, axis=0),
        "row-major mean": lambda: np.mean(matrix_row, axis=1),
        "col-major mean": lambda: np.mean(matrix_col, axis=0),
        "row-major std": lambda: np.std(matrix_row, axis=1),
        "col-major std": lambda: np.std(matrix_col, axis=0),
        "row-major transpose": lambda: matrix_row.T,
        "col-major transpose": lambda: matrix_col.T,
        "row-major reshape": lambda: matrix_row.reshape(-1, 50),
        "col-major reshape": lambda: matrix_col.reshape(-1, 50),
        "row-major write to txt": lambda: write_matrix_to_txt(matrix_row, filename=row_path),
        "col-major write to txt": lambda: write_matrix_to_txt(matrix_col, filename=col_path),
        "row-major read from txt": lambda: read_matrix_from_txt(filename=row_path),
        "col-major read from txt": lambda: read_matrix_from_txt(filename=col_path),
    }

    print("\n=== Multiple Operations Profiling ===")
    try:
        for op_name, operation in operations.items():
            # Use a per-operation local instead of rebinding ``n_iter``:
            # rebinding would make the reduced I/O count stick for every
            # operation that follows and discard the caller's value.
            is_io = "read" in op_name or "write" in op_name
            runs = io_n_iter if is_io else n_iter

            times = []
            for _ in range(runs):
                start_time = time.perf_counter()
                operation()
                end_time = time.perf_counter()
                times.append(end_time - start_time)
            mean_time = np.mean(times)
            std_time = np.std(times)

            print(f"{op_name:25s}: {mean_time:.6f} ± {std_time:.6f} seconds (mean ± std over {runs} runs)")
    finally:
        # Clean up the temporary files whether or not profiling succeeded.
        for path in (row_path, col_path):
            if os.path.exists(path):
                os.remove(path)


# --- Small case: 10,000 x 100 and its mirrored 100 x 10,000 shape ---
n_row_small, n_col_small = 10_000, 100
mat1_row = generate_count_mat(rows=n_row_small, cols=n_col_small)
mat1_col = generate_count_mat(rows=n_col_small, cols=n_row_small)
profile_operations(
    mat1_row,
    mat1_col,
    label="Small",
    filename_prefix="temp_small_matrix",
)
print("\n\n")

# --- Medium case: 100,000 x 500 (rebinds mat1_*, dropping the small arrays) ---
n_row_medium, n_col_medium = 100_000, 500
mat1_row = generate_count_mat(rows=n_row_medium, cols=n_col_medium)
mat1_col = generate_count_mat(rows=n_col_medium, cols=n_row_medium)
profile_operations(
    mat1_row,
    mat1_col,
    label="Medium",
    filename_prefix="temp_medium_matrix",
)
print("\n\n")

# --- Large case: 1,000,000 x 500 (5e8 elements per matrix) ---
n_row_large, n_col_large = 1_000_000, 500
mat2_row = generate_count_mat(rows=n_row_large, cols=n_col_large)
mat2_col = generate_count_mat(rows=n_col_large, cols=n_row_large)
profile_operations(
    mat2_row,
    mat2_col,
    label="Large",
    filename_prefix="temp_large_matrix",
)
print("\n\n")

--- Small Test (10000 x 100) ---
Matrix shapes:
- row-major: (10000, 100)
- col-major: (100, 10000)

=== Multiple Operations Profiling ===
row-major sum            : 0.000344 ± 0.000069 seconds (mean ± std over 100 runs)
col-major sum            : 0.000388 ± 0.000049 seconds (mean ± std over 100 runs)
row-major mean           : 0.000980 ± 0.000028 seconds (mean ± std over 100 runs)
col-major mean           : 0.000806 ± 0.000064 seconds (mean ± std over 100 runs)
row-major std            : 0.003473 ± 0.001114 seconds (mean ± std over 100 runs)
col-major std            : 0.002965 ± 0.000127 seconds (mean ± std over 100 runs)
row-major transpose      : 0.000000 ± 0.000000 seconds (mean ± std over 100 runs)
col-major transpose      : 0.000000 ± 0.000000 seconds (mean ± std over 100 runs)
row-major reshape        : 0.000000 ± 0.000000 seconds (mean ± std over 100 runs)
col-major reshape        : 0.000000 ± 0.000000 seconds (mean ± std over 100 runs)
row-major write to txt   : 0.139789 ± 0.0