In [None]:
import src.performance_graphs as pg

# Sample log lines, one for each parser exercised below.
user_line = "2025-02-05 06:48:42,548 root[7] INFO Added users/user entry uid=user_202502041219_39992,ou=ad-sync,dc=univention-organization,dc=intranet with primary key '7c1fbd1f-317f-43c9-8a29-59b33d30050e'"
group_line = "2025-02-05 07:07:16,967 root[7] INFO Added groups/group entry cn=202502041219_group-4_max-members-10_0,ou=ad-sync,dc=univention-organization,dc=intranet with primary key 'd1552048-bd3d-44b9-b417-34dcb5a57c00'"
simple_user_line = "2025-01-28 07:31:07,186 root[7] INFO Added users/user entry uid=user24,ou=ad-sync,dc=univention-organization,dc=intranet with primary key 'a6b7fc3d-6151-407f-b07c-c7d4e930984d'"

# Print what each parser returns, in the order: user, group, simple user.
print(pg.parse_user_line(user_line))
print(pg.parse_group_line(group_line))
print(pg.parse_simple_user_line(simple_user_line))

In [None]:
import matplotlib.pyplot
import matplotlib.ticker

def plot_time_delta(df, prefix, column_id, column_description):
    """Scatter-plot ``df['time_delta']`` against ``df[column_id]`` and save it.

    Args:
        df: DataFrame providing the ``column_id`` column and a ``time_delta`` column.
        prefix: Path prefix; the figure is written to ``{prefix}_time-delta.jpeg``.
        column_id: Name of the column used for the x axis.
        column_description: Text used for the x-axis label and in the title.
    """
    plt = matplotlib.pyplot
    fig, ax = plt.subplots(figsize=(12, 6))

    ax.scatter(df[column_id], df['time_delta'], alpha=0.7, s=5)
    ax.set_title(f"Time Delta vs {column_description}")
    ax.set_xlabel(column_description)
    ax.set_ylabel("Time Delta (seconds)")
    ax.grid(True)

    # Save before show(): the figure is written as a 600 dpi JPEG first.
    fig.savefig(f"{prefix}_time-delta.jpeg", dpi=600, bbox_inches="tight", format="jpeg")
    plt.show()


def plot_time_delta_logarythmic(df, prefix, column_id, column_description):
    """Scatter-plot ``df['time_delta']`` against ``df[column_id]`` on a log y axis and save it.

    Args:
        df: DataFrame providing the ``column_id`` column and a ``time_delta`` column.
        prefix: Path prefix; the figure is written to
            ``{prefix}_time-delta-logarythmic.jpeg``.
        column_id: Name of the column used for the x axis.
        column_description: Text used for the x-axis label and in the title.
    """

    def format_tick(value, _pos):
        # Ticks >= 1 keep the plain integer label. Below 1, int() would
        # collapse every decade tick (0.1, 0.01, ...) to "0", so show the
        # actual value instead.
        if value >= 1:
            return f"{int(value)}"
        return f"{value:g}"

    matplotlib.pyplot.figure(figsize=(12, 6))
    matplotlib.pyplot.scatter(df[column_id], df['time_delta'], label="Time Delta", alpha=0.7, s=5)
    matplotlib.pyplot.yscale('log')
    matplotlib.pyplot.title(f"Time Delta vs {column_description} (Logarithmic Scale)")
    matplotlib.pyplot.xlabel(column_description)
    matplotlib.pyplot.ylabel("Time Delta (seconds)")

    ax = matplotlib.pyplot.gca()
    ax.yaxis.set_major_formatter(matplotlib.ticker.FuncFormatter(format_tick))
    ax.yaxis.set_major_locator(matplotlib.ticker.LogLocator(base=10.0, subs=None, numticks=10))

    # which="both" draws grid lines for minor ticks too.
    matplotlib.pyplot.grid(True, which="both", linestyle="--", linewidth=0.5)
    matplotlib.pyplot.legend()

    matplotlib.pyplot.savefig(f"{prefix}_time-delta-logarythmic.jpeg", dpi=600, bbox_inches="tight", format="jpeg")

    matplotlib.pyplot.show()


def plot_entries_created_per_hour(df, prefix, column_id, column_description):
    """Plot ``df[column_id]`` against hours elapsed since the earliest timestamp and save it.

    Args:
        df: DataFrame providing a datetime-like ``timestamp`` column and the
            ``column_id`` column. It is not modified.
        prefix: Path prefix; the figure is written to ``{prefix}_over-time.jpeg``.
        column_id: Name of the column plotted on the y axis.
        column_description: Text used for the title, the y-axis label and the
            legend entry.
    """
    # Keep the elapsed-hours series local instead of writing a
    # 'hours_since_start' column into the caller's frame: that mutated the
    # input and can trigger SettingWithCopyWarning when df is a slice.
    start_time = df['timestamp'].min()
    hours_since_start = (df['timestamp'] - start_time).dt.total_seconds() / 3600

    matplotlib.pyplot.figure(figsize=(12, 6))
    # Legend follows the description so group plots are no longer labelled "Cumulative Users".
    matplotlib.pyplot.plot(hours_since_start, df[column_id], label=column_description, alpha=0.8)

    matplotlib.pyplot.title(f"{column_description} Created Over Time")
    matplotlib.pyplot.xlabel("Time (Hours Since Start)")
    matplotlib.pyplot.ylabel(column_description)

    ax = matplotlib.pyplot.gca()
    # Major tick every 10 hours, minor tick every hour.
    ax.xaxis.set_major_locator(matplotlib.ticker.MultipleLocator(10))
    ax.xaxis.set_minor_locator(matplotlib.ticker.MultipleLocator(1))

    matplotlib.pyplot.grid(True, linestyle="--", linewidth=0.5)
    matplotlib.pyplot.legend()
    matplotlib.pyplot.savefig(f"{prefix}_over-time.jpeg", dpi=600, bbox_inches="tight", format="jpeg")
    matplotlib.pyplot.show()


In [None]:
import src.performance_graphs as pg

def total_time(df):
    """Return the last row's ``timestamp`` minus the first row's ``timestamp``.

    Rows are taken by position as they appear in ``df``; no sorting is done.
    """
    stamps = df['timestamp']
    first_stamp, last_stamp = stamps.iloc[0], stamps.iloc[-1]
    return last_stamp - first_stamp

def create_users_graphs(users_raw, user_prefix, outlier_seconds):
    """Print summary figures for a user-creation log and render its three plots.

    Args:
        users_raw: Frame passed to ``pg.remove_outliers``; the result needs
            ``timestamp`` and ``time_delta`` columns.
        user_prefix: Path prefix handed to each plot function for its output file.
        outlier_seconds: Threshold forwarded to ``pg.remove_outliers``
            (presumably a time-delta cutoff in seconds; see that helper).
    """
    # reset_index() without drop=True exposes the previous index as an
    # "index" column, which the plots below use as their id column.
    filtered_users = pg.remove_outliers(users_raw, outlier_seconds).reset_index()

    print(f"Total users created: {len(filtered_users)}")
    print(f"Total user creation time: {total_time(filtered_users)}")
    print(f"average user creation time: {filtered_users['time_delta'].mean()}")

    plot_time_delta(filtered_users, user_prefix, "index", "User ID")
    plot_time_delta_logarythmic(filtered_users, user_prefix, "index", "User ID")
    plot_entries_created_per_hour(filtered_users, user_prefix, "index", "Number of users")

In [None]:
import src.performance_graphs as pg

# Parse the local-storage sync log twice: once with the user-line parser,
# once with the group-line parser.
users_raw_local = pg.process_log_file("./40k-local-storage.log", pg.parse_user_line)
groups_raw_local = pg.process_log_file("./40k-local-storage.log", pg.parse_group_line)

# Preview the first rows of both frames.
display(users_raw_local.head(), groups_raw_local.head())

In [None]:
# Output-path prefixes for the local-storage run; the groups cell below reuses `prefix`.
prefix = "./results_2025-02/40k-local-storage-scatter"
user_prefix = prefix + "_users"

create_users_graphs(
    users_raw=users_raw_local,
    user_prefix=user_prefix,
    outlier_seconds=3,
)

In [None]:
import src.performance_graphs as pg

# Builds on `prefix` from the local-storage users cell.
group_prefix = prefix + "_groups"

# reset_index() exposes the previous index as the "index" column plotted below.
groups = pg.remove_outliers(groups_raw_local, 2).reset_index()

print(f"groups created: {len(groups)}")
print(f"Total group creation time: {total_time(groups)}")
print("Average group creation times:")
print(pg.group_stats(groups))

plot_time_delta(df=groups, prefix=group_prefix, column_id="index", column_description="Group Counter")
plot_time_delta_logarythmic(df=groups, prefix=group_prefix, column_id="index", column_description="Group Counter")
plot_entries_created_per_hour(df=groups, prefix=group_prefix, column_id="index", column_description="Group Counter")

In [None]:
import src.performance_graphs as pg

# Parse the ceph-PVC full-sync log twice: once with the user-line parser,
# once with the group-line parser.
users_raw_ceph = pg.process_log_file("./40k-users-ceph-pvc-full-sync.log", pg.parse_user_line)
groups_raw_ceph = pg.process_log_file("./40k-users-ceph-pvc-full-sync.log", pg.parse_group_line)

# Preview the first rows of both frames.
display(users_raw_ceph.head(), groups_raw_ceph.head())

In [None]:
import src.performance_graphs as pg

# Output-path prefixes for the ceph-PVC run; the groups cell below reuses `prefix`.
prefix = "./results_2025-02/40k-ceph-pvc-scatter"
user_prefix = prefix + "_users"

create_users_graphs(
    users_raw=users_raw_ceph,
    user_prefix=user_prefix,
    outlier_seconds=120,
)

In [None]:
import src.performance_graphs as pg

# Builds on `prefix` from the ceph-PVC users cell.
group_prefix = prefix + "_groups"

# reset_index() exposes the previous index as the "index" column plotted below.
groups = pg.remove_outliers(groups_raw_ceph, 8).reset_index()

print(f"groups created: {len(groups)}")
print(f"Total group creation time: {total_time(groups)}")
print("Average group creation times:")
print(pg.group_stats(groups))

plot_time_delta(df=groups, prefix=group_prefix, column_id="index", column_description="Group Counter")
plot_time_delta_logarythmic(df=groups, prefix=group_prefix, column_id="index", column_description="Group Counter")
plot_entries_created_per_hour(df=groups, prefix=group_prefix, column_id="index", column_description="Group Counter")

In [None]:
import src.performance_graphs as pg

# Parse the log of the initial sync that failed at group sync, using the
# simple user-line parser.
users_raw_unfinished = pg.process_log_file("./ldap-index-deployment-initial-sync_failed-at-group-sync.log", pg.parse_simple_user_line)
# Preview the frame that was just loaded (this used to show users_raw_ceph).
display(users_raw_unfinished.head())

In [None]:
# `users` was never assigned at notebook scope (it only exists as a local
# inside create_users_graphs), so this cell raised NameError on a fresh kernel.
# NOTE(review): presumably the frame being inspected is the unfinished-sync
# user log loaded in the previous cell. Confirm that the 100170 / 821
# constants below were derived from this exact frame.
users = users_raw_unfinished

# Inspect rows near the end, and how many rows exceed 100170.
display(users.iloc[-850:-800])
display(len(users) - 100170)

# Drop the trailing 821 rows.
users_cleaned = users.iloc[:-821]
display(users_cleaned.tail())

In [None]:
import src.performance_graphs as pg

prefix = "./results_2025-02/100k-ldap-index-failed-group"
user_prefix = f"{prefix}_users_cleaned"

# Plot the trimmed frame that this prefix names. The call used to pass
# users_raw_ceph, which re-rendered the ceph run under the 100k file names.
create_users_graphs(users_cleaned, user_prefix, 4)