In [13]:
from xetrack import Reader
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.io as pio
# Render plotly figures through the "iframe" renderer.
pio.renderers.default = "iframe"


# Raise pandas' display limits so long/wide frames are printed without truncation.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

df = Reader('../output/stats.db').to_df()
df['time'] = df['function_time']
# Filter last experiment: keep only the rows that share the final row's track_id.
last_track_id = df['track_id'].iloc[-1]
# df  = df[df['p_memory_percent'].isna()] # split total count
# The 'lfs-git' tech is excluded from every chart below.
# .copy() detaches the filtered rows from the frame returned by Reader, so the
# column assignments that follow cannot emit pandas' SettingWithCopyWarning.
df = df[(df['track_id'] == last_track_id) & (df['tech'] != 'lfs-git')].copy()

# Literal (non-regex) replacements: strip the '_upload' suffix, then turn the
# remaining underscores into spaces. regex=False pins the behaviour regardless
# of the pandas version's default for `regex`.
df['name'] = (
    df['name']
    .str.replace('_upload', '', regex=False)
    .str.replace('_', ' ', regex=False)
)
# Throughput = file_size / time (presumably MB and seconds - TODO confirm units).
df['mb/s'] = df['file_size']/df['time']
df.to_csv('../output/results.csv', index=False)
# errors = set(df['error'].fillna('').values)
# if len(errors) > 1:
    # print(f"Errors: {errors}")
print(f"Steps: {df['step'].max()+1}")
print(f"Data size: {len(df)}")
print("\nTime per tech - lower is better")
print(f"track_id: {df['track_id'].iloc[-1]}")
# One bar per tech, ordered by ascending total time.
fig = px.bar(df, x='tech', y='time', color='tech')
fig.update_layout(xaxis={'categoryorder':'total ascending'})
fig.show()

Steps: 1
Data size: 6

Time per tech - lower is better
track_id: d74547a3-d8e8-4328-9f0e-5fe75717310b


In [22]:
# Throughput chart: one bar per benchmark name, colored by tech and ordered
# from the highest total 'mb/s' to the lowest.
print("MB per Second - higher is better")
fig = px.bar(df, color='tech', x='name', y='mb/s')
fig.update_layout(xaxis=dict(categoryorder='total descending'))
fig.show()

MB per Second - higher is better


In [23]:
pio.renderers.default='iframe'
groups = df.groupby('name')
# numeric_only=True restricts the aggregation to numeric columns. Without it,
# pandas 2.x also tries to "sum" the string columns (timestamp, track_id, ...),
# which concatenates them or fails on mixed object columns.
sums = groups.sum(numeric_only=True)
# fig1: share of the total time spent by each name.
fig1 = px.pie(sums, values='time', names=sums.index)
fig1.update_traces(textposition='inside', textinfo='percent+label')

# fig2: one line per name showing the time of each step.
fig2 = go.Figure()
for name, group in groups:
    fig2.add_trace(go.Scatter(x=group['step'], y=group['time'], mode='lines', name=str(name)))
fig2.update_layout(xaxis_title='Step', yaxis_title='Time')

# Running totals of time within each name and within each tech, accumulated in
# the frame's current row order. assign() returns a new frame instead of
# writing into `df` in place, so a `df` that is a filtered slice of another
# frame cannot emit SettingWithCopyWarning here.
df = df.assign(
    cumulative_time=df.groupby('name')['time'].cumsum(),
    tech_cumulative_time=df.groupby('tech')['time'].cumsum(),
)

# fig3: cumulative time over steps, one line per name.
fig3 = px.line(df, x='step', y='cumulative_time', color='name')
fig3.update_layout(title='Cumulative Lines of Time per Steps', xaxis_title='Steps', yaxis_title='Cumulative Time')

# fig4: cumulative time over steps, one line per tech.
fig4 = px.line(df, x='step', y='tech_cumulative_time', color='tech')
fig4.update_layout(title='Cumulative Lines of Time per Steps',
                  xaxis_title='Steps',
                  yaxis_title='Cumulative Time')

fig1.show()

In [24]:
# Display fig3: the line chart of 'cumulative_time' over 'step', one line per 'name'.
fig3.show()

In [25]:
# Display fig2: one line per 'name' group plotting 'time' against 'step'.
fig2.show()

In [26]:
# Display fig4: the line chart of 'tech_cumulative_time' over 'step', one line per 'tech'.
fig4.show()

In [27]:
# Total time per step, reported in minutes. groupby() sums every step in a
# single pass and yields the steps in sorted order; iterating a set gives no
# ordering guarantee and re-filters the whole frame once per step.
step_seconds = df.groupby('step')['time'].sum()
for step, seconds in step_seconds.items():
    print(f"Step {step} took {seconds/60:.2f} minutes")
# Grand total over all rows, converted from seconds to hours.
print(f"total time is: {df['time'].sum()/(60*60):.2f} hours")

Step 0 took 16.34 minutes
Step 1 took 23.68 minutes
Step 2 took 36.45 minutes
Step 3 took 53.81 minutes
Step 4 took 41.44 minutes
Step 5 took 45.24 minutes
total time is: 3.62 hours


In [28]:
# Mean 'time' for every 'function', plus each mean expressed as a multiple of
# the smallest mean (so the fastest function has ratio 1.0).
times = df.groupby('function')['time'].mean()
fastest = times.min()
ratios = times.div(fastest)
print("Average time:")
# Pie chart of the mean times, labelled with the function names.
fig5 = px.pie(times, names=times.index, values='time')
fig5.update_traces(textinfo='percent+label', textposition='inside')
fig5.show()
# Last expression of the cell: the ratios, smallest first.
ratios.sort_values()

Average time:


function
split-gitxet_upload    1.000000
split-pyxet_upload     1.003936
split-s3_upload        1.489194
split-lakefs_upload    2.265403
split-dvc_upload       5.277672
split-lfs_s3_upload    6.153035
Name: time, dtype: float64

In [30]:
# Display the working frame, including the derived 'mb/s', 'cumulative_time'
# and 'tech_cumulative_time' columns added by earlier cells.
# NOTE(review): this renders the entire frame; consider df.head() for a preview.
df

Unnamed: 0,timestamp,track_id,pyxet,gitxet,branch,numeric,start_rows,suffix,memory_percent,bytes_recv,error,merge,tech,seed,name,function,args,kwargs,add_rows,file_size,workflow,filename,time,label,step,cpu,p_memory_percent,bytes_sent,disk_percent,out,mb/s,cumulative_time,tech_cumulative_time
207,15-09-2023 15:27:41.225866,de21394d-03dc-47c6-9115-50715d6e0f6a,0.1.4,gitxetcore 0.11.0-5dc8c78\n,ex-split-10,True,100000000,parquet,,,,True,pyxet,0,pyxet-split,split-pyxet_upload,,,10000000,2297.098632,split,splits.parquet,138.643967,split-10,0,,,,,,16.568327,138.643967,138.643967
211,15-09-2023 15:30:44.059398,de21394d-03dc-47c6-9115-50715d6e0f6a,0.1.4,gitxetcore 0.11.0-5dc8c78\n,ex-split-10,True,100000000,parquet,,,,True,s3,0,s3-split,split-s3_upload,,,10000000,2297.098632,split,splits.parquet,161.028302,split-10,0,,,,,,14.265186,161.028302,161.028302
215,15-09-2023 15:33:02.478592,de21394d-03dc-47c6-9115-50715d6e0f6a,0.1.4,gitxetcore 0.11.0-5dc8c78\n,ex-split-10,True,100000000,parquet,,,,True,gitxet,0,gitxet-split,split-gitxet_upload,,,10000000,2297.098632,split,splits.parquet,114.311155,split-10,0,,,,,,20.09514,114.311155,114.311155
219,15-09-2023 15:37:12.512899,de21394d-03dc-47c6-9115-50715d6e0f6a,0.1.4,gitxetcore 0.11.0-5dc8c78\n,ex-split-10,True,100000000,parquet,,,,True,lakefs,0,lakefs-split,split-lakefs_upload,,,10000000,2297.098632,split,splits.parquet,230.882549,split-10,0,,,,,,9.949209,230.882549,230.882549
227,15-09-2023 15:42:50.742565,de21394d-03dc-47c6-9115-50715d6e0f6a,0.1.4,gitxetcore 0.11.0-5dc8c78\n,ex-split-10,True,100000000,parquet,,,,True,lfs-s3,0,lfs-s3-split,split-lfs_s3_upload,,,10000000,2297.098632,split,splits.parquet,186.79685,split-10,0,,,,,,12.297309,186.79685,186.79685
231,15-09-2023 15:45:42.631758,de21394d-03dc-47c6-9115-50715d6e0f6a,0.1.4,gitxetcore 0.11.0-5dc8c78\n,ex-split-10,True,100000000,parquet,,,,True,dvc,0,dvc-split,split-dvc_upload,,,10000000,2297.098632,split,splits.parquet,148.506406,split-10,0,,,,,,15.46801,148.506406,148.506406
235,15-09-2023 15:47:31.173300,de21394d-03dc-47c6-9115-50715d6e0f6a,0.1.4,gitxetcore 0.11.0-5dc8c78\n,ex-split-10,True,100000000,parquet,,,,True,pyxet,0,pyxet-split,split-pyxet_upload,,,10000000,2297.09862,split,splits.parquet,88.495546,split-10,1,,,,,,25.957223,227.139513,227.139513
239,15-09-2023 15:50:45.583926,de21394d-03dc-47c6-9115-50715d6e0f6a,0.1.4,gitxetcore 0.11.0-5dc8c78\n,ex-split-10,True,100000000,parquet,,,,True,s3,0,s3-split,split-s3_upload,,,10000000,2297.09862,split,splits.parquet,173.99636,split-10,1,,,,,,13.201992,335.024662,335.024662
243,15-09-2023 15:52:51.692843,de21394d-03dc-47c6-9115-50715d6e0f6a,0.1.4,gitxetcore 0.11.0-5dc8c78\n,ex-split-10,True,100000000,parquet,,,,True,gitxet,0,gitxet-split,split-gitxet_upload,,,10000000,2297.09862,split,splits.parquet,101.511793,split-10,1,,,,,,22.628884,215.822948,215.822948
247,15-09-2023 15:57:35.980344,de21394d-03dc-47c6-9115-50715d6e0f6a,0.1.4,gitxetcore 0.11.0-5dc8c78\n,ex-split-10,True,100000000,parquet,,,,True,lakefs,0,lakefs-split,split-lakefs_upload,,,10000000,2297.09862,split,splits.parquet,262.747285,split-10,1,,,,,,8.742616,493.629834,493.629834
