In [1]:
import pandas as pd

from pandas.api.types import CategoricalDtype

# Display order of the adapter configurations, written here from first to last.
# Composite/parameterised configs (e.g. "prefix_tuning[kv_size=64]") are renamed
# to the short names below ('mam', 'prefix_tuning', 'prefix_tuning_flat',
# 'unipelt') in the data-loading cell before this order is applied.
_adapter_display_order = (
    'full_tuning',
    'houlsby',
    'pfeiffer',
    'scaled_parallel',
    'ln_tuning',
    'lora',
    'hf_lora',
    'hf_lora_all',
    'hf_krona',
    'compacter',
    'compacter++',
    'ia3',
    'mam',
    'prefix_tuning',
    'prefix_tuning_flat',
    'unipelt',
)

# The list is stored reversed: the summary table sorts this column in
# descending categorical order, which yields the order written above.
adapter_order = list(reversed(_adapter_display_order))


In [2]:
# Build the per-(model, adapter, num_gpus) throughput summary from the wandb
# export CSV and write it to ../wandb_results/throughput.csv.
#
# Defines for later cells:
#   df    - one row per run, with normalised adapter names and per-GPU throughput
#   table - aggregated summary with two-level column labels
#           (e.g. table["Name"]["count"])
df = pd.read_csv("../wandb_results/wandb_export_2023-12-22T05_29_37.476-05_00.csv")

relevant_columns = ['model_name_or_path', 'adapter_config_string', 'total_parameters',
                    'trainable_parameters', 'throughput_examples', 'test/throughput_examples',
                    'total_batch_size', 'per_device_train_batch_size', 'gradient_accumulation_steps',
                    'dataset_name', 'dataset_config_name', 'Name']

# Raw adapter config strings -> short names used in `adapter_order`.
# The kv_size=64 and kv_size=128 variants of a config share one name.
adapter_aliases = {
    "prefix_tuning[bottleneck_size=800,kv_size=64]|par_bn": "mam",
    "prefix_tuning[bottleneck_size=800,kv_size=128]|par_bn": "mam",
    "prefix_tuning_flat[kv_size=128]": "prefix_tuning_flat",
    "prefix_tuning_flat[kv_size=64]": "prefix_tuning_flat",
    "prefix_tuning[kv_size=128]": "prefix_tuning",
    "prefix_tuning[kv_size=64]": "prefix_tuning",
    "lora[r=8,use_gating=True]|prefix_tuning[prefix_length=10,use_gating=True,kv_size=128]|seq_bn[reduction_factor=16,use_gating=True]": "unipelt",
    "lora[r=8,use_gating=True]|prefix_tuning[prefix_length=10,use_gating=True,kv_size=64]|seq_bn[reduction_factor=16,use_gating=True]": "unipelt",
}

# One chain instead of repeated in-place column writes on a filtered frame:
# every step returns a new frame, so re-running the cell is idempotent and no
# chained-assignment warning can be raised.
# Callables in `assign` are evaluated in order, so later columns may use
# `num_gpus` computed just before them.
df = (
    df[relevant_columns]
    # Keep only runs that have a value in every relevant column.
    .dropna()
    .assign(
        adapter_config_string=lambda runs: runs['adapter_config_string'].replace(adapter_aliases),
        # num_gpus = total_batch_size / (per_device_train_batch_size * gradient_accumulation_steps)
        num_gpus=lambda runs: runs['total_batch_size'] / (
            runs['per_device_train_batch_size'] * runs['gradient_accumulation_steps']),
        # throughput_examples_per_gpu = throughput_examples / num_gpus
        throughput_examples_per_gpu=lambda runs: runs['throughput_examples'] / runs['num_gpus'],
        # same normalisation for test/throughput_examples
        throughput_examples_per_gpu_test=lambda runs: runs['test/throughput_examples'] / runs['num_gpus'],
    )
)
# df = df[df.model_name_or_path != "t5-11b"]

# Casting to a categorical turns any value that is not a listed category into
# NaN without an error. Surface such names explicitly so they can be added to
# `adapter_order` or to `adapter_aliases`.
unmapped_adapters = sorted(set(df['adapter_config_string']) - set(adapter_order), key=str)
if unmapped_adapters:
    print("WARNING: adapter configs missing from adapter_order (shown as NaN below):",
          unmapped_adapters)

# Mixing single aggregations with ['mean', 'std'] lists produces two-level
# column labels; later cells index the result that way.
table = df.groupby(
    ['model_name_or_path', 'adapter_config_string', 'num_gpus'])\
    .agg({
        'total_parameters': 'mean',
        'trainable_parameters': 'mean',
        'throughput_examples_per_gpu': ['mean', 'std'],
        'throughput_examples_per_gpu_test': ['mean', 'std'],
        'Name': 'count',  # number of runs in each group
    }).reset_index()

table['adapter_config_string'] = table['adapter_config_string'].astype(
    CategoricalDtype(categories=adapter_order, ordered=True)
)
# `adapter_order` is stored reversed, so sorting the adapter column in
# descending order lists adapters from 'full_tuning' to 'unipelt'.
table = table.sort_values(
    by=['model_name_or_path', 'num_gpus', 'adapter_config_string'],
    ascending=[False, True, False]
)

# nunique() ignores NaN, so an unmapped adapter is not counted as a config.
print("Unique models:", table['model_name_or_path'].nunique())
print("Unique adapter configs:", table['adapter_config_string'].nunique())

print(table.shape)
table.to_csv('../wandb_results/throughput.csv', index=False)
table

Unique models: 3
Unique adapter configs: 16
(48, 10)


Unnamed: 0_level_0,model_name_or_path,adapter_config_string,num_gpus,total_parameters,trainable_parameters,throughput_examples_per_gpu,throughput_examples_per_gpu,throughput_examples_per_gpu_test,throughput_examples_per_gpu_test,Name
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,mean,mean,mean,std,mean,std,count
34,t5-large,full_tuning,1.0,737668100.0,737668100.0,214.719403,55.264638,115.174829,59.766666,3
38,t5-large,houlsby,1.0,783254500.0,12687360.0,138.443747,10.812014,35.710892,25.454842,4
43,t5-large,pfeiffer,1.0,776910800.0,6343680.0,122.391611,31.848107,39.61146,29.724454,4
46,t5-large,scaled_parallel,1.0,820972600.0,50405420.0,151.069752,16.356705,42.550947,34.734505,4
40,t5-large,ln_tuning,1.0,737668100.0,124928.0,188.842627,16.314189,58.052051,53.865697,4
41,t5-large,lora,1.0,772926500.0,2359296.0,134.322818,2.925753,42.027567,36.207838,4
36,t5-large,hf_lora,1.0,740027400.0,2359296.0,152.381361,9.47381,51.150351,45.099972,4
37,t5-large,hf_lora_all,1.0,746318800.0,8650752.0,102.774782,9.835955,41.553132,37.644594,4
35,t5-large,hf_krona,1.0,737889300.0,221184.0,138.711967,10.167677,38.443289,45.264955,5
32,t5-large,compacter,1.0,770871400.0,304192.0,100.868252,14.46249,29.621042,26.289868,4


In [140]:
# Sum of the per-group run counts, i.e. the total number of runs in the table.
table[("Name", "count")].sum()

112