In [1]:
import pandas as pd

import lass
import lass.datasets
import lass.pipeline
from lass.log_handling import LogLoader, LogLoaderArgs, PaperTasks

In [2]:
# Which evaluation logs to load: the 'paper-full' task selection, BIG-G T=0
# models at four sizes, zero-shot, multiple-choice queries only.
log_selection = dict(
    logdir='../artifacts/logs/',
    tasks='paper-full',
    model_families=['BIG-G T=0'],
    model_sizes=['4b', '8b', '27b', '128b'],
    shots=[0],
    query_types=['multiple_choice'],
)
log_loader_args = LogLoaderArgs(**log_selection)
loader = LogLoader(log_loader_args)

# Flatten the loaded logs into one DataFrame, then run the lass preprocessing
# stages in their fixed order: binarize -> augment -> clean.
# NOTE(review): the exact effect of each stage is defined in lass.pipeline and
# is not visible from this notebook.
data = lass.datasets.to_dataframe(loader)
for pipeline_stage in (
    lass.pipeline.binarize,
    lass.pipeline.augment,
    lass.pipeline.clean,
):
    data = pipeline_stage(data)

# Hold out 20% as the test set using the "instance" split strategy with a
# fixed seed, so the split is reproducible across runs.
train, test = lass.datasets.split("instance", data, test_fraction=0.2, seed=42)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.loc[:, 'correct'] = df['correct'].astype(int)




In [3]:
# Check whether every model saw the same test instances for each task.
# The first model listed for a task serves as the reference; every other model
# must have the same number of rows and the same inputs in the same row order.
bad_tasks, good_tasks = set(), set()
for task in test["task"].unique():
    task_rows = test[test["task"] == task]
    model_names = task_rows["model_name"].unique()
    reference = task_rows[task_rows["model_name"] == model_names[0]]

    # Optimistically mark the task as good; it is demoted on the first mismatch.
    good_tasks.add(task)

    for model in model_names[1:]:
        candidate = task_rows[task_rows["model_name"] == model]

        if len(candidate) != len(reference):
            # Different row counts: an element-wise comparison is not possible.
            problem = f"Number of instances is not the same for task {task} and model {model}."
        elif not (candidate["input"].values == reference["input"].values).all():
            # Same row count, but at least one position holds a different input.
            problem = f"Instances are not the same for task {task} and model {model}."
        else:
            continue

        print(problem)
        bad_tasks.add(task)
        good_tasks.discard(task)

print(bad_tasks)
print(good_tasks)

Number of instances is not the same for task emojis_emotion_prediction and model 4b.
Number of instances is not the same for task emojis_emotion_prediction and model 27b.
Number of instances is not the same for task emojis_emotion_prediction and model 8b.
Number of instances is not the same for task real_or_fake_text and model 4b.
Number of instances is not the same for task real_or_fake_text and model 27b.
Number of instances is not the same for task real_or_fake_text and model 8b.
Number of instances is not the same for task strange_stories and model 4b.
Instances are not the same for task strange_stories and model 27b.
Instances are not the same for task strange_stories and model 8b.
Instances are not the same for task swedish_to_german_proverbs and model 4b.
Instances are not the same for task swedish_to_german_proverbs and model 27b.
Number of instances is not the same for task swedish_to_german_proverbs and model 8b.
{'strange_stories', 'emojis_emotion_prediction', 'real_or_fake_

In [13]:
# Check whether each task has the same number of test instances for every model.
# Rows are tasks, columns are models, cells are instance counts.
instance_counts = test.groupby(['task', 'model_name']).size().unstack()

# Compare every model's count against the first model column, row by row.
first_model_counts = instance_counts.iloc[:, 0]
matches_first_model = instance_counts.eq(first_model_counts, axis=0)

# A task is consistent only if all of its models agree with the first one.
task_is_consistent = matches_first_model.all(axis=1)

# Show only the inconsistent tasks (an empty Series means every task is fine).
print(task_is_consistent[~task_is_consistent])


task
emojis_emotion_prediction     False
real_or_fake_text             False
strange_stories               False
swedish_to_german_proverbs    False
dtype: bool


In [5]:
# Distinct model names present in the test split, in order of first appearance.
test["model_name"].unique()

array(['128b', '4b', '27b', '8b'], dtype=object)

In [6]:
# Scratch check: concatenating two identical frames with ignore_index=True
# stacks the rows and renumbers the index 0..n-1 instead of repeating 0..2.
a = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})
b = a.copy()
c = pd.concat((a, b), ignore_index=True)
c

Unnamed: 0,a,b
0,1,4
1,2,5
2,3,6
3,1,4
4,2,5
5,3,6


In [7]:
# Lift the row-display limit so the full value_counts table is shown.
# Note: this changes the pandas option for the rest of the session.
pd.set_option('display.max_rows', None)

# Per-task mean, variance, and count of n_targets.
n_targets_by_task = (
    data
    .groupby(["task"])
    .agg({"n_targets": ["mean", "var", "count"]})
    .reset_index()
)

# Distribution of the per-task variance: a variance of 0 means every instance
# of that task has the same number of targets.
n_targets_variance = n_targets_by_task[('n_targets', 'var')]
n_targets_variance.value_counts()

0.000000       80
0.025145        1
0.159336        1
2.084020        1
1.476378        1
0.510950        1
0.005826        1
0.006678        1
0.222910        1
0.008667        1
1906.021348     1
0.007227        1
81.561157       1
0.002228        1
0.221470        1
0.001950        1
0.883971        1
2.667319        1
2.041523        1
2.983055        1
2.604688        1
1.210203        1
0.042374        1
0.220471        1
87.800388       1
0.032357        1
15.152482       1
0.132565        1
0.011504        1
0.090852        1
0.244880        1
4.374808        1
0.160446        1
0.891387        1
0.704957        1
0.049900        1
0.018600        1
0.218455        1
0.640250        1
Name: (n_targets, var), dtype: int64