In [2]:
import pandas as pd
import re


Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [3]:
def import_csv_file(file_path):
    """Read a CSV file into a DataFrame with duplicate rows removed.

    Malformed lines are skipped by the parser (``on_bad_lines='skip'``).
    Rows that are exact duplicates of an earlier row are dropped and the
    index is renumbered from 0. A status message is printed in both the
    success and the failure case.

    Args:
        file_path: Location of the CSV file, passed straight to
            ``pd.read_csv``.

    Returns:
        The de-duplicated DataFrame, or ``None`` when the file does not
        exist. Other read errors are not caught here and propagate.
    """
    try:
        loaded = pd.read_csv(file_path, on_bad_lines='skip')
    except FileNotFoundError:
        print(f"❌ エラー: ファイルが見つかりません。パスを確認してください: '{file_path}'")
        return None

    # Remove repeated rows (not columns), then rebuild a clean 0..n-1 index.
    unique_rows = loaded.drop_duplicates()
    result = unique_rows.reset_index(drop=True)
    print(f"✅ データファイル '{file_path}' を正常に読み込みました。")
    return result

In [4]:
def create_accuracy_matrix(df: pd.DataFrame) -> list:
    """Split the test-accuracy column into consecutive runs keyed by seed 0.

    Rows are consumed in frame order. The first row always opens the first
    run; after that, every row whose ``dataset_seed`` equals 0 closes the
    current run and opens a new one. Each run becomes one inner list of the
    result.

    Args:
        df: Frame containing a ``dataset_seed`` column (each value must be
            convertible with ``int``) and a ``テスト精度 (Test Acc)`` column.

    Returns:
        A list of lists of accuracy values, one inner list per run. An empty
        frame yields an empty list.

    Raises:
        KeyError: If either required column is missing.
        ValueError: If a seed value cannot be converted to ``int``
            (for example NaN).
    """
    accuracy_matrix = []
    current_list = []

    # Only two columns are needed, so walk them directly instead of
    # materialising a Series per row with ``iterrows``.
    seeds = df['dataset_seed']
    accuracies = df['テスト精度 (Test Acc)']

    for raw_seed, acc in zip(seeds, accuracies):
        seed = int(raw_seed)  # validate every row, including the first

        # "Have we seen a row yet?" is answered by the run itself rather than
        # by a magic previous-seed value, so no seed value can be mistaken
        # for the "nothing seen" marker.
        if current_list and seed == 0:
            accuracy_matrix.append(current_list)
            current_list = []

        current_list.append(acc)

    # Flush the last, still-open run.
    if current_list:
        accuracy_matrix.append(current_list)

    return accuracy_matrix


In [5]:
def compute_aa_and_aia(acc_matrix: list[list[float]]) -> tuple[list[float], list[float]]:
    """Compute AA_k and AIA_k for every step k of an accuracy matrix.

    ``acc_matrix[k][j]`` is taken to be a_{k,j}: the accuracy on task j
    measured after training on task k.

    For each step k:
      * AA_k  is the mean of ``acc_matrix[k][0..k]`` (tasks j <= k).
      * AIA_k is the mean of ``AA_0 .. AA_k``.

    Returns:
        A pair ``(AA_list, AIA_list)`` holding AA_k and AIA_k for each step,
        both the same length as ``acc_matrix``.
    """
    AA_list: list[float] = []
    AIA_list: list[float] = []

    for step, accs_after_step in enumerate(acc_matrix):
        # Only the tasks learned so far (j <= step) count towards AA.
        learned = accs_after_step[:step + 1]
        AA_list.append(sum(learned) / len(learned))

        # AIA is the average of every AA computed up to and including now.
        AIA_list.append(sum(AA_list) / len(AA_list))

    return AA_list, AIA_list

In [6]:
# Evaluate the random-baseline run: load its CSV, group the accuracies into
# a matrix, and print the (AA_list, AIA_list) pair.
CSV_PATH = "/work/csv/DIL/random_baseline.csv"

df = import_csv_file(CSV_PATH)
if df is None:
    # import_csv_file has already printed why loading failed; stop here with
    # a clear error instead of an AttributeError inside the next call.
    raise FileNotFoundError(CSV_PATH)

acc_matrix = create_accuracy_matrix(df)
print(compute_aa_and_aia(acc_matrix))

✅ データファイル '/work/csv/DIL/random_baseline.csv' を正常に読み込みました。
([0.9472155570983888, 0.6260015964508057, 0.5400640964508057, 0.4209485165774822, 0.4340544819831848, 0.3721454367041587, 0.34226190405232565, 0.3035982586443424, 0.2829304676916864, 0.26101762950420376], [0.9472155570983888, 0.7866085767745972, 0.7044270833333334, 0.6335574416443706, 0.5936568497121334, 0.5567382808774709, 0.5260987984738787, 0.4982862309951867, 0.47435781285035333, 0.4530237945157384])
